本文整理汇总了 Python 中 node.Node.is_nominal 方法的典型用法代码示例。如果您正苦于以下问题：Python Node.is_nominal 方法的具体用法？Python Node.is_nominal 怎么用？Python Node.is_nominal 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 node.Node 的用法示例。
在下文中一共展示了Node.is_nominal方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: more_tests
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def more_tests():
    '''
    Smoke-test the tree helpers on a hand-built tree and on ID3-trained trees.

    First assembles a three-level nominal tree by hand and prints its DNF
    form, its tree form and the result of breadth_first_search.  Then trains
    ID3 on a small two-column data set with three different
    (numerical_splits_count, depth) settings, printing the validation
    accuracy after each run, and finally prunes the last tree with
    reduced_error_pruning, printing it before and after.

    Output: the pruned tree
    '''
    def make_leaf(name, label):
        # A node that only carries a name and a label.
        leaf = Node()
        leaf.name = name
        leaf.label = label
        return leaf

    def make_nominal(name, children):
        # A nominal decision node over an already-built children dict.
        branch = Node()
        branch.name = name
        branch.is_nominal = True
        branch.children = children
        return branch

    left = make_nominal("attrib0", {1: make_leaf("attrib2", 1), 2: make_leaf("attrib3", 0)})
    right = make_nominal("attrib1", {1: make_leaf("attrib4", 2), 2: make_leaf("attrib5", 3)})

    root = Node()
    root.label = None
    root.decision_attribute = 1
    root.is_nominal = True
    root.name = "attrib"
    root.children = {1: left, 2: right}
    print(root.print_dnf_tree())
    print(root.print_tree())
    print(breadth_first_search(root, [root]))

    attribute_metadata = [
        {'name': "winner", 'is_nominal': True},
        {'name': "opprundifferential", 'is_nominal': False},
    ]
    data_set = [[1, 0.27], [0, 0.42], [0, 0.86], [0, 0.68], [0, 0.04], [1, 0.01],
                [1, 0.33], [1, 0.42], [1, 0.42], [0, 0.51], [1, 0.4]]

    # Each run gets its own fresh split-count list.
    tree = None
    for splits, depth in (([5, 5], 0), ([1, 1], 5), ([5, 5], 5)):
        tree = ID3(data_set, attribute_metadata, splits, depth)
        print(validation_accuracy(tree, data_set))
    print(tree.print_tree())

    validation_set = [[1, 0.11], [0, 0.42], [0, 0.86], [0, 0.55], [0, 0.66], [1, 0.01],
                      [1, 0.11], [1, 0.84], [1, 0.42], [0, 0.51], [1, 0.4]]
    tree = reduced_error_pruning(tree, data_set, validation_set)
    print(tree.print_tree())
    return tree
示例2: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build a decision tree for data_set with the ID3 procedure.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth <= 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set.
    An empty data_set yields a bare Node with nothing set on it.
    ========================================================================================================
    This function itself never modifies attribute_metadata or
    numerical_splits_count: every recursive call receives its own copy, so
    sibling subtrees and the caller keep seeing the original values.
    '''
    tree = Node()
    if len(data_set) == 0:
        return tree

    # Homogeneity is tested on the first column only (the class label).
    label = check_homogenous([[row[0]] for row in data_set])
    if isinstance(label, int):
        tree.label = label
        return tree

    only_label_column_left = len(data_set[0]) == 1
    no_splits_left = [0] * (len(numerical_splits_count) - 1) == numerical_splits_count[1:]
    if only_label_column_left or depth <= 0 or no_splits_left:
        tree.label = mode(data_set)
        return tree

    data_set = missingValues(data_set)
    best_attribute, threshold = pick_best_attribute(
        data_set, attribute_metadata, numerical_splits_count)
    if not best_attribute:
        # No attribute worth splitting on.
        tree.label = mode(data_set)
        return tree

    tree.decision_attribute = best_attribute
    tree.modeVal = mode([[row[best_attribute]] for row in data_set])
    tree.name = attribute_metadata[best_attribute]['name']

    # `is not False` rather than truthiness: a numeric threshold of 0 or 0.0
    # is a legitimate split value and must not be mistaken for "nominal".
    if threshold is not False:
        tree.is_nominal = False
        tree.splitting_value = threshold
        less, greater = split_on_numerical(data_set, best_attribute, threshold)
        # Work on a copy so the caller's budget is left untouched.
        remaining_splits = list(numerical_splits_count)
        remaining_splits[best_attribute] -= 1
        tree.children = [
            ID3(less, attribute_metadata, remaining_splits, depth - 1),
            ID3(greater, attribute_metadata, remaining_splits, depth - 1),
        ]
    else:
        tree.is_nominal = True
        subsets = split_on_nominal(data_set, best_attribute)
        # The chosen column is removed from the rows handed to the children,
        # so the per-attribute lists are trimmed the same way (on copies) to
        # keep attribute indices aligned with the remaining columns.
        child_metadata = list(attribute_metadata)
        child_metadata.pop(best_attribute)
        child_splits = list(numerical_splits_count)
        child_splits.pop(best_attribute)
        # NOTE(review): children are stored as a list, so the attribute value
        # that leads to each child is not recorded here - confirm that the
        # classifier used with this tree does not need it.
        tree.children = [
            ID3(removeAttribute(rows, best_attribute), child_metadata, child_splits, depth - 1)
            for rows in subsets.values()
        ]
    return tree
示例3: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build a decision tree for data_set with the ID3 procedure.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    A node becomes a leaf (labelled with mode(data_set)) when the entropy of
    data_set is below 0.15, when depth is 0, or when no attribute is chosen.
    For nominal splits, rows whose value is None are folded into the largest
    group that has a known value.
    '''
    node = Node()
    entropy_bound = 0.15  # entropy of data_set must be below this to become a leaf
    best_attribute, split_value = pick_best_attribute(
        data_set, attribute_metadata, numerical_splits_count)

    # `== False` is kept from the original: it is also true for index 0.
    if entropy(data_set) < entropy_bound or depth == 0 or best_attribute == False:
        node.label = mode(data_set)
        return node

    if split_value is not False:
        # Numeric attribute: two children keyed 0 and 1, in the order
        # returned by split_on_numerical.
        lower, upper = split_on_numerical(data_set, best_attribute, split_value)
        node.is_nominal = False
        node.splitting_value = split_value
        node.children[0] = ID3(lower, attribute_metadata, numerical_splits_count, depth - 1)
        node.children[1] = ID3(upper, attribute_metadata, numerical_splits_count, depth - 1)
    else:
        # Nominal attribute: one child per known value.
        groups = split_on_nominal(data_set, best_attribute)
        known = dict((value, list(rows)) for value, rows in groups.items()
                     if value is not None)
        missing_rows = groups.get(None, [])
        if missing_rows:
            if not known:
                # Every row lacks a value for this attribute, so the split
                # carries no information; fall back to a leaf.
                node.label = mode(data_set)
                return node
            # The merge target is chosen among known values only.  Letting the
            # None group compete would make it merge into itself whenever it
            # is the largest group, which never terminates.
            largest = max(known, key=lambda value: len(known[value]))
            known[largest].extend(missing_rows)
        node.is_nominal = True
        for value, rows in known.items():
            node.children[value] = ID3(rows, attribute_metadata, numerical_splits_count, depth - 1)

    node.name = attribute_metadata[best_attribute]['name']
    node.decision_attribute = best_attribute
    return node
示例4: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build a decision tree for data_set with the ID3 procedure.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    preprocessing(data_set, attribute_metadata) is run on every call before
    anything else is inspected.
    '''
    preprocessing(data_set, attribute_metadata)
    ans = Node()

    # A non-None result from check_homogenous is used directly as the label.
    label = check_homogenous(data_set)
    if label is not None:
        ans.label = label
        return ans
    if depth == 0:
        ans.label = mode(data_set)
        return ans

    best_attribute, split_value = pick_best_attribute(
        data_set, attribute_metadata, numerical_splits_count)
    # `== False` is kept from the original: it is also true for index 0.
    if best_attribute == False:
        ans.label = mode(data_set)
        return ans

    ans.decision_attribute = best_attribute
    ans.name = attribute_metadata[best_attribute]['name']

    # Comparing the string form tells the literal False apart from a numeric
    # threshold of 0.0, which `== False` would not.
    if str(split_value) == 'False':
        ans.is_nominal = True
        ans.children = {}
        for value, rows in split_on_nominal(data_set, best_attribute).items():
            ans.children[value] = ID3(rows, attribute_metadata,
                                      numerical_splits_count, depth - 1)
    else:
        ans.is_nominal = False
        ans.splitting_value = split_value
        divide = split_on_numerical(data_set, best_attribute, split_value)
        ans.children = [
            ID3(divide[0], attribute_metadata, numerical_splits_count, depth - 1),
            ID3(divide[1], attribute_metadata, numerical_splits_count, depth - 1),
        ]
    return ans
示例5: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build a decision tree for data_set with the ID3 procedure.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    numerical_splits_count is not modified by this function: a numeric split
    decrements a private copy that is handed to the two children.
    '''
    root = Node()
    homogenous = check_homogenous(data_set)
    if homogenous is not None:
        root.label = homogenous
        return root
    if depth == 0 or len(data_set) == 0 or len(attribute_metadata) <= 1:
        root.label = mode(data_set)
        return root

    best_att, best_split = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # The "nothing to split on" test comes first so that a False result is
    # never used as a list index.
    if best_att == False or numerical_splits_count[best_att] == 0:
        root.label = mode(data_set)
        return root

    root.decision_attribute = best_att
    root.splitting_value = best_split
    root.name = attribute_metadata[best_att]['name']
    root.is_nominal = attribute_metadata[best_att]['is_nominal']

    if root.is_nominal:
        children = {}
        for value, rows in split_on_nominal(data_set, best_att).items():
            if is_missing(rows, best_att):
                rows = replace_missing(rows, best_att)
            children[value] = ID3(rows, attribute_metadata, numerical_splits_count, depth - 1)
        root.children = children
    else:
        first_split, second_split = split_on_numerical(data_set, best_att, best_split)
        if is_missing(first_split, best_att):
            first_split = replace_missing(first_split, best_att)
        if is_missing(second_split, best_att):
            second_split = replace_missing(second_split, best_att)
        # Decrement a copy: the caller's list (shared with sibling subtrees)
        # must not lose budget because of a split made down this path.
        remaining_splits = list(numerical_splits_count)
        remaining_splits[best_att] -= 1
        root.children = [
            ID3(first_split, attribute_metadata, remaining_splits, depth - 1),
            ID3(second_split, attribute_metadata, remaining_splits, depth - 1),
        ]
    return root
示例6: ID3_recursive
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3_recursive(data_set, attribute_metadata, numerical_splits_count, depth, attribute_modes_dict):
    '''
    Recursive worker that grows one (sub)tree.

    Returns default_node(data_set) when depth is 0, when check_homogenous
    reports a non-None result, when attribute_metadata is empty, or when
    pick_best_attribute yields no attribute.  Otherwise returns a decision
    node whose `value` is taken from attribute_modes_dict for the chosen
    attribute and whose children are built from a decremented deep copy of
    numerical_splits_count.
    '''
    stop_here = (depth == 0
                 or check_homogenous(data_set) is not None
                 or len(attribute_metadata) == 0)
    if stop_here:
        return default_node(data_set)

    chosen, threshold = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if chosen == False:
        return default_node(data_set)

    tree = Node()
    tree.decision_attribute = chosen
    tree.name = attribute_metadata[chosen]['name']
    tree.is_nominal = attribute_metadata[chosen]['is_nominal']
    tree.value = attribute_modes_dict[chosen]

    # The chosen attribute's count is decremented on a private copy,
    # whether the attribute is nominal or numeric.
    splits_left = copy.deepcopy(numerical_splits_count)
    splits_left[chosen] -= 1

    def grow(rows):
        # One level deeper with the reduced split counts.
        return ID3_recursive(rows, attribute_metadata, splits_left, depth - 1, attribute_modes_dict)

    if tree.is_nominal:
        for value, rows in split_on_nominal(data_set, chosen).items():
            tree.children[value] = grow(rows)
        return tree

    tree.splitting_value = threshold
    first_part, second_part = split_on_numerical(data_set, chosen, threshold)
    tree.children[0] = grow(first_part)
    tree.children[1] = grow(second_part)
    return tree
示例7: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build a decision tree for data_set with the ID3 procedure.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    Every node, leaf or not, stores mode(data_set) in its `mode` attribute.
    numerical_splits_count is not modified by this function.
    '''
    n = Node()
    n.mode = mode(data_set)

    label = check_homogenous(data_set)
    if label is not None:
        n.label = label
        return n
    if depth == 0:
        n.label = n.mode
        return n

    best, sv = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if not best:
        n.label = n.mode
        return n

    n.decision_attribute = best
    n.splitting_value = sv
    n.name = attribute_metadata[best]['name']

    # `is not False` instead of truthiness: a threshold of 0 / 0.0 is a valid
    # numeric split and must not fall through to the nominal branch.
    if sv is not False:
        # numeric
        n.is_nominal = False
        lower, upper = split_on_numerical(data_set, best, sv)
        if not lower or not upper:
            # Degenerate split (one side empty): treat this node as a leaf.
            n.label = n.mode
        else:
            # Decrement a private copy so the caller's list stays intact.
            remaining_splits = list(numerical_splits_count)
            remaining_splits[best] -= 1
            n.children = [
                ID3(lower, attribute_metadata, remaining_splits, depth - 1),
                ID3(upper, attribute_metadata, remaining_splits, depth - 1),
            ]
    else:
        # nominal
        n.is_nominal = True
        for value, rows in split_on_nominal(data_set, best).items():
            if not rows:
                continue
            child = ID3(rows, attribute_metadata, numerical_splits_count, depth - 1)
            # A child that would split on the same attribute again is dropped
            # (behaviour kept from the original).
            if child.decision_attribute != n.decision_attribute:
                n.children[value] = child
    return n
示例8: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    preprocessing(data_set, attribute_metadata) is run on every call.
    numerical_splits_count is only forwarded to pick_best_attribute and to the
    recursive calls; this function never decrements it.
    '''
    preprocessing(data_set, attribute_metadata)
    root = Node()

    def as_leaf(label):
        # Turn `root` into a labelled leaf and hand it back.
        root.label = label
        return root

    homogenous_label = check_homogenous(data_set)
    if homogenous_label is not None:
        return as_leaf(homogenous_label)
    if depth == 0:
        return as_leaf(mode(data_set))

    best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    attribute, split_value = best[0], best[1]
    # `== False` is kept from the original: it is also true for index 0.
    if attribute == False:
        return as_leaf(mode(data_set))

    root.decision_attribute = attribute
    root.name = attribute_metadata[attribute]['name']
    child_depth = depth - 1

    # The string comparison distinguishes the literal False from a numeric
    # threshold of 0.0 (which compares equal to False).
    if str(split_value) == 'False':
        root.is_nominal = True
        subsets = split_on_nominal(data_set, attribute)
        root.children = {}
        for splitval in subsets:
            root.children[splitval] = ID3(subsets[splitval], attribute_metadata,
                                          numerical_splits_count, child_depth)
    else:
        root.is_nominal = False
        root.splitting_value = split_value
        subsets = split_on_numerical(data_set, attribute, split_value)
        root.children = [
            ID3(subsets[0], attribute_metadata, numerical_splits_count, child_depth),
            ID3(subsets[1], attribute_metadata, numerical_splits_count, child_depth),
        ]
    return root
示例9: helper
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def helper(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Recursively grow a decision (sub)tree for data_set.

    Input:  data_set, attribute_metadata, numerical_splits_count (per-attribute
            split budget; not modified by this function) and the remaining depth.
    Output: a Node.  For an empty data_set the node only carries the name
            'default'; otherwise it is either a labelled leaf or a decision
            node with is_nominal set to True (nominal split, children keyed by
            attribute value) or False (numeric split, children keyed 0 and 1).
    '''
    root = Node()
    root.name = 'default'
    if len(data_set) == 0:
        return root

    label = check_homogenous(data_set)
    if label is not None:
        root.label = label
        return root
    if len(attribute_metadata) == 1 or depth == 0:
        root.label = mode(data_set)
        return root

    attribute, split_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # `== False` is kept from the original: it is also true for index 0.
    if attribute == False:
        root.label = mode(data_set)
        return root

    root.name = attribute_metadata[attribute]['name']
    root.decision_attribute = attribute

    # Identity test: a numeric threshold of 0.0 compares equal to False and
    # would otherwise be routed to the nominal branch.
    if split_value is False:
        # is_nominal is a real boolean here, matching how the other tree code
        # in this file reads it (truthy means "children is a value-keyed dict").
        root.is_nominal = True
        for value, rows in split_on_nominal(data_set, attribute).items():
            root.children[value] = helper(rows, attribute_metadata,
                                          numerical_splits_count, depth - 1)
    else:
        root.is_nominal = False
        root.splitting_value = split_value
        # Decrement a copy so the caller's (and siblings') budget is untouched.
        remaining_splits = list(numerical_splits_count)
        remaining_splits[attribute] -= 1
        parts = split_on_numerical(data_set, attribute, split_value)
        root.children[0] = helper(parts[0], attribute_metadata, remaining_splits, depth - 1)
        root.children[1] = helper(parts[1], attribute_metadata, remaining_splits, depth - 1)
    return root
示例10: test_breadth_first_search
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def test_breadth_first_search():
    '''
    Build a one-level nominal tree with two labelled leaves, print it and
    print the result of breadth_first_search on it.

    Output: the root of the hand-built tree
    '''
    leaves = []
    for leaf_label in (1, 0):
        leaf = Node()
        leaf.label = leaf_label
        leaves.append(leaf)

    root = Node()
    root.label = None
    root.decision_attribute = 1
    root.is_nominal = True
    root.name = "whatever"
    # Branch value 1 leads to the leaf labelled 1, branch value 2 to the leaf labelled 0.
    root.children = dict(zip((1, 2), leaves))

    print(root.print_tree())
    print(breadth_first_search(root))
    return root
示例11: ID3_helper
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3_helper(data_set, attribute_metadata, numerical_splits_count, depth, nominal_keys):
    '''
    Recursively grow a decision (sub)tree for data_set.

    Input:  data_set, attribute_metadata, numerical_splits_count (per-attribute
            split budget; not modified by this function), the remaining depth,
            and nominal_keys, which is indexed by attribute to obtain the
            values a nominal attribute is branched on.
    Output: a labelled leaf Node, or a decision Node whose children are a
            two-element list (numeric split) or a dict keyed by attribute
            value (nominal split).
    '''
    def leaf(label):
        result = Node()
        result.label = label
        return result

    def grow(rows, splits):
        # An empty subset cannot be learned from; it becomes a leaf that
        # predicts the most common label of the parent's data.
        if not rows:
            return leaf(mode(data_set))
        return ID3_helper(rows, attribute_metadata, splits, depth - 1, nominal_keys)

    att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    best, split_value = att[0], att[1]
    # `== False` is kept from the original: it is also true for index 0.
    if depth == 0 or best == False:
        return leaf(mode(data_set))
    homogenous_label = check_homogenous(data_set)
    if homogenous_label is not None:
        return leaf(homogenous_label)

    root = Node()
    root.label = None
    root.decision_attribute = best
    root.name = attribute_metadata[best].get('name')
    root.is_nominal = attribute_metadata[best].get('is_nominal')

    if root.is_nominal == False:
        # Decrement a copy so the caller's (and siblings') budget is untouched.
        remaining_splits = list(numerical_splits_count)
        remaining_splits[best] -= 1
        root.splitting_value = split_value
        left_dataset = []
        right_dataset = []
        for row in data_set:
            if row[best] < split_value:
                left_dataset.append(row)
            else:
                right_dataset.append(row)
        root.children = [grow(left_dataset, remaining_splits),
                         grow(right_dataset, remaining_splits)]
    else:
        # Nominal children go one level deeper too (via grow), so the depth
        # limit is enforced on every path, not only on numeric ones.
        root.children = {}
        for key in nominal_keys[best]:
            subset = [row for row in data_set if row[best] == key]
            root.children[key] = grow(subset, numerical_splits_count)
    return root
示例12: copy_node
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def copy_node(node):
    '''
    Return a recursive copy of node and everything below it.

    Scalar fields are copied by assignment.  When node.is_nominal is truthy
    the children are copied into a new dict under the same keys; otherwise
    they are copied into a new list by indexing positions 0..len-1.
    '''
    duplicate = Node()
    for field in ('label', 'decision_attribute', 'is_nominal', 'value', 'splitting_value'):
        setattr(duplicate, field, getattr(node, field))

    if node.is_nominal:
        duplicate.children = dict(
            (key, copy_node(node.children[key])) for key in node.children)
    else:
        # Indexing (not iterating) is deliberate: it behaves the same for a
        # list and for a container keyed by 0..len-1.
        duplicate.children = [copy_node(node.children[position])
                              for position in range(len(node.children))]

    duplicate.name = node.name
    return duplicate
示例13: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build a decision tree for data_set with the ID3 procedure.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set,
    or the string "Empty_data" when data_set is empty
    ========================================================================================================
    None of the three container arguments is modified: the function works on
    copies.  Once an attribute's split budget is used up, the attribute is
    removed from the rows, the metadata and the budget list passed to children.
    '''
    data_set = copy.deepcopy(data_set)
    num_splits = copy.deepcopy(numerical_splits_count)
    # Private copy: entries are deleted below and the caller's list is shared
    # with sibling subtrees.
    metadata = list(attribute_metadata)
    node = Node()

    if not data_set:
        # NOTE(review): a string sentinel rather than a Node; kept because
        # existing callers may test for it.
        return "Empty_data"
    if depth == 0:
        node.label = mode(data_set)
        return node
    homogenous_label = check_homogenous(data_set)
    if homogenous_label is not None:
        node.label = homogenous_label
        return node
    if len(data_set[0]) == 1 or len(metadata) == 0:
        node.label = mode(data_set)
        return node

    node.label = None
    best_attrnumber, split_value = pick_best_attribute(data_set, metadata, num_splits)
    if best_attrnumber is False:
        # No usable attribute: never index with False (it would alias column 0).
        node.label = mode(data_set)
        return node

    node.decision_attribute = best_attrnumber
    # Look the fields up by key; positional access into .values() depends on
    # dict ordering and does not work on Python 3.
    node.name = metadata[best_attrnumber]['name']
    node.is_nominal = bool(metadata[best_attrnumber]['is_nominal'])

    if node.is_nominal:
        node.splitting_value = None
        subsets = list(split_on_nominal(data_set, best_attrnumber).items())
    else:
        node.splitting_value = split_value
        subsets = list(enumerate(split_on_numerical(data_set, best_attrnumber, split_value)))

    num_splits[best_attrnumber] -= 1
    exhausted = num_splits[best_attrnumber] <= 0
    if exhausted:
        # Drop the attribute everywhere at once so that column indices,
        # metadata indices and budget indices stay aligned for the children.
        del metadata[best_attrnumber]
        del num_splits[best_attrnumber]

    for key, rows in subsets:
        if exhausted:
            for row in rows:
                del row[best_attrnumber]
        node.children[key] = ID3(rows, metadata, num_splits, depth - 1)
    return node
示例14: collect_node_leaf
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
collect_node_leaf(root.children[child], nodes, leaves)
# Hand-built fixture tree (presumably the input for collect_node_leaf):
#
#   a0 "weather" (nominal)
#     1  -> b0 "#injury"   (numeric, split at 50)   -> leaves c0 (0), c1 (1)
#     -1 -> b1 "#audience" (numeric, split at 20.9) -> leaves c2 (0), c3 (1)
a0 = Node()
b0 = Node()
b1 = Node()
c0 = Node()
c1 = Node()
c2 = Node()
c3 = Node()

# Leaves.
c0.label = 0
c1.label = 1
c2.label = 0
c3.label = 1

# Root.
a0.name = "weather"
a0.is_nominal = True
a0.label = None

# First numeric branch.
b0.name = "#injury"
b0.label = None
b0.is_nominal = False
b0.splitting_value = 50
b0.children = [c0, c1]

# Second numeric branch.  It gets its own leaves (c2, c3) so that no node
# object is shared between two parents.
b1.name = "#audience"
b1.label = None
b1.is_nominal = False
b1.splitting_value = 20.9
b1.children = [c2, c3]

a0.children = {1: b0, -1: b1}

# Accumulators, left empty here.
nodes = []
leaves = []
示例15: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build a decision tree for data_set with the ID3 procedure.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set.
    An empty data_set yields a leaf labelled '?'.
    ========================================================================================================
    The caller's numerical_splits_count is not modified; the decrement is
    applied to a copy.  Non-leaf nodes store mode(data_set) in `value`.
    '''
    node = Node()
    theta = 0.0  # entropy threshold for stopping

    # base cases
    if not data_set:
        node.label = '?'
        return node
    if depth == 0:
        node.label = mode(data_set)
        return node
    if check_homogenous(data_set):
        # Truthiness test kept from the original.  A falsy result is still
        # handled by the entropy test below when the data is pure.
        node.label = data_set[0][0]
        return node
    no_splits_left = numerical_splits_count[1:] == [0] * (len(numerical_splits_count) - 1)
    if no_splits_left or entropy(data_set) == theta:
        node.label = mode(data_set)
        return node

    # split on best attribute
    splitting_attr, splitting_value = pick_best_attribute(
        data_set, attribute_metadata, numerical_splits_count)
    if splitting_attr is False:
        # Nothing to split on.  Indexing with False would silently select
        # column 0 and split on the label itself.
        node.label = mode(data_set)
        return node

    # Copy before decrementing so the caller's list is left alone.
    numerical_splits_count = list(numerical_splits_count)
    numerical_splits_count[splitting_attr] -= 1

    # describe the node
    node.decision_attribute = splitting_attr
    node.is_nominal = attribute_metadata[splitting_attr]['is_nominal']
    node.splitting_value = splitting_value
    node.name = attribute_metadata[splitting_attr]['name']
    node.value = mode(data_set)

    if node.is_nominal:
        # Group the rows by their value of the chosen attribute.
        branches = {}
        for row in data_set:
            branches.setdefault(row[splitting_attr], []).append(row)
        for attr_value, rows in branches.items():
            node.children[attr_value] = ID3(rows, attribute_metadata,
                                            numerical_splits_count, depth - 1)
    else:
        left_rows = []
        right_rows = []
        for row in data_set:
            if row[splitting_attr] < splitting_value:
                left_rows.append(row)
            else:
                right_rows.append(row)
        node.children = []
        for rows in (left_rows, right_rows):
            child = ID3(rows, attribute_metadata, numerical_splits_count, depth - 1)
            # A '?' leaf comes from an empty side; give it the parent's mode.
            if child.label == '?':
                child.label = mode(data_set)
            node.children.append(child)
    return node