本文整理汇总了Python中node.Node.splitting_value方法的典型用法代码示例。如果您正苦于以下问题:Python Node.splitting_value方法的具体用法?Python Node.splitting_value怎么用?Python Node.splitting_value使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类node.Node的用法示例。
在下文中一共展示了Node.splitting_value方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set with the classic ID3 recursion.
    ========================================================================================================
    Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    root = Node()
    # Leaf case 1: every example already carries the same label.
    homogenous = check_homogenous(data_set)
    if homogenous is not None:
        root.label = homogenous
        return root
    # Leaf case 2: depth exhausted, no data, or no attributes left to split on.
    if depth == 0 or len(data_set) == 0 or len(attribute_metadata) <= 1:
        root.label = mode(data_set)
        return root
    best_att, best_split = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # BUGFIX: test the "no useful attribute" sentinel (best_att == False)
    # BEFORE indexing numerical_splits_count with it -- False silently
    # indexes slot 0, so the original order consulted the wrong counter.
    if best_att == False:
        root.label = mode(data_set)
        return root
    # Leaf case 3: the chosen attribute has no numerical splits left.
    if numerical_splits_count[best_att] == 0:
        root.label = mode(data_set)
        return root
    root.decision_attribute = best_att
    root.splitting_value = best_split
    root.name = attribute_metadata[best_att]['name']
    root.is_nominal = attribute_metadata[best_att]['is_nominal']
    if root.is_nominal:
        # One child per observed nominal value; missing values are imputed
        # inside each partition before recursing.
        examples = {}
        for k, val in split_on_nominal(data_set, best_att).items():
            if is_missing(val, best_att):
                val = replace_missing(val, best_att)
            examples[k] = ID3(val, attribute_metadata, numerical_splits_count, depth - 1)
        root.children = examples
    else:
        # Numeric attribute: binary split around best_split.
        root.children = []
        first_split, second_split = split_on_numerical(data_set, best_att, best_split)
        if is_missing(first_split, best_att):
            first_split = replace_missing(first_split, best_att)
        if is_missing(second_split, best_att):
            second_split = replace_missing(second_split, best_att)
        # NOTE(review): numerical_splits_count is mutated in place, so the
        # decrement is shared with sibling branches -- confirm intended.
        numerical_splits_count[best_att] -= 1
        root.children.append(ID3(first_split, attribute_metadata, numerical_splits_count, depth - 1))
        root.children.append(ID3(second_split, attribute_metadata, numerical_splits_count, depth - 1))
    return root
示例2: ID3_recursive
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3_recursive(data_set, attribute_metadata, numerical_splits_count, depth, attribute_modes_dict):
    """Recursively grow an ID3 decision subtree for data_set.

    attribute_modes_dict supplies, per attribute, the mode value stored on
    each internal node (used downstream for handling unknowns).
    """
    # Stop growing: depth exhausted, labels already homogeneous, or no attributes.
    if depth == 0 or check_homogenous(data_set) is not None or len(attribute_metadata) == 0:
        return default_node(data_set)
    best_attribute, split_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best_attribute == False:
        # No attribute offers any gain -> majority-label leaf.
        return default_node(data_set)
    tree = Node()
    tree.decision_attribute = best_attribute
    tree.name = attribute_metadata[best_attribute]['name']
    tree.is_nominal = attribute_metadata[best_attribute]['is_nominal']
    tree.value = attribute_modes_dict[best_attribute]
    # Deep-copy the split budget so sibling branches stay independent.
    remaining_splits = copy.deepcopy(numerical_splits_count)
    remaining_splits[best_attribute] -= 1
    if tree.is_nominal:
        # Nominal: one child per observed value.
        for branch_value, subset in split_on_nominal(data_set, best_attribute).items():
            tree.children[branch_value] = ID3_recursive(subset, attribute_metadata, remaining_splits, depth - 1, attribute_modes_dict)
    else:
        # Numeric: children[0] holds < split_value, children[1] holds >=.
        tree.splitting_value = split_value
        below, at_or_above = split_on_numerical(data_set, best_attribute, split_value)
        tree.children[0] = ID3_recursive(below, attribute_metadata, remaining_splits, depth - 1, attribute_modes_dict)
        tree.children[1] = ID3_recursive(at_or_above, attribute_metadata, remaining_splits, depth - 1, attribute_modes_dict)
    return tree
示例3: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn an ID3 decision tree over data_set.
    ========================================================================================================
    Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    n = Node()
    # Stash the majority label on the node for downstream use.
    n.mode = mode(data_set)
    label = check_homogenous(data_set)
    if label is not None:
        # All examples share one label -> leaf.
        n.label = label
        return n
    if depth == 0:
        n.label = mode(data_set)
        return n
    best, sv = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if not best:
        # No attribute yields gain -> majority leaf.
        n.label = mode(data_set)
        return n
    n.decision_attribute = best
    n.splitting_value = sv
    n.name = attribute_metadata[best]['name']
    # BUGFIX: distinguish numeric vs nominal with `is not False` instead of
    # truthiness -- a legitimate numeric threshold of 0 (or 0.0) is falsy and
    # was previously misrouted into the nominal branch.
    if sv is not False:
        # Numeric split.
        n.is_nominal = False
        m = split_on_numerical(data_set, best, sv)
        numerical_splits_count[best] = numerical_splits_count[best] - 1
        if not m[0] or not m[1]:
            # Degenerate split (one side empty) -> fall back to a leaf.
            n.label = mode(data_set)
        else:
            n.children = [ID3(m[0], attribute_metadata, numerical_splits_count, depth - 1),
                          ID3(m[1], attribute_metadata, numerical_splits_count, depth - 1)]
    else:
        # Nominal split.
        n.is_nominal = True
        m = split_on_nominal(data_set, best)
        for k, v in m.items():
            if v:
                child = ID3(v, attribute_metadata, numerical_splits_count, depth - 1)
                # NOTE(review): children that re-split on the same attribute
                # are skipped, mirroring the original guard -- confirm intent.
                if child.decision_attribute != n.decision_attribute:
                    n.children[k] = child
    return n
示例4: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn an ID3 decision tree over data_set.
    ========================================================================================================
    Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    Dtree = Node()
    if len(data_set) == 0:
        # No examples at all: unlabeled placeholder.
        return Dtree
    # Homogeneous labels -> leaf. isinstance(c, int) distinguishes a real
    # label (which may be 0) from the None returned for mixed labels.
    c = check_homogenous([[element[0]] for element in data_set])
    if isinstance(c, int):
        Dtree.label = c
        return Dtree
    elif len(data_set[0]) == 1 or depth <= 0 or [0] * (len(numerical_splits_count) - 1) == numerical_splits_count[1:]:
        # No attributes left, depth exhausted, or every numeric split budget spent.
        Dtree.label = mode(data_set)
        return Dtree
    else:
        data_set = missingValues(data_set)
        best_attribute, threshold = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
        if not best_attribute:
            Dtree.label = mode(data_set)
            return Dtree
        Dtree.decision_attribute = best_attribute
        Dtree.modeVal = mode([[element[Dtree.decision_attribute]] for element in data_set])
        Dtree.name = attribute_metadata[best_attribute]['name']
        if threshold:
            # Numeric attribute: binary split at threshold.
            Dtree.is_nominal = False
            Dtree.splitting_value = threshold
            less, greater = split_on_numerical(data_set, best_attribute, threshold)
            # BUGFIX: copy the split budget instead of aliasing it; the
            # original `new_nsc = numerical_splits_count` mutated the
            # caller's list, so one branch stole splits from its siblings.
            new_nsc = list(numerical_splits_count)
            new_nsc[best_attribute] -= 1
            Dtree.children = [ID3(less, attribute_metadata, new_nsc, depth - 1),
                              ID3(greater, attribute_metadata, new_nsc, depth - 1)]
        else:
            # Nominal attribute: one child per value; the attribute column is
            # removed from both the data and the metadata before recursing.
            Dtree.is_nominal = True
            n_dict = split_on_nominal(data_set, best_attribute)
            # BUGFIX: build a new metadata list rather than pop()ing from the
            # shared one, which corrupted metadata for the caller's other branches.
            new_attribute_metadata = attribute_metadata[:best_attribute] + attribute_metadata[best_attribute + 1:]
            Dtree.children = [ID3(removeAttribute(value, best_attribute), new_attribute_metadata, numerical_splits_count, depth - 1)
                              for key, value in n_dict.iteritems()]
        return Dtree
示例5: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    node = Node() # new node
    entropy_bound = 0.15 # entropy of data_set must be below bound to become a leaf
    pick_best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count) # tuple
    best_attribute = pick_best[0] # best attribute to split on
    split_value = pick_best[1] # best value to split on
    # Leaf when the data is near-pure, depth is exhausted, or no attribute has gain.
    if entropy(data_set) < entropy_bound or depth == 0 or best_attribute == False:
        node.label = mode(data_set)
        return node
    if split_value is not False: # if there is a split value (best attribute is numeric)
        # NOTE(review): numerical_splits_count is never decremented in this
        # implementation, so the numeric split budget is not enforced -- confirm.
        split_data = split_on_numerical(data_set, best_attribute, split_value) # splitting data by split value (lesser, greater)
        node.is_nominal = False # node is numeric
        node.splitting_value = split_value # best value to split on
        # NOTE(review): index assignment assumes Node.children defaults to a dict.
        node.children[0] = ID3(split_data[0], attribute_metadata, numerical_splits_count, depth - 1) # less than split value
        node.children[1] = ID3(split_data[1], attribute_metadata, numerical_splits_count, depth - 1) # greater than split value
        node.name = attribute_metadata[best_attribute]['name']
        node.decision_attribute = best_attribute # best attribute to split on
    else: # best_attribute is nominal
        split_data = split_on_nominal(data_set, best_attribute) # returns a dictionary with nominal attributes as keys
        node.is_nominal = True # node is nominal
        split_data_copy = deepcopy(split_data) # deep copy split_data
        ### filling in missing data ###
        for key in split_data_copy.keys():
            if key is None:
                # find most common attribute and add the missing attribute data into the most common attribute
                greatest_length = -1
                mode_att = None
                for att, data in split_data_copy.iteritems():
                    if len(data) > greatest_length:
                        greatest_length = len(data)
                        mode_att = att
                for data in split_data_copy[key]:
                    split_data_copy[mode_att].append(data) # adds all the None data into the mode attribute
                split_data_copy.pop(key, None) # removes the None attribute data
        # add a children for each nominal attribute
        for key in split_data_copy:
            node.children[key] = ID3(split_data_copy[key], attribute_metadata, numerical_splits_count, depth - 1)
        node.name = attribute_metadata[best_attribute]['name']
        node.decision_attribute = best_attribute
    # print node.children
    return node
示例6: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn an ID3 decision tree over data_set after preprocessing it.
    ========================================================================================================
    Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    preprocessing(data_set, attribute_metadata)
    root = Node()
    # Pure data -> leaf with that common label.
    if check_homogenous(data_set) != None:
        root.label = check_homogenous(data_set)
        return root
    # Depth budget exhausted -> majority-label leaf.
    if depth == 0:
        root.label = mode(data_set)
        return root
    best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best[0] == False:
        # No informative attribute remains.
        root.label = mode(data_set)
        return root
    root.decision_attribute = best[0]
    root.name = attribute_metadata[best[0]]['name']
    depth -= 1
    if str(best[1]) == 'False':
        # Nominal attribute: one child per observed value.
        root.is_nominal = True
        root.children = {}
        for branch_value, subset in split_on_nominal(data_set, best[0]).items():
            root.children[branch_value] = ID3(subset, attribute_metadata, numerical_splits_count, depth)
    else:
        # Numeric attribute: two children around the split value.
        root.is_nominal = False
        root.children = []
        root.splitting_value = best[1]
        lower, upper = split_on_numerical(data_set, best[0], best[1])
        root.children.append(ID3(lower, attribute_metadata, numerical_splits_count, depth))
        root.children.append(ID3(upper, attribute_metadata, numerical_splits_count, depth))
    return root
示例7: ID3_helper
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3_helper(data_set, attribute_metadata, numerical_splits_count, depth, nominal_keys):
    '''
    Recursive worker for ID3.
    nominal_keys maps attribute index -> the full collection of nominal
    values, so every possible branch gets a child even if a value is absent
    from this partition.
    Returns the root Node of the subtree for data_set.
    '''
    att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # Leaf: depth exhausted or no attribute offers any gain.
    if depth == 0 or att[0] == False:
        d = Node()
        d.label = mode(data_set)
        return d
    elif check_homogenous(data_set) is not None:
        # Leaf: every example already shares one label.
        d = Node()
        d.label = check_homogenous(data_set)
        return d
    else:
        root = Node()
        root.label = None
        root.decision_attribute = att[0]
        root.name = attribute_metadata[att[0]].get('name')
        root.is_nominal = attribute_metadata[att[0]].get('is_nominal')
        if root.is_nominal == False:
            # Numeric: consume one split from the budget, branch on att[1].
            # NOTE(review): numerical_splits_count is shared (not copied), so
            # sibling branches see each other's decrements -- confirm intended.
            numerical_splits_count[att[0]] -= 1
            root.splitting_value = att[1]
            root.children = []
            left_dataset = []
            right_dataset = []
            for row in data_set:
                if row[att[0]] < att[1]:
                    left_dataset.append(row)
                else:
                    right_dataset.append(row)
            depth = depth - 1
            root.children.append(ID3_helper(left_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys))
            root.children.append(ID3_helper(right_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys))
        else:
            # Nominal: one child per known value of the attribute.
            root.children = {}
            for key in nominal_keys[att[0]]:
                child_dataset = [row for row in data_set if row[att[0]] == key]
                # BUGFIX: decrement depth for nominal children too; the
                # original recursed with the undiminished depth, so the depth
                # limit only constrained numeric splits.
                child = ID3_helper(child_dataset, attribute_metadata, numerical_splits_count, depth - 1, nominal_keys)
                root.children.update({key: child})
        return root
示例8: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
'''
========================================================================================================
Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
maximum depth to search to (depth = 0 indicates that this node should output a label)
========================================================================================================
Output: The node representing the decision tree learned over the given data set
========================================================================================================
'''
preprocessing(data_set, attribute_metadata)
if check_homogenous(data_set) != None:
root = Node()
root.label = check_homogenous(data_set)
else:
if depth == 0:
root = Node()
root.label = mode(data_set)
else:
best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
if best[0] == False:
root = Node()
root.label = mode(data_set)
else:
root = Node()
root.decision_attribute = best[0]
root.name = attribute_metadata[best[0]]['name']
depth -= 1
if str(best[1]) == 'False':
root.is_nominal = True
root.children = {}
subsets = split_on_nominal(data_set, best[0])
for splitval in subsets.keys():
root.children[splitval] = ID3(subsets[splitval], attribute_metadata, numerical_splits_count, depth)
else:
root.is_nominal = False
root.children = []
root.splitting_value = best[1]
subsets = split_on_numerical(data_set, best[0], best[1])
#numerical_splits_count[best[0]] -= 1
print numerical_splits_count
print depth
root.children.append(ID3(subsets[0], attribute_metadata, numerical_splits_count, depth))
root.children.append(ID3(subsets[1], attribute_metadata, numerical_splits_count, depth))
return root
示例9: copy_node
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def copy_node(node):
    """Return a deep copy of *node* and its entire subtree."""
    dup = Node()
    dup.label = node.label
    dup.decision_attribute = node.decision_attribute
    dup.is_nominal = node.is_nominal
    dup.value = node.value
    dup.splitting_value = node.splitting_value
    dup.name = node.name
    if node.is_nominal:
        # Nominal nodes keep children in a dict keyed by attribute value.
        dup.children = {key: copy_node(child) for key, child in node.children.items()}
    else:
        # Numeric nodes keep children in an ordered list.
        dup.children = [copy_node(child) for child in node.children]
    return dup
示例10: helper
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def helper(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Recursive ID3 worker: returns the decision (sub)tree for data_set.
    Leaves carry the majority label; internal nodes split on the attribute
    chosen by pick_best_attribute.
    '''
    root = Node()
    root.name = 'default'
    if len(data_set) == 0:
        # Empty partition: unlabeled placeholder node.
        return root
    label = check_homogenous(data_set)
    if label != None:
        # Every example shares this label -> leaf.
        root.label = label
        return root
    if len(attribute_metadata) == 1 or depth == 0:
        # Only the class column left, or depth exhausted.
        root.label = mode(data_set)
        return root
    best_attribute = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best_attribute[0] == False:
        # No attribute yields gain -> majority leaf.
        root.label = mode(data_set)
        return root
    root.name = attribute_metadata[best_attribute[0]]['name']
    root.decision_attribute = best_attribute[0]
    # BUGFIX: use `is False` so a legitimate numeric threshold of 0 is not
    # mistaken for the nominal sentinel.
    if best_attribute[1] is False:  # nominal attribute -> dict of children
        # BUGFIX: mark the node nominal with True; the original stored None,
        # which is falsy and made classification treat the node as numeric.
        root.is_nominal = True
        temp_dict = split_on_nominal(data_set, best_attribute[0])
        depth -= 1
        for key in temp_dict.keys():
            root.children[key] = helper(temp_dict[key], attribute_metadata, numerical_splits_count, depth)
    else:
        numerical_splits_count[best_attribute[0]] -= 1
        # BUGFIX: is_nominal is a flag, not the split value; the original
        # stored the (truthy) threshold here, inverting the nominal test.
        root.is_nominal = False
        root.splitting_value = best_attribute[1]
        temp_tuple = split_on_numerical(data_set, best_attribute[0], best_attribute[1])
        depth -= 1
        # NOTE(review): index assignment assumes Node.children defaults to a dict.
        root.children[0] = helper(temp_tuple[0], attribute_metadata, numerical_splits_count, depth)
        root.children[1] = helper(temp_tuple[1], attribute_metadata, numerical_splits_count, depth)
    return root
示例11: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    # decision tree to be returned
    node = Node()
    # base case
    theta = 0.0 # threshold of entropy
    if not data_set:
        # No examples: mark with the sentinel label '?' so the parent can
        # overwrite it with its own majority label (see the numeric branch).
        node.label = '?'
        return node
    elif depth == 0:
        node.label = mode(data_set)
        return node
    elif check_homogenous(data_set):
        # NOTE(review): truthiness test -- a homogeneous label of 0 would be
        # treated as "not homogeneous" here; confirm label values are nonzero.
        node.label = data_set[0][0]
        return node
    # no attributes to split
    elif numerical_splits_count[1:] == [0] * (len(numerical_splits_count) - 1):
        node.label = mode(data_set)
        return node
    elif entropy(data_set) == theta:
        node.label = mode(data_set)
        return node
    # split on best attribute
    # NOTE(review): no handling of pick_best_attribute returning False here;
    # False would index slot 0 below -- confirm it cannot happen at this point.
    splitting_attr, splitting_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # avoid pass by reference error
    numerical_splits_count = list(numerical_splits_count)
    numerical_splits_count[splitting_attr] -= 1
    # describe the node
    node.decision_attribute = splitting_attr
    node.is_nominal = attribute_metadata[splitting_attr]['is_nominal']
    node.splitting_value = splitting_value
    node.name = attribute_metadata[splitting_attr]['name']
    node.value = mode(data_set) # value store mode of non-leaf node
    # if is nominal
    if node.is_nominal:
        # put data in data_set into different branches
        branches = {}
        for data in data_set:
            if data[splitting_attr] not in branches:
                branches[data[splitting_attr]] = []
            branches[data[splitting_attr]].append(data)
        for attr, sub_data_set in branches.items():
            node.children[attr] = ID3(sub_data_set, attribute_metadata, numerical_splits_count, depth - 1)
    # else is numeric
    else:
        left_sub_data_set = []
        right_sub_data_set = []
        for data in data_set:
            if data[splitting_attr] < splitting_value:
                left_sub_data_set.append(data)
            else:
                right_sub_data_set.append(data)
        node.children = []
        node.children.append(ID3(left_sub_data_set, attribute_metadata, numerical_splits_count, depth - 1))
        if node.children[0].label == '?':
            # Child got an empty partition: substitute this node's majority label.
            node.children[0].label = mode(data_set)
        node.children.append(ID3(right_sub_data_set, attribute_metadata, numerical_splits_count, depth - 1))
        if node.children[1].label == '?':
            node.children[1].label = mode(data_set)
    # return the generated tree
    return node
示例12: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
'''
See Textbook for algorithm.
Make sure to handle unknown values, some suggested approaches were
given in lecture.
========================================================================================================
Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
maximum depth to search to (depth = 0 indicates that this node should output a label)
========================================================================================================
Output: The node representing the decision tree learned over the given data set
========================================================================================================
'''
# Your code here
root = Node()
#print 'depth =', depth
#if depth == 0: #Depth check
# root.label = mode(data_set)
#else:
# root.label = check_homogenous(data_set)
root.label = mode(data_set)
homogeneous = check_homogenous(data_set)
#print 'label=', root.label
if homogeneous != None or depth == 0: #If data set isn't homogeneous or max depth
return root # Finished with this branch
else:
best_att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
#print 'best_att=', best_att
#print 'data_set=', data_set
if best_att == (False, False): #Nathan: Exception here since (False, False) can be interpreted as (0, False) and ID3 tries to split on the class
#root.label = mode(data_set)
#print 'False, False -> label=', root.label
return root
else:
root.decision_attribute = best_att[0]
root.is_nominal = attribute_metadata[best_att[0]]['is_nominal']
root.splitting_value = best_att[1]
#outcomes = [] # this is the classes in the data_set - #Nathan: moved all this to check_homogeneous
#for i in range(0, len(data_set)):
# outcomes.append([data_set[i][0]])
#done = check_homogenous(outcomes)
#root.label = done
root.name = attribute_metadata[best_att[0]]['name']
child_numerical_splits_count = numerical_splits_count
### this is not correct
# root.children should not have subset datasets in values for each attribute thing
if root.is_nominal == True:
root.children = {}
data = split_on_nominal(data_set, root.decision_attribute)
sub_depth = depth - 1
for i in data.keys():
new_node = ID3(data[i], attribute_metadata, child_numerical_splits_count, sub_depth)
#print sub_depth
#print new_node, 'nom'
#print [new_node.classify(x) == x[0] for x in data_set]
root.children[i] = new_node
#root.children = split_on_nominal(data_set, root.decision_attribute)
elif root.is_nominal == False:
root.children = []
data = split_on_numerical(data_set, root.decision_attribute, root.splitting_value)
child_numerical_splits_count[root.decision_attribute] = child_numerical_splits_count[root.decision_attribute]-1
sub_depth = depth - 1
for i in range(len(data)):
new_node = ID3(data[i], attribute_metadata, child_numerical_splits_count, sub_depth)
#print sub_depth
#print new_node, 'num'
root.children.append(new_node)
else:
print 'Troubles brewing'
return root
# best_feature_values = {s.sample[best_feature]
# for s in training_samples}
# for value in best_feature_values:
# samples = [s for s in training_samples
# if s.sample[best_feature] == value]
# # Recursively, create a child node.
# root.children = create_decision_tree(samples,
# predicting_features)
# root_node[value] = child
# return root_node
#while tree.label == None:
# GenerateTree(X)
# If NodeEntropy(X) < ThresholdI **entropy equation 9.3 <---- function below
## threshold = 0.001
# Create leaf labelled by majority class in X
## mode function
# Return
#pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
#.........这里部分代码省略.........
示例13: ID3_helper
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3_helper(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Recursive ID3 worker.
    ========================================================================================================
    Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    leaf = Node()
    threshold = 0.1  # entropy below this is treated as "pure enough" for a leaf
    if (len(data_set) == 0 or depth == 0 or check_homogenous(data_set) != None
            or entropy(data_set) < threshold or len(attribute_metadata) == 0):
        # Leaf: no data, depth exhausted, (near-)homogeneous labels, or no
        # attributes left. Label with the majority class.
        leaf.label = mode(data_set)
        leaf.decision_attribute = None
        leaf.is_nominal = None
        leaf.value = mode(data_set)
        leaf.splitting_value = None
        leaf.name = None
        return leaf
    best_attribute, splitting_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # BUGFIX: handle "no attribute has gain" BEFORE touching
    # attribute_metadata / numerical_splits_count with best_attribute; the
    # original indexed both with False (i.e. slot 0) and decremented the
    # wrong split counter before noticing the failure.
    if best_attribute == False:
        leaf.label = mode(data_set)
        return leaf
    leaf.label = None
    leaf.decision_attribute = best_attribute
    leaf.name = attribute_metadata[best_attribute]['name']
    # NOTE(review): the budget is decremented for nominal attributes too, and
    # the list is shared with sibling branches -- confirm intended.
    numerical_splits_count[best_attribute] -= 1
    # BUGFIX: `is False` instead of `== False`, so a legitimate numeric
    # threshold of 0 is not mistaken for the nominal sentinel.
    if splitting_value is False:
        # Nominal attribute: dictionary of children keyed by attribute value.
        leaf.is_nominal = True
        leaf.splitting_value = splitting_value
        branches = {}
        for value, subset in split_on_nominal(data_set, best_attribute).iteritems():
            branches[value] = ID3_helper(subset, attribute_metadata, numerical_splits_count, depth - 1)
        leaf.children = branches
        return leaf
    else:
        # Numeric attribute: list of two children around splitting_value.
        lower, upper = split_on_numerical(data_set, best_attribute, splitting_value)
        leaf.is_nominal = False
        leaf.splitting_value = splitting_value
        leaf.children = [
            ID3_helper(lower, attribute_metadata, numerical_splits_count, depth - 1),
            ID3_helper(upper, attribute_metadata, numerical_splits_count, depth - 1),
        ]
        return leaf
示例14: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn an ID3 decision tree over data_set, imputing missing values of the
    chosen attribute with that column's mode before splitting.
    ========================================================================================================
    Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    if not data_set:
        # No examples at all: unlabeled placeholder node.
        return Node()
    homogenous = check_homogenous(data_set)
    if homogenous != None:
        n = Node()
        n.label = homogenous
        return n
    if not attribute_metadata or depth == 0:
        # No attributes left or depth exhausted -> majority leaf.
        n = Node()
        n.label = mode(data_set)
        return n
    best, split_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # BUGFIX: bail out on "no useful attribute" BEFORE the imputation work
    # below; with best == False the original computed the mode of column 0
    # (the class label) and mutated the split budget before noticing.
    if best == False:
        n = Node()
        n.label = mode(data_set)
        return n
    # Mode of the chosen attribute's non-missing values (used to impute).
    best_data = [row[best] for row in data_set if row[best] is not None]
    best_mode = max(set(best_data), key=best_data.count)
    # Impute missing values of the chosen column on a copy of the data.
    data_copy = copy.deepcopy(data_set)
    for row in data_copy:
        if row[best] is None:
            row[best] = best_mode
    if attribute_metadata[best]['is_nominal'] == False:
        # NOTE(review): shared (uncopied) budget list -- confirm intended.
        numerical_splits_count[best] -= 1
    tree = Node()  # the root of this subtree
    tree.is_nominal = attribute_metadata[best]['is_nominal']
    tree.decision_attribute = best
    tree.splitting_value = split_value
    tree.name = attribute_metadata[best]['name']
    tree.value = best_mode
    if attribute_metadata[best]['is_nominal'] == True:
        # Nominal attribute: one branch per value.
        best_attributes_dict = split_on_nominal(data_copy, best)
        for v in best_attributes_dict:
            tree.children[v] = ID3(best_attributes_dict[v], attribute_metadata, numerical_splits_count, depth - 1)
    else:
        # Numeric attribute: children[0] = below split, children[1] = at/above.
        splits = split_on_numerical(data_copy, best, split_value)
        # BUGFIX: enumerate instead of splits.index(v) -- when both halves
        # compared equal (e.g. two empty lists) index() returned 0 twice and
        # children[1] was never assigned.
        for i, subset in enumerate(splits):
            tree.children[i] = ID3(subset, attribute_metadata, numerical_splits_count, depth - 1)
    return tree
示例15: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn an ID3 decision tree over data_set.
    ========================================================================================================
    Input: A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    # Empty partitions yield None; the nominal branch below simply omits them.
    if data_set == None:
        return None
    if len(data_set) == 0:
        return None
    n = Node()
    homogenous_value = check_homogenous(data_set)
    if homogenous_value != None:
        # Every example shares one label -> leaf.
        n.label = homogenous_value
        return n
    if depth == 0:
        n.label = mode(data_set)
        return n
    best_i, split_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best_i == False:
        # No attribute yields gain -> majority leaf.
        n.label = mode(data_set)
        return n
    n.label = None
    # Index of the attribute with the highest gain ratio.
    n.decision_attribute = best_i
    n.name = attribute_metadata[best_i]['name']
    n.splitting_value = split_value
    # BUGFIX: `is not False` instead of `!= False` -- a numeric split point
    # of 0 (or 0.0) compares equal to False and was misrouted into the
    # nominal branch.
    if split_value is not False:
        # Numeric attribute: consume one split and branch around split_value.
        n.is_nominal = False
        left_data, right_data = split_on_numerical(data_set, best_i, split_value)
        # NOTE(review): the budget list is shared with siblings -- confirm.
        numerical_splits_count[best_i] -= 1
        n.children = [
            ID3(left_data, attribute_metadata, numerical_splits_count, depth - 1),
            ID3(right_data, attribute_metadata, numerical_splits_count, depth - 1),
        ]
        return n
    else:
        # Nominal attribute: one branch per value; empty partitions return
        # None above and get no branch.
        n.is_nominal = True
        n.children = {}
        for key, subset in split_on_nominal(data_set, best_i).iteritems():
            child = ID3(subset, attribute_metadata, numerical_splits_count, depth - 1)
            if child != None:
                n.children[key] = child
        return n