本文整理汇总了Python中node.Node.label方法的典型用法代码示例。如果您正苦于以下问题：Python Node.label方法的具体用法？Python Node.label怎么用？Python Node.label使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类node.Node的用法示例。
在下文中一共展示了Node.label方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Grow a decision tree over data_set with the ID3 procedure.

    Input:  data_set, attribute_metadata, numerical_splits_count (split
            budget per attribute index) and depth (depth == 0 forces a leaf).
    Output: the Node at the root of the learned (sub)tree.

    Note: numerical_splits_count is decremented in place whenever a numeric
    attribute is chosen, so the change is visible to the caller and to
    sibling subtrees.
    '''
    node = Node()

    # Pure data set: emit its single label.
    uniform_label = check_homogenous(data_set)
    if uniform_label != None:
        node.label = uniform_label
        return node

    # Nothing left to grow with: fall back to the majority label.
    exhausted = depth == 0 or len(data_set) == 0 or len(attribute_metadata) <= 1
    if exhausted:
        node.label = mode(data_set)
        return node

    chosen, threshold = pick_best_attribute(
        data_set, attribute_metadata, numerical_splits_count)
    # The budget lookup is evaluated first, exactly as in the original order.
    if numerical_splits_count[chosen] == 0 or chosen == False:
        node.label = mode(data_set)
        return node

    node.decision_attribute = chosen
    node.splitting_value = threshold
    node.name = attribute_metadata[chosen]['name']
    node.is_nominal = attribute_metadata[chosen]['is_nominal']

    if node.is_nominal:
        # One subtree per observed value of the nominal attribute.
        subtrees = {}
        for value, rows in split_on_nominal(data_set, chosen).items():
            if is_missing(rows, chosen):
                rows = replace_missing(rows, chosen)
            subtrees[value] = ID3(
                rows, attribute_metadata, numerical_splits_count, depth - 1)
        node.children = subtrees
        return node

    # Numeric attribute: binary split around the threshold.
    low_rows, high_rows = split_on_numerical(data_set, chosen, threshold)
    if is_missing(low_rows, chosen):
        low_rows = replace_missing(low_rows, chosen)
    if is_missing(high_rows, chosen):
        high_rows = replace_missing(high_rows, chosen)
    numerical_splits_count[chosen] -= 1
    low_tree = ID3(low_rows, attribute_metadata, numerical_splits_count, depth - 1)
    high_tree = ID3(high_rows, attribute_metadata, numerical_splits_count, depth - 1)
    node.children = [low_tree, high_tree]
    return node
示例2: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set with the ID3 procedure.

    Input:  data_set, attribute_metadata, numerical_splits_count (maximum
            number of splits to consider per numerical attribute) and depth
            (depth == 0 means this node must output a label).
    Output: the Node representing the tree learned over data_set. Internal
            nodes are completed by make_children, which receives depth - 1.
    '''
    n = Node()
    # Kept from the original: a freshly built node that already carries a
    # label is returned untouched.
    if n.label is not None:
        return n

    # Depth budget spent, or no attributes to split on: majority label.
    if depth == 0 or not attribute_metadata:
        n.label = mode(data_set)
        return n

    # Compare against None rather than testing truthiness, so that a data set
    # whose single label is 0 is still recognised as homogeneous.
    uniform_label = check_homogenous(data_set)
    if uniform_label is not None:
        n.label = uniform_label
        return n

    attr_pos, attr_val = pick_best_attribute(
        data_set, attribute_metadata, numerical_splits_count)
    # NOTE(review): sibling implementations in this file treat a falsy
    # attribute index as "no useful split"; without this guard False would be
    # used as column index 0 -- confirm against pick_best_attribute.
    if not attr_pos:
        n.label = mode(data_set)
        return n

    n.decision_attribute = attr_pos
    n.name = attribute_metadata[attr_pos]['name']
    return make_children(n, data_set, attr_pos, attr_val, attribute_metadata,
                         numerical_splits_count, depth - 1)
示例3: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set with the ID3 procedure.

    Input:  data_set, attribute_metadata, numerical_splits_count (maximum
            number of splits to consider per numerical attribute) and depth
            (depth == 0 means this node must output a label).
    Output: the Node representing the tree learned over data_set. Every node
            also records the majority label of its data in node.mode.

    Note: numerical_splits_count is decremented in place when a numeric
    attribute is chosen.
    '''
    n = Node()
    majority = mode(data_set)
    n.mode = majority

    label = check_homogenous(data_set)
    if label is not None:
        n.label = label
        return n
    if depth == 0:
        n.label = majority
        return n

    best, sv = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if not best:
        # No attribute worth splitting on.
        n.label = majority
        return n

    n.decision_attribute = best
    n.splitting_value = sv
    n.name = attribute_metadata[best]['name']

    # Decide numeric vs nominal from the metadata instead of the truthiness
    # of the threshold: a legitimate numeric threshold of 0 / 0.0 is falsy and
    # used to be routed down the nominal path.
    if not attribute_metadata[best]['is_nominal']:
        smaller, larger = split_on_numerical(data_set, best, sv)
        numerical_splits_count[best] = numerical_splits_count[best] - 1
        if not smaller or not larger:
            # Degenerate split (one side empty): answer with the majority label.
            n.label = majority
        else:
            n_small = ID3(smaller, attribute_metadata, numerical_splits_count, depth - 1)
            n_big = ID3(larger, attribute_metadata, numerical_splits_count, depth - 1)
            n.children = [n_small, n_big]
    else:
        n.is_nominal = True
        # Relies on Node providing a mapping in n.children by default.
        for value, rows in split_on_nominal(data_set, best).items():
            if not rows:
                continue
            n_curr = ID3(rows, attribute_metadata, numerical_splits_count, depth - 1)
            # Children that would split on the same attribute again are dropped.
            if n_curr.decision_attribute != n.decision_attribute:
                n.children[value] = n_curr
    return n
示例4: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set with the ID3 procedure.

    Input:  data_set (column 0 of every row is read as the label),
            attribute_metadata, numerical_splits_count (maximum number of
            splits to consider per numerical attribute) and depth
            (depth <= 0 means this node must output a label).
    Output: the Node representing the tree learned over data_set. An empty
            data_set yields a bare, unlabelled Node.

    The caller's attribute_metadata and numerical_splits_count are never
    modified: each recursion level works on its own copies.
    '''
    Dtree = Node()
    if len(data_set) == 0:
        return Dtree

    # Homogeneity is judged on the label column only.
    c = check_homogenous([[element[0]] for element in data_set])
    if isinstance(c, int):
        Dtree.label = c
        return Dtree

    only_label_left = len(data_set[0]) == 1
    no_splits_left = [0] * (len(numerical_splits_count) - 1) == numerical_splits_count[1:]
    if only_label_left or depth <= 0 or no_splits_left:
        Dtree.label = mode(data_set)
        return Dtree

    data_set = missingValues(data_set)
    best_attribute, threshold = pick_best_attribute(
        data_set, attribute_metadata, numerical_splits_count)
    if not best_attribute:
        Dtree.label = mode(data_set)
        return Dtree

    Dtree.decision_attribute = best_attribute
    Dtree.modeVal = mode([[element[best_attribute]] for element in data_set])
    Dtree.name = attribute_metadata[best_attribute]['name']

    # Use the metadata rather than the truthiness of the threshold, so that a
    # numeric threshold of 0 is not mistaken for "nominal".
    if not attribute_metadata[best_attribute]['is_nominal']:
        Dtree.is_nominal = False
        Dtree.splitting_value = threshold
        less, greater = split_on_numerical(data_set, best_attribute, threshold)
        # Real copy (the original aliased the caller's list), so spending a
        # split here does not leak into the caller or into other branches.
        new_nsc = list(numerical_splits_count)
        new_nsc[best_attribute] -= 1
        Dtree.children = [
            ID3(less, attribute_metadata, new_nsc, depth - 1),
            ID3(greater, attribute_metadata, new_nsc, depth - 1),
        ]
    else:
        Dtree.is_nominal = True
        n_dict = split_on_nominal(data_set, best_attribute)
        # The used column is removed from the child data, so drop the matching
        # entry from copies of BOTH parallel lists to keep indices aligned.
        new_attribute_metadata = list(attribute_metadata)
        new_attribute_metadata.pop(best_attribute)
        new_nsc = list(numerical_splits_count)
        new_nsc.pop(best_attribute)
        # NOTE(review): children are kept as a list in dict-iteration order,
        # as in the original; the attribute values (keys) are not stored.
        Dtree.children = [
            ID3(removeAttribute(value, best_attribute), new_attribute_metadata,
                new_nsc, depth - 1)
            for value in n_dict.values()
        ]
    return Dtree
示例5: more_tests
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def more_tests():
    '''
    Smoke-test the tree utilities: print a hand-built nominal tree in several
    forms, train trees on a small data set under three budget/depth settings
    while printing their validation accuracy, then prune the last tree and
    return it.
    '''
    def labelled(name, label):
        # Leaf node carrying a name and a label.
        leaf = Node()
        leaf.name = name
        leaf.label = label
        return leaf

    def nominal(name, children):
        # Nominal internal node over an already-built children mapping.
        inner = Node()
        inner.name = name
        inner.is_nominal = True
        inner.children = children
        return inner

    n0 = nominal("attrib0", {1: labelled("attrib2", 1), 2: labelled("attrib3", 0)})
    n1 = nominal("attrib1", {1: labelled("attrib4", 2), 2: labelled("attrib5", 3)})

    n = Node()
    n.label = None
    n.decision_attribute = 1
    n.is_nominal = True
    n.name = "attrib"
    n.children = {1: n0, 2: n1}
    print(n.print_dnf_tree())
    print(n.print_tree())
    print(breadth_first_search(n, [n]))

    attribute_metadata = [{'name': "winner",'is_nominal': True},{'name': "opprundifferential",'is_nominal': False}]
    data_set = [[1, 0.27], [0, 0.42], [0, 0.86], [0, 0.68], [0, 0.04], [1, 0.01], [1, 0.33], [1, 0.42], [1, 0.42], [0, 0.51], [1, 0.4]]

    # (split budget, maximum depth) for each training run; every run gets its
    # own fresh budget list.
    for numerical_splits_count, max_depth in (([5, 5], 0), ([1, 1], 5), ([5, 5], 5)):
        n = ID3(data_set, attribute_metadata, numerical_splits_count, max_depth)
        print(validation_accuracy(n,data_set))

    print(n.print_tree())
    pruning_set = [[1, 0.11], [0, 0.42], [0, 0.86], [0, 0.55], [0, 0.66], [1, 0.01], [1, 0.11], [1, 0.84], [1, 0.42], [0, 0.51], [1, 0.4]]
    n = reduced_error_pruning(n, data_set, pruning_set)
    print(n.print_tree())
    return n
示例6: test_breadth_first_search
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def test_breadth_first_search():
    '''
    Build a one-level nominal tree with two labelled leaves, print it and its
    breadth-first traversal, and return the root.
    '''
    def leaf(label):
        # Node that only carries a label.
        node = Node()
        node.label = label
        return node

    first, second = leaf(1), leaf(0)

    tree = Node()
    # Same attribute assignments, in the same order, as a literal sequence.
    for field, value in (("label", None),
                         ("decision_attribute", 1),
                         ("is_nominal", True),
                         ("name", "whatever"),
                         ("children", {1: first, 2: second})):
        setattr(tree, field, value)

    print(tree.print_tree())
    print(breadth_first_search(tree))
    return tree
示例7: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set with the ID3 procedure.

    Input:  data_set, attribute_metadata, numerical_splits_count (maximum
            number of splits to consider per numerical attribute) and depth
            (depth == 0 means this node must output a label).
    Output: the Node representing the tree learned over data_set.

    preprocessing(data_set, attribute_metadata) is run first on every call.
    '''
    preprocessing(data_set, attribute_metadata)
    ans = Node()

    # Pure data set: its single label is the answer.
    if check_homogenous(data_set) != None:
        ans.label = check_homogenous(data_set)
        return ans

    # Depth budget spent: majority label.
    if depth == 0:
        ans.label = mode(data_set)
        return ans

    best = pick_best_attribute(data_set, attribute_metadata,
                               numerical_splits_count)
    attr_index = best[0]
    threshold = best[1]
    if attr_index == False:
        ans.label = mode(data_set)
        return ans

    ans.decision_attribute = attr_index
    ans.name = attribute_metadata[attr_index]['name']
    child_depth = depth - 1

    # The string comparison tells a literal False apart from a 0.0 threshold.
    if str(threshold) == 'False':
        ans.is_nominal = True
        ans.children = {}
        for value, rows in split_on_nominal(data_set, attr_index).items():
            ans.children[value] = ID3(rows, attribute_metadata,
                                      numerical_splits_count, child_depth)
    else:
        ans.is_nominal = False
        ans.splitting_value = threshold
        halves = split_on_numerical(data_set, attr_index, threshold)
        lower = ID3(halves[0], attribute_metadata,
                    numerical_splits_count, child_depth)
        upper = ID3(halves[1], attribute_metadata,
                    numerical_splits_count, child_depth)
        ans.children = [lower, upper]
    return ans
示例8: ID3_helper
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def ID3_helper(data_set, attribute_metadata, numerical_splits_count, depth, nominal_keys):
    '''
    Recursively build a decision tree over data_set.

    Input:  data_set, attribute_metadata, numerical_splits_count (decremented
            in place each time a numeric attribute is used), depth (remaining
            levels; 0 forces a leaf) and nominal_keys, indexed by attribute
            position, giving the values to branch on for nominal attributes.
    Output: the Node at the root of the (sub)tree.
    '''
    node = Node()

    # Depth budget spent: no need to search for an attribute at all.
    if depth == 0:
        node.label = mode(data_set)
        return node

    att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if att[0] == False:
        # No attribute worth splitting on.
        node.label = mode(data_set)
        return node

    uniform_label = check_homogenous(data_set)
    if uniform_label is not None:
        node.label = uniform_label
        return node

    index, threshold = att[0], att[1]
    node.label = None
    node.decision_attribute = index
    node.name = attribute_metadata[index].get('name')
    node.is_nominal = attribute_metadata[index].get('is_nominal')

    # Every split consumes one level of depth. The original decremented depth
    # only on the numeric path, so nominal splits ignored the depth limit.
    child_depth = depth - 1

    # Explicit comparison kept: a missing 'is_nominal' entry (None) takes the
    # nominal path, as before.
    if node.is_nominal == False:
        numerical_splits_count[index] -= 1
        node.splitting_value = threshold
        left_dataset = [row for row in data_set if row[index] < threshold]
        right_dataset = [row for row in data_set if not (row[index] < threshold)]
        node.children = [
            ID3_helper(left_dataset, attribute_metadata, numerical_splits_count,
                       child_depth, nominal_keys),
            ID3_helper(right_dataset, attribute_metadata, numerical_splits_count,
                       child_depth, nominal_keys),
        ]
    else:
        node.children = {}
        for key in nominal_keys[index]:
            child_dataset = [row for row in data_set if row[index] == key]
            node.children[key] = ID3_helper(
                child_dataset, attribute_metadata, numerical_splits_count,
                child_depth, nominal_keys)
    return node
示例9: reduced_error_pruning
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def reduced_error_pruning(root,training_set,validation_set):
    '''
    Prune the tree rooted at root using validation_set and return the
    (possibly replaced) root.

    A subtree is replaced by a leaf holding the most common validation label
    when that leaf is strictly more accurate on the validation rows reaching
    it; otherwise its children are pruned recursively on their share of the
    validation rows. training_set is only passed through.
    '''
    # Leaves, and subtrees no validation row reaches, are left alone.
    if root.label is not None or not validation_set:
        return root

    baseacc = validation_accuracy(root, validation_set)
    candidate = Node()
    candidate.label = mode(validation_set)
    if validation_accuracy(candidate, validation_set) > baseacc:
        return candidate

    if root.is_nominal:
        by_value = split_on_nominal(validation_set, root.decision_attribute)
        # Look the subset up by the child's own key. The original indexed the
        # split with a running counter, pairing children with the wrong rows
        # (or raising KeyError). Values absent from the validation data get
        # an empty subset.
        for key in list(root.children):
            root.children[key] = reduced_error_pruning(
                root.children[key], training_set, by_value.get(key, []))
    else:
        parts = split_on_numerical(
            validation_set, root.decision_attribute, root.splitting_value)
        root.children[0] = reduced_error_pruning(root.children[0], training_set, parts[0])
        root.children[1] = reduced_error_pruning(root.children[1], training_set, parts[1])
    return root
示例10: reduced_error_pruning
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def reduced_error_pruning(root,training_set,validation_set):
    '''
    Prune the tree rooted at root using validation_set and return the
    (possibly replaced) root.

    A subtree is replaced by a leaf holding the most common validation label
    when that leaf is at least as accurate on the validation rows reaching
    it; otherwise its children are pruned recursively on their share of the
    validation rows. training_set is only passed through.
    '''
    # Compare the label against None: a leaf labelled 0 is falsy and used to
    # be treated as an internal node.
    if root.label is not None or not validation_set:
        return root

    nmode = Node()
    nmode.label = mode(validation_set)
    curracc = validation_accuracy(root, validation_set)
    modeacc = validation_accuracy(nmode, validation_set)
    if modeacc >= curracc:
        return nmode

    if not root.is_nominal:
        left, right = split_on_numerical(
            validation_set, root.decision_attribute, root.splitting_value)
        root.children = [
            reduced_error_pruning(root.children[0], training_set, left),
            reduced_error_pruning(root.children[1], training_set, right),
        ]
    else:
        subdata = split_on_nominal(validation_set, root.decision_attribute)
        # Snapshot the items so children can be reassigned while looping;
        # values absent from the validation data get an empty subset.
        for key, val in list(root.children.items()):
            root.children[key] = reduced_error_pruning(
                val, training_set, subdata.get(key, []))
    return root
示例11: reduced_error_pruning
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def reduced_error_pruning(root,training_set,validation_set, depth, parent):
    '''
    Prune the tree rooted at root in place using validation_set.

    When a leaf holding the most common validation label is at least as
    accurate as root on validation_set, that leaf replaces root inside
    parent.children and parent is returned (as in the original); if there is
    no parent, or parent does not hold root, the leaf itself is returned.
    Otherwise the children are pruned recursively and root is returned.

    training_set is only passed through; depth is passed on as depth + 1 and
    not otherwise used.
    '''
    if root.label is not None or not validation_set:
        return root

    new_node = Node()
    new_node.label = mode(validation_set)
    if validation_accuracy(new_node, validation_set) >= validation_accuracy(root, validation_set):
        if parent is not None and _replace_child(parent, root, new_node):
            return parent
        return new_node

    if root.is_nominal:
        # Fixed: the original read an undefined name (validation_subset),
        # omitted training_set in the recursive call and only recursed when
        # the child index happened to equal root.decision_attribute.
        splits = split_on_nominal(validation_set, root.decision_attribute)
        if isinstance(root.children, dict):
            branches = list(root.children.items())
        else:
            branches = list(enumerate(root.children))
        for key, child in branches:
            try:
                subset = splits[key]
            except (KeyError, IndexError):
                # No validation row takes this branch.
                subset = []
            reduced_error_pruning(child, training_set, subset, depth + 1, root)
    else:
        splits = split_on_numerical(validation_set, root.decision_attribute, root.splitting_value)
        # Snapshot both children first: a pruned child is swapped inside
        # root.children by the recursive call.
        low_child, high_child = root.children[0], root.children[1]
        reduced_error_pruning(low_child, training_set, splits[0], depth + 1, root)
        reduced_error_pruning(high_child, training_set, splits[1], depth + 1, root)
    return root


def _replace_child(parent, old_child, new_child):
    '''Swap old_child for new_child in parent.children (dict or list); return True if it was found.'''
    children = parent.children
    if isinstance(children, dict):
        slots = list(children.items())
    else:
        slots = list(enumerate(children))
    for slot, child in slots:
        if child is old_child:
            children[slot] = new_child
            return True
    return False
示例12: reduced_error_pruning
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def reduced_error_pruning(root, validation_set):
    '''
    Prune the tree rooted at root using validation_set and return the
    (possibly replaced) root.

    A subtree is replaced by a leaf holding the most common validation label
    when that leaf is at least as accurate on the validation rows reaching
    it; otherwise each child is pruned recursively on the validation rows
    routed to it.
    '''
    # Can't prune a leaf or prune without validation data. The label is
    # compared against None so that a leaf labelled 0 still counts as a leaf.
    if root.label is not None or not validation_set:
        return root

    # Candidate leaf; keep it if accuracy does not get worse.
    prune_leaf = Node()
    prune_leaf.label = mode(validation_set)
    if validation_accuracy(prune_leaf, validation_set) >= validation_accuracy(root, validation_set):
        return prune_leaf

    attribute = root.decision_attribute

    # Subsets are built as real lists: on Python 3 filter() returns an
    # iterator that is always truthy, which defeated the emptiness check above.
    if root.is_nominal:
        root.children = {
            key: reduced_error_pruning(
                child, [row for row in validation_set if row[attribute] == key])
            for (key, child) in root.children.items()
        }
    else:
        def value_of(row):
            # NOTE(review): any falsy value (including 0) falls back to
            # root.mode, as in the original -- confirm 0 is never a real value.
            return row[attribute] or root.mode

        below = [row for row in validation_set if value_of(row) < root.splitting_value]
        at_or_above = [row for row in validation_set if value_of(row) >= root.splitting_value]
        root.children = [
            reduced_error_pruning(root.children[0], below),
            reduced_error_pruning(root.children[1], at_or_above),
        ]
    return root
示例13: reduced_error_pruning
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def reduced_error_pruning(temp_root, temp_originroot, root, originroot,
                          training_set, validation_set, attribute_metadata):
    '''
    Prune the subtree at root, using temp_root as its scratch counterpart.

    For every unlabelled child of temp_root, a leaf holding the majority
    label of the matching training subset is tried in the scratch tree. If
    the scratch tree (temp_originroot) is then at least as accurate on
    validation_set as originroot, the accuracy is printed and the leaf is
    installed in root; otherwise the scratch child is restored and pruning
    recurses into that child. Nothing is returned.
    '''
    def attempt(branch, subsets):
        # Try replacing one child, identified by its key/index, with a leaf.
        if temp_root.children[branch].label == None:
            leaf = Node()
            leaf.label = mode(subsets[branch])
            leaf.children = {}
            saved = copy.deepcopy(temp_root.children[branch])
            temp_root.children[branch] = leaf
            prune_acc = validation_accuracy(temp_originroot, validation_set,
                                            attribute_metadata)
            acc = validation_accuracy(originroot, validation_set,
                                      attribute_metadata)
            if prune_acc >= acc:
                print(prune_acc)
                root.children[branch] = leaf
            else:
                # Rejected: put the saved copy back and descend into it.
                temp_root.children[branch] = saved
                reduced_error_pruning(saved, temp_originroot,
                                      root.children[branch], originroot,
                                      subsets[branch], validation_set,
                                      attribute_metadata)

    if temp_root.is_nominal == True:
        by_value = split_on_nominal(training_set, temp_root.decision_attribute)
        for div in list(temp_root.children.keys()):
            attempt(div, by_value)
    if temp_root.is_nominal == False:
        halves = split_on_numerical(training_set, root.decision_attribute,
                                    root.splitting_value)
        for side in (0, 1):
            attempt(side, halves)
示例14: ID3
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set with the ID3 procedure.

    Input:  data_set, attribute_metadata, numerical_splits_count (maximum
            number of splits to consider per numerical attribute) and depth
            (depth == 0 means this node must output a label).
    Output: the Node representing the tree learned over data_set.

    preprocessing(data_set, attribute_metadata) is run first on every call.
    On a numeric split the split counts and the remaining depth are printed.
    '''
    preprocessing(data_set, attribute_metadata)
    root = Node()

    def as_leaf(label):
        # Finish this node as a leaf with the given label.
        root.label = label
        return root

    if check_homogenous(data_set) != None:
        return as_leaf(check_homogenous(data_set))
    if depth == 0:
        return as_leaf(mode(data_set))

    best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best[0] == False:
        return as_leaf(mode(data_set))

    column = best[0]
    cut = best[1]
    root.decision_attribute = column
    root.name = attribute_metadata[column]['name']
    levels_left = depth - 1

    # The string comparison tells a literal False apart from a 0.0 threshold.
    if str(cut) == 'False':
        root.is_nominal = True
        subsets = split_on_nominal(data_set, column)
        root.children = {}
        for splitval in list(subsets.keys()):
            root.children[splitval] = ID3(subsets[splitval], attribute_metadata, numerical_splits_count, levels_left)
        return root

    root.is_nominal = False
    root.splitting_value = cut
    subsets = split_on_numerical(data_set, column, cut)
    # The split counts are reported but not decremented here.
    print(numerical_splits_count)
    print(levels_left)
    root.children = []
    for part in (subsets[0], subsets[1]):
        root.children.append(ID3(part, attribute_metadata, numerical_splits_count, levels_left))
    return root
示例15: create_decision_tree_for_k
# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import label [as 别名]
def create_decision_tree_for_k(self, pos_data, neg_data, depth, attr, max_depth=None):
    '''
    Recursively build a decision tree from positive and negative examples
    and return its root node.

    Each internal node stores the word chosen by self.max_gain as its
    criterion; examples without the word go to node.left, examples with it
    to node.right. A node becomes a leaf (label 1 or -1) when depth reaches
    max_depth or when one of the two example sets is empty. The first node
    created while self.root is None is also stored as self.root. attr is
    only passed through to the recursive calls.
    '''
    node = Node(depth=depth)
    if self.root is None:
        self.root = node

    positives, negatives = len(pos_data), len(neg_data)

    # Depth limit reached: majority vote, ties go to the negative class.
    if depth == max_depth:
        node.label = 1 if positives > negatives else -1
        return node

    # One class is absent: label with the other one.
    if positives == 0:
        node.label = -1
        return node
    if negatives == 0:
        node.label = 1
        return node

    print('Current depth: {}'.format(depth))
    word = self.max_gain(pos_data, neg_data)
    node.criterion = word

    # with_pos / with_neg: positive / negative examples containing the word
    # without_pos / without_neg: positive / negative examples lacking it
    with_pos, without_pos, with_neg, without_neg = self.split_on_word(pos_data, neg_data, word)
    if word == 'rain':
        print('Contains: {}, {}'.format(with_pos, with_neg))
        print('Not Contains: {}, {}'.format(without_pos, without_neg))

    next_depth = depth + 1
    node.left = self.create_decision_tree_for_k(without_pos, without_neg, next_depth, attr, max_depth=max_depth)
    node.right = self.create_decision_tree_for_k(with_pos, with_neg, next_depth, attr, max_depth=max_depth)
    return node