

Python Node.label Method Code Examples

This article collects typical usage examples of the Python method node.Node.label. If you have been wondering exactly how to use Node.label in Python, what it does, or how to call it, the curated examples below should help. You can also explore further usage examples of node.Node, the class this method belongs to.


Shown below are 15 code examples of the Node.label method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
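All of the examples below assume a small Node class exposed by a node module. That class is not reproduced on this page, so purely for orientation, here is a minimal sketch inferred from how the examples use it; the attribute names come from the examples themselves, while the defaults and the classify method are assumptions:

# Minimal sketch of the Node class the examples assume. Inferred from
# usage below; the real node.Node in each project may differ.
class Node(object):
    def __init__(self, depth=None):
        self.label = None               # leaf label, or None for internal nodes
        self.decision_attribute = None  # column index this node splits on
        self.is_nominal = False         # True: children is a dict keyed by value
        self.splitting_value = None     # threshold for numeric splits
        self.name = None                # human-readable attribute name
        self.children = {}              # dict (nominal) or 2-element list (numeric)
        self.depth = depth

    def classify(self, instance):
        # Assumed helper: walk down to a leaf and return its label.
        if self.label is not None:
            return self.label
        value = instance[self.decision_attribute]
        if self.is_nominal:
            return self.children[value].classify(instance)
        child = self.children[0] if value < self.splitting_value else self.children[1]
        return child.classify(instance)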

Example 1: ID3

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See the textbook for the algorithm.
    Make sure to handle unknown values; some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    '''
    
    root = Node()
    homogenous = check_homogenous(data_set)
    if homogenous is not None:
        root.label = homogenous
        return root
        
    if depth == 0 or len(data_set) == 0 or len(attribute_metadata) <= 1:
        root.label = mode(data_set)
        return root

    best_att, best_split = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # Guard against a failed attribute search before using best_att as an
    # index: best_att == False would otherwise silently index attribute 0.
    if best_att is False:
        root.label = mode(data_set)
        return root

    if numerical_splits_count[best_att] == 0:
        root.label = mode(data_set)
        return root
        
    root.decision_attribute = best_att
    root.splitting_value = best_split
    root.name = attribute_metadata[best_att]['name']
    root.is_nominal = attribute_metadata[best_att]['is_nominal']
    if root.is_nominal:
        examples = {}
        
        for k, val in split_on_nominal(data_set, best_att).items():
            if is_missing(val, best_att):
                val = replace_missing(val, best_att)
            examples[k] = ID3(val, attribute_metadata, numerical_splits_count, depth-1)
        root.children = examples
    else:
        root.children = []
        first_split, second_split = split_on_numerical(data_set, best_att, best_split)
        if is_missing(first_split, best_att):
            first_split = replace_missing(first_split, best_att)
        if is_missing(second_split, best_att):
            second_split = replace_missing(second_split, best_att)
        numerical_splits_count[best_att] -= 1
        root.children.append(ID3(first_split, attribute_metadata, numerical_splits_count, depth-1))
        root.children.append(ID3(second_split, attribute_metadata, numerical_splits_count, depth-1))
    return root
Developer: jlowrie, Project: Problem-Set-2, Lines: 62, Source: ID3.py
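For reference, a hypothetical call to the ID3 function above might look like the following; the attribute_metadata format and helper names mirror the test data in Example 5 below, and the data values themselves are made up:

# Hypothetical invocation of ID3; column 0 is the class label.
attribute_metadata = [{'name': 'winner', 'is_nominal': True},
                      {'name': 'opprundifferential', 'is_nominal': False}]
data_set = [[1, 0.27], [0, 0.42], [1, 0.86], [0, 0.68], [1, 0.01]]
numerical_splits_count = [5, 5]  # max numeric splits allowed per attribute
tree = ID3(data_set, attribute_metadata, numerical_splits_count, depth=5)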

Example 2: ID3

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See the textbook for the algorithm.
    Make sure to handle unknown values; some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    '''
    # Your code here
    n = Node()

    if depth == 0:
        n.label = mode(data_set)
        return n
    elif not attribute_metadata:
        n.label = mode(data_set)
        return n
    elif check_homogenous(data_set) is not None:
        # Compare with None so a homogenous label of 0 is still recognised.
        n.label = check_homogenous(data_set)
        return n
    else:
        attr_pos, attr_val = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
        n.decision_attribute = attr_pos
        n.name = attribute_metadata[attr_pos]['name']
        depth -= 1
        return make_children(n, data_set, attr_pos, attr_val, attribute_metadata, numerical_splits_count, depth)
Developer: cwend, Project: PS2code, Lines: 35, Source: ID3.py
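Nearly every example on this page calls the starter-code helpers mode and check_homogenous, which are not defined here. The following are plausible sketches only, assuming the class label is stored in column 0 of each example:

from collections import Counter

def mode(data_set):
    # Most common class label (column 0) in the data set. Sketch only.
    return Counter(row[0] for row in data_set).most_common(1)[0][0]

def check_homogenous(data_set):
    # Return the shared label if every example agrees, else None. Sketch only.
    labels = set(row[0] for row in data_set)
    return labels.pop() if len(labels) == 1 else None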

Example 3: ID3

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See the textbook for the algorithm.
    Make sure to handle unknown values; some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    '''
    # Your code here

    n = Node()
    n.mode = mode(data_set)
    label = check_homogenous(data_set)

    if label is not None:
        n.label = label
        return n

    elif depth == 0:
        n.label = mode(data_set)
        return n

    else:
        best, sv = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)

        if not best:
            n.label = mode(data_set)
            return n

        n.decision_attribute = best
        n.splitting_value = sv
        n.name = attribute_metadata[best]['name']

        #numeric (splitting_value is False for nominal picks; an identity
        #check keeps a threshold of 0.0 on the numeric branch)
        if n.splitting_value is not False:
            m = split_on_numerical(data_set, best, n.splitting_value)
            numerical_splits_count[best] = numerical_splits_count[best] - 1
            if not m[0] or not m[1]:
                n.label = mode(data_set)
            else:
                n_small = ID3(m[0], attribute_metadata, numerical_splits_count, depth-1)
                n_big = ID3(m[1], attribute_metadata, numerical_splits_count, depth-1)
                n.children = [n_small, n_big]

        #nominal
        else:
            n.is_nominal = True
            n.children = {}
            m = split_on_nominal(data_set, best)
            for k, v in m.items():
                if v:
                    n_curr = ID3(v, attribute_metadata, numerical_splits_count, depth-1)
                    if n_curr.decision_attribute != n.decision_attribute:
                        n.children[k] = n_curr
        return n
Developer: jinhoonbang, Project: decision_tree, Lines: 61, Source: ID3.py

Example 4: ID3

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See the textbook for the algorithm.
    Make sure to handle unknown values; some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    # Your code here
    print(depth)
    Dtree = Node()
    if len(data_set) == 0:
        return Dtree
    c = check_homogenous([[element[0]] for element in data_set])
    if isinstance(c, int):
        Dtree.label = c
        return Dtree
    elif len(data_set[0]) == 1 or depth <= 0 or [0]*(len(numerical_splits_count)-1) == numerical_splits_count[1:]:
        Dtree.label = mode(data_set)
        return Dtree
    else:
        data_set = missingValues(data_set)
        best_attribute, threshold = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
        if not best_attribute:
            Dtree.label = mode(data_set)
            return Dtree

        Dtree.decision_attribute = best_attribute
        Dtree.modeVal = mode([[element[Dtree.decision_attribute]] for element in data_set])
        Dtree.name = attribute_metadata[best_attribute]['name']
        if threshold:
            Dtree.is_nominal = False
            Dtree.splitting_value = threshold
            less, greater = split_on_numerical(data_set, best_attribute, threshold)
            # Copy the split counts so the decrement does not leak to sibling branches.
            new_nsc = numerical_splits_count[:]
            new_nsc[best_attribute] -= 1
            Dtree.children = [ID3(less, attribute_metadata, new_nsc, depth-1), ID3(greater, attribute_metadata, new_nsc, depth-1)]
        else:
            Dtree.is_nominal = True
            n_dict = split_on_nominal(data_set, best_attribute)
            # Copy the metadata so popping the used attribute does not mutate the caller's list.
            new_attribute_metadata = attribute_metadata[:]
            new_attribute_metadata.pop(best_attribute)
            Dtree.children = [ID3(removeAttribute(value, best_attribute), new_attribute_metadata, numerical_splits_count, depth-1) for key, value in n_dict.items()]
    return Dtree
Developer: AnujIravane, Project: 349, Lines: 57, Source: ID3.py
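Example 4 calls a missingValues helper that is not shown on this page. A minimal sketch of what such a helper might do, assuming missing entries are marked with None or '?' and are filled with the column's most common known value:

from collections import Counter

def missingValues(data_set):
    # Hypothetical reconstruction: fill each missing entry with the most
    # common known value in its column. The original project's version is
    # not shown here and may differ.
    columns = list(zip(*data_set))
    fills = []
    for col in columns:
        known = [v for v in col if v is not None and v != '?']
        fills.append(Counter(known).most_common(1)[0][0] if known else None)
    return [[fills[i] if (v is None or v == '?') else v
             for i, v in enumerate(row)] for row in data_set]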

Example 5: more_tests

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def more_tests():
    n2 = Node()
    n2.name = "attrib2"
    n2.label = 1

    n3 = Node()
    n3.name = "attrib3"
    n3.label = 0 
    
    n0 = Node()
    n0.name = "attrib0"
    n0.is_nominal = True
    n0.children = {1: n2, 2: n3}

    n4 = Node()
    n4.name = "attrib4"
    n4.label = 2

    n5 = Node()
    n5.name = "attrib5"
    n5.label = 3
    
    n1 = Node()
    n1.name = "attrib1"
    n1.is_nominal = True
    n1.children = {1: n4, 2: n5}

    n = Node()
    n.label = None
    n.decision_attribute = 1
    n.is_nominal = True
    n.name = "attrib"
    n.children = {1: n0, 2: n1}
    print(n.print_dnf_tree())
    print(n.print_tree())
    print(breadth_first_search(n, [n]))

    attribute_metadata = [{'name': "winner",'is_nominal': True},{'name': "opprundifferential",'is_nominal': False}]
    data_set = [[1, 0.27], [0, 0.42], [0, 0.86], [0, 0.68], [0, 0.04], [1, 0.01], [1, 0.33], [1, 0.42], [1, 0.42], [0, 0.51], [1, 0.4]]
    numerical_splits_count = [5, 5]
    n = ID3(data_set, attribute_metadata, numerical_splits_count, 0)
    print(validation_accuracy(n, data_set))

    numerical_splits_count = [1, 1]
    n = ID3(data_set, attribute_metadata, numerical_splits_count, 5)
    print(validation_accuracy(n, data_set))

    numerical_splits_count = [5, 5]
    n = ID3(data_set, attribute_metadata, numerical_splits_count, 5)
    print(validation_accuracy(n, data_set))

    print(n.print_tree())
    n = reduced_error_pruning(n, data_set, [[1, 0.11], [0, 0.42], [0, 0.86], [0, 0.55], [0, 0.66], [1, 0.01], [1, 0.11], [1, 0.84], [1, 0.42], [0, 0.51], [1, 0.4]])
    print(n.print_tree())
    return n
Developer: obiorahm, Project: PS2code, Lines: 57, Source: pruning.py

Example 6: test_breadth_first_search

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def test_breadth_first_search():
    n0 = Node()
    n0.label = 1
    n1 = Node()
    n1.label = 0
    n = Node()
    n.label = None
    n.decision_attribute = 1
    n.is_nominal = True
    n.name = "whatever"
    n.children = {1: n0, 2: n1}
    print(n.print_tree())
    print(breadth_first_search(n))
    return n
Developer: obiorahm, Project: PS2code, Lines: 16, Source: pruning.py
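The breadth_first_search function these tests print is also not defined on this page. A sketch consistent with how it is called (with an optional starting list) might be:

from collections import deque

def breadth_first_search(root, path=None):
    # Plausible reconstruction: visit the tree level by level and return
    # the nodes in visit order. The assignment's real function may collect
    # different information.
    order = []
    queue = deque(path if path is not None else [root])
    while queue:
        node = queue.popleft()
        order.append(node)
        children = node.children or {}
        if isinstance(children, dict):
            queue.extend(children.values())
        else:
            queue.extend(children)
    return order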

Example 7: ID3

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See the textbook for the algorithm.
    Make sure to handle unknown values; some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    '''
    preprocessing(data_set, attribute_metadata)
    homogenous = check_homogenous(data_set)
    if homogenous is not None:
        ans = Node()
        ans.label = homogenous
    elif depth == 0:
        ans = Node()
        ans.label = mode(data_set)
    else:
        best = pick_best_attribute(data_set, attribute_metadata, 
            numerical_splits_count)
        if best[0] is False:
            ans = Node()
            ans.label = mode(data_set)
        else:
            ans = Node()
            ans.decision_attribute = best[0]
            ans.name = attribute_metadata[best[0]]['name']
            depth -= 1
            if best[1] is False:  # nominal attribute: no numeric threshold
                ans.is_nominal = True
                ans.children = {}
                divide = split_on_nominal(data_set, best[0])
                for x in divide.keys():
                    ans.children[x] = ID3(divide[x], attribute_metadata, 
                        numerical_splits_count, depth)
            else:
                ans.is_nominal = False
                ans.children = []
                ans.splitting_value = best[1]
                divide = split_on_numerical(data_set, best[0], best[1])
                ans.children.append(ID3(divide[0], attribute_metadata, 
                    numerical_splits_count, depth))
                ans.children.append(ID3(divide[1], attribute_metadata, 
                    numerical_splits_count, depth)) 
    return ans
Developer: HERMANNITY, Project: EECS349, Lines: 50, Source: ID3.py

Example 8: ID3_helper

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def ID3_helper(data_set, attribute_metadata, numerical_splits_count, depth, nominal_keys):
    att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    print "before"
    # print attribute_metadata
    # print numerical_splits_count
    print att
    print "after"
    if depth == 0 or att[0] == False:  #depth or gain ratio is 0
        d = Node()
        default = mode(data_set)
        d.label = default
        return d
    elif check_homogenous(data_set) is not None:
        d = Node()
        d.label = check_homogenous(data_set)
        return d
    else:  # recurse on the best attribute
        root = Node()
        root.label = None
        root.decision_attribute = att[0]
        root.name = attribute_metadata[att[0]].get('name')
        root.is_nominal = attribute_metadata[att[0]].get('is_nominal')
        if root.is_nominal == False:
            numerical_splits_count[att[0]] -= 1
            root.splitting_value = att[1]
            root.children = []
            left_dataset = []
            right_dataset = []
            for i in range(len(data_set)):
                if data_set[i][att[0]] < att[1]:
                    left_dataset.append(data_set[i])
                else:
                    right_dataset.append(data_set[i])
            depth = depth - 1
            root.children.append(ID3_helper(left_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys))
            root.children.append(ID3_helper(right_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys))
        else:
            root.children = {}
            for key in nominal_keys[att[0]]:
                child_dataset = []
                for i in range(len(data_set)):
                    if data_set[i][att[0]] == key:
                        child_dataset.append(data_set[i])
                child = ID3_helper(child_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys)
                root.children.update({key: child})
        return root
Developer: SixuanYu, Project: ps2, Lines: 50, Source: ID3.py

Example 9: reduced_error_pruning

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def reduced_error_pruning(root,training_set,validation_set):
    '''
    Take a node, a training set, and a validation set, and return the improved node.
    You can implement this as you choose, but the goal is to remove some nodes such that doing so improves validation accuracy.
    '''
    if root.label is not None or not validation_set:
        return root

    else:
        baseacc = validation_accuracy(root,validation_set)
        # To prune the tree, remove the subtree and assign 
        # it a leaf node whose value is the most common 
        # classification of examples associated with that node.
        newtree = Node()
        newtree.label = mode(validation_set)
        if validation_accuracy(newtree,validation_set) > baseacc:
            return newtree 
        if root.is_nominal: # if the tree split on a nominal attribute
            new = split_on_nominal(validation_set, root.decision_attribute)
            for key in root.children:
                # Index the split by the child's attribute value, not by loop order.
                subset = new.get(key, [])
                root.children[key] = reduced_error_pruning(root.children[key], training_set, subset)
        else: # if the tree split according to numeric 
            new = split_on_numerical(validation_set, root.decision_attribute, root.splitting_value)
            validation0 = new[0]
            validation1 = new[1]
            root.children[0] = reduced_error_pruning(root.children[0],training_set,validation0)
            root.children[1] = reduced_error_pruning(root.children[1],training_set,validation1)
        return root
Developer: britlovefan, Project: DecisionTree, Lines: 34, Source: pruning.py
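The pruning examples all depend on a validation_accuracy helper that this page does not include (Example 13 passes an extra attribute_metadata argument, so its version clearly differs). As a sketch only, assuming the label sits in column 0 and using the assumed Node.classify method from the sketch near the top of this page:

def validation_accuracy(tree, validation_set):
    # Fraction of validation examples whose predicted label matches the
    # true label in column 0. Sketch only; assumes tree.classify exists.
    if not validation_set:
        return 0.0
    correct = sum(1 for example in validation_set
                  if tree.classify(example) == example[0])
    return float(correct) / len(validation_set)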

Example 10: reduced_error_pruning

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def reduced_error_pruning(root,training_set,validation_set):
    '''
    Take a node, a training set, and a validation set, and return the improved node.
    You can implement this as you choose, but the goal is to remove some nodes such that doing so improves validation accuracy.
    NOTE you will probably not need to use the training set for your pruning strategy, but it's passed as an argument in the starter code just in case.
    '''

    if root.label is not None or not validation_set:
        return root
    else:
        nmode = Node()
        nmode.label = mode(validation_set)

        curracc = validation_accuracy(root,validation_set)
        modeacc = validation_accuracy(nmode,validation_set)

        if modeacc >= curracc:
            return nmode
        elif not root.is_nominal:
            left,right = split_on_numerical(validation_set,root.decision_attribute,root.splitting_value)
            root.children = [reduced_error_pruning(root.children[0],training_set,left),reduced_error_pruning(root.children[1],training_set,right)]
        else:
            subdata = split_on_nominal(validation_set,root.decision_attribute)
            for (key, val) in root.children.items():
                currdata = []
                if key in subdata:
                    currdata = subdata[key]
                root.children[key] = reduced_error_pruning(val,training_set,currdata)

        return root
Developer: jnbohrer, Project: ID3-Decision-Tree, Lines: 34, Source: pruning.py

Example 11: reduced_error_pruning

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def reduced_error_pruning(root,training_set,validation_set, depth, parent):
    '''
    Take a node, a training set, and a validation set, and return the improved node.
    You can implement this as you choose, but the goal is to remove some nodes such that doing so improves validation accuracy.
    '''

    if root.label is not None or not validation_set:
        return root

    
    new_node = Node()
    new_node.label = mode(validation_set)
    if validation_accuracy(new_node, validation_set)>= validation_accuracy(root, validation_set):
        index = parent.children.index(root)
        parent.children[index] = new_node
        return parent
    if root.is_nominal:
        splits = split_on_nominal(validation_set, root.decision_attribute)
        for key in root.children:
            if key in splits:
                reduced_error_pruning(root.children[key], training_set, splits[key], depth + 1, root)
            
    else: 
        splits = split_on_numerical(validation_set, root.decision_attribute, root.splitting_value)
        
        reduced_error_pruning(root.children[0],training_set, splits[0], depth + 1,root)
        
        reduced_error_pruning(root.children[1],training_set, splits[1], depth + 1,root)
            
    return root
Developer: jlowrie, Project: Problem-Set-2, Lines: 34, Source: pruning.py

Example 12: reduced_error_pruning

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def reduced_error_pruning(root, validation_set):
    '''
    Take a node and a validation set and return the improved node.
    You can implement this as you choose, but the goal is to remove some nodes such that doing so improves validation accuracy.
    '''
    # can't prune if we have a leaf node or run out of validation data
    if root.label is not None or not validation_set:
        return root

    # candidate leaf node, return it if accuracy improves
    prune_leaf = Node()
    prune_leaf.label = mode(validation_set)
    if validation_accuracy(prune_leaf, validation_set) >= validation_accuracy(root, validation_set):
        return prune_leaf

    # otherwise prune the children recursively
    if root.is_nominal:
        root.children = {k: reduced_error_pruning(v, [x for x in validation_set
                                                      if x[root.decision_attribute] == k])
                         for (k, v) in root.children.items()}
    else:
        root.children = [reduced_error_pruning(root.children[0],
                                               [x for x in validation_set
                                                if (x[root.decision_attribute] or root.mode) < root.splitting_value]),
                         reduced_error_pruning(root.children[1],
                                               [x for x in validation_set
                                                if (x[root.decision_attribute] or root.mode) >= root.splitting_value])]
    return root
Developer: Wayne-X, Project: EECS349, Lines: 29, Source: pruning.py

Example 13: reduced_error_pruning

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
# Note: this example also calls copy.deepcopy, so it additionally needs: import copy
def reduced_error_pruning(temp_root, temp_originroot, root, originroot, 
    training_set, validation_set, attribute_metadata):
    if temp_root.is_nominal == True:
        subset = split_on_nominal(training_set, temp_root.decision_attribute)

        for div in temp_root.children.keys():
            if temp_root.children[div].label == None:
                new_Node = Node()
                new_Node.label = mode(subset[div])
                new_Node.children = {}
                temp = copy.deepcopy(temp_root.children[div])
                temp_root.children[div] = new_Node
                prune_acc = validation_accuracy(temp_originroot, validation_set,
                 attribute_metadata)
                acc = validation_accuracy(originroot, validation_set,
                 attribute_metadata)
                if prune_acc >= acc:
                    print(prune_acc)
                    root.children[div] = new_Node
                else:
                    temp_root.children[div] = temp
                    reduced_error_pruning(temp_root.children[div],
                     temp_originroot, root.children[div], originroot,
                     subset[div], validation_set, attribute_metadata)
    if temp_root.is_nominal == False:
        subset = split_on_numerical(training_set, root.decision_attribute,
         root.splitting_value)
        for i in range(0, 2):
            if temp_root.children[i].label == None:
                new_Node = Node()
                new_Node.label = mode(subset[i])
                new_Node.children = {}
                temp = copy.deepcopy(temp_root.children[i])
                temp_root.children[i] = new_Node
                prune_acc = validation_accuracy(temp_originroot, validation_set,
                 attribute_metadata)
                acc = validation_accuracy(originroot, validation_set,
                 attribute_metadata)
                if prune_acc >= acc:
                    print(prune_acc)
                    root.children[i] = new_Node
                else:
                    temp_root.children[i] = temp
                    reduced_error_pruning(temp_root.children[i], temp_originroot,
                     root.children[i], originroot, subset[i], validation_set, 
                     attribute_metadata) 
Developer: HERMANNITY, Project: EECS349, Lines: 48, Source: pruning.py

Example 14: ID3

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    '''
    preprocessing(data_set, attribute_metadata)
    homogenous = check_homogenous(data_set)
    if homogenous is not None:
        root = Node()
        root.label = homogenous
    else: 
        if depth == 0:
            root = Node()
            root.label = mode(data_set)
        else:
            best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
            if best[0] is False:
                root = Node()
                root.label = mode(data_set)
            else:
                root = Node()
                root.decision_attribute = best[0]
                root.name = attribute_metadata[best[0]]['name']
                depth -= 1
                if best[1] is False:  # nominal attribute: no numeric threshold
                    root.is_nominal = True
                    root.children = {}
                    subsets = split_on_nominal(data_set, best[0])
                    for splitval in subsets.keys():
                        root.children[splitval] = ID3(subsets[splitval], attribute_metadata, numerical_splits_count, depth)
                else:
                    root.is_nominal = False
                    root.children = []
                    root.splitting_value = best[1]
                    subsets = split_on_numerical(data_set, best[0], best[1])
                    root.children.append(ID3(subsets[0], attribute_metadata, numerical_splits_count, depth))
                    root.children.append(ID3(subsets[1], attribute_metadata, numerical_splits_count, depth)) 
    return root
Developer: 3011204077, Project: ml-project, Lines: 47, Source: ID3.py

Example 15: create_decision_tree_for_k

# Required import: from node import Node [as alias]
# Or: from node.Node import label [as alias]
	def create_decision_tree_for_k(self, pos_data, neg_data, depth, attr, max_depth=None):
		'''
		Trains and returns a decision tree with the information gain
		as the tree splitting criterion. Criterion is a binary function
		which checks to see if a word exists in the tweet or not
		'''

		root = Node(depth=depth)

		if self.root is None:
			self.root = root

		if depth == max_depth:
			if len(pos_data) > len(neg_data):
				root.label = 1
				return root
			else:
				root.label = -1
				return root

		if len(pos_data) == 0:
			root.label = -1
			return root
		elif len(neg_data) == 0:
			root.label = 1
			return root

		print('Current depth: {}'.format(depth))
		criterion_word = self.max_gain(pos_data, neg_data)
		root.criterion = criterion_word

		# cps = set positive tweets that contain the word
		# cns = set negative tweets that contain the word
		# ncps = set positive tweets that do not contain the word
		# ncns = set negative tweets that do not contain the word
		cps, ncps, cns, ncns = self.split_on_word(pos_data, neg_data, criterion_word)

		if criterion_word == 'rain':
			print('Contains: {}, {}'.format(cps, cns))
			print('Not Contains: {}, {}'.format(ncps, ncns))

		root.left = self.create_decision_tree_for_k(ncps, ncns, depth+1, attr, max_depth=max_depth)
		root.right = self.create_decision_tree_for_k(cps, cns, depth+1, attr, max_depth=max_depth)
		return root
Developer: taehoonl, Project: HashtagWeather, Lines: 46, Source: dtree.py
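Example 15 builds a binary tree keyed on whether a criterion word appears in a tweet. To show how such a tree would be used, here is a hypothetical traversal; the function name and the word-set input are assumptions, not part of the original project:

def classify_tweet(root, tweet_words):
    # Hypothetical helper: descend right when the node's criterion word
    # occurs in the tweet, left otherwise, until a labelled leaf.
    node = root
    while node.label is None:
        node = node.right if node.criterion in tweet_words else node.left
    return node.label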


Note: The node.Node.label method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. Refer to each project's License before distributing or using the code. Do not reproduce without permission.