当前位置: 首页>>代码示例>>Python>>正文


Python Node.decision_attribute方法代码示例

本文整理汇总了Python中node.Node.decision_attribute方法的典型用法代码示例。如果您正苦于以下问题:Python Node.decision_attribute方法的具体用法?Python Node.decision_attribute怎么用?Python Node.decision_attribute使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在node.Node的用法示例。


在下文中一共展示了Node.decision_attribute方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Recursively build a decision tree for data_set and return its root Node.

    ========================================================================================================
    Input:  A data_set, attribute_metadata (indexed by attribute position, each entry has a 'name' key),
    numerical_splits_count (handed to pick_best_attribute and to the recursive calls unchanged) and
    depth, the maximum depth still allowed (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: A Node that is either a leaf (label = mode(data_set)) or an internal node with
    decision_attribute, name, is_nominal and children set (plus splitting_value for numeric splits)
    ========================================================================================================
    Unknown values: rows whose value for a nominal split attribute is None are merged into the largest
    bucket that has a real value.
    '''
    node = Node()
    entropy_bound = 0.15  # data sets whose entropy is below this bound become leaves
    pick_best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    best_attribute = pick_best[0]  # attribute position to split on, or False if there is none
    split_value = pick_best[1]  # numeric threshold, or False for a nominal attribute

    if entropy(data_set) < entropy_bound or depth == 0 or best_attribute == False:
        node.label = mode(data_set)
        return node

    if split_value is not False:
        # Numeric attribute: two children, index 0 below the threshold, index 1 for the rest.
        split_data = split_on_numerical(data_set, best_attribute, split_value)
        node.is_nominal = False
        node.splitting_value = split_value
        node.children[0] = ID3(split_data[0], attribute_metadata, numerical_splits_count, depth - 1)
        node.children[1] = ID3(split_data[1], attribute_metadata, numerical_splits_count, depth - 1)
        node.name = attribute_metadata[best_attribute]['name']
        node.decision_attribute = best_attribute
        return node

    # Nominal attribute: one bucket of rows per attribute value. Work on a deep copy so the
    # merge below never touches what split_on_nominal returned.
    buckets = deepcopy(split_on_nominal(data_set, best_attribute))
    if None in buckets:
        known_values = [value for value in buckets if value is not None]
        if not known_values:
            # Every row is missing this attribute, so a split carries no information.
            node.label = mode(data_set)
            return node
        # The None bucket must be excluded when looking for the largest bucket: picking it
        # would mean appending its rows to itself while iterating over it, which never ends.
        missing_rows = buckets.pop(None)
        fullest = max(known_values, key=lambda value: len(buckets[value]))
        buckets[fullest].extend(missing_rows)

    node.is_nominal = True
    for value in buckets:
        node.children[value] = ID3(buckets[value], attribute_metadata, numerical_splits_count, depth - 1)
    node.name = attribute_metadata[best_attribute]['name']
    node.decision_attribute = best_attribute
    return node
开发者ID:brianzhan,项目名称:EECS349Psets,代码行数:55,代码来源:ID3.py

示例2: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Grow a decision tree over data_set and return the Node at its root.

    ========================================================================================================
    Input:  A data_set, attribute_metadata (entries carry 'name' and 'is_nominal'),
    numerical_splits_count (remaining split budget per attribute position; decremented in place
    whenever a numeric split is made) and depth (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: A leaf Node carrying a label, or an internal Node whose children are a dict keyed by
    attribute value (nominal split) or a two-element list (numeric split)
    ========================================================================================================
    '''
    tree = Node()

    uniform_label = check_homogenous(data_set)
    if uniform_label is not None:
        tree.label = uniform_label
        return tree

    # Decide whether this node has to be a majority-label leaf.
    must_stop = depth == 0 or len(data_set) == 0 or len(attribute_metadata) <= 1
    if not must_stop:
        best_att, best_split = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
        # The budget lookup deliberately comes first, as in the original check order.
        must_stop = numerical_splits_count[best_att] == 0 or best_att == False
    if must_stop:
        tree.label = mode(data_set)
        return tree

    tree.decision_attribute = best_att
    tree.splitting_value = best_split
    tree.name = attribute_metadata[best_att]['name']
    tree.is_nominal = attribute_metadata[best_att]['is_nominal']

    if tree.is_nominal:
        branches = {}
        for value, rows in split_on_nominal(data_set, best_att).items():
            if is_missing(rows, best_att):
                rows = replace_missing(rows, best_att)
            branches[value] = ID3(rows, attribute_metadata, numerical_splits_count, depth - 1)
        tree.children = branches
        return tree

    lower_rows, upper_rows = split_on_numerical(data_set, best_att, best_split)
    if is_missing(lower_rows, best_att):
        lower_rows = replace_missing(lower_rows, best_att)
    if is_missing(upper_rows, best_att):
        upper_rows = replace_missing(upper_rows, best_att)
    # One unit of this attribute's budget is used up before descending.
    numerical_splits_count[best_att] -= 1
    lower_tree = ID3(lower_rows, attribute_metadata, numerical_splits_count, depth - 1)
    upper_tree = ID3(upper_rows, attribute_metadata, numerical_splits_count, depth - 1)
    tree.children = [lower_tree, upper_tree]
    return tree
开发者ID:jlowrie,项目名称:Problem-Set-2,代码行数:62,代码来源:ID3.py

示例3: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build a decision tree node for data_set, delegating child construction to make_children.

    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: A leaf Node labelled with mode(data_set) or with the single class of a homogeneous data set;
    otherwise whatever make_children returns for the chosen attribute, called with depth - 1
    ========================================================================================================
    '''
    n = Node()

    # Kept from the original: only triggers if Node() comes pre-labelled.
    if n.label is not None:
        return n

    if depth == 0 or not attribute_metadata:
        n.label = mode(data_set)
        return n

    # Compare against None rather than testing truthiness: a data set that is homogeneous in
    # class 0 yields the falsy label 0 and would otherwise be treated as mixed.
    homogenous_label = check_homogenous(data_set)
    if homogenous_label is not None:
        n.label = homogenous_label
        return n

    attr_pos, attr_val = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    n.decision_attribute = attr_pos
    n.name = attribute_metadata[attr_pos]['name']
    return make_children(n, data_set, attr_pos, attr_val, attribute_metadata,
                         numerical_splits_count, depth - 1)
开发者ID:cwend,项目名称:PS2code,代码行数:35,代码来源:ID3.py

示例4: ID3_recursive

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3_recursive(data_set, attribute_metadata, numerical_splits_count, depth, attribute_modes_dict):
    """Recursively build a decision tree and return its root Node.

    Returns default_node(data_set) when depth is 0, the data set is homogeneous,
    there is no attribute metadata, or pick_best_attribute finds nothing to split
    on. Otherwise the node records the chosen attribute, its name, whether it is
    nominal and its entry from attribute_modes_dict, and gets one child per
    nominal value or two children (index 0 and 1) for a numeric split.

    The caller's numerical_splits_count is never modified: children receive a
    deep copy with the chosen attribute's count reduced by one.
    """
    finished = (depth == 0
                or check_homogenous(data_set) is not None
                or len(attribute_metadata) == 0)
    if finished:
        return default_node(data_set)

    best_attribute, split_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best_attribute == False:
        return default_node(data_set)

    chosen = attribute_metadata[best_attribute]
    node = Node()
    node.decision_attribute = best_attribute
    node.name = chosen['name']
    node.is_nominal = chosen['is_nominal']
    node.value = attribute_modes_dict[best_attribute]

    child_counts = copy.deepcopy(numerical_splits_count)
    child_counts[best_attribute] -= 1

    def grow(rows):
        # Every child shares the same metadata, reduced counts and depth budget.
        return ID3_recursive(rows, attribute_metadata, child_counts, depth - 1, attribute_modes_dict)

    if node.is_nominal:
        for value, rows in split_on_nominal(data_set, best_attribute).items():
            node.children[value] = grow(rows)
        return node

    node.splitting_value = split_value
    below, at_or_above = split_on_numerical(data_set, best_attribute, split_value)
    node.children[0] = grow(below)
    node.children[1] = grow(at_or_above)
    return node
开发者ID:benmel,项目名称:ml_ps2,代码行数:29,代码来源:ID3.py

示例5: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set and return its root Node.

    ========================================================================================================
    Input:  A data_set, attribute_metadata, numerical_splits_count (decremented in place for the
    chosen attribute on every numeric split) and depth (depth = 0 indicates that this node
    should output a label)
    ========================================================================================================
    Output: A Node whose mode attribute always holds mode(data_set). It is a leaf (label set) when the
    data is homogeneous, depth is 0, no attribute is picked, or a numeric split leaves one side empty.
    ========================================================================================================
    '''
    n = Node()
    n.mode = mode(data_set)

    label = check_homogenous(data_set)
    if label is not None:
        n.label = label
        return n
    if depth == 0:
        n.label = mode(data_set)
        return n

    best, sv = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if not best:
        n.label = mode(data_set)
        return n

    n.decision_attribute = best
    n.splitting_value = sv
    n.name = attribute_metadata[best]['name']

    # A truthy splitting value selects the numeric branch.
    # NOTE(review): a threshold of exactly 0 is falsy and would take the nominal branch - confirm.
    if n.splitting_value:
        lower_rows, upper_rows = split_on_numerical(data_set, best, n.splitting_value)
        numerical_splits_count[best] -= 1
        if lower_rows and upper_rows:
            n.children = [
                ID3(lower_rows, attribute_metadata, numerical_splits_count, depth - 1),
                ID3(upper_rows, attribute_metadata, numerical_splits_count, depth - 1),
            ]
        else:
            # A one-sided split separates nothing, so fall back to the majority label.
            n.label = mode(data_set)
        return n

    n.is_nominal = True
    for value, rows in split_on_nominal(data_set, best).items():
        if not rows:
            continue
        subtree = ID3(rows, attribute_metadata, numerical_splits_count, depth - 1)
        # Children that would split on this very attribute again are left out.
        if subtree.decision_attribute != n.decision_attribute:
            n.children[value] = subtree
    return n
开发者ID:jinhoonbang,项目名称:decision_tree,代码行数:61,代码来源:ID3.py

示例6: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set and return its root Node.

    ========================================================================================================
    Input:  A data_set (element 0 of each row is passed to check_homogenous as the class value),
    attribute_metadata, numerical_splits_count and depth (depth <= 0 indicates that this node
    should output a label)
    ========================================================================================================
    Output: An unlabelled Node for an empty data set, a labelled leaf, or an internal Node whose
    children are stored in a list
    ========================================================================================================
    Neither attribute_metadata nor numerical_splits_count is modified; the recursive calls receive
    copies where a change is needed.
    '''
    Dtree = Node()
    if len(data_set) == 0:
        return Dtree

    c = check_homogenous([[element[0]] for element in data_set])
    if isinstance(c, int):
        Dtree.label = c
        return Dtree

    # Stop when only the class column is left, the depth budget is spent, or every split count
    # after position 0 has reached zero.
    splits_exhausted = [0] * (len(numerical_splits_count) - 1) == numerical_splits_count[1:]
    if len(data_set[0]) == 1 or depth <= 0 or splits_exhausted:
        Dtree.label = mode(data_set)
        return Dtree

    data_set = missingValues(data_set)
    best_attribute, threshold = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if not best_attribute:
        Dtree.label = mode(data_set)
        return Dtree

    Dtree.decision_attribute = best_attribute
    Dtree.modeVal = mode([[element[Dtree.decision_attribute]] for element in data_set])
    Dtree.name = attribute_metadata[best_attribute]['name']

    if threshold:
        Dtree.is_nominal = False
        Dtree.splitting_value = threshold
        less, greater = split_on_numerical(data_set, best_attribute, threshold)
        # Copy before decrementing: a plain assignment would alias the caller's list and
        # leak this branch's budget change into sibling and ancestor branches.
        new_nsc = list(numerical_splits_count)
        new_nsc[best_attribute] -= 1
        Dtree.children = [ID3(less, attribute_metadata, new_nsc, depth - 1),
                          ID3(greater, attribute_metadata, new_nsc, depth - 1)]
    else:
        Dtree.is_nominal = True
        n_dict = split_on_nominal(data_set, best_attribute)
        # Copy before popping: popping from the caller's list would shift attribute
        # positions for branches whose rows still contain the removed column.
        new_attribute_metadata = list(attribute_metadata)
        new_attribute_metadata.pop(best_attribute)
        # NOTE(review): numerical_splits_count keeps its original length here while the
        # metadata shrinks; presumably removeAttribute drops the column from each row, which
        # would leave the counts misaligned by one past best_attribute - confirm.
        Dtree.children = [ID3(removeAttribute(value, best_attribute), new_attribute_metadata,
                              numerical_splits_count, depth - 1)
                          for key, value in n_dict.items()]
    return Dtree
开发者ID:AnujIravane,项目名称:349,代码行数:57,代码来源:ID3.py

示例7: more_tests

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def more_tests():
    """Ad-hoc smoke test: print a hand-built tree, then learn, score and prune trees.

    Builds a three-level nominal tree by hand and prints its DNF form, its tree
    form and the breadth-first search result. Then trains ID3 on a small
    two-column data set with three (split count, depth) settings, printing the
    validation accuracy each time, and finally prints the last tree before and
    after reduced_error_pruning. Returns the pruned tree.
    """
    def labelled(name, label):
        leaf = Node()
        leaf.name = name
        leaf.label = label
        return leaf

    def nominal(name, children):
        inner = Node()
        inner.name = name
        inner.is_nominal = True
        inner.children = children
        return inner

    n0 = nominal("attrib0", {1: labelled("attrib2", 1), 2: labelled("attrib3", 0)})
    n1 = nominal("attrib1", {1: labelled("attrib4", 2), 2: labelled("attrib5", 3)})

    n = Node()
    n.label = None
    n.decision_attribute = 1
    n.is_nominal = True
    n.name = "attrib"
    n.children = {1: n0, 2: n1}
    print(n.print_dnf_tree())
    print(n.print_tree())
    print(breadth_first_search(n, [n]))

    attribute_metadata = [{'name': "winner", 'is_nominal': True},
                          {'name': "opprundifferential", 'is_nominal': False}]
    data_set = [[1, 0.27], [0, 0.42], [0, 0.86], [0, 0.68], [0, 0.04], [1, 0.01],
                [1, 0.33], [1, 0.42], [1, 0.42], [0, 0.51], [1, 0.4]]

    # Each run gets its own fresh split-count list.
    for numerical_splits_count, max_depth in (([5, 5], 0), ([1, 1], 5), ([5, 5], 5)):
        n = ID3(data_set, attribute_metadata, numerical_splits_count, max_depth)
        print(validation_accuracy(n, data_set))

    print(n.print_tree())
    pruning_set = [[1, 0.11], [0, 0.42], [0, 0.86], [0, 0.55], [0, 0.66], [1, 0.01],
                   [1, 0.11], [1, 0.84], [1, 0.42], [0, 0.51], [1, 0.4]]
    n = reduced_error_pruning(n, data_set, pruning_set)
    print(n.print_tree())
    return n
开发者ID:obiorahm,项目名称:PS2code,代码行数:57,代码来源:pruning.py

示例8: test_breadth_first_search

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def test_breadth_first_search():
    """Print a two-leaf nominal tree and its breadth-first search result; return the root."""
    leaves = {}
    for branch, label in ((1, 1), (2, 0)):
        leaf = Node()
        leaf.label = label
        leaves[branch] = leaf

    n = Node()
    n.label = None
    n.decision_attribute = 1
    n.is_nominal = True
    n.name = "whatever"
    n.children = leaves
    print(n.print_tree())
    # NOTE(review): called with one argument here - confirm breadth_first_search allows that.
    print(breadth_first_search(n))
    return n
开发者ID:obiorahm,项目名称:PS2code,代码行数:16,代码来源:pruning.py

示例9: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Preprocess data_set, then build and return the decision tree Node for it.

    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: A labelled leaf Node (homogeneous data, depth 0, or nothing to split on), or an internal
    Node whose children are a dict keyed by value (nominal) or a two-element list (numeric)
    ========================================================================================================
    '''
    preprocessing(data_set, attribute_metadata)
    ans = Node()

    if check_homogenous(data_set) is not None:
        ans.label = check_homogenous(data_set)
        return ans
    if depth == 0:
        ans.label = mode(data_set)
        return ans

    best = pick_best_attribute(data_set, attribute_metadata,
                               numerical_splits_count)
    attribute, threshold = best[0], best[1]
    if attribute == False:
        ans.label = mode(data_set)
        return ans

    ans.decision_attribute = attribute
    ans.name = attribute_metadata[attribute]['name']
    remaining_depth = depth - 1

    # Comparing the string form tells the marker False apart from a numeric threshold of 0.
    if str(threshold) == 'False':
        ans.is_nominal = True
        divide = split_on_nominal(data_set, attribute)
        branches = {}
        for value in divide.keys():
            branches[value] = ID3(divide[value], attribute_metadata,
                                  numerical_splits_count, remaining_depth)
        ans.children = branches
    else:
        ans.is_nominal = False
        ans.splitting_value = threshold
        divide = split_on_numerical(data_set, attribute, threshold)
        first = ID3(divide[0], attribute_metadata,
                    numerical_splits_count, remaining_depth)
        second = ID3(divide[1], attribute_metadata,
                     numerical_splits_count, remaining_depth)
        ans.children = [first, second]
    return ans
开发者ID:HERMANNITY,项目名称:EECS349,代码行数:50,代码来源:ID3.py

示例10: ID3_helper

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3_helper(data_set, attribute_metadata, numerical_splits_count, depth, nominal_keys):
    """Recursively build a decision tree over data_set and return its root Node.

    attribute_metadata entries are read through .get('name') and
    .get('is_nominal'). numerical_splits_count is decremented in place for the
    chosen attribute on every numeric split. nominal_keys maps an attribute
    position to the values that each get a child on a nominal split.

    Returns a leaf labelled mode(data_set) when depth is 0 or no attribute is
    picked, a leaf labelled with the single class of a homogeneous data set, or
    an internal node. Every child is built with depth - 1, for nominal and
    numeric splits alike, so depth bounds the height of the whole tree.
    """
    att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)

    if depth == 0 or att[0] == False:
        leaf = Node()
        leaf.label = mode(data_set)
        return leaf

    homogenous_label = check_homogenous(data_set)
    if homogenous_label is not None:
        leaf = Node()
        leaf.label = homogenous_label
        return leaf

    best, threshold = att[0], att[1]
    root = Node()
    root.label = None
    root.decision_attribute = best
    root.name = attribute_metadata[best].get('name')
    root.is_nominal = attribute_metadata[best].get('is_nominal')
    # Computed once so both kinds of split consume one level of the depth budget.
    child_depth = depth - 1

    if root.is_nominal == False:
        numerical_splits_count[best] -= 1
        root.splitting_value = threshold
        left_dataset = []
        right_dataset = []
        for row in data_set:
            if row[best] < threshold:
                left_dataset.append(row)
            else:
                right_dataset.append(row)
        root.children = [
            ID3_helper(left_dataset, attribute_metadata, numerical_splits_count, child_depth, nominal_keys),
            ID3_helper(right_dataset, attribute_metadata, numerical_splits_count, child_depth, nominal_keys),
        ]
    else:
        root.children = {}
        for key in nominal_keys[best]:
            child_dataset = [row for row in data_set if row[best] == key]
            root.children[key] = ID3_helper(child_dataset, attribute_metadata,
                                            numerical_splits_count, child_depth, nominal_keys)
    return root
开发者ID:SixuanYu,项目名称:ps2,代码行数:50,代码来源:ID3.py

示例11: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Preprocess data_set, then build and return the decision tree Node for it.

    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set: a labelled leaf,
    or an internal node whose children are a dict keyed by value (nominal) or a two-element
    list (numeric)
    ========================================================================================================
    This function does not change numerical_splits_count itself and writes nothing to stdout.
    '''
    preprocessing(data_set, attribute_metadata)
    root = Node()

    homogenous_label = check_homogenous(data_set)
    if homogenous_label is not None:
        root.label = homogenous_label
        return root

    if depth == 0:
        root.label = mode(data_set)
        return root

    best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    attribute, threshold = best[0], best[1]
    if attribute == False:
        root.label = mode(data_set)
        return root

    root.decision_attribute = attribute
    root.name = attribute_metadata[attribute]['name']
    child_depth = depth - 1

    # Comparing the string form tells the marker False apart from a numeric threshold of 0.
    if str(threshold) == 'False':
        root.is_nominal = True
        root.children = {}
        subsets = split_on_nominal(data_set, attribute)
        for splitval in subsets.keys():
            root.children[splitval] = ID3(subsets[splitval], attribute_metadata,
                                          numerical_splits_count, child_depth)
    else:
        root.is_nominal = False
        root.children = []
        root.splitting_value = threshold
        subsets = split_on_numerical(data_set, attribute, threshold)
        root.children.append(ID3(subsets[0], attribute_metadata, numerical_splits_count, child_depth))
        root.children.append(ID3(subsets[1], attribute_metadata, numerical_splits_count, child_depth))
    return root
开发者ID:3011204077,项目名称:ml-project,代码行数:47,代码来源:ID3.py

示例12: copy_node

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def copy_node(node):
    """Return a new Node tree mirroring node and, recursively, all of its children.

    Nominal nodes get a dict of copied children under the same keys; all other
    nodes get a list holding copies of children[0] .. children[len - 1].
    """
    duplicate = Node()
    for field in ('label', 'decision_attribute', 'is_nominal', 'value', 'splitting_value'):
        setattr(duplicate, field, getattr(node, field))

    if node.is_nominal:
        duplicate.children = {}
        for key in node.children:
            duplicate.children[key] = copy_node(node.children[key])
    else:
        # Children are looked up by position so this works for a list and for a container
        # keyed 0..len-1 alike.
        duplicate.children = [copy_node(node.children[position])
                              for position in range(len(node.children))]

    duplicate.name = node.name
    return duplicate
开发者ID:obiorahm,项目名称:PS2code,代码行数:21,代码来源:pruning.py

示例13: helper

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def helper(data_set, attribute_metadata, numerical_splits_count, depth):
    """Recursively build a decision tree over data_set and return its root Node.

    An empty data set yields an unlabelled node named 'default'. Homogeneous
    data, a single remaining metadata entry, depth 0 or no pickable attribute
    yield a labelled leaf (still named 'default'). Otherwise the node is named
    after the chosen attribute and gets one child per nominal value, or
    children 0 and 1 for a numeric split. numerical_splits_count is decremented
    in place for the chosen attribute on numeric splits.
    """
    root = Node()
    root.name = 'default'

    if len(data_set) == 0:
        return root

    if check_homogenous(data_set) != None:
        root.label = check_homogenous(data_set)
        return root

    if len(attribute_metadata) == 1 or depth == 0:
        root.label = mode(data_set)
        return root

    best_attribute = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    attribute, threshold = best_attribute[0], best_attribute[1]
    if attribute == False:
        root.label = mode(data_set)
        return root

    root.name = attribute_metadata[attribute]['name']
    root.decision_attribute = attribute
    child_depth = depth - 1

    if threshold == False:
        # Nominal split: one child per value found in the data.
        # NOTE(review): is_nominal is None here and the threshold on the numeric branch,
        # which looks inverted (truthy for numeric nodes) - confirm against its readers.
        root.is_nominal = None
        by_value = split_on_nominal(data_set, attribute)
        for key in by_value.keys():
            root.children[key] = helper(by_value[key], attribute_metadata,
                                        numerical_splits_count, child_depth)
    else:
        numerical_splits_count[attribute] -= 1
        root.is_nominal = threshold
        root.splitting_value = threshold
        halves = split_on_numerical(data_set, attribute, threshold)
        root.children[0] = helper(halves[0], attribute_metadata, numerical_splits_count, child_depth)
        root.children[1] = helper(halves[1], attribute_metadata, numerical_splits_count, child_depth)
    return root
开发者ID:Jiawen-Ou,项目名称:Machine-Learning---Decision-Tree,代码行数:44,代码来源:ID3.py

示例14: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set and return its root Node.

    ========================================================================================================
    Input:  A data_set (None or empty yields a leaf labelled 1), attribute_metadata,
    numerical_splits_count (decremented in place for the chosen attribute on every numeric split)
    and depth (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: A labelled leaf Node, or an internal Node whose children are a two-element list
    (numeric split) or a dict keyed by attribute value (nominal split)
    ========================================================================================================
    '''
    n = Node()

    if data_set is None or len(data_set) == 0:
        n.label = 1
        return n

    homo = check_homogenous(data_set)
    if homo != None:
        n.label = homo
        return n

    if depth == 0:
        n.label = mode(data_set)
        return n

    best, split = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best == False:
        n.label = mode(data_set)
        return n

    n.label = None
    n.decision_attribute = best
    n.name = attribute_metadata[best]['name']
    n.splitting_value = split

    if split == False:
        # Nominal case: one subtree per attribute value.
        n.is_nominal = True
        subsets = split_on_nominal(data_set, best)
        n.children = {}
        for value, rows in subsets.items():
            subtree = ID3(rows, attribute_metadata, numerical_splits_count, depth - 1)
            if subtree != None:
                n.children[value] = subtree
        return n

    # Numeric case: left subtree first, then right, sharing the decremented split budget.
    n.is_nominal = False
    left_rows, right_rows = split_on_numerical(data_set, best, split)
    numerical_splits_count[best] -= 1
    left_tree = ID3(left_rows, attribute_metadata, numerical_splits_count, depth - 1)
    right_tree = ID3(right_rows, attribute_metadata, numerical_splits_count, depth - 1)
    n.children = [left_tree, right_tree]
    return n
开发者ID:RifleZhang,项目名称:eecs349,代码行数:72,代码来源:ID3.py

示例15: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import decision_attribute [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn a decision tree over data_set and return its root Node.

    ========================================================================================================
    Input:  A data_set, attribute_metadata (entries carry 'name' and 'is_nominal'),
    numerical_splits_count (decremented in place for the chosen attribute when it is not nominal)
    and depth (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: An unlabelled Node for an empty data set, a labelled leaf, or an internal Node whose
    children are keyed by attribute value (nominal) or by position 0 / 1 (numeric). The node's
    value attribute holds the most common known value of the split attribute.
    ========================================================================================================
    Unknown values: on a deep copy of the data, None entries of the split attribute are replaced by
    that attribute's most common known value before splitting; data_set itself is left untouched.
    '''
    from collections import Counter

    if not data_set:
        return Node()

    homogenous_label = check_homogenous(data_set)
    if homogenous_label is not None:
        n = Node()
        n.label = homogenous_label
        return n

    if not attribute_metadata or depth == 0:
        n = Node()
        n.label = mode(data_set)
        return n

    best, split_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # Bail out before best is used as an index: False would silently address position 0
    # and could even consume a split from numerical_splits_count[0].
    if best == False:
        n = Node()
        n.label = mode(data_set)
        return n

    # Most common known value of the chosen attribute (None if every value is missing).
    known_values = [row[best] for row in data_set if row[best] is not None]
    best_mode = Counter(known_values).most_common(1)[0][0] if known_values else None

    data_copy = copy.deepcopy(data_set)
    for row in data_copy:
        if row[best] is None:
            row[best] = best_mode

    is_nominal = attribute_metadata[best]['is_nominal']
    if is_nominal == False:
        numerical_splits_count[best] -= 1

    tree = Node()
    tree.is_nominal = is_nominal
    tree.decision_attribute = best
    tree.splitting_value = split_value
    tree.name = attribute_metadata[best]['name']
    tree.value = best_mode

    if is_nominal == True:
        branches = split_on_nominal(data_copy, best)
        for value in branches:
            tree.children[value] = ID3(branches[value], attribute_metadata,
                                       numerical_splits_count, depth - 1)
    else:
        splits = split_on_numerical(data_copy, best, split_value)
        # enumerate gives each half its own position even if the two halves compare equal.
        for position, rows in enumerate(splits):
            tree.children[position] = ID3(rows, attribute_metadata,
                                          numerical_splits_count, depth - 1)
    return tree
开发者ID:olivergoodman,项目名称:idthree,代码行数:68,代码来源:ID3.py


注:本文中的node.Node.decision_attribute方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。