当前位置: 首页>>代码示例>>Python>>正文


Python Node.is_nominal方法代码示例

本文整理汇总了Python中node.Node.is_nominal方法的典型用法代码示例。如果您正苦于以下问题:Python Node.is_nominal方法的具体用法?Python Node.is_nominal怎么用?Python Node.is_nominal使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在node.Node的用法示例。


在下文中一共展示了Node.is_nominal方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: more_tests

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def more_tests():
    """Smoke tests: hand-build a nominal decision tree, print it, then train,
    validate and prune ID3 trees on a small two-attribute data set.

    NOTE(review): depends on module-level names (Node, ID3,
    breadth_first_search, validation_accuracy, reduced_error_pruning)
    defined elsewhere in this project.
    """
    # Leaves of the first nominal subtree.
    n2 = Node()
    n2.name = "attrib2"
    n2.label = 1

    n3 = Node()
    n3.name = "attrib3"
    n3.label = 0 
    
    # Internal nominal node; children are keyed by attribute value.
    n0 = Node()
    n0.name = "attrib0"
    n0.is_nominal = True
    n0.children = {1: n2, 2: n3}

    # Leaves of the second nominal subtree.
    n4 = Node()
    n4.name = "attrib4"
    n4.label = 2

    n5 = Node()
    n5.name = "attrib5"
    n5.label = 3
    
    n1 = Node()
    n1.name = "attrib1"
    n1.is_nominal = True
    n1.children = {1: n4, 2: n5}

    # Root: nominal split on attribute 1 with the two subtrees as children.
    n = Node()
    n.label = None
    n.decision_attribute = 1
    n.is_nominal = True
    n.name = "attrib"
    n.children = {1: n0, 2: n1}
    print n.print_dnf_tree()
    print n.print_tree()
    print breadth_first_search(n, [n])

    # Toy data set: column 0 is the binary label, column 1 is numeric.
    attribute_metadata = [{'name': "winner",'is_nominal': True},{'name': "opprundifferential",'is_nominal': False}]
    data_set = [[1, 0.27], [0, 0.42], [0, 0.86], [0, 0.68], [0, 0.04], [1, 0.01], [1, 0.33], [1, 0.42], [1, 0.42], [0, 0.51], [1, 0.4]]
    numerical_splits_count = [5, 5]
    # depth 0: ID3 must return a bare majority-label leaf.
    n = ID3(data_set, attribute_metadata, numerical_splits_count, 0)
    print validation_accuracy(n,data_set)

    # Only one numeric split allowed per attribute.
    numerical_splits_count = [1, 1]
    n = ID3(data_set, attribute_metadata, numerical_splits_count, 5)
    print validation_accuracy(n,data_set)

    # Full budget: up to 5 numeric splits per attribute, depth 5.
    numerical_splits_count = [5, 5]
    n = ID3(data_set, attribute_metadata, numerical_splits_count, 5)
    print validation_accuracy(n,data_set)
    
    print n.print_tree()
    # Prune against a held-out validation set; show the tree before and after.
    n = reduced_error_pruning(n,data_set,[[1, 0.11], [0, 0.42], [0, 0.86], [0, 0.55], [0, 0.66], [1, 0.01], [1, 0.11], [1, 0.84], [1, 0.42], [0, 0.51], [1, 0.4]])
    print n.print_tree()
    return n
开发者ID:obiorahm,项目名称:PS2code,代码行数:57,代码来源:pruning.py

示例2: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    # Your code here
    print depth
    Dtree = Node()
    if len(data_set) == 0:
        return Dtree
    c = check_homogenous([[element[0]] for element in data_set])
    if isinstance(c,int):
         Dtree.label = c
         return Dtree
    elif len(data_set[0]) == 1 or depth <= 0 or [0]*(len(numerical_splits_count)-1) == numerical_splits_count[1:]:
         Dtree.label = mode(data_set)
         return Dtree
    else:
         data_set = missingValues(data_set)
         best_attribute,threshold = pick_best_attribute(data_set,attribute_metadata,numerical_splits_count)
         if not(best_attribute):
             Dtree.label = mode(data_set)
             return Dtree
         
         Dtree.decision_attribute = best_attribute
         Dtree.modeVal = mode([[element[Dtree.decision_attribute]] for element in data_set])
         Dtree.name = attribute_metadata[best_attribute]['name']
         if threshold:
             Dtree.is_nominal = False
             Dtree.splitting_value = threshold
             less,greater = split_on_numerical(data_set,best_attribute,threshold)
             new_nsc = numerical_splits_count
             new_nsc[best_attribute] -= 1
             Dtree.children = [ID3(less,attribute_metadata,new_nsc,depth-1),ID3(greater,attribute_metadata,new_nsc,depth-1)]
         else:
             Dtree.is_nominal = True
             n_dict = split_on_nominal(data_set,best_attribute)
             new_attribute_metadata = attribute_metadata
             new_attribute_metadata.pop(best_attribute)
             #try:
             Dtree.children = [ID3(removeAttribute(value,best_attribute),new_attribute_metadata,numerical_splits_count,depth-1) for key,value in n_dict.iteritems()]
             #except AttributeError:
              #   print n_dict
               #  print best_attribute
                # print threshold
                 #raise Exception("wut")
    return Dtree       
    pass
开发者ID:AnujIravane,项目名称:349,代码行数:57,代码来源:ID3.py

示例3: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    Implementation notes (review):
      - Recursion stops once entropy(data_set) < 0.15 (not only on perfect
        homogeneity), so leaves may be impure.
      - Missing nominal values (key None after split_on_nominal) are merged
        into the branch that holds the most examples.
      - numerical_splits_count is passed down unchanged and never decremented
        here -- confirm the per-attribute split cap is enforced elsewhere
        (e.g. inside pick_best_attribute).
      - node.children is indexed both as [0]/[1] and by nominal value, which
        presumes Node.children defaults to a dict -- confirm in node.py.
    '''
    node = Node() # new node
    entropy_bound = 0.15 # entropy of data_set must be below bound to become a leaf
    pick_best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count) # tuple
    best_attribute = pick_best[0] # best attribute to split on
    split_value = pick_best[1] # best value to split on
    # Leaf cases: nearly-pure data, depth exhausted, or no attribute usable.
    if entropy(data_set) < entropy_bound or depth == 0 or best_attribute == False: 
        node.label = mode(data_set)
        return node
    if split_value is not False: # if there is a split value (best attribute is numeric)
        split_data = split_on_numerical(data_set, best_attribute, split_value) # splitting data by split value (lesser, greater)
        node.is_nominal = False # node is numeric
        node.splitting_value = split_value # best value to split on
        node.children[0] = ID3(split_data[0], attribute_metadata, numerical_splits_count, depth - 1) # less than split value
        node.children[1] = ID3(split_data[1], attribute_metadata, numerical_splits_count, depth - 1) # greater than split value
        node.name = attribute_metadata[best_attribute]['name']
        node.decision_attribute = best_attribute # best attribute to split on
    else: # best_attribute is nominal
        split_data = split_on_nominal(data_set, best_attribute) # returns a dictionary with nominal attributes as keys
        node.is_nominal = True # node is nominal
        split_data_copy = deepcopy(split_data) # deep copy split_data
        ### filling in missing data ###
        # A None key marks rows whose value for this attribute is unknown.
        for key in split_data_copy.keys():
            if key is None: 
                # find most common attribute and add the missing attribute data into the most common attribute
                greatest_length = -1
                mode_att = None
                for att, data in split_data_copy.iteritems():
                    if len(data) > greatest_length:
                        greatest_length = len(data)
                        mode_att = att
                for data in split_data_copy[key]:
                    split_data_copy[mode_att].append(data) # adds all the None data into the mode attribute 
                split_data_copy.pop(key, None) # removes the None attribute data
        # add a children for each nominal attribute
        for key in split_data_copy: 
            node.children[key] = ID3(split_data_copy[key], attribute_metadata, numerical_splits_count, depth - 1)
        node.name = attribute_metadata[best_attribute]['name']
        node.decision_attribute = best_attribute
    # print node.children
    return node
开发者ID:brianzhan,项目名称:EECS349Psets,代码行数:55,代码来源:ID3.py

示例4: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    Fixes over the original: check_homogenous is evaluated once instead of
    twice, and the nominal/numeric decision uses ``best[1] is False`` instead
    of the fragile string comparison ``str(best[1]) == 'False'``.
    '''
    preprocessing(data_set, attribute_metadata)
    ans = Node()
    homogenous = check_homogenous(data_set)  # hoisted: previously computed twice
    if homogenous is not None:
        # Leaf: all examples share one label.
        ans.label = homogenous
    elif depth == 0:
        # Leaf: depth budget exhausted -- emit the majority label.
        ans.label = mode(data_set)
    else:
        best = pick_best_attribute(data_set, attribute_metadata, 
            numerical_splits_count)
        if best[0] == False:
            # No usable attribute: majority-label leaf.
            ans.label = mode(data_set)
        else:
            ans.decision_attribute = best[0]
            ans.name = attribute_metadata[best[0]]['name']
            depth -= 1
            if best[1] is False:
                # Nominal attribute: one child per observed value.
                ans.is_nominal = True
                ans.children = {}
                divide = split_on_nominal(data_set, best[0])
                for x in divide.keys():
                    ans.children[x] = ID3(divide[x], attribute_metadata, 
                        numerical_splits_count, depth)
            else:
                # Numeric attribute: two children around the threshold.
                ans.is_nominal = False
                ans.children = []
                ans.splitting_value = best[1]
                divide = split_on_numerical(data_set, best[0], best[1])
                ans.children.append(ID3(divide[0], attribute_metadata, 
                    numerical_splits_count, depth))
                ans.children.append(ID3(divide[1], attribute_metadata, 
                    numerical_splits_count, depth))
    return ans
开发者ID:HERMANNITY,项目名称:EECS349,代码行数:50,代码来源:ID3.py

示例5: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    Fix over the original: the ``best_att == False`` failure check now runs
    BEFORE ``numerical_splits_count[best_att]`` is indexed, so the sentinel
    False is never used as a list index (in Python, False indexes element 0).
    '''
    root = Node()

    # Leaf: all examples share one label.
    homogenous = check_homogenous(data_set)
    if homogenous is not None:
        root.label = homogenous
        return root

    # Leaf: depth exhausted, no data, or no attributes left to split on.
    if depth == 0 or len(data_set) == 0 or len(attribute_metadata) <= 1:
        root.label = mode(data_set)
        return root

    best_att, best_split = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # Leaf: no attribute gives any information gain.
    if best_att == False:
        root.label = mode(data_set)
        return root
    # Leaf: the chosen attribute's split budget is spent.
    if numerical_splits_count[best_att] == 0:
        root.label = mode(data_set)
        return root

    root.decision_attribute = best_att
    root.splitting_value = best_split
    root.name = attribute_metadata[best_att]['name']
    root.is_nominal = attribute_metadata[best_att]['is_nominal']
    if root.is_nominal:
        # Nominal split: one child per observed value, with missing values
        # replaced before recursing.
        examples = {}
        for k, val in split_on_nominal(data_set, best_att).items():
            if is_missing(val, best_att):
                val = replace_missing(val, best_att)
            examples[k] = ID3(val, attribute_metadata, numerical_splits_count, depth - 1)
        root.children = examples
    else:
        # Numeric split: two children around best_split.
        root.children = []
        first_split, second_split = split_on_numerical(data_set, best_att, best_split)
        if is_missing(first_split, best_att):
            first_split = replace_missing(first_split, best_att)
        if is_missing(second_split, best_att):
            second_split = replace_missing(second_split, best_att)
        # NOTE(review): this decrement mutates the caller's list in place, so
        # the budget is shared across sibling branches -- confirm intended.
        numerical_splits_count[best_att] -= 1
        root.children.append(ID3(first_split, attribute_metadata, numerical_splits_count, depth - 1))
        root.children.append(ID3(second_split, attribute_metadata, numerical_splits_count, depth - 1))
    return root
开发者ID:jlowrie,项目名称:Problem-Set-2,代码行数:62,代码来源:ID3.py

示例6: ID3_recursive

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3_recursive(data_set, attribute_metadata, numerical_splits_count, depth, attribute_modes_dict):
    """Grow one level of an ID3 decision tree and recurse on each branch.

    Emits a leaf (via default_node) when the depth budget is spent, the
    examples are homogeneous, no attributes remain, or no attribute is worth
    splitting on; otherwise splits on the best attribute (nominal -> one
    child per value, numeric -> two children around the threshold).
    """
    # Leaf conditions, checked in the same order as the short-circuit original.
    if depth == 0:
        return default_node(data_set)
    if check_homogenous(data_set) is not None:
        return default_node(data_set)
    if len(attribute_metadata) == 0:
        return default_node(data_set)

    best_attr, threshold = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best_attr == False:
        return default_node(data_set)

    tree = Node()
    tree.decision_attribute = best_attr
    tree.name = attribute_metadata[best_attr]['name']
    tree.is_nominal = attribute_metadata[best_attr]['is_nominal']
    tree.value = attribute_modes_dict[best_attr]

    # Work on a private copy so sibling branches keep their own split budget.
    remaining_splits = copy.deepcopy(numerical_splits_count)
    remaining_splits[best_attr] -= 1

    if tree.is_nominal:
        for branch_value, subset in split_on_nominal(data_set, best_attr).items():
            tree.children[branch_value] = ID3_recursive(subset, attribute_metadata, remaining_splits, depth - 1, attribute_modes_dict)
    else:
        tree.splitting_value = threshold
        below, at_or_above = split_on_numerical(data_set, best_attr, threshold)
        tree.children[0] = ID3_recursive(below, attribute_metadata, remaining_splits, depth - 1, attribute_modes_dict)
        tree.children[1] = ID3_recursive(at_or_above, attribute_metadata, remaining_splits, depth - 1, attribute_modes_dict)

    return tree
开发者ID:benmel,项目名称:ml_ps2,代码行数:29,代码来源:ID3.py

示例7: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    Implementation notes (review):
      - `if n.splitting_value:` treats a 0/0.0 threshold as "no threshold" and
        falls into the nominal branch -- confirm pick_best_attribute can
        never return 0 as a numeric split value.
      - The numeric branch never sets n.is_nominal = False; it relies on the
        Node default -- confirm in node.py.
      - `if not best:` treats attribute index 0 as failure, which is safe only
        because column 0 is the label.
      - numerical_splits_count is decremented in place, so the budget is
        shared with sibling branches and the caller -- confirm intended.
    '''
    # Your code here

    n = Node()
    # Majority label cached on every node (used by pruning/unknown handling).
    n.mode = mode(data_set)
    label = check_homogenous(data_set)

    # Leaf: all examples share one label.
    if label is not None:
        n.label = label
        return n

    # Leaf: depth budget exhausted.
    elif depth == 0:
        n.label = mode(data_set)
        return n

    else:
        best, sv = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)

        if not best:
            n.label = mode(data_set)
            return n

        n.decision_attribute = best
        n.splitting_value = sv
        n.name = attribute_metadata[best]['name']

        #numeric
        if n.splitting_value:
            m = split_on_numerical(data_set, best, n.splitting_value)
            numerical_splits_count[best] = numerical_splits_count[best] - 1
            # Degenerate split (one side empty): fall back to a majority leaf.
            if not m[0] or not m[1]:
                n.label = mode(data_set)
            else:
                n_small = ID3(m[0], attribute_metadata, numerical_splits_count, depth-1)
                n_big = ID3(m[1], attribute_metadata, numerical_splits_count, depth-1)
                n.children = [n_small, n_big]

        #nominal
        else:
            n.is_nominal = True
            m = split_on_nominal(data_set, best)
            for k,v in m.items():
                # Only attach non-empty branches whose subtree did not pick
                # the same attribute again.
                if m[k]:
                    n_curr = ID3(m[k], attribute_metadata, numerical_splits_count, depth-1)
                    if n_curr.decision_attribute != n.decision_attribute:
                        n.children[k] = n_curr
        return n
开发者ID:jinhoonbang,项目名称:decision_tree,代码行数:61,代码来源:ID3.py

示例8: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    Implementation notes (review):
      - The numeric-split decrement of numerical_splits_count is commented
        out below, so only `depth` bounds the recursion -- confirm intended.
      - `str(best[1]) == 'False'` is a fragile way to detect the nominal
        (no-threshold) case; it works only because a numeric threshold never
        stringifies to 'False'.
      - check_homogenous is called twice on the same data.
    '''
    preprocessing(data_set, attribute_metadata)
    # Leaf: all examples share one label.
    if check_homogenous(data_set) != None:
        root = Node()
        root.label = check_homogenous(data_set)
    else: 
        # Leaf: depth budget exhausted.
        if depth == 0:
            root = Node()
            root.label = mode(data_set)
        else:
            best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
            # Leaf: no attribute gives any information gain.
            if best[0] == False:
                root = Node()
                root.label = mode(data_set)
            else:
                root = Node()
                root.decision_attribute = best[0]
                root.name = attribute_metadata[best[0]]['name']
                depth -= 1
                if str(best[1]) == 'False':
                    # Nominal split: one child per observed value.
                    root.is_nominal = True
                    root.children = {}
                    subsets = split_on_nominal(data_set, best[0])
                    for splitval in subsets.keys():
                        root.children[splitval] = ID3(subsets[splitval], attribute_metadata, numerical_splits_count, depth)
                else:
                    # Numeric split: two children around the threshold.
                    root.is_nominal = False
                    root.children = []
                    root.splitting_value = best[1]
                    subsets = split_on_numerical(data_set, best[0], best[1])
                    #numerical_splits_count[best[0]] -= 1
                    # Debug output left in by the author.
                    print numerical_splits_count
                    print depth
                    root.children.append(ID3(subsets[0], attribute_metadata, numerical_splits_count, depth))
                    root.children.append(ID3(subsets[1], attribute_metadata, numerical_splits_count, depth)) 
    return root
开发者ID:3011204077,项目名称:ml-project,代码行数:47,代码来源:ID3.py

示例9: helper

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def helper(data_set, attribute_metadata, numerical_splits_count, depth):
    """Recursive worker that grows an ID3 decision tree.

    Input:  data_set (label in column 0), attribute_metadata,
            per-attribute numeric split budget, remaining depth.
    Output: the Node rooting the subtree learned over data_set.

    Fixes over the original: root.is_nominal was set to None for nominal
    splits and to the splitting *value* for numeric splits; it is now a
    proper True/False flag.
    """
    root = Node()
    root.name = 'default'
    # Leaf: no data at all.
    if len(data_set) == 0:
        return root
    # Leaf: all examples share one label.
    homogenous_label = check_homogenous(data_set)
    if homogenous_label is not None:
        root.label = homogenous_label
        return root
    # Leaf: only the label column remains, or depth budget exhausted.
    if len(attribute_metadata) == 1 or depth == 0:
        root.label = mode(data_set)
        return root
    best_attribute = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # Leaf: no attribute gives any information gain.
    if best_attribute[0] == False:
        root.label = mode(data_set)
        return root

    root.name = attribute_metadata[best_attribute[0]]['name']
    root.decision_attribute = best_attribute[0]

    if best_attribute[1] == False:
        # Nominal split: one child per observed value.
        root.is_nominal = True  # bug fix: was None (falsy)
        branches = split_on_nominal(data_set, best_attribute[0])
        depth -= 1
        for key in branches.keys():
            root.children[key] = helper(branches[key], attribute_metadata, numerical_splits_count, depth)
    else:
        # Numeric split around the returned threshold.
        numerical_splits_count[best_attribute[0]] -= 1
        root.is_nominal = False  # bug fix: was set to the splitting value
        root.splitting_value = best_attribute[1]
        below, at_or_above = split_on_numerical(data_set, best_attribute[0], best_attribute[1])
        depth -= 1
        root.children[0] = helper(below, attribute_metadata, numerical_splits_count, depth)
        root.children[1] = helper(at_or_above, attribute_metadata, numerical_splits_count, depth)
    return root
开发者ID:Jiawen-Ou,项目名称:Machine-Learning---Decision-Tree,代码行数:44,代码来源:ID3.py

示例10: test_breadth_first_search

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def test_breadth_first_search():
    n0 = Node()
    n0.label = 1
    n1 = Node()
    n1.label = 0
    n = Node()
    n.label = None
    n.decision_attribute = 1
    n.is_nominal = True
    n.name = "whatever"
    n.children = {1: n0, 2: n1}
    print n.print_tree()
    print breadth_first_search(n)
    return n
开发者ID:obiorahm,项目名称:PS2code,代码行数:16,代码来源:pruning.py

示例11: ID3_helper

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3_helper(data_set, attribute_metadata, numerical_splits_count, depth, nominal_keys):
    """Recursive ID3 worker.

    nominal_keys presumably maps each attribute index to the full set of
    values that nominal attribute can take -- TODO confirm against caller.

    Implementation notes (review):
      - numerical_splits_count is decremented in place, so the numeric split
        budget is shared across sibling branches -- confirm intended.
      - The debug prints below were left in by the author.
    """
    att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    print "before"
    # print attribute_metadata
    # print numerical_splits_count
    print att
    print "after"
    if depth == 0 or att[0] == False:  #depth or gain ratio is 0
        d = Node()
        default = mode(data_set)
        d.label = default
        return d
    elif check_homogenous(data_set) is not None:
        # Leaf: all examples share one label.
        d = Node()
        d.label = check_homogenous(data_set)
        return d
    else:  #how to recursion
        root = Node()
        # att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
        # if att[0] != False:
        root.label = None
        root.decision_attribute = att[0]
        root.name = attribute_metadata[att[0]].get('name')
        root.is_nominal = attribute_metadata[att[0]].get('is_nominal')
        if root.is_nominal == False:
            # Numeric split: partition rows around the threshold att[1].
            numerical_splits_count[att[0]] -= 1
            root.splitting_value = att[1]
            root.children = []
            left_dataset = []
            right_dataset = []
            for i in xrange(len(data_set)):
                if data_set[i][att[0]] < att[1]:
                    left_dataset.append(data_set[i])
                else:
                    right_dataset.append(data_set[i])
            depth = depth - 1
            root.children.append(ID3_helper(left_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys))
            root.children.append(ID3_helper(right_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys))
        else:
            # Nominal split: one child per *possible* value (from nominal_keys),
            # so branches unseen in this subset still get a child.
            # NOTE: depth is not decremented on this path -- confirm intended.
            root.children = {}
            for key in nominal_keys[att[0]]:
                chile_dataset = []  # sic: rows whose value for att[0] equals key
                for i in xrange(len(data_set)):
                    if data_set[i][att[0]] == key:
                        chile_dataset.append(data_set[i])
                child = ID3_helper(chile_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys)
                root.children.update({key: child})
        return root
开发者ID:SixuanYu,项目名称:ps2,代码行数:50,代码来源:ID3.py

示例12: copy_node

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def copy_node(node):
    """Return a deep copy of *node* and its entire subtree.

    Nominal nodes keep their children in a dict keyed by attribute value;
    numeric nodes keep them in a two-element list.
    """
    duplicate = Node()
    duplicate.label = node.label
    duplicate.decision_attribute = node.decision_attribute
    duplicate.is_nominal = node.is_nominal
    duplicate.value = node.value
    duplicate.splitting_value = node.splitting_value
    duplicate.name = node.name

    if node.is_nominal:
        duplicate.children = {key: copy_node(child) for key, child in node.children.items()}
    else:
        duplicate.children = [copy_node(child) for child in node.children]

    return duplicate
开发者ID:obiorahm,项目名称:PS2code,代码行数:21,代码来源:pruning.py

示例13: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    Implementation notes (review):
      - Returns the *string* "Empty_data" (not a Node) for an empty data_set
        -- callers must handle this inconsistency.
      - attribute_metadata[i].values()[0]/[1] relies on Python 2 dict
        ordering of the {'name', 'is_nominal'} keys -- fragile; confirm it
        actually yields is_nominal then name.
      - data_set and numerical_splits_count are deep-copied, but
        `del attribute_metadata[...]` still mutates the caller's list.
    '''
    data_set = copy.deepcopy(data_set)
    num_splits = copy.deepcopy(numerical_splits_count)
    node = Node()
    Default = 0
    best_value = []

    if not data_set:
        return "Empty_data"
    elif depth == 0:
        # Leaf: depth budget exhausted.
        node.label = mode(data_set)
        return node
    elif check_homogenous(data_set) != None:        
        # Leaf: all examples share one label.
        node.label = check_homogenous(data_set)       
        return node  
    elif len(data_set[0]) == 1:
        # Leaf: only the label column remains.
        node.label = mode(data_set)
        return node                      
    elif len(attribute_metadata) == 0:  
        node.label = mode(data_set)
        return node
    else:
        node.label = None
        #print num_splits
        
        (best_attrnumber,split_value) = pick_best_attribute(data_set, attribute_metadata,num_splits)
        node.decision_attribute = best_attrnumber
        node.splitting_value = split_value
        # .values()[1] is presumed to be the 'name' entry -- see note above.
        node.name = attribute_metadata[best_attrnumber].values()[1]
        #print node.decision_attribute
        if attribute_metadata[best_attrnumber].values()[0]:
            # Nominal attribute: no threshold, split by value.
            node.is_nominal = True
            node.splitting_value = None
            examples = split_on_nominal(data_set, best_attrnumber)
        else:
            # Numeric attribute: split around the threshold, spend one split.
            node.is_nominal = False
            node.splitting_value = split_value
            examples = split_on_numerical(data_set, best_attrnumber,split_value)
            num_splits[best_attrnumber] = num_splits[best_attrnumber] - 1 
        # Exhausted attribute: retire it from the (caller's) metadata list.
        if num_splits[best_attrnumber] == 0:
            del attribute_metadata[best_attrnumber] 
        if node.is_nominal == True: 
            for v in examples.keys():
                # Drop the retired attribute's column from each child's rows.
                if num_splits[best_attrnumber] <= 0:  
                    for i in range(len(examples[v])):
                        del examples[v][i][best_attrnumber]
                node.children[v] = ID3(examples[v], attribute_metadata, num_splits, depth-1)
            return node
        else:
            for i in range(len(examples)): 
                if num_splits[best_attrnumber] <= 0:                            
                    for j in range(len(examples[i])):
                        del examples[i][j][best_attrnumber]
                node.children[i] = ID3(examples[i], attribute_metadata, num_splits, depth-1)
            return node   
开发者ID:britlovefan,项目名称:DecisionTree,代码行数:69,代码来源:ID3.py

示例14: collect_node_leaf

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
                collect_node_leaf(root.children[child], nodes, leaves)


# Hand-built test tree:
#   a0 ("weather", nominal) -> branch  1: b0 ("#injury",   numeric @ 50)   -> leaves c0, c1
#                           -> branch -1: b1 ("#audience", numeric @ 20.9) -> leaves c2, c3
a0 = Node()
b0 = Node()
b1 = Node()
c0 = Node()
c1 = Node()
c2 = Node()
c3 = Node()
c0.label = 0
c1.label = 1
c2.label = 0
c3.label = 1
a0.name = "weather"
a0.is_nominal = True
a0.label = None
b0.name = "#injury"
b0.label = None
b0.is_nominal = False
b0.splitting_value = 50
b0.children = [c0, c1]
b1.name = "#audience"
b1.label = None
b1.is_nominal = False
b1.splitting_value = 20.9
b1.children = [c2, c3]  # bug fix: was [c0, c1], leaving c2/c3 built but unused
a0.children = {1: b0, -1: b1}

nodes = []
leaves = []
开发者ID:SixuanYu,项目名称:ps2,代码行数:33,代码来源:pruning.py

示例15: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import is_nominal [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    tree = Node()

    # --- Leaf cases, in order of precedence ---
    if not data_set:
        # Empty branch: '?' marks a placeholder the parent back-fills below.
        tree.label = '?'
        return tree
    if depth == 0:
        tree.label = mode(data_set)
        return tree
    if check_homogenous(data_set):
        # Every example shares one (truthy) label.
        tree.label = data_set[0][0]
        return tree
    if numerical_splits_count[1:] == [0] * (len(numerical_splits_count) - 1):
        # No numeric split budget left on any attribute.
        tree.label = mode(data_set)
        return tree
    if entropy(data_set) == 0.0:
        tree.label = mode(data_set)
        return tree

    # --- Split on the best attribute ---
    split_attr, split_val = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)

    # Copy the budget so the in-place decrement never leaks to the caller.
    remaining = list(numerical_splits_count)
    remaining[split_attr] -= 1

    tree.decision_attribute = split_attr
    tree.is_nominal = attribute_metadata[split_attr]['is_nominal']
    tree.splitting_value = split_val
    tree.name = attribute_metadata[split_attr]['name']
    tree.value = mode(data_set)  # majority label cached on the interior node

    if tree.is_nominal:
        # Group rows by their value of the splitting attribute.
        groups = {}
        for row in data_set:
            groups.setdefault(row[split_attr], []).append(row)
        for branch_value, rows in groups.items():
            tree.children[branch_value] = ID3(rows, attribute_metadata, remaining, depth - 1)
    else:
        # Two-way numeric split; same "< vs everything else" partition as before.
        lower = [row for row in data_set if row[split_attr] < split_val]
        upper = [row for row in data_set if not (row[split_attr] < split_val)]
        tree.children = []
        for side in (lower, upper):
            child = ID3(side, attribute_metadata, remaining, depth - 1)
            if child.label == '?':
                # Back-fill an empty branch with this node's majority label.
                child.label = mode(data_set)
            tree.children.append(child)

    return tree


注:本文中的node.Node.is_nominal方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。