当前位置: 首页>>代码示例>>Python>>正文


Python Node.splitting_value方法代码示例

本文整理汇总了Python中node.Node.splitting_value方法的典型用法代码示例。如果您正苦于以下问题:Python Node.splitting_value方法的具体用法?Python Node.splitting_value怎么用?Python Node.splitting_value使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在node.Node的用法示例。


在下文中一共展示了Node.splitting_value方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn an ID3 decision tree over data_set.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    Missing values are patched with replace_missing() before each recursive call.
    '''
    root = Node()

    # Leaf: every example already carries the same label.
    homogenous = check_homogenous(data_set)
    if homogenous is not None:
        root.label = homogenous
        return root

    # Leaf: depth budget spent, no data, or only the class attribute remains.
    if depth == 0 or len(data_set) == 0 or len(attribute_metadata) <= 1:
        root.label = mode(data_set)
        return root

    best_att, best_split = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # BUG FIX: test best_att == False *before* indexing with it — the original
    # evaluated numerical_splits_count[best_att] first, which silently read
    # slot 0 when best_att was False. (`==` kept deliberately: pick_best_attribute
    # signals failure with the literal False.)
    if best_att == False or numerical_splits_count[best_att] == 0:
        root.label = mode(data_set)
        return root

    root.decision_attribute = best_att
    root.splitting_value = best_split
    root.name = attribute_metadata[best_att]['name']
    root.is_nominal = attribute_metadata[best_att]['is_nominal']

    if root.is_nominal:
        # Nominal attribute: one child per observed value.
        children = {}
        for value, subset in split_on_nominal(data_set, best_att).items():
            if is_missing(subset, best_att):
                subset = replace_missing(subset, best_att)
            children[value] = ID3(subset, attribute_metadata, numerical_splits_count, depth - 1)
        root.children = children
    else:
        # Numeric attribute: binary split at best_split.
        first_split, second_split = split_on_numerical(data_set, best_att, best_split)
        if is_missing(first_split, best_att):
            first_split = replace_missing(first_split, best_att)
        if is_missing(second_split, best_att):
            second_split = replace_missing(second_split, best_att)
        # NOTE(review): the shared split budget is mutated in place, so this
        # decrement is visible to sibling branches — confirm intent.
        numerical_splits_count[best_att] -= 1
        root.children = [
            ID3(first_split, attribute_metadata, numerical_splits_count, depth - 1),
            ID3(second_split, attribute_metadata, numerical_splits_count, depth - 1),
        ]
    return root
开发者ID:jlowrie,项目名称:Problem-Set-2,代码行数:62,代码来源:ID3.py

示例2: ID3_recursive

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3_recursive(data_set, attribute_metadata, numerical_splits_count, depth, attribute_modes_dict):
    """Recursively grow an ID3 decision tree.

    Stops with a default (majority-label) node when the depth budget is
    exhausted, the labels are already uniform, or no attributes remain;
    otherwise splits on the best attribute and recurses on each partition.
    """
    # Stopping conditions -> majority-label leaf.
    if depth == 0 or check_homogenous(data_set) is not None or len(attribute_metadata) == 0:
        return default_node(data_set)

    attr_idx, threshold = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if attr_idx == False:
        # No attribute yields any gain; emit a majority-label leaf.
        return default_node(data_set)

    node = Node()
    node.decision_attribute = attr_idx
    node.name = attribute_metadata[attr_idx]['name']
    node.is_nominal = attribute_metadata[attr_idx]['is_nominal']
    node.value = attribute_modes_dict[attr_idx]

    # Give this branch its own split budget so siblings never see its decrements.
    splits_left = copy.deepcopy(numerical_splits_count)
    splits_left[attr_idx] -= 1

    if node.is_nominal:
        # One child per observed nominal value.
        for nominal_value, subset in split_on_nominal(data_set, attr_idx).items():
            node.children[nominal_value] = ID3_recursive(subset, attribute_metadata, splits_left, depth - 1, attribute_modes_dict)
    else:
        # Binary numeric split around the chosen threshold.
        node.splitting_value = threshold
        below, at_or_above = split_on_numerical(data_set, attr_idx, threshold)
        node.children[0] = ID3_recursive(below, attribute_metadata, splits_left, depth - 1, attribute_modes_dict)
        node.children[1] = ID3_recursive(at_or_above, attribute_metadata, splits_left, depth - 1, attribute_modes_dict)

    return node
开发者ID:benmel,项目名称:ml_ps2,代码行数:29,代码来源:ID3.py

示例3: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    n = Node()
    n.mode = mode(data_set)  # majority label, kept for later classification use

    # Leaf: all examples share one label.
    label = check_homogenous(data_set)
    if label is not None:
        n.label = label
        return n

    # Leaf: depth budget spent.
    if depth == 0:
        n.label = mode(data_set)
        return n

    best, sv = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if not best:
        # No attribute improves the split; emit a majority-label leaf.
        n.label = mode(data_set)
        return n

    n.decision_attribute = best
    n.splitting_value = sv
    n.name = attribute_metadata[best]['name']

    # BUG FIX: the numeric/nominal decision was `if n.splitting_value:` — a
    # numeric threshold of 0 / 0.0 is falsy and was misrouted into the nominal
    # branch. Branch on the attribute metadata, which states is_nominal directly.
    if not attribute_metadata[best]['is_nominal']:
        # Numeric attribute: binary split at sv.
        m = split_on_numerical(data_set, best, sv)
        numerical_splits_count[best] = numerical_splits_count[best] - 1
        if not m[0] or not m[1]:
            # Degenerate split (all rows on one side): fall back to a leaf.
            n.label = mode(data_set)
        else:
            n_small = ID3(m[0], attribute_metadata, numerical_splits_count, depth - 1)
            n_big = ID3(m[1], attribute_metadata, numerical_splits_count, depth - 1)
            n.children = [n_small, n_big]
    else:
        # Nominal attribute: one child per non-empty value group.
        n.is_nominal = True
        m = split_on_nominal(data_set, best)
        for k in m:
            if m[k]:
                n_curr = ID3(m[k], attribute_metadata, numerical_splits_count, depth - 1)
                # NOTE(review): children that re-split on the same attribute are
                # discarded here — presumably to avoid useless splits; confirm.
                if n_curr.decision_attribute != n.decision_attribute:
                    n.children[k] = n_curr
    return n
开发者ID:jinhoonbang,项目名称:decision_tree,代码行数:61,代码来源:ID3.py

示例4: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn an ID3 decision tree over data_set.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    Dtree = Node()
    if len(data_set) == 0:
        return Dtree

    # Leaf: every example carries the same (integer) label.
    c = check_homogenous([[element[0]] for element in data_set])
    if isinstance(c, int):
        Dtree.label = c
        return Dtree

    # Leaf: attributes exhausted, depth spent, or no numeric splits left anywhere.
    if len(data_set[0]) == 1 or depth <= 0 or [0] * (len(numerical_splits_count) - 1) == numerical_splits_count[1:]:
        Dtree.label = mode(data_set)
        return Dtree

    data_set = missingValues(data_set)
    best_attribute, threshold = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if not best_attribute:
        Dtree.label = mode(data_set)
        return Dtree

    Dtree.decision_attribute = best_attribute
    Dtree.modeVal = mode([[element[Dtree.decision_attribute]] for element in data_set])
    Dtree.name = attribute_metadata[best_attribute]['name']

    if threshold:
        # Numeric attribute: binary split at `threshold`.
        Dtree.is_nominal = False
        Dtree.splitting_value = threshold
        less, greater = split_on_numerical(data_set, best_attribute, threshold)
        # BUG FIX: new_nsc previously aliased the caller's list, so the
        # decrement leaked into sibling branches; copy before mutating.
        new_nsc = list(numerical_splits_count)
        new_nsc[best_attribute] -= 1
        Dtree.children = [ID3(less, attribute_metadata, new_nsc, depth - 1),
                          ID3(greater, attribute_metadata, new_nsc, depth - 1)]
    else:
        # Nominal attribute: the attribute is consumed, so drop it from a *copy*
        # of the metadata (BUG FIX: the original aliased and mutated the
        # caller's attribute_metadata list).
        Dtree.is_nominal = True
        n_dict = split_on_nominal(data_set, best_attribute)
        new_attribute_metadata = list(attribute_metadata)
        new_attribute_metadata.pop(best_attribute)
        Dtree.children = [ID3(removeAttribute(value, best_attribute), new_attribute_metadata, numerical_splits_count, depth - 1)
                          for key, value in n_dict.iteritems()]
    return Dtree
开发者ID:AnujIravane,项目名称:349,代码行数:57,代码来源:ID3.py

示例5: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================

    '''
    node = Node() # tree node returned for this subset
    entropy_bound = 0.15 # subsets purer than this become leaves (early stopping)
    pick_best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count) # (attribute index, split value)
    best_attribute = pick_best[0] # index of the attribute to split on; False when none is usable
    split_value = pick_best[1] # numeric threshold, or False when the attribute is nominal
    # Leaf cases: subset is nearly pure, depth budget is spent, or no attribute to split on.
    if entropy(data_set) < entropy_bound or depth == 0 or best_attribute == False: 
        node.label = mode(data_set)
        return node
    if split_value is not False: # numeric attribute: binary split at the threshold
        split_data = split_on_numerical(data_set, best_attribute, split_value) # (rows < threshold, rows >= threshold)
        node.is_nominal = False # numeric decision node
        node.splitting_value = split_value # threshold used at classification time
        # NOTE(review): numerical_splits_count is never decremented here, so the
        # numeric-split budget is not actually enforced — confirm intent.
        node.children[0] = ID3(split_data[0], attribute_metadata, numerical_splits_count, depth - 1) # rows below the threshold
        node.children[1] = ID3(split_data[1], attribute_metadata, numerical_splits_count, depth - 1) # rows at/above the threshold
        node.name = attribute_metadata[best_attribute]['name']
        node.decision_attribute = best_attribute # attribute index used at this node
    else: # best_attribute is nominal
        split_data = split_on_nominal(data_set, best_attribute) # dict: nominal value -> rows with that value
        node.is_nominal = True # nominal decision node
        split_data_copy = deepcopy(split_data) # work on a copy while merging missing-value rows
        ### missing-value handling: fold rows whose value is None into the most populous branch ###
        for key in split_data_copy.keys():
            if key is None: 
                # find the nominal value with the most rows; None rows will be merged into it
                greatest_length = -1
                mode_att = None
                for att, data in split_data_copy.iteritems():
                    if len(data) > greatest_length:
                        greatest_length = len(data)
                        mode_att = att
                # NOTE(review): if the None branch is itself the largest, mode_att
                # is None and this appends a list to itself while iterating it,
                # which never terminates — confirm this case cannot occur.
                for data in split_data_copy[key]:
                    split_data_copy[mode_att].append(data) # move each None-valued row into the mode branch
                split_data_copy.pop(key, None) # then drop the None branch entirely
        # one child per remaining nominal value
        for key in split_data_copy: 
            node.children[key] = ID3(split_data_copy[key], attribute_metadata, numerical_splits_count, depth - 1)
        node.name = attribute_metadata[best_attribute]['name']
        node.decision_attribute = best_attribute
    return node
开发者ID:brianzhan,项目名称:EECS349Psets,代码行数:55,代码来源:ID3.py

示例6: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Learn an ID3 decision tree over data_set.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    preprocessing(data_set, attribute_metadata)
    ans = Node()

    # Leaf: every example already shares one label.
    homogeneous_label = check_homogenous(data_set)
    if homogeneous_label is not None:
        ans.label = homogeneous_label
        return ans

    # Leaf: depth budget spent.
    if depth == 0:
        ans.label = mode(data_set)
        return ans

    choice = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if choice[0] == False:
        # No attribute gives any gain; majority-label leaf.
        ans.label = mode(data_set)
        return ans

    ans.decision_attribute = choice[0]
    ans.name = attribute_metadata[choice[0]]['name']
    depth -= 1
    if str(choice[1]) == 'False':
        # Nominal attribute: one child per observed value.
        ans.is_nominal = True
        ans.children = {}
        for val, subset in split_on_nominal(data_set, choice[0]).items():
            ans.children[val] = ID3(subset, attribute_metadata, numerical_splits_count, depth)
    else:
        # Numeric attribute: binary split around the threshold choice[1].
        ans.is_nominal = False
        ans.splitting_value = choice[1]
        lower, upper = split_on_numerical(data_set, choice[0], choice[1])
        ans.children = [ID3(lower, attribute_metadata, numerical_splits_count, depth),
                        ID3(upper, attribute_metadata, numerical_splits_count, depth)]
    return ans
开发者ID:HERMANNITY,项目名称:EECS349,代码行数:50,代码来源:ID3.py

示例7: ID3_helper

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3_helper(data_set, attribute_metadata, numerical_splits_count, depth, nominal_keys):
    '''
    Recursive ID3 worker.
    nominal_keys maps attribute index -> every value seen in training, so each
    nominal node gets a branch for every known value, even ones absent from
    this subset.
    Returns the Node rooting the subtree learned over data_set.
    '''
    att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)

    # Leaf: depth budget spent or no attribute gives any gain -> majority label.
    if depth == 0 or att[0] == False:
        d = Node()
        d.label = mode(data_set)
        return d
    # Leaf: every example already shares one label.
    if check_homogenous(data_set) is not None:
        d = Node()
        d.label = check_homogenous(data_set)
        return d

    root = Node()
    root.label = None
    root.decision_attribute = att[0]
    root.name = attribute_metadata[att[0]].get('name')
    root.is_nominal = attribute_metadata[att[0]].get('is_nominal')

    if root.is_nominal == False:
        # Numeric attribute: consume one split and partition at threshold att[1].
        numerical_splits_count[att[0]] -= 1
        root.splitting_value = att[1]
        left_dataset = [row for row in data_set if row[att[0]] < att[1]]
        right_dataset = [row for row in data_set if row[att[0]] >= att[1]]
        depth = depth - 1
        root.children = [
            ID3_helper(left_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys),
            ID3_helper(right_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys),
        ]
    else:
        # Nominal attribute: one child per known value of the attribute.
        # NOTE(review): depth is NOT decremented on this branch (only the
        # numeric branch decrements it) — confirm this is intentional.
        root.children = {}
        for key in nominal_keys[att[0]]:
            child_dataset = [row for row in data_set if row[att[0]] == key]
            root.children[key] = ID3_helper(child_dataset, attribute_metadata, numerical_splits_count, depth, nominal_keys)
    return root
开发者ID:SixuanYu,项目名称:ps2,代码行数:50,代码来源:ID3.py

示例8: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    preprocessing(data_set, attribute_metadata)

    root = Node()
    # Leaf: all examples share one label.
    homogeneous = check_homogenous(data_set)
    if homogeneous is not None:
        root.label = homogeneous
        return root
    # Leaf: depth budget spent.
    if depth == 0:
        root.label = mode(data_set)
        return root

    best = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best[0] == False:
        # No attribute improves the split; majority-label leaf.
        root.label = mode(data_set)
        return root

    root.decision_attribute = best[0]
    root.name = attribute_metadata[best[0]]['name']
    depth -= 1
    if str(best[1]) == 'False':
        # Nominal attribute: one child per observed value.
        root.is_nominal = True
        root.children = {}
        subsets = split_on_nominal(data_set, best[0])
        for splitval in subsets.keys():
            root.children[splitval] = ID3(subsets[splitval], attribute_metadata, numerical_splits_count, depth)
    else:
        # Numeric attribute: binary split at threshold best[1].
        root.is_nominal = False
        root.children = []
        root.splitting_value = best[1]
        subsets = split_on_numerical(data_set, best[0], best[1])
        # NOTE(review): the numeric split budget is never decremented here, so
        # numerical_splits_count does not actually bound splits — confirm intent.
        root.children.append(ID3(subsets[0], attribute_metadata, numerical_splits_count, depth))
        root.children.append(ID3(subsets[1], attribute_metadata, numerical_splits_count, depth))
    return root
开发者ID:3011204077,项目名称:ml-project,代码行数:47,代码来源:ID3.py

示例9: copy_node

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def copy_node(node):
    """Return a deep copy of `node` and its entire subtree."""
    duplicate = Node()
    duplicate.label = node.label
    duplicate.decision_attribute = node.decision_attribute
    duplicate.is_nominal = node.is_nominal
    duplicate.value = node.value
    duplicate.splitting_value = node.splitting_value
    duplicate.name = node.name

    # Nominal nodes key their children by attribute value; numeric nodes keep a list.
    if node.is_nominal:
        duplicate.children = {key: copy_node(child) for key, child in node.children.items()}
    else:
        duplicate.children = [copy_node(child) for child in node.children]

    return duplicate
开发者ID:obiorahm,项目名称:PS2code,代码行数:21,代码来源:pruning.py

示例10: helper

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def helper(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Recursive ID3 worker: returns the Node rooting the subtree learned over
    data_set, or a default/majority-label leaf when no split is possible.
    '''
    root = Node()
    root.name = 'default'

    # No data at all: return the bare default node.
    if len(data_set) == 0:
        return root

    # Leaf: every example already shares one label.
    label = check_homogenous(data_set)
    if label is not None:
        root.label = label
        return root

    # Leaf: attributes exhausted or depth budget spent.
    if len(attribute_metadata) == 1 or depth == 0:
        root.label = mode(data_set)
        return root

    best_attribute = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best_attribute[0] == False:
        # No attribute gives any gain; majority-label leaf.
        root.label = mode(data_set)
        return root

    root.name = attribute_metadata[best_attribute[0]]['name']
    root.decision_attribute = best_attribute[0]

    if best_attribute[1] == False:
        # Nominal attribute: one child per observed value.
        # BUG FIX: was `root.is_nominal = None`, which is falsy and made
        # classification treat nominal nodes as numeric.
        root.is_nominal = True
        temp_dict = split_on_nominal(data_set, best_attribute[0])
        depth -= 1
        for key in temp_dict.keys():
            root.children[key] = helper(temp_dict[key], attribute_metadata, numerical_splits_count, depth)
    else:
        # Numeric attribute: consume one split, branch at threshold best_attribute[1].
        numerical_splits_count[best_attribute[0]] -= 1
        # BUG FIX: was `root.is_nominal = best_attribute[1]` — the (truthy)
        # threshold — which made numeric nodes look nominal.
        root.is_nominal = False
        root.splitting_value = best_attribute[1]
        temp_tuple = split_on_numerical(data_set, best_attribute[0], best_attribute[1])
        depth -= 1
        root.children[0] = helper(temp_tuple[0], attribute_metadata, numerical_splits_count, depth)
        root.children[1] = helper(temp_tuple[1], attribute_metadata, numerical_splits_count, depth)
    return root
开发者ID:Jiawen-Ou,项目名称:Machine-Learning---Decision-Tree,代码行数:44,代码来源:ID3.py

示例11: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    node = Node()
    theta = 0.0  # entropy threshold: a perfectly pure subset becomes a leaf

    # --- base cases ---
    if not data_set:
        node.label = '?'  # sentinel: the parent replaces it with its own mode
        return node
    if depth == 0:
        node.label = mode(data_set)
        return node
    homogeneous = check_homogenous(data_set)
    if homogeneous is not None:
        # BUG FIX: was `elif check_homogenous(data_set):` — a homogeneous label
        # of 0 is falsy and slipped past the truthiness check.
        node.label = homogeneous
        return node
    # No splits left on any non-class attribute.
    if numerical_splits_count[1:] == [0] * (len(numerical_splits_count) - 1):
        node.label = mode(data_set)
        return node
    if entropy(data_set) == theta:
        node.label = mode(data_set)
        return node

    # --- split on the best attribute ---
    splitting_attr, splitting_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if splitting_attr is False:
        # BUG FIX: guard was missing — False would otherwise be used as index 0
        # (the class column) in the lookups below.
        node.label = mode(data_set)
        return node

    # Copy so sibling subtrees do not see this branch's decrement.
    numerical_splits_count = list(numerical_splits_count)
    numerical_splits_count[splitting_attr] -= 1

    node.decision_attribute = splitting_attr
    node.is_nominal = attribute_metadata[splitting_attr]['is_nominal']
    node.splitting_value = splitting_value
    node.name = attribute_metadata[splitting_attr]['name']
    node.value = mode(data_set)  # interior nodes remember their majority label

    if node.is_nominal:
        # Group rows by their value of the splitting attribute.
        branches = {}
        for data in data_set:
            branches.setdefault(data[splitting_attr], []).append(data)
        for attr, sub_data_set in branches.items():
            node.children[attr] = ID3(sub_data_set, attribute_metadata, numerical_splits_count, depth - 1)
    else:
        # Numeric: rows below the threshold go left, the rest go right.
        left_sub_data_set = [d for d in data_set if d[splitting_attr] < splitting_value]
        right_sub_data_set = [d for d in data_set if d[splitting_attr] >= splitting_value]
        node.children = []
        node.children.append(ID3(left_sub_data_set, attribute_metadata, numerical_splits_count, depth - 1))
        if node.children[0].label == '?':
            node.children[0].label = mode(data_set)  # empty child inherits our mode
        node.children.append(ID3(right_sub_data_set, attribute_metadata, numerical_splits_count, depth - 1))
        if node.children[1].label == '?':
            node.children[1].label = mode(data_set)

    return node
开发者ID:SHvsMK,项目名称:Decision_Tree,代码行数:81,代码来源:ID3.py

示例12: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    See Textbook for algorithm.
    Make sure to handle unknown values, some suggested approaches were
    given in lecture.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    # Your code here

    root = Node()
    #print 'depth =', depth
    
    #if depth == 0: #Depth check
    #    root.label = mode(data_set)
    #else:
    #    root.label = check_homogenous(data_set)

    root.label = mode(data_set)
    homogeneous = check_homogenous(data_set)

    #print 'label=', root.label
    if homogeneous != None or depth == 0: #If data set isn't homogeneous or max depth
        return root # Finished with this branch
    else:
        best_att = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
        #print 'best_att=', best_att
        #print 'data_set=', data_set
        if best_att == (False, False): #Nathan: Exception here since (False, False) can be interpreted as (0, False) and ID3 tries to split on the class
            #root.label = mode(data_set)
            #print 'False, False -> label=', root.label
            return root
        else:
            root.decision_attribute = best_att[0]
            root.is_nominal = attribute_metadata[best_att[0]]['is_nominal']
            root.splitting_value = best_att[1]
        
    #outcomes = [] # this is the classes in the data_set - #Nathan: moved all this to check_homogeneous
    #for i in range(0, len(data_set)):
    #    outcomes.append([data_set[i][0]])
    #done = check_homogenous(outcomes)
    #root.label = done
    
            root.name = attribute_metadata[best_att[0]]['name']      
            child_numerical_splits_count = numerical_splits_count
    ### this is not correct
    # root.children should not have subset datasets in values for each attribute thing
            if root.is_nominal == True:
                root.children = {}
                data = split_on_nominal(data_set, root.decision_attribute)
                sub_depth = depth - 1
                for i in data.keys():
                    new_node = ID3(data[i], attribute_metadata, child_numerical_splits_count, sub_depth)
                    #print sub_depth
                    #print new_node, 'nom'
                    #print [new_node.classify(x) == x[0] for x in data_set]
                    root.children[i] = new_node
                #root.children = split_on_nominal(data_set, root.decision_attribute)
            
            elif root.is_nominal == False:
                root.children = []
                data = split_on_numerical(data_set, root.decision_attribute, root.splitting_value)
                child_numerical_splits_count[root.decision_attribute] = child_numerical_splits_count[root.decision_attribute]-1
                sub_depth = depth - 1
                for i in range(len(data)):
                    new_node = ID3(data[i], attribute_metadata, child_numerical_splits_count, sub_depth)
                    #print sub_depth
                    #print new_node, 'num'
                    root.children.append(new_node)

            else:
                print 'Troubles brewing'
            
    return root

#                best_feature_values = {s.sample[best_feature]
#                                       for s in training_samples}
#                for value in best_feature_values:
#                    samples = [s for s in training_samples
#                               if s.sample[best_feature] == value]
#                    # Recursively, create a child node.
#                    root.children = create_decision_tree(samples,
#                                                      predicting_features)
#                    root_node[value] = child
#        return root_node
    
    
    #while tree.label == None:
    # GenerateTree(X)
    # If NodeEntropy(X) < ThresholdI   **entropy equation 9.3 <---- function below
                        ## threshold = 0.001
        # Create leaf labelled by majority class in X
                        ## mode function
        # Return

    #pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
#.........这里部分代码省略.........
开发者ID:eng543,项目名称:machineLearningProject_genderClassification,代码行数:103,代码来源:ID3.py

示例13: ID3_helper

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3_helper(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Recursively build an ID3 decision tree with entropy-threshold pre-pruning.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    leaf = Node()
    # Pre-pruning threshold: stop once a node is (almost) pure, i.e. its
    # entropy drops below 0.1 bits.
    threshold = 0.1
    # Base cases: no examples, depth exhausted, homogenous labels, entropy
    # below threshold, or no attributes left -> majority-class leaf.
    # NOTE(review): mode(data_set) is called even when data_set is empty;
    # confirm mode() tolerates an empty list or that callers never pass one.
    if (len(data_set) == 0 or depth == 0 or check_homogenous(data_set) != None
            or entropy(data_set) < threshold or len(attribute_metadata) == 0):
        leaf.label = mode(data_set)
        leaf.decision_attribute = None
        leaf.is_nominal = None
        leaf.value = mode(data_set)
        leaf.splitting_value = None
        leaf.name = None
        return leaf

    best_attribute, splitting_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)

    # BUG FIX: test the "zero information gain on all attributes" sentinel
    # BEFORE using best_attribute as an index.  The original indexed
    # attribute_metadata and numerical_splits_count with best_attribute
    # first, so a False return silently addressed column 0.
    if best_attribute == False:
        leaf.label = mode(data_set)
        leaf.decision_attribute = None
        leaf.name = None
        leaf.splitting_value = None
        leaf.value = None
        return leaf

    # Internal node: record which attribute this node splits on.
    leaf.label = None
    leaf.decision_attribute = best_attribute
    leaf.name = attribute_metadata[best_attribute]['name']

    if splitting_value == False:
        # Nominal attribute: one child per observed attribute value,
        # stored in a dictionary keyed by that value.
        leaf.is_nominal = True
        leaf.splitting_value = splitting_value
        examples = split_on_nominal(data_set, best_attribute)
        dictionary = {}
        for value, data in examples.iteritems():
            dictionary[value] = ID3_helper(data, attribute_metadata, numerical_splits_count, depth - 1)
        leaf.children = dictionary
        return leaf
    else:
        # Numeric attribute: binary split at splitting_value.
        # BUG FIX: only charge the numeric split budget when an actual
        # numeric split is performed (the original decremented the count
        # for nominal attributes as well).
        numerical_splits_count[best_attribute] -= 1
        examples = split_on_numerical(data_set, best_attribute, splitting_value)
        leaf.is_nominal = False
        leaf.splitting_value = splitting_value
        # children is a two-element list for the two halves of the split.
        # TODO(review): confirm split_on_numerical's ordering convention
        # (assumed [below-split, at-or-above-split]) against its definition.
        leaf.children = [
            ID3_helper(examples[0], attribute_metadata, numerical_splits_count, depth - 1),
            ID3_helper(examples[1], attribute_metadata, numerical_splits_count, depth - 1),
        ]
        return leaf

示例14: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build an ID3 decision tree, imputing missing values of the chosen split
    attribute with that attribute's mode.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
    maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    if not data_set:
        # No examples left: return an empty (unlabeled) node.
        return Node()

    # Homogenous labels -> leaf with that label (computed once, not twice).
    homogenous = check_homogenous(data_set)
    if homogenous is not None:
        n = Node()
        n.label = homogenous
        return n

    if not attribute_metadata or depth == 0:
        # Out of attributes or out of depth budget -> majority-class leaf.
        n = Node()
        n.label = mode(data_set)
        return n

    best, split_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    # BUG FIX: handle the "no attribute has positive gain" sentinel BEFORE
    # using best as a column index.  The original computed the column mode
    # and decremented the split budget first, so best == False silently
    # addressed column 0.
    if best == False:
        n = Node()
        n.label = mode(data_set)
        return n

    # Impute missing values of the chosen attribute with its mode, working
    # on a deep copy so the caller's data_set is left untouched.
    best_data = [row[best] for row in data_set if row[best] is not None]
    best_mode = max(set(best_data), key=best_data.count)
    data_copy = copy.deepcopy(data_set)
    for row in data_copy:
        if row[best] is None:
            row[best] = best_mode

    if attribute_metadata[best]['is_nominal'] == False:
        # Numeric attribute: consume one unit of its split budget.
        numerical_splits_count[best] -= 1

    tree = Node()  # the root of this subtree
    tree.is_nominal = attribute_metadata[best]['is_nominal']
    tree.decision_attribute = best
    tree.splitting_value = split_value
    tree.name = attribute_metadata[best]['name']
    tree.value = best_mode

    if attribute_metadata[best]['is_nominal'] == True:
        # Nominal attribute: children keyed by attribute value.
        best_attributes_dict = split_on_nominal(data_copy, best)
        for v in best_attributes_dict:
            tree.children[v] = ID3(best_attributes_dict[v], attribute_metadata, numerical_splits_count, depth - 1)
    else:
        # Numeric attribute: two children, keyed 0 and 1 for the two halves.
        # BUG FIX: enumerate instead of splits.index(v), which was an
        # accidental O(n) lookup per child and returns the wrong index
        # whenever the two partitions compare equal.
        splits = split_on_numerical(data_copy, best, split_value)
        for i, v in enumerate(splits):
            tree.children[i] = ID3(v, attribute_metadata, numerical_splits_count, depth - 1)
    return tree

示例15: ID3

# 需要导入模块: from node import Node [as 别名]
# 或者: from node.Node import splitting_value [as 别名]
def ID3(data_set, attribute_metadata, numerical_splits_count, depth):
    '''
    Build an ID3 decision tree, tracking a per-attribute budget of numeric
    splits along each root-to-leaf path.
    ========================================================================================================
    Input:  A data_set, attribute_metadata, maximum number of splits to consider for numerical attributes,
	maximum depth to search to (depth = 0 indicates that this node should output a label)
    ========================================================================================================
    Output: The node representing the decision tree learned over the given data set
    ========================================================================================================
    '''
    # Guard against missing/empty data (also terminates recursion on an
    # empty partition produced by a split upstream).
    if data_set is None or len(data_set) == 0:
        return None

    n = Node()

    # Homogenous labels -> leaf with that label.
    homogenous_value = check_homogenous(data_set)
    if homogenous_value is not None:
        n.label = homogenous_value
        return n

    # Depth budget exhausted -> majority-class leaf.
    if depth == 0:
        n.label = mode(data_set)
        return n

    best_i, split_value = pick_best_attribute(data_set, attribute_metadata, numerical_splits_count)
    if best_i == False:
        # No attribute yields information gain -> majority-class leaf.
        n.label = mode(data_set)
        return n

    n.label = None
    # Index of the attribute with the highest gain ratio.
    n.decision_attribute = best_i
    n.name = attribute_metadata[best_i]['name']
    # split_value is False for nominal attributes, a threshold for numeric.
    n.splitting_value = split_value

    if split_value != False:
        # Numeric attribute: binary split at split_value.
        n.is_nominal = False
        left_data, right_data = split_on_numerical(data_set, best_i, split_value)

        # BUG FIX: the numeric split budget is per-path, so each child gets
        # its own copy of the counts.  The original mutated the shared list
        # in place, making the right subtree's budget depend on how the
        # left subtree split and leaking the side effect to the caller.
        child_counts = list(numerical_splits_count)
        child_counts[best_i] -= 1
        n.children = [
            ID3(left_data, attribute_metadata, list(child_counts), depth - 1),
            ID3(right_data, attribute_metadata, list(child_counts), depth - 1),
        ]
        return n
    else:
        # Nominal attribute: one child per observed value; children whose
        # partition came back empty (ID3 returned None) are omitted.
        n.is_nominal = True
        kid_set = split_on_nominal(data_set, best_i)
        n.children = {}
        for key, val in kid_set.iteritems():
            # BUG FIX: copy the counts per branch for the same reason as above.
            newNode = ID3(val, attribute_metadata, list(numerical_splits_count), depth - 1)
            if newNode is not None:
                n.children[key] = newNode
        return n


注:本文中的node.Node.splitting_value方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。