

Python DecisionTreeClassifier.predict Method Code Examples

This article collects typical usage examples of the Python method sklearn.tree.DecisionTreeClassifier.predict. If you are wondering exactly what DecisionTreeClassifier.predict does, how to call it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage examples of the class it belongs to, sklearn.tree.DecisionTreeClassifier.


The sections below show 15 code examples of DecisionTreeClassifier.predict, sorted by popularity by default.
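As a starting point, here is a minimal, self-contained sketch of the basic fit/predict workflow. It uses synthetic data and a recent scikit-learn API (sklearn.model_selection); some of the examples below were written against older releases that still used sklearn.cross_validation:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Synthetic data purely for illustration
X, y = make_classification(n_samples=200, n_features=5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
clf.fit(X_train, y_train)

# predict() expects a 2D array of shape (n_samples, n_features)
# and returns an array of predicted class labels
y_pred = clf.predict(X_test)
print(y_pred[:10])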

Example 1: __init__

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
class Ensemble:

	def __init__(self, data):
		self.rf = RandomForestClassifier(n_estimators=80, n_jobs=-1, min_samples_split=45, criterion='entropy')
		self.lda = LDA()
		self.dec = DecisionTreeClassifier(criterion='entropy')
		self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)

		self.make_prediction(data)


	def make_prediction(self, data):
		'''
		Make an ensemble prediction
		'''
		self.rf.fit(data.features_train, data.labels_train)
		self.lda.fit(data.features_train, data.labels_train)
		self.dec.fit(data.features_train, data.labels_train)
		self.ada.fit(data.features_train, data.labels_train)

		pre_pred = []
		self.pred = []

		ada_pred = self.ada.predict(data.features_test)
		rf_pred = self.rf.predict(data.features_test)
		lda_pred = self.lda.predict(data.features_test)
		dec_pred = self.dec.predict(data.features_test)

		for i in range(len(rf_pred)):
			pre_pred.append([ rf_pred[i], lda_pred[i], dec_pred[i], ada_pred[i] ])

		for entry in pre_pred:
			pred_list = sorted(entry, key=entry.count, reverse=True)
			self.pred.append(pred_list[0])
Author: BHouwens, Project: KaggleProjects, Lines: 36, Source file: ensemble.py
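The per-sample voting loop above sorts each row of per-model predictions by count and takes the first element. A minimal standalone sketch of the same majority vote using collections.Counter (majority_vote is a hypothetical helper, not part of the original project):

from collections import Counter

def majority_vote(rows):
    """Return the most common label in each row of per-model predictions."""
    return [Counter(row).most_common(1)[0][0] for row in rows]

# Three models voting on four samples
print(majority_vote([[1, 1, 0], [0, 0, 1], [1, 0, 1], [0, 0, 0]]))  # -> [1, 0, 1, 0]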

Example 2: moDel

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
class moDel(object):
    
    def __init__(self,x):
        self.x = x
        db = pd.io.json.read_json(self.x)
        raw_data = []
        for i in range(len(db['raw_text'])):
            raw_data.append(db['raw_text'][i].encode('utf-8').strip())         
        self.Vectorizer = TfidfVectorizer(min_df=1)
        X_train = self.Vectorizer.fit_transform(raw_data)
        self.y_train = y_train = db['bull']
        self.clf = DecisionTreeClassifier().fit(X_train.toarray(),y_train)
        
        # all data going into the predict function should be fully parsed
    def predict(self, x):
        categories = ['no bias', 'bias']
        Vct_data = self.Vectorizer.transform([x])
        pred = int(self.clf.predict(Vct_data.toarray())[0])
        return categories[pred], metrics.f1_score(self.y_train, self.clf.predict(Vct_data.toarray()))
    
    def predict_other(self,x):
        categories = ['no bias','bias']
        data = []
        result=requests.post('https://api.idolondemand.com/1/api/sync/findsimilar/v1',data={'text':x,'apikey':'34fd5236-4d37-440f-99f6-16985435b18d','indexes':'news_eng','print':'all'}).json()
        for doc in result["documents"]:
            Vct_other_data = self.Vectorizer.transform([doc['content']])
            pred = int(self.clf.predict(Vct_other_data.toarray())[0])
            score = metrics.f1_score(self.y_train, self.clf.predict(Vct_other_data.toarray()))
            title = doc['title']
            data.append([categories[pred], score, title])
            
        return data
Author: genegurvich, Project: dotbs, Lines: 33, Source file: HackMIT_MOD.py

Example 3: text_learning_experiment

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
def text_learning_experiment(words_to_remove=[]):
    from_sara  = open("../text_learning/from_sara.txt", "r")
    from_chris = open("../text_learning/from_chris.txt", "r")
    word_data, authors = vectorize_emails(from_sara, from_chris, max_emails=300, words_to_remove=words_to_remove)
    features_train, features_test, labels_train, labels_test = \
        cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
                                 stop_words='english')
    features_train = vectorizer.fit_transform(features_train)
    features_test  = vectorizer.transform(features_test).toarray()

    features_train = features_train[:150].toarray()
    labels_train   = labels_train[:150]

    clf = DecisionTreeClassifier()
    clf.fit(features_train, labels_train)
    predict_train = clf.predict(features_train)
    predict_test = clf.predict(features_test)
    print "train acc:", accuracy_score(labels_train, predict_train)
    print "test acc: ", accuracy_score(labels_test, predict_test)
    feature_index = np.argmax(clf.feature_importances_)
    feature_importance = clf.feature_importances_[feature_index]
    feature_name = vectorizer.get_feature_names()[feature_index]
    print "Most important feature, and relative importance:", feature_name, ":", feature_importance
    return feature_name, feature_importance
Author: andrei-iusan, Project: ud120-projects, Lines: 27, Source file: poi_id.py

Example 4: main

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
def main():
    matrix = genMatrix()
    X = [x[:-1] for x in matrix]
    y = [x[-1] for x in matrix]
    pf = DecisionTreeClassifier(random_state=0)
    pf.fit(X,y)
    print pf.predict([[0.02456418383518225, 0.030110935023771792, 0.06814580031695722, 0.13549920760697307, 0.06735340729001585]])
Author: xjl219, Project: webext, Lines: 9, Source file: PageRec.py

Example 5: TreeClassifier

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
class TreeClassifier(Classifier):

    def __init__(self, min_samples_split=20, random_state=99):
        self.classifier = DecisionTreeClassifier(min_samples_split=min_samples_split,
                                                 random_state=random_state)

    def do_train(self, X, y):
        self.classifier.fit(X, y)

    def do_classification(self, X, y):
        # X is expected to be a pandas DataFrame; select the feature columns by label
        return self.classifier.predict(X.loc[:, 'age':'thal'])

    @staticmethod
    def visualize_tree(tree, feature_names):
        """Create tree png using graphviz.

        Args
        ----
        tree -- scikit-learn DecsisionTree.
        feature_names -- list of feature names.
        """
        with open("dt.dot", 'w') as f:
            export_graphviz(tree, out_file=f, feature_names=feature_names)

        command = ["dot", "-Tpng", "dt.dot", "-o", "dt.png"]
        try:
            subprocess.check_call(command)
        except Exception as e:
            print(e)
            exit("Could not run dot, ie graphviz, to produce visualization")
Author: rchibana, Project: heartDiseaseIA, Lines: 32, Source file: tree.py

Example 6: main

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
def main():
    data = run_game()

    clf = DecisionTreeClassifier(criterion='entropy')

    game_data = [[i[0], i[1]] for i in data]
    profits = [i[2] for i in data]

    clf.fit(game_data, profits)

    with open('tree.dot', 'w') as dotfile:
        export_graphviz(
            clf,
            dotfile,
            feature_names=['coin', 'bet']
        )

    predictions_lose1 = [clf.predict([[0, 0]]) for x in xrange(100)]
    predictions_lose2 = [clf.predict([[0, 1]]) for x in xrange(100)]
    predictions_win = [clf.predict([[1, 1]]) for x in xrange(100)]

    print 'All these profit predictions should be zero:'
    print predictions_lose1
    print 'Accuracy was', calculate_accuracy(predictions_lose1, np.array([0]))

    print 'All these profit predictions should be zero:'
    print predictions_lose2
    print 'Accuracy was', calculate_accuracy(predictions_lose2, np.array([0]))

    print 'All these profit predictions should be two:'
    print predictions_win
    print 'Accuracy was', calculate_accuracy(predictions_win, np.array([2]))
Author: kimmobrunfeldt, Project: machine-learning, Lines: 34, Source file: main.py

Example 7: main

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
def main():
	training_data = genfromtxt('training.txt', delimiter=1)
	test_data = genfromtxt('test.txt', delimiter=1)
	X_test = test_data[:, :57]
	Y_test = test_data[:, 58]

	# get the first predicted_Y_test
	random.shuffle(training_data)
	trainappend_data = training_data[:1, :]
	for j in range(0, 49):
		random.shuffle(training_data)
		trainappend_data = concatenate((trainappend_data, training_data[:1, :]), axis=0)
	clf = DecisionTreeClassifier(criterion="entropy", max_depth=2)
	clf = clf.fit(trainappend_data[:, :57], trainappend_data[:, 58])
	predicted_Y_test = clf.predict(X_test)
	accuracy = getaccuracy(Y_test, predicted_Y_test, 1)
	#print len(trainappend_data)
	#print predicted_Y_test
	print (1-accuracy)

	for i in range(2, 101):
		random.shuffle(training_data)
		trainappend_data2 = training_data[:1, :]
		for j in range(0, 49):
			random.shuffle(training_data)
			trainappend_data2 = concatenate((trainappend_data2, training_data[:1, :]), axis=0)
		clf = clf.fit(trainappend_data2[:, :57], trainappend_data2[:, 58])
		
		predicted_Y_test = predicted_Y_test + clf.predict(X_test)
		accuracy = getaccuracy(Y_test, predicted_Y_test, i)
		#print len(trainappend_data2)
		#print predicted_Y_test
		print (1-accuracy)
Author: Yuechen-Zhao, Project: 446MachineLearning, Lines: 35, Source file: 2_2_1.py

Example 8: test_decision_tree_classifier

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
def test_decision_tree_classifier(train_test_sets, criterion="entropy", depth_limited=False):
    """ Decision Tree Classifier with optional depth-limit.

    Args:
        train_test_sets: array of training and testing sets
        criterion: parameter for Decision Tree
        depth_limited: whether or not to prune to best depth
    """
    X_train, X_test, y_train, y_test = train_test_sets

    if depth_limited:
        # TODO: Change number of folds?
        kf = StratifiedKFold(y_train, n_folds=5, shuffle=True, random_state=42)
        # depth = select_dt_depth(X_train, y_train, kf, metric="accuracy")
        depth = 23
        # print "Best depth...", depth
        clf = DecisionTreeClassifier(criterion=criterion, max_depth=depth)
    else:
        clf = DecisionTreeClassifier(criterion=criterion)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_train)
    print "DECISION TREE CLASSIFIER RESULTS"
    print "\tTraining accuracy is ", metrics.accuracy_score(y_train, y_pred, normalize=True)

    y_pred = clf.predict(X_test)
    print_metrics(y_test, y_pred)

    return metrics.f1_score(y_test, y_pred)
Author: MaiHo, Project: controversial-reddit-comments, Lines: 32, Source file: model.py
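Example 8 hardcodes depth = 23 and leaves the cross-validated depth search (select_dt_depth) commented out. One way such a search could be done is with GridSearchCV; the sketch below uses synthetic data and assumes a recent scikit-learn, where StratifiedKFold takes n_splits rather than the older (y, n_folds=...) signature used above:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier

# Synthetic stand-in data for illustration
X_train, y_train = make_classification(n_samples=300, n_features=10, random_state=42)

param_grid = {'max_depth': np.arange(2, 31)}
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
search = GridSearchCV(DecisionTreeClassifier(criterion='entropy', random_state=42),
                      param_grid, scoring='accuracy', cv=cv)
search.fit(X_train, y_train)
print("Best depth:", search.best_params_['max_depth'])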

Example 9: BackoffClassifier

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
class BackoffClassifier(object):
    """docstring for BackoffClassifier"""
    def __init__(self):
        self.clf_main = DecisionTreeClassifier()
        self.clf_backoff = RandomForestClassifier()

    def fit(self, X_train, y_train):
        # Train balanced main model
        w1 = float(sum(y_train)) / len(y_train)  # fraction of positive labels
        w0 = 1 - w1
        sample_weight = np.array([w0 if x==0 else w1 for x in y_train])

        self.clf_main.fit(X_train, y_train, sample_weight=sample_weight)

        # Train resampled backoff model to separate recall errors
        # from true zeros
        X_recall_errors = self.get_recall_errors(X_train, y_train)
        N = len(X_recall_errors)

        # TODO: do smarter sampling here
        X_zero = get_zeros(X_train, y_train).iloc[:5 * N,:]

        X_train_re = pd.concat((X_recall_errors, X_zero))
        y_train_re = np.array([1] * len(X_recall_errors) + [0] * len(X_zero))
        
        # w1 = sum(y_train_re)/len(y_train_re)
        # extra_weight_ones = 2.0
        # w1 *= extra_weight_ones

        # w0 = 1 - w1
        # sample_weight = np.array([w0 if x==0 else w1 for x in y_train_re])

        self.clf_backoff.fit(X_train_re, y_train_re)

    def get_recall_errors(self, X, y):
        y_true, y_pred = y, self.clf_main.predict(X)
        recall_error_mask = np.logical_and(y_true, 1 - y_pred)
        X_recall_errors = X.iloc[recall_error_mask,:]

        return X_recall_errors

    def predict(self, X):
        nrows = X.shape[0]
        preds = np.empty(nrows)
        for i in range(nrows):
            x = X.iloc[i, :].values.reshape(1, -1)
            main_pred = self.clf_main.predict(x)[0]

            if main_pred:
                preds[i] = main_pred
            else:
                coin = random.randint(0, 3)
                if coin:
                    preds[i] = main_pred
                else:
                    backoff_pred = self.clf_backoff.predict(x)[0]
                    preds[i] = backoff_pred

        return preds
Author: pablocelayes, Project: sna_classifier, Lines: 61, Source file: backoffmodel_random.py

Example 10: DTree

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
def DTree(X, Y, XTest, YTest):
    print '-----------------------------------------------------'
    # dot_data = StringIO()
    # tree.export_graphviz(dtree_model, out_file=dot_data)
    # graph = pydot.graph_from_dot_data(dot_data.getvalue())
    # graph.write_pdf("../dtree.pdf")

    # param_grid = {'max_depth': np.arange(1, 15)}

    # tree_grid = GridSearchCV(DecisionTreeClassifier(), param_grid)
    tree_grid = DecisionTreeClassifier(max_depth=3)
    tree_grid.fit(X, Y)
    dot_data = StringIO()  # in-memory buffer for the exported .dot source
    export_graphviz(tree_grid, out_file=dot_data)
    graph = pydot.graph_from_dot_data(dot_data.getvalue())
    graph.write_pdf("dtreevis.pdf")

    # print("The best parameters are %s with a score of %0.2f"
    #       % (tree_grid.best_params_, tree_grid.best_score_))

    print "Computing training statistics"
    dtree_predict_time_training = time.time()
    Ypred_dtree_training = tree_grid.predict(X)
    dtree_predict_time_training = time.time() - dtree_predict_time_training

    dtree_accuracy_training = metrics.accuracy_score(Y, Ypred_dtree_training)
    dt_precision_training = metrics.precision_score(Y, Ypred_dtree_training,
                                                    average='binary')
    dtree_recall_training = metrics.recall_score(Y, Ypred_dtree_training,
                                                 average='binary')

    print "DT training prediction time: " + str(dtree_predict_time_training)
    print "DT training accuracy Score: " + str(dtree_accuracy_training)
    print "DT training precision Score: " + str(dt_precision_training)
    print "DT training recall Score: " + str(dtree_recall_training)

    print "Computing testing statistics"
    dtree_predict_time_test = time.time()
    Ypred_dtree_test = tree_grid.predict(XTest)
    dtree_predict_time_test = time.time() - dtree_predict_time_test

    dtree_accuracy_test = metrics.accuracy_score(YTest, Ypred_dtree_test)
    dt_precision_test = metrics.precision_score(YTest, Ypred_dtree_test,
                                                average='binary')
    dtree_recall_test = metrics.recall_score(YTest, Ypred_dtree_test,
                                             average='binary')

    print "DT test prediction time: " + str(dtree_predict_time_test)
    print "DT test accuracy Score: " + str(dtree_accuracy_test)
    print "DT test precision Score: " + str(dt_precision_test)
    print "DT test recall Score: " + str(dtree_recall_test)

    print "Creating ROC curve"
    y_true = YTest
    y_score = tree_grid.predict_proba(XTest)
    fprSVM, trpSVM, _ = metrics.roc_curve(y_true=y_true,
                                          y_score=y_score[:, 0],
                                          pos_label=0)
    plt.plot(fprSVM, trpSVM, 'r-', label='DT')
Author: jhurwitzupenn, Project: CIS419Project, Lines: 60, Source file: trainClassifiers.py

Example 11: decision_tree

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
def decision_tree(X_vectors, t):
    # leave-one-out strategy to get average accuracy
    n = len(t)
    true_num = 0
    for i in range(n):
        X_train = list(X_vectors)
        del X_train[i]
        t_train = list(t)
        del t_train[i]
        X_test = X_vectors[i]
        t_test = t[i]

        clf = DecisionTreeClassifier()
        clf.fit(X_train, t_train)
        y = clf.predict([X_test])[0]
        if y == t_test:
            true_num += 1
    accuracy = 1.0 * true_num / n

    # 8/2 split
    X = np.array(X_vectors)
    tt = list(t)
    pre = []
    rec = []
    for _ in range(100):
        X_train, X_test, t_train, t_test = train_test_split(X, tt, test_size=0.2)
        clf = DecisionTreeClassifier()
        clf.fit(X_train, t_train)
        y_test = clf.predict(X_test)
        t_pos = 0
        f_pos = 0
        t_neg = 0
        f_neg = 0
        for i in range(len(y_test)):
            if t_test[i] == 1 and y_test[i] == 1:
                t_pos += 1
            elif t_test[i] == 0 and y_test[i] == 1:
                f_pos += 1
            elif t_test[i] == 0 and y_test[i] == 0:
                t_neg += 1
            elif t_test[i] == 1 and y_test[i] == 0:
                f_neg += 1

        # compute precision/recall once per split, after all counts are tallied
        if t_pos == 0:
            precision = 0
            recall = 0
        else:
            precision = 1.0 * t_pos / (t_pos + f_pos)
            recall = 1.0 * t_pos / (t_pos + f_neg)
        pre.append(precision)
        rec.append(recall)

    pre = sum(pre) / len(pre)
    rec = sum(rec) / len(rec)
    F = 2 / (1/pre + 1/rec)

    return accuracy, pre, rec, F
Author: Yuliang-Zou, Project: FreeFoodCalendar, Lines: 59, Source file: test.py

Example 12: first_layer_hard_accuracy

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
	def first_layer_hard_accuracy(self,pred_on_train):

		'''Might be useful to know how well the first layer classifer performs for 0-1 loss'''

		clf = DecisionTreeClassifier()
		clf = clf.fit(self.x_train, self.y_train_cluster)
		if pred_on_train:
			return sum(np.equal(self.y_train_cluster,clf.predict(self.x_train)))/float(len(self.y_train_cluster))
		else:
			return sum(np.equal(self.y_test_cluster,clf.predict(self.x_test)))/float(len(self.y_test_cluster))
Author: asn264, Project: subreddit_classifiers, Lines: 12, Source file: soft_two_layer_classifier.py

Example 13: first_layer_classifier

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
	def first_layer_classifier(self, pred_on_train):

		'''Train a classifier on x_train and y_train_cluster'''

		clf = DecisionTreeClassifier()
		clf = clf.fit(self.x_train, self.y_train_cluster)
		if pred_on_train:
			return clf.predict(self.x_train)
		else:
			return clf.predict(self.x_test)
Author: aditi-nair, Project: subreddit_classifiers, Lines: 12, Source file: hard_two_layer_classifier.py

Example 14: learning_curves

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
def learning_curves(X_train, y_train, X_cv, y_cv,mdl='DT'):
    """ Calculates the performance of several models with varying sizes of training data.
        The learning and testing error rates for each model are then plotted. """
    
    print "Creating learning curve graphs for max_depths of 1, 3, 6, and 10. . ."
    
    # Create the figure window
    fig = plt.figure(figsize=(10,8))

    # We will vary the training set size so that we have 50 different sizes
    # rint rounds the array elements to nearest integers
    sizes = np.rint(np.linspace(1, len(X_train), 50)).astype(int)
    train_err = np.zeros(len(sizes)-1)
    test_err = np.zeros(len(sizes)-1)

    # Create four different models based on max_depth
    for k, depth in enumerate([1,3,6,10]):
        
        for i, s in enumerate(sizes[1:]): # Ignore the first element 
            # print s
            # Setup a decision tree classifier so that it learns a tree with max_depth = depth
            if (mdl=='DT'):
                clf = DecisionTreeClassifier(max_depth = depth)
            elif (mdl=='RF'):
                clf=RandomForestClassifier(max_depth=depth)
            # Fit the learner to the training data
            clf.fit(X_train[:s], y_train[:s])
            
            # Find the performance on the training set
            train_err[i] = performance_metric(y_train[:s], clf.predict(X_train[:s]))
            
            # Find the performance on the testing set
            test_err[i] = performance_metric(y_cv, clf.predict(X_cv))

        # Subplot the learning curve graph
        ax = fig.add_subplot(2, 2, k+1)
        ax.plot(sizes[1:], test_err, lw = 2, label = 'Testing')
        ax.plot(sizes[1:], train_err, lw = 2, label = 'Training')
        ax.legend(framealpha=0.8)
        ax.set_title('max_depth = %s'%(depth))
        ax.set_xlabel('Number of Data Points in Training Set')
        ax.set_ylabel('$F_1$-score')
        ax.set_xlim([0, len(X_train)])
    
    # Visual aesthetics
    if (mdl=='DT'):
        fig.suptitle('Decision Tree Classifier Learning Performances', fontsize=18, y=1.03)
        fig.savefig('plots/depth_f1_vs_dataPoints_%s.png'%mdl )
    elif (mdl=='RF'):
        fig.suptitle('Random Forest Classifier Learning Performances', fontsize=18, y=1.03)
        fig.savefig('plots/depth_f1_vs_dataPoints_%s.png'%mdl)
    
    print 'Done....creating learning curves'
    fig.tight_layout()
Author: beegeesquare, Project: Titanic, Lines: 56, Source file: modules.py

Example 15: runDecisionTreeSimulation

# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import predict [as alias]
def runDecisionTreeSimulation(dataTrain, dataTest, dataHold, train_tfidf, test_tfidf, hold_tfidf):
    print 'running decision tree'
    outFile = open('decisionTreeLog.txt','a')

    outFile.write('train==> %d, %d \n'%(train_tfidf.shape[0],train_tfidf.shape[1]))
    outFile.write('test==>  %d, %d \n'%(test_tfidf.shape[0],test_tfidf.shape[1]))
    with SimpleTimer('time to train', outFile):
        clf = DecisionTreeClassifier().fit(train_tfidf, dataTrain.target)
    
    baseScore = clf.score(test_tfidf, dataTest.target)
    initHeight = clf.tree_.max_depth
    print 'baseline score %.3f base height %d' % (baseScore, initHeight)
    outFile.write('baseline score %.3f base height %d \n' % (baseScore, initHeight))
    
    
    res = []
    with SimpleTimer('time to prune', outFile):
        for height in range(initHeight, 40, -25):
#             print 'training for height %d' % height
            clf = DecisionTreeClassifier(max_depth=height).fit(train_tfidf, dataTrain.target)
            score = clf.score(hold_tfidf, dataHold.target)
            res.append((score, height))
            outFile.write('%d %.3f \n' % (height, score))
    res = sorted(res, key=lambda x:x[0], reverse=True)
    print res[:5]
    
    bestDepth = res[0][1]
    print ('best height is %d' % bestDepth)
    outFile.write('best depth is %d  and score is %.3f \n' % (bestDepth, res[0][0]))
        
    bestClf = DecisionTreeClassifier(max_depth=bestDepth)
    bestClf.fit(train_tfidf, dataTrain.target)
    
    predicted = bestClf.predict(test_tfidf)
    
    train_predict = bestClf.predict(train_tfidf)
    
    print 'testing score'
    outFile.write('testing score')
    outputScores(dataTest.target, predicted, outFile)
    print 'training score'
    outFile.write('training score')
    outputScores(dataTrain.target, train_predict, outFile)
    
    results = predicted == dataTest.target
    wrong = []
    for i in range(len(results)):
        if not results[i]:
            wrong.append(i)
    print 'classifier got these wrong:'
    for i in wrong[:10]:
        print dataTest.data[i], dataTest.target[i]
        outFile.write('%s %d \n' % (dataTest.data[i], dataTest.target[i]))
    plot_learning_curve(bestClf, 'decision tree after pruning from %d to %d depth' % (initHeight, bestDepth), train_tfidf, dataTrain.target, cv=5, n_jobs=4)
Author: anantauprety, Project: sentiment-analysis, Lines: 56, Source file: decision_tree.py


Note: The sklearn.tree.DecisionTreeClassifier.predict examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors, and distribution and use should follow the License of the corresponding project. Do not reproduce without permission.