本文整理汇总了Python中sklearn.tree.DecisionTreeClassifier.predict方法的典型用法代码示例。如果您正苦于以下问题:Python DecisionTreeClassifier.predict方法的具体用法?Python DecisionTreeClassifier.predict怎么用?Python DecisionTreeClassifier.predict使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.tree.DecisionTreeClassifier
的用法示例。
在下文中一共展示了DecisionTreeClassifier.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
class Ensemble:
    """Majority-vote ensemble over four scikit-learn classifiers.

    Fitting and prediction happen eagerly in __init__ via make_prediction;
    the final per-sample votes end up in self.pred.
    """

    def __init__(self, data):
        # Base learners with fixed hyperparameters (unchanged from original).
        self.rf = RandomForestClassifier(n_estimators=80, n_jobs=-1, min_samples_split=45, criterion='entropy')
        self.lda = LDA()
        self.dec = DecisionTreeClassifier(criterion='entropy')
        self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)
        self.make_prediction(data)

    def make_prediction(self, data):
        '''
        Make an ensemble prediction
        '''
        # Fit every base learner on the same training split.
        for model in (self.rf, self.lda, self.dec, self.ada):
            model.fit(data.features_train, data.labels_train)
        rf_pred = self.rf.predict(data.features_test)
        lda_pred = self.lda.predict(data.features_test)
        dec_pred = self.dec.predict(data.features_test)
        ada_pred = self.ada.predict(data.features_test)
        self.pred = []
        # Per-sample majority vote. max() returns the first element with the
        # highest count, so ties resolve to the earliest model in
        # (rf, lda, dec, ada) order — same tie-break as the original's
        # stable sort by list.count.
        for votes in zip(rf_pred, lda_pred, dec_pred, ada_pred):
            ballot = list(votes)
            self.pred.append(max(ballot, key=ballot.count))
示例2: moDel
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
class moDel(object):
    """TF-IDF + decision-tree text classifier for 'bias' detection.

    Parameters
    ----------
    x : str or file-like
        JSON source understood by pandas' read_json; must provide the
        columns 'raw_text' (documents) and 'bull' (labels).
    """

    def __init__(self, x):
        self.x = x
        db = pd.io.json.read_json(self.x)
        # Collect the raw documents as stripped UTF-8 byte strings.
        raw_data = []
        for i in range(len(db['raw_text'])):
            raw_data.append(db['raw_text'][i].encode('utf-8').strip())
        self.Vectorizer = TfidfVectorizer(min_df=1)
        X_train = self.Vectorizer.fit_transform(raw_data)
        self.y_train = y_train = db['bull']
        self.clf = DecisionTreeClassifier().fit(X_train.toarray(), y_train)

    # all data going into the predict function should be fully parsed
    def predict(self, x):
        """Classify one document; return (label, f1-vs-training-targets)."""
        categories = ['no bias', 'bias']
        Vct_data = self.Vectorizer.transform([x])
        # FIX: predict once and reuse the result (the original called
        # predict twice on the same vector).
        pred = self.clf.predict(Vct_data.toarray())
        # NOTE(review): scoring a single prediction against the full
        # training target vector is dubious — kept for API compatibility.
        return categories[int(pred[0])], metrics.f1_score(self.y_train, pred)

    def predict_other(self, x):
        """Classify documents that IDOL OnDemand reports as similar to *x*.

        Returns a list of [label, f1-score, title] triples.
        """
        categories = ['no bias', 'bias']
        data = []
        # SECURITY: hard-coded API key shipped in source — move to config.
        result=requests.post('https://api.idolondemand.com/1/api/sync/findsimilar/v1',data={'text':x,'apikey':'34fd5236-4d37-440f-99f6-16985435b18d','indexes':'news_eng','print':'all'}).json()
        for doc in result["documents"]:
            # FIX: self.Vecotrizer was a typo (AttributeError) and
            # Vct_other an undefined name (NameError).
            Vct_other_data = self.Vectorizer.transform([doc['content']])
            # FIX: the original rebound `result` to the prediction array and
            # then tried result['source'] = ... on it; use a separate name.
            pred = self.clf.predict(Vct_other_data.toarray())
            score = metrics.f1_score(self.y_train, pred)
            title = doc['title']
            data.append([categories[int(pred[0])], score, title])
        return data
示例3: text_learning_experiment
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def text_learning_experiment(words_to_remove=[]):
from_sara = open("../text_learning/from_sara.txt", "r")
from_chris = open("../text_learning/from_chris.txt", "r")
word_data, authors = vectorize_emails(from_sara, from_chris, max_emails=300, words_to_remove=words_to_remove)
features_train, features_test, labels_train, labels_test = \
cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
stop_words='english')
features_train = vectorizer.fit_transform(features_train)
features_test = vectorizer.transform(features_test).toarray()
features_train = features_train[:150].toarray()
labels_train = labels_train[:150]
clf = DecisionTreeClassifier()
clf.fit(features_train, labels_train)
predict_train = clf.predict(features_train)
predict_test = clf.predict(features_test)
print "train acc:", accuracy_score(labels_train, predict_train)
print "test acc: ", accuracy_score(labels_test, predict_test)
feature_index = np.argmax(clf.feature_importances_)
feature_importance = clf.feature_importances_[feature_index]
feature_name = vectorizer.get_feature_names()[feature_index]
print "Most important feature, and relative importance:", feature_name, ":", feature_importance
return feature_name, feature_importance
示例4: main
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def main():
matrix = genMatrix()
X = [x[:-1] for x in matrix]
y = [x[-1] for x in matrix]
pf = DecisionTreeClassifier(random_state=0)
pf.fit(X,y)
print pf.predict([0.02456418383518225, 0.030110935023771792, 0.06814580031695722, 0.13549920760697307, 0.06735340729001585])
示例5: TreeClassifier
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
class TreeClassifier(Classifier):
    """Decision-tree implementation of the local Classifier interface."""

    def __init__(self, min_samples_split=20, random_state=99):
        # Fixed random_state keeps the fitted tree reproducible across runs.
        self.classifier = DecisionTreeClassifier(min_samples_split=min_samples_split,
                                                 random_state=random_state)

    def do_train(self, X, y):
        # Fit the underlying sklearn tree on the full training set.
        self.classifier.fit(X, y)

    def do_classification(self, X, y):
        # NOTE(review): X[:, 'age':'thal'] slices columns by label, which a
        # plain numpy array rejects — presumably X is a label-indexed
        # structure; confirm against the caller. The prediction result is
        # also discarded, and y is unused.
        self.classifier.predict(X[:, 'age':'thal'])
        # NOTE(review): leftover debug output.
        print('wtf')
def visualize_tree(tree, feature_names):
    """Render a fitted scikit-learn decision tree to dt.png via graphviz.

    Writes the intermediate dot description to dt.dot, then invokes the
    `dot` binary; exits with a message if graphviz is unavailable.
    """
    # Dump the fitted tree in Graphviz dot format first.
    with open("dt.dot", 'w') as dot_file:
        export_graphviz(tree, out_file=dot_file, feature_names=feature_names)
    # Shell out to `dot` to produce the png.
    render_cmd = ["dot", "-Tpng", "dt.dot", "-o", "dt.png"]
    try:
        subprocess.check_call(render_cmd)
    except Exception as e:
        print(e)
        exit("Could not run dot, ie graphviz, to produce visualization")
示例6: main
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def main():
data = run_game()
clf = DecisionTreeClassifier(criterion='entropy')
game_data = [[i[0], i[1]] for i in data]
profits = [i[2] for i in data]
clf.fit(game_data, profits)
with open('tree.dot', 'w') as dotfile:
export_graphviz(
clf,
dotfile,
feature_names=['coin', 'bet']
)
predictions_lose1 = [clf.predict([0, 0]) for x in xrange(100)]
predictions_lose2 = [clf.predict([0, 1]) for x in xrange(100)]
predictions_win = [clf.predict([1, 1]) for x in xrange(100)]
print 'All these profit predictions should be zero:'
print predictions_lose1
print 'Accuracy was', calculate_accuracy(predictions_lose1, np.array([0]))
print 'All these profit predictions should be zero:'
print predictions_lose2
print 'Accuracy was', calculate_accuracy(predictions_lose2, np.array([0]))
print 'All these profit predictions should be two:'
print predictions_win
print 'Accuracy was', calculate_accuracy(predictions_win, np.array([2]))
示例7: main
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def main():
    """Boosting-style experiment: repeatedly train depth-2 trees on small
    resampled sets and print the (1 - accuracy) test error of the
    accumulated ensemble after each round."""
    training_data = genfromtxt('training.txt', delimiter=1)
    test_data = genfromtxt('test.txt', delimiter=1)
    # Columns 0-56 are features, column 58 is the label.
    # NOTE(review): column 57 is skipped everywhere — confirm this is the
    # intended layout and not an off-by-one.
    X_test = test_data[:, :57]
    Y_test = test_data[:, 58]
    # get the first predicted_Y_test
    random.shuffle(training_data)
    trainappend_data = training_data[:1, :]
    # Draw 49 more single rows (resampling via reshuffles) -> 50-row set.
    for j in range(0, 49):
        random.shuffle(training_data)
        trainappend_data = concatenate((trainappend_data, training_data[:1, :]), axis=0)
    clf = DecisionTreeClassifier(criterion="entropy", max_depth=2)
    clf = clf.fit(trainappend_data[:, :57], trainappend_data[:, 58])
    predicted_Y_test = clf.predict(X_test)
    accuracy = getaccuracy(Y_test, predicted_Y_test, 1)
    #print len(trainappend_data)
    #print predicted_Y_test
    print (1-accuracy)
    # Rounds 2..100: train a fresh weak learner on a new 50-row sample each
    # round and accumulate its raw predictions; getaccuracy presumably
    # normalizes/thresholds the running sum by the round count i — TODO
    # confirm against its definition.
    for i in range(2, 101):
        random.shuffle(training_data)
        trainappend_data2 = training_data[:1, :]
        for j in range(0, 49):
            random.shuffle(training_data)
            trainappend_data2 = concatenate((trainappend_data2, training_data[:1, :]), axis=0)
        clf = clf.fit(trainappend_data2[:, :57], trainappend_data2[:, 58])
        predicted_Y_test = predicted_Y_test + clf.predict(X_test)
        accuracy = getaccuracy(Y_test, predicted_Y_test, i)
        #print len(trainappend_data2)
        #print predicted_Y_test
        print (1-accuracy)
示例8: test_decision_tree_classifier
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def test_decision_tree_classifier(train_test_sets, criterion="entropy", depth_limited=False):
""" Decision Tree Classifier with optional depth-limit.
Args:
train_test_sets: array of training and testing sets
criterion: parameter for Decision Tree
depth_limited: whether or not to prune to best depth
"""
X_train, X_test, y_train, y_test = train_test_sets
if depth_limited:
# TODO: Change number of folds?
kf = StratifiedKFold(y_train, n_folds=5, shuffle=True, random_state=42)
# depth = select_dt_depth(X_train, y_train, kf, metric="accuracy")
depth = 23
# print "Best depth...", depth
clf = DecisionTreeClassifier(criterion="entropy", max_depth=depth)
else:
clf = DecisionTreeClassifier(criterion="entropy")
clf.fit(X_train, y_train)
y_pred = clf.predict(X_train)
print "DECISION TREE CLASSIFIER RESULTS"
print "\tTraining accuracy is ", metrics.accuracy_score(y_train, y_pred, normalize=True)
y_pred = clf.predict(X_test)
print_metrics(y_test, y_pred)
return metrics.f1_score(y_test, y_pred)
示例9: BackoffClassifier
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
class BackoffClassifier(object):
    """Two-stage classifier: a weighted decision tree makes the main call;
    a random forest trained to separate the main model's recall errors
    from true zeros is consulted (about 1 time in 4, at random) whenever
    the main model predicts zero."""
    def __init__(self):
        self.clf_main = DecisionTreeClassifier()     # primary model
        self.clf_backoff = RandomForestClassifier()  # recall-error model

    def fit(self, X_train, y_train):
        # Train balanced main model
        # NOTE(review): under Python 2, sum/len on int labels is integer
        # division (w1 == 0, w0 == 1); also positives get weight = the
        # positive rate rather than its inverse — confirm the intent.
        w1 = sum(y_train)/len(y_train)
        w0 = 1 - w1
        sample_weight = np.array([w0 if x==0 else w1 for x in y_train])
        self.clf_main.fit(X_train, y_train, sample_weight=sample_weight)
        # Train resampled backoff model to separate recall errors
        # from true zeros
        X_recall_errors = self.get_recall_errors(X_train, y_train)
        N = len(X_recall_errors)
        # TODO: do smarter sampling
        # Take up to 5 negatives per recall error as the "true zero" side.
        X_zero = get_zeros(X_train, y_train).iloc[:5 * N,:]
        X_train_re = pd.concat((X_recall_errors, X_zero))
        y_train_re = np.array([1] * len(X_recall_errors) + [0] * len(X_zero))
        # w1 = sum(y_train_re)/len(y_train_re)
        # extra_weight_ones = 2.0
        # w1 *= extra_weight_ones
        # w0 = 1 - w1
        # sample_weight = np.array([w0 if x==0 else w1 for x in y_train_re])
        self.clf_backoff.fit(X_train_re, y_train_re)

    def get_recall_errors(self, X, y):
        # Rows whose true label is 1 but the main model predicts 0.
        y_true, y_pred = y, self.clf_main.predict(X)
        recall_error_mask = np.logical_and(y_true, 1 - y_pred)
        # NOTE(review): .iloc with a boolean mask is deprecated/invalid in
        # newer pandas; boolean indexing belongs with .loc — verify version.
        X_recall_errors = X.iloc[recall_error_mask,:]
        return X_recall_errors

    def predict(self, X):
        # Row-by-row: trust the main model's positives; for its zeros, fall
        # back to the backoff model only when random.randint(0, 3) == 0,
        # i.e. roughly 25% of the time.
        nrows = X.shape[0]
        preds = np.empty(nrows)
        for i in range(nrows):
            x = X.iloc[i,:]
            main_pred = self.clf_main.predict(x)
            if main_pred:
                preds[i] = main_pred
            else:
                coin = random.randint(0, 3)
                if coin:
                    preds[i] = main_pred
                else:
                    backoff_pred = self.clf_backoff.predict(x)
                    preds[i] = backoff_pred
        return preds
示例10: DTree
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def DTree(X, Y, XTest, YTest):
print '-----------------------------------------------------'
# dot_data = StringIO()
# tree.export_graphviz(dtree_model, out_file=dot_data)
# graph = pydot.graph_from_dot_data(dot_data.getvalue())
# graph.write_pdf("../dtree.pdf")
# param_grid = {'max_depth': np.arange(1, 15)}
# tree_grid = GridSearchCV(DecisionTreeClassifier(), param_grid)
tree_grid = DecisionTreeClassifier(max_depth=3)
tree_grid.fit(X, Y)
export_graphviz(tree_grid, out_file=dot_data)
graph = pydot.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf("dtreevis.pdf")
# print("The best parameters are %s with a score of %0.2f"
# % (tree_grid.best_params_, tree_grid.best_score_))
print "Computing training statistics"
dtree_predict_time_training = time.time()
Ypred_dtree_training = tree_grid.predict(X)
dtree_predict_time_training = time.time() - dtree_predict_time_training
dtree_accuracy_training = metrics.accuracy_score(Y, Ypred_dtree_training)
dt_precision_training = metrics.precision_score(Y, Ypred_dtree_training,
average='binary')
dtree_recall_training = metrics.recall_score(Y, Ypred_dtree_training,
average='binary')
print "DT training prediction time: " + str(dtree_predict_time_training)
print "DT training accuracy Score: " + str(dtree_accuracy_training)
print "DT training precision Score: " + str(dt_precision_training)
print "DT training recall Score: " + str(dtree_recall_training)
print "Computing testing statistics"
dtree_predict_time_test = time.time()
Ypred_dtree_test = tree_grid.predict(XTest)
dtree_predict_time_test = time.time() - dtree_predict_time_test
dtree_accuracy_test = metrics.accuracy_score(YTest, Ypred_dtree_test)
dt_precision_test = metrics.precision_score(YTest, Ypred_dtree_test,
average='binary')
dtree_recall_test = metrics.recall_score(YTest, Ypred_dtree_test,
average='binary')
print "DT test prediction time: " + str(dtree_predict_time_test)
print "DT test accuracy Score: " + str(dtree_accuracy_test)
print "DT test precision Score: " + str(dt_precision_test)
print "DT test recall Score: " + str(dtree_recall_test)
print "Creating ROC curve"
y_true = YTest
y_score = tree_grid.predict_proba(XTest)
fprSVM, trpSVM, _ = metrics.roc_curve(y_true=y_true,
y_score=y_score[:, 0],
pos_label=0)
plt.plot(fprSVM, trpSVM, 'r-', label='DT')
示例11: decision_tree
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def decision_tree(X_vectors, t):
    """Evaluate a DecisionTreeClassifier on (X_vectors, t).

    Returns (accuracy, precision, recall, F1): accuracy comes from
    leave-one-out cross-validation; precision/recall are averaged over
    100 random 80/20 splits; F1 is the harmonic mean of those averages.
    """
    # --- leave-one-out strategy to get average accuracy ---
    n = len(t)
    true_num = 0
    for i in range(n):
        X_train = list(X_vectors)
        del X_train[i]
        t_train = list(t)
        del t_train[i]
        X_test = X_vectors[i]
        t_test = t[i]
        clf = DecisionTreeClassifier()
        clf.fit(X_train, t_train)
        y = clf.predict(X_test)
        if y == t_test:
            true_num += 1
    accuracy = 1.0 * true_num / n
    # --- 8/2 split, repeated 100 times ---
    X = np.array(X_vectors)
    tt = list(t)
    pre = []
    rec = []
    for _ in range(100):
        X_train, X_test, t_train, t_test = train_test_split(X, tt, test_size=0.2)
        clf = DecisionTreeClassifier()
        clf.fit(X_train, t_train)
        y_test = clf.predict(X_test)
        # Tally the confusion-matrix counts by hand.
        t_pos = 0
        f_pos = 0
        t_neg = 0
        f_neg = 0
        for i in range(len(y_test)):
            if t_test[i] == 1 and y_test[i] == 1:
                t_pos += 1
            elif t_test[i] == 0 and y_test[i] == 1:
                f_pos += 1
            elif t_test[i] == 0 and y_test[i] == 0:
                t_neg += 1
            elif t_test[i] == 1 and y_test[i] == 0:
                f_neg += 1
        if t_pos == 0:
            precision = 0
            recall = 0
        else:
            precision = 1.0 * t_pos / (t_pos + f_pos)
            recall = 1.0 * t_pos / (t_pos + f_neg)
        pre.append(precision)
        rec.append(recall)
    pre = sum(pre) / len(pre)
    rec = sum(rec) / len(rec)
    # FIX: guard the harmonic mean — the original 2/(1/pre + 1/rec) raised
    # ZeroDivisionError whenever no split ever produced a true positive.
    # 2*pre*rec/(pre+rec) is the same value when pre, rec > 0.
    F = 2 * pre * rec / (pre + rec) if (pre + rec) else 0
    return accuracy, pre, rec, F
示例12: first_layer_hard_accuracy
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def first_layer_hard_accuracy(self, pred_on_train):
    """Return the 0-1 accuracy of the first-layer cluster classifier.

    Fits a fresh decision tree on (x_train, y_train_cluster), then scores
    it against the training clusters when pred_on_train is truthy,
    otherwise against the test clusters.
    """
    tree = DecisionTreeClassifier().fit(self.x_train, self.y_train_cluster)
    if pred_on_train:
        target, predicted = self.y_train_cluster, tree.predict(self.x_train)
    else:
        target, predicted = self.y_test_cluster, tree.predict(self.x_test)
    # Fraction of exact matches.
    return sum(np.equal(target, predicted)) / float(len(target))
示例13: first_layer_classifier
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def first_layer_classifier(self, pred_on_train):
    """Fit a decision tree on (x_train, y_train_cluster) and return its
    predictions on the training set (pred_on_train truthy) or the test set."""
    tree = DecisionTreeClassifier().fit(self.x_train, self.y_train_cluster)
    features = self.x_train if pred_on_train else self.x_test
    return tree.predict(features)
示例14: learning_curves
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def learning_curves(X_train, y_train, X_cv, y_cv, mdl='DT'):
    """ Calculates the performance of several models with varying sizes of training data.
    The learning and testing error rates for each model are then plotted.

    mdl selects the model family: 'DT' (decision tree) or 'RF' (random
    forest); anything else leaves clf unset and raises on first use.
    """
    print "Creating learning curve graphs for max_depths of 1, 3, 6, and 10. . ."
    # Create the figure window
    fig = plt.figure(figsize=(10,8))
    # We will vary the training set size so that we have 50 different sizes
    # rint rounds the array elements to nearest integers
    sizes = np.rint(np.linspace(1, len(X_train), 50)).astype(int)
    train_err = np.zeros(len(sizes)-1)
    test_err = np.zeros(len(sizes)-1)
    # Create four different models based on max_depth
    for k, depth in enumerate([1,3,6,10]):
        # The error arrays are fully overwritten each depth iteration.
        for i, s in enumerate(sizes[1:]):  # Ignore the first element
            # print s
            # Setup a decision tree classifier so that it learns a tree with max_depth = depth
            if (mdl=='DT'):
                clf = DecisionTreeClassifier(max_depth = depth)
            elif (mdl=='RF'):
                clf=RandomForestClassifier(max_depth=depth)
            # Fit the learner to the training data
            clf.fit(X_train[:s], y_train[:s])
            # Find the performance on the training set
            train_err[i] = performance_metric(y_train[:s], clf.predict(X_train[:s]))
            # Find the performance on the testing set
            test_err[i] = performance_metric(y_cv, clf.predict(X_cv))
        # Subplot the learning curve graph
        ax = fig.add_subplot(2, 2, k+1)
        ax.plot(sizes[1:], test_err, lw = 2, label = 'Testing')
        ax.plot(sizes[1:], train_err, lw = 2, label = 'Training')
        ax.legend(framealpha=0.8)
        ax.set_title('max_depth = %s'%(depth))
        ax.set_xlabel('Number of Data Points in Training Set')
        ax.set_ylabel('$F_1$-score')
        ax.set_xlim([0, len(X_train)])
    # Visual aesthetics
    # NOTE(review): the figure is saved *before* tight_layout() is applied
    # below, so the saved png may have overlapping labels — confirm order.
    if (mdl=='DT'):
        fig.suptitle('Decision Tree Classifier Learning Performances', fontsize=18, y=1.03)
        fig.savefig('plots/depth_f1_vs_dataPoints_%s.png'%mdl )
    elif (mdl=='RF'):
        fig.suptitle('Random Forest Classifier Learning Performances', fontsize=18, y=1.03)
        fig.savefig('plots/depth_f1_vs_dataPoints_%s.png'%mdl)
    print 'Done....creating learning curves'
    fig.tight_layout()
示例15: runDecisionTreeSimulation
# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import predict [as 别名]
def runDecisionTreeSimulation(dataTrain, dataTest, dataHold, train_tfidf, test_tfidf, hold_tfidf):
print 'running decision tree'
outFile = open('decisionTreeLog.txt','a')
outFile.write('train==> %d, %d \n'%(train_tfidf.shape[0],train_tfidf.shape[1]))
outFile.write('test==> %d, %d \n'%(test_tfidf.shape[0],test_tfidf.shape[1]))
with SimpleTimer('time to train', outFile):
clf = DecisionTreeClassifier().fit(train_tfidf, dataTrain.target)
baseScore = clf.score(test_tfidf, dataTest.target)
initHeight = clf.tree_.max_depth
print 'baseline score %.3f base height %d' % (baseScore, initHeight)
outFile.write('baseline score %.3f base height %d \n' % (baseScore, initHeight))
res = []
with SimpleTimer('time to prune', outFile):
for height in range(initHeight, 40, -25):
# print 'training for height %d' % height
clf = DecisionTreeClassifier(max_depth=height).fit(train_tfidf, dataTrain.target)
score = clf.score(hold_tfidf, dataHold.target)
res.append((score, height))
outFile.write('%d %.3f \n' % (height, score))
res = sorted(res, key=lambda x:x[0], reverse=True)
print res[:5]
bestDepth = res[0][1]
print ('best height is %d' % bestDepth)
outFile.write('best depth is %d and score is %.3f \n' % (bestDepth, res[0][0]))
bestClf = DecisionTreeClassifier(max_depth=bestDepth)
bestClf.fit(train_tfidf, dataTrain.target)
predicted = bestClf.predict(test_tfidf)
train_predict = bestClf.predict(train_tfidf)
print 'testing score'
outFile.write('testing score')
outputScores(dataTest.target, predicted, outFile)
print 'training score'
outFile.write('testing score')
outputScores(dataTrain.target, train_predict, outFile)
results = predicted == dataTest.target
wrong = []
for i in range(len(results)):
if not results[i]:
wrong.append(i)
print 'classifier got these wrong:'
for i in wrong[:10]:
print dataTest.data[i], dataTest.target[i]
outFile.write('%s %d \n' % (dataTest.data[i], dataTest.target[i]))
plot_learning_curve(bestClf, 'decision tree after pruning from %d to %d depth' % (initHeight, bestDepth), train_tfidf, dataTrain.target, cv=5, n_jobs=4)