本文整理汇总了 Python 中 sklearn.ensemble.ExtraTreesClassifier.score 方法的典型用法代码示例。如果您正苦于以下问题：Python ExtraTreesClassifier.score 方法的具体用法？Python ExtraTreesClassifier.score 怎么用？Python ExtraTreesClassifier.score 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.ensemble.ExtraTreesClassifier 的用法示例。
在下文中一共展示了 ExtraTreesClassifier.score 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: get_ERT
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def get_ERT(Xtrain, Xtest, Ytrain, Ytest, gtree):
    """Fit a 1000-tree extra-trees classifier and print its accuracy.

    ``max_features``, ``max_depth`` and ``min_samples_split`` are copied from
    ``gtree.best_estimator_``. Accuracy on the training data and on the test
    data is printed as a percentage.

    Returns:
        The fitted ``ExtraTreesClassifier``.
    """
    tuned = gtree.best_estimator_
    ert = ExtraTreesClassifier(
        n_estimators=1000,
        max_features=tuned.max_features,
        max_depth=tuned.max_depth,
        min_samples_split=tuned.min_samples_split,
        n_jobs=-1,
    )
    ert.fit(Xtrain, Ytrain)

    # Score both sets first, then report, so the output order is train, test.
    accuracy = np.empty((2))
    for slot, (samples, labels) in enumerate(((Xtrain, Ytrain), (Xtest, Ytest))):
        accuracy[slot] = ert.score(samples, labels)

    print('Extremely Randomized Trees, train: {0:.02f}% '.format(accuracy[0]*100))
    print('Extremely Randomized Trees, test: {0:.02f}% '.format(accuracy[1]*100))
    return ert
示例2: test3
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def test3():
    """Compare three classifiers on the chorale harmonization data.

    Steps, in order:

    1. Load the "train", "dev" and "test" splits from
       ``data/chorales_rnn.hdf5`` via ``load_dataset``.
    2. Keep only columns 0-9 of every feature array (the remaining columns
       are the "oracle" features).
    3. Stack train and dev into one training set; stack test into one test set.
    4. Drop every row whose label equals the largest label in the test set,
       which this code treats as the padding label.
    5. Write the cleaned arrays to ``data/chorales_sm.hdf5``.
    6. Print train/test accuracy for a random forest (fitted on the raw
       features) and for extra-trees and multinomial logistic regression
       (both fitted on the output of ``encode``).

    Returns nothing; results are printed.
    """
    # ``reduce`` is a builtin only on Python 2; functools provides it on
    # both Python 2.6+ and Python 3.
    from functools import reduce

    print("3. Testing softmax for full harmonization...")
    trainXc, trainyc = load_dataset("train", "data/chorales_rnn.hdf5")
    devXc, devyc = load_dataset("dev", "data/chorales_rnn.hdf5")
    testXc, testyc = load_dataset("test", "data/chorales_rnn.hdf5")
    stack = lambda x1, x2: numpy.vstack((x1, x2))
    hstack = lambda x1, x2: numpy.hstack((x1, x2))

    # Remove Oracle features. An explicit index list (not a slice) is kept so
    # an array with fewer than 10 columns still raises instead of being
    # silently truncated.
    kept_columns = list(range(0, 10))
    trainXc = [X[:, kept_columns] for X in trainXc]
    devXc = [X[:, kept_columns] for X in devXc]
    testXc = [X[:, kept_columns] for X in testXc]

    # Aggregate data
    Xtrain = stack(reduce(stack, trainXc), reduce(stack, devXc))
    ytrain = hstack(reduce(hstack, trainyc), reduce(hstack, devyc))
    Xtest, ytest = reduce(stack, testXc), reduce(hstack, testyc)

    # Remove padding: boolean masks replace the per-row Python loops.
    ypadding = ytest.max()
    train_keep = ytrain != ypadding
    test_keep = ytest != ypadding
    Xtrain, ytrain = Xtrain[train_keep], ytrain[train_keep]
    Xtest, ytest = Xtest[test_keep], ytest[test_keep]

    encoder, Xtrainsparse, Xtestsparse = encode(Xtrain, Xtest)

    # Keyword arguments instead of positional ones: recent scikit-learn
    # releases accept only ``n_estimators`` positionally.
    RF = RandomForestClassifier(n_estimators=10, criterion="entropy", max_depth=None)
    RF.fit(Xtrain, ytrain)

    # Write full harmonization data
    with h5py.File('data/chorales_sm.hdf5', "w", libver="latest") as f:
        f.create_dataset("Xtrain", Xtrain.shape, dtype="i", data=Xtrain)
        f.create_dataset("ytrain", ytrain.shape, dtype="i", data=ytrain)
        f.create_dataset("Xtest", Xtest.shape, dtype="i", data=Xtest)
        f.create_dataset("ytest", ytest.shape, dtype="i", data=ytest)
    print("Full harmonization data written")

    score_RF_train = RF.score(Xtrain, ytrain)
    score_RF_test = RF.score(Xtest, ytest)
    print("R-FOREST: %.2f%% training, %.2f%% test" % (score_RF_train * 100, score_RF_test * 100))

    # min_samples_split=2 is the smallest integer scikit-learn accepts; a
    # node needs two samples to be split at all, so trees grow exactly as
    # they did with the former value of 1.
    ERF = ExtraTreesClassifier(n_estimators=40, max_depth=None, min_samples_split=2, random_state=0)
    ERF.fit(Xtrainsparse, ytrain)
    score_ERF_train = ERF.score(Xtrainsparse, ytrain)
    score_ERF_test = ERF.score(Xtestsparse, ytest)
    print("EXTRA TREES: %.2f%% training, %.2f%% test" % (score_ERF_train * 100, score_ERF_test * 100))

    logit = linear_model.LogisticRegression(multi_class='multinomial', solver='lbfgs', C=1)
    logit.fit(Xtrainsparse, ytrain)
    score_logit_train = logit.score(Xtrainsparse, ytrain)
    score_logit_test = logit.score(Xtestsparse, ytest)
    print("LOGIT: %.2f%% training, %.2f%% test" % (score_logit_train * 100, score_logit_test * 100))
示例3: get_ERT
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def get_ERT(Xtrain, Ytrain, baseTree, Xtest=None, Ytest=None, verbose=0):
    """Fit a 1000-tree extra-trees classifier, optionally printing its accuracy.

    ``max_features``, ``max_depth`` and ``min_samples_split`` are copied from
    ``baseTree.best_estimator_``.

    Args:
        Xtrain: Training samples.
        Ytrain: Training labels.
        baseTree: Object exposing ``best_estimator_`` with the three
            hyper-parameters named above.
        Xtest: Optional test samples; scored only when given and
            ``verbose == 1``.
        Ytest: Labels for ``Xtest``.
        verbose: When equal to 1, print training (and, if available, test)
            accuracy as percentages.

    Returns:
        The fitted ``ExtraTreesClassifier``.
    """
    tuned = baseTree.best_estimator_
    ert = ExtraTreesClassifier(
        n_estimators=1000,
        max_features=tuned.max_features,
        max_depth=tuned.max_depth,
        min_samples_split=tuned.min_samples_split,
        n_jobs=-1,
    )
    ert.fit(Xtrain, Ytrain)

    if verbose == 1:
        train_accuracy = ert.score(Xtrain, Ytrain)
        print('Extremely Randomized Trees, train: {0:.02f}% '.format(train_accuracy*100))
        # Identity test: ``Xtest != None`` on an array would compare
        # element-wise, and comparing ``type()`` objects is needlessly indirect.
        if Xtest is not None:
            test_accuracy = ert.score(Xtest, Ytest)
            print('Extremely Randomized Trees, test: {0:.02f}% '.format(test_accuracy*100))
    return ert
示例4: many_classify_dtree
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def many_classify_dtree(X,Y):
    """Train one 10-tree extra-trees model per data chunk and print the mean score.

    ``X`` and ``Y`` are cut into four label-based ``.loc`` ranges. Each chunk
    is split 90/10 into train and test parts, a fresh classifier is fitted,
    and the four test accuracies are averaged.

    NOTE(review): ``.loc`` slices include their end label, so neighbouring
    chunks appear to share one boundary row, and the last ``X`` range ends at
    59999 while the last ``Y`` range ends at 60000. Both are kept as in the
    original - confirm against the real index before changing.
    """
    print("Building the model for decision trees...")
    x_ranges = ((0, 15000), (15000, 30000), (30000, 45000), (45000, 59999))
    y_ranges = ((0, 15000), (15000, 30000), (30000, 45000), (45000, 60000))
    x = [X.loc[first:last] for first, last in x_ranges]
    y = [Y.loc[first:last] for first, last in y_ranges]

    scores = []
    for chunk_no in range(len(x)):
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(
            x[chunk_no], y[chunk_no], test_size=0.1)
        # Timestamps are captured around the fit but are not used afterwards.
        start_time = datetime.now()
        clf = ExtraTreesClassifier(n_estimators=10)
        flat_train_labels = np.ravel(y_train)
        flat_test_labels = np.ravel(y_test)
        clf = clf.fit(X_train, flat_train_labels)
        end_time = datetime.now()
        scores.append(clf.score(X_test, flat_test_labels))

    total = sum(scores)
    print("Classification Score using Decision Tree with Drift Detection:" + str(total/4))
示例5: do_extra_trees
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def do_extra_trees(md = None):
    """Return the test accuracy of a 100-tree extra-trees model.

    The train/test data comes from ``analysis_glass()``; ``md`` is passed
    through as the ``max_depth`` of the trees.
    """
    from sklearn.ensemble import ExtraTreesClassifier

    fit_features, fit_labels, eval_features, eval_labels = analysis_glass()
    model = ExtraTreesClassifier(n_estimators=100, max_depth=md)
    model.fit(fit_features, fit_labels)
    accuracy = model.score(eval_features, eval_labels)
    return accuracy
示例6: learn
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def learn(f):
    """Evaluate and fit a 100-tree extra-trees classifier on the labelled rows.

    Rows of the module-level ``raw_data`` frame are kept when their ``label``
    is not ``'unknown'`` and their ``file type`` is ``'EXECUTE'``. The
    function prints a 10-fold cross-validated accuracy, then the accuracy on
    a held-out split (``random_state=3301``), then the 20 most important
    features.

    Args:
        f: Column names used as features; also used to label the
            importances, so it must be in the same order as the columns.

    Returns:
        The classifier fitted on the training part of the held-out split.
    """
    print('testing classifier')
    data = raw_data[raw_data['label'] != 'unknown']
    data = data[data['file type'] == 'EXECUTE']
    # NOTE(review): DataFrame.as_matrix was removed in pandas 1.0; kept
    # because the pandas version this file targets is not visible here.
    X = data.as_matrix(f)
    y = np.array(data['label'].tolist())

    clf = ExtraTreesClassifier(n_estimators=100)
    cv_scores = sklearn.cross_validation.cross_val_score(clf, X, y, cv=10)
    print("predicted accuracy: %0.2f (+/- %0.2f)" % (cv_scores.mean(), cv_scores.std() * 2))

    seed = 3301
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
    clf.fit(X_train, y_train)
    holdout_accuracy = clf.score(X_test, y_test)
    print("actual accuracy: %0.2f" % holdout_accuracy)

    # sorted() works on Python 3, where zip() returns an iterator that has
    # no .sort() method; the ordering matches list.sort().
    importances = sorted(zip(f, clf.feature_importances_),
                         key=lambda pair: pair[1], reverse=True)
    for feature_name, weight in importances[0:20]:
        # One pre-formatted argument prints the same on Python 2 and 3.
        print("%s %s" % (feature_name.ljust(30), weight))
    return clf
示例7: random_forest_cross_validate
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def random_forest_cross_validate(targets, features, nprocesses=-1):
    """Cross-validate an extra-trees classifier over 5 folds, printing diagnostics.

    For each fold a 100-tree ``ExtraTreesClassifier`` is fitted on the
    training mask and scored on the test mask. The value returned by
    ``get_metric`` is printed and collected, and the features with non-zero
    importance are printed and pickled to ``important_features.p``.

    The function has no ``return`` statement; ``results`` is built but never
    handed back to the caller.

    NOTE(review): indentation was lost in SOURCE. Placing the
    feature-importance report inside the fold loop is a reconstruction -
    confirm against the original file.
    NOTE(review): ``KFold(n, k=..., indices=False)`` and
    ``compute_importances=True`` belong to a very old scikit-learn API;
    this is Python 2 code (``print`` statements).
    """
    cv = cross_validation.KFold(len(features), k=5, indices=False)
    # Iterate through the training and test cross-validation segments and
    # run the classifier on each one, aggregating the results into a list.
    results = []
    for i, (traincv, testcv) in enumerate(cv):
        cfr = ExtraTreesClassifier(
            n_estimators=100,
            max_features=None,
            verbose=2,
            compute_importances=True,
            n_jobs=nprocesses,
            random_state=0,
        )
        print "Fitting cross validation #{0}".format(i)
        cfr.fit(features[traincv], targets[traincv])
        print "Scoring cross validation #{0}".format(i)
        # Switch the fitted model to a single job before scoring.
        # NOTE(review): the original trailing comment ("read in the features
        # to predict, remove bad columns") did not describe this call.
        cfr.set_params(n_jobs=1)
        score = cfr.score(features[testcv], targets[testcv])
        print "Score for cross validation #{0}, score: {1}".format(i, score)
        # get_metric is defined outside this block; its exact meaning is not
        # visible here.
        mean_diff = get_metric(cfr, features[testcv], targets[testcv])
        print "Mean difference: {0}".format(mean_diff)
        results.append(mean_diff)
        print "Features importance"
        # Collect (column name, importance) for every feature the model used.
        features_list = []
        for j, importance in enumerate(cfr.feature_importances_):
            if importance > 0.0:
                column = features.columns[j]
                features_list.append((column, importance))
        # Most important feature first.
        features_list = sorted(features_list, key=lambda x: x[1], reverse=True)
        for j, tup in enumerate(features_list):
            print j, tup
        # NOTE(review): the file object is never closed explicitly.
        pickle.dump(features_list, open("important_features.p", 'wb'))
        # NOTE(review): same print and append as a few lines above, so
        # mean_diff is stored twice - confirm whether that is intended.
        print "Mean difference: {0}".format(mean_diff)
        results.append(mean_diff)
示例8: ERFC_Classifier
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def ERFC_Classifier(X_train, X_cv, X_test, Y_train,Y_cv,Y_test, Actual_DS):
    """Fit a 100-tree extra-trees model and return class probabilities.

    Prints the accuracy on the validation set, an actual-vs-predicted
    cross-tabulation (labels decoded with the module-level ``label_enc``),
    the validation log loss and the elapsed time.

    ``Y_test`` is accepted but not used.

    Returns:
        Three DataFrames with ``predict_proba`` output for ``X_cv``,
        ``X_test`` and ``Actual_DS``, in that order.
    """
    print("***************Starting Extreme Random Forest Classifier***************")
    started_at = time()

    clf = ExtraTreesClassifier(n_estimators=100,n_jobs=-1)
    clf.fit(X_train, Y_train)

    cv_labels = clf.predict(X_cv)
    cv_accuracy = clf.score(X_cv,Y_cv)
    print("Extreme Random Forest Classifier - {0:.2f}%".format(100 * cv_accuracy))

    actual_names = label_enc.inverse_transform(Y_cv)
    predicted_names = label_enc.inverse_transform(cv_labels)
    Summary = pd.crosstab(actual_names, predicted_names,
                          rownames=['actual'], colnames=['preds'])
    # NOTE(review): dividing with axis=1 aligns the row sums with the column
    # labels; kept exactly as in the original - confirm this is intended.
    shares = Summary.divide(Summary.sum(axis=1), axis=1)
    Summary['pct'] = shares.max(axis=1)*100
    print(Summary)

    # Check with log loss function
    epsilon = 1e-15
    cv_proba = clf.predict_proba(X_cv)
    cv_log_loss = log_loss(Y_cv, cv_proba, eps=epsilon, normalize=True)
    print(cv_log_loss)
    print("done in %0.3fs" % (time() - started_at))

    test_proba = clf.predict_proba(X_test)
    actual_proba = clf.predict_proba(Actual_DS)
    print("***************Ending Extreme Random Forest Classifier***************")

    frames = tuple(pd.DataFrame(p) for p in (cv_proba, test_proba, actual_proba))
    return frames
示例9: classify
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def classify(X,Y):
    """Fit a 10-tree extra-trees model on a 90/10 split and print its test accuracy.

    The label arrays are flattened with ``np.ravel`` before fitting and
    scoring. Nothing is returned.
    """
    print("Building the model for random forests...")
    parts = cross_validation.train_test_split(X, Y, test_size=0.1)
    fit_X, eval_X, fit_y, eval_y = parts

    model = ExtraTreesClassifier(n_estimators=10)
    fit_y = np.ravel(fit_y)
    eval_y = np.ravel(eval_y)
    model = model.fit(fit_X, fit_y)

    accuracy = model.score(eval_X, eval_y)
    print("Classification Score using Random Forests:" + str(accuracy))
示例10: classify
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def classify(X,Y,test_data,test_labels):
    """Fit a 10-tree extra-trees model and return its predictions for ``test_data``.

    Both label arrays are flattened with ``np.ravel``. The accuracy on
    ``test_data`` / ``test_labels`` is printed before predicting.
    """
    print("Building the model for random forests...")
    flat_labels = np.ravel(Y)
    flat_test_labels = np.ravel(test_labels)

    model = ExtraTreesClassifier(n_estimators=10).fit(X, flat_labels)
    accuracy = model.score(test_data, flat_test_labels)
    print("Classification Score using Random Forests:" + str(accuracy))

    return model.predict(test_data)
示例11: et_classify
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def et_classify(self):
    """Fit a default extra-trees classifier and print its results.

    Trains on ``self.descr`` / ``self.target``, then prints the predictions
    for ``self.test_descr``, the accuracy against ``self.test_target`` and
    the feature importances. Nothing is returned.
    """
    # Each print takes one pre-formatted string so the output is the same
    # whether this runs as Python 2 (print statement) or Python 3. The
    # double space reproduces what ``print "Pred ", pred`` emitted.
    print("Extra Trees")
    clf = ExtraTreesClassifier()
    clf.fit(self.descr, self.target)
    mean = clf.score(self.test_descr, self.test_target)
    pred = clf.predict(self.test_descr)
    print("Pred  %s" % (pred,))
    print("Mean : %3f" % mean)
    print("Feature Importances  %s" % (clf.feature_importances_,))
示例12: train_model
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def train_model(stats, X_train, Y_train, X_test=None, Y_test=None):
    """Train the module-level ``Classifier`` and record accuracy statistics.

    The classifier is built from the module-level ``n_estimators`` and
    ``nodesize`` (used as ``min_samples_leaf``) with ``n_jobs=30``.

    Args:
        stats: Mapping updated in place with ``train_acc`` and, when a test
            set is given, ``test_acc``, ``test_acc_TP``, ``test_acc_FP`` and
            ``ROC_AUC``.
        X_train: Training samples.
        Y_train: Training labels.
        X_test: Optional test samples; when ``None`` only training accuracy
            is computed.
        Y_test: Test labels. Boolean masks ``Y_test==1`` and ``Y_test==0``
            are used to index ``X_test``, so both presumably need to be
            array-like objects that support mask indexing.

    Returns:
        Tuple ``(clf, stats)``.
    """
    # Every print takes a single pre-formatted string, which gives identical
    # output under Python 2 and Python 3.
    print("Training ExtraTrees classifier")
    clf = Classifier(n_estimators=n_estimators,n_jobs=30,
                     min_samples_leaf=nodesize,
                     )
    clf.fit(X_train,Y_train)
    stats["train_acc"] = clf.score(X_train, Y_train)
    print("Training complete")
    print('Training Accuracy: %.3f'%stats["train_acc"])

    # Breakout early if no test set is given
    if X_test is None:
        return clf, stats

    stats["test_acc"] = clf.score(X_test, Y_test)
    print('Testing Accuracy: %.3f'%stats["test_acc"])

    # Accuracy restricted to the rows labelled 1.
    X_test_TP = X_test[Y_test==1]
    Y_test_TP = Y_test[Y_test==1]
    stats["test_acc_TP"] = clf.score(X_test_TP, Y_test_TP)
    print('Testing Accuracy TP: %.3f'%stats["test_acc_TP"])

    # Accuracy restricted to the rows labelled 0.
    X_test_FP = X_test[Y_test==0]
    Y_test_FP = Y_test[Y_test==0]
    stats["test_acc_FP"] = clf.score(X_test_FP, Y_test_FP)
    print('Testing Accuracy FP: %.3f'%stats["test_acc_FP"])

    # Probability of the second class column, used for the ROC curve.
    pred_probas = clf.predict_proba(X_test)[:,1]
    Y_predict = clf.predict(X_test)

    total_contacts = Y_test.sum()
    predicted_contacts = Y_predict[Y_test==1].sum()
    print('Total contacts predicted %i/%i'%(predicted_contacts,total_contacts))

    fpr,tpr,_ = roc_curve(Y_test, pred_probas)
    stats["ROC_AUC"] = auc(fpr,tpr)
    print("ROC area under the curve %s" % (stats["ROC_AUC"],))
    return clf, stats
示例13: train_data_and_score_tree
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def train_data_and_score_tree(features,labels, cv, depth):
    """Fit a depth-limited extra-trees model on a fixed split and score it.

    Args:
        features: Sample matrix.
        labels: Target labels.
        cv: Passed as ``test_size`` to ``train_test_split``.
        depth: ``max_depth`` of the trees.

    Returns:
        Tuple ``(score, clf)``: accuracy on the held-out part and the fitted
        classifier.
    """
    parts = cross_validation.train_test_split(
        features, labels, test_size=cv, random_state=0
    )
    fit_features, eval_features, fit_labels, eval_labels = parts

    model = ExtraTreesClassifier(max_depth=depth)
    model = model.fit(fit_features, fit_labels)
    return model.score(eval_features, eval_labels), model
示例14: EnsembleMethod
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def EnsembleMethod(X, y):
    """Print train/test accuracy for a decision tree, a random forest and extra trees.

    The data is split once with ``test_size=TRAIN_TEST_SPLIT_RATIO``; the
    three classifiers are then fitted and reported in that order. Nothing is
    returned.
    """
    # divide our data set into a training set and a test set
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=TRAIN_TEST_SPLIT_RATIO)

    # min_samples_split=2 is the smallest integer scikit-learn accepts
    # (1 raises ValueError from 0.18 on). A node needs two samples to be
    # split at all, so trees grow exactly as they did with the former 1.
    min_split = 2

    # (banner, classifier) pairs, in reporting order. Banners are kept
    # verbatim, including their uneven '=' counts.
    candidates = [
        ("====== Decision Tree Classifier ========",
         DecisionTreeClassifier(max_depth=None,
                                min_samples_split=min_split, random_state=0)),
        ("====== Random Forest Classifier ========",
         RandomForestClassifier(n_estimators=10, max_depth=None,
                                min_samples_split=min_split, random_state=0)),
        ("======= Extra Trees Classifier ========",
         ExtraTreesClassifier(n_estimators=10, max_depth=None,
                              min_samples_split=min_split, random_state=0)),
    ]

    for banner, classifier in candidates:
        # use the classifier to fit the data.
        classifier.fit(X_train, y_train)
        # print the performance of the classifier
        print(banner)
        print('TRAIN SCORE', classifier.score(X_train, y_train))
        print('TEST SCORE', classifier.score(X_test, y_test))
示例15: main
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import score [as 别名]
def main():
    """Train, save and score one extra-trees classifier per currency.

    For every entry of the module-level ``currencies``:

    * read ``../../data/<currency>1440.csv`` into a float DataFrame, parsing
      the first two columns together as the index,
    * build features with ``extractFeatures`` and targets with
      ``calculateRewards`` (targets trimmed to the last ``len(features)``
      rows),
    * split 60/40 and fit a 20-tree bootstrapped ``ExtraTreesClassifier``
      with out-of-bag scoring,
    * dump the model to ``models/<currency>.pkl``,
    * record the test accuracy and the out-of-bag score.

    After all currencies are processed, one summary line per currency is
    logged. Nothing is returned.
    """
    results = {}
    for currency in currencies:
        logging.info('Currency: {0}'.format(currency))

        # get data
        data = pd.read_csv(
            r'../../data/' + currency + '1440.csv',
            names=['date', 'time', 'open', 'high', 'low', 'close', 'volume'],
            parse_dates=[[0, 1]],
            index_col=0,
        ).astype(float)
        logging.info('Loaded {0} rows'.format(len(data)))

        # extract features
        features = extractFeatures(data)

        # set rewards; keep only the trailing rows so they line up with the
        # features.
        rewards = calculateRewards(data)
        rewards = rewards[-len(features):]

        # train split
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(
            features,
            rewards,
            test_size=0.40,
        )
        logging.info('Data splitted')

        # create classifier
        logging.info('Classifier: training...')
        rfc = ExtraTreesClassifier(n_estimators=20, oob_score=True, bootstrap=True)
        rfc.fit(X_train, y_train)

        # saving
        logging.info('Classifier: saving...')
        externals.joblib.dump(rfc, 'models/' + currency + '.pkl', compress=9)

        # score
        logging.info('Classifier: scoring...')
        results[currency] = {
            'score': rfc.score(X=X_test, y=y_test),
            'oob': rfc.oob_score_,
        }

    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for currency, scores in results.items():
        logging.info('{0} score:{1:.2f} oob:{2:.2f}'.format(currency, scores['score'], scores['oob']))