This article collects typical usage examples of the Python method sklearn.ensemble.VotingClassifier.predict. If you are wondering what VotingClassifier.predict does, how to call it, or what real-world uses look like, the curated examples below may help. You can also explore further usage examples of the enclosing class, sklearn.ensemble.VotingClassifier.
The following shows 15 code examples of VotingClassifier.predict, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
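Before the examples, here is a minimal self-contained sketch of the typical call pattern (the toy data and estimator choices are illustrative only, not drawn from the examples below): the ensemble is built from named (name, estimator) pairs and fitted, then predict returns one class label per sample. With voting='hard' the label is a weighted majority vote of the estimators' predictions; with voting='soft' it is the argmax of their averaged predicted probabilities.

import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

# illustrative toy data (not taken from the examples below)
X = np.array([[-1.0, -1.1], [-1.2, -0.8], [0.9, 1.3], [1.1, 1.0]])
y = np.array([0, 0, 1, 1])

eclf = VotingClassifier(
    estimators=[('lr', LogisticRegression()), ('gnb', GaussianNB())],
    voting='soft')  # 'soft' averages predict_proba; 'hard' takes a majority vote
eclf.fit(X, y)      # the ensemble must be fitted before calling predict
print(eclf.predict(X))  # one predicted class label per row of X, e.g. [0 0 1 1]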
Example 1: test_sample_weight
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def test_sample_weight():
    """Tests sample_weight parameter of VotingClassifier"""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = SVC(probability=True, random_state=123)
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y, sample_weight=np.ones((len(y),)))
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    sample_weight = np.random.RandomState(123).uniform(size=(len(y),))
    eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft')
    eclf3.fit(X, y, sample_weight)
    clf1.fit(X, y, sample_weight)
    assert_array_equal(eclf3.predict(X), clf1.predict(X))
    assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))
    clf4 = KNeighborsClassifier()
    eclf3 = VotingClassifier(estimators=[
        ('lr', clf1), ('svc', clf3), ('knn', clf4)],
        voting='soft')
    msg = ("Underlying estimator 'knn' does not support sample weights.")
    assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight)
Example 2: test_set_params
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def test_set_params():
    """set_params should be able to set estimators"""
    clf1 = LogisticRegression(random_state=123, C=1.0)
    clf2 = RandomForestClassifier(random_state=123, max_depth=None)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft',
                             weights=[1, 2])
    assert_true('lr' in eclf1.named_estimators)
    assert_true(eclf1.named_estimators.lr is eclf1.estimators[0][1])
    assert_true(eclf1.named_estimators.lr is eclf1.named_estimators['lr'])
    eclf1.fit(X, y)
    assert_true('lr' in eclf1.named_estimators_)
    assert_true(eclf1.named_estimators_.lr is eclf1.estimators_[0])
    assert_true(eclf1.named_estimators_.lr is eclf1.named_estimators_['lr'])
    eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft',
                             weights=[1, 2])
    eclf2.set_params(nb=clf2).fit(X, y)
    assert_false(hasattr(eclf2, 'nb'))
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    assert_equal(eclf2.estimators[0][1].get_params(), clf1.get_params())
    assert_equal(eclf2.estimators[1][1].get_params(), clf2.get_params())
    eclf1.set_params(lr__C=10.0)
    eclf2.set_params(nb__max_depth=5)
    assert_true(eclf1.estimators[0][1].get_params()['C'] == 10.0)
    assert_true(eclf2.estimators[1][1].get_params()['max_depth'] == 5)
    assert_equal(eclf1.get_params()["lr__C"],
                 eclf1.get_params()["lr"].get_params()['C'])
Example 3: acc_VotingClassifier
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def acc_VotingClassifier():
    kf = KFold(900, n_folds=10, shuffle=True)
    acc = 0.0
    temp = 1
    conf_mat = [[0 for i in range(10)] for j in range(10)]
    clf1 = GaussianNB()
    clf2 = RandomForestClassifier(n_estimators=20, max_features=None, class_weight="balanced_subsample")
    clf3 = SVC(kernel='rbf', probability=False)
    clf4 = LogisticRegression()
    eclf = VotingClassifier(estimators=[('gnb', clf1), ('rf', clf2), ('lr', clf4)], voting='hard', weights=[1, 3, 3])
    for train_index, test_index in kf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        eclf = eclf.fit(X_train, y_train)
        y_predict = eclf.predict(X_test)
        acc_loop = getAccuracy(y_predict, y_test)
        conf_mat = buildConfusionMatrix(conf_mat, y_predict, y_test)
        print("*** Accuracy *** for " + str(temp) + "th time: " + str(acc_loop))
        acc += acc_loop
        temp += 1
    # Check whether the data set was transformed into MFCC (13), FFT (1000) or KPCA (100) features
    valid_mfcc = valid_fft = valid_kpca = None  # avoid a NameError below when only one branch runs
    if X.shape[1] == 13:
        print('In 13 features branch')
        valid_mfcc = eclf.predict(validation_set_mfcc)
    elif X.shape[1] == 1000:
        print('In 1000 features branch')
        valid_fft = eclf.predict(validation_set_fft)
    elif X.shape[1] == 100:
        print('In KPCA features branch')
        valid_kpca = eclf.predict(validation_set_kpca)
    acc = (acc / 10.0)
    printConfusionMatrix(conf_mat)
    return acc, getAccuracyFromConfusion(conf_mat), valid_mfcc, valid_fft, valid_kpca
Example 4: classify
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def classify():
    train_X, Y = load_svmlight_file('data/train_last')
    test_X, test_Y = load_svmlight_file('data/test_last')
    train_X = train_X.toarray()
    test_X = test_X.toarray()
    Y = [int(y) for y in Y]
    # print('Y:', len(Y))
    rows = pd.read_csv('data/log_test2.csv', index_col=0).sort_index().index.unique()
    train_n = train_X.shape[0]
    m = train_X.shape[1]
    test_n = test_X.shape[0]
    print(train_n, m)  # test_n
    # First, train all the classifiers on the training set
    print('train classify...')
    clf1 = LinearDiscriminantAnalysis()
    clf2 = GaussianNB()
    clf3 = LogisticRegression()
    clf4 = RandomForestClassifier()
    clf5 = KNeighborsClassifier(n_neighbors=12)
    clf6 = AdaBoostClassifier()
    # x_train, x_test, y_train, y_test = train_test_split(train_X, Y, test_size=0.2)  # split the training set
    # print(x_train.shape)
    # print(x_test.shape)
    # clf.fit(train_X, Y)
    clf = VotingClassifier(estimators=[('la', clf1), ('nb', clf2), ('lr', clf3), ('rf', clf4), ('nn', clf5), ('ac', clf6)], voting='soft', weights=[1.5, 1, 1, 1, 1, 1])
    # clf1.fit(x_train, y_train)
    # clf2.fit(x_train, y_train)
    # clf3.fit(x_train, y_train)
    # clf4.fit(x_train, y_train)
    clf.fit(train_X, Y)
    print('end train classify')
    print('start classify....')
    # print(metrics.classification_report(Y, predict_Y))
    # clf2.fit(train_X, Y)
    # print('clf2 fited...')
    # clf3.fit(train_X, Y)
    # print('clf3 fited...')
    # clf4.fit(train_X, Y)
    # print('clf4 fited...')
    # clf1.fit(train_X, Y)
    # print('clf1 fited...')
    # First classification result (evaluated on the training set)
    predict_Y = clf.predict(train_X)
    # predict_Y = clf.predict(train_X)
    print('classify result:')
    print(metrics.classification_report(Y, predict_Y))
    predict_Y = clf.predict(test_X)
    # print(predict_Y, len(predict_Y))
    print('end classify...')
    # predict_Y = clf.predict(X[cnt_train:])  # comment this out when training; enable it to output the test set, and comment out the classification-report print above
    # predict_Y = clf.predict(test_X)  # comment this out when training; enable it to output the test set, and comment out the classification-report print above
    DataFrame(predict_Y, index=rows).to_csv('data/info_test2.csv', header=False)
Example 5: test_predict_for_hard_voting
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def test_predict_for_hard_voting():
    # Test voting classifier with non-integer (float) prediction
    clf1 = FaultySVC(random_state=123)
    clf2 = GaussianNB()
    clf3 = SVC(probability=True, random_state=123)
    eclf1 = VotingClassifier(estimators=[
        ('fsvc', clf1), ('gnb', clf2), ('svc', clf3)], weights=[1, 2, 3],
        voting='hard')
    eclf1.fit(X, y)
    eclf1.predict(X)
Example 6: test_set_estimator_none
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def test_set_estimator_none():
    """VotingClassifier set_params should be able to set estimators as None"""
    # Test predict
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 0, 0.5]).fit(X, y)
    eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                         ('nb', clf3)],
                             voting='hard', weights=[1, 1, 0.5])
    eclf2.set_params(rf=None).fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_true(dict(eclf2.estimators)["rf"] is None)
    assert_true(len(eclf2.estimators_) == 2)
    assert_true(all([not isinstance(est, RandomForestClassifier) for est in
                     eclf2.estimators_]))
    assert_true(eclf2.get_params()["rf"] is None)
    eclf1.set_params(voting='soft').fit(X, y)
    eclf2.set_params(voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    msg = ('All estimators are None. At least one is required'
           ' to be a classifier!')
    assert_raise_message(
        ValueError, msg, eclf2.set_params(lr=None, rf=None, nb=None).fit, X, y)
    # Test soft voting transform
    X1 = np.array([[1], [2]])
    y1 = np.array([1, 2])
    eclf1 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[0, 0.5],
                             flatten_transform=False).fit(X1, y1)
    eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                             voting='soft', weights=[1, 0.5],
                             flatten_transform=False)
    eclf2.set_params(rf=None).fit(X1, y1)
    assert_array_almost_equal(eclf1.transform(X1),
                              np.array([[[0.7, 0.3], [0.3, 0.7]],
                                        [[1., 0.], [0., 1.]]]))
    assert_array_almost_equal(eclf2.transform(X1),
                              np.array([[[1., 0.],
                                         [0., 1.]]]))
    eclf1.set_params(voting='hard')
    eclf2.set_params(voting='hard')
    assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]]))
    assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))
Example 7: test_parallel_predict
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def test_parallel_predict():
    """Check parallel backend of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])
    eclf1 = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft", n_jobs=1).fit(X, y)
    eclf2 = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft", n_jobs=2).fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
Example 8: voting_class
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def voting_class(X, training_target, Y):
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.ensemble import VotingClassifier
    clf1 = LogisticRegression(random_state=1)
    clf2 = RandomForestClassifier(random_state=1)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft')
    eclf.fit(X[:, 0:6], training_target)
    proba = eclf.predict_proba(Y[:, 0:6])
    # predict needs the samples to classify; the original called eclf.predict() with no argument
    return eclf.predict(Y[:, 0:6]), proba
Example 9: predict
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def predict(self, X_test):
    '''
    predict the class for each sample
    '''
    if self.use_append == True:
        self.__X_test = X_test
    elif self.use_append == False:
        temp = []
    # first stage
    for clf in self.stage_one_clfs:
        y_pred = clf[1].predict(X_test)
        y_pred = np.reshape(y_pred, (len(y_pred), 1))
        if self.use_append == True:
            self.__X_test = np.hstack((self.__X_test, y_pred))
        elif self.use_append == False:
            temp.append(y_pred)
    if self.use_append == False:
        self.__X_test = np.array(temp).T[0]
    # second stage: weighted hard majority vote.
    # Note: a freshly constructed VotingClassifier must be fitted before predict
    # (the original call would raise NotFittedError, and sklearn does not accept
    # pre-fitted estimators), so - assuming the stage-two classifiers are already
    # fitted and the labels are non-negative integers - the weighted hard vote is
    # computed directly here instead.
    predictions = np.asarray(
        [clf.predict(self.__X_test) for _, clf in self.stage_two_clfs], dtype=int).T
    y_out = np.apply_along_axis(
        lambda row: np.argmax(np.bincount(row, weights=self.weights)), 1, predictions)
    return y_out
Example 10: voting_fit
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def voting_fit(X, y, RESULT_TEST_PATH, RESULT_PATH):
    ada_best = fit_adaboost(X, y)
    extratree_best = fit_extratree(X, y)
    rf_best = fit_rf(X, y)
    gbdt_best = fit_xgboost(X, y)
    svc_best = fit_svc(X, y)
    lr_best = fit_lr(X, y)
    votingC = VotingClassifier(estimators=[('rfc', rf_best), ('extc', extratree_best), ('lr', lr_best),
                                           ('adac', ada_best), ('gbc', gbdt_best)], voting='soft',
                               n_jobs=4)
    votingC.fit(X, y)
    test_df = pd.read_csv(RESULT_TEST_PATH)
    test = np.array(test_df)
    # test_Survived = pd.Series(votingC.predict(test), name="Survived")
    result = votingC.predict(test)
    test_df.insert(test_df.columns.size, 'Survived', result)
    test_df = test_df[['PassengerId', 'Survived']]
    test_df['PassengerId'] = test_df['PassengerId'].apply(np.int64)
    test_df.to_csv(RESULT_PATH, index=False)
    print("finish!")
Example 11: main
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def main(directory, tools_directory, non_tools_dir):
    global path
    path = sys.path[0]
    start = time.time()
    if directory is None or not os.path.isdir(directory):
        print("Please input directory containing pdf publications to classify")
        sys.exit(1)
    x_train, y_train = fetch_from_file()
    x_test, test_files = get_test_set(directory)
    # Just for testing, update machine learning part later
    x_train, x_test = normalize_scale(x_train, x_test)
    # estimator names must be unique; the original passed the name "second" twice
    classifier = VotingClassifier(
        [("first", classifier_list[0]), ("second", classifier_list[1]), ("third", classifier_list[2])]
    )
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_test)
    if os.path.isdir(tools_directory):
        shutil.rmtree(tools_directory)
    os.makedirs(tools_directory)
    if os.path.isdir(non_tools_dir):
        shutil.rmtree(non_tools_dir)
    os.makedirs(non_tools_dir)
    for num, pub in zip(y_pred, test_files):
        if num:
            shutil.copy2(directory + pub, tools_directory + pub)
        else:
            shutil.copy2(directory + pub, non_tools_dir + pub)
    print("Classification: Seconds taken: " + str(time.time() - start))
Example 12: test_sample_weight
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def test_sample_weight():
    """Tests sample_weight parameter of VotingClassifier"""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = SVC(probability=True, random_state=123)
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y, sample_weight=np.ones((len(y),)))
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    sample_weight = np.random.RandomState(123).uniform(size=(len(y),))
    eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft')
    eclf3.fit(X, y, sample_weight)
    clf1.fit(X, y, sample_weight)
    assert_array_equal(eclf3.predict(X), clf1.predict(X))
    assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))
    # check that an error is raised and indicative if sample_weight is not
    # supported.
    clf4 = KNeighborsClassifier()
    eclf3 = VotingClassifier(estimators=[
        ('lr', clf1), ('svc', clf3), ('knn', clf4)],
        voting='soft')
    msg = ('Underlying estimator KNeighborsClassifier does not support '
           'sample weights.')
    with pytest.raises(ValueError, match=msg):
        eclf3.fit(X, y, sample_weight)
    # check that _parallel_fit_estimator will raise the right error
    # it should raise the original error if this is not linked to sample_weight
    class ClassifierErrorFit(BaseEstimator, ClassifierMixin):
        def fit(self, X, y, sample_weight):
            raise TypeError('Error unrelated to sample_weight.')
    clf = ClassifierErrorFit()
    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
        clf.fit(X, y, sample_weight=sample_weight)
Example 13: main
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def main(path, filename):
    batchsT = ['histogramaByN', 'histogramaColor', 'patrones2x2ByN', 'patrones3x3ByN', 'patronesCirculaesByN_2_5', 'patronesCirculaesByN_2_9', 'patronesCirculaesByN_3_9', 'patronesCirculaesByN_5_9', 'patronesCirculaesByN_3_5']
    batchsAux = ['histogramaByN', 'histogramaColor', 'patronesCirculaesByN_2_5', 'patrones2x2ByN', 'patrones3x3ByN', 'patronesCirculaesByN_2_9', 'patronesCirculaesByN_3_9', 'patronesCirculaesByN_5_9', 'patronesCirculaesByN_3_5', 'patronesCirculaesByN_6_12', 'patronesCirculaesByN_8_12']
    # batchs = ['patrones2x2ByN', 'patrones3x3ByN', 'patronesCirculaesByN_2_5', 'patronesCirculaesByN_2_9']
    # batchs = ['patrones2x2ByN', 'patrones3x3ByN', 'patronesCirculaesByN_2_5', 'patronesCirculaesByN_3_5']
    # for batch in batchsAux:
    #     print(batch)
    batchs = batchsAux
    # batchs.remove(batch)
    X = []
    y = []
    load_batch(y, path, 'clases', filename)
    y = [j for i in y for j in i]
    for batch in batchs:
        load_batch(X, path, batch, filename)
    # X, y = load_images('/tmp/train/')
    est = [RandomForest(), Boosting()]
    for i in range(0, 15):
        est.append(Gradient(i))
    for i in range(0, 4):
        est.append(SVM(i))
    # scores = cross_validation.cross_val_score(clf, X, y, cv=5)
    # print(scores)
    clf = VotingClassifier(estimators=est)
    clf.fit(X, y)
    pickle.dump(clf, open("clf_grande.p", "wb"))
    return
    # everything below the return above is leftover experimentation code
    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, y, test_size=0.2, random_state=777)
    # print(clf.sub_score(X_test, Y_test))
    print('start')
    conf_matrix = metrics.confusion_matrix(Y_test, clf.predict(X_test))
    print('confusion matrix')
    print(conf_matrix)
    return
    for name, estim in est:
        print(name)
        # estim.fit(X_train, Y_train)
        # print(estim.score(X_test, Y_test))
        print(cross_validation.cross_val_score(estim, X, y, cv=5, n_jobs=-1))
    print('voter')
    print(cross_validation.cross_val_score(clf, X, y, cv=5, n_jobs=-1))
    return
    # clf.fit(X_train, Y_train)
    print(clf.score(X_test, Y_test))
Example 14: classifier
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def classifier(self, scoring, cv, eval_using):
    adaclf = AdaBoostClassifier(algorithm='SAMME')
    xtr = StandardScaler().fit_transform(self.xtr)
    xte = StandardScaler().fit_transform(self.xte)
    # iterate over each grid score for param tuner
    for score in scoring:
        print('Tuning parameters of initial classifiers...')
        passive_params = param_tuner(PassiveAggressiveClassifier(),
                                     score=score, cv=cv, xtr=xtr,
                                     ytr=self.ytr)
        passclf = PassiveAggressiveClassifier().set_params(**passive_params)
        sgd_params = param_tuner(SGDClassifier(), score=score, cv=cv,
                                 xtr=xtr, ytr=self.ytr)
        sgdclf = SGDClassifier().set_params(**sgd_params)
        # can't use resampling/bagging with the passive aggressive classifier:
        # it will raise ValueError: The number of class labels must be > 1,
        # since resampling may result in training sets with a single class.
        print('\n' + 'Tuning meta-classifiers with tuned classifier/s...')
        bagsgd_params = param_tuner(BaggingClassifier(sgdclf),
                                    score=score, cv=cv, xtr=xtr,
                                    ytr=self.ytr)
        bg_sgdclf = BaggingClassifier(sgdclf).set_params(**bagsgd_params)
        adasgd_params = param_tuner(adaclf.set_params(base_estimator=sgdclf),
                                    score=score, cv=cv, xtr=xtr,
                                    ytr=self.ytr)
        ada_sgdclf = adaclf.set_params(**adasgd_params)
        print('Voting on meta-classifiers/classifiers then predicting...')
        vote = VotingClassifier(estimators=[('BagSGD', bg_sgdclf),
                                            ('adaboostSGD', ada_sgdclf),
                                            ('Passive', passclf)],
                                voting='hard').fit(xtr, self.ytr)
        start = time.time()
        y_true, y_pred = self.yte, vote.predict(xte)
        print('\n' + '-' * 5, 'FINAL PREDICTION RESULTS', '-' * 5 + '\n',
              '{0:.4f}'.format(time.time() - start) + '--prediction time(secs)')
        clf_evaluation = report(*eval_using, y_true=y_true, y_pred=y_pred)
        for reports in clf_evaluation:
            print('---', reports)
            print(clf_evaluation[reports])
Example 15: do_ml
# Required module import: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict [as alias]
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.25)
    # clf = neighbors.KNeighborsClassifier()
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('Accuracy', confidence)
    predictions = clf.predict(X_test)
    print('Predicted spread:', Counter(predictions))
    return confidence