本文整理汇总了 Python 中 sklearn.ensemble.AdaBoostClassifier.fit 方法的典型用法代码示例。如果您正苦于以下问题:Python AdaBoostClassifier.fit 方法的具体用法?Python AdaBoostClassifier.fit 怎么用?Python AdaBoostClassifier.fit 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.ensemble.AdaBoostClassifier 的用法示例。
在下文中一共展示了AdaBoostClassifier.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
class Ensemble:
    """Majority-vote ensemble of four scikit-learn classifiers.

    Constructing an instance builds a random forest, an LDA model, a decision
    tree and an AdaBoost model, trains all of them, and stores the combined
    test-set predictions in ``self.pred``.
    """

    def __init__(self, data):
        """Create the member classifiers and immediately run the ensemble.

        ``data`` must expose ``features_train``, ``labels_train`` and
        ``features_test`` (those are the attributes ``make_prediction`` reads).
        """
        self.rf = RandomForestClassifier(n_estimators=80, n_jobs=-1, min_samples_split=45, criterion='entropy')
        self.lda = LDA()
        self.dec = DecisionTreeClassifier(criterion='entropy')
        self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)
        self.make_prediction(data)

    def make_prediction(self, data):
        '''
        Make an ensemble prediction

        Every member is fitted on ``data.features_train`` /
        ``data.labels_train`` and then predicts ``data.features_test``.
        ``self.pred`` is replaced by a list holding, for each test sample, the
        label chosen by most members. On a tie the label of the earliest
        member in the order rf, lda, dec, ada wins.
        '''
        members = (self.rf, self.lda, self.dec, self.ada)
        for model in members:
            model.fit(data.features_train, data.labels_train)

        per_model = [model.predict(data.features_test) for model in members]

        self.pred = []
        for votes in zip(*per_model):
            votes = list(votes)
            # max() returns the first element with the highest count, which
            # gives the tie-breaking rule described in the docstring.
            self.pred.append(max(votes, key=votes.count))
示例2: ada_boost_dt
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def ada_boost_dt():
    """
    Cross-validate, fit and dump a 300-round AdaBoost model on scaled features.

    The training set comes from ``dataset.load_train()``. Features are
    standardized with ``StandardScaler`` before both the 5-fold
    cross-validation and the final fit. The fitted scaler and classifier are
    handed to ``IO.dump_submission`` wrapped in a ``Pipeline``.

    Recorded results of the original run:

    Submission: ada_boost_dt_0707_03.csv
    E_val: 0.854350
    E_in: 0.889561
    E_out: 0.8832315976033993
    """
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    try:
        from sklearn.model_selection import cross_val_score
    except ImportError:
        # Older scikit-learn releases only ship the legacy module, which was
        # removed in 0.20.
        from sklearn.cross_validation import cross_val_score

    X, y = dataset.load_train()

    raw_scaler = StandardScaler()
    X_scaled = raw_scaler.fit_transform(X)

    ab = AdaBoostClassifier(n_estimators=300)

    # NOTE(review): the scaler is fitted on all of X before cross-validation,
    # so every validation fold influences the scaling. Pass a scaler+classifier
    # Pipeline to cross_val_score if an unbiased E_val is required.
    scores = cross_val_score(ab, X_scaled, y, cv=5, n_jobs=-1)
    logger.debug('CV: %s', scores)
    logger.debug('E_val: %f', sum(scores) / len(scores))

    ab.fit(X_scaled, y)
    logger.debug('E_in: %f', Util.auc_score(ab, X_scaled, y))

    IO.dump_submission(Pipeline([('scale_raw', raw_scaler),
                                 ('ab', ab)]), 'ada_boost_dt_0707_03')
示例3: some
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def some(X, Y, X_test, Y_test):
    """Train a default AdaBoost model and print its test error.

    Args:
        X, Y: training samples and labels, passed straight to ``fit``.
        X_test: test samples.
        Y_test: test labels; each entry is indexed with ``[0]`` to obtain the
            label (presumably a column vector / list of 1-element rows).

    Prints the training time, the flattened test labels, the predictions,
    the final test error (1 - accuracy) and the test error after every
    boosting stage. Returns nothing.
    """
    ada = AdaBoostClassifier()
    print("Train Model ---")
    t1 = time()
    ada.fit(X, Y)
    t2 = time()
    print("Model Trained ---------- %s" % (t2 - t1,))

    # Unwrap the one-element rows so the labels match predict()'s output.
    Y_test2 = [k[0] for k in Y_test]
    print("Testing: ")
    print(Y_test2)

    pred = ada.predict(X_test)
    print(pred)
    test_error = 1. - accuracy_score(y_true=Y_test2, y_pred=pred)
    print(test_error)

    print("STAGED _____________")
    test_errors = [
        1. - accuracy_score(y_true=Y_test2, y_pred=test_predict)
        for test_predict in ada.staged_predict(X_test)
    ]
    print("errorss : ")
    print(test_errors)
示例4: ab_predictedValue
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def ab_predictedValue():
    """Fit AdaBoost on the training frame and score the test frame.

    Reads the module-level ``train_df``, ``test_df``, ``features`` and
    ``NoOfEstimators``. The target column is ``'SeriousDlqin2yrs'``.

    Returns:
        The second column of ``predict_proba`` for ``test_df[features]``
        (presumably the probability of the positive class; the column order
        follows the classifier's ``classes_``).
    """
    print('----------AdaBoost----------')
    ab_clf = AdaBoostClassifier(n_estimators=NoOfEstimators)
    ab_clf.fit(train_df[features], train_df['SeriousDlqin2yrs'])
    # A distinct local name avoids shadowing this function inside its own body.
    probabilities = ab_clf.predict_proba(test_df[features])
    print('Feature Importance = %s' % (ab_clf.feature_importances_,))
    return probabilities[:, 1]
示例5: AB_results
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def AB_results():  # AdaBoostClassifier
    """Report and plot AdaBoost test accuracy for several ensemble sizes.

    Uses the module-level ``X_train``, ``y_train``, ``X_test``, ``y_test``
    and ``plt``. For each ``n_estimators`` value a fresh classifier is fitted
    and its mean test accuracy is printed. The accuracies are then plotted
    against ``n_estimators`` and saved to ``temp_plain.png``.
    """
    print("--------------AdaBoostClassifier-----------------")
    rang = [60, 80]

    # NOTE: an identical experiment on HOG features (X_train_hog / X_test_hog,
    # saved to "temp_hog.png") was disabled in the original and is not run.

    print("\n--------------Without HOG-----------------")
    ans = []         # "(n_estimators, accuracy)" labels, printed only
    accuracies = []  # numeric accuracies, used for the plot
    print("n_estimators            Accuracy")
    for i in rang:
        clf = AdaBoostClassifier(n_estimators=i)
        clf.fit(X_train, y_train)
        mean_accuracy = clf.score(X_test, y_test)
        print(" ".join([str(i), "          ", str(mean_accuracy)]))
        ans.append('(' + str(i) + ", " + str(mean_accuracy) + ')')
        accuracies.append(mean_accuracy)
    print(ans)

    # Plot the numeric accuracies; the formatted strings in `ans` are not
    # valid y-values for a line plot.
    plt.plot(rang, accuracies, linewidth=2.0)
    plt.xlabel("n_estimators")
    plt.ylabel("mean_accuracy")
    plt.savefig("temp_plain.png")
示例6: prediction
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def prediction(feat, label):
    """Evaluate AdaBoost over decision trees of depth 1 through 9.

    The data is split 75/25 (``random_state=0``). For every depth a
    100-estimator AdaBoost model is trained on the training part and scored
    on the held-out part.

    Returns:
        A tuple ``(depths, accuracies, aucs)`` of three equally long lists,
        one entry per tested depth.
    """
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        feat, label, test_size=0.25, random_state=0)

    depths, accuracies, aucs = [], [], []

    # A plain DecisionTreeClassifier sweep over the same depths was disabled
    # in the original; only the boosted variant is evaluated.
    for max_depth in range(1, 10):
        booster = AdaBoostClassifier(
            tree.DecisionTreeClassifier(max_depth=max_depth), n_estimators=100)
        booster.fit(x_train, y_train)

        predicted = booster.predict(x_test)
        held_out_accuracy = booster.score(x_test, y_test)
        # NOTE(review): the AUC is computed from hard class predictions, not
        # from scores/probabilities — confirm this is intended.
        held_out_auc = metrics.roc_auc_score(y_test, predicted)

        depths.append(max_depth)
        accuracies.append(held_out_accuracy)
        aucs.append(held_out_auc)

    return depths, accuracies, aucs
示例7: runAdaReal
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def runAdaReal(arr):  # arr packs (depth / 100, n_est / 100, lrn_rate)
    """Train and score one AdaBoost configuration described by ``arr``.

    ``arr[0]`` and ``arr[1]`` are multiplied by 100 and truncated to give the
    tree depth and the number of estimators; ``arr[2]`` is the learning rate.
    Each call prints and then increments the global ``counter``.

    Returns:
        ``100`` when any of the three parameters is not positive, otherwise
        the negated AMS score of the run (presumably so that a minimiser
        maximises the AMS — confirm against the caller).
    """
    global file_dir, nEvents, solutionFile, counter

    tree_depth = int(arr[0] * 100)
    rounds = int(arr[1] * 100)
    rate = arr[2]

    print('iteration number ' + str(counter))
    counter += 1

    if tree_depth <= 0 or rounds <= 0 or rate <= 0:
        print('return 100')
        return 100

    # Run identifier, also used as the stem of the '.out' file scored below.
    run_name = ''.join(
        ['adar_dep', str(tree_depth), '_est', str(rounds), '_lrn', str(rate)])

    booster = AdaBoostClassifier(
        tree.DecisionTreeClassifier(max_depth=tree_depth),
        n_estimators=rounds,
        learning_rate=rate)

    print("AdaBoostReal training")
    booster.fit(sigtr[train_input].values, sigtr['Label'].values)

    print("AdaBoostReal testing")
    predicted = booster.predict(sigtest[train_input].values)
    solnFile(run_name, predicted, sigtest['EventId'].values)
    print("AdaBoostReal finished")

    ams_score = ams.AMS_metric(solutionFile, file_dir + run_name + '.out', nEvents)
    print(ams_score)
    logfile.write(run_name + ': ' + str(ams_score) + '\n')
    return -1.0 * float(ams_score)
示例8: ANGEL_training
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def ANGEL_training(cds_filename, utr_filename, output_pickle, num_workers=3):
    """Train an AdaBoost classifier separating coding from UTR sequences.

    Args:
        cds_filename: FASTA file of coding sequences (positive class, label 1).
        utr_filename: FASTA file of UTR sequences (negative class, label 0).
        output_pickle: path the trained model is dumped to.
        num_workers: worker count forwarded to ``get_data_parallel``.

    Both sequence sets are added to a shared ``CDSWindowFeat`` background
    before features are extracted. Coding records are featurised with
    ``[0]`` and UTR records with ``[0, 1, 2]`` (presumably reading frames —
    confirm against ``get_data_parallel``).

    Returns:
        ``(data, target, bdt)``: the feature array, the label list and the
        fitted classifier. The pickle holds ``{'bdt': bdt, 'o_all': o_all}``.
    """
    # Read each FASTA file fully inside a context manager so the handle is
    # closed deterministically.
    with open(cds_filename) as handle:
        coding = list(SeqIO.parse(handle, 'fasta'))
    with open(utr_filename) as handle:
        utr = list(SeqIO.parse(handle, 'fasta'))

    o_all = c_ORFscores.CDSWindowFeat()
    add_to_background(o_all, coding)
    add_to_background(o_all, utr)

    data_pos = get_data_parallel(o_all, coding, [0], num_workers)
    data_neg = get_data_parallel(o_all, utr, [0, 1, 2], num_workers)

    # Negatives first, then positives; target follows the same order.
    data = data_neg + data_pos
    target = [0] * len(data_neg) + [1] * len(data_pos)
    data = np.array(data)

    sys.stderr.write("data prep done, running classifier....\n")
    bdt = AdaBoostClassifier(n_estimators=50)
    bdt.fit(data, target)

    sys.stderr.write("classifier trained. putting pickle to %s\n" % (output_pickle,))
    with open(output_pickle, 'wb') as f:
        dump({'bdt': bdt, 'o_all': o_all}, f)

    return data, target, bdt
示例9: adaboost_skin
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def adaboost_skin(X_train, y_train, X_test, y_test):
    """Learn the skin data sets with AdaBoost.

    X_*: Samples (must provide ``toarray()``, e.g. sparse matrices).
    y_*: labels.

    A SAMME AdaBoost model over depth-1 trees is trained for a range of
    ensemble sizes and scored on the test set.

    Returns:
        ``(iterations, scores)``: the tested ``n_estimators`` values and the
        matching test accuracies in percent.
    """
    print('AdaBoost')
    min_iter = 1
    max_iter = 200
    steps = 30
    # Floor division keeps the step (and thus n_estimators) an integer on both
    # Python 2 and 3. With these constants the step is 6 and the largest value
    # tested is 181, not max_iter.
    diff = (max_iter - min_iter) // steps
    iterations = [min_iter + diff * step for step in range(steps + 1)]

    # Densify once instead of on every loop iteration.
    X_train_dense = X_train.toarray()
    X_test_dense = X_test.toarray()

    scores = []
    for T in iterations:
        clf = AdaBoostClassifier(
            base_estimator=DecisionTreeClassifier(max_depth=1),
            algorithm="SAMME",
            n_estimators=T)
        clf.fit(X_train_dense, y_train)
        scores.append(100 * clf.score(X_test_dense, y_test))
        print('\t%d Iterations: %.2f%%' % (T, scores[-1]))
    return iterations, scores
示例10: runAdaBoost
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def runAdaBoost(arr):  # arr packs (depth / 100, n_est / 100, lrn_rate)
    """Train and score one SAMME AdaBoost configuration described by ``arr``.

    ``arr[0]`` and ``arr[1]`` are multiplied by 100 and truncated to give the
    tree depth and the number of estimators; ``arr[2]`` is the learning rate.
    Each call prints and then increments the global ``counter``. The
    predictions are handed to ``solnFile`` and the AMS score of the run is
    printed and appended to ``logfile``.

    Returns:
        ``100`` when any of the three parameters is not positive, otherwise
        the negated AMS score (the function is used as a minimisation
        objective).
    """
    global file_dir, nEvents, solutionFile, counter
    print('iteration number ' + str(counter))
    counter += 1

    depth = int(arr[0] * 100)
    n_est = int(arr[1] * 100)
    lrn_rate = arr[2]
    if depth <= 0 or n_est <= 0 or lrn_rate <= 0:
        return 100

    fname = 'ada_dep' + str(depth) + '_est' + str(n_est) + '_lrn' + str(lrn_rate)

    # learning_rate is passed through so the optimised parameter actually
    # affects the model; it was previously validated and written into the
    # file name but never given to the classifier.
    ada = AdaBoostClassifier(tree.DecisionTreeClassifier(max_depth=depth),
                             algorithm="SAMME",
                             n_estimators=n_est,
                             learning_rate=lrn_rate)
    print("AdaBoost training")
    ada.fit(sigtr[train_input].values, sigtr['Label'].values)
    print("AdaBoost testing")
    ada_pred = ada.predict(sigtest[train_input].values)
    solnFile(fname, ada_pred, sigtest['EventId'].values)
    print("AdaBoost finished")

    # Score the written solution; needed by the scipy optimiser.
    ams_score = ams.AMS_metric(solutionFile, file_dir + fname + '.out', nEvents)
    print(ams_score)
    logfile.write(fname + ': ' + str(ams_score) + '\n')
    return -1.0 * float(ams_score)  # since we are minimising
示例11: main
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def main(sc, spark):
    """Train a scikit-learn AdaBoost model on a sample and score it on Spark.

    Args:
        sc: Spark context; used for the broadcast and the accumulators.
        spark: Spark session, forwarded to ``load_corpus``.

    The corpus is vectorized, a 10% sample (without replacement) is collected
    to the driver to train the model, and the broadcast model is then
    evaluated over every partition of the full corpus. The global accuracy is
    printed; nothing is returned.
    """
    # Load and vectorize the corpus
    corpus = load_corpus(sc, spark)
    vector = make_vectorizer().fit(corpus)
    corpus = vector.transform(corpus)

    # Get the sample from the dataset
    sample = corpus.sample(False, 0.1).collect()
    X = [row['tfidf'] for row in sample]
    y = [row['label'] for row in sample]

    # Train a Scikit-Learn Model
    clf = AdaBoostClassifier()
    clf.fit(X, y)

    # Broadcast the Scikit-Learn Model to the cluster
    broadcast_clf = sc.broadcast(clf)

    # Both counters start at zero so the reported accuracy is not biased by a
    # phantom incorrect prediction.
    correct = sc.accumulator(0)
    incorrect = sc.accumulator(0)

    # Create the accuracy closure
    count_accuracy = make_accuracy_closure(broadcast_clf, incorrect, correct)

    # Compute the number incorrect and correct
    corpus.foreachPartition(count_accuracy)

    # An empty corpus reports 0.0 instead of dividing by zero.
    total = correct.value + incorrect.value
    accuracy = float(correct.value) / float(total) if total else 0.0
    print("Global accuracy of model was {}".format(accuracy))
示例12: boost_report
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def boost_report():
    """Print the accuracy of a linear SVM and of AdaBoost built on top of it.

    Training data comes from the module-level ``mit_records`` and test data
    from ``mim_records``. Each record contributes
    ``list(record.features.values())`` as its feature row and
    ``record.my_class`` as its label.
    """

    def collect(records):
        # One pass over the records, yielding parallel feature/label lists.
        rows, labels = [], []
        for rec in records:
            rows.append(list(rec.features.values()))
            labels.append(rec.my_class)
        return rows, labels

    train_rows, train_labels = collect(mit_records)
    test_rows, test_labels = collect(mim_records)

    linear_svm = svm.SVC(kernel="linear", C=0.1)
    linear_svm.fit(train_rows, train_labels)
    svm_accuracy = linear_svm.score(test_rows, test_labels)
    print("linear kernel svm accuracy: " + str(svm_accuracy))

    # The fitted SVM is used as the base estimator of the boosted ensemble.
    boosted = AdaBoostClassifier(
        base_estimator=linear_svm,
        n_estimators=100,
        algorithm='SAMME')
    boosted.fit(train_rows, train_labels)
    boosted_accuracy = boosted.score(test_rows, test_labels)
    print("adaboost accuracy: " + str(boosted_accuracy))
示例13: training
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def training(baseclassparameters, adaparameters, queue):
    """Train one boosted-tree configuration and report its best significance.

    Args:
        baseclassparameters: keyword arguments for ``DecisionTreeClassifier``.
        adaparameters: keyword arguments for ``AdaBoostClassifier``.
        queue: receives ``(best_significance, baseclassparameters,
            adaparameters)`` via ``put`` when the run is finished.

    The model is fitted on the module-level ``Xtrain`` / ``ytrain``. The
    second ``predict_proba`` column for ``Xtest`` is then cut at 1000 evenly
    spaced thresholds in [0, 1], and the cut maximising ``S / sqrt(S + B)``
    is reported. S and B count test events with ``ytest == 1`` and
    ``ytest == 0`` whose score exceeds the threshold.
    """
    treeclassifier = DecisionTreeClassifier(**baseclassparameters)
    adaclassifier = AdaBoostClassifier(treeclassifier, **adaparameters)

    print("\nBegin calculation with {0} and {1}".format(str(baseclassparameters), str(adaparameters)))
    adaclassifier.fit(Xtrain, ytrain)

    # Predict with the model
    prob_predict_test = adaclassifier.predict_proba(Xtest)[:, 1]

    # Calculate maximal significance
    True_Signal_test = prob_predict_test[ytest == 1]
    True_Bkg_test = prob_predict_test[ytest == 0]

    def percent(part, whole):
        # An undefined ratio is reported as NaN instead of raising.
        return 100. * part / whole if whole else float('nan')

    # Defaults keep the report below well-defined even when no threshold
    # yields a significance above zero.
    best_significance = 0
    best_x = 0.0
    best_S = 0.0
    best_B = 0.0
    for x in np.linspace(0, 1, 1000):
        S = float(len(True_Signal_test[True_Signal_test > x]))
        B = float(len(True_Bkg_test[True_Bkg_test > x]))
        if S + B == 0:
            # Nothing passes this cut; S / sqrt(S + B) would be 0/0.
            continue
        significance = S / np.sqrt(S + B)
        if significance > best_significance:
            best_significance = significance
            best_x = x
            best_S = S
            best_B = B

    print("\nCalculation with {} and {} done ".format(str(baseclassparameters), str(adaparameters)))
    print("Best significance of {0:.2f} archived when cutting at {1:.3f}".format(best_significance, best_x))
    print("Signal efficiency: {0:.2f}%".format(percent(best_S, len(True_Signal_test))))
    print("Background efficiency: {0:.2f}%".format(percent(best_B, len(True_Bkg_test))))
    print("Purity: {0:.2f}%".format(percent(best_S, best_S + best_B)))

    queue.put((best_significance, baseclassparameters, adaparameters))
示例14: AdaBoostcls
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
class AdaBoostcls(object):
    """Best-effort wrapper around a default ``AdaBoostClassifier``.

    Every method catches errors raised by the underlying estimator, prints
    the traceback and returns ``None`` instead of propagating the exception.
    """

    def __init__(self):
        self.adaboost_cls = AdaBoostClassifier()
        self.prediction = None  # result of the most recent predict() call
        self.train_x = None
        self.train_y = None
        self.test_x = None      # samples passed to the most recent predict()

    def train_model(self, train_x, train_y):
        """Remember the training data and fit the classifier on it."""
        try:
            self.train_x = train_x
            self.train_y = train_y
            self.adaboost_cls.fit(train_x, train_y)
        except Exception:
            # Deliberately best-effort: report the failure and carry on.
            print(traceback.format_exc())

    def predict(self, test_x):
        """Predict labels for ``test_x``; returns ``None`` on failure."""
        try:
            self.test_x = test_x
            self.prediction = self.adaboost_cls.predict(test_x)
            return self.prediction
        except Exception:
            print(traceback.format_exc())

    def accuracy_score(self, test_y):
        """Score the samples last given to ``predict`` against ``test_y``.

        Returns the classifier's ``score`` value, or ``None`` on failure.
        """
        try:
            return self.adaboost_cls.score(self.test_x, test_y)
        except Exception:
            print(traceback.format_exc())
示例15: ADA_Classifier
# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 用法: fit 是 AdaBoostClassifier 的实例方法, 不能单独导入; 请先创建实例 clf = AdaBoostClassifier(), 再调用 clf.fit(X, y)
def ADA_Classifier(X_train, X_cv, X_test, Y_train, Y_cv, Y_test, Actual_DS):
    """Fit a 300-estimator AdaBoost model and return class probabilities.

    Args:
        X_train, Y_train: training samples and labels.
        X_cv, Y_cv: validation samples and labels, used for the printed
            accuracy, confusion summary and log loss.
        X_test: test samples. ``Y_test`` is accepted but not used.
        Actual_DS: additional data set to score.

    Labels are decoded with the module-level ``label_enc`` for the printed
    summary.

    Returns:
        Three DataFrames with ``predict_proba`` output for ``X_cv``,
        ``X_test`` and ``Actual_DS``, in that order.
    """
    print("***************Starting AdaBoost Classifier***************")
    t0 = time()
    clf = AdaBoostClassifier(n_estimators=300)
    clf.fit(X_train, Y_train)
    preds = clf.predict(X_cv)
    score = clf.score(X_cv, Y_cv)

    print("AdaBoost Classifier - {0:.2f}%".format(100 * score))

    Summary = pd.crosstab(label_enc.inverse_transform(Y_cv), label_enc.inverse_transform(preds),
                          rownames=['actual'], colnames=['preds'])
    # Divide every row by its own total (axis=0 aligns the row sums with the
    # row index); 'pct' is then the largest share of predictions per actual
    # class.
    Summary['pct'] = Summary.divide(Summary.sum(axis=1), axis=0).max(axis=1) * 100
    print(Summary)

    # Check with log loss function
    epsilon = 1e-15
    preds2 = clf.predict_proba(X_cv)
    ll_output2 = log_loss(Y_cv, preds2, eps=epsilon, normalize=True)
    print(ll_output2)
    print("done in %0.3fs" % (time() - t0))

    preds3 = clf.predict_proba(X_test)
    preds4 = clf.predict_proba(Actual_DS)

    print("***************Ending AdaBoost Classifier***************")
    return pd.DataFrame(preds2), pd.DataFrame(preds3), pd.DataFrame(preds4)