本文整理汇总了Python中sklearn.ensemble.AdaBoostClassifier类的典型用法代码示例。如果您正苦于以下问题：Python AdaBoostClassifier类的具体用法？Python AdaBoostClassifier怎么用？Python AdaBoostClassifier使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了AdaBoostClassifier类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
class Ensemble:
    """Majority-vote ensemble of four classifiers.

    The members are a random forest, an LDA model, a single decision tree
    and an AdaBoost classifier.  Constructing the object immediately fits
    every member and stores the voted test-set labels in ``self.pred``.
    """

    def __init__(self, data):
        """Create the member classifiers and run the ensemble prediction.

        ``data`` must expose ``features_train``, ``labels_train`` and
        ``features_test``; those are the attributes read by
        ``make_prediction``.
        """
        self.rf = RandomForestClassifier(n_estimators=80, n_jobs=-1,
                                         min_samples_split=45,
                                         criterion='entropy')
        self.lda = LDA()
        self.dec = DecisionTreeClassifier(criterion='entropy')
        self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)
        self.make_prediction(data)

    def make_prediction(self, data):
        """Make an ensemble prediction.

        Fits each member on the training split, predicts the test split
        with each of them and stores, per test sample, the label that
        received the most votes in ``self.pred`` (a list).
        """
        # The order matters for tie-breaking below: rf, lda, dec, ada.
        members = (self.rf, self.lda, self.dec, self.ada)
        for model in members:
            model.fit(data.features_train, data.labels_train)

        member_predictions = [model.predict(data.features_test)
                              for model in members]

        # max() returns the first element with the highest count, so a tie
        # is resolved in favour of the earliest member in ``members``.
        self.pred = [max(votes, key=votes.count)
                     for votes in zip(*member_predictions)]
示例2: runAdaBoost
def runAdaBoost(arr):
    """Objective function: train and score one SAMME AdaBoost configuration.

    Takes a single parameter vector so that it can be handed to a
    minimiser (the original notes mention scipy optimise).

    Parameters
    ----------
    arr : sequence of three numbers
        ``arr[0] * 100`` is truncated to the tree ``max_depth``,
        ``arr[1] * 100`` is truncated to ``n_estimators`` and ``arr[2]``
        is the learning rate.

    Returns
    -------
    ``100`` when any of the three derived parameters is not positive,
    otherwise the negated AMS score (negated because we are minimising).

    Relies on module-level state: ``counter`` (incremented on every call),
    ``file_dir``, ``nEvents``, ``solutionFile``, ``sigtr``, ``sigtest``,
    ``train_input`` and ``logfile``.
    """
    global file_dir, nEvents, solutionFile, counter
    print('iteration number ' + str(counter))
    counter += 1

    depth = int(arr[0] * 100)
    n_est = int(arr[1] * 100)
    lrn_rate = arr[2]
    if depth <= 0 or n_est <= 0 or lrn_rate <= 0:
        # Penalty value that steers the minimiser away from invalid regions.
        return 100

    fname = 'ada_dep' + str(depth) + '_est' + str(n_est) + '_lrn' + str(lrn_rate)
    filename = fname

    # The learning rate is part of the search vector and of the output file
    # name, so it has to reach the classifier as well; previously it was
    # validated but silently ignored.
    ada = AdaBoostClassifier(tree.DecisionTreeClassifier(max_depth=depth),
                             algorithm="SAMME",
                             n_estimators=n_est,
                             learning_rate=lrn_rate)
    print("AdaBoost training")
    ada.fit(sigtr[train_input].values, sigtr['Label'].values)
    print("AdaBoost testing")
    ada_pred = ada.predict(sigtest[train_input].values)
    # presumably writes file_dir + filename + '.out', which is read back
    # below -- verify against solnFile.
    solnFile(filename, ada_pred, sigtest['EventId'].values)
    print("AdaBoost finished")

    ams_score = ams.AMS_metric(solutionFile, file_dir + fname + '.out', nEvents)
    print(ams_score)
    logfile.write(fname + ': ' + str(ams_score) + '\n')
    return -1.0 * float(ams_score)  # since we are minimising
示例3: main
def main():
    """Train a KNN-based AdaBoost model on train.csv and print predictions.

    Reads ``train.csv`` and ``test.csv`` from the working directory, skips
    the first row of each, uses columns 1..7 as features and column 8 of
    the training file as the target, replaces NaN features with 26.6,
    applies ``decomposition_pca`` and prints a ``PassengerId,Survived``
    CSV to stdout with ids starting at 892.
    """
    nan_fill = 26.6

    # Context managers make sure the CSV handles are closed after parsing.
    with open('train.csv', 'r') as train_file:
        trainset = np.genfromtxt(train_file, delimiter=',')[1:]
    X = np.array(trainset[:, 1:8])
    y = np.array(trainset[:, 8])
    X[np.isnan(X)] = nan_fill

    with open('test.csv', 'r') as test_file:
        testset = np.genfromtxt(test_file, delimiter=',')[1:]
    test = np.array(testset[:, 1:8])
    test[np.isnan(test)] = nan_fill

    X, test = decomposition_pca(X, test)

    bdt = AdaBoostClassifier(
        base_estimator=KNeighborsClassifier(n_neighbors=20, algorithm='auto'),
        algorithm="SAMME",
        n_estimators=200)
    bdt.fit(X, y)

    # Predict the whole test matrix at once: estimators expect 2-D input,
    # and one call avoids re-running the ensemble once per row.
    predictions = bdt.predict(test)

    print('PassengerId,Survived')
    for i, label in enumerate(predictions):
        print('%d,%d' % (i + 892, int(label)))
示例4: train_adaboost
def train_adaboost(features, labels, learning_rate, n_lab, n_runs, n_estim, n_samples):
    """Train ``n_runs`` AdaBoost classifiers for each of the first labels.

    Parameters
    ----------
    features, labels
        Full data set; forwarded to ``get_binary_sets`` for every run.
    learning_rate, n_estim
        Passed to ``AdaBoostClassifier`` as ``learning_rate`` and
        ``n_estimators``.
    n_lab
        Only the first ``n_lab`` entries of ``np.unique(labels)`` are used.
    n_runs
        Number of classifiers trained per label.
    n_samples
        Forwarded to ``get_binary_sets``.

    Returns
    -------
    (allLearners, used_labels)
        ``allLearners[k]`` is the list of ``n_runs`` objects returned by
        ``fit`` for ``used_labels[k]``.
    """
    uniqLabels = np.unique(labels)
    print(' '.join(['Taking ', str(n_lab), ' labels']))
    uniqLabels = uniqLabels[:n_lab]
    used_labels = uniqLabels

    pbar = start_progressbar(len(uniqLabels), 'training adaboost for %i labels' % len(uniqLabels))
    allLearners = []
    for yy, targetLab in enumerate(uniqLabels):
        runs = []
        for _ in range(n_runs):
            feats, labs = get_binary_sets(features, labels, targetLab, n_samples)
            # The base estimator is only a template: AdaBoost fits its own
            # copies, so fitting it here beforehand would be wasted work.
            baseClf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=10, min_samples_split=10)
            ada_real = AdaBoostClassifier(base_estimator=baseClf,
                                          learning_rate=learning_rate,
                                          n_estimators=n_estim,
                                          algorithm="SAMME.R")
            runs.append(ada_real.fit(feats, labs))
        allLearners.append(runs)
        update_progressbar(pbar, yy)
    end_progressbar(pbar)
    return allLearners, used_labels
示例5: test_oneclass_adaboost_proba
def test_oneclass_adaboost_proba():
    """Check predict_proba when the training target has a single class.

    Regression test written in response to issue #7501:
    https://github.com/scikit-learn/scikit-learn/issues/7501
    """
    n_samples = len(X)
    single_class_target = np.ones(n_samples)

    clf = AdaBoostClassifier()
    clf = clf.fit(X, single_class_target)

    # With one class there is one probability column, and it is all ones.
    expected = np.ones((n_samples, 1))
    assert_array_almost_equal(clf.predict_proba(X), expected)
示例6: createAdaBoostClassifier
def createAdaBoostClassifier(trainingVectors, targetValues):
    """Fit and return an AdaBoostClassifier on the given training data.

    Each sample is weighted by its own target value times 10000.
    """
    booster = AdaBoostClassifier(
        base_estimator=None,
        n_estimators=50,
        learning_rate=1.0,
        algorithm='SAMME.R',
        random_state=None,
    )
    # NOTE(review): the weights are derived from the labels, so samples
    # whose target is 0 get zero weight -- confirm this is intended.
    sample_weights = targetValues * 10000
    booster.fit(trainingVectors, targetValues, sample_weight=sample_weights)
    return booster
示例7: cvalidate
def cvalidate():
    """Grid-search an RBF SVC, boost the best one and print its test score.

    Reads ``trainLabels.csv`` and ``train.csv`` from the working directory,
    holds out 30% of the rows, applies ``decomposition_pca``, grid-searches
    ``C`` and ``gamma`` for an RBF ``SVC``, uses the best estimator as the
    base of a 100-round SAMME ``AdaBoostClassifier`` and prints that
    classifier's score on the held-out rows.
    """
    # Context managers make sure both CSV handles are closed after parsing.
    with open('trainLabels.csv', 'r') as label_file:
        targetset = np.genfromtxt(label_file, dtype='f16')
    y = list(targetset)

    with open('train.csv', 'r') as train_file:
        trainset = np.genfromtxt(train_file, delimiter=',', dtype='f16')
    X = np.array(trainset)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.3, random_state=0)
    X_train, X_test = decomposition_pca(X_train, X_test)

    # SVM hyper-parameter grid (powers of ten in quarter-decade steps).
    c_range = 10.0 ** np.arange(6.5, 7.5, .25)
    gamma_range = 10.0 ** np.arange(-2.5, 0.5, .25)
    parameters = {'kernel': ['rbf'], 'C': c_range, 'gamma': gamma_range}
    svr = SVC()
    clf = grid_search.GridSearchCV(svr, parameters)
    clf.fit(X_train, y_train)

    bdt = AdaBoostClassifier(base_estimator=clf.best_estimator_,
                             algorithm="SAMME",
                             n_estimators=100)
    bdt.fit(X_train, y_train)
    print(bdt.score(X_test, y_test))
示例8: trainClassifier
def trainClassifier(dataDir, trialName, NUMFISH):
    """Train an AdaBoost fish-ID classifier from image folders and pickle it.

    For every id ``tr`` in ``range(NUMFISH)`` all files in
    ``<dataDir>/process/<trialName>/FR_ID<tr>/`` are read with OpenCV,
    converted to grayscale and turned into a feature row (circular-HOG
    features followed by the mean pixel value) labelled ``tr``.  A SAMME
    AdaBoost classifier with depth-1 trees is fitted on all rows, pickled
    to ``<dataDir>/process/<trialName>/boost<trialName>.p`` and its number
    of training-set misclassifications is printed.
    """
    ch = circularHOGExtractor(6, 4, 3)
    nFeats = ch.getNumFields() + 1  # +1 for the mean-intensity feature

    # Collect per-directory blocks and stack once at the end instead of
    # re-copying the growing array on every iteration.
    feature_blocks = []
    target_blocks = []
    for tr in range(NUMFISH):
        directory = dataDir + '/process/' + trialName + '/FR_ID' + str(tr) + '/'
        files = os.listdir(directory)
        thisData = np.zeros((len(files), nFeats))
        for i, imName in enumerate(files):
            sample = cv2.imread(directory + imName)
            thisIm = cv2.cvtColor(sample, cv2.COLOR_BGR2GRAY)
            thisData[i, :] = np.hstack((ch.extract(thisIm), np.mean(thisIm)))
        feature_blocks.append(thisData)
        target_blocks.append(tr * np.ones(len(files)))

    trainData = np.vstack(feature_blocks) if feature_blocks else np.array([])
    targetData = np.hstack(target_blocks) if target_blocks else np.array([])

    clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=50)
    clf.fit(trainData, targetData)

    # Close the pickle file deterministically so the model is fully written.
    with open(dataDir + '/process/' + trialName + '/boost' + trialName + '.p', "wb") as model_file:
        pickle.dump(clf, model_file)

    y_pred = clf.predict(trainData)
    print("Number of mislabeled points out of a total %d points : %d" % (trainData.shape[0], (targetData != y_pred).sum()))
示例9: AB_results
def AB_results():  # AdaBoostClassifier
    """Sweep ``n_estimators`` for AdaBoost, print and plot test accuracy.

    Uses the module-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``.  For each value in ``[60, 80]`` a default
    ``AdaBoostClassifier`` is fitted and scored; the results are printed
    and the accuracy curve is saved to ``temp_plain.png``.

    A variant of this sweep on HOG features (``X_train_hog`` /
    ``X_test_hog``, saved to ``temp_hog.png``) used to be sketched here as
    commented-out code and is currently disabled.
    """
    print("--------------AdaBoostClassifier-----------------")
    rang = [60, 80]

    print("\n--------------Without HOG-----------------")
    ans = []         # printable "(n_estimators, accuracy)" strings
    accuracies = []  # numeric accuracies used for the plot
    print("n_estimators Accuracy")
    for i in rang:
        clf = AdaBoostClassifier(n_estimators=i)
        clf.fit(X_train, y_train)
        mean_accuracy = clf.score(X_test, y_test)
        print(' '.join([str(i), " ", str(mean_accuracy)]))
        ans.append('(' + str(i) + ", " + str(mean_accuracy) + ')')
        accuracies.append(mean_accuracy)
    print(ans)

    # Plot the numeric accuracies; the formatted strings in ``ans`` are
    # not valid y-values for a line plot.
    plt.plot(rang, accuracies, linewidth=2.0)
    plt.xlabel("n_estimators")
    plt.ylabel("mean_accuracy")
    plt.savefig("temp_plain.png")
示例10: prediction
def prediction(feat, label):
    """Score boosted decision trees of depth 1..9 on a 25% hold-out split.

    Returns three parallel lists: the tree depths tried, the hold-out
    accuracy for each depth, and the ROC AUC computed from the predicted
    labels for each depth.
    """
    split = cross_validation.train_test_split(
        feat, label, test_size=0.25, random_state=0)
    x_train, x_test, y_train, y_test = split

    depths, accuracies, aucs = [], [], []
    for max_depth in range(1, 10):
        weak_learner = tree.DecisionTreeClassifier(max_depth=max_depth)
        booster = AdaBoostClassifier(weak_learner, n_estimators=100)
        booster.fit(x_train, y_train)

        predicted_labels = booster.predict(x_test)
        holdout_accuracy = booster.score(x_test, y_test)
        # NOTE(review): AUC is computed from hard labels, not scores.
        holdout_auc = metrics.roc_auc_score(y_test, predicted_labels)

        depths.append(max_depth)
        accuracies.append(holdout_accuracy)
        aucs.append(holdout_auc)

    return depths, accuracies, aucs
示例11: __init__
def __init__(self, n_estimators=50, learning_rate=1.0, algorithm='SAMME.R',
             criterion='gini', splitter='best', max_depth=5, min_samples_split=2, min_samples_leaf=1,
             max_features=None, random_state=None, min_density=None, compute_importances=None):
    """Configure an AdaBoost classifier whose base learner is a decision tree.

    ``n_estimators``, ``learning_rate`` and ``algorithm`` are forwarded to
    ``AdaBoostClassifier.__init__``; the remaining arguments configure the
    ``DecisionTreeClassifier`` used as base estimator.  Every argument is
    also stored on the instance under its own name.

    ``min_density`` and ``compute_importances`` are legacy tree options.
    They are handed to the tree only when explicitly set, because newer
    scikit-learn releases no longer accept them.
    """
    base_estimator = DecisionTreeClassifier()
    self.base_estimator = base_estimator
    self.base_estimator_class = self.base_estimator.__class__

    self.n_estimators = n_estimators
    self.learning_rate = learning_rate
    self.algorithm = algorithm

    self.splitter = splitter
    self.max_depth = max_depth
    self.criterion = criterion
    self.max_features = max_features
    self.min_density = min_density
    self.random_state = random_state
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.compute_importances = compute_importances

    tree_params = dict(
        criterion=self.criterion,
        splitter=self.splitter,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        max_features=self.max_features,
        random_state=self.random_state,
    )
    # Leaving a legacy option out when it is None is equivalent to passing
    # its old default, and keeps construction working where the keyword
    # has been removed.
    if self.min_density is not None:
        tree_params['min_density'] = self.min_density
    if self.compute_importances is not None:
        tree_params['compute_importances'] = self.compute_importances

    self.estimator = self.base_estimator_class(**tree_params)
    AdaBoostClassifier.__init__(self, base_estimator=self.estimator,
                                n_estimators=self.n_estimators,
                                learning_rate=self.learning_rate,
                                algorithm=self.algorithm)
示例12: train_adaboost
def train_adaboost(features, labels):
    """Train ``N_RUNS`` AdaBoost stump classifiers for each of the first labels.

    Uses the module-level settings ``N_LAB`` (number of unique labels
    kept), ``N_RUNS`` (classifiers per label), ``N_ESTIM`` (boosting
    rounds) and ``learning_rate``.

    Returns a list with one entry per label; each entry is the list of
    objects returned by ``fit`` for that label.
    """
    uniqLabels = np.unique(labels)
    print(' '.join(['TAKING ONLY ', str(N_LAB), ' LABELS FOR SPEED ']))
    uniqLabels = uniqLabels[:N_LAB]

    allLearners = []
    for targetLab in uniqLabels:
        print(' '.join(['processing for label ', str(targetLab)]))
        runs = []
        for _ in range(N_RUNS):
            feats, labs = get_binary_sets(features, labels, targetLab)
            # The base estimator is only a template: AdaBoost fits its own
            # copies, so fitting it here beforehand would be wasted work.
            baseClf = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
            ada_real = AdaBoostClassifier(base_estimator=baseClf,
                                          learning_rate=learning_rate,
                                          n_estimators=N_ESTIM,
                                          algorithm="SAMME.R")
            runs.append(ada_real.fit(feats, labs))
        allLearners.append(runs)

    return allLearners
示例13: classify
def classify(x, y, cv, n_estimator=50):
    """Cross-validate an AdaBoost classifier built on entropy stumps.

    Parameters
    ----------
    x, y
        Feature matrix and targets; indexed with the index arrays from
        ``cv``.
    cv
        Iterable of ``(train_indices, test_indices)`` pairs.
    n_estimator
        Number of boosting rounds.

    Returns
    -------
    (a, p, r, f)
        Mean accuracy, mean precision, mean recall over the folds, and the
        F-measure computed from those two means (``0.0`` when precision
        and recall are both zero).

    Side effect: the classifier is stored in the module-level ``clf``.
    """
    global clf
    acc, prec, recall = [], [], []

    # The legacy ``compute_importances`` / ``min_density`` keywords were
    # only ever passed as None here; they are omitted because newer
    # scikit-learn releases reject them.
    base_clf = DecisionTreeClassifier(
        criterion="entropy",
        max_depth=1,
        max_features=None,
        max_leaf_nodes=None,
        min_samples_leaf=1,
        min_samples_split=2,
        random_state=None,
        splitter="best",
    )
    clf = AdaBoostClassifier(base_estimator=base_clf, n_estimators=n_estimator)

    for train, test in cv:
        x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
        clf = clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        acc.append(accuracy_score(y_test, y_pred))
        prec.append(precision_score(y_test, y_pred))
        recall.append(recall_score(y_test, y_pred))

    a = np.mean(acc)
    p = np.mean(prec)
    r = np.mean(recall)
    # Guard the harmonic mean against a zero denominator.
    f = 2 * p * r / (p + r) if (p + r) else 0.0
    return a, p, r, f
示例14: runAdaReal
def runAdaReal(arr):
    """Objective function: train and score one AdaBoost configuration.

    ``arr[0] * 100`` and ``arr[1] * 100`` are truncated to the tree depth
    and the number of estimators; ``arr[2]`` is the learning rate.
    Returns ``100`` when any of them is not positive, otherwise the
    negated AMS score.  Increments the module-level ``counter`` on every
    call and appends a line to ``logfile``.
    """
    global file_dir, nEvents, solutionFile, counter

    depth_raw, n_est_raw, lrn_rate = arr[0], arr[1], arr[2]
    depth = int(depth_raw * 100)
    n_est = int(n_est_raw * 100)

    print('iteration number ' + str(counter))
    counter += 1

    if depth <= 0 or n_est <= 0 or lrn_rate <= 0:
        print('return 100')
        return 100

    filename = ''.join(['adar_dep', str(depth), '_est', str(n_est), '_lrn', str(lrn_rate)])

    weak_learner = tree.DecisionTreeClassifier(max_depth=depth)
    bdt_real = AdaBoostClassifier(weak_learner,
                                  n_estimators=n_est,
                                  learning_rate=lrn_rate)

    print("AdaBoostReal training")
    train_features = sigtr[train_input].values
    train_labels = sigtr['Label'].values
    bdt_real.fit(train_features, train_labels)

    print("AdaBoostReal testing")
    bdt_real_pred = bdt_real.predict(sigtest[train_input].values)
    solnFile(filename, bdt_real_pred, sigtest['EventId'].values)
    print("AdaBoostReal finished")

    submission_path = file_dir + filename + '.out'
    ams_score = ams.AMS_metric(solutionFile, submission_path, nEvents)
    print(ams_score)
    logfile.write(filename + ': ' + str(ams_score) + '\n')
    return -1.0 * float(ams_score)
示例15: do_all_study
def do_all_study(X, y):
    """Plot learning and validation curves and print hold-out ROC AUC.

    Draws a learning curve (ROC AUC scoring) for a decision tree, gradient
    boosting, a random forest and AdaBoost.  Then, for gradient boosting
    and AdaBoost, draws a validation curve over ``n_estimators``, fits the
    model and prints its ROC AUC on the test split.

    NOTE(review): the ``X`` and ``y`` arguments are never used; all data
    comes from ``X_train``, ``y_train``, ``X_test`` and ``y_test`` in the
    enclosing scope -- confirm this is intended.
    """
    # One label per classifier.  The former trailing "Naive Bayes" label
    # had no classifier and was silently dropped by zip().
    names = ["Decision Tree", "Gradient Boosting",
             "Random Forest", "AdaBoost"]
    classifiers = [
        # SVC(),
        DecisionTreeClassifier(max_depth=10),
        GradientBoostingClassifier(max_depth=10, n_estimators=20, max_features=1),
        RandomForestClassifier(max_depth=10, n_estimators=20, max_features=1),
        AdaBoostClassifier()]
    for name, clf in zip(names, classifiers):
        _estimator, _score = plot_learning_curve(clf, X_train, y_train, scoring='roc_auc')

    param_name = 'n_estimators'
    param_range = [1, 5, 10, 20, 40]
    studied = [
        ("GradientBoostingClassifier",
         GradientBoostingClassifier(max_depth=10, n_estimators=20, max_features=1)),
        ("AdaBoost", AdaBoostClassifier()),
    ]
    for label, model in studied:
        plot_validation_curve(model, X_train, y_train,
                              param_name, param_range, scoring='roc_auc')
        model.fit(X_train, y_train)
        # Probability of the second class is used as the ranking score.
        y_score = model.predict_proba(X_test)[:, 1]
        print("ROC AUC %s: %0.4f" % (label, roc_auc_score(y_test, y_score)))