当前位置: 首页>>代码示例>>Python>>正文


Python ensemble.AdaBoostClassifier类代码示例

本文整理汇总了 Python 中 sklearn.ensemble.AdaBoostClassifier 的典型用法代码示例。如果您正苦于以下问题：Python AdaBoostClassifier 类的具体用法？Python AdaBoostClassifier 怎么用？Python AdaBoostClassifier 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了 AdaBoostClassifier 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: __init__

class Ensemble:
	'''
	Majority-vote ensemble built from four classifiers: a random forest,
	LDA, a single decision tree and AdaBoost.

	Constructing the object trains every model on ``data`` and stores the
	combined test-set prediction in ``self.pred``.
	'''

	def __init__(self, data):
		self.rf = RandomForestClassifier(
			n_estimators=80,
			n_jobs=-1,
			min_samples_split=45,
			criterion='entropy',
		)
		self.lda = LDA()
		self.dec = DecisionTreeClassifier(criterion='entropy')
		self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)

		self.make_prediction(data)

	def make_prediction(self, data):
		'''
		Fit all four models and fill ``self.pred`` with one voted label per
		test sample.

		``data`` must expose ``features_train``, ``labels_train`` and
		``features_test``. For every test sample the label predicted by the
		most models wins; on a tie the label that appears first in the order
		rf, lda, dec, ada is kept.
		'''
		train_x, train_y = data.features_train, data.labels_train
		# Fitting order is kept as rf -> lda -> dec -> ada.
		for model in (self.rf, self.lda, self.dec, self.ada):
			model.fit(train_x, train_y)

		self.pred = []

		test_x = data.features_test
		ada_pred = self.ada.predict(test_x)
		rf_pred = self.rf.predict(test_x)
		lda_pred = self.lda.predict(test_x)
		dec_pred = self.dec.predict(test_x)

		# One ballot per test sample, votes listed as rf, lda, dec, ada.
		ballots = [
			[rf_vote, lda_pred[i], dec_pred[i], ada_pred[i]]
			for i, rf_vote in enumerate(rf_pred)
		]

		# max() returns the first element with the highest count, i.e. the
		# earliest-listed model wins a tie.
		self.pred.extend(max(ballot, key=ballot.count) for ballot in ballots)
开发者ID:BHouwens,项目名称:KaggleProjects,代码行数:34,代码来源:ensemble.py

示例2: runAdaBoost

def runAdaBoost(arr):
    """Train and score one AdaBoost (SAMME) configuration; return a value to minimise.

    The original comments indicate this is used as the objective of a scipy
    optimiser, which is why all hyper-parameters arrive packed in one
    sequence and why the score is negated.

    Args:
        arr: sequence of three numbers. ``arr[0] * 100`` is truncated to the
            tree ``max_depth``, ``arr[1] * 100`` to ``n_estimators``, and
            ``arr[2]`` is the learning rate.

    Returns:
        ``100`` (a penalty value) when any decoded hyper-parameter is not
        strictly positive, otherwise the negated AMS score as a float.

    Relies on module-level names defined outside this block: ``counter``,
    ``file_dir``, ``nEvents``, ``solutionFile``, ``sigtr``, ``sigtest``,
    ``train_input``, ``solnFile``, ``ams`` and ``logfile``.
    """
    global file_dir, nEvents, solutionFile, counter
    print('iteration number ' + str(counter))
    counter += 1

    depth = int(arr[0] * 100)
    n_est = int(arr[1] * 100)
    lrn_rate = arr[2]
    if depth <= 0 or n_est <= 0 or lrn_rate <= 0:
        return 100

    fname = 'ada_dep' + str(depth) + '_est' + str(n_est) + '_lrn' + str(lrn_rate)
    # The learning rate used to be decoded and written into the file name but
    # never handed to the classifier, so varying arr[2] had no effect on the
    # model. It is now passed through.
    ada = AdaBoostClassifier(tree.DecisionTreeClassifier(max_depth=depth),
                             algorithm="SAMME",
                             n_estimators=n_est,
                             learning_rate=lrn_rate)
    print("AdaBoost training")
    ada.fit(sigtr[train_input].values, sigtr['Label'].values)
    print("AdaBoost testing")
    ada_pred = ada.predict(sigtest[train_input].values)
    solnFile(fname, ada_pred, sigtest['EventId'].values)
    print("AdaBoost finished")

    ams_score = ams.AMS_metric(solutionFile, file_dir + fname + '.out', nEvents)
    print(ams_score)
    logfile.write(fname + ': ' + str(ams_score) + '\n')
    # The caller minimises, so a higher AMS must map to a lower return value.
    return -1.0 * float(ams_score)
开发者ID:tibristo,项目名称:htautau,代码行数:27,代码来源:runAnalysis.py

示例3: main

def main():
    """Train AdaBoost over k-NN on train.csv and print predictions for test.csv as CSV.

    Reads ``train.csv`` and ``test.csv`` from the current directory (first
    row skipped as header), uses columns 1..7 as features and column 8 of
    the training file as the target, replaces NaNs with a constant, applies
    ``decomposition_pca`` (defined outside this block) and writes
    ``PassengerId,Survived`` rows to stdout.
    """
    # Passing a path lets genfromtxt open *and close* the file itself; the
    # previous open(...) handles were never closed.
    trainset = np.genfromtxt('train.csv', delimiter=',')[1:]
    X = np.array([row[1:8] for row in trainset])
    y = np.array([row[8] for row in trainset])
    # Missing values are replaced by 26.6.
    # NOTE(review): presumably a typical value for one feature column, but it
    # is applied to every column - confirm this is intended.
    X[np.isnan(X)] = 26.6

    testset = np.genfromtxt('test.csv', delimiter=',')[1:]
    test = np.array([row[1:8] for row in testset])
    test[np.isnan(test)] = 26.6

    X, test = decomposition_pca(X, test)

    # NOTE(review): AdaBoost needs a base estimator whose fit() accepts
    # sample_weight; confirm KNeighborsClassifier satisfies this in the
    # installed scikit-learn version.
    bdt = AdaBoostClassifier(base_estimator=KNeighborsClassifier(n_neighbors=20, algorithm='auto'),
                             algorithm="SAMME",
                             n_estimators=200)
    bdt.fit(X, y)

    print('PassengerId,Survived')
    # Predict the whole test matrix in one call instead of one 1-D row at a
    # time; ids start at 892 (presumably the first test PassengerId - verify).
    for i, label in enumerate(bdt.predict(test)):
        print('%d,%d' % (i + 892, int(label)))
开发者ID:kingr13,项目名称:entire-src,代码行数:32,代码来源:adaboost.py

示例4: train_adaboost

def train_adaboost(features, labels, learning_rate, n_lab, n_runs, n_estim, n_samples):
    """Train ``n_runs`` AdaBoost (SAMME.R) models for each of the first ``n_lab`` labels.

    Args:
        features, labels: full training set, forwarded to ``get_binary_sets``.
        learning_rate: AdaBoost learning rate.
        n_lab: number of distinct labels to keep (taken from the sorted
            unique values of ``labels``).
        n_runs: number of independently drawn binary sets / models per label.
        n_estim: number of boosting rounds per model.
        n_samples: forwarded to ``get_binary_sets``.

    Returns:
        ``(allLearners, used_labels)`` where ``allLearners[k]`` is the list of
        fitted models for ``used_labels[k]``.

    ``get_binary_sets`` and the ``*_progressbar`` helpers are defined outside
    this block.
    """
    # np.unique returns the distinct labels in sorted order.
    used_labels = np.unique(labels)[:n_lab]
    # Written so that it runs, and prints the same text, on Python 2 and 3.
    print(' '.join(['Taking ', str(n_lab), ' labels']))

    pbar = start_progressbar(len(used_labels), 'training adaboost for %i labels' % len(used_labels))
    allLearners = []
    for yy, targetLab in enumerate(used_labels):
        runs = []
        for _ in range(n_runs):
            feats, labs = get_binary_sets(features, labels, targetLab, n_samples)
            baseClf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=10, min_samples_split=10)
            # NOTE(review): this pre-fit looks redundant if AdaBoost clones
            # its base estimator before boosting - confirm, then consider
            # removing. Kept to preserve existing behaviour.
            baseClf.fit(feats, labs)
            ada_real = AdaBoostClassifier(base_estimator=baseClf,
                                          learning_rate=learning_rate,
                                          n_estimators=n_estim,
                                          algorithm="SAMME.R")
            # fit() returns the estimator itself, so the fitted model is stored.
            runs.append(ada_real.fit(feats, labs))
        allLearners.append(runs)
        update_progressbar(pbar, yy)
    end_progressbar(pbar)

    return allLearners, used_labels
开发者ID:aarslan,项目名称:action_rec,代码行数:26,代码来源:classifier_wrappers.py

示例5: test_oneclass_adaboost_proba

def test_oneclass_adaboost_proba():
    """predict_proba must cope with a training set that has a single class.

    Regression test for
    https://github.com/scikit-learn/scikit-learn/issues/7501
    """
    n_samples = len(X)
    # Every sample carries the same label.
    single_class_labels = np.ones(n_samples)

    model = AdaBoostClassifier()
    model.fit(X, single_class_labels)

    # One class -> one probability column, all ones.
    expected = np.ones((n_samples, 1))
    assert_array_almost_equal(model.predict_proba(X), expected)
开发者ID:daniel-perry,项目名称:scikit-learn,代码行数:7,代码来源:test_weight_boosting.py

示例6: createAdaBoostClassifier

def createAdaBoostClassifier(trainingVectors, targetValues):
    """Fit and return an AdaBoostClassifier weighted by the target values.

    Each sample's weight is its target value multiplied by 10000.
    NOTE(review): this assumes ``targetValues`` supports element-wise
    multiplication (e.g. a numpy array) and that samples whose target is 0
    are meant to receive zero weight - confirm with the caller.
    """
    booster_settings = dict(
        base_estimator=None,
        n_estimators=50,
        learning_rate=1.0,
        algorithm='SAMME.R',
        random_state=None,
    )
    clf = AdaBoostClassifier(**booster_settings)

    sample_weights = targetValues * 10000
    clf.fit(trainingVectors, targetValues, sample_weights)

    return clf
开发者ID:TaylorRogers,项目名称:Reddit-Predictor,代码行数:7,代码来源:AdaBoostClassifierfunctions.py

示例7: cvalidate

def cvalidate():
    """Grid-search an RBF SVC, boost the best one with AdaBoost, print the hold-out accuracy.

    Reads ``trainLabels.csv`` and ``train.csv`` from the current directory,
    holds out 30% of the rows (``random_state=0``), applies
    ``decomposition_pca`` (defined outside this block) and prints
    ``bdt.score`` on the held-out part.
    """
    # Passing a path lets genfromtxt open and close the file itself; the
    # previous open(...) handles were never closed.
    # NOTE(review): dtype 'f16' is numpy's extended-precision float, which
    # may not be available on every platform - confirm it is needed.
    targetset = np.genfromtxt('trainLabels.csv', dtype='f16')
    y = list(targetset)

    trainset = np.genfromtxt('train.csv', delimiter=',', dtype='f16')
    X = np.array(list(trainset))

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.3, random_state=0)

    X_train, X_test = decomposition_pca(X_train, X_test)

    # SVM hyper-parameter grid: C in 10^[6.5, 7.5), gamma in 10^[-2.5, 0.5),
    # both in steps of 0.25 in the exponent.
    c_range = 10.0 ** np.arange(6.5, 7.5, .25)
    gamma_range = 10.0 ** np.arange(-2.5, 0.5, .25)
    parameters = {'kernel': ['rbf'], 'C': c_range, 'gamma': gamma_range}
    svr = SVC()

    clf = grid_search.GridSearchCV(svr, parameters)
    clf.fit(X_train, y_train)

    # Boost the best SVC found by the grid search.
    bdt = AdaBoostClassifier(base_estimator=clf.best_estimator_,
                             algorithm="SAMME",
                             n_estimators=100)
    bdt.fit(X_train, y_train)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(bdt.score(X_test, y_test))
开发者ID:kingr13,项目名称:entire-src,代码行数:32,代码来源:adaboost.py

示例8: trainClassifier

def trainClassifier(dataDir, trialName, NUMFISH):
    """Train an AdaBoost classifier that identifies fish from image crops and pickle it.

    For each id ``tr`` in ``range(NUMFISH)`` every file in
    ``<dataDir>/process/<trialName>/FR_ID<tr>/`` is read, converted to
    grayscale and turned into a feature row (circular-HOG features followed
    by the mean intensity) labelled ``tr``.

    The fitted model is written to
    ``<dataDir>/process/<trialName>/boost<trialName>.p`` and the number of
    misclassified training samples is printed.

    Args:
        dataDir: root data directory.
        trialName: trial sub-directory name (also used in the pickle name).
        NUMFISH: number of fish ids / class labels.
    """
    ch = circularHOGExtractor(6, 4, 3)
    nFeats = ch.getNumFields() + 1  # +1 for the mean-intensity feature

    # Collect one block per fish and stack once at the end instead of
    # re-stacking the growing array on every iteration.
    dataBlocks = []
    targetBlocks = []
    for tr in range(NUMFISH):
        directory = dataDir + '/process/' + trialName + '/FR_ID' + str(tr) + '/'
        files = os.listdir(directory)
        thisData = np.zeros((len(files), nFeats))
        for i, imName in enumerate(files):
            sample = cv2.imread(directory + imName)
            thisIm = cv2.cvtColor(sample, cv2.COLOR_BGR2GRAY)
            thisData[i, :] = np.hstack((ch.extract(thisIm), np.mean(thisIm)))
        dataBlocks.append(thisData)
        targetBlocks.append(tr * np.ones(len(files)))

    # With NUMFISH == 0 nothing was collected; fall back to empty arrays as
    # the original accumulation did.
    trainData = np.vstack(dataBlocks) if dataBlocks else np.array([])
    targetData = np.hstack(targetBlocks) if targetBlocks else np.array([])

    clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=50)
    clf.fit(trainData, targetData)

    # Use a context manager so the pickle file is flushed and closed.
    with open(dataDir + '/process/' + trialName + '/boost' + trialName + '.p', "wb") as modelFile:
        pickle.dump(clf, modelFile)

    y_pred = clf.predict(trainData)
    print("Number of mislabeled points out of a total %d points : %d" % (trainData.shape[0],(targetData != y_pred).sum()))
开发者ID:ctorney,项目名称:fishOfInterest,代码行数:29,代码来源:trainClassifier.py

示例9: AB_results

def AB_results(): # AdaBoostClassifier
	"""Report and plot AdaBoost test accuracy for several ``n_estimators`` values.

	Uses the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
	(defined outside this block), prints one row per setting and saves the
	accuracy curve to ``temp_plain.png``.
	"""
	print("--------------AdaBoostClassifier-----------------")
	rang = [60, 80]
	
	# print "--------------With HOG-----------------"
	# ans = []
	# print "n_estimators	Accuracy"
	# for i in rang:
	# 	clf = AdaBoostClassifier(n_estimators=i)
	# 	clf.fit(X_train_hog, y_train)
	# 	mean_accuracy = clf.score(X_test_hog, y_test)
	# 	print i, "	", mean_accuracy
	# 	ans.append('('+str(i)+", "+str(mean_accuracy)+')')
	# print ans

	# plt.plot(rang, ans, linewidth=2.0)
	# plt.xlabel("n_estimators")
	# plt.ylabel("mean_accuracy")
	# plt.savefig("temp_hog.png")

	print("\n--------------Without HOG-----------------")
	ans = []         # "(n, accuracy)" strings, for the printed summary only
	accuracies = []  # numeric accuracies, used for the plot
	print("n_estimators\tAccuracy")
	for i in rang:
		clf = AdaBoostClassifier(n_estimators=i)
		clf.fit(X_train, y_train)
		mean_accuracy = clf.score(X_test, y_test)
		print('%s \t%s' % (i, mean_accuracy))
		accuracies.append(mean_accuracy)
		ans.append('('+str(i)+", "+str(mean_accuracy)+')')
	print(ans)
	# Plot the numeric accuracies; the formatted strings in ``ans`` were
	# previously passed here, which does not give a numeric y-axis.
	plt.plot(rang, accuracies, linewidth=2.0)
	plt.xlabel("n_estimators")
	plt.ylabel("mean_accuracy")
	plt.savefig("temp_plain.png")
开发者ID:vickianand,项目名称:object-classification-for-surveillance,代码行数:35,代码来源:test_classifiers.py

示例10: prediction

def prediction(feat,label):
    """Sweep the base-tree depth of an AdaBoost classifier and score each setting.

    The data is split 75/25 (``random_state=0``). For every ``max_depth``
    from 1 to 9 a 100-round AdaBoost over decision trees is fitted on the
    training part and evaluated on the held-out part.

    Returns:
        Three parallel lists: the depths tried, the accuracy per depth and
        the ROC AUC per depth.

    NOTE(review): the AUC is computed from hard ``predict`` labels rather
    than from scores/probabilities - confirm that is intended.
    """
    split = cross_validation.train_test_split(feat, label, test_size = 0.25, random_state = 0)
    x_train, x_test, y_train, y_test = split

    depths_tried, accuracies, aucs = [], [], []
    # for depth in range(1,10):
    #     clf = tree.DecisionTreeClassifier(max_depth = depth)
    #     clf.fit(x_train,y_train)
    #     predictions = clf.predict(x_test)
    #     accuracy = clf.score(x_test,y_test)
    #     auc = metrics.roc_auc_score(y_test,predictions)
    #     num_leaves.append(depth)
    #     accuracy_score.append(accuracy)
    #     auc_score.append(auc)

    for tree_depth in range(1, 10):
        weak_learner = tree.DecisionTreeClassifier(max_depth = tree_depth)
        booster = AdaBoostClassifier(weak_learner, n_estimators = 100)
        booster.fit(x_train, y_train)

        hard_labels = booster.predict(x_test)
        fold_accuracy = booster.score(x_test, y_test)
        fold_auc = metrics.roc_auc_score(y_test, hard_labels)

        depths_tried.append(tree_depth)
        accuracies.append(fold_accuracy)
        aucs.append(fold_auc)

    return depths_tried, accuracies, aucs
开发者ID:yangeric7,项目名称:BigDataProject2016,代码行数:27,代码来源:decisionTree.py

示例11: __init__

    def __init__(
        self,
        n_estimators=50,
        learning_rate=1.0,
        algorithm='SAMME.R',
        criterion='gini',
        splitter='best',
        max_depth=5,
        min_samples_split=2,
        min_samples_leaf=1,
        max_features=None,
        random_state=None,
        min_density=None,
        compute_importances=None,
    ):
        """Set up AdaBoost over a decision tree configured from flat keyword arguments.

        The first three arguments configure the boosting itself; the
        remaining ones are stored on ``self`` and forwarded to the decision
        tree that is used as base estimator.
        """
        # A default tree is instantiated only to record the estimator class.
        prototype = DecisionTreeClassifier()
        self.base_estimator = prototype
        self.base_estimator_class = prototype.__class__

        # Boosting settings.
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.algorithm = algorithm

        # Base-tree settings.
        self.criterion = criterion
        self.splitter = splitter
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.random_state = random_state
        self.min_density = min_density
        self.compute_importances = compute_importances

        tree_kwargs = dict(
            criterion=self.criterion,
            splitter=self.splitter,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            max_features=self.max_features,
            random_state=self.random_state,
            min_density=self.min_density,
            compute_importances=self.compute_importances,
        )
        self.estimator = self.base_estimator_class(**tree_kwargs)

        # Hand the configured tree to the AdaBoostClassifier initialiser.
        AdaBoostClassifier.__init__(
            self,
            base_estimator=self.estimator,
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            algorithm=self.algorithm,
        )
开发者ID:euclides-filho,项目名称:kaggle-allstate-purchase,代码行数:25,代码来源:imports.py

示例12: train_adaboost

def train_adaboost(features, labels):
    """Train ``N_RUNS`` AdaBoost (SAMME.R) stump ensembles for each of the first ``N_LAB`` labels.

    Args:
        features, labels: full training set, forwarded to ``get_binary_sets``.

    Returns:
        A list with one entry per kept label; each entry is the list of
        fitted models for that label.

    Reads ``N_LAB``, ``N_RUNS``, ``N_ESTIM`` and ``learning_rate`` from the
    enclosing module; ``get_binary_sets`` is defined outside this block.
    NOTE(review): ``learning_rate`` is neither a parameter nor an upper-case
    constant like the other settings - confirm it exists at module level.
    """
    # Written so that it runs, and prints the same text, on Python 2 and 3.
    print(' '.join(['TAKING ONLY ', str(N_LAB), ' LABELS FOR SPEED ']))
    # np.unique returns the distinct labels in sorted order.
    uniqLabels = np.unique(labels)[:N_LAB]

    allLearners = []
    for targetLab in uniqLabels:
        print(' '.join(['processing for label ', str(targetLab)]))
        runs = []
        for _ in range(N_RUNS):
            feats, labs = get_binary_sets(features, labels, targetLab)
            # Depth-1 tree, i.e. a decision stump, as the weak learner.
            baseClf = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
            # NOTE(review): this pre-fit looks redundant if AdaBoost clones
            # its base estimator before boosting - confirm, then consider
            # removing. Kept to preserve existing behaviour.
            baseClf.fit(feats, labs)
            ada_real = AdaBoostClassifier(base_estimator=baseClf,
                                          learning_rate=learning_rate,
                                          n_estimators=N_ESTIM,
                                          algorithm="SAMME.R")
            # fit() returns the estimator itself, so the fitted model is stored.
            runs.append(ada_real.fit(feats, labs))
        allLearners.append(runs)

    return allLearners
开发者ID:aarslan,项目名称:actionRecognition_old,代码行数:25,代码来源:demo.py

示例13: classify

def classify(x, y, cv, n_estimator=50):
    """Cross-validate AdaBoost over entropy stumps and return averaged metrics.

    Args:
        x, y: feature matrix and labels, indexable by the index arrays
            yielded by ``cv``.
        cv: iterable of ``(train_indices, test_indices)`` pairs.
        n_estimator: number of boosting rounds.

    Returns:
        ``(accuracy, precision, recall, f1)`` where the first three are means
        over the folds and f1 is computed from the mean precision and recall.

    Side effect: the module-level ``clf`` is rebound to the classifier, so
    after the call it holds the model fitted on the last fold.
    """
    global clf

    stump = DecisionTreeClassifier(
        criterion="entropy",
        splitter="best",
        max_depth=1,
        min_samples_split=2,
        min_samples_leaf=1,
        max_features=None,
        max_leaf_nodes=None,
        random_state=None,
        min_density=None,
        compute_importances=None,
    )
    clf = AdaBoostClassifier(base_estimator=stump, n_estimators=n_estimator)

    # One (accuracy, precision, recall) triple per fold.
    fold_metrics = []
    for train, test in cv:
        x_train, y_train = x[train], y[train]
        x_test, y_test = x[test], y[test]
        clf = clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        fold_metrics.append((
            accuracy_score(y_test, y_pred),
            precision_score(y_test, y_pred),
            recall_score(y_test, y_pred),
        ))

    a, p, r = (np.mean([triple[k] for triple in fold_metrics]) for k in range(3))
    f = 2 * p * r / (p + r)
    return a, p, r, f
开发者ID:harrylclc,项目名称:ist557,代码行数:29,代码来源:boosting.py

示例14: runAdaReal

def runAdaReal(arr):
    """Train and score one AdaBoost configuration (default algorithm); return a value to minimise.

    Args:
        arr: sequence of three numbers. ``arr[0] * 100`` is truncated to the
            tree ``max_depth``, ``arr[1] * 100`` to ``n_estimators``, and
            ``arr[2]`` is the learning rate.

    Returns:
        ``100`` (a penalty value) when any decoded hyper-parameter is not
        strictly positive, otherwise the negated AMS score as a float.

    Relies on module-level names defined outside this block: ``counter``,
    ``file_dir``, ``nEvents``, ``solutionFile``, ``sigtr``, ``sigtest``,
    ``train_input``, ``solnFile``, ``ams`` and ``logfile``.
    """
    global file_dir, nEvents, solutionFile, counter
    depth = int(arr[0] * 100)
    n_est = int(arr[1] * 100)
    lrn_rate = arr[2]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('iteration number ' + str(counter))
    counter += 1
    if depth <= 0 or n_est <= 0 or lrn_rate <= 0:
        print('return 100')
        return 100

    filename = 'adar_dep' + str(depth) + '_est' + str(n_est) + '_lrn' + str(lrn_rate)
    bdt_real = AdaBoostClassifier(
        tree.DecisionTreeClassifier(max_depth=depth),
        n_estimators=n_est,
        learning_rate=lrn_rate)
    print("AdaBoostReal training")
    bdt_real.fit(sigtr[train_input].values, sigtr['Label'].values)
    print("AdaBoostReal testing")
    bdt_real_pred = bdt_real.predict(sigtest[train_input].values)
    solnFile(filename, bdt_real_pred, sigtest['EventId'].values)
    print("AdaBoostReal finished")

    ams_score = ams.AMS_metric(solutionFile, file_dir + filename + '.out', nEvents)
    print(ams_score)
    logfile.write(filename + ': ' + str(ams_score) + '\n')
    # Negated so that a higher AMS gives a lower return value.
    return -1.0 * float(ams_score)
开发者ID:tibristo,项目名称:htautau,代码行数:25,代码来源:runAnalysis.py

示例15: do_all_study

def do_all_study(X,y):
    """Plot learning curves for four classifiers, then validation curves and ROC AUC for two.

    Steps, in order:
      1. ``plot_learning_curve`` (ROC AUC scoring) for a decision tree,
         gradient boosting, a random forest and AdaBoost.
      2. For gradient boosting and then AdaBoost: ``plot_validation_curve``
         over ``n_estimators``, fit on the training data, and print the ROC
         AUC of the positive-class probability on the test data.

    NOTE(review): the ``X`` and ``y`` arguments are not used; the function
    reads ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the
    enclosing module - confirm that is intended.
    """
    # Name and estimator are kept together so they cannot drift apart; the
    # previous separate name list had a fifth entry ("Naive Bayes") with no
    # matching classifier, which zip() silently dropped.
    classifiers = [
        #("SVC", SVC()),
        ("Decision Tree", DecisionTreeClassifier(max_depth=10)),
        ("Gradient Boosting", GradientBoostingClassifier(max_depth=10, n_estimators=20, max_features=1)),
        ("Random Forest", RandomForestClassifier(max_depth=10, n_estimators=20, max_features=1)),
        ("AdaBoost", AdaBoostClassifier()),
    ]
    for _name, clf in classifiers:
        _estimator, _score = plot_learning_curve(clf, X_train, y_train, scoring='roc_auc')

    param_name = 'n_estimators'
    param_range = [1, 5, 10, 20, 40]

    # Same validation-curve / fit / AUC procedure for both boosted models.
    studies = [
        ("GradientBoostingClassifier",
         GradientBoostingClassifier(max_depth=10, n_estimators=20, max_features=1)),
        ("AdaBoost", AdaBoostClassifier()),
    ]
    for label, model in studies:
        plot_validation_curve(model, X_train, y_train,
                              param_name, param_range, scoring='roc_auc')
        model.fit(X_train, y_train)
        # Column 1 of predict_proba is the probability of the second class.
        positive_proba = model.predict_proba(X_test)[:, 1]
        print("ROC AUC %s: %0.4f" % (label, roc_auc_score(y_test, positive_proba)))
开发者ID:macoutouly,项目名称:Classif_01092105,代码行数:34,代码来源:StartingKit2.py


注:本文中的sklearn.ensemble.AdaBoostClassifier类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。