当前位置: 首页>>代码示例>>Python>>正文


Python AdaBoostClassifier.decision_function方法代码示例

本文整理汇总了Python中sklearn.ensemble.AdaBoostClassifier.decision_function方法的典型用法代码示例。如果您正苦于以下问题:Python AdaBoostClassifier.decision_function方法的具体用法?Python AdaBoostClassifier.decision_function怎么用?Python AdaBoostClassifier.decision_function使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.AdaBoostClassifier的用法示例。


在下文中一共展示了AdaBoostClassifier.decision_function方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
def main():
    """Train one boosted decision tree per pair of input variables.

    Reads the merged training CSV, assigns each row to the training sample
    (about 90%) or the validation sample (about 10%) using one uniform random
    number per row, and then, for every pair of columns taken from
    ``data_train.columns.values[1:-1]``:

    * fits an AdaBoost (SAMME) classifier over depth-3 decision trees,
    * draws the validation ROC curve (as 1 - FPR versus TPR) on figure 1,
    * saves a histogram of the validation decision scores for label 1 (red)
      and label 0 (blue) to ``<label>bdtout.pdf``.

    The combined ROC figure is finally written to ``rocmva.pdf``.
    """
    print('Loading training data ...')
    data_train = pd.read_csv('csv/CamKt12LCTopoSplitFilteredMu100SmallR30YCut414tev_350_500_vxp_0_99-merged.csv')

    # One random number per row decides which sample the row belongs to.
    r = np.random.rand(data_train.shape[0])
    in_train = r < 0.9
    in_valid = r >= 0.9

    Y_train = data_train['label'][in_train]
    Y_valid = data_train['label'][in_valid]
    valid_labels = Y_valid.values

    for varset in itertools.combinations(data_train.columns.values[1:-1], 2):
        columns = list(varset)
        print(columns)
        X_train = data_train[columns][in_train]
        X_valid = data_train[columns][in_valid]

        # The leaf size is meant as an absolute count (5% of the training
        # rows). scikit-learn interprets a float as a *fraction* of the
        # samples, so the count has to be passed as an int (and be >= 1).
        min_leaf = max(1, int(0.05 * len(X_train)))
        dt = DC(max_depth=3, min_samples_leaf=min_leaf)
        abc = ABC(dt, algorithm='SAMME',
                  n_estimators=800,
                  learning_rate=0.5)
        print('Training classifier with all the data..')
        abc.fit(X_train.values, Y_train.values)
        print('Done.. Applying to validation sample and drawing ROC')
        prob_predict_valid = abc.predict(X_valid.values)
        print(prob_predict_valid)
        # Evaluate the decision scores once and reuse them below.
        Y_score = abc.decision_function(X_valid.values)
        print(Y_score)
        fpr, tpr, _ = roc_curve(valid_labels, Y_score)
        labelstring = 'And'.join(var.replace('_', '') for var in varset)
        print(labelstring)

        # Re-select figure 1 on every iteration: figure 2 is made current
        # further down, so without this only the first ROC curve would land
        # on the ROC figure.
        plt.figure(1)
        plt.plot(tpr, (1 - fpr), label=labelstring)

        # Start from an empty figure 2 so each saved PDF only contains the
        # score distributions of the current variable pair.
        plt.figure(2)
        plt.clf()
        plt.hist(Y_score[valid_labels == 1.].ravel(),
                 color='r', alpha=0.5, range=(-1.0, 1.0), bins=50)
        plt.hist(Y_score[valid_labels == 0.].ravel(),
                 color='b', alpha=0.5, range=(-1.0, 1.0), bins=50)
        plt.xlabel("scikit-learn BDT output")
        plt.savefig(labelstring + 'bdtout.pdf')

    # Finish and save the combined ROC figure.
    plt.figure(1)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.ylabel('1- Background Efficiency')
    plt.xlabel('Signal Efficiency')
    plt.title('ROC Curve')
    plt.legend(loc="lower left", prop={'size': 6})
    plt.savefig('rocmva.pdf')
开发者ID:tibristo,项目名称:BosonTagging,代码行数:58,代码来源:TaggerMVA.py

示例2: test_iris

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
def test_iris():
    """Check AdaBoostClassifier consistency on the iris dataset.

    For both boosting algorithms the fitted classes, the width of
    ``predict_proba`` and ``decision_function`` outputs, and a training
    accuracy above 0.9 are verified.
    """
    expected_classes = np.unique(iris.target)
    samme_model = None
    samme_proba = None

    for algorithm in ('SAMME', 'SAMME.R'):
        model = AdaBoostClassifier(algorithm=algorithm)
        model.fit(iris.data, iris.target)

        assert_array_equal(expected_classes, model.classes_)
        class_proba = model.predict_proba(iris.data)
        if algorithm == "SAMME":
            # Kept for the regression check after the loop.
            samme_model, samme_proba = model, class_proba
        assert_equal(class_proba.shape[1], len(expected_classes))
        decision = model.decision_function(iris.data)
        assert_equal(decision.shape[1], len(expected_classes))

        accuracy = model.score(iris.data, iris.target)
        assert accuracy > 0.9, (
            "Failed with algorithm %s and score = %f" % (algorithm, accuracy))

    # Somewhat hacky regression test: prior to
    # ae7adc880d624615a34bafdb1d75ef67051b8200,
    # predict_proba returned SAMME.R values for SAMME.
    # Flipping the algorithm on the fitted SAMME model must therefore change
    # every probability it reports.
    samme_model.algorithm = "SAMME.R"
    proba_shift = np.abs(samme_model.predict_proba(iris.data) - samme_proba)
    assert_array_less(0, proba_shift)
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:29,代码来源:test_weight_boosting.py

示例3: test_classification_toy

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
def test_classification_toy():
    """Check classification on the toy dataset for both boosting algorithms."""
    for algorithm in ('SAMME', 'SAMME.R'):
        model = AdaBoostClassifier(algorithm=algorithm, random_state=0)
        model.fit(X, y_class)

        # Predictions and the learned class set must match the toy targets.
        predicted = model.predict(T)
        assert_array_equal(predicted, y_t_class)
        known_classes = np.unique(np.asarray(y_t_class))
        assert_array_equal(known_classes, model.classes_)

        # Two-class problem: two probability columns, one decision score
        # per sample.
        proba_shape = model.predict_proba(T).shape
        assert_equal(proba_shape, (len(T), 2))
        decision_shape = model.decision_function(T).shape
        assert_equal(decision_shape, (len(T),))
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:11,代码来源:test_weight_boosting.py

示例4: n1check

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
def n1check(d_train, d_test, opts):

    # Load the data with no weights and put it into panda format
    # for easier manipulation
    pd_train = pd.DataFrame(d_train.getDataNoWeight())
    pd_test  = pd.DataFrame(d_test.getDataNoWeight())

    # Holder for results
    results = {}

    # Setup classifier
    clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=opts.maxdepth),
                             n_estimators = opts.ntrees,
                             learning_rate = opts.lrate)

    # Train the classifier on total data set for comparison
    clf.fit(pd_train, d_train.targets)
    results['total'] = roc_auc_score(d_test.targets, clf.decision_function(pd_test))


    # Loop over the variables and store the results in dict
    keys    = d_train.t_varnames
    for i in range(len(keys)):
        
        sub_train = pd_train.drop(i,axis=1)
        sub_test  = pd_test.drop(i,axis=1)

        clf.fit(sub_train, d_train.targets)
        results[keys[i]] = roc_auc_score(d_test.targets, clf.decision_function(sub_test))


    # Now that we have the results, print all information
    print "--------------------------------------------"
    for key in results:
        print "Leaving out ", key, "gives score: ", results[key]
    print ""
开发者ID:mrelich,项目名称:MuonAna,代码行数:38,代码来源:nMinus1Check.py

示例5: test_iris

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
def test_iris():
    """Check consistency on dataset iris.

    Both boosting algorithms must recover the iris classes, report one
    probability / decision column per class and reach a training accuracy
    above 0.9.
    """
    expected_classes = np.unique(iris.target)

    for algorithm in ('SAMME', 'SAMME.R'):
        model = AdaBoostClassifier(algorithm=algorithm)
        model.fit(iris.data, iris.target)

        assert_array_equal(expected_classes, model.classes_)
        proba_width = model.predict_proba(iris.data).shape[1]
        assert_equal(proba_width, len(expected_classes))
        decision_width = model.decision_function(iris.data).shape[1]
        assert_equal(decision_width, len(expected_classes))

        accuracy = model.score(iris.data, iris.target)
        assert accuracy > 0.9, (
            "Failed with algorithm %s and score = %f" % (algorithm, accuracy))
开发者ID:Jim-Holmstroem,项目名称:scikit-learn,代码行数:17,代码来源:test_weight_boosting.py

示例6: main

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
def main():
    """Draw one validation ROC curve per pair of input variables.

    Reads ``<Algorithm>merged.csv``, splits the rows roughly 50/50 into a
    training and a validation sample using one uniform random number per row,
    and for every pair of columns from ``data_train.columns.values[1:-1]``
    fits an AdaBoost (SAMME) classifier over depth-3 decision trees. The ROC
    curve (1 - FPR versus TPR) built from the predicted probability of the
    second class is drawn in its own colour, and the combined plot is saved
    to ``<Algorithm>rocmva.pdf``.
    """
    Algorithm = 'CamKt12LCTopoSplitFilteredMu67SmallR0YCut9'
    print('Loading training data ...')

    data_train = pd.read_csv(Algorithm + 'merged.csv')
    r = np.random.rand(data_train.shape[0])
    in_train = r < 0.5
    in_valid = r >= 0.5

    # Label vectors for the two samples.
    Y_train = data_train['label'].values[in_train]
    Y_valid = data_train['label'].values[in_valid]

    # Materialise the pairs so they can simply be counted; this replaces a
    # hand-written factorial formula whose "/" yields a float under true
    # division, which np.linspace does not accept as a sample count.
    varcombinations = list(
        itertools.combinations(data_train.columns.values[1:-1], 2))
    colors = plt.get_cmap('jet')(np.linspace(0, 1.0, len(varcombinations)))

    for varset, color in zip(varcombinations, colors):
        columns = list(varset)
        print(columns)
        X_train = data_train[columns].values[in_train]
        X_valid = data_train[columns].values[in_valid]

        # The leaf size is meant as an absolute count (5% of the training
        # rows). scikit-learn interprets a float as a *fraction* of the
        # samples, so the count has to be passed as an int (and be >= 1).
        min_leaf = max(1, int(0.05 * len(X_train)))
        dt = DC(max_depth=3, min_samples_leaf=min_leaf)
        abc = ABC(dt, algorithm='SAMME',
                  n_estimators=8,
                  learning_rate=0.5)
        print('Training classifier with all the data..')
        abc.fit(X_train, Y_train)
        print('Done.. Applying to validation sample and drawing ROC')
        prob_predict_valid = abc.predict_proba(X_valid)[:, 1]
        fpr, tpr, _ = roc_curve(Y_valid, prob_predict_valid)
        labelstring = ' And '.join(var.replace('_', '') for var in varset)
        print(labelstring)
        plt.plot(tpr, (1 - fpr), label=labelstring, color=color)

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.ylabel('1- Background Efficiency')
    plt.xlabel('Signal Efficiency')
    plt.title(Algorithm + ' ROC Curve')
    plt.legend(loc="lower left", prop={'size': 6})
    plt.savefig(Algorithm + 'rocmva.pdf')
开发者ID:wbhimji,项目名称:BosonTagging,代码行数:50,代码来源:TaggerMVA.py

示例7: ada_boost

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
def ada_boost(X_train, X_test, y_train, y_test, C=1):
    """Fit an AdaBoost classifier of decision stumps and report on the test set.

    The training samples are regrouped so that those labelled 1 come first,
    followed by all others, before fitting 200 SAMME-boosted depth-1 trees.
    Returns the test-set predictions together with the
    ``classification_report`` text. ``C`` is accepted but not used.
    """
    # Pair every sample with its label once, then split by label.
    labelled = list(zip(X_train, y_train))
    ones = [(sample, label) for sample, label in labelled if label == 1]
    others = [(sample, label) for sample, label in labelled if not label == 1]

    y1 = [label for _, label in ones]
    y2 = [label for _, label in others]
    print(y1.count(1))
    print(y2.count(0))

    X1 = np.asarray([sample for sample, _ in ones])
    X2 = np.asarray([sample for sample, _ in others])
    X = np.concatenate((X1, X2))
    y = np.concatenate((np.asarray(y1), np.asarray(y2)))

    # Boosted decision stumps.
    stump = DecisionTreeClassifier(max_depth=1)
    bdt = AdaBoostClassifier(stump, algorithm="SAMME", n_estimators=200)
    bdt.fit(X, y)

    # Decision scores on the training data; only their type is reported.
    twoclass_output = bdt.decision_function(X)
    print(type(twoclass_output))

    y_pre = bdt.predict(X_test)
    report = classification_report(y_test, y_pre)
    return y_pre, report
开发者ID:chaozc,项目名称:yelp_review_quality_analysis,代码行数:43,代码来源:non_text_feature.py

示例8: zip

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
# Script fragment: draws the decision regions with the training points on
# top, then a histogram of the classifier's decision scores per class.
# xx, yy, Z, class_names, plot_colors, the axis limits, clf and the
# Xdf_train / Ydf_train frames are defined earlier in the original script.

# Filled contour of Z over the (xx, yy) grid.
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
plt.axis("tight")

# Plot the training points
for i, n, c in zip(range(2), class_names, plot_colors):
    # Positions at which the 'default_Yes' column equals class index i.
    idx = np.where(Ydf_train['default_Yes'] == i)
    # Restrict to rows with student == 1.0 and plot column 0 against column 2.
    # NOTE(review): idx is computed on the full Ydf_train but applied to the
    # student-filtered frame -- confirm the rows line up as intended.
    # NOTE(review): `.ix` is deprecated and absent from recent pandas
    # releases -- confirm the pandas version this script targets.
    plt.scatter(Xdf_train[Xdf_train['student']==1.0].ix[idx].ix[:,0], Xdf_train[Xdf_train['student']==1.0].ix[idx].ix[:,2],c=c, cmap=plt.cm.Paired,label="Class %s" % n)
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.legend(loc='upper right')
plt.xlabel("Decision Boundary")
plt.show()


# Plot the two-class decision scores
twoclass_output = clf.decision_function(Xdf_train)
# Use the observed score range so both class histograms share the same bins.
plot_range = (twoclass_output.min(), twoclass_output.max())
plt.subplot(132)
for i, n, c in zip(range(2), class_names, plot_colors):
    idx = np.where(Ydf_train['default_Yes'] == i)
    plt.hist(twoclass_output[idx],
             bins=10,
             range=plot_range,
             facecolor=c,
             label='Class %s' % n,
             alpha=.5)
# Add 20% headroom above the tallest bar (the legend sits in the upper left).
x1, x2, y1, y2 = plt.axis()
plt.axis((x1, x2, y1, y2 * 1.2))
plt.legend(loc='upper left')
plt.ylabel('Samples')
plt.xlabel('Decision Scores')
开发者ID:golbeck,项目名称:Classification,代码行数:33,代码来源:default_adaboost.py

示例9: __init__

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
class adaBoost:
    __all__=['run','plotFeatureRanking','plotScores']

    def __init__(self, foundVariables, trainingData, trainingClasses, trainingWeights, testingData, testingClasses, adaName, bkg_name):
        """Configure the boosted decision tree and store the datasets.

        Keyword args:
        foundVariables -- The list of the names of found variables, can get using Sample_x.returnFoundVariables()
        trainingData -- The training data
        trainingClasses -- The training data classes
        trainingWeights -- The per-sample weights passed to fit() in run()
        testingData -- the testing data
        testingClasses -- the testing data classes
        adaName -- the name of the object (eg. sig+bkg_name)
        bkg_name -- stored on the instance as self.bkg_name
        """
        # Identification and bookkeeping.
        self.name = adaName
        self.bkg_name = bkg_name
        self.foundVariables = foundVariables
        self.elapsed = 0.0

        # Training sample.
        self.trainingData = trainingData
        self.trainingClasses = trainingClasses
        self.trainingWeights = trainingWeights

        # Testing sample.
        self.testingData = testingData
        self.testingClasses = testingClasses

        # 400 SAMME-boosted trees of depth 4 with at least 100 samples per leaf.
        # NOTE(review): compute_importances looks like an option of old
        # scikit-learn releases only -- confirm the targeted version.
        base_tree = DecisionTreeClassifier(compute_importances=True,
                                           max_depth=4,
                                           min_samples_split=2,
                                           min_samples_leaf=100)
        self.ada = AdaBoostClassifier(base_tree,
                                      n_estimators=400,
                                      learning_rate=0.5,
                                      algorithm="SAMME",
                                      compute_importances=True)

    def returnName(self):
        """Return the name given to this object at construction (``adaName``)."""
        return self.name

    def run(self):
        """Run the fitting and testing.

        Fits the classifier on the training data (with sample weights) and
        records the elapsed time, then stores on the instance:

        * ``importancesada`` / ``indicesada`` / ``variableNamesSorted`` --
          feature importances, their descending order and the matching names,
        * ``std_mat`` -- standard deviation of the importances over the
          individual estimators,
        * ``score`` and ``params`` -- test-set score and estimator parameters,
        * ``twoclass_output`` / ``twoclass_output_train`` -- decision scores
          on the testing and training data,
        * ``class_proba`` -- last column of ``predict_proba`` on the test data.

        The ranking of (at most) the 12 most important features is printed.
        """
        # Fit and time the training.
        start = clock()
        print('starting training on AdaBoostClassifier')
        self.ada.fit(self.trainingData, self.trainingClasses, self.trainingWeights)
        self.elapsed = clock() - start
        print('time taken for training: ' + str(self.elapsed))

        # Importance of each variable in the BDT and the score on the test data.
        self.importancesada = self.ada.feature_importances_
        print('importances')
        print(self.importancesada)
        self.score = self.ada.score(self.testingData, self.testingClasses)
        self.params = self.ada.get_params()
        self.std_mat = np.std([tree.feature_importances_ for tree in self.ada.estimators_],
                              axis=0)
        # Feature indices ordered from most to least important.
        self.indicesada = np.argsort(self.importancesada)[::-1]
        self.variableNamesSorted = [self.foundVariables[i] for i in self.indicesada]

        # Print the feature ranking. The original always printed 12 rows and
        # raised IndexError with fewer features, so cap at what is available.
        print("Feature ranking:")
        n_ranked = min(12, len(self.indicesada))
        for f in range(n_ranked):
            print("%d. feature %d (%f)" % (f + 1, self.indicesada[f], self.importancesada[self.indicesada[f]]) + " " + self.variableNamesSorted[f])

        self.twoclass_output = self.ada.decision_function(self.testingData)
        self.twoclass_output_train = self.ada.decision_function(self.trainingData)
        self.class_proba = self.ada.predict_proba(self.testingData)[:, -1]



    def plotFeatureRanking(self):
        """Show a bar chart of the feature importances, most important first.

        Uses the ranking computed by ``run()`` (``importancesada``,
        ``indicesada``, ``std_mat`` and ``variableNamesSorted``); the
        per-estimator standard deviation is drawn as the error bar.
        """
        # We need this to run in batch because it complains about not being able to open display
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
        import matplotlib.pyplot as plt
        import pylab as pl

        # Derive tick positions and axis range from the actual number of
        # features instead of assuming exactly 12 of them.
        n_features = len(self.variableNamesSorted)
        positions = range(n_features)

        # Plot the feature ranking.
        pl.figure()
        pl.title("Feature importances Ada")
        pl.bar(positions, self.importancesada[self.indicesada],
               color="r", yerr=self.std_mat[self.indicesada], align="center")
        pl.xticks(positions, self.variableNamesSorted)
        pl.xlim([-1, n_features])
        pl.show()

    def plotScores(self, returnROC = False, rocInput = []):
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
        import matplotlib.pyplot as plt
        import pylab as pl
        from sklearn.metrics import roc_curve, auc

        plot_colors = "rb"
        plot_step = 1000.0
        class_names = "AB"
    # Plot the training points 
        pl.subplot(131)
        for i, n, c in zip(xrange(2), class_names, plot_colors):
            idx = np.where(self.trainingClasses == i)
            pl.scatter(self.trainingData[idx, 0], self.trainingData[idx, 1],
                       c=c, cmap=pl.cm.Paired,
                       label="Class %s" % n)
        pl.axis("tight")
#.........这里部分代码省略.........
开发者ID:tibristo,项目名称:mva,代码行数:103,代码来源:adaBoost.py

示例10: AdaBoostClassifier

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
#################
#     2 JET     #
#################

# Create BDT object.
# 200 SAMME-boosted trees of depth 3 with learning rate 0.15.
# NOTE(review): a float min_samples_split is read as a fraction of the
# samples by recent scikit-learn releases -- confirm the targeted version.
bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3, min_samples_split=0.05),
                         learning_rate=0.15,
                         algorithm="SAMME",
                         n_estimators=200
                         )

# Train BDT for 2 jet.
# train_2jet, train_2jet_class and train_2jet_weights are defined earlier in
# the original script.
bdt.fit(train_2jet, train_2jet_class, sample_weight=train_2jet_weights)

# Get decision scores for test set.
twoclass_output = np.array(bdt.decision_function(test_2jet))

# Plot decision histogram.
# The histogram range spans the observed decision scores.
plot_range = (twoclass_output.min(), twoclass_output.max())
plt.subplot(122)

# One colour character per entry of class_names (21 of each), so zipping the
# two pairs every process with its colour.
plot_colors = 2*"r" + 12*"g" + "y" + 3*"b" + 3*"m"
plot_step = 0.02
class_names = ['qqZvvH125', 'qqWlvH125', 'Wbb', 'Wbc', 'Wcc', 'Wbl', 'Wcl', 'Wl',
               'Zbb', 'Zbc', 'Zcc', 'Zbl', 'Zcl', 'Zl', 'ttbar', 'stopt', 'stops',
               'stopWt', 'WW', 'ZZ', 'WZ']

for n, c in zip(class_names, plot_colors):
    this_data = twoclass_output[test_2jet_processes == n]
    this_weights = test_2jet_weights[test_2jet_processes == n] * SF_map_2jet[n]
    plt.hist(this_data,
开发者ID:jpyne17,项目名称:msci-hep,代码行数:33,代码来源:BDT_analysis.py

示例11: int

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
# Script fragment: X_train_bkg, X_test, X_test_bkg, X_test_sig, df_cheat,
# hlt2_cut_string, features and n_events are defined earlier in the original
# script.

# Signal training sample: rows from int(0.2*n_events) up to n_events of the
# selected events.
X_train_sig = df_cheat.query(
    hlt2_cut_string)[features][int(0.2*n_events):n_events]
X_train = X_train_bkg.append(X_train_sig, ignore_index=True).values

# DEFINE WHICH PARTS OF TEST AND TRAINING SAMPLES CONTAIN SIGNAL OR BACKGROUND
# (0 = background first, then 1 = signal, matching the concatenation order).
# NOTE(review): the signal slice above holds n_events - int(0.2*n_events)
# rows, which can differ from int(0.8*n_events) by rounding -- confirm
# n_events makes the two agree.
y_test = int(0.2*n_events)*[0]+int(0.2*n_events)*[1]
y_train = int(0.8*n_events)*[0]+int(0.8*n_events)*[1]

# DEFINE BDT ALGORITHM
# The leaf size is meant as an absolute count (5% of the training rows).
# scikit-learn interprets a float as a *fraction* of the samples, so the
# count has to be passed as an int (and be >= 1).
dt = DecisionTreeClassifier(max_depth=3,
                            min_samples_leaf=max(1, int(0.05*len(X_train))))
bdt = AdaBoostClassifier(dt,
                         algorithm='SAMME',
                         n_estimators=800,
                         learning_rate=0.5)

# RUN BDT TRAINING AND SHOW RESULTS
bdt.fit(X_train, y_train)
sk_y_predicted = bdt.predict(X_test)
print(classification_report(y_test, sk_y_predicted,
                            target_names=["background", "signal"]))
# The ROC curve needs continuous scores: hard 0/1 predictions give a single
# operating point and therefore a misleading area.
sk_y_score = bdt.decision_function(X_test)
print("Area under ROC curve: %.4f" % (roc_auc_score(y_test, sk_y_score)))

# Decision-score distributions for background (red) and signal (blue).
plt.hist(bdt.decision_function(X_test_bkg).ravel(), color='r', alpha=0.5,
         range=(-0.4, 0.4), bins=30)
plt.hist(bdt.decision_function(X_test_sig).ravel(), color='b', alpha=0.5,
         range=(-0.4, 0.4), bins=30)
plt.xlabel("scikit-learn BDT output")

plt.savefig('BDT.pdf')
开发者ID:renaudin,项目名称:impactkit-proceedings,代码行数:32,代码来源:compare_data_mc.py

示例12: time

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
            signalScore)
        print "- When we predict that we have a signal event, it is actually signal %.1f%% of the time (%i out of %i)" % (
            100.0 * fcorrect, int(fcorrect * len(predictionsForSignal)),
            len(predictionsForSignal))

        ### PLOT

        # plot feature distributions
        if first:
            first = False
            for idx, indicator in enumerate(whichIndicators):
                featureDistributions(Xtrain, Ytrain, indicator, idx)

        # shamelessly stolen from https://dbaumgartel.wordpress.com/2014/03/14/machine-learning-examples-scikit-learn-versus-tmva-cern-root/

        Classifier_training_S = alg.decision_function(
            Xtrain[Ytrain > 0.5]).ravel()
        Classifier_training_B = alg.decision_function(
            Xtrain[Ytrain < 0.5]).ravel()
        Classifier_testing_S = alg.decision_function(
            Xtest[Ytest > 0.5]).ravel()
        Classifier_testing_B = alg.decision_function(
            Xtest[Ytest < 0.5]).ravel()

        # This will be the min/max of our plots
        c_max = 1.5
        c_min = -1.5

        # Get histograms of the classifiers
        Histo_training_S = np.histogram(
            Classifier_training_S, bins=40, range=(c_min, c_max))
        Histo_training_B = np.histogram(
开发者ID:aminnj,项目名称:makers,代码行数:34,代码来源:bdtTest.py

示例13: precision_recall_curve

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
# Precision-recall curves for each model. Recall is plotted on the x axis and
# precision on the y axis, so the axis labels must say so (they were swapped).
# test, test_variables, the fitted models and the *_prob arrays are defined
# earlier in the original script.

# Logistic regression
precision_l, recall_l, thresholds_l = precision_recall_curve(test["los"], log.decision_function(test_variables))
pl.plot(recall_l, precision_l)
pl.xlabel("recall")
pl.ylabel("precision")
pl.title("LogisticRegression")
pl.show()
# CART (uses the predicted probability of the second class)
precision_c, recall_c, thresholds_c = precision_recall_curve(test["los"], test_cart_prob[::,1])
pl.plot(recall_c, precision_c)
pl.xlabel("recall")
pl.ylabel("precision")
pl.title("CART")
pl.show()
# AdaBoost (uses the decision scores)
precision_ad, recall_ad, thresholds_ad = precision_recall_curve(test["los"], ad.decision_function(test_variables))
pl.plot(recall_ad, precision_ad)
pl.xlabel("recall")
pl.ylabel("precision")
pl.title("AdBoosting")
pl.show()
# Naive Bayes (uses the predicted probability of the second class)
precision_n, recall_n, thresholds_n = precision_recall_curve(test["los"], test_naive_prob[::,1])
pl.plot(recall_n, precision_n)
pl.xlabel("recall")
pl.ylabel("precision")
pl.title("NaiveBayes")
pl.show()
# Combined plot of all curves (continues beyond this excerpt)
plt.plot(recall_l, precision_l)
plt.plot(recall_c, precision_c)
开发者ID:DataDisruptor,项目名称:Employee-Churn-Rate-Analysis,代码行数:33,代码来源:_Employeement_DiZhu.py

示例14: zip

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]

# Script fragment: ada, x, y, class_names and plot_colors are defined earlier
# in the original script.

# Plot the class probabilities
# Only the last column of predict_proba is histogrammed, once per class.
class_proba = ada.predict_proba(x)[:, -1]
pl.subplot(132)
for i, n, c in zip(xrange(2), class_names, plot_colors):
    # Select the samples whose label equals the class index i.
    pl.hist(class_proba[y == i],
            bins=20,
            range=(0, 1),
            facecolor=c,
            label='Class %s' % n)
pl.legend(loc='upper center')
pl.ylabel('Samples')
pl.xlabel('Class Probability')

# Plot the two-class decision scores
twoclass_output = ada.decision_function(x)
pl.subplot(133)
for i, n, c in zip(xrange(2), class_names, plot_colors):
    # NOTE(review): the fixed range (-1, 1) drops any score outside it --
    # confirm the decision scores are bounded that way for this model.
    pl.hist(twoclass_output[y == i],
            bins=20,
            range=(-1, 1),
            facecolor=c,
            label='Class %s' % n)
pl.legend(loc='upper right')
pl.ylabel('Samples')
pl.xlabel('Two-class Decision Scores')

# Widen the horizontal gap between the subplots before showing the figure.
pl.subplots_adjust(wspace=0.25)
pl.show()
开发者ID:tibristo,项目名称:hbb,代码行数:31,代码来源:testscikit.py

示例15: bdtModel

# 需要导入模块: from sklearn.ensemble import AdaBoostClassifier [as 别名]
# 或者: from sklearn.ensemble.AdaBoostClassifier import decision_function [as 别名]
def bdtModel(df_sig_train, df_bkg_train, df_sig_test, df_bkg_test):
    """Train and evaluate a boosted decision tree on signal/background data.

    Each input is converted with ``np.array``; rows of the ``df_sig_*``
    inputs are labelled 1 and rows of the ``df_bkg_*`` inputs are labelled 0.
    An AdaBoost (SAMME) classifier with 400 depth-3 trees is fitted on the
    training rows, several metrics are printed for the training and testing
    rows, one hard-coded input vector is scored, and the decision-score
    distributions are handed to ``visualSigBkg``.

    The remainder of the function is not part of this excerpt, so the return
    value is not documented here.
    """

    # '---------- Prepare Training ----------'

    # Signal rows get label 1, background rows label 0.
    X_sig = np.array(df_sig_train)
    y_sig = np.array(X_sig.shape[0] * [1])
    X_bkg = np.array(df_bkg_train)
    y_bkg = np.array(X_bkg.shape[0] * [0])

    # Signal first, then background, for both features and labels.
    X = np.concatenate((X_sig, X_bkg))
    y = np.concatenate((y_sig, y_bkg))

    print 'X_sig.shape: ', X_sig.shape
    print 'y_sig.shape: ', y_sig.shape
    print 'X_bkg.shape: ', X_bkg.shape
    print 'y_bkg.shape: ', y_bkg.shape
    print 'X.shape: ', X.shape
    print 'y.shape: ', y.shape

    # '---------- Prepare Testing ----------'

    # Same labelling and ordering as for the training sample.
    X_sig_test = np.array(df_sig_test)
    y_sig_test = np.array(X_sig_test.shape[0] * [1])
    X_bkg_test = np.array(df_bkg_test)
    y_bkg_test = np.array(X_bkg_test.shape[0] * [0])

    X_test = np.concatenate((X_sig_test, X_bkg_test))
    y_test = np.concatenate((y_sig_test, y_bkg_test))

    print 'X_sig_test.shape: ', X_sig_test.shape
    print 'y_sig_test.shape: ', y_sig_test.shape
    print 'X_bkg_test.shape: ', X_bkg_test.shape
    print 'y_bkg_test.shape: ', y_bkg_test.shape
    print 'X_test.shape: ', X_test.shape
    print 'y_test.shape: ', y_test.shape


    # '---------- Model ----------'

    #scaler = preprocessing.StandardScaler().fit(X)
    #X = scaler.transform(X)

    #model = svm.SVC(C = 50, kernel = 'rbf', tol=0.001, gamma=0.005, probability=True)
    #model.fit(X, y)

    # NOTE(review): min_samples_leaf is passed as a float count (5% of the
    # rows); recent scikit-learn reads a float as a fraction of the samples --
    # confirm the targeted version or wrap the value in int().
    dt = DecisionTreeClassifier(max_depth=3,
                                min_samples_leaf=0.05*len(X))
    model = AdaBoostClassifier(dt,
                             algorithm='SAMME',
                             n_estimators=400,
                             learning_rate=0.5)

    model.fit(X, y)


    print '---------- Training/Testing info ----------'

    print 'Accuracy (training): ', model.score(X, y)
    # NOTE(review): y.mean() is the fraction of training rows labelled 1 --
    # confirm this is the baseline meant by "Null Error Rate".
    print 'Null Error Rate (training): ', y.mean()


    #X_test = scaler.transform(X_test)
    predicted_test = model.predict(X_test)

    # With 0/1 labels, prediction + truth equals 1 exactly when the two
    # disagree, so the share of ones is the misclassification rate.
    predicted_test_clever = (predicted_test + y_test).tolist()
    error_test = float(predicted_test_clever.count(1)) / float(len(predicted_test_clever))
    print "Error: ", error_test

    print "Accuracy (testing): ", metrics.accuracy_score(y_test, predicted_test)
    print "Recall (testing): ",   metrics.recall_score(y_test, predicted_test)
    print "F1 score (testing): ", metrics.f1_score(y_test, predicted_test)
    # NOTE(review): the AUC is computed from hard class predictions; a ROC
    # area is normally taken from continuous scores such as
    # model.decision_function(X_test) -- confirm which is intended.
    print "ROC area under curve (testing): ", metrics.roc_auc_score(y_test, predicted_test)

    #'PTS','AST','REB','STL','BLK','FG_PCT','FG3_PCT','FT_PCT','MIN','EFF','WL']
    #user_input = scaler.transform(np.array([10, 1, 2, 0, 2, 0.3, 0.3, 0.3, 10, 5, 1], dtype=float))
    #user_input = scaler.transform(np.array([10,1,2,2,2,2,2,2,2,2,1], dtype=float))
    #user_input = scaler.transform(np.array([10,1,2], dtype=float))
    # A single hard-coded sample with 11 feature values.
    # NOTE(review): this is a 1-D array; recent scikit-learn expects a 2-D
    # (n_samples, n_features) input, e.g. user_input.reshape(1, -1) --
    # confirm against the targeted version.
    user_input = np.array([10.15, 1.95, 6.77, 1.12, 0.28, 0.51, 0.37, 0.47, 32.5, 14.8, 0.53], dtype=float)

    score = model.decision_function(user_input)
    print 'Score (user input): ', score
    result = model.predict_proba(user_input)
    print 'Probability of 1 (user input): ', result



    # '--------- Visualization -----------'

    # Decision scores split by true label (1 -> "S", 0 -> "B") for the
    # training and testing rows.
    Classifier_training_S = model.decision_function(X[y>0.5]).ravel()
    Classifier_training_B = model.decision_function(X[y<0.5]).ravel()
    Classifier_testing_S = model.decision_function(X_test[y_test>0.5]).ravel()
    Classifier_testing_B = model.decision_function(X_test[y_test<0.5]).ravel()

    (h_test_s, h_test_b) =  visualSigBkg("BDT", Classifier_training_S, Classifier_training_B, Classifier_testing_S, Classifier_testing_B)


    # '-------- Variable Importance ---------'
    feature_importance = model.feature_importances_
    # make importances relative to max importance
    feature_importance = 100.0 * (feature_importance / feature_importance.max())
#.........这里部分代码省略.........
开发者ID:XiaoxiaoWang87,项目名称:InsightPrj,代码行数:103,代码来源:svm_model.py


注:本文中的sklearn.ensemble.AdaBoostClassifier.decision_function方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。