

Python GradientBoostingClassifier.score Method Code Examples

This article collects typical usage examples of the Python method sklearn.ensemble.GradientBoostingClassifier.score. If you are wondering what exactly GradientBoostingClassifier.score does, how to call it, and what real uses look like, the curated code examples below may help. You can also explore further usage of sklearn.ensemble.GradientBoostingClassifier, the class this method belongs to.


Fifteen code examples of the GradientBoostingClassifier.score method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
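Before the collected examples, here is a minimal, self-contained sketch of the method itself: score(X, y) returns the mean accuracy of the classifier's predictions on X with respect to the true labels y. The synthetic dataset and hyperparameters below are illustrative assumptions and are not drawn from any of the examples on this page.

# Minimal sketch (assumed setup, Python 3): fit a GradientBoostingClassifier
# on synthetic data, then report held-out accuracy via score().
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=0)

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                 random_state=0)
clf.fit(X_train, y_train)

# score(X, y) is equivalent to accuracy_score(y, clf.predict(X))
print(clf.score(X_test, y_test))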

Example 1: test_classification_synthetic

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def test_classification_synthetic():
    # Test GradientBoostingClassifier on synthetic dataset used by
    # Hastie et al. in ESLII Example 12.7.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    for loss in ('deviance', 'exponential'):

        gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=2,
                                          max_depth=1, loss=loss,
                                          learning_rate=1.0, random_state=0)
        gbrt.fit(X_train, y_train)
        error_rate = (1.0 - gbrt.score(X_test, y_test))
        assert error_rate < 0.09, \
            "GB(loss={}) failed with error {}".format(loss, error_rate)

        gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2,
                                          max_depth=1, loss=loss,
                                          learning_rate=1.0, subsample=0.5,
                                          random_state=0)
        gbrt.fit(X_train, y_train)
        error_rate = (1.0 - gbrt.score(X_test, y_test))
        assert error_rate < 0.08, ("Stochastic GradientBoostingClassifier(loss={}) "
                                   "failed with error {}".format(loss, error_rate))
Developer ID: BobChew, Project: scikit-learn, Lines of code: 28, Source: test_gradient_boosting.py

Example 2: run_gradient_boosting_classifier

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def run_gradient_boosting_classifier(data, _max_depth):
    (feature_train, feature_test, label_train, label_test) = train_test_split(data[:, 0:-1], data[:, -1].astype(int),
                                                                              test_size=0.25)
    # TODO: Vary Number of Estimators and Learning Rate
    gbc = GradientBoostingClassifier(learning_rate=0.1, n_estimators=50, max_depth=_max_depth, verbose=True)
    gbc.fit(feature_train, label_train)
    training_accuracy = gbc.score(feature_train, label_train)
    #cross_validation_score = cross_val_score(gbc, feature_train, label_train, cv=10)
    testing_accuracy = gbc.score(feature_test, label_test)

    print "Gradient Boosting Results for Max Depth:", _max_depth
    print "Training Accuracy:", training_accuracy
    #print "10-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)" % (cross_validation_score.mean(), cross_validation_score.std() * 2)
    print "Testing Accuracy:", testing_accuracy

    feature_importance = gbc.feature_importances_
    stddev = np.std([tree[0].feature_importances_ for tree in gbc.estimators_], axis=0)
    indices = np.argsort(feature_importance)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for f in range(len(feature_importance)):
        print("%d. feature %d (%f)" % (f + 1, indices[f], feature_importance[indices[f]]))

    plot_feature_importance(feature_importance, indices, stddev, "gradient-boosted-classifier-feature-importance-depth-" + str(_max_depth))
Developer ID: BeifeiZhou, Project: social-network-recommendation, Lines of code: 27, Source: runClassifier.py

Example 3: test_zero_estimator_clf

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def test_zero_estimator_clf():
    # Test if ZeroEstimator works for classification.
    X = iris.data
    y = np.array(iris.target)
    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init=ZeroEstimator())
    est.fit(X, y)

    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)

    assert_greater(est.score(X, y), 0.96)

    # binary clf
    mask = y != 0
    y[mask] = 1
    y[~mask] = 0
    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)
    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='foobar')
    assert_raises(ValueError, est.fit, X, y)
Developer ID: amueller, Project: scikit-learn, Lines of code: 30, Source: test_gradient_boosting.py

Example 4: plotLearningCurve

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def plotLearningCurve(dat,lab,optim):

    '''
    This function plots the learning curve for the classifier

    Parameters:
    -----------
    dat: numpy array with all records
    lab: numpy array with class labels of all records
    optim: optimal parameters for classifier

    '''

    clf = GradientBoostingClassifier(learning_rate = optim[0], subsample = optim[1])

    # split training data into train and test (already chose optimal parameters)
    xTrain, xTest, yTrain, yTest = cross_validation.train_test_split(dat, lab, 
                                                                     test_size = 0.3)

    # choose various sizes of training set to model on to generate learning curve
    szV = range(10, np.shape(xTrain)[0], int(np.shape(xTrain)[0]) / 10)
    szV.append(np.shape(xTrain)[0])

    LCvals = np.zeros((len(szV),3), dtype = np.float64) # store data points of learning curve
    for i in xrange(0, len(szV)):
        clf = clf.fit(xTrain[:szV[i],:], yTrain[:szV[i]])
        LCvals[i,0] = szV[i]
        LCvals[i,1] = clf.score(xTest, yTest)
        LCvals[i,2] = clf.score(xTrain[:szV[i],:], yTrain[:szV[i]])

    #print LCvals

    # generate figure
    fig = plt.figure(1, figsize = (10,10))
    prop = matplotlib.font_manager.FontProperties(size=15.5)
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(LCvals[:,0] / np.float64(np.shape(xTrain)[0]), 1.0 - LCvals[:,1], 
            label = 'Test Set')
    ax.plot(LCvals[:,0] / np.float64(np.shape(xTrain)[0]), 1.0 - LCvals[:,2],
            label = 'Training Set')
    ax.set_ylabel(r"Error", fontsize = 20)
    ax.set_xlabel(r"% of Training Set Used", fontsize = 20)
    ax.axis([0.0, 1.0, -0.1, 0.5])
    plt.legend(loc = 'upper right', prop = prop)
    plt.savefig('LC_GB.pdf', bbox_inches = 'tight')
    fig.clear()

    # where is model failing?
    
    predProb = clf.predict_proba(xTest)
    tmp = np.zeros((np.shape(predProb)[0], np.shape(predProb)[1] + 2))
    tmp[:,:-2] = predProb
    tmp[:,-2] = clf.predict(xTest)
    tmp[:,-1] = yTest
    mask = tmp[:,-2] != tmp[:,-1]
    print tmp[mask]
    print mask.sum(), len(xTest)
    
    print tmp[:50,:]
Developer ID: pzukin, Project: Titanic, Lines of code: 61, Source: titanicGB.py

Example 5: TestGradBoost

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def TestGradBoost(dat, lab):

    '''
    This function finds the optimal parameters for the classifier

    Parameters:
    -----------
    dat: numpy array with all records
    lab: numpy array with class labels of all records

    Returns:
    --------
    par: optimal parameters for the classifier

    '''

    # Gradient Boost parameters. Will choose one based on which does best on the validation set
    # learning_rate, subsample
    lr = np.linspace(0.01, 0.2, num = 5)
    sub = np.linspace(0.1, 1.0, num = 5)
    par = [(e,f) for e in lr for f in sub]

    # want to try different ensembles to get error bar on score
    num = 10
    seed = np.random.randint(1000000, size = num)
    valScore = np.zeros((num, len(par)))
    testScore = np.zeros((num, len(par)))

    for nv in xrange(0, num):

        print 'Ensemble:', nv + 1

        # split training data into train, validation, test (60, 20, 20)
        xTrain, xTmp, yTrain, yTmp = cross_validation.train_test_split(dat, lab, 
                                                                       test_size = 0.4, 
                                                                       random_state = seed[nv])
        xVal, xTest, yVal, yTest = cross_validation.train_test_split(xTmp, yTmp, 
                                                                     test_size = 0.5, 
                                                                     random_state = seed[nv])

        # now train a gradient boosting classifier for each parameter combination
        for i in xrange(0,len(par)):
        
            clf = GradientBoostingClassifier(learning_rate = par[i][0], subsample = par[i][1])
            clf = clf.fit(xTrain, yTrain)
            valScore[nv,i] = clf.score(xVal, yVal)
            testScore[nv,i] = clf.score(xTest, yTest)

    # Find optimal parameters
    tmp = np.argmax(np.mean(valScore, axis = 0))
    print
    print 'Optimal parameters (learning rate, subsampling):', par[tmp]
    print 'Mean | Std Score (Validation set):', np.mean(valScore, axis = 0)[tmp], \
        '|', np.std(valScore, axis = 0)[tmp]
    print 'Mean | Std Score (Test set):', np.mean(testScore, axis = 0)[tmp], \
        '|', np.std(testScore, axis = 0)[tmp]

    # Return optimal parameters
    return par[tmp]
Developer ID: pzukin, Project: Titanic, Lines of code: 61, Source: titanicGB.py

Example 6: main

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def main():

    # generate synthetic binary classification data
    # (name refers to example 10.2 in ESL textbook...see refs below)
    X, y = make_hastie_10_2()

    # perform train/test split (no need to shuffle)
    split_pt = int(TRAIN_PCT * len(X))
    X_train, X_test = X[:split_pt], X[split_pt:]
    y_train, y_test = y[:split_pt], y[split_pt:]

    # single dec stump
    stump_clf = DecisionTreeClassifier(
        max_depth=1)
    stump_clf.fit(X_train, y_train)
    stump_score = round(stump_clf.score(X_test, y_test), 3)
    print 'decision stump acc = {}\t(max_depth = 1)'.format(stump_score)

    # single dec tree (max_depth=3)
    tree_clf = DecisionTreeClassifier(max_depth=3)
    tree_clf.fit(X_train, y_train)
    tree_score = round(tree_clf.score(X_test, y_test), 3)
    print 'decision tree acc = {}\t(max_depth = 3)\n'.format(tree_score)

    # gbt: a powerful ensemble technique
    gbt_scores = list()
    for k in (10, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500):
        print 'fitting gbt for n_estimators = {}...'.format(k)

        gbt_clf = GradientBoostingClassifier(
            n_estimators=k,         # number of weak learners for this iteration
            max_depth=1,            # weak learners are dec stumps
            learning_rate=1.0)      # regularization (shrinkage) hyperparam

        gbt_clf.fit(X_train, y_train)
        gbt_scores.append(round(gbt_clf.score(X_test, y_test), 3))

    print '\ngbt accuracy =\n{}\n'.format(gbt_scores)

    # stochastic gbt (using subsampling)
    sgbt_scores = list()
    for k in (10, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500):
        print 'fitting sgbt for n_estimators = {}...'.format(k)

        sgbt_clf = GradientBoostingClassifier(
            n_estimators=k,         # number of weak learners for this iteration
            max_depth=1,            # weak learners are dec stumps
            subsample=0.5,          # % of training set used by each base classifier
            learning_rate=1.0)      # regularization (shrinkage) hyperparam

        sgbt_clf.fit(X_train, y_train)
        sgbt_scores.append(round(sgbt_clf.score(X_test, y_test), 3))

    print '\nsgbt accuracy =\n{}'.format(sgbt_scores)
Developer ID: Adusei, Project: science, Lines of code: 56, Source: gbt.py

Example 7: trainAndPredict

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def trainAndPredict(num_trees, train_num):
    train_X = X[:train_num]
    train_y = y[:train_num]  

    test_X = X[train_num:]
    test_y = y[train_num:]

    #clf = svm.SVC()
    clf = GradientBoostingClassifier(n_estimators=num_trees, learning_rate=0.5, max_depth=2, random_state=0)
    clf.fit(train_X, train_y) 
    return (clf.score(train_X, train_y), clf.score(test_X, test_y))
Developer ID: xinranhe, Project: Dota2, Lines of code: 13, Source: match_predict.py

Example 8: l1_penalty_solver

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def l1_penalty_solver(train_data,test_data,n_est,m_d):

    best = 0.0
    best_Output = []
    for j in [10**(x) for x in xrange(-3,-2,1)]:
        
        X, y = train_data[:,1::], train_data[:,0]
        x1, y1 = test_data[:,1::], test_data[:,0]
        
        # Set regularization parameter
        for C in range(10,11,1):
            # turn down tolerance for short training time
            #cls = svm.SVC(kernel='poly',degree=3).fit(X,y)
            cls = GradientBoostingClassifier(n_estimators=n_est,max_depth=m_d).fit(X,y)
            #cls = DecisionTreeClassifier().fit(X,y)
            #cls = LogisticRegression(C=C, penalty='l1', tol=j).fit(X, y)
            #cls = LogisticRegression(C=C, penalty='l2', tol=j).fit(X, y) 

            val1 = cls.predict(x1)
            #val1 = cls.predict(x1)
            val2 = val1 #cls.predict(x1)
                        
            count = 0.
            for i in range(len(val1)):
                if val1[i] == y1[i]:
                    count +=1.
                else:
                    continue
            result1 = count/len(val1)

            count = 0.
            for i in range(len(val2)):
                if val2[i] == y1[i]:
                    count +=1.
                else:
                    continue
            result2 = count/len(val2)
    
            if result1>best:
                best = result1
                best_Output = val1
            if result2>best:
                best = result2
                best_Output = val2
     
        
    pr.print_results(best_Output)
    #return best
    return [cls.score(X,y),cls.score(x1,y1)]
Developer ID: cianmj, Project: kaggle, Lines of code: 51, Source: l1_penalty.py

Example 9: gbPredict

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def gbPredict(LOSS, N_EST, L_RATE, M_DEPT, SUB_S, W_START, N_FOLD, EX_F, TRAIN_DATA_X, TRAIN_DATA_Y, TEST__DATA_X, isProb):
    # feature extraction
    ### clf  = GradientBoostingClassifier(loss=LOSS, n_estimators=N_EST, learning_rate=L_RATE, max_depth=M_DEPT, subsample=SUB_S, warm_start=W_START).fit(TRAIN_DATA_X, TRAIN_DATA_Y)
    ### extA = delFeatMin(clf.feature_importances_, EX_F)
    ### TRAIN_DATA_X = TRAIN_DATA_X[:, extA]
    # k-fold validation
    kf   = KFold(TRAIN_DATA_Y.shape[0], n_folds=N_FOLD)
    tesV = 0.0
    for train_index, test_index in kf:
        X_train, X_test = TRAIN_DATA_X[train_index], TRAIN_DATA_X[test_index]
        y_train, y_test = TRAIN_DATA_Y[train_index], TRAIN_DATA_Y[test_index]
        clf  =  GradientBoostingClassifier(loss=LOSS, n_estimators=N_EST, learning_rate=L_RATE, max_depth=M_DEPT, subsample=SUB_S, warm_start=W_START).fit(X_train, y_train)
        tesK =  1 - clf.score(X_test, y_test)
        tesV += tesK
    eVal = tesV / N_FOLD
    # train all data
    clf  = GradientBoostingClassifier(loss=LOSS, n_estimators=N_EST, learning_rate=L_RATE, max_depth=M_DEPT, subsample=SUB_S, warm_start=W_START).fit(TRAIN_DATA_X, TRAIN_DATA_Y)
    ### TEST__DATA_X = TEST__DATA_X[:, extA]  # disabled: extA comes from the commented-out feature-extraction step above
    if isProb:
        data = clf.predict_proba(TEST__DATA_X)
    else:
        data = clf.predict(TEST__DATA_X)

    print "Eval =", eVal, "with n_esti =", N_EST, "l_rate =", L_RATE, "m_dep =", M_DEPT, "sub_s =", SUB_S, "ex_num =", EX_F, "and loss is", LOSS

    return (data, eVal)
Developer ID: TeamSDJ, Project: ML_2015_Final, Lines of code: 28, Source: gb.py

Example 10: GB_Classifier

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def GB_Classifier(X_train, X_cv, X_test, Y_train,Y_cv,Y_test, Actual_DS):
    print("***************Starting Gradient Boosting***************")
    t0 = time()
    clf = GradientBoostingClassifier(n_estimators=500,learning_rate=0.01)
    clf.fit(X_train, Y_train)
    preds = clf.predict(X_cv)
    score = clf.score(X_cv,Y_cv)

    print("Gradient Boosting - {0:.2f}%".format(100 * score))
    Summary = pd.crosstab(label_enc.inverse_transform(Y_cv), label_enc.inverse_transform(preds),
                      rownames=['actual'], colnames=['preds'])
    Summary['pct'] = (Summary.divide(Summary.sum(axis=1), axis=1)).max(axis=1)*100
    print(Summary)

    #Check with log loss function
    epsilon = 1e-15
    #ll_output = log_loss_func(Y_cv, preds, epsilon)
    preds2 = clf.predict_proba(X_cv)
    ll_output2= log_loss(Y_cv, preds2, eps=1e-15, normalize=True)
    print(ll_output2)

    print("done in %0.3fs" % (time() - t0))

    preds3 = clf.predict_proba(X_test)
    #preds4 = clf.predict_proba((Actual_DS.ix[:,'feat_1':]))
    preds4 = clf.predict_proba(Actual_DS)

    print("***************Ending Gradient Boosting***************")
    return pd.DataFrame(preds2),pd.DataFrame(preds3),pd.DataFrame(preds4)
Developer ID: roshankr, Project: DS_Competition, Lines of code: 31, Source: Otto_Classification.py

Example 11: classify

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def classify(train, train_sample_ids, test_sample_ids, whichClassifier):
  feature_names = list(train.columns)
  feature_names.remove("click_bool")
  feature_names.remove("booking_bool")
  feature_names.remove("gross_bookings_usd")
  #feature_names.remove("date_time")
  feature_names.remove("position")

  # Create Train and Test
  trainX = train[feature_names][train_sample_ids]
  testX = train[feature_names][test_sample_ids]
  Y_columns = ["click_bool", "booking_bool", "position"]
  trainY = train[Y_columns][train_sample_ids].apply(lambda x: objective(x, whichClassifier), axis=1)
  testY = train[Y_columns][test_sample_ids].apply(lambda x: objective(x, whichClassifier), axis=1)

  print "Train: ", len(trainY)
  print "Test: ", len(testY)

  print("Training the Classifier")
  classifier = GradientBoostingClassifier(n_estimators=1024, 
                                          verbose=3,
                                          subsample=0.8,
                                          min_samples_split=10,
                                          max_depth = 6,
                                          random_state=1)
  classifier.fit(trainX, trainY)
    
  print "Score = ", classifier.score(testX, testY)

  return classifier
Developer ID: sjuvekar, Project: Kaggle-Expedia-Raking, Lines of code: 32, Source: train.py

Example 12: train_gbt

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def train_gbt(filename, color, name):
	'''Train on Gradient Boosted Trees Classifier'''
	# Read data
	data2 = pd.read_csv(filename, encoding="utf")
	X = data2.ix[:, 1:-1]
	y = data2.ix[:, -1]

	# Split into train and validation sets
	X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

	# Define model
	clf1 = GradientBoostingClassifier(learning_rate=0.05, max_depth=5, random_state=42)
	
	# Fit model
	t0 = time()
	clf1.fit(X_train, y_train)
	pred_probas = clf1.predict_proba(X_val)

	predictions = clf1.predict(X_val)
	
	print "Score", clf1.score(X_val, y_val)

	importances = clf1.feature_importances_
	indices = np.argsort(importances)[::-1]
	
	# Metrics & Plotting
	metrics[1, 0] = precision_score(y_val, predictions)
	metrics[1, 1] = recall_score(y_val, predictions)
	metrics[1, 2] = f1_score(y_val, predictions)
	metrics[1, 3] = time() - t0

	fpr_rf, tpr_rf, _ = roc_curve(y_val, predictions)
	plt.plot(fpr_rf, tpr_rf, color=color, label=name)

	return importances, indices
Developer ID: amy12xx, Project: TelecomChurnPrediction, Lines of code: 37, Source: explore_models.py

Example 13: main

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def main():
    
    train_f = pd.read_csv(train_path, header=0, parse_dates=['Dates'])
    print train_f.dtypes

    X, Y = get_feature(train_f, "training_set")
    

    ### TRAINING
    clf = GradientBoostingClassifier(n_estimators=50)
    # clf = RandomForestClassifier(n_estimators=2)
    # clf = LogisticRegression(n_jobs=4)

    X, Y = shuffle_XY(X, Y)
    data_len = len(X)
    train_len = data_len * 95 / 100 
    val_len = data_len - train_len
    X_train = X[:train_len]
    X_val = X[train_len:]
    Y_train = Y[:train_len]
    Y_val = Y[train_len:]
    
    clf = clf.fit(X_train, Y_train)
    print "Training done"

    
    val_acc = clf.score(X_val, Y_val)
    print "Val acc:", val_acc

    val_pred = clf.predict_proba(X_val)
    

    # print max(Y_val), min(Y_val)
    # print Y_val, Y_val + 1
    val_log = 0.0
    cnt = 0
    for y in Y_val:
        val_log += math.log(val_pred[cnt, y]+0.0000001)
        cnt += 1
    val_log =  - val_log / len(Y_val)
    print "Val log loss:", val_log
 
    # print "Val loss:", log_loss(Y_val+1, val_pred) # Note the +1 here!
    """
    # scores = cross_val_score(clf, X, Y)
    # print "Cross val acc:", scores.mean()
    """

    ### Testing

    test_f = pd.read_csv(test_path, header=0, parse_dates=['Dates'])
    # print test_f.dtypes

    X_test, _ = get_feature(test_f, "test_set")
    Y_test = clf.predict_proba(X_test)

    ### Write results
    # write_results(Y_test)
    write_results_prob(Y_test)
Developer ID: ruoyanwang, Project: datasci, Lines of code: 61, Source: gradient_boosting_tree.py

Example 14: rand_forest_train

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
    def rand_forest_train(self):
        # Read the local user feature data
        users = pd.read_csv('names.csv')
        # Use similarity, platform, reputation, and entropy as features for telling humans from machines
        X = users[['similarity', 'platform', 'reputation', 'entropy']]
        y = users['human_or_machine']

        # Split the raw data; hold out 25% for testing
        from sklearn.cross_validation import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)

        # Convert the categorical features into feature vectors
        from sklearn.feature_extraction import DictVectorizer
        vec = DictVectorizer(sparse=False)
        X_train = vec.fit_transform(X_train.to_dict(orient='records'))
        X_test = vec.transform(X_test.to_dict(orient='records'))

        # Train and predict with a single decision tree
        from sklearn.tree import DecisionTreeClassifier
        dtc = DecisionTreeClassifier()
        dtc.fit(X_train, y_train)
        dtc_y_pred = dtc.predict(X_test)

        # Train and predict with a random forest classifier
        from sklearn.ensemble import RandomForestClassifier
        rfc = RandomForestClassifier()
        rfc.fit(X_train, y_train)
        rfc_y_pred = rfc.predict(X_test)

        # Train and predict with a gradient boosting decision tree classifier
        from sklearn.ensemble import GradientBoostingClassifier
        gbc = GradientBoostingClassifier()
        gbc.fit(X_train, y_train)
        gbc_y_pred = gbc.predict(X_test)

        from sklearn.metrics import classification_report
        # Report the single decision tree's test-set accuracy, plus detailed precision, recall, and F1 metrics
        print("Single decision tree accuracy:", dtc.score(X_test, y_test))
        print(classification_report(y_test, dtc_y_pred))

        # Report the random forest classifier's test-set accuracy, plus detailed precision, recall, and F1 metrics
        print("Random forest classifier accuracy:", rfc.score(X_test, y_test))
        print(classification_report(y_test, rfc_y_pred))

        # Report the gradient boosting classifier's test-set accuracy, plus detailed precision, recall, and F1 metrics
        print("Gradient boosting classifier accuracy:", gbc.score(X_test, y_test))
        print(classification_report(y_test, gbc_y_pred))


        users = pd.read_csv('values.csv')

        # Check whether each record is a machine or a human
        X = users[['similarity', 'platform', 'reputation', 'entropy']]
        X = vec.transform(X.to_dict(orient='records'))
        print(rfc.predict(X))

        self.dtc = dtc
        self.rfc = rfc
        self.gbc = gbc
Developer ID: jryyufeng, Project: learngit, Lines of code: 61, Source: random_forest.py

Example 15: gbdt_clf

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def gbdt_clf(x_train,x_test,y_train,y_test):
    clf = GradientBoostingClassifier(n_estimators=100)
    clf.fit(x_train,y_train)
    y_pred = clf.predict_proba(x_test)[:,1]
    print "gbdt F1 scores",clf.score(x_test,y_test)
    scores = roc_auc_score(y_test,y_pred)
    print "gbdt_clf scores: ",scores
    joblib.dump(clf,'./output/gbdt_clf.model')
Developer ID: jkmiao, Project: ipin2015, Lines of code: 10, Source: gen_model.py


Note: The sklearn.ensemble.GradientBoostingClassifier.score examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors. Please consult each project's license before distributing or using the code, and do not reproduce without permission.