

Python GradientBoostingClassifier.fit Method Code Examples

This article collects typical usage examples of the sklearn.ensemble.GradientBoostingClassifier.fit method in Python. If you are wondering what GradientBoostingClassifier.fit does, how to call it, or what real-world uses look like, the curated code examples below may help. You can also browse further usage examples of the containing class, sklearn.ensemble.GradientBoostingClassifier.


The following presents 15 code examples of the GradientBoostingClassifier.fit method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
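Before the collected examples, here is a minimal, self-contained sketch of the basic fit/predict workflow. The synthetic dataset and every hyperparameter value below are illustrative assumptions for this sketch, not values taken from any of the projects listed.

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Illustrative synthetic data; any numeric feature matrix and label vector work.
X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Hyperparameters are assumptions for the sketch, not tuned values.
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
clf.fit(X_train, y_train)

print(clf.score(X_test, y_test))        # mean accuracy on held-out data
print(clf.predict_proba(X_test)[:5])    # per-class probability estimates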

Example 1: ctr_gbdt

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def ctr_gbdt(model='sklearn-clicklog', from_cache=False, train_dataset_length=100000, test_dataset_length=100000):
    TRAIN_FILE, TEST_FILE = create_dataset(model, from_cache, train_dataset_length, test_dataset_length)

    prediction_model = GradientBoostingClassifier(
        loss='deviance',
        learning_rate=0.1,
        n_estimators=30,
        subsample=1.0,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=5,
    )

    x_train, y_train = clean_data(TRAIN_FILE)
    x_test, y_test = clean_data(TEST_FILE)

    with Timer('fit model'):
        prediction_model.fit(x_train, y_train)

    with Timer('evaluate model'):
        y_prediction_train = prediction_model.predict_proba(x_train)
        y_prediction_test = prediction_model.predict_proba(x_test)

        loss_train = log_loss(y_train, y_prediction_train)
        loss_test = log_loss(y_test, y_prediction_test)

    print('loss_train: %s' % loss_train)
    print('loss_test: %s' % loss_test)
Author: kazarinov, Project: hccf, Lines: 31, Source: sklearn_experiments.py

Example 2: test_gradient_boosting_validation_fraction

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_gradient_boosting_validation_fraction():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=100,
                                     n_iter_no_change=10,
                                     validation_fraction=0.1,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)
    gbc2 = clone(gbc).set_params(validation_fraction=0.3)
    gbc3 = clone(gbc).set_params(n_iter_no_change=20)

    gbr = GradientBoostingRegressor(n_estimators=100, n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    validation_fraction=0.1,
                                    random_state=42)
    gbr2 = clone(gbr).set_params(validation_fraction=0.3)
    gbr3 = clone(gbr).set_params(n_iter_no_change=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    # Check if validation_fraction has an effect
    gbc.fit(X_train, y_train)
    gbc2.fit(X_train, y_train)
    assert gbc.n_estimators_ != gbc2.n_estimators_

    gbr.fit(X_train, y_train)
    gbr2.fit(X_train, y_train)
    assert gbr.n_estimators_ != gbr2.n_estimators_

    # Check that n_estimators_ increases monotonically with n_iter_no_change
    # (validation_fraction is held fixed at 0.1)
    gbc3.fit(X_train, y_train)
    gbr3.fit(X_train, y_train)
    assert gbr.n_estimators_ < gbr3.n_estimators_
    assert gbc.n_estimators_ < gbc3.n_estimators_
Author: amueller, Project: scikit-learn, Lines: 36, Source: test_gradient_boosting.py

Example 3: test_staged_predict_proba

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_staged_predict_proba():
    # Test whether staged predict proba eventually gives
    # the same prediction.
    X, y = datasets.make_hastie_10_2(n_samples=1200,
                                     random_state=1)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    clf = GradientBoostingClassifier(n_estimators=20)
    # staged_predict_proba must raise NotFittedError before fit is called
    assert_raises(NotFittedError, lambda X: np.fromiter(
        clf.staged_predict_proba(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)

    # test if prediction for last stage equals ``predict``
    for y_pred in clf.staged_predict(X_test):
        assert_equal(y_test.shape, y_pred.shape)

    assert_array_equal(clf.predict(X_test), y_pred)

    # test if prediction for last stage equals ``predict_proba``
    for staged_proba in clf.staged_predict_proba(X_test):
        assert_equal(y_test.shape[0], staged_proba.shape[0])
        assert_equal(2, staged_proba.shape[1])

    assert_array_almost_equal(clf.predict_proba(X_test), staged_proba)
Author: amueller, Project: scikit-learn, Lines: 28, Source: test_gradient_boosting.py

Example 4: test_partial_dependecy_input

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_partial_dependecy_input():
    # Test input validation of partial dependence.
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(X, y)

    assert_raises(ValueError, partial_dependence,
                  clf, [0], grid=None, X=None)

    assert_raises(ValueError, partial_dependence,
                  clf, [0], grid=[0, 1], X=X)

    # first argument must be an instance of BaseGradientBoosting
    assert_raises(ValueError, partial_dependence,
                  {}, [0], X=X)

    # Gradient boosting estimator must be fit
    assert_raises(ValueError, partial_dependence,
                  GradientBoostingClassifier(), [0], X=X)

    assert_raises(ValueError, partial_dependence, clf, [-1], X=X)

    assert_raises(ValueError, partial_dependence, clf, [100], X=X)

    # wrong ndim for grid
    grid = np.random.rand(10, 2, 1)
    assert_raises(ValueError, partial_dependence, clf, [0], grid=grid)
Author: kevin-coder, Project: scikit-learn-fork, Lines: 28, Source: test_partial_dependence.py

Example 5: test_plot_partial_dependence_multiclass

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_plot_partial_dependence_multiclass():
    # Test partial dependence plot function on multi-class input.
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, iris.target)

    grid_resolution = 25
    fig, axs = plot_partial_dependence(clf, iris.data, [0, 1],
                                       label=0,
                                       grid_resolution=grid_resolution)
    assert len(axs) == 2
    assert all(ax.has_data() for ax in axs)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, target)

    grid_resolution = 25
    fig, axs = plot_partial_dependence(clf, iris.data, [0, 1],
                                       label='setosa',
                                       grid_resolution=grid_resolution)
    assert len(axs) == 2
    assert all(ax.has_data() for ax in axs)

    # label not in gbrt.classes_
    assert_raises(ValueError, plot_partial_dependence,
                  clf, iris.data, [0, 1], label='foobar',
                  grid_resolution=grid_resolution)

    # label not provided
    assert_raises(ValueError, plot_partial_dependence,
                  clf, iris.data, [0, 1],
                  grid_resolution=grid_resolution)
Author: kevin-coder, Project: scikit-learn-fork, Lines: 35, Source: test_partial_dependence.py

Example 6: PlotFeaturesImportance

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def PlotFeaturesImportance(X,y,featureNames,dataName):
    '''
    Plot the relative contribution/importance of the features.
    Best to reduce to top X features first - for interpretability
    Code example from:
    http://bugra.github.io/work/notes/2014-11-22/an-introduction-to-supervised-learning-scikit-learn/
    '''
    gbc = GradientBoostingClassifier(n_estimators=40)
    gbc.fit(X, y)
    # Get Feature Importance from the classifier
    feature_importance = gbc.feature_importances_
    # Normalize The Features
    feature_importance = 100 * (feature_importance / feature_importance.max())
    sorted_idx = numpy.argsort(feature_importance)
    pos = numpy.arange(sorted_idx.shape[0]) + 4.5
    # pos = numpy.arange(sorted_idx.shape[0])
    # plt.figure(figsize=(16, 12))
    plt.figure(figsize=(14, 9), dpi=250)
    plt.barh(pos, feature_importance[sorted_idx], align='center', color='#7A68A6')
    #plt.yticks(pos, numpy.asanyarray(df.columns.tolist())[sorted_idx]) #ORIG
    plt.yticks(pos, numpy.asanyarray(featureNames)[sorted_idx])

    plt.xlabel('Relative Importance')
    plt.title('%s: Top Features' %(dataName))
    plt.grid(False)  # use a boolean; string 'on'/'off' support was removed from matplotlib
    plt.ion()
    plt.show()
    plt.savefig(str(dataName)+'TopFeatures.png',dpi=200)
Author: MichaelDoron, Project: ProFET, Lines: 30, Source: VisualizeBestFeatures.py

Example 7: model_train_ensemble

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def model_train_ensemble(X1,Y1,Save = False, modelname = None):
    
    X1,Y1 = DowmSample(X1,Y1,9)
    
#     model = RandomForestClassifier(n_estimators=100,random_state=1)
    model = GradientBoostingClassifier(n_estimators=100,max_leaf_nodes=5, subsample=0.7, learning_rate=0.1, random_state=1)
#     model = LogisticRegression('l2')
    model.fit(X1, Y1.ravel())
    
    # Save the model to disk
    if Save:
        f = open(modelname, 'wb')   # pickle requires binary mode
        pickle.dump(model, f)
        f.close()

    print('\n -------------- Training is over ----------------------')
    return model
Author: SuixueWang, Project: Mobile-Recommendation, Lines: 19, Source: Main_processing_offline.py

Example 8: Blender

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
class Blender(BaseEstimator, ClassifierMixin):
    def __init__(self, trained_clfs):
        self.clfs = trained_clfs
        # self.classifier = make_pipeline(OneHotEncoder(), DenseTransformer(),
        #                                 GradientBoostingClassifier())
        self.classifier = GradientBoostingClassifier()
        # self.classifier = make_pipeline(
        #     OneHotEncoder(), LogisticRegression(class_weight='auto'))

    def fit(self, data, target):
        # self.enc = LabelEncoder().fit(target)
        probs = self.transform_input(data)
        # self.classifier.fit(predictions, target)
        self.classifier.fit(probs, target)

    def predict(self, data):
        predictions = self.transform_input(data)
        return self.classifier.predict(predictions)

    def transform_input(self, data):
        probabilities = [clf.predict_proba(data) for clf in self.clfs]

        probabilities = np.array(probabilities)
        # features, samples = probabilities.shape
        n_clfs, samples, features = probabilities.shape
        probabilities = np.reshape(probabilities, (samples, n_clfs * features))
        probabilities[np.isnan(probabilities)] = 0
        return probabilities
Author: Kyo91, Project: recipe-learners, Lines: 30, Source: blender.py
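A minimal usage sketch for the Blender class above, assuming the class and its imports (numpy as np, GradientBoostingClassifier, BaseEstimator, ClassifierMixin) are in scope; the base classifiers and synthetic data below are illustrative assumptions, not part of the original project.

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=600, random_state=0)
X_base, X_blend, y_base, y_blend = train_test_split(X, y, random_state=0)

# Base learners must already be fitted before being handed to Blender.
base_clfs = [RandomForestClassifier(random_state=0).fit(X_base, y_base),
             LogisticRegression(max_iter=1000).fit(X_base, y_base)]

blender = Blender(base_clfs)
blender.fit(X_blend, y_blend)      # stacks the base predict_proba outputs
print(blender.predict(X_blend[:5]))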

Example 9: test_oob_improvement

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_oob_improvement():
    """Test if oob improvement has correct shape and regression test. """
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=0.5)
    clf.fit(X, y)
    assert clf.oob_improvement_.shape[0] == 100
    # hard-coded regression test - change if modification in OOB computation
    assert_array_almost_equal(clf.oob_improvement_[:5], np.array([0.19, 0.15, 0.12, -0.12, -0.11]), decimal=2)
Author: Anubhav27, Project: scikit-learn, Lines: 9, Source: test_gradient_boosting.py

Example 10: main

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def main():
    print('[INFO, time: %s] Getting Data....' % time.strftime('%H:%M:%S'))
    # pickle files must be opened in binary mode
    testing_file = open('test.p', 'rb')
    training_file = open('train.p', 'rb')

    train = pickle.load(training_file)
    test = pickle.load(testing_file)

    testing_file.close()
    training_file.close()

    trainX = train[:, :-1]
    trainy = train[:, -1]

    testX = test[:, :-1]
    testy = test[:, -1]

    print('[INFO, time: %s] Fitting %s ...' % (time.strftime('%H:%M:%S'), 'GradientBoostingClassifier(n_estimators=1000)'))
    clf = GradientBoostingClassifier(n_estimators=1000)
    clf.fit(trainX, trainy)

    print('[INFO, time: %s] Making Predictions...' % time.strftime('%H:%M:%S'))
    prediction = clf.predict(testX)
    print('[RESULT, time: %s] accuracy = %f' % (time.strftime('%H:%M:%S'), accuracy_score(testy, prediction)))

    model_save_file = open('gradient_1000.p', 'wb')
    pickle.dump(clf, model_save_file)
    model_save_file.close()
    print('All done')
Author: sanketrahul, Project: cs-412_ml_course_project, Lines: 32, Source: gradientboosting_1000estimators.py

Example 11: partial_dependence

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def partial_dependence(df, y):
    '''
    INPUT: X = features
           y = target variable, binary with imbalanced classes
    OUTPUT: X = features oversampled to have balanced target classes
            y = target variable oversampled to have balanced classes

    Discovers the minority class, oversamples until each class makes up 50%
    of the data, then plots partial dependence for the six most important
    features.
    '''
    X_train, X_test, y_train, y_test = oversample_train_test(df, y)
    # X_train, X_test, y_train, y_test = train_test_split(df, y, random_state=42)

    feature_engineering = Pipeline([
        ('lists', ListSplitter()),
        ('race', RaceDummies()),
        ('crime_sentence', CrimeAndSentence()),
        ('feat_eng', FeatureEngineer()),
        ('columns', ColumnFilter(prejudice=False))
    ])

    X = feature_engineering.fit_transform(X_train.copy(), y_train)
    X_test = feature_engineering.fit_transform(X_test.copy(), y_test)

    gbc = GradientBoostingClassifier(n_estimators=850, learning_rate=.75)
    gbc.fit(X.copy(), y_train)
    most_imp = np.argsort(gbc.feature_importances_)[-6:]

    names = list(X_test.columns)
    feats = list(most_imp)
    fig, axs = plot_partial_dependence(gbc, X_test, feats, feature_names=names,
                                       n_jobs=3, grid_resolution=50)
Author: dannyprikaz, Project: megans_law_project, Lines: 34, Source: functions.py

Example 12: run_gradient_boosting_classifier

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def run_gradient_boosting_classifier(data, _max_depth):
    (feature_train, feature_test, label_train, label_test) = train_test_split(data[:, 0:-1], data[:, -1].astype(int),
                                                                              test_size=0.25)
    # TODO: Vary Number of Estimators and Learning Rate
    gbc = GradientBoostingClassifier(learning_rate=0.1, n_estimators=50, max_depth=_max_depth, verbose = True)
    gbc.fit(feature_train, label_train)
    training_error = gbc.score(feature_train, label_train)
    #cross_validation_score = cross_val_score(gbc, feature_train, label_train, cv=10)
    testing_error = gbc.score(feature_test, label_test)

    print "Random Forest Results for Max Depth:", _max_depth
    print "Training Accuracy:", training_error
    #print "10-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)" % (cross_validation_score.mean(), cross_validation_score.std() * 2)
    print "Testing Accuracy:", testing_error

    feature_importance = gbc.feature_importances_
    stddev = np.std([tree[0].feature_importances_ for tree in gbc.estimators_], axis=0)
    indices = np.argsort(feature_importance)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for f in range(len(feature_importance)):
        print("%d. feature %d (%f)" % (f + 1, indices[f], feature_importance[indices[f]]))

    plot_feature_importance(feature_importance, indices, stddev, "gradient-boosted-classifier-feature-importance-depth-" + str(_max_depth))
Author: BeifeiZhou, Project: social-network-recommendation, Lines: 27, Source: runClassifier.py

Example 13: predict

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def predict(fea, df, t, t9):
    # Build a boolean column mask covering each feature and its _x/_y variants
    Un = df.columns == 'Blank'
    for f in fea:
        '''
        try:
            df[(f+'_y')] = df[(f+'_x')] - df[(f+'_y')]
            print(1)
        except:
            pass
        '''
        Un = Un | (df.columns == f)
        Un = Un | (df.columns == (f+'_x'))
        Un = Un | (df.columns == (f+'_y'))
    Un = Un & (df.columns != 'New_y')
    clf = GradientBoostingClassifier()
    y = df[t].label
    X = df[t].loc[:, Un]   # .loc replaces the deprecated .ix indexer
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=1)
    clf.fit(X_train, y_train)
    re = 'Testing AUC: \t' + str(roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]))
    print(re)
    re = 'September AUC: \t' + str(roc_auc_score(df[t9].label, clf.predict_proba(df[t9].loc[:, Un])[:, 1]))
    print(re)
    print(X.columns)
    print(clf.feature_importances_)
    return Un, clf
Author: duxuhao, Project: wo_plus, Lines: 28, Source: ThreeMonth2.py

Example 14: __init__

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
    def __init__(self, estimator,
                 phase, 
                 n_jobs, cv_k_fold, parameters,
                 X_train, y_train,
                 X_test, y_test):
        # estimator: the ensemble learner to be configured

        # During the training phase, grid-search for the best parameters
        if phase == "train":
            clf = GradientBoostingClassifier()
            gscv = GridSearchCV(clf, parameters,
                                verbose=10,
                                scoring="f1",  # "precision" or "recall" also work here
                                n_jobs=n_jobs, cv=cv_k_fold)
            gscv.fit(X_train, y_train)
            self.best_params = gscv.best_params_

            clf.set_params(**gscv.best_params_)
            clf.fit(X_train, y_train)
            train_loss = clf.train_score_
            test_loss = np.empty(len(clf.estimators_))
            # loss_ expects raw decision scores rather than class labels
            for i, pred in enumerate(clf.staged_decision_function(X_test)):
                test_loss[i] = clf.loss_(y_test, pred)
            plt.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
            plt.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
            plt.xlabel('number of weak learners (boosting iterations)')
            plt.ylabel('Loss')
            plt.legend(loc="best")
            plt.savefig("loss_cv.png")
            plt.close()

            # gscv exists only in the training phase, so apply its best params here
            estimator.set_params(**gscv.best_params_)

        self.estimator = estimator
        self.one_hot_encoding = None
Author: Quasi-quant2010, Project: Stacking, Lines: 36, Source: stack_test.backup.py

Example 15: test_max_feature_auto

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_max_feature_auto():
    """Test if max features is set properly for floats and str. """
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
    _, n_features = X.shape

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    gbrt = GradientBoostingClassifier(n_estimators=1, max_features='auto')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.sqrt(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='auto')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, n_features)

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.3)
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(n_features * 0.3))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='sqrt')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.sqrt(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='log2')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.log2(n_features)))
Author: DearMonster, Project: nb_sklearn, Lines: 29, Source: test_gradient_boosting.py


Note: The sklearn.ensemble.GradientBoostingClassifier.fit examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective developers, and copyright remains with the original authors. Please consult each project's license before redistributing or reusing the code; do not reproduce without permission.