

Python GradientBoostingClassifier.staged_predict Method Code Examples

This article collects typical usage examples of the staged_predict method of Python's sklearn.ensemble.GradientBoostingClassifier. If you are wondering what exactly GradientBoostingClassifier.staged_predict does, how to call it, or where it is used, the curated code examples below should help. You can also explore further usage examples of sklearn.ensemble.GradientBoostingClassifier, the class this method belongs to.


The following presents 7 code examples of GradientBoostingClassifier.staged_predict, ordered by popularity.
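Before the collected examples, here is a minimal, self-contained sketch of what staged_predict does: it is a generator that yields the ensemble's class predictions after each boosting stage, so you can track accuracy as a function of the number of trees without refitting. The dataset and parameters below are illustrative assumptions, not taken from any of the examples that follow.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
clf.fit(X_train, y_train)

# one accuracy value per boosting stage, without refitting the model
val_acc = np.array([accuracy_score(y_val, pred)
                    for pred in clf.staged_predict(X_val)])
best_n = int(val_acc.argmax()) + 1
print("best number of trees:", best_n, "validation accuracy:", val_acc.max())

A common follow-up is to refit with n_estimators=best_n, or simply to stop reading the generator once the validation score plateaus.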

Example 1: main

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_predict [as alias]
import numpy as np
import matplotlib.pyplot as plt

def main():
    print('start')
    from sklearn.ensemble import GradientBoostingClassifier
    # GradientBoosting (the author's own implementation) and conc (an
    # accuracy-like metric) are project-local helpers defined elsewhere.
    sett = np.loadtxt('../spam.train.txt')
    X = sett[:, 1:]
    y = sett[:, 0]
    #import random
    #rnd = np.array([random.randint(0,4) for i in range(len(y))])
    Xf = X
    yf = y
    test = np.loadtxt('../spam.test.txt')
    Xa = test[:, 1:]
    ya = test[:, 0]
    n_est = 300
    rate = 0.1

    gb = GradientBoosting(learning_rate=rate, n_estimators=n_est)
    gb.fit(Xf, yf)
    # the original returned gb here, making everything below unreachable;
    # the return is moved to the end of the function instead
    print(conc(gb.predict(Xa), ya))
        
    score_train = gb.score(X, y)
    score_test = gb.score(Xa, ya)
    gb2 = GradientBoostingClassifier(learning_rate=rate, n_estimators=n_est)
    gb2.fit(Xf, yf)
    
    score_train_skl = []
    for pred in gb2.staged_predict(X):
        score_train_skl.append(conc(y, pred))
    score_train_skl  = np.array(score_train_skl)
    
    score_test_skl = []
    for pred in gb2.staged_predict(Xa):
        score_test_skl.append(conc(ya, pred))
    score_test_skl  = np.array(score_test_skl)    
    
    plt.figure(figsize=(10, 5))
    plt.grid(True)
    plt.plot(range(n_est), score_train, 'g-')
    plt.plot(range(n_est), score_train_skl, 'b-')
    plt.plot(range(n_est), score_train_skl - 0.03, 'r')
    plt.legend(['myGradientBoosting', 'sklearnGradientBoosting', 'Danger board!!!!'], loc='lower right' )
    plt.title('Accuracy on train data (GradientBoosting)')
    plt.xlabel('Number of trees')
    plt.show()
    
    plt.figure(figsize=(10, 5))
    plt.grid(True)
    plt.plot(range(n_est), score_test, 'g-')
    plt.plot(range(n_est), score_test_skl, 'b-')
    plt.plot(range(n_est), score_test_skl - 0.03, 'r')
    plt.legend(['myGradientBoosting', 'sklearnGradientBoosting', 'Danger board!!!'], loc='lower right')
    plt.title('Accuracy on test data (GradientBoosting)')
    plt.xlabel('Number of trees')
    plt.show()
    return gb
Developer: dShvetsov | Project: DataMining | Lines: 58 | Source: GradientBoosting.py

Example 2: test_staged_predict_proba

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_predict [as alias]
def test_staged_predict_proba():
    # From scikit-learn's test suite; np, datasets, NotFittedError and the
    # assert_* helpers come from the test module's header imports.
    # Test whether staged predict proba eventually gives
    # the same prediction.
    X, y = datasets.make_hastie_10_2(n_samples=1200,
                                     random_state=1)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    clf = GradientBoostingClassifier(n_estimators=20)
    # test raise NotFittedError if not fitted
    assert_raises(NotFittedError, lambda X: np.fromiter(
        clf.staged_predict_proba(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)

    # test if prediction for last stage equals ``predict``
    for y_pred in clf.staged_predict(X_test):
        assert_equal(y_test.shape, y_pred.shape)

    assert_array_equal(clf.predict(X_test), y_pred)

    # test if prediction for last stage equals ``predict_proba``
    for staged_proba in clf.staged_predict_proba(X_test):
        assert_equal(y_test.shape[0], staged_proba.shape[0])
        assert_equal(2, staged_proba.shape[1])

    assert_array_almost_equal(clf.predict_proba(X_test), staged_proba)
Developer: amueller | Project: scikit-learn | Lines: 28 | Source: test_gradient_boosting.py
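As a complementary sketch (not part of the test above): because staged_predict_proba yields an (n_samples, n_classes) probability array per stage, it can drive a per-stage log-loss curve. The dataset mirrors the test; everything else is an illustrative assumption.

import numpy as np
from sklearn import datasets
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import log_loss

X, y = datasets.make_hastie_10_2(n_samples=1200, random_state=1)
X_train, y_train = X[:200], y[:200]
X_test, y_test = X[200:], y[200:]

clf = GradientBoostingClassifier(n_estimators=20).fit(X_train, y_train)

# log_loss accepts the per-stage (n_samples, 2) probability arrays directly
test_loss = np.array([log_loss(y_test, proba)
                      for proba in clf.staged_predict_proba(X_test)])
print("stage with lowest test log-loss:", int(test_loss.argmin()) + 1)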

Example 3: __init__

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_predict [as alias]
    def __init__(self, estimator,
                 phase, 
                 n_jobs, cv_k_fold, parameters,
                 X_train, y_train,
                 X_test, y_test):
        # estimator : the ensemble learner

        # cv : if train : get best parameter
        if phase == "train":
            clf = GradientBoostingClassifier()
            gscv = GridSearchCV(clf, parameters, 
                                verbose = 10, 
                                scoring = "f1",#scoring = "precision" or "recall"
                                n_jobs = n_jobs, cv = cv_k_fold)
            gscv.fit(X_train, y_train)
            self.best_params = gscv.best_params_
            
            clf.set_params(**gscv.best_params_)
            clf.fit(X_train, y_train)
            train_loss = clf.train_score_
            test_loss = np.empty(len(clf.estimators_))
            # note: clf.loss_ expects raw decision-function scores, so feeding it
            # the class labels from staged_predict (as here) is only approximate;
            # staged_decision_function is the stricter choice (see the sketch below)
            for i, pred in enumerate(clf.staged_predict(X_test)):
                test_loss[i] = clf.loss_(y_test, pred)
            plt.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
            plt.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
            plt.xlabel('the number of weak learner:Boosting Iterations')
            plt.ylabel('Loss')
            plt.legend(loc="best")
            plt.savefig("loss_cv.png")
            plt.close()

        # the original called set_params(**gscv.best_params_) here, but gscv is
        # only defined in the "train" branch; use the stored best parameters
        estimator.set_params(**self.best_params)
        self.estimator = estimator
        self.one_hot_encoding = None
Developer: Quasi-quant2010 | Project: Stacking | Lines: 36 | Source: stack_test.backup.py
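For reference, a hedged sketch of the loss-curve idea from Example 3, but using staged_decision_function, which yields the raw scores that the loss_ attribute of older scikit-learn releases expects (loss_ has since been removed); this follows scikit-learn's own gradient-boosting regularization example. The dataset is a stand-in for the example's X_train/X_test.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_hastie_10_2(n_samples=4000, random_state=0)
_, y = np.unique(y, return_inverse=True)  # loss_ expects {0, 1} labels
X_train, X_test = X[:2000], X[2000:]
y_train, y_test = y[:2000], y[2000:]

clf = GradientBoostingClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)

# raw decision-function scores per stage, as loss_ expects
test_loss = np.empty(len(clf.estimators_))
for i, raw in enumerate(clf.staged_decision_function(X_test)):
    test_loss[i] = clf.loss_(y_test, raw)

stages = np.arange(len(clf.estimators_)) + 1
plt.plot(stages, test_loss, label='Test')
plt.plot(stages, clf.train_score_, label='Train')
plt.xlabel('Boosting iterations')
plt.ylabel('Deviance')
plt.legend(loc='best')
plt.show()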

Example 4: test_gbm_classifier_backupsklearn

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_predict [as alias]
def test_gbm_classifier_backupsklearn(backend='auto'):
    # np and pd (numpy, pandas) come from the test module's header imports
    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    import h2o4gpu
    Solver = h2o4gpu.GradientBoostingClassifier

    # Run the h2o4gpu version of GradientBoostingClassifier
    gbm = Solver(backend=backend, random_state=1234)
    print("h2o4gpu fit()")
    gbm.fit(X, y)

    # Run the scikit-learn version of GradientBoostingClassifier
    from sklearn.ensemble import GradientBoostingClassifier
    gbm_sk = GradientBoostingClassifier(random_state=1234, max_depth=3)
    print("Scikit fit()")
    gbm_sk.fit(X, y)

    if backend == "sklearn":
        assert (gbm.predict(X) == gbm_sk.predict(X)).all() == True
        assert (gbm.predict_log_proba(X) == gbm_sk.predict_log_proba(X)).all() == True
        assert (gbm.predict_proba(X) == gbm_sk.predict_proba(X)).all() == True
        assert (gbm.score(X, y) == gbm_sk.score(X, y)).all() == True
        assert (gbm.decision_function(X)[1] == gbm_sk.decision_function(X)[1]).all() == True
        assert np.allclose(list(gbm.staged_predict(X)), list(gbm_sk.staged_predict(X)))
        assert np.allclose(list(gbm.staged_predict_proba(X)), list(gbm_sk.staged_predict_proba(X)))
        assert (gbm.apply(X) == gbm_sk.apply(X)).all() == True

        print("Estimators")
        print(gbm.estimators_)
        print(gbm_sk.estimators_)

        print("loss")
        print(gbm.loss_)
        print(gbm_sk.loss_)
        assert gbm.loss_.__dict__ == gbm_sk.loss_.__dict__

        print("init_")
        print(gbm.init)
        print(gbm_sk.init)

        print("Feature importance")
        print(gbm.feature_importances_)
        print(gbm_sk.feature_importances_)
        assert (gbm.feature_importances_ == gbm_sk.feature_importances_).all() == True

        print("train_score_")
        print(gbm.train_score_)
        print(gbm_sk.train_score_)
        assert (gbm.train_score_ == gbm_sk.train_score_).all() == True
Developer: wamsiv | Project: h2o4gpu | Lines: 52 | Source: test_xgb_sklearn_wrapper.py

Example 5: __init__

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_predict [as alias]
    def __init__(self, estimator,
                 phase, 
                 n_jobs, cv_k_fold, parameters,
                 X_train, y_train,
                 X_test, y_test):
        # estimator : the ensemble learner

        # cv : if train : get best parameter
        if phase == "train":
            gscv = GridSearchCV(GradientBoostingClassifier(), 
                                parameters, 
                                verbose = 10, 
                                scoring = "f1",#scoring = "precision" or "recall"
                                n_jobs = n_jobs, cv = cv_k_fold)
            gscv.fit(X_train, y_train)
            best_params = gscv.best_params_
            print "[GBDT's Best Parameter]", gscv.best_params_
            
            clf = GradientBoostingClassifier()
            clf.set_params(**gscv.best_params_)
            del gscv
            clf.fit(X_train, y_train)
            train_loss = clf.train_score_
            test_loss = np.empty(len(clf.estimators_))
            for i, pred in enumerate(clf.staged_predict(X_test)):
                test_loss[i] = clf.loss_(y_test, pred)
            plt.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
            plt.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
            plt.xlabel('the number of weak learner:Boosting Iterations')
            plt.ylabel('Loss')
            plt.legend(loc="best")
            plt.savefig("loss_cv.png")
            plt.close()
        else:
            # set_params needs scalar values here; the original wrapped each
            # value in a list (GridSearchCV grid style), which would break the
            # estimator at fit time
            best_params = {'loss' : 'deviance',
                           'learning_rate' : 0.1,
                           'max_depth': 2,
                           'min_samples_leaf': 8,
                           'max_features': 5,  # max_features must be in (0, n_features]
                           'max_leaf_nodes' : 20,
                           'subsample' : 0.1,
                           'n_estimators' : 100,
                           'random_state' : 0}
            
        estimator.set_params(**best_params)
        self.estimator = estimator
        self.one_hot_encoding = None
Developer: Quasi-quant2010 | Project: Stacking | Lines: 49 | Source: feature_transformation_gbdt.py

Example 6: GradientBoostingClassifier

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_predict [as alias]
        # (excerpt from a class's __init__; the method signature is omitted in the source)
        # cv : if train : get best parameter
        if phase == "train":
            clf = GradientBoostingClassifier()
            gscv = GridSearchCV(clf, parameters, 
                                verbose = 10, 
                                scoring = "f1",#scoring = "precision" or "recall"
                                n_jobs = n_jobs, cv = cv_k_fold)
            gscv.fit(X_train, y_train)
            self.best_params = gscv.best_params_
            
            clf.set_params(**gscv.best_params_)
            clf.fit(X_train, y_train)
            train_loss = clf.train_score_
            test_loss = np.empty(len(clf.estimators_))
            for i, pred in enumerate(clf.staged_predict(X_test)):
                test_loss[i] = clf.loss_(y_test, pred)
            plt.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
            plt.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
            plt.xlabel('the number of weak learner:Boosting Iterations')
            plt.ylabel('Loss')
            plt.legend(loc="best")
            plt.savefig("loss_cv.png")
            plt.close()

        # as in Example 3, gscv is undefined outside the "train" branch;
        # use the stored best parameters instead
        estimator.set_params(**self.best_params)
        self.estimator = estimator
        self.one_hot_encoding = None
        
    def fit(self, X, y):
        self.fit_transform(X, y)
Developer: Quasi-quant2010 | Project: gbdt | Lines: 32 | Source: feature_transformation_ver3.py

Example 7: gbdt_plus_liner_classifier_grid_search

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_predict [as alias]
def gbdt_plus_liner_classifier_grid_search(stack_setting_,
                                           upper_param_keys=None, upper_param_vals=None,
                                           lower_param_keys=None, lower_param_vals=None,
                                           num_proc=None):

    """
     upper model is GBDT or Random Forest
     lower model is Linear Classifier
    """
    if stack_setting_ is None:
        sys.stderr.write('No stack-setting JSON file was given\n')
        sys.exit()

    if num_proc is None:
        num_proc = 6


    # 1. upper model
    if upper_param_keys is None:
        upper_param_keys = ['model_type', 'n_estimators', 'loss', 'random_state', 'subsample', 'max_features', 'max_leaf_nodes', 'learning_rate', 'max_depth', 'min_samples_leaf']

    if upper_param_vals is None:
        upper_param_vals = [[GradientBoostingClassifier], [100], ['deviance'], [0], [0.1], [5], [20], [0.1], [2], [8]]


    # grid search for upper model : GBDT or Random Forest
    # ExperimentL1 is model-agnostic; the data, by contrast, is fixed
    exp = ExperimentL1(data_folder = stack_setting_['0-Level']['folder'],
                       train_fname = stack_setting_['0-Level']['train'], 
                       test_fname = stack_setting_['0-Level']['test'])

    # GridSearch holds a single model; which model is determined by the parameters
    #gs = GridSearch(SklearnModel, exp, upper_param_keys, upper_param_vals,
    #                cv_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['cv']['folder'],
    #                cv_out = stack_setting_['1-Level']['gbdt_linear']['upper']['cv']['cv_out'], 
    #                cv_pred_out = stack_setting_['1-Level']['gbdt_linear']['upper']['cv']['cv_pred_out'], 
    #                refit_pred_out = stack_setting_['1-Level']['gbdt_linear']['upper']['cv']['refit_pred_out'])
    #upper_best_param, upper_best_score = gs.search_by_cv()


    model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
    model_train_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['train']
    model_train_fname = os.path.join(Config.get_string('data.path'), 
                                     model_folder, 
                                     model_train_fname)
    model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
    model_test_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['test']
    model_test_fname = os.path.join(Config.get_string('data.path'), 
                                    model_folder, 
                                    model_test_fname)
    upper_param_dict = dict(zip(upper_param_keys, upper_param_vals))
    if os.path.isfile(model_train_fname) is False and \
            os.path.isfile(model_test_fname) is False:
        #upper_param_dict['model_type'] == [GradientBoostingClassifier]
        del upper_param_dict['model_type']        
        clf = GradientBoostingClassifier()
        clf_cv = GridSearchCV(clf, upper_param_dict, 
                              verbose = 10, 
                              scoring = "f1",#scoring = "precision" or "recall"
                              n_jobs = num_proc, cv = 5)
        
        X_train, y_train = exp.get_train_data()
        clf_cv.fit(X_train, y_train)
        upper_best_params = clf_cv.best_params_
        print(upper_best_params)
        del clf_cv
        clf.set_params(**upper_best_params)
        clf.fit(X_train, y_train)
        train_loss = clf.train_score_
        test_loss = np.empty(len(clf.estimators_))
        X_test, y_test = exp.get_test_data()
        for i, pred in enumerate(clf.staged_predict(X_test)):
            test_loss[i] = clf.loss_(y_test, pred)

        graph_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['graph']['folder']
        graph_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['graph']['name']
        graph_fname = os.path.join(Config.get_string('data.path'), 
                                   graph_folder, 
                                   graph_fname)
        gs = GridSpec(2,2)
        ax1 = plt.subplot(gs[0,1])
        ax2 = plt.subplot(gs[1,1])
        ax3 = plt.subplot(gs[:,0])

        ax1.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
        ax1.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
        ax1.set_xlabel('the number of weak learner:Boosting Iterations')
        ax1.set_ylabel('%s Loss' % (upper_best_params.get('loss','RMSE')))
        ax1.legend(loc="best")       

        # dump the transformed features
        clf = TreeTransform(GradientBoostingClassifier(),
                            best_params_ = upper_best_params)
        if type(X_train) == pd.core.frame.DataFrame:
            clf.fit(X_train.as_matrix().astype(np.float32), y_train)
        elif isinstance(X_train, np.ndarray):
            clf.fit(X_train.astype(np.float32), y_train)

        # train result
        train_loss = clf.estimator_.train_score_
#......... part of the code is omitted here .........
Developer: Quasi-quant2010 | Project: Stacking | Lines: 103 | Source: run_gbdt_plus_liner_classifier_grid_search.20160414.py
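Example 7's larger goal is to feed GBDT-derived features into a linear classifier. The author's TreeTransform is not shown here, so the following is a hedged, self-contained sketch of the general pattern only: one-hot-encode the leaf indices returned by apply() and fit a logistic regression on them, as in scikit-learn's "feature transformations with ensembles of trees" example. All names and parameters are illustrative.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

X, y = make_classification(n_samples=2000, random_state=0)
# separate splits for the tree model and the linear model to avoid leakage
X_gbdt, X_lr, y_gbdt, y_lr = train_test_split(X, y, test_size=0.5, random_state=0)

gbdt = GradientBoostingClassifier(n_estimators=100, random_state=0)
gbdt.fit(X_gbdt, y_gbdt)

# apply() returns, per sample and per tree, the index of the leaf the sample
# falls into: shape (n_samples, n_estimators, 1) for binary classification
leaves = gbdt.apply(X_lr)[:, :, 0]

enc = OneHotEncoder(handle_unknown='ignore')
lr = LogisticRegression(max_iter=1000)
lr.fit(enc.fit_transform(leaves), y_lr)

# downstream prediction reuses the same leaf encoding (in-sample score,
# for illustration only)
print(lr.score(enc.transform(gbdt.apply(X_lr)[:, :, 0]), y_lr))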


Note: the sklearn.ensemble.GradientBoostingClassifier.staged_predict examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs; the snippets were selected from community-contributed open-source projects. Copyright in the source code remains with the original authors; consult each project's license before distributing or using the code, and do not republish without permission.