

Python GradientBoostingClassifier.decision_function Method Code Examples

This article collects typical usage examples of the Python method sklearn.ensemble.GradientBoostingClassifier.decision_function. If you are unsure what GradientBoostingClassifier.decision_function does or how to call it, the curated code examples below may help. You can also explore further usage examples of sklearn.ensemble.GradientBoostingClassifier itself.


Six code examples of GradientBoostingClassifier.decision_function are shown below, sorted by popularity by default.
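
Before the examples, here is a minimal, self-contained sketch of how decision_function is typically called on a fitted classifier (the data and variable names below are illustrative, not taken from the examples):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(n_samples=200, random_state=0)
clf = GradientBoostingClassifier(n_estimators=50, random_state=0).fit(X, y)

# For a binary problem, decision_function returns one raw additive score per
# sample (1-D in recent scikit-learn); positive scores favor the positive class.
scores = clf.decision_function(X)
print(scores.shape)  # (200,)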

Example 1: test_max_feature_regression

# Module to import: from sklearn.ensemble import GradientBoostingClassifier
# Method demonstrated: GradientBoostingClassifier.decision_function
from sklearn import datasets  # module-level import in the original test file

def test_max_feature_regression():
    # Test to make sure the random state is set properly.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=5,
                                      max_depth=2, learning_rate=.1,
                                      max_features=2, random_state=1)
    gbrt.fit(X_train, y_train)
    deviance = gbrt.loss_(y_test, gbrt.decision_function(X_test))
    assert deviance < 0.5, "GB failed with deviance %.4f" % deviance
Developer: amueller | Project: scikit-learn | Lines: 15 | Source: test_gradient_boosting.py
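
Note that the loss_ attribute used on the last line was removed in newer scikit-learn releases. With labels encoded as {-1, +1} (as make_hastie_10_2 produces), the same binomial deviance can be recovered from the raw scores with log_loss; a hedged sketch, reusing gbrt, X_test and y_test from above:

import numpy as np
from scipy.special import expit
from sklearn.metrics import log_loss

score = gbrt.decision_function(X_test)   # raw additive scores
proba_pos = expit(score)                 # P(y = +1) under the deviance loss
y01 = (y_test == 1).astype(int)          # map {-1, +1} labels to {0, 1}
# sklearn's old BinomialDeviance equals twice the mean log-loss:
deviance = 2.0 * log_loss(y01, proba_pos)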

Example 2: test_gbm_classifier_backupsklearn

# Module to import: from sklearn.ensemble import GradientBoostingClassifier
# Method demonstrated: GradientBoostingClassifier.decision_function
import numpy as np
import pandas as pd

def test_gbm_classifier_backupsklearn(backend='auto'):
    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')  # all but last column
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')   # last column is the label
    import h2o4gpu
    Solver = h2o4gpu.GradientBoostingClassifier

    # Run the h2o4gpu version of GradientBoostingClassifier
    gbm = Solver(backend=backend, random_state=1234)
    print("h2o4gpu fit()")
    gbm.fit(X, y)

    # Run the scikit-learn version of GradientBoostingClassifier
    from sklearn.ensemble import GradientBoostingClassifier
    gbm_sk = GradientBoostingClassifier(random_state=1234, max_depth=3)
    print("Scikit fit()")
    gbm_sk.fit(X, y)

    if backend == "sklearn":
        assert (gbm.predict(X) == gbm_sk.predict(X)).all()
        assert (gbm.predict_log_proba(X) == gbm_sk.predict_log_proba(X)).all()
        assert (gbm.predict_proba(X) == gbm_sk.predict_proba(X)).all()
        assert gbm.score(X, y) == gbm_sk.score(X, y)  # score() returns a scalar
        assert (gbm.decision_function(X)[1] == gbm_sk.decision_function(X)[1]).all()
        assert np.allclose(list(gbm.staged_predict(X)), list(gbm_sk.staged_predict(X)))
        assert np.allclose(list(gbm.staged_predict_proba(X)), list(gbm_sk.staged_predict_proba(X)))
        assert (gbm.apply(X) == gbm_sk.apply(X)).all()

        print("Estimators")
        print(gbm.estimators_)
        print(gbm_sk.estimators_)

        print("loss")
        print(gbm.loss_)
        print(gbm_sk.loss_)
        assert gbm.loss_.__dict__ == gbm_sk.loss_.__dict__

        print("init_")
        print(gbm.init)
        print(gbm_sk.init)

        print("Feature importance")
        print(gbm.feature_importances_)
        print(gbm_sk.feature_importances_)
        assert (gbm.feature_importances_ == gbm_sk.feature_importances_).all() == True

        print("train_score_")
        print(gbm.train_score_)
        print(gbm_sk.train_score_)
        assert (gbm.train_score_ == gbm_sk.train_score_).all() == True
Developer: wamsiv | Project: h2o4gpu | Lines: 52 | Source: test_xgb_sklearn_wrapper.py

Example 3: test_probability_exponential

# Module to import: from sklearn.ensemble import GradientBoostingClassifier
# Method demonstrated: GradientBoostingClassifier.decision_function
def test_probability_exponential():
    """Predict probabilities."""
    # X, y, T, true_result and the assert_* helpers are module-level
    # fixtures/imports in the original test file.
    clf = GradientBoostingClassifier(loss="exponential", n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert np.all(y_proba >= 0.0)
    assert np.all(y_proba <= 1.0)
    score = clf.decision_function(T).ravel()
    assert_array_equal(y_proba[:, 1], 1.0 / (1.0 + np.exp(-2 * score)))

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result)
Developer: Anubhav27 | Project: scikit-learn | Lines: 21 | Source: test_gradient_boosting.py
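
The last assertion encodes the loss-specific link between raw scores and probabilities: with loss="exponential" the positive-class probability is expit(2 * score), while with the default deviance (log-loss) it is expit(score). A small hedged check, assuming clf is any fitted binary GradientBoostingClassifier and T its input:

import numpy as np
from scipy.special import expit

score = clf.decision_function(T).ravel()
proba = clf.predict_proba(T)

if clf.loss == "exponential":
    np.testing.assert_allclose(proba[:, 1], expit(2.0 * score))
else:  # the default 'deviance' (log-loss)
    np.testing.assert_allclose(proba[:, 1], expit(score))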

Example 4: run

# Module to import: from sklearn.ensemble import GradientBoostingClassifier
# Method demonstrated: GradientBoostingClassifier.decision_function
    def run(self):

        if not self.verify_data():
            print ("\x1b[31mERROR: training input data array shapes are incompatible!\x1b[0m")
            raise Exception("BadTrainingInputData")

        applyClassWeights = False
        if self.parameters['classifier'] == 'GradientBoostingClassifier':
            clf = GradientBoostingClassifier(
                    min_samples_leaf=self.parameters['min_samples_leaf'], 
                    max_depth=self.parameters['max_depth'], 
                    max_leaf_nodes=self.parameters['max_leaf_nodes'],
                    criterion=self.parameters['criterion'],
                    max_features=self.parameters['max_features'],
                    n_estimators=self.parameters['n_estimators'], 
                    learning_rate=self.parameters['learning_rate'], 
                    subsample=self.parameters['subsample'],
                    min_impurity_split=self.parameters['min_impurity_split'],
                )
            if self.parameters['class_weight'] == 'balanced':
                applyClassWeights = True
        elif self.parameters['classifier'] == 'RandomForestClassifier':
            clf = RandomForestClassifier(
                    min_samples_leaf=self.parameters['min_samples_leaf'], 
                    max_depth=self.parameters['max_depth'], 
                    max_leaf_nodes=self.parameters['max_leaf_nodes'],
                    criterion=self.parameters['criterion'],
                    max_features=self.parameters['max_features'],
                    n_estimators=self.parameters['n_estimators'], 
                    bootstrap=self.parameters['bootstrap'],
                )
            if self.parameters['class_weight'] == 'balanced':
                applyClassWeights = True
        elif self.parameters['classifier'] == 'ExtraTreesClassifier':
            clf = ExtraTreesClassifier(
                    min_samples_leaf=self.parameters['min_samples_leaf'], 
                    max_depth=self.parameters['max_depth'], 
                    max_leaf_nodes=self.parameters['max_leaf_nodes'],
                    criterion=self.parameters['criterion'],
                    max_features=self.parameters['max_features'],
                    n_estimators=self.parameters['n_estimators'], 
                    bootstrap=self.parameters['bootstrap'],
                )
            if self.parameters['class_weight'] == 'balanced':
                applyClassWeights = True
        elif self.parameters['classifier'] == 'FT_GradientBoostingClassifier':
            rt = RandomTreesEmbedding(max_depth=3, n_estimators=20, random_state=0)
            clf0 = GradientBoostingClassifier(
                    min_samples_leaf=self.parameters['min_samples_leaf'], 
                    max_depth=self.parameters['max_depth'], 
                    max_leaf_nodes=self.parameters['max_leaf_nodes'],
                    criterion=self.parameters['criterion'],
                    max_features=self.parameters['max_features'],
                    n_estimators=self.parameters['n_estimators'], 
                    learning_rate=self.parameters['learning_rate'], 
                    subsample=self.parameters['subsample'],
                    min_impurity_split=self.parameters['min_impurity_split'],
                )
            if self.parameters['class_weight'] == 'balanced':
                applyClassWeights = True
            clf = make_pipeline(rt, clf0)
        elif self.parameters['classifier'] == 'XGBClassifier':
            clf = XGBClassifier(
                    learning_rate=self.parameters['learning_rate'],
                    max_depth=self.parameters['max_depth'],
                    n_estimators=self.parameters['n_estimators'],
                    objective='binary:logitraw',
                    colsample_bytree=self.parameters['colsample_bytree'],
                    subsample=self.parameters['subsample'],
                    min_child_weight=self.parameters['min_child_weight'],
                    gamma=self.parameters['gamma'] if 'gamma' in self.parameters else 0.0,
                    #reg_alpha=8,
                    reg_lambda=self.parameters['reg_lambda'] if 'reg_lambda' in self.parameters else 1.0,
                    reg_alpha=self.parameters['reg_alpha'] if 'reg_alpha' in self.parameters else 0.0,
                    ) 
            if self.parameters['class_weight'] == 'balanced':
                applyClassWeights = True
        elif self.parameters['classifier'] == 'MLPClassifier':
            classifierParams = {k: v for k, v in self.parameters.items() if k in [
                'solver', 'alpha', 'hidden_layer_sizes', 'max_iter', 'warm_start',
                'learning_rate_init', 'learning_rate', 'momentum', 'epsilon',
                'beta_1', 'beta_2', 'validation_fraction', 'early_stopping']}
            clf = MLPClassifier(**classifierParams) 
        elif self.parameters['classifier'] in ['SVC', 'LinearSVC']:
            '''
            clf = SVC(
                        C=1.0,
                        cache_size=4000,
                        class_weight='balanced',
                        coef0=0.0,
                        decision_function_shape='ovr',
                        degree=3,
                        gamma='auto',
                        kernel='rbf',
                        max_iter=100000,
                        probability=False,
                        random_state=None,
                        shrinking=True,
                        tol=0.001,
                        verbose=True
                    )
            '''
            bagged = int(self.parameters['bagged']) if 'bagged' in self.parameters else False
#......... remaining code omitted here .........
Developer: GLP90 | Project: Xbb | Lines: 103 | Source: run_training_scikit.py
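
The applyClassWeights flag above exists because GradientBoostingClassifier, unlike RandomForestClassifier, has no class_weight constructor argument; balancing has to be routed through fit's sample_weight instead. The elided tail of run() is not shown, so the following is only a sketch of how such a flag is commonly applied:

from sklearn.utils.class_weight import compute_sample_weight

if applyClassWeights:
    # Emulate class_weight='balanced' for estimators that lack the parameter.
    weights = compute_sample_weight("balanced", y_train)
    clf.fit(X_train, y_train, sample_weight=weights)
else:
    clf.fit(X_train, y_train)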

Example 5: gbdt_plus_liner_classifier_grid_search

# Module to import: from sklearn.ensemble import GradientBoostingClassifier
# Method demonstrated: GradientBoostingClassifier.decision_function
def gbdt_plus_liner_classifier_grid_search(stack_setting_,
                                           upper_param_keys=None, upper_param_vals=None,
                                           lower_param_keys=None, lower_param_vals=None,
                                           num_proc=None):

    """
     upper model is GBDT or Random Forest
     lower model is Linear Classifier
    """
    if stack_setting_ is None:
        sys.stderr.write('You have no setting Json file\n')
        sys.exit()

    if num_proc is None:
        num_proc = 6
    upper_best_params = None
    lower_best_param = None


    # 1. upper model
    if upper_param_keys is None:
        upper_param_keys = ['model_type', 'n_estimators', 'loss', 'random_state', 'subsample', 'max_features', 'max_leaf_nodes', 'learning_rate', 'max_depth', 'min_samples_leaf']

    if upper_param_vals is None:
        upper_param_vals = [[GradientBoostingClassifier], [100], ['deviance'], [0], [0.1], [5], [20], [0.1], [2], [8]]


    # grid search for upper model : GBDT or Random Forest
    # ExperimentL1 has model free. On the other hand, data is fix
    exp = ExperimentL1(data_folder = stack_setting_['0-Level']['folder'],
                       train_fname = stack_setting_['0-Level']['train'], 
                       test_fname = stack_setting_['0-Level']['test'])


    model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
    model_train_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['train']
    model_train_fname = os.path.join(Config.get_string('data.path'), 
                                     model_folder, 
                                     model_train_fname)
    model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
    model_test_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['test']
    model_test_fname = os.path.join(Config.get_string('data.path'), 
                                    model_folder, 
                                    model_test_fname)
    upper_param_dict = dict(zip(upper_param_keys, upper_param_vals))
    if os.path.isfile(model_train_fname) is False and \
            os.path.isfile(model_test_fname) is False:
        #upper_param_dict['model_type'] == [GradientBoostingClassifier]
        del upper_param_dict['model_type']        
        clf = GradientBoostingClassifier()
        clf_cv = GridSearchCV(clf, upper_param_dict, 
                              verbose = 10, 
                              scoring = stack_setting_['1-Level']['gbdt_linear']['upper']['metrics'],#scoring = "precision" or "recall" or "f1"
                              n_jobs = num_proc, cv = 5)
        
        X_train, y_train = exp.get_train_data()
        clf_cv.fit(X_train, y_train)
        upper_best_params = clf_cv.best_params_
        print(upper_best_params)
        del clf_cv
        clf.set_params(**upper_best_params)
        clf.fit(X_train, y_train)
        train_loss = clf.train_score_
        test_loss = np.empty(len(clf.estimators_))
        X_test, y_test = exp.get_test_data()
        for i, pred in enumerate(clf.staged_predict(X_test)):
            test_loss[i] = clf.loss_(y_test, pred)

        graph_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['graph']['folder']
        graph_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['graph']['name']
        graph_fname = os.path.join(Config.get_string('data.path'), 
                                   graph_folder, 
                                   graph_fname)
        gs = GridSpec(2,2)
        ax1 = plt.subplot(gs[0,1])
        ax2 = plt.subplot(gs[1:,1])
        ax3 = plt.subplot(gs[:,0])

        #ax1.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
        #ax1.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
        #ax1.set_xlabel('the number of weak learner')
        #ax1.set_ylabel('%s Loss' % (upper_best_params.get('loss','RMSE')))
        #ax1.legend(loc="best") 
        confidence_score = clf.decision_function(X_test)
        #sns.distplot(confidence_score, kde=False, rug=False, ax=ax1)
        num_bins = 100
        try:
            counts, bin_edges = np.histogram(confidence_score, bins=num_bins, density=True)
        except Exception:
            # fall back to numpy's default binning (normed= was removed from numpy)
            counts, bin_edges = np.histogram(confidence_score, density=True)
        cdf = np.cumsum(counts)
        ax1.plot(bin_edges[1:], cdf / cdf.max())
        ax1.set_ylabel('CDF')
        ax1.set_xlabel('Decision_Function:Confidence_Score', fontsize=10)

        # dump for the transformated feature
        clf = TreeTransform(GradientBoostingClassifier(),
                            best_params_ = upper_best_params)
        if isinstance(X_train, pd.DataFrame):
            clf.fit(X_train.to_numpy().astype(np.float32), y_train)  # as_matrix() was removed from pandas
#......... remaining code omitted here .........
Developer: Quasi-quant2010 | Project: Stacking | Lines: 103 | Source: run_gbdt_plus_liner_classifier_grid_search.py
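
The per-stage test loss in this example leans on clf.loss_, which newer scikit-learn removed. An equivalent curve can be built from staged_decision_function plus log_loss; a sketch, assuming a binary target y_test in {0, 1}:

import numpy as np
from scipy.special import expit
from sklearn.metrics import log_loss

test_loss = np.empty(len(clf.estimators_))
for i, raw in enumerate(clf.staged_decision_function(X_test)):
    # staged_decision_function yields raw scores after each boosting stage
    test_loss[i] = log_loss(y_test, expit(raw.ravel()))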

Example 6: load_data

# Module to import: from sklearn.ensemble import GradientBoostingClassifier
# Method demonstrated: GradientBoostingClassifier.decision_function

X_train, X_test, y_train, y_test, ind_train, ind_test = load_data(full=False)

clf = GradientBoostingClassifier(n_estimators=500, max_depth=6,
                                 learning_rate=0.1, max_features=256,
                                 min_samples_split=15, verbose=3,
                                 random_state=13)
print('_' * 80)
print('training')
print()
print(clf)
clf.fit(X_train, y_train)

if y_test is not None:
    from sklearn.metrics import roc_auc_score  # named auc_score in very old scikit-learn releases
    print(clf)

    y_scores = clf.decision_function(X_test).ravel()
    print("AUC: %.6f" % roc_auc_score(y_test, y_scores))

    if generate_report:
        from error_analysis import error_report

        data = np.load("data/train.npz")
        X = data['X_train']
        X_test_raw = X[ind_test]
        error_report(clf, X_test_raw, y_test, y_scores=y_scores, ind=ind_test)

np.savetxt("gbrt3.txt", clf.decision_function(X_test))
Developer: Sandy4321 | Project: kaggle-marinexplore | Lines: 31 | Source: evaluate.py
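
One caveat when dumping scores as in the last line: for binary problems decision_function returns a single column of scores (1-D in recent scikit-learn), but for K-class problems it returns an (n_samples, K) array with one raw score per class. An illustrative check on synthetic data (all names below are hypothetical):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X3, y3 = make_classification(n_samples=150, n_informative=4, n_classes=3,
                             random_state=0)
clf3 = GradientBoostingClassifier(n_estimators=20, random_state=0).fit(X3, y3)
print(clf3.decision_function(X3).shape)  # (150, 3): one score per class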


Note: The sklearn.ensemble.GradientBoostingClassifier.decision_function examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors, and redistribution or use should follow each project's license. Do not repost without permission.