当前位置: 首页>>代码示例>>Python>>正文


Python GradientBoostingRegressor.staged_predict方法代码示例

本文整理汇总了Python中sklearn.ensemble.GradientBoostingRegressor.staged_predict方法的典型用法代码示例。如果您正苦于以下问题:Python GradientBoostingRegressor.staged_predict方法的具体用法?Python GradientBoostingRegressor.staged_predict怎么用?Python GradientBoostingRegressor.staged_predict使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.GradientBoostingRegressor的用法示例。


在下文中一共展示了GradientBoostingRegressor.staged_predict方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_staged_predict

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)
def test_staged_predict():
    """Check that the last stage of ``staged_predict`` equals ``predict``.

    Fits a default ``GradientBoostingRegressor`` on the first 200 Friedman #1
    samples and evaluates it on the remaining ones. Also checks that
    consuming ``staged_predict`` on an unfitted estimator raises
    ``ValueError``.
    """
    X, y = datasets.make_friedman1(n_samples=1200, random_state=1, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test = X[200:]
    clf = GradientBoostingRegressor()

    # staged_predict is lazy, so the error only surfaces once the generator
    # is consumed -- np.fromiter forces that consumption.
    assert_raises(
        ValueError,
        lambda X: np.fromiter(clf.staged_predict(X), dtype=np.float64),
        X_test,
    )

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Use a dedicated name for the staged output: reusing ``y`` would
    # overwrite the target array loaded above.
    y_staged = None
    for y_staged in clf.staged_predict(X_test):
        assert_equal(y_staged.shape, y_pred.shape)

    # After the loop ``y_staged`` holds the final stage, which must match
    # the regular prediction exactly.
    assert_array_equal(y_pred, y_staged)
开发者ID:arvindchari88,项目名称:newGitTest,代码行数:20,代码来源:test_gradient_boosting.py

示例2: GrientBoostingModel

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)
class GrientBoostingModel(BaseModel):
    """Gradient-boosting regression model plugged into ``BaseModel``.

    Configures a ``GradientBoostingRegressor`` as ``self.clf`` and, after
    testing, records per-stage train/test error curves computed from
    ``staged_predict``.
    """

    def __init__(self):
        BaseModel.__init__(self)
        self.do_cross_val = False

    def setClf(self):
        """Create the estimator and store it on ``self.clf``."""
        self.clf = GradientBoostingRegressor(n_estimators=100, verbose=100)

    def after_test(self):
        """Dump per-stage MAPE and MSE curves to ``temp/trend.csv``.

        Reads ``self.clf``, ``self.X_test``, ``self.y_test``,
        ``self.X_train`` and ``self.y_train``. One row is written per
        boosting stage, and the minimum test-set MAPE is printed.
        """
        scores_test = []
        scores_train = []
        scores_test_mse = []
        scores_train_mse = []

        # staged_predict yields one prediction array per boosting stage, so
        # each list ends up with one entry per stage.
        for y_pred in self.clf.staged_predict(self.X_test):
            scores_test.append(mean_absolute_percentage_error(self.y_test, y_pred))
            scores_test_mse.append(mean_squared_error(self.y_test, y_pred))

        for y_pred in self.clf.staged_predict(self.X_train):
            scores_train.append(mean_absolute_percentage_error(self.y_train, y_pred))
            scores_train_mse.append(mean_squared_error(self.y_train, y_pred))

        pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test,'scores_train_mse': scores_train_mse, 'scores_test_mse': scores_test_mse}).to_csv('temp/trend.csv')
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Test set MAPE minimum: {}".format(np.array(scores_test).min()))

    def __get_intial_model_param(self):
        """Return a small grid that only varies ``n_estimators`` (20..80)."""
        return {'max_depth': [8],'max_features': [9], 'subsample':[0.8], 'learning_rate':[0.1], 'n_estimators': np.arange(20, 81, 10)}

    def __get_model_param(self):
        """Return the wide grid over depth, subsample, learning rate and size."""
        return {'max_depth': np.arange(3,15,1),'subsample': np.linspace(0.5, 1.0,6), 'learning_rate':[0.15,0.1,0.08,0.06,0.04,0.02, 0.01], 'n_estimators': [1000,1300,1500,1800,2000]}

    def getTunedParamterOptions(self):
        """Return the parameter grid to search; currently the wide grid."""
        return self.__get_model_param()
开发者ID:LevinJ,项目名称:Supply-demand-forecasting,代码行数:43,代码来源:gradientboostingmodel.py

示例3: boosting_optimization

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)
def boosting_optimization(X_train, y_train, X_test, y_test):
    """Fit a 3000-tree, depth-10 gradient-boosting regressor and print diagnostics.

    Prints the feature importances (labelled with the feature names from
    ``datasets.load_boston()``), the final staged prediction on the training
    data, the prediction on the test data, and the test targets.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets.
        X_test: Test feature matrix.
        y_test: Test targets; printed for visual comparison only.
    """
    gbm = GradientBoostingRegressor(n_estimators=3000, max_depth=10)
    gbm.fit(X_train, y_train)

    pred = gbm.predict(X_test)

    # staged_predict returns a generator; formatting it directly only prints
    # "<generator object ...>". Consume it and keep the last stage instead,
    # since printing all 3000 stages would be unreadable.
    final_stage = None
    for final_stage in gbm.staged_predict(X_train):
        pass

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("feature importances: ")
    print(pd.Series(gbm.feature_importances_, index=datasets.load_boston().feature_names))
    print("staged predict: {}".format(final_stage))
    print("predict: {}".format(pred))
    print(y_test)
开发者ID:jonmhong,项目名称:Boston-Housing-Prices,代码行数:14,代码来源:boston_housing.py

示例4: test_gbm_regressor_backupsklearn

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)
def test_gbm_regressor_backupsklearn(backend='auto'):
    """Compare h2o4gpu's GradientBoostingRegressor with scikit-learn's.

    Both estimators are fitted on ``./open_data/simple.txt`` (last column is
    the target). The equality assertions only run when ``backend`` is
    ``"sklearn"``; for any other backend the test just checks that both
    models fit without error.

    Args:
        backend: Backend name forwarded to the h2o4gpu estimator.
    """
    df = pd.read_csv("./open_data/simple.txt", delim_whitespace=True)
    X = np.array(df.iloc[:, :-1], dtype='float32', order='C')
    y = np.array(df.iloc[:, -1], dtype='float32', order='C')
    import h2o4gpu
    Solver = h2o4gpu.GradientBoostingRegressor

    # Fit the h2o4gpu gradient-boosting regressor.
    gbm = Solver(backend=backend, random_state=1234)
    print("h2o4gpu fit()")
    gbm.fit(X, y)

    # Fit the scikit-learn gradient-boosting regressor as the reference.
    from sklearn.ensemble import GradientBoostingRegressor
    gbm_sk = GradientBoostingRegressor(random_state=1234, max_depth=3)
    print("Scikit fit()")
    gbm_sk.fit(X, y)

    if backend != "sklearn":
        return

    assert (gbm.predict(X) == gbm_sk.predict(X)).all()
    # Build a list so the per-stage comparison is actually printed; a bare
    # generator expression would only print its object repr.
    print([(a == b).all() for a, b in zip(gbm.staged_predict(X), gbm_sk.staged_predict(X))])
    assert np.allclose(list(gbm.staged_predict(X)), list(gbm_sk.staged_predict(X)))
    # score() returns a scalar, so compare directly instead of calling .all().
    assert gbm.score(X, y) == gbm_sk.score(X, y)
    assert (gbm.apply(X) == gbm_sk.apply(X)).all()

    print("Estimators")
    print(gbm.estimators_)
    print(gbm_sk.estimators_)

    print("loss")
    print(gbm.loss_)
    print(gbm_sk.loss_)
    assert gbm.loss_.__dict__ == gbm_sk.loss_.__dict__

    print("init_")
    print(gbm.init)
    print(gbm_sk.init)

    print("Feature importance")
    print(gbm.feature_importances_)
    print(gbm_sk.feature_importances_)
    assert (gbm.feature_importances_ == gbm_sk.feature_importances_).all()

    print("train_score_")
    print(gbm.train_score_)
    print(gbm_sk.train_score_)
    assert (gbm.train_score_ == gbm_sk.train_score_).all()
开发者ID:wamsiv,项目名称:h2o4gpu,代码行数:49,代码来源:test_xgb_sklearn_wrapper.py

示例5: int

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
# Hold out the last 10% of the training rows as a validation split.
offset = int(x_train.shape[0] * 0.9)
x_train_fit, y_train_fit = x_train[:offset], y_train[:offset]
x_test_fit, y_test_fit = x_train[offset:], y_train[offset:]

clf = GradientBoostingRegressor(n_estimators=700, max_depth=6, learning_rate=0.01, subsample=1.0)

clf.fit(x_train_fit, y_train_fit)
# NOTE(review): fmean_squared_error is defined elsewhere; the label below
# assumes it returns a root-mean-squared error -- confirm.
rmse = fmean_squared_error(y_test_fit, clf.predict(x_test_fit))

print("RMSE: %.4f" % rmse)

# Plot deviance per boosting stage at learning rate 0.01 to see which
# n_estimators to choose; then grid search max_depth, then bag the result.
# Sizes are derived from the estimator so they stay in sync with the
# n_estimators passed above.
test_score = np.zeros((clf.n_estimators,), dtype=np.float64)

for i, y_pred in enumerate(clf.staged_predict(x_test_fit)):
    test_score[i] = clf.loss_(y_test_fit, y_pred)

stages = np.arange(clf.n_estimators) + 1  # 1-based stage numbers for the x axis

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Deviance')
plt.plot(stages, clf.train_score_, 'b-',
         label='Training Set Deviance')
plt.plot(stages, test_score, 'r-',
         label='Test Set Deviance')
plt.legend(loc='upper right')
plt.xlabel('Boosting Iterations')
plt.ylabel('Deviance')
plt.show()

from sklearn import pipeline, grid_search
开发者ID:arslanoqads,项目名称:PyMe,代码行数:33,代码来源:counting_feature+jcd_dice_distance+modeling.py

示例6: plot_data

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)
# Evaluation grid: 500 evenly spaced x values on [0, 10], used by the plots below.
x_plot = np.linspace(0, 10, 500)


def plot_data(figsize=(8, 5)):
    """Open a new figure with the ground-truth curve and the data samples.

    Plots ``ground_truth`` over ``x_plot``, the training points as large
    markers and the test points as small red markers, and limits the x axis
    to (0, 10). The figure is left as the current pyplot figure so callers
    can keep drawing on it.

    Args:
        figsize: ``(width, height)`` forwarded to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    plt.plot(x_plot, ground_truth(x_plot), alpha=0.4, label="ground truth")

    plt.scatter(X_train, y_train, s=100, alpha=0.4)
    plt.scatter(X_test, y_test, s=10, alpha=0.4, color="red")
    plt.xlim((0, 10))
    plt.ylabel("y")
    plt.xlabel("x")


plot_data(figsize=(8, 5))

# Fit a gradient-boosting regressor made of depth-1 trees and overlay its
# staged predictions on the sample plot.
est = GradientBoostingRegressor(n_estimators=1000, max_depth=1, learning_rate=1.0).fit(X_train, y_train)
ax = plt.gca()
first = True
mid_idx = x_plot.shape[0] // 2  # annotate at the middle of the x grid

# Draw every 10th stage of the 1000-stage ensemble.
for pred in islice(est.staged_predict(x_plot[:, np.newaxis]), 0, 1000, 10):
    plt.plot(x_plot, pred, color="r", alpha=0.2)
    if first:
        # xy must be an (x, y) pair; anchor the label on the first drawn
        # curve and only annotate once.
        ax.annotate("High bias - low variance", xy=(x_plot[mid_idx], pred[mid_idx]))
        first = False
plt.plot(x_plot, est.predict(x_plot[:, np.newaxis]), label="RT max_depth=1", color="g", alpha=0.9, linewidth=2)
# Single depth-3 regression tree for comparison.
est = DecisionTreeRegressor(max_depth=3).fit(X_train, y_train)
plt.plot(x_plot, est.predict(x_plot[:, np.newaxis]), label="RT max_depth=3", color="g", alpha=0.7, linewidth=1)
plt.legend(loc="upper left")
plt.show()
开发者ID:brennanhking,项目名称:kaggle,代码行数:32,代码来源:basic_gbm_plot.py

示例7: train_test_split

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)
plt.show()


X_train, X_val, y_train, y_val = train_test_split(X, y)


# Fit one ensemble with 120 trees, then use the staged predictions to find
# how many of those trees give the lowest validation error.
gbrt = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=120,
    learning_rate=0.1,
    random_state=42)
gbrt.fit(X_train, y_train)


# errors[i] is the validation MSE of the ensemble truncated to i + 1 trees.
errors = [mean_squared_error(y_val, y_pred) for y_pred in gbrt.staged_predict(X_val)]

print(errors)

# argmin is a zero-based stage index, so the matching tree count is one
# higher. Using the index directly would train one tree too few, and would
# request 0 estimators when the first stage is best.
best_stage = int(np.argmin(errors))
best_n_estimators = best_stage + 1
min_error = errors[best_stage]

gbrt_best = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=best_n_estimators,
    learning_rate=0.1,
    random_state=42)

gbrt_best.fit(X_train, y_train)

开发者ID:stonecoder19,项目名称:machine_learning,代码行数:30,代码来源:ch07_ensemble_learning.py

示例8: GradientBoostingRegressor

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)
# Everything after the first 200 rows is used as the test split.
x_test, y_test = X.tail(len(X) - 200), Y.tail(len(X) - 200)

# GBM parameter settings. (Original author's note: these should mirror the
# settings of an R script, but the exact mapping was unclear.)
params = {'n_estimators': 6000, 'max_depth': 1, 'learning_rate': 0.01, 'warm_start': False, 'loss':'lad'}

# model fit
gbm0 = GradientBoostingRegressor(**params)
gbm0.fit(x_train, y_train)

# One score slot per boosting stage.
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)

bestY_pred = []
best_score = np.inf

# Keep the staged prediction with the lowest test loss seen so far.
# Comparing against the running minimum (rather than only the previous
# stage) ensures bestY_pred is the global best, not just the last stage
# that happened to improve on its predecessor.
for i, y_pred in enumerate(gbm0.staged_predict(x_test)):
    test_score[i] = gbm0.loss_(y_test, y_pred)
    if test_score[i] < best_score:
        best_score = test_score[i]
        # Copy defensively in case the generator reuses its output buffer.
        bestY_pred = y_pred.copy()

# bestY_pred is the prediction from the best-scoring stage; use it as the
# prediction set downstream.
print(bestY_pred)


########################################################################
# Optional from here on: plot the deviance curve.

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Deviance')
plt.plot(np.arange(params['n_estimators']) + 1, gbm0.train_score_, 'b-', label='Training Set Deviance')
开发者ID:tj0822,项目名称:Python,代码行数:33,代码来源:gbmConvert.py

示例9: mean_squared_error

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)
mse = mean_squared_error(y_test, clf.predict(X_test))
print("RMSE: %.4f" % np.sqrt(mse))

# NOTE(review): joblib.load unpickles the file; only load model files from a
# trusted source.
clf_full_data = joblib.load('model/model.pkl')
# Single-argument print(...) behaves the same on Python 2 and 3.
print('Generating graphs - partial dependance...')
# One partial-dependence figure per feature, saved under graphs/.
for feature in features:
    fig, axs = partial_dependence.plot_partial_dependence(clf_full_data, X, [feature], feature_names=list(features))
    fig.savefig('graphs/_%s.png' % feature.lower().replace(' ', '_'))

###############################################################################
# Plot training deviance

# Compute test set deviance. The buffer must be floating point: with an
# integer dtype every loss value would be truncated on assignment.
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)

for i, y_pred in enumerate(clf.staged_predict(X_test)):
    test_score[i] = clf.loss_(y_test, y_pred)

stages = np.arange(params['n_estimators']) + 1  # 1-based stage numbers

deviance_plot = plt
deviance_plot.figure(figsize=(12, 6))
deviance_plot.title('Deviance')
deviance_plot.plot(stages, clf.train_score_, 'b-',
         label='Training Set Deviance')
deviance_plot.plot(stages, test_score, 'r-',
         label='Test Set Deviance')
deviance_plot.legend(loc='upper right')
deviance_plot.xlabel('Boosting Iterations')
deviance_plot.ylabel('Deviance')

deviance_plot.savefig('graphs/deviance.png')
开发者ID:mhaseebtariq,项目名称:sales_prediction,代码行数:32,代码来源:graphs.py

示例10: gbdt_solver

# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 注意: staged_predict 是 GradientBoostingRegressor 的实例方法, 无法单独导入; 请在已拟合的模型实例上调用, 例如 model.staged_predict(X)

#.........这里部分代码省略.........

    #Transform predict

    tmp_train_y = train_y
    tmp_validation_y = validation_y

    train_y = Transform(train_y, transform_type, train_last_month_plays)
    validation_y = Transform(validation_y, transform_type, validation_last_month_plays)
    """
 
    params = {
        'n_estimators': 0,
        'learning_rate': 0.03,
        'random_state': 1000000007,
        'max_depth': 3,
        'verbose' : 2,
        'warm_start': True
    }


    max_num_round = 100 
    batch = 10
    best_val = -1e60
    history_validation_val = []
    best_num_round = -1
    curr_round = 0

    assert max_num_round % batch == 0
    gb = GradientBoostingRegressor(**params)
    for step in xrange(max_num_round / batch) :
        train_x = train_x.copy(order='C')
        train_y = train_y.copy(order='C')
        gb.n_estimators += batch
        logging.info('current round is: %d' % curr_round)
        #gb.set_params(**params)
        gb.fit(train_x, train_y)
        curr_round += batch
        predict = gb.predict(validation_x)
        #detransform to plays
        predict=Convert2Plays(predict, transform_type)
        predict = HandlePredict(predict.tolist(), validation_song_id)
        curr_val = evaluate.evaluate(predict, validation_y.tolist(), validation_artist_id, validation_month, validation_label_day)
        history_validation_val.append(curr_val)
        logging.info('the current score is %.10f' % curr_val)
        if curr_round >= 100 and curr_val > best_val:
            best_num_round = curr_round
            best_val = curr_val
            joblib.dump(gb, filepath + '/model/gbdt.pkl')

    logging.info('the best round is %d, the score is %.10f' % (best_num_round, best_val))
    gb = joblib.load(filepath + '/model/gbdt.pkl')
    predict = gb.predict(validation_x)
    #detransform to plays
    predict=Convert2Plays(predict, transform_type)

    with open(filepath + '/parameters.param', 'w') as out :
        for key, val in params.items():
            out.write(str(key) + ': ' + str(val) + '\n')
            out.write('max_num_round: '+str(max_num_round)+'\n')
            out.write('best_num_round: '+str(best_num_round)+'\n')
            out.write('transform_type: '+str(transform_type)+'\n')

    # unable to use matplotlib if used multiprocessing
    if validation_y.shape[0] and False :
        logging.info('the loss in Training set is %.4f' % loss_function(train_y, gb.predict(train_x)))
        logging.info('the loss in Validation set is %.4f' % loss_function(validation_y, gb.predict(validation_x)))

        plt.figure(figsize=(12, 6))
        # Plot feature importance
        plt.subplot(1, 2, 1)
        if (feature_names) == 0:
            feature_names = [str(i + 1) for i in xrange(validation_x.shape[0])]
        feature_names = np.array(feature_names)
        feature_importance = gb.feature_importances_
        feature_importance = 100.0 * (feature_importance / feature_importance.max())
        sorted_idx = np.argsort(feature_importance)
        pos = np.arange(sorted_idx.shape[0]) + .5
        plt.barh(pos, feature_importance[sorted_idx], align='center')
        plt.yticks(pos, feature_names[sorted_idx])
        plt.xlabel('Relative Importance')
        plt.title('Variable Importance')


        # Plot training deviance
        plt.subplot(1, 2, 2)
        test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
        for i, y_pred in enumerate(gb.staged_predict(validation_x)):
            test_score[i] = loss_function(validation_y, y_pred)
        plt.title('Deviance')
        plt.plot(np.arange(params['n_estimators']) + 1, gb.train_score_, 'b-',
                          label='Training Set Deviance')
        plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
                          label='Test Set Deviance')
        plt.legend(loc='upper right')
        plt.xlabel('Boosting Iterations')
        plt.ylabel('Deviance')

        plt.savefig(filepath + '/statistics.jpg')

    return predict, Transform(gb.predict(test_x), transform_type)
开发者ID:buyijie,项目名称:bybolove,代码行数:104,代码来源:gbdt.py


注:本文中的sklearn.ensemble.GradientBoostingRegressor.staged_predict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。