This article collects and organizes typical code examples of the Python method sklearn.ensemble.GradientBoostingRegressor.staged_predict. If you are wondering what exactly GradientBoostingRegressor.staged_predict does, how to use it, or what it looks like in practice, the curated examples below may help. You can also read more about the containing class, sklearn.ensemble.GradientBoostingRegressor.
The following shows 10 code examples of GradientBoostingRegressor.staged_predict, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
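Before the examples, a minimal self-contained sketch of the method may help: staged_predict yields the regressor's prediction after each boosting stage, so it is mostly used to watch how the test error evolves and to pick a sensible number of stages. The dataset and hyper-parameters below are illustrative only and are not taken from any of the examples that follow.

import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# illustrative data and parameters (not taken from the examples below)
X, y = make_friedman1(n_samples=1200, noise=1.0, random_state=0)
X_train, X_test, y_train, y_test = X[:800], X[800:], y[:800], y[800:]

gbr = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, random_state=0)
gbr.fit(X_train, y_train)

# one prediction array per boosting stage; the last one equals gbr.predict(X_test)
test_mse = [mean_squared_error(y_test, y_pred)
            for y_pred in gbr.staged_predict(X_test)]
print("best number of stages:", int(np.argmin(test_mse)) + 1)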
Example 1: test_staged_predict
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
def test_staged_predict():
    # Test whether staged decision function eventually gives
    # the same prediction.
    X, y = datasets.make_friedman1(n_samples=1200, random_state=1, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test = X[200:]
    clf = GradientBoostingRegressor()
    # test raise ValueError if not fitted
    assert_raises(ValueError, lambda X: np.fromiter(clf.staged_predict(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # test if prediction for last stage equals ``predict``
    for y in clf.staged_predict(X_test):
        assert_equal(y.shape, y_pred.shape)

    assert_array_equal(y_pred, y)
Example 2: GrientBoostingModel
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
class GrientBoostingModel(BaseModel):
    def __init__(self):
        BaseModel.__init__(self)
        # self.save_final_model = True
        self.do_cross_val = False
        return

    def setClf(self):
        self.clf = GradientBoostingRegressor(n_estimators=100, verbose=100)
        # self.clf = GradientBoostingRegressor(loss='ls', verbose=300, n_estimators=70, learning_rate=0.1, subsample=1.0, max_features=1.0)
        return

    def after_test(self):
        scores_test = []
        scores_train = []
        scores_test_mse = []
        scores_train_mse = []
        for i, y_pred in enumerate(self.clf.staged_predict(self.X_test)):
            scores_test.append(mean_absolute_percentage_error(self.y_test, y_pred))
            scores_test_mse.append(mean_squared_error(self.y_test, y_pred))
        for i, y_pred in enumerate(self.clf.staged_predict(self.X_train)):
            scores_train.append(mean_absolute_percentage_error(self.y_train, y_pred))
            scores_train_mse.append(mean_squared_error(self.y_train, y_pred))
        pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test,
                      'scores_train_mse': scores_train_mse, 'scores_test_mse': scores_test_mse}).to_csv('temp/trend.csv')
        df = pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test})
        print "Test set MAPE minimum: {}".format(np.array(scores_test).min())
        # df.plot()
        # plt.show()
        return

    def __get_intial_model_param(self):
        return {'max_depth': [8], 'max_features': [9], 'subsample': [0.8], 'learning_rate': [0.1],
                'n_estimators': np.arange(20, 81, 10)}

    def __get_model_param(self):
        return {'max_depth': np.arange(3, 15, 1), 'subsample': np.linspace(0.5, 1.0, 6),
                'learning_rate': [0.15, 0.1, 0.08, 0.06, 0.04, 0.02, 0.01],
                'n_estimators': [1000, 1300, 1500, 1800, 2000]}

    def getTunedParamterOptions(self):
        # tuned_parameters = self.__get_intial_model_param()
        tuned_parameters = self.__get_model_param()
        # tuned_parameters = [{'learning_rate': [0.1, 0.05, 0.01, 0.002], 'subsample': [1.0, 0.5], 'n_estimators': [15000]}]
        # tuned_parameters = [{'n_estimators': [2]}]
        return tuned_parameters
Example 3: boosting_optimization
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
def boosting_optimization(X_train, y_train, X_test, y_test):
    gbm = GradientBoostingRegressor(n_estimators=3000, max_depth=10)
    gbm.fit(X_train, y_train)
    pred = gbm.predict(X_test)
    print "feature importances: "
    print pd.Series(gbm.feature_importances_, index=datasets.load_boston().feature_names)
    print "staged predict: {}".format(gbm.staged_predict(X_train))
    print "predict: {}".format(gbm.predict(X_test))
    print y_test
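Note that staged_predict returns a generator, so the print statement above shows a generator object rather than actual predictions. A minimal sketch of materializing the staged predictions, reusing gbm and X_train from the example; the assertion simply illustrates that the last stage matches predict:

import numpy as np

staged = list(gbm.staged_predict(X_train))   # one prediction array per boosting stage
final = staged[-1]                           # prediction of the last stage
assert np.allclose(final, gbm.predict(X_train))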
Example 4: test_gbm_regressor_backupsklearn
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
def test_gbm_regressor_backupsklearn(backend='auto'):
    df = pd.read_csv("./open_data/simple.txt", delim_whitespace=True)
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    import h2o4gpu
    Solver = h2o4gpu.GradientBoostingRegressor

    # Run the h2o4gpu version of gradient boosting regression
    gbm = Solver(backend=backend, random_state=1234)
    print("h2o4gpu fit()")
    gbm.fit(X, y)

    # Run the scikit-learn version of gradient boosting regression
    from sklearn.ensemble import GradientBoostingRegressor
    gbm_sk = GradientBoostingRegressor(random_state=1234, max_depth=3)
    print("Scikit fit()")
    gbm_sk.fit(X, y)

    if backend == "sklearn":
        assert (gbm.predict(X) == gbm_sk.predict(X)).all() == True
        print((a == b for a, b in zip(gbm.staged_predict(X), gbm_sk.staged_predict(X))))
        assert np.allclose(list(gbm.staged_predict(X)), list(gbm_sk.staged_predict(X)))
        assert (gbm.score(X, y) == gbm_sk.score(X, y)).all() == True
        assert (gbm.apply(X) == gbm_sk.apply(X)).all() == True

        print("Estimators")
        print(gbm.estimators_)
        print(gbm_sk.estimators_)

        print("loss")
        print(gbm.loss_)
        print(gbm_sk.loss_)
        assert gbm.loss_.__dict__ == gbm_sk.loss_.__dict__

        print("init_")
        print(gbm.init)
        print(gbm_sk.init)

        print("Feature importance")
        print(gbm.feature_importances_)
        print(gbm_sk.feature_importances_)
        assert (gbm.feature_importances_ == gbm_sk.feature_importances_).all() == True

        print("train_score_")
        print(gbm.train_score_)
        print(gbm_sk.train_score_)
        assert (gbm.train_score_ == gbm_sk.train_score_).all() == True
Example 5: int
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor

offset = int(x_train.shape[0] * 0.9)
x_train_fit, y_train_fit = x_train[:offset], y_train[:offset]
x_test_fit, y_test_fit = x_train[offset:], y_train[offset:]

clf = GradientBoostingRegressor(n_estimators=700, max_depth=6, learning_rate=0.01, subsample=1.0)
clf.fit(x_train_fit, y_train_fit)
rmse = fmean_squared_error(y_test_fit, clf.predict(x_test_fit))
print("RMSE: %.4f" % rmse)

# plotting: with learning rate 0.01, see which n_estimators to choose,
# then grid search max_depth, then do bagging on that
test_score = np.zeros((700,), dtype=np.float64)
for i, y_pred in enumerate(clf.staged_predict(x_test_fit)):
    test_score[i] = clf.loss_(y_test_fit, y_pred)

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Deviance')
plt.plot(np.arange(700) + 1, clf.train_score_, 'b-',
         label='Training Set Deviance')
plt.plot(np.arange(700) + 1, test_score, 'r-',
         label='Test Set Deviance')
plt.legend(loc='upper right')
plt.xlabel('Boosting Iterations')
plt.ylabel('Deviance')
plt.show()

from sklearn import pipeline, grid_search
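The comment in the snippet sketches a workflow: fix a small learning rate, read a good n_estimators off the deviance curve, then grid-search max_depth. A minimal sketch of that grid-search step, assuming the same x_train_fit/y_train_fit variables; GridSearchCV is imported from sklearn.model_selection here, whereas the snippet's own sklearn.grid_search import is the older, deprecated location of the same class:

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {'max_depth': [4, 6, 8, 10]}   # illustrative candidate depths
search = GridSearchCV(
    GradientBoostingRegressor(n_estimators=700, learning_rate=0.01, subsample=1.0),
    param_grid,
    scoring='neg_mean_squared_error',
    cv=3)
search.fit(x_train_fit, y_train_fit)
print(search.best_params_)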
Example 6: plot_data
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
x_plot = np.linspace(0, 10, 500)

def plot_data(figsize=(8, 5)):
    fig = plt.figure(figsize=figsize)
    gt = plt.plot(x_plot, ground_truth(x_plot), alpha=0.4, label="ground truth")
    plt.scatter(X_train, y_train, s=100, alpha=0.4)
    plt.scatter(X_test, y_test, s=10, alpha=0.4, color="red")
    plt.xlim((0, 10))
    plt.ylabel("y")
    plt.xlabel("x")

plot_data(figsize=(8, 5))

# create a gbm regressor to plot against the sample
est = GradientBoostingRegressor(n_estimators=1000, max_depth=1, learning_rate=1.0).fit(X_train, y_train)
ax = plt.gca()
first = True
for pred in islice(est.staged_predict(x_plot[:, np.newaxis]), 0, 1000, 10):
    plt.plot(x_plot, pred, color="r", alpha=0.2)
    if first:
        # the y coordinate and the ``first = False`` reset are assumptions;
        # the original snippet truncates this part
        ax.annotate("High bias - low variance",
                    xy=(x_plot[x_plot.shape[0] // 2], pred[x_plot.shape[0] // 2]))
        first = False

plt.plot(x_plot, est.predict(x_plot[:, np.newaxis]), label="RT max_depth=1", color="g", alpha=0.9, linewidth=2)

est = DecisionTreeRegressor(max_depth=3).fit(X_train, y_train)
plt.plot(x_plot, est.predict(x_plot[:, np.newaxis]), label="RT max_depth=3", color="g", alpha=0.7, linewidth=1)

plt.legend(loc="upper left")
plt.show()
Example 7: train_test_split
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
plt.show()

X_train, X_val, y_train, y_val = train_test_split(X, y)

gbrt = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=120,
    learning_rate=0.1,
    random_state=42)
gbrt.fit(X_train, y_train)

errors = [mean_squared_error(y_val, y_pred) for y_pred in gbrt.staged_predict(X_val)]
print(errors)

# staged_predict yields the prediction after 1, 2, ... estimators,
# so the best number of estimators is the argmin index plus one
best_n_estimators = np.argmin(errors) + 1
min_error = errors[best_n_estimators - 1]

gbrt_best = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=best_n_estimators,
    learning_rate=0.1,
    random_state=42)
gbrt_best.fit(X_train, y_train)
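As an alternative to scanning staged_predict manually, scikit-learn 0.20 and later can stop boosting automatically once a held-out validation score stops improving. A minimal sketch under that version assumption, reusing X_train and y_train from the example:

from sklearn.ensemble import GradientBoostingRegressor

# stop once the internal validation score has not improved for 10 iterations
gbrt_es = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=500,            # upper bound; early stopping usually ends sooner
    learning_rate=0.1,
    validation_fraction=0.1,     # hold out 10% of the training data internally
    n_iter_no_change=10,
    random_state=42)
gbrt_es.fit(X_train, y_train)
print(gbrt_es.n_estimators_)     # number of stages actually fitted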
Example 8: GradientBoostingRegressor
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
x_test, y_test = X.tail(X.__len__() - 200), Y.tail(X.__len__() - 200)

# set the GBM parameters (this part should mirror the R source, but the exact mapping is unclear)
params = {'n_estimators': 6000, 'max_depth': 1, 'learning_rate': 0.01, 'warm_start': False, 'loss': 'lad'}

# model fit
gbm0 = GradientBoostingRegressor(**params)
gbm0.fit(x_train, y_train)

# create the score array (one slot per estimator)
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
bestY_pred = []

# iterate over the staged predictions; the stage with the lowest test deviance seems to be the optimum
for i, y_pred in enumerate(gbm0.staged_predict(x_test)):
    test_score[i] = gbm0.loss_(y_test, y_pred)
    if i > 0 and test_score[i] < test_score[i - 1]:
        bestY_pred = y_pred

# bestY_pred is the best staged prediction; pass it along as the prediction set
print(bestY_pred)

########################################################################
# from here on: optional plotting
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Deviance')
plt.plot(np.arange(params['n_estimators']) + 1, gbm0.train_score_, 'b-', label='Training Set Deviance')
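The loop above keeps the most recent staged prediction that improved on the immediately preceding stage, which is not necessarily the global minimum of the test error. A minimal sketch of selecting the globally best stage instead, reusing gbm0, x_test and y_test from the example; mean_absolute_error is used as a stand-in for gbm0.loss_ (whose callable form varies across scikit-learn versions) and is consistent with the 'lad' loss above:

import numpy as np
from sklearn.metrics import mean_absolute_error

stage_preds = list(gbm0.staged_predict(x_test))          # one prediction array per stage
stage_scores = [mean_absolute_error(y_test, p) for p in stage_preds]
best_stage = int(np.argmin(stage_scores))                # 0-based index of the best stage
bestY_pred = stage_preds[best_stage]
print(best_stage + 1, stage_scores[best_stage])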
Example 9: mean_squared_error
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
mse = mean_squared_error(y_test, clf.predict(X_test))
print("RMSE: %.4f" % np.sqrt(mse))

clf_full_data = joblib.load('model/model.pkl')

print 'Generating graphs - partial dependence...'
for idx, x in enumerate(features):
    fig, axs = partial_dependence.plot_partial_dependence(clf_full_data, X, [features[idx]],
                                                          feature_names=list(features))
    fig.savefig('graphs/_%s.png' % x.lower().replace(' ', '_'))

###############################################################################
# Plot training deviance

# compute test set deviance (float64: deviance values are real-valued)
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
for i, y_pred in enumerate(clf.staged_predict(X_test)):
    test_score[i] = clf.loss_(y_test, y_pred)

deviance_plot = plt
deviance_plot.figure(figsize=(12, 6))
deviance_plot.title('Deviance')
deviance_plot.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',
                   label='Training Set Deviance')
deviance_plot.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
                   label='Test Set Deviance')
deviance_plot.legend(loc='upper right')
deviance_plot.xlabel('Boosting Iterations')
deviance_plot.ylabel('Deviance')
deviance_plot.savefig('graphs/deviance.png')
Example 10: gbdt_solver
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import staged_predict [as alias]
#......... part of the code is omitted here .........
    #Transform predict
    tmp_train_y = train_y
    tmp_validation_y = validation_y
    train_y = Transform(train_y, transform_type, train_last_month_plays)
    validation_y = Transform(validation_y, transform_type, validation_last_month_plays)
    """
    params = {
        'n_estimators': 0,
        'learning_rate': 0.03,
        'random_state': 1000000007,
        'max_depth': 3,
        'verbose': 2,
        'warm_start': True
    }
    max_num_round = 100
    batch = 10
    best_val = -1e60
    history_validation_val = []
    best_num_round = -1
    curr_round = 0
    assert max_num_round % batch == 0

    gb = GradientBoostingRegressor(**params)
    for step in xrange(max_num_round / batch):
        train_x = train_x.copy(order='C')
        train_y = train_y.copy(order='C')
        gb.n_estimators += batch
        logging.info('current round is: %d' % curr_round)
        # gb.set_params(**params)
        gb.fit(train_x, train_y)
        curr_round += batch

        predict = gb.predict(validation_x)
        # detransform to plays
        predict = Convert2Plays(predict, transform_type)
        predict = HandlePredict(predict.tolist(), validation_song_id)
        curr_val = evaluate.evaluate(predict, validation_y.tolist(), validation_artist_id,
                                     validation_month, validation_label_day)
        history_validation_val.append(curr_val)
        logging.info('the current score is %.10f' % curr_val)
        if curr_round >= 100 and curr_val > best_val:
            best_num_round = curr_round
            best_val = curr_val
            joblib.dump(gb, filepath + '/model/gbdt.pkl')

    logging.info('the best round is %d, the score is %.10f' % (best_num_round, best_val))
    gb = joblib.load(filepath + '/model/gbdt.pkl')
    predict = gb.predict(validation_x)
    # detransform to plays
    predict = Convert2Plays(predict, transform_type)

    with open(filepath + '/parameters.param', 'w') as out:
        for key, val in params.items():
            out.write(str(key) + ': ' + str(val) + '\n')
        out.write('max_num_round: ' + str(max_num_round) + '\n')
        out.write('best_num_round: ' + str(best_num_round) + '\n')
        out.write('transform_type: ' + str(transform_type) + '\n')

    # unable to use matplotlib if multiprocessing is used
    if validation_y.shape[0] and False:
        logging.info('the loss in Training set is %.4f' % loss_function(train_y, gb.predict(train_x)))
        logging.info('the loss in Validation set is %.4f' % loss_function(validation_y, gb.predict(validation_x)))
        plt.figure(figsize=(12, 6))

        # Plot feature importance
        plt.subplot(1, 2, 1)
        if len(feature_names) == 0:
            feature_names = [str(i + 1) for i in xrange(validation_x.shape[1])]
        feature_names = np.array(feature_names)
        feature_importance = gb.feature_importances_
        feature_importance = 100.0 * (feature_importance / feature_importance.max())
        sorted_idx = np.argsort(feature_importance)
        pos = np.arange(sorted_idx.shape[0]) + .5
        plt.barh(pos, feature_importance[sorted_idx], align='center')
        plt.yticks(pos, feature_names[sorted_idx])
        plt.xlabel('Relative Importance')
        plt.title('Variable Importance')

        # Plot training deviance
        plt.subplot(1, 2, 2)
        test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
        for i, y_pred in enumerate(gb.staged_predict(validation_x)):
            test_score[i] = loss_function(validation_y, y_pred)
        plt.title('Deviance')
        plt.plot(np.arange(params['n_estimators']) + 1, gb.train_score_, 'b-',
                 label='Training Set Deviance')
        plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
                 label='Test Set Deviance')
        plt.legend(loc='upper right')
        plt.xlabel('Boosting Iterations')
        plt.ylabel('Deviance')
        plt.savefig(filepath + '/statistics.jpg')

    return predict, Transform(gb.predict(test_x), transform_type)