This page collects typical usage examples of the Python method sklearn.ensemble.GradientBoostingRegressor.set_params: what the method does, how it is called, and how it appears in real code. You can also read more about the containing class, sklearn.ensemble.GradientBoostingRegressor.
Below are 12 code examples of GradientBoostingRegressor.set_params, ordered by popularity by default.
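For orientation before the examples: set_params reconfigures an existing estimator in place and returns the estimator itself, so calls can be chained. A minimal sketch of the call pattern (toy data invented purely for illustration):

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

# Toy data, purely for illustration.
rng = np.random.RandomState(0)
X = rng.uniform(size=(100, 4))
y = X.sum(axis=1)

est = GradientBoostingRegressor(n_estimators=50)
# set_params returns self, so it can be chained with fit().
est.set_params(learning_rate=0.05, max_depth=2).fit(X, y)
print(est.get_params()['learning_rate'])  # 0.05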
Example 1: fit
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
def fit(self, X, y):
    """Fit one regressor for each quantile.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        Training vectors, where `n_samples` is the number of samples
        and `n_features` is the number of features.

    * `y` [array-like, shape=(n_samples,)]:
        Target values (real numbers in regression)
    """
    rng = check_random_state(self.random_state)

    if self.base_estimator is None:
        base_estimator = GradientBoostingRegressor(loss='quantile')
    else:
        base_estimator = self.base_estimator

        if not isinstance(base_estimator, GradientBoostingRegressor):
            raise ValueError('base_estimator has to be of type'
                             ' GradientBoostingRegressor.')

        if not base_estimator.loss == 'quantile':
            raise ValueError('base_estimator has to use quantile'
                             ' loss not %s' % base_estimator.loss)

    # The predictions for different quantiles should be sorted.
    # Therefore each of the regressors needs the same seed.
    base_estimator.set_params(random_state=rng)

    regressors = []
    for q in self.quantiles:
        regressor = clone(base_estimator)
        regressor.set_params(alpha=q)
        regressors.append(regressor)

    self.regressors_ = Parallel(n_jobs=self.n_jobs, backend='threading')(
        delayed(_parallel_fit)(regressor, X, y)
        for regressor in regressors)

    return self
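As a follow-up, a hedged sketch of the companion predict step (hypothetical, but consistent with how fit stores one fitted regressor per quantile in self.regressors_): each regressor yields the prediction for its own quantile.

import numpy as np

def predict(self, X):
    """Hypothetical companion to fit(): one column per quantile in self.quantiles."""
    return np.vstack([regressor.predict(X) for regressor in self.regressors_]).T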
Example 2: _gradient_boosting_regressor
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
def _gradient_boosting_regressor(self, update=False):
    if self.simulation:
        return

    far_past_index = int(np.interp(self.t_far_past, self.ts_t, self.indices))
    t_ensemble = np.atleast_2d(self.ts_t[far_past_index:]).T
    x_ensemble = np.atleast_2d(self.ts_x[far_past_index:]).T.ravel()
    t_ensemble_future = np.atleast_2d(np.linspace(self.t_far_past, self.t_present, 10000)).T

    alpha = 0.95
    clf = GradientBoostingRegressor(loss='quantile', alpha=alpha,
                                    n_estimators=250, max_depth=3,
                                    learning_rate=.1, min_samples_leaf=9,
                                    min_samples_split=20)
    clf.fit(t_ensemble, x_ensemble)
    # Make the prediction on the meshed x-axis
    y_upper = clf.predict(t_ensemble_future)

    clf.set_params(alpha=1.0 - alpha)
    clf.fit(t_ensemble, x_ensemble)
    # Make the prediction on the meshed x-axis
    y_lower = clf.predict(t_ensemble_future)

    clf.set_params(loss='ls')
    clf.fit(t_ensemble, x_ensemble)
    # Make the prediction on the meshed x-axis
    y_prediction = clf.predict(t_ensemble_future)

    self.plh_gbr = self.figure.plot(t_ensemble_future, y_prediction, 'r-')
    self.plh_gbr.extend(self.figure.plot(t_ensemble_future, y_upper, 'k-'))
    self.plh_gbr.extend(self.figure.plot(t_ensemble_future, y_lower, 'k-'))
    self.plh_gbr.extend(self.figure.fill(
        np.concatenate([t_ensemble_future, t_ensemble_future[::-1]]),
        np.concatenate([y_upper, y_lower[::-1]]),
        alpha=.5, fc='b', ec='None'))

    if not self._tp['pl_gbr']:
        self.figure.hide_line(ploth=self.plh_gbr)

    self.figure.legend(loc='upper left')
    self.figure.draw()
Example 3: print
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
print("Loading the data...")
tic = time()
X_train, y_train, qid_train = joblib.load(data_filenames['train'],
                                          mmap_mode='r')
X_vali, y_vali, qid_vali = joblib.load(data_filenames['validation'],
                                       mmap_mode='r')
# warm up (load the data from the drive)
X_train.max(), X_vali.max()
data_load_time = time() - tic
print("done in{:.3f}s".format(data_load_time))
print("Training the model with parameters:")
print(parameters)
tic = time()
model = GradientBoostingRegressor(random_state=0)
model.set_params(**parameters)
model.fit(X_train, y_train)
training_time = time() - tic
print("done in{:.3f}s".format(training_time))
print("Computing training [email protected]")
tic = time()
y_pred = model.predict(X_train)
prediction_time = time() - tic
train_score = mean_ndcg(y_train, y_pred, qid_train)
print("{:.3f}".format(train_score))
print("done in{:.3f}s".format(prediction_time))
print("Computing validation [email protected]")
y_pred = model.predict(X_vali)
validation_score = mean_ndcg(y_vali, y_pred, qid_vali)
Example 4: GradientBoostingRegressor
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
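This snippet uses X, y and f defined earlier in the original script. A minimal data-generation setup consistent with the classic scikit-learn quantile-regression example (an assumption, not part of the original code) would be:

import numpy as np

def f(x):
    """The function to predict: x * sin(x) (assumed from the plot label below)."""
    return x.ravel() * np.sin(x.ravel())

rng = np.random.RandomState(1)
X = np.atleast_2d(rng.uniform(0, 10.0, size=100)).T.astype(np.float32)
y = f(X) + rng.normal(0, 1.5, size=100)  # noisy observations of f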
xx = np.atleast_2d(np.linspace(0, 10, 1000)).T
xx = xx.astype(np.float32)
alpha = 0.95
clf = GradientBoostingRegressor(loss='quantile', alpha=alpha,
                                n_estimators=250, max_depth=3,
                                learning_rate=.1, min_samples_leaf=9,
                                min_samples_split=9)
clf.fit(X, y)
# Make the prediction on the meshed x-axis
y_upper = clf.predict(xx)
clf.set_params(alpha=1.0 - alpha)
clf.fit(X, y)
# Make the prediction on the meshed x-axis
y_lower = clf.predict(xx)
clf.set_params(loss='ls')
clf.fit(X, y)
# Make the prediction on the meshed x-axis
y_pred = clf.predict(xx)
# Plot the function, the prediction and the 95% confidence interval based on
# the MSE
fig = plt.figure()
plt.plot(xx, f(xx), 'g:', label=u'$f(x) = x\,\sin(x)$')
Example 5: make_friedman1
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor
X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0)
X_train, X_test = X[:200], X[200:]
y_train, y_test = y[:200], y[200:]
est = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
                                max_depth=1, random_state=0, loss='ls').fit(X_train, y_train)
_ = est.set_params(n_estimators=200, warm_start=True)  # set warm_start and the new number of trees
_ = est.fit(X_train, y_train)  # fit an additional 100 trees to est
print(mean_squared_error(y_test, est.predict(X_test)))
# 3.84...
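The same warm-start pattern can be repeated to grow the ensemble in stages; a short sketch (reusing est, X_train, y_train, X_test, y_test from the example above):

for n in (300, 400, 500):
    est.set_params(n_estimators=n)   # warm_start=True is still set
    est.fit(X_train, y_train)        # fits only the additional trees
    print(n, mean_squared_error(y_test, est.predict(X_test)))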
Example 6: sorted
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
clf = grid_search.GridSearchCV(est, parameters)
print('performing grid_searching...')
print('parameters:')
from time import time
t0 = time()
clf.fit(train_data, train_labels)
print('grid_searching takes %0.3fs' % (time() - t0))
best_parameters = clf.best_params_
for para_name in sorted(parameters.keys()):
    print(para_name)
    print(best_parameters[para_name])

est.set_params(learning_rate=best_parameters['learning_rate'],
               loss=best_parameters['loss'],
               max_depth=best_parameters['max_depth'],
               n_estimators=best_parameters['n_estimators'])
est.fit(train_data, train_labels)
print('saving model....')
from sklearn.externals import joblib
model_name = './model/2015-11-29/traffic_GBDT_' + line + '.model'
joblib.dump(est, model_name)
# validation process
est = joblib.load('./model/2015-11-29/traffic_GBDT_' + line + '.model')
sum = 0.0
for i in range(200):
    val_train_data, val_test_data, val_train_labels, val_test_labels = train_test_split(data, labels, test_size=7*15)
    predict_labels = est.predict(val_test_data)
    # print(predict_labels)
    error = compute_error(predict_labels, val_test_labels)
Example 7: load_boston
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.grid_search import GridSearchCV
from sklearn.datasets import load_boston
data = load_boston()
x = data["data"]
y = data["target"]
# tune hyper parameters first
model = GradientBoostingRegressor(n_estimators=3000)
parameters = {
    "learning_rate": [0.1, 0.05, 0.02, 0.01],
    "max_depth": [4, 6],
    "min_samples_leaf": [3, 5, 9, 17],
    "max_features": [1.0, 0.3, 0.1],
}
gscv = GridSearchCV(model, parameters, verbose=10, n_jobs=-1, cv=4)
gscv.fit(x, y)
print "best score=", gscv.best_score_
# tune learning rate with higher n_estimators
model = gscv.best_estimator_
model.set_params(n_estimators=100000)
parameters = {"learning_rate": [0.1, 0.05, 0.02, 0.01]}
gscv = GridSearchCV(model, parameters, verbose=10, n_jobs=-1, cv=4)
gscv.fit(x, y)
print "best score=", gscv.best_score_
Example 8: SVR
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
# predict with SVR
svr = SVR()
svr.set_params(**pickle.load(open("svr.p", "rb" )))
svr.fit(X1, Y1)
Y_svr = svr.predict(X2)
# predict with RF
rfr = RandomForestRegressor(n_estimators = 1000)
rfr.set_params(**pickle.load(open("rfr.p", "rb" )))
rfr.fit(X1, Y1)
Y_rfr = rfr.predict(X2)
# predict with GBT
gbr = GradientBoostingRegressor(n_estimators=3000)
gbr.set_params(**pickle.load(open("gbr.p", "rb" )))
gbr.fit(X1, Y1)
Y_gbr = gbr.predict(X2)
# stacking
for alpha in np.logspace(-10, 10, 21, base=2):
    for beta in np.logspace(-10, 10, 21, base=2):
        y_pred = Y_svr + alpha * Y_rfr + beta * Y_gbr
        y_rank = convertScore(y_pred,
                              [0.0, 0.0761961015948, 0.221500295334, 0.392498523331, 1.0])
        if (alpha, beta) not in scores:
            scores[(alpha, beta)] = 0.0
        scores[(alpha, beta)] += getKappa(Y2, y_rank)
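Since scores accumulates kappa across runs, the best blend weights can be read off at the end; a hedged sketch of the selection step (convertScore and getKappa come from the surrounding script, and this use of scores is an inference from the loop above):

best_alpha, best_beta = max(scores, key=scores.get)  # weights with the highest summed kappa
y_blend = Y_svr + best_alpha * Y_rfr + best_beta * Y_gbr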
##############################################################
Example 9: StandardScaler
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
ids = test["id"]
test = test.drop('id', axis=1)
X = train.values
Xt = test.values
scaler = StandardScaler()
X = scaler.fit_transform(X)
Xt = scaler.transform(Xt)
gbr = GradientBoostingRegressor(n_estimators = 1000)
parameters = {'learning_rate': [0.1, 0.05, 0.02, 0.01],
              'max_depth': [2, 4, 6],
              'min_samples_leaf': [1, 3, 5, 10, 15],
              'subsample': [1.0, 0.8, 0.6, 0.4],
              'max_features': [1.0, 0.7, 0.5, 0.3, 0.1]}
gscv = RandomizedSearchCV(gbr, parameters, n_iter=100, verbose=10, n_jobs=-1, cv=3, scoring='mean_squared_error')
gscv.fit(X, Y)
# tune learning rate with larger n_estimators
gbr = gscv.best_estimator_
gbr.set_params(n_estimators=3000)
parameters = {'learning_rate' : [0.1, 0.05, 0.02, 0.01, 0.005, 0.002, 0.001]}
gscv = GridSearchCV(gbr, parameters, verbose=10, n_jobs=-1, cv=3, scoring='mean_squared_error')
gscv.fit(X, Y)
print "best score=", gscv.best_score_
print "best_parameter=", gscv.best_params_
pickle.dump(gscv.best_estimator_.get_params(), open("gbr.p", "wb"))
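The dumped parameter dict is exactly what Example 8 above reloads; a one-line sketch of restoring it onto a fresh estimator:

gbr = GradientBoostingRegressor().set_params(**pickle.load(open("gbr.p", "rb")))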
Example 10: print
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
    RF_model.fit(X, y)
    print('Best {}'.format(RF_model.best_params_))

    print('Performing grid search on GBR')
    n_features = X.shape[1]
    params = {'max_features': ['auto', 'sqrt', 'log2'],
              'max_depth': [2, 3]}
    GBR_model = GridSearchCV(GBR_est, params)
    GBR_model.fit(X, y)
    print('Best {}'.format(GBR_model.best_params_))
else:
    Lin_model = Lin_est.set_params(alpha=100.0)
    SVR_model = svr_est.set_params(C=1.0)
    RF_model = RF_est.set_params(max_features='auto')
    GBR_model = GBR_est.set_params(max_features='auto',
                                   max_depth=3)

#%% Specify set of models to test
model_set = [('Null', LCM.rand_pick_mod()),
             ('Lin', Lin_model),
             ('Lin_SVR', SVR_model),
             ('GBR', GBR_model),
             ('RF', RF_model)]
# model_set = [('Null', LCM.rand_pick_mod()),
#              ('Lin', Lin_model),
#              ('RF', RF_model)]

leg_titles = {'Null': 'Random\nPicking',
              'Lin': 'Linear\nModel',
              'Lin_SVR': 'Linear SVM',
Example 11: GradientBoostingRegressorTestPlots
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
def GradientBoostingRegressorTestPlots(X_train, X_test, y_train, y_test, n_estimators=1000):
    """
    An important diagnostic when using GBRT in practice is the so-called deviance
    plot that shows the training/testing error (or deviance) as a function of the
    number of trees.
    """
    def fmt_params(params):
        return ", ".join("{0}={1}".format(key, val) for key, val in params.items())

    def deviance_plot(est, X_test, y_test, ax=None, label='', train_color='#2c7bb6',
                      test_color='#d7191c', alpha=1.0):
        """Deviance plot for ``est``, use ``X_test`` and ``y_test`` for test error."""
        test_dev = np.empty(n_estimators)
        for i, pred in enumerate(est.staged_predict(X_test)):
            test_dev[i] = est.loss_(y_test, pred)

        if ax is None:
            fig = plt.figure(figsize=(8, 5))
            ax = plt.gca()

        ax.plot(np.arange(n_estimators) + 1, test_dev, color=test_color, label='Test %s' % label,
                linewidth=2, alpha=alpha)
        ax.plot(np.arange(n_estimators) + 1, est.train_score_, color=train_color,
                label='Train %s' % label, linewidth=2, alpha=alpha)
        ax.set_ylabel('Error')
        ax.set_xlabel('n_estimators')
        return test_dev, ax

    est = GBR(n_estimators=n_estimators, verbose=1)
    est.fit(X_train, y_train)
    feature_importance = est.feature_importances_

    test_dev, ax = deviance_plot(est, X_test, y_test)
    ax.legend(loc='upper right')
    ax.annotate('Lowest test error', xy=(test_dev.argmin() + 1, test_dev.min() + 0.02), xycoords='data',
                xytext=(150, 1.0), textcoords='data',
                arrowprops=dict(arrowstyle="->", connectionstyle="arc"))
    plt.savefig('GBRdeviance.pdf')
    plt.close()

    # sample leaves
    fig = plt.figure(figsize=(8, 5))
    ax = plt.gca()
    for params, (test_color, train_color) in [({'min_samples_leaf': 1},
                                               ('#d7191c', '#2c7bb6')),
                                              ({'min_samples_leaf': 4},
                                               ('#fdae61', '#abd9e9'))]:
        est = GBR(n_estimators=n_estimators, verbose=1)
        est.set_params(**params)
        est.fit(X_train, y_train)
        test_dev, ax = deviance_plot(est, X_test, y_test, ax=ax, label=fmt_params(params),
                                     train_color=train_color, test_color=test_color)
    plt.legend(loc='upper right')
    plt.savefig('GBRTree.pdf')
    plt.close()

    # learning rate
    fig = plt.figure(figsize=(8, 5))
    ax = plt.gca()
    for params, (test_color, train_color) in [({'learning_rate': 0.2},
                                               ('#d7191c', '#2c7bb6')),
                                              ({'learning_rate': 0.7},
                                               ('#fdae61', '#abd9e9'))]:
        est = GBR(n_estimators=n_estimators, verbose=1)
        est.set_params(**params)
        est.fit(X_train, y_train)
        test_dev, ax = deviance_plot(est, X_test, y_test, ax=ax, label=fmt_params(params),
                                     train_color=train_color, test_color=test_color)
    plt.legend(loc='upper right')
    plt.savefig('GBRShrinkage.pdf')
    plt.close()

    # sub-samples
    fig = plt.figure(figsize=(8, 5))
    ax = plt.gca()
    for params, (test_color, train_color) in [({'subsample': 1.},
                                               ('#d7191c', '#2c7bb6')),
                                              ({'subsample': 0.7},
                                               ('#fdae61', '#abd9e9'))]:
        est = GBR(n_estimators=n_estimators, verbose=1)
        est.set_params(**params)
        est.fit(X_train, y_train)
        test_dev, ax = deviance_plot(est, X_test, y_test, ax=ax, label=fmt_params(params),
                                     train_color=train_color, test_color=test_color)
    plt.legend(loc='upper right')
    plt.savefig('GBRSubsample.pdf')
    plt.close()

    # feature importance
    feature_names = ['u', 'g', 'r', 'i', 'z', 'modelmagerr_u', 'modelmagerr_g',
                     'modelmagerr_r', 'modelmagerr_i', 'modelmagerr_z']
    feature_names = np.asarray(feature_names)
    feature_importance = 100.0 * (feature_importance / feature_importance.max())
    sorted_idx = np.argsort(feature_importance)
    pos = np.arange(sorted_idx.shape[0]) + .5
    plt.subplot(1, 1, 1)
#......... the rest of this example is omitted .........
Example 12: shuffle
# Module to import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import set_params [as alias]
(X,y) = datasets.load_svmlight_file('/Users/amirrahimi/Desktop/doc/visionTools/lasik-2.4/amir/Features/bld_new/all/below_79.selected.building.log.txt')
#(X,y) = datasets.load_svmlight_file('/Users/amirrahimi/Desktop/doc/visionTools/lasik-2.4/amir/Features/gnd/below_79.ground.selected.log.txt')
X_train, y_train = shuffle(X,y,random_state=13)
#(X_test,y_test) = datasets.load_svmlight_file('/Users/amirrahimi/Desktop/doc/visionTools/lasik-2.4/amir/Features/gnd/below_79.ground.rest.log.txt')
(X_test,y_test) = datasets.load_svmlight_file('/Users/amirrahimi/Desktop/doc/visionTools/lasik-2.4/amir/Features/bld_new/all/below_79.rest.building.log.txt')
X_train = X_train.todense()
clf = GBR(n_estimators=200, loss='ls', max_depth=4, min_samples_split=1, learn_rate=0.1)  # FIXME maxdepth = 8
X_test = X_test.todense()
#clf.fit(X_train, y_train)
#clf = GBR(max_depth=8, min_samples_split = 1)
#tuned_parameters = [{'n_estimators': [100,200,300,400,500], 'loss' : ['ls','lad'], 'learn_rate':[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1]}]
tuned_parameters = [{'n_estimators': [500,600,700], 'loss' : ['ls'], 'learn_rate':[0.075,0.1,0.25]}]
#tuned_parameters = [{'n_estimators':[100], 'loss' : ['ls','lad'], 'learn_rate':[1]}]
scores = [('mse', mean_squared_error),
          ('abs_err', mean_abs_err),
          ('log_err', mean_log_err)]
grid_list = list(IterGrid(tuned_parameters))
for i in range(len(grid_list)):
    clf.set_params(**grid_list[i])
    print('training ' + str(grid_list[i]))
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    for score_name, score_func in scores:
        print(score_name + ': ' + str(score_func(y_pred, y_test)))
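A closing note: IterGrid was renamed in later scikit-learn releases, and the same manual sweep with current APIs would read as below (a sketch, with clf and tuned_parameters as above; the old learn_rate key would also need to become learning_rate):

from sklearn.model_selection import ParameterGrid

for params in ParameterGrid(tuned_parameters):
    clf.set_params(**params)  # same pattern: one estimator, reconfigured per grid point
    clf.fit(X_train, y_train)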