This page collects typical usage examples of the GradientBoostingRegressor.fit method from Python's sklearn.ensemble module. If you are wondering what GradientBoostingRegressor.fit does, how to use it, or where to find real-world examples, the curated code samples below should help. You can also explore further usage examples of the containing class, sklearn.ensemble.GradientBoostingRegressor.
The following shows 15 code examples of GradientBoostingRegressor.fit, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
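Before the collected examples, here is a minimal, self-contained sketch of the basic fit/predict pattern. The synthetic data and parameter values are illustrative only and are not taken from any example below:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

# Synthetic regression data: 500 samples, 4 features, noisy linear target
rng = np.random.RandomState(0)
X = rng.rand(500, 4)
y = X[:, 0] + 2.0 * X[:, 1] + 0.1 * rng.randn(500)

# Fit a small model; these hyperparameter values are just common starting points
gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
                                max_depth=3, random_state=0)
gbr.fit(X, y)

# Predict on a few rows and inspect the training R^2 score
print(gbr.predict(X[:5]))
print(gbr.score(X, y))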
Example 1: cross_val_cols
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def cross_val_cols(self, n_folds=3):
    """
    Takes in: number of folds
    Prints out RMSE score and stores the results in self.results
    """
    cv = KFold(n=self.X_train.shape[0], n_folds=n_folds)
    gbr = GradientBoostingRegressor(**self.params)
    self.med_error = []
    self.rmse_cv = []
    self.pct_error = []
    self.results = {'pred': [],
                    'real': []}
    # accumulate the unencoded validation folds (this initialization is missing in the original snippet)
    dfFeatures = []
    for train, test in cv:
        gbr.fit(self.X_train[train], self.y_train[train])
        # unencode and final_cols are defined elsewhere in the source project
        dfFeatures += [unencode(pd.DataFrame(columns=final_cols[:-1], data=self.X_train[test]))]
        pred = gbr.predict(self.X_train[test])
        # predExp/testExp are not defined in the original snippet; reconstructed here
        # to match its RMSE line, which compares np.power(pred, 10) with np.power(y, 10)
        predExp = np.power(pred, 10)
        testExp = np.power(self.y_train[test], 10)
        medError = median_absolute_error(predExp, testExp)
        percentError = np.median([np.fabs(p - t) / t for p, t in zip(predExp, testExp)])
        error = mean_squared_error(predExp, testExp) ** 0.5
        self.inFeatures = (self.X_train[test])
        self.results['pred'] += list(predExp)
        self.results['real'] += list(testExp)
        self.rmse_cv += [error]
        self.med_error += [medError]
        self.pct_error += [percentError]
    print 'Abs Median Error:', np.mean(self.med_error)
    print 'Abs Percent Error:', np.mean(self.pct_error)
    print 'Mean RMSE:', np.mean(self.rmse_cv)
    # pd.DataFrame.concat does not exist; pd.concat is the correct call
    self.valDf = pd.concat(dfFeatures)
    self.valDf = self.valDf.reset_index().drop('index', axis=1)
    self.valDf['pred'] = self.results['pred']
    self.valDf['real'] = self.results['real']
    return self.valDf
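Example 1 also relies on unencode and final_cols from its own project, which are not shown. Purely hypothetical stand-ins, just to make the shape of the data flow visible, might look like:

import pandas as pd

# Hypothetical stand-ins -- the real definitions live in the example's source project.
final_cols = ['feat_a', 'feat_b', 'price']  # feature columns, with the target as the last entry

def unencode(df):
    # The real helper presumably reverses some categorical encoding;
    # this placeholder simply returns the frame unchanged.
    return df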
Example 2: check_boston
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def check_boston(presort, loss, subsample):
    # Check consistency on dataset boston house prices with least squares
    # and least absolute deviation.
    ones = np.ones(len(boston.target))
    last_y_pred = None
    for sample_weight in None, ones, 2 * ones:
        clf = GradientBoostingRegressor(n_estimators=100,
                                        loss=loss,
                                        max_depth=4,
                                        subsample=subsample,
                                        min_samples_split=2,
                                        random_state=1,
                                        presort=presort)
        # predicting before fit must raise
        assert_raises(ValueError, clf.predict, boston.data)

        clf.fit(boston.data, boston.target,
                sample_weight=sample_weight)
        leaves = clf.apply(boston.data)
        assert_equal(leaves.shape, (506, 100))

        y_pred = clf.predict(boston.data)
        mse = mean_squared_error(boston.target, y_pred)
        assert_less(mse, 6.0)

        if last_y_pred is not None:
            # uniform sample weights should not change the fit
            assert_array_almost_equal(last_y_pred, y_pred)
        last_y_pred = y_pred
Example 3: train
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def train(self, x, y, param_names, **kwargs):
    start = time.time()
    scaled_x = self._set_and_preprocess(x=x, param_names=param_names)

    # Check that each input is between 0 and 1
    self._check_scaling(scaled_x=scaled_x)

    if self._debug:
        print "Shape of training data: ", scaled_x.shape
        print "Param names: ", self._used_param_names
        print "First training sample\n", scaled_x[0]
        print "Encode: ", self._encode

    # Do a random search
    max_features, learning_rate, max_depth, min_samples_leaf, n_estimators = \
        self._random_search(random_iter=100, x=scaled_x, y=y)

    # Now train model
    gb = GradientBoostingRegressor(loss='ls',
                                   learning_rate=learning_rate,
                                   n_estimators=n_estimators,
                                   subsample=1.0,
                                   min_samples_split=2,
                                   min_samples_leaf=min_samples_leaf,
                                   max_depth=max_depth,
                                   init=None,
                                   random_state=self._rng,
                                   max_features=max_features,
                                   alpha=0.9,
                                   verbose=0)
    gb.fit(scaled_x, y)
    self._model = gb

    duration = time.time() - start
    self._training_finished = True
    return duration
Example 4: test_plot_partial_dependence
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def test_plot_partial_dependence():
    # Test partial dependence plot function.
    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(boston.data, boston.target)

    grid_resolution = 25
    fig, axs = plot_partial_dependence(clf, boston.data, [0, 1, (0, 1)],
                                       grid_resolution=grid_resolution,
                                       feature_names=boston.feature_names)
    assert len(axs) == 3
    # has_data must be called; a bare method reference is always truthy
    assert all(ax.has_data() for ax in axs)

    # check with str features and array feature names
    fig, axs = plot_partial_dependence(clf, boston.data, ['CRIM', 'ZN',
                                                          ('CRIM', 'ZN')],
                                       grid_resolution=grid_resolution,
                                       feature_names=boston.feature_names)
    assert len(axs) == 3
    assert all(ax.has_data() for ax in axs)

    # check with list feature_names
    feature_names = boston.feature_names.tolist()
    fig, axs = plot_partial_dependence(clf, boston.data, ['CRIM', 'ZN',
                                                          ('CRIM', 'ZN')],
                                       grid_resolution=grid_resolution,
                                       feature_names=feature_names)
    assert len(axs) == 3
    assert all(ax.has_data() for ax in axs)
Example 5: test_gradient_boosting_early_stopping
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def test_gradient_boosting_early_stopping():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=1000,
                                     n_iter_no_change=10,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)
    gbr = GradientBoostingRegressor(n_estimators=1000, n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    random_state=42)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)
    # Check if early stopping works as expected
    for est, tol, early_stop_n_estimators in ((gbc, 1e-1, 24), (gbr, 1e-1, 13),
                                              (gbc, 1e-3, 36),
                                              (gbr, 1e-3, 28)):
        est.set_params(tol=tol)
        est.fit(X_train, y_train)
        assert_equal(est.n_estimators_, early_stop_n_estimators)
        assert est.score(X_test, y_test) > 0.7

    # Without early stopping
    gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                     max_depth=3, random_state=42)
    gbc.fit(X, y)
    gbr = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1,
                                    max_depth=3, random_state=42)
    gbr.fit(X, y)

    assert gbc.n_estimators_ == 100
    assert gbr.n_estimators_ == 200
Example 6: grid_search
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def grid_search():
    results_list_of_tuples = list()
    num_folds = 3
    best_result = tuple()
    for item1 in gd_grid['learning_rate']:
        for item2 in gd_grid['max_depth']:
            for item3 in gd_grid['min_samples_leaf']:
                for item4 in gd_grid['n_estimators']:
                    for item5 in gd_grid['random_state']:
                        instance = 'LR {}, max_depth {}, min_samp_leaf {}, n_est {}, rs {}'.format(item1, item2, item3, item4, item5)
                        print instance
                        gbrt = GradientBoostingRegressor(random_state=item5, n_estimators=item4, min_samples_leaf=item3, max_depth=item2, learning_rate=item1)
                        kf = KFold(X.shape[0], n_folds=num_folds)
                        mse_list = []
                        for train_index, test_index in kf:
                            X_train, X_test = X[train_index], X[test_index]
                            y_train, y_test = y[train_index], y[test_index]
                            w_train, w_test = weights[train_index], weights[test_index]
                            gbrt.fit(X_train, y_train, w_train)
                            y_pred = gbrt.predict(X_test)
                            mse = mean_squared_error(y_test, y_pred, sample_weight=w_test)
                            mse_list.append(mse)
                        kf_mse = np.mean(np.array(mse_list))
                        results_list_of_tuples.append((instance, kf_mse))
    return results_list_of_tuples
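The five nested loops in Example 6 enumerate the full Cartesian product of the grid; an equivalent, flatter way to write that traversal (a sketch, assuming the same gd_grid dict) is:

from itertools import product

param_names = ['learning_rate', 'max_depth', 'min_samples_leaf', 'n_estimators', 'random_state']
for lr, depth, leaf, n_est, rs in product(*(gd_grid[name] for name in param_names)):
    gbrt = GradientBoostingRegressor(random_state=rs, n_estimators=n_est,
                                     min_samples_leaf=leaf, max_depth=depth,
                                     learning_rate=lr)
    # ... cross-validate exactly as in the inner loop above ...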
Example 7: test_boston
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def test_boston():
    # Check consistency on dataset boston house prices with least squares
    # and least absolute deviation.
    for loss in ("ls", "lad", "huber"):
        for subsample in (1.0, 0.5):
            last_y_pred = None
            for i, sample_weight in enumerate(
                    (None, np.ones(len(boston.target)),
                     2 * np.ones(len(boston.target)))):
                clf = GradientBoostingRegressor(n_estimators=100, loss=loss,
                                                max_depth=4, subsample=subsample,
                                                min_samples_split=1,
                                                random_state=1)

                assert_raises(ValueError, clf.predict, boston.data)
                clf.fit(boston.data, boston.target,
                        sample_weight=sample_weight)
                y_pred = clf.predict(boston.data)
                mse = mean_squared_error(boston.target, y_pred)
                assert mse < 6.0, "Failed with loss %s and " \
                    "mse = %.4f" % (loss, mse)

                if last_y_pred is not None:
                    np.testing.assert_array_almost_equal(
                        last_y_pred, y_pred,
                        err_msg='pred_%d doesnt match last pred_%d '
                                'for loss %r and subsample %r. '
                                % (i, i - 1, loss, subsample))
                last_y_pred = y_pred
Example 8: boost2
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def boost2():
    minimum_mse = 1000000000
    min_depth = 0
    min_estimator = 0  # initialize so the final model can be built even if no iteration improves
    X = pd.read_csv('../data/kaggle/kaggle.X1.train.txt', header=None)
    Y = pd.read_csv('../data/kaggle/kaggle.Y.train.txt', header=None)
    Xtest = pd.read_csv('../data/kaggle/kaggle.X1.test.txt', header=None)
    Xtr, Xte, Ytr, Yte = train_test_split(X, Y, test_size=0.25, random_state=42)
    for estimators in range(700, 2000, 100):
        print "For estimators: ", estimators
        for i in range(6, 8):
            print "For max_depth: ", i
            # note: warm_start=True has no effect here, since a fresh estimator is created each iteration
            est = GradientBoostingRegressor(n_estimators=estimators, max_depth=i, min_samples_leaf=500, warm_start=True)
            est.fit(Xtr, Ytr)
            Yhat = est.predict(Xte)
            current_mse = mean_squared_error(Yte, Yhat)
            print "For MaxDepth:", i, ", MSE:", current_mse
            if minimum_mse > current_mse:
                minimum_mse = current_mse
                min_depth = i
                min_estimator = estimators
    # retrain on the full training set with the best parameters found
    est2 = GradientBoostingRegressor(n_estimators=min_estimator, max_depth=min_depth, min_samples_leaf=500,
                                     warm_start=True, verbose=True)
    est2.fit(X, Y)
    print "** minimum_mse: ", minimum_mse
    print "** min_depth: ", min_depth
    print "** min_estimator: ", min_estimator
    pred = est2.predict(Xtest)
    s = pd.Series(pred)
    s.index = s.index + 1
    s.to_csv('pyprediction.csv', header=['Prediction'], index=True, index_label='ID')
Example 9: compute_photoz_forest
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def compute_photoz_forest(N_boosts):
    rms_test = np.zeros(len(N_boosts))
    rms_train = np.zeros(len(N_boosts))
    i_best = 0
    z_fit_best = None

    for i, Nb in enumerate(N_boosts):
        try:
            # older versions of scikit-learn
            clf = GradientBoostingRegressor(n_estimators=Nb, learn_rate=0.1,
                                            max_depth=3, random_state=0)
        except TypeError:
            clf = GradientBoostingRegressor(n_estimators=Nb, learning_rate=0.1,
                                            max_depth=3, random_state=0)

        clf.fit(mag_train, z_train)

        z_fit_train = clf.predict(mag_train)
        z_fit = clf.predict(mag_test)
        rms_train[i] = np.mean(np.sqrt((z_fit_train - z_train) ** 2))
        rms_test[i] = np.mean(np.sqrt((z_fit - z_test) ** 2))

        if rms_test[i] <= rms_test[i_best]:
            i_best = i
            z_fit_best = z_fit

    return rms_test, rms_train, i_best, z_fit_best
Example 10: build_models
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def build_models(self):
    self.remove_columns(
        [
            "institute_latitude",
            "institute_longitude",
            "institute_state",
            "institute_country",
            "var10",
            "var11",
            "var12",
            "var13",
            "var14",
            "var15",
            "instructor_past_performance",
            "instructor_association_industry_expert",
            "secondary_area",
            "var24",
        ]
    )

    model1 = GradientBoostingRegressor(learning_rate=0.1, n_estimators=200, subsample=0.8)
    model2 = RandomForestRegressor(n_estimators=50)
    model3 = ExtraTreesRegressor(n_estimators=50)

    model1.fit(self.X, self.y)
    model2.fit(self.X, self.y)
    model3.fit(self.X, self.y)
    return [model1, model2, model3]
Example 11: fit
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def fit(filename, treename, inputsname, targetname, workingpoint=0.9, test=False):
    # Reading inputs and targets
    ninputs = len(inputsname)
    branches = copy.deepcopy(inputsname)
    branches.append(targetname)
    data = root2array(filename, treename=treename, branches=branches)
    data = data.view((np.float64, len(data.dtype.names)))
    # Extract and format inputs and targets from numpy array
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    # if test requested, use 60% of events for training and 40% for testing
    inputs_train = inputs
    targets_train = targets
    if test:
        inputs_train, inputs_test, targets_train, targets_test = cross_validation.train_test_split(inputs, targets, test_size=0.4, random_state=0)
    # Define and fit quantile regression (quantile = workingpoint)
    # Default training parameters are used
    regressor = GradientBoostingRegressor(loss='quantile', alpha=workingpoint)
    regressor.fit(inputs_train, targets_train)
    if test:
        # Compare regression prediction with the true value and count the fraction of time it falls below
        # This should give the working point value
        predict_test = regressor.predict(inputs_test)
        compare = np.less(targets_test, predict_test)
        print 'Testing regression with inputs', inputsname, 'and working point', workingpoint
        print '  Test efficiency =', float(list(compare).count(True)) / float(len(compare))
        # TODO: add 1D efficiency graphs vs input variables
    return regressor
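For context on Example 11's objective: with loss='quantile' and alpha=0.9, the model estimates the conditional 90th percentile, so roughly 90% of true targets should fall below its predictions, which is exactly the "test efficiency" the example measures. A quick synthetic check (illustrative only, not part of the example's source):

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.RandomState(1)
X = rng.rand(2000, 1)
y = X[:, 0] + rng.randn(2000) * 0.1

q90 = GradientBoostingRegressor(loss='quantile', alpha=0.9, random_state=1)
q90.fit(X, y)

# The fraction of targets below the predicted 0.9-quantile should be close to 0.9
print(np.mean(y < q90.predict(X)))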
Example 12: modelTheData
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def modelTheData(data, target):
    # params = {'n_estimators': 400, 'max_depth': 4, 'min_samples_split': 2,
    #           'subsample': 0.5, 'min_samples_leaf': 2,
    #           'learning_rate': 0.01, 'loss': 'ls'}

    # beijing
    # (learn_rate is the deprecated alias of learning_rate in old scikit-learn releases)
    myMachine = GradientBoostingRegressor(alpha=0.9, init=None, learn_rate=None,
                                          learning_rate=0.05, loss='ls', max_depth=1, max_features=None,
                                          min_samples_leaf=2, min_samples_split=2, n_estimators=300,
                                          random_state=None, subsample=0.5, verbose=0)
    # shanghai
    # myMachine = GradientBoostingRegressor(alpha=0.9, init=None, learn_rate=None,
    #                                       learning_rate=0.05, loss='ls', max_depth=3, max_features=None,
    #                                       min_samples_leaf=2, min_samples_split=2, n_estimators=500,
    #                                       random_state=None, subsample=0.5, verbose=0)

    # myMachine = GradientBoostingRegressor(**params)
    myMachine.fit(data, target)
    return myMachine
Example 13: train
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def train(targets, features, model_file, params):
    model = GradientBoostingRegressor(**params)
    print "Training hard..."
    model.fit(features, targets)
    print "Saving model..."
    pickle.dump(model, open(model_file, 'wb'))
    return model
Example 14: train_model
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def train_model(features, label, params):
    # Preprocessing
    # scaled_features = preprocessing.scale(features)
    scaled_features = features
    total_rmse = 0.0
    count = 0
    kf = KFold(len(scaled_features), n_folds=10)
    for train_index, validation_index in kf:
        X_train, X_validation = scaled_features[train_index], scaled_features[validation_index]
        Y_train, Y_validation = label[train_index], label[validation_index]
        # estimator = SVR(**params)
        # estimator = RandomForestRegressor(**params)
        estimator = GradientBoostingRegressor(**params)
        estimator.fit(X_train, Y_train)
        current_rmse = calculate_RMSE(estimator, X_validation, Y_validation)
        total_rmse += current_rmse
        count += 1
    # Average across all folds
    avg_current_rmse = total_rmse / float(count)
    print("Avg Current RMSE " + str(avg_current_rmse))
    return (params, avg_current_rmse)
Example 15: gradient_boosting_regressor
# Required module: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def gradient_boosting_regressor(train_x, train_y, pred_x, review_id, v_curve=False, l_curve=False, get_model=True):
    """
    :param train_x: training features
    :param train_y: training target values
    :param pred_x: test set to predict
    :param review_id: takes in a review id
    :param v_curve: run the model for validation curve
    :param l_curve: run the model for learning curve
    :param get_model: run the model
    :return: the predicted values, learning curve, validation curve
    """
    gbr = GradientBoostingRegressor(n_estimators=200, max_depth=7, random_state=7)
    if get_model:
        print "Fitting GBR..."
        # fit on log-transformed targets, then undo the transform on the predictions
        gbr.fit(train_x, np.log(train_y + 1))
        gbr_pred = np.exp(gbr.predict(pred_x)) - 1
        # dealing with negative predictions: clip them to zero
        for i in range(len(gbr_pred)):
            if gbr_pred[i] < 0:
                gbr_pred[i] = 0
        Votes = gbr_pred[:, np.newaxis]
        Id = np.array(review_id)[:, np.newaxis]
        submission_gbr = np.concatenate((Id, Votes), axis=1)
        np.savetxt("submission_gbr.csv", submission_gbr, header="Id,Votes", delimiter=',', fmt="%s, %0.2f", comments='')
    # plot validation and learning curves
    if v_curve:
        print "Working on Validation Curves"
        plot_validation_curve(GradientBoostingRegressor(), "Validation Curve: GBR", train_x, np.log(train_y + 1.0),
                              param_name="n_estimators", param_range=[5, 20, 60, 100, 150, 200])
    if l_curve:
        print "Working on Learning Curves"
        plot_learning_curve(GradientBoostingRegressor(), "Learning Curve: GBR", train_x, np.log(train_y + 1.0))