This article collects typical usage examples of the Python class sklearn.ensemble.GradientBoostingRegressor. If you have been wondering what the GradientBoostingRegressor class is for, how to use it, or what real usage looks like, the curated class examples here may help.
A total of 15 code examples of the GradientBoostingRegressor class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
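Before the collected examples, here is a minimal sketch of the basic fit/predict workflow of GradientBoostingRegressor. It uses synthetic data from make_regression; the dataset, hyperparameter values, and variable names are illustrative assumptions and are not taken from any of the examples below.

# Minimal sketch: fit a GradientBoostingRegressor on synthetic data and evaluate it.
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Generate a small synthetic regression problem (sizes chosen arbitrarily).
X, y = make_regression(n_samples=500, n_features=10, noise=1.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Typical hyperparameters: number of boosting stages, shrinkage, and tree depth.
reg = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
                                max_depth=3, random_state=0)
reg.fit(X_train, y_train)

# Evaluate on the held-out split.
print("Test MSE:", mean_squared_error(y_test, reg.predict(X_test)))
print("Test R^2:", reg.score(X_test, y_test))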
Example 1: train_model
def train_model(features, label, params):
    # Preprocessing
    # scaled_features = preprocessing.scale(features)
    scaled_features = features
    total_rmse = 0.0
    count = 0
    kf = KFold(len(scaled_features), n_folds=10)
    for train_index, validation_index in kf:
        X_train, X_validation = scaled_features[train_index], scaled_features[validation_index]
        Y_train, Y_validation = label[train_index], label[validation_index]
        # estimator = SVR(**params)
        # estimator = RandomForestRegressor(**params)
        estimator = GradientBoostingRegressor(**params)
        estimator.fit(X_train, Y_train)
        current_rmse = calculate_RMSE(estimator, X_validation, Y_validation)
        total_rmse += current_rmse
        count += 1
    # Average the RMSE across all folds
    avg_current_rmse = total_rmse / float(count)
    print("Avg Current RMSE " + str(avg_current_rmse))
    return (params, avg_current_rmse)
Example 2: test_gradient_boosting_validation_fraction
def test_gradient_boosting_validation_fraction():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=100,
                                     n_iter_no_change=10,
                                     validation_fraction=0.1,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)
    gbc2 = clone(gbc).set_params(validation_fraction=0.3)
    gbc3 = clone(gbc).set_params(n_iter_no_change=20)

    gbr = GradientBoostingRegressor(n_estimators=100, n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    validation_fraction=0.1,
                                    random_state=42)
    gbr2 = clone(gbr).set_params(validation_fraction=0.3)
    gbr3 = clone(gbr).set_params(n_iter_no_change=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    # Check if validation_fraction has an effect
    gbc.fit(X_train, y_train)
    gbc2.fit(X_train, y_train)
    assert gbc.n_estimators_ != gbc2.n_estimators_

    gbr.fit(X_train, y_train)
    gbr2.fit(X_train, y_train)
    assert gbr.n_estimators_ != gbr2.n_estimators_

    # Check if n_estimators_ increase monotonically with n_iter_no_change
    # Set validation
    gbc3.fit(X_train, y_train)
    gbr3.fit(X_train, y_train)
    assert gbr.n_estimators_ < gbr3.n_estimators_
    assert gbc.n_estimators_ < gbc3.n_estimators_
Example 3: test_feature_importance_regression
def test_feature_importance_regression():
    """Test that Gini importance is calculated correctly.

    This test follows the example from [1]_ (pg. 373).

    .. [1] Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements
       of statistical learning. New York: Springer series in statistics.
    """
    california = fetch_california_housing()
    X, y = california.data, california.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    reg = GradientBoostingRegressor(loss='huber', learning_rate=0.1,
                                    max_leaf_nodes=6, n_estimators=100,
                                    random_state=0)
    reg.fit(X_train, y_train)
    sorted_idx = np.argsort(reg.feature_importances_)[::-1]
    sorted_features = [california.feature_names[s] for s in sorted_idx]

    # The most important feature is the median income by far.
    assert sorted_features[0] == 'MedInc'

    # The three subsequent features are the following. Their relative ordering
    # might change a bit depending on the randomness of the trees and the
    # train / test split.
    assert set(sorted_features[1:4]) == {'Longitude', 'AveOccup', 'Latitude'}
Example 4: gbdt_model
def gbdt_model(trains):
    trains = np.array(trains)
    gbdt = GradientBoostingRegressor(
        loss='ls',
        learning_rate=0.1,
        n_estimators=100,
        subsample=1,
        min_samples_split=2,
        min_samples_leaf=1,
        max_depth=3,
        init=None,
        random_state=None,
        max_features=None,
        alpha=0.9,
        verbose=0,
        max_leaf_nodes=None,
        warm_start=False
    )
    # pdb.set_trace()
    train_set = trains[:, :-1]
    label_set = trains[:, -1]
    gbdt.fit(train_set, label_set)
    return gbdt
Example 5: gbm_fit
def gbm_fit(params, cv_folds):
    gbm = GradientBoostingRegressor(**params)
    gbm.fit(x_train, y_train)
    # Check accuracy of model
    # No need for validation data because of cross validation
    # Training data is split up into cv_folds folds:
    # Model trained on (cv_folds - 1) of the folds; last fold is saved as validation set
    cv_scores_mse = cross_validation.cross_val_score(gbm, x_train, y_train, cv=cv_folds,
                                                     scoring='mean_squared_error')
    print '\nModel Report'
    print ('MSE Score: Mean - %.7g | Std - %.7g | Min - %.7g | Max - %.7g' %
           (np.mean(cv_scores_mse), np.std(cv_scores_mse), np.min(cv_scores_mse), np.max(cv_scores_mse)))
    feat_imp = pd.Series(gbm.feature_importances_, features).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    plt.show()
    # Check actual performance on test data
    final_predictions = gbm.predict(x_test)
    test['health_score_in_week'] = final_predictions
    test.to_csv(output_file, columns=['user_id', 'date', 'steps', 'total_sleep', 'resting_hr',
                                      'step_week_slope', 'sleep_week_slope', 'hr_week_slope',
                                      'curr_health_score', 'health_score_in_week'])
    # Save the model to file 'health_prediction.pkl'
    joblib.dump(gbm, 'health_prediction.pkl', compress=1)
Example 6: GBRModel
def GBRModel(X_train, X_cv, y_train, y_cv):
    targets = get_target_array()
    #print len(train_features)
    #print train_features[0]
    #print len(test_features)
    n_estimators = [50, 100]  # , 1500, 5000]
    max_depth = [3, 8]
    best_GBR = None
    best_mse = float('inf')
    best_score = -float('inf')
    print "################# Performing Gradient Boosting Regression ####################### \n\n\n\n"
    for estm in n_estimators:
        for cur_depth in max_depth:
            #random_forest = RandomForestRegressor(n_estimators=estm)
            regr_GBR = GradientBoostingRegressor(n_estimators=estm, max_depth=cur_depth)
            predictor = regr_GBR.fit(X_train, y_train)
            score = regr_GBR.score(X_cv, y_cv)
            mse = np.mean((regr_GBR.predict(X_cv) - y_cv) ** 2)
            print "Number of estimators used: ", estm
            print "Tree depth used: ", cur_depth
            print "Residual sum of squares: %.2f " % mse
            print "Variance score: %.2f \n" % score
            if best_score <= score:
                if best_mse > mse:
                    best_mse = mse
                    best_score = score
                    best_GBR = predictor
    print "\nBest score: ", best_score
    print "Best mse: ", best_mse
    return best_GBR
Example 7: gradient_boosting
def gradient_boosting(features_values_temp, rows_temp, columns_temp, prediction_values_temp, kernel, threshold):
    # kernel: linear, poly, rbf, sigmoid, precomputed
    rows = 0
    while rows_temp > 0:
        rows = rows + 1
        rows_temp = rows_temp - 1
    columns = 0
    while columns_temp > 0:
        columns = columns + 1
        columns_temp = columns_temp - 1
    features_values = [x for x in features_values_temp]
    prediction_values = [y for y in prediction_values_temp]
    rotated = convert_list_to_matrix(features_values, rows, columns)
    scores = np.array(prediction_values)
    threshold = float(threshold)
    estimator = SVR(kernel=kernel)  # try to change to the model for which the test is gonna run (lasso, ridge, etc.)
    X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0)
    X_train, X_test = X[:200], X[200:]
    y_train, y_test = y[:200], y[200:]
    est = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, loss='ls').fit(X_train, y_train)
    mean_squared_error(y_test, est.predict(X_test))
Example 8: fit
def fit(filename, treename, inputsname, targetname, workingpoint=0.9, test=False):
    # Reading inputs and targets
    ninputs = len(inputsname)
    branches = copy.deepcopy(inputsname)
    branches.append(targetname)
    data = root2array(filename, treename=treename, branches=branches)
    data = data.view((np.float64, len(data.dtype.names)))
    # Extract and format inputs and targets from numpy array
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    # If test requested, use 60% of events for training and 40% for testing
    inputs_train = inputs
    targets_train = targets
    if test:
        inputs_train, inputs_test, targets_train, targets_test = cross_validation.train_test_split(inputs, targets, test_size=0.4, random_state=0)
    # Define and fit quantile regression (quantile = workingpoint)
    # Default training parameters are used
    regressor = GradientBoostingRegressor(loss='quantile', alpha=workingpoint)
    regressor.fit(inputs_train, targets_train)
    if test:
        # Compare regression prediction with the true value and count the fraction of time it falls below
        # This should give the working point value
        predict_test = regressor.predict(inputs_test)
        compare = np.less(targets_test, predict_test)
        print 'Testing regression with inputs', inputsname, 'and working point', workingpoint
        print ' Test efficiency =', float(list(compare).count(True))/float(len(compare))
        # TODO: add 1D efficiency graphs vs input variables
    return regressor
Example 9: impute
def impute(df, imp_val, headers):
    if np.isnan(imp_val):
        imp_val = -500
    log("imputing...", 1)
    model = GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=100, subsample=1.0,
                                      min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0,
                                      max_depth=3, init=None, random_state=None, max_features=None,
                                      alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False,
                                      presort='auto')
    data = np.array(df[headers].get_values())
    data[np.isnan(data)] = -500
    for col in range(0, len(headers)):
        #print "Working on column: " + str(col)
        ## For the current column, keep only rows whose (row, column) value differs from the imputation marker,
        ## so the model is trained only on rows with real (non-missing) target values.
        reduced_data = data[np.logical_not(data[:, col] == imp_val)]  # drop rows where row,col equals imp_val
        target_set = reduced_data[:, col]
        training_set = np.delete(reduced_data, col, 1)
        model.fit(training_set, target_set)
        row_num = 0
        for row in data:
            remaining = np.delete(row, col, 0)
            if data[row_num, col] == imp_val:
                data[row_num, col] = model.predict(remaining)
            row_num += 1
    cntr = 0
    for h in headers:
        df[h] = data[:, cntr]
        cntr += 1
    return df
Example 10: modelTheData
def modelTheData(data, target):
    # params = {'n_estimators': 400, 'max_depth': 4, 'min_samples_split': 2,
    #           'subsample': 0.5, 'min_samples_leaf': 2,
    #           'learning_rate': 0.01, 'loss': 'ls'}

    # beijing
    myMachine = GradientBoostingRegressor(alpha=0.9, init=None, learn_rate=None,
                                          learning_rate=0.05, loss='ls', max_depth=1, max_features=None,
                                          min_samples_leaf=2, min_samples_split=2, n_estimators=300,
                                          random_state=None, subsample=0.5, verbose=0)
    # shanghai
    # myMachine = GradientBoostingRegressor(alpha=0.9, init=None, learn_rate=None,
    #                                       learning_rate=0.05, loss='ls', max_depth=3, max_features=None,
    #                                       min_samples_leaf=2, min_samples_split=2, n_estimators=500,
    #                                       random_state=None, subsample=0.5, verbose=0)
    # myMachine = GradientBoostingRegressor(**params)

    myMachine.fit(data, target)
    return myMachine
Example 11: build_models
def build_models(self):
    self.remove_columns(
        [
            "institute_latitude",
            "institute_longitude",
            "institute_state",
            "institute_country",
            "var10",
            "var11",
            "var12",
            "var13",
            "var14",
            "var15",
            "instructor_past_performance",
            "instructor_association_industry_expert",
            "secondary_area",
            "var24",
        ]
    )

    model1 = GradientBoostingRegressor(learning_rate=0.1, n_estimators=200, subsample=0.8)
    model2 = RandomForestRegressor(n_estimators=50)
    model3 = ExtraTreesRegressor(n_estimators=50)

    model1.fit(self.X, self.y)
    model2.fit(self.X, self.y)
    model3.fit(self.X, self.y)

    return [model1, model2, model3]
Example 12: fit
def fit(self, data_train, target):
    self.target_train = target
    self.catcol = data_train.filter(like='var').columns.tolist()

    #start_gbr_tr = time.clock()
    self.gbr = GradientBoostingRegressor(n_estimators=self.nest, max_depth=7)
    self.gbr.fit(data_train, self.target_train)
    self.transformed_train_gbr = self.gbr.transform(data_train, threshold="0.35*mean")
    self.gbr_tr_fit = GradientBoostingRegressor(n_estimators=self.nest, max_depth=7)
    self.gbr_tr_fit.fit(self.transformed_train_gbr, self.target_train)
    #end_gbr_tr = time.clock()
    #print >> log, "time_gbr_tr = ", end_gbr_tr - start_gbr_tr

    #start_xfr_tr = time.clock()
    self.xfr = ExtraTreesRegressor(n_estimators=self.nest, max_depth=7)
    self.xfr.fit(data_train, self.target_train)
    self.transformed_train_xfr = self.xfr.transform(data_train, threshold="0.35*mean")
    self.xfr_tr_fit = ExtraTreesRegressor(n_estimators=self.nest, max_depth=7)
    self.xfr_tr_fit.fit(self.transformed_train_xfr, self.target_train)
    #end_xfr_tr = time.clock()
    #print >> log, "time_xfr_tr = ", end_xfr_tr - start_xfr_tr

    #start_gbr_cat = time.clock()
    self.gbr_cat_fit = GradientBoostingRegressor(n_estimators=self.nest, max_depth=7)
    self.gbr_cat_fit.fit(data_train[self.catcol], self.target_train)
    #end_gbr_cat = time.clock()
    #print >> log, "time_gbr_cat = ", end_gbr_cat - start_gbr_cat

    #start_xfr_cat = time.clock()
    self.xfr_cat_fit = ExtraTreesRegressor(n_estimators=self.nest, max_depth=7)
    self.xfr_cat_fit.fit(data_train[self.catcol], self.target_train)
    #end_xfr_cat = time.clock()
    #print >> log, "time_xfr_cat = ", end_xfr_cat - start_xfr_cat

    return self
Example 13: gbdrtrain
def gbdrtrain(x, y, pre_x):
    x, pre_x = datscater(x, pre_x)
    clf = GradientBoostingRegressor(n_estimators=740, min_samples_leaf=0.8, min_samples_split=40,
                                    learning_rate=0.1, max_depth=7, random_state=400, loss='huber').fit(x, y)
    # clf = GradientBoostingRegressor(n_estimators=200, max_leaf_nodes=20, learning_rate=0.1,
    #                                 max_depth=6, random_state=400, loss='ls').fit(x, y)
    pred = clf.predict(pre_x)
    return pred
Example 14: train
def train(targets, features, model_file, params):
    model = GradientBoostingRegressor(**params)
    print "Training hard..."
    model.fit(features, targets)
    print "Saving model..."
    pickle.dump(model, open(model_file, 'wb'))
    return model
Example 15: add_new_weak_learner
def add_new_weak_learner(self):
    '''
    Summary:
        Adds a new function, h, to self.weak_learners by solving for Eq. 1 using multiple additive regression trees:

        [Eq. 1] h = argmin_h (sum_i Q_A(s_i,a_i) + h(s_i, a_i) - (r_i + max_b Q_A(s'_i, b)))
    '''
    if len(self.most_recent_episode) == 0:
        # If this episode contains no data, don't do anything.
        return

    # Build up data sets of features and loss terms
    data = np.zeros((len(self.most_recent_episode), self.max_state_features + 1))
    total_loss = np.zeros(len(self.most_recent_episode))

    for i, experience in enumerate(self.most_recent_episode):
        # Grab the experience.
        s, a, r, s_prime = experience

        # Pad in case the state features are too short (as in Atari sometimes).
        features = self._pad_features_with_zeros(s, a)
        loss = (r + self.gamma * self.get_max_q_value(s_prime) - self.get_q_value(s, a))

        # Add to relevant lists.
        data[i] = features
        total_loss[i] = loss

    # Compute new regressor and add it to the weak learners.
    estimator = GradientBoostingRegressor(loss='ls', n_estimators=1, max_depth=self.max_depth)
    estimator.fit(data, total_loss)
    self.weak_learners.append(estimator)