This article collects typical usage examples of the Python method sklearn.ensemble.GradientBoostingRegressor.predict. If you are wondering what GradientBoostingRegressor.predict does, how to call it, or how it is used in practice, the curated code examples below may help. You can also read more about the containing class, sklearn.ensemble.GradientBoostingRegressor.
The following shows 15 code examples of GradientBoostingRegressor.predict, sorted by popularity by default.
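Before the collected examples, here is a minimal, self-contained sketch of the basic fit/predict workflow. The synthetic data and hyperparameter values are illustrative assumptions, not taken from any example below.

# Minimal usage sketch; the data and hyperparameters here are illustrative.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X = rng.rand(500, 4)
y = 2.0 * X[:, 0] + np.sin(6.0 * X[:, 1]) + rng.normal(scale=0.1, size=500)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
model.fit(X_train, y_train)
print(mean_squared_error(y_test, model.predict(X_test)))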
Example 1: compute_photoz_forest
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def compute_photoz_forest(N_boosts):
    rms_test = np.zeros(len(N_boosts))
    rms_train = np.zeros(len(N_boosts))
    i_best = 0
    z_fit_best = None

    for i, Nb in enumerate(N_boosts):
        try:
            # older versions of scikit-learn spell the parameter learn_rate
            clf = GradientBoostingRegressor(n_estimators=Nb, learn_rate=0.1,
                                            max_depth=3, random_state=0)
        except TypeError:
            clf = GradientBoostingRegressor(n_estimators=Nb, learning_rate=0.1,
                                            max_depth=3, random_state=0)
        clf.fit(mag_train, z_train)

        z_fit_train = clf.predict(mag_train)
        z_fit = clf.predict(mag_test)
        # root-mean-square error on the training and test sets
        rms_train[i] = np.sqrt(np.mean((z_fit_train - z_train) ** 2))
        rms_test[i] = np.sqrt(np.mean((z_fit - z_test) ** 2))

        if rms_test[i] <= rms_test[i_best]:
            i_best = i
            z_fit_best = z_fit

    return rms_test, rms_train, i_best, z_fit_best
Example 2: test_regression_synthetic
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def test_regression_synthetic():
    """Test on the synthetic regression datasets used in Leo Breiman,
    "Bagging Predictors", Machine Learning 24(2): 123-140 (1996)."""
    random_state = check_random_state(1)
    regression_params = {'n_estimators': 100, 'max_depth': 4,
                         'min_samples_split': 1, 'learning_rate': 0.1,
                         'loss': 'ls'}

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=random_state, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor()
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 5.0, "Failed on Friedman1 with mse = %.4f" % mse

    # Friedman2
    X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 1700.0, "Failed on Friedman2 with mse = %.4f" % mse

    # Friedman3
    X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 0.015, "Failed on Friedman3 with mse = %.4f" % mse
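For reference, make_friedman1 draws ten features uniformly on [0, 1] and, per the scikit-learn documentation, generates targets from Friedman's benchmark function, in which only the first five features are informative:

y(X) = 10 \sin(\pi x_1 x_2) + 20 (x_3 - 0.5)^2 + 10 x_4 + 5 x_5 + \sigma \varepsilon, \qquad \varepsilon \sim N(0, 1)

with \sigma set by the noise argument (1.0 above).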
Example 3: boost2
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def boost2():
    minimum_mse = 1000000000
    min_depth = 0
    X = pd.read_csv('../data/kaggle/kaggle.X1.train.txt', header=None)
    Y = pd.read_csv('../data/kaggle/kaggle.Y.train.txt', header=None)
    Xtest = pd.read_csv('../data/kaggle/kaggle.X1.test.txt', header=None)
    Xtr, Xte, Ytr, Yte = train_test_split(X, Y, test_size=0.25, random_state=42)

    for estimators in range(700, 2000, 100):
        print "For estimators: ", estimators
        for i in range(6, 8):
            print "For max_depth: ", i
            est = GradientBoostingRegressor(n_estimators=estimators, max_depth=i,
                                            min_samples_leaf=500, warm_start=True)
            est.fit(Xtr, Ytr)
            Yhat = est.predict(Xte)
            current_mse = mean_squared_error(Yte, Yhat)
            print "For MaxDepth:", i, ", MSE:", current_mse
            if minimum_mse > current_mse:
                minimum_mse = current_mse
                min_depth = i
                min_estimator = estimators

    # refit on the full training set with the best parameters found
    est2 = GradientBoostingRegressor(n_estimators=min_estimator, max_depth=min_depth,
                                     min_samples_leaf=500, warm_start=True, verbose=True)
    est2.fit(X, Y)
    print "** minimum_mse: ", minimum_mse
    print "** min_depth: ", min_depth
    print "** min_estimator: ", min_estimator

    pred = est2.predict(Xtest)
    s = pd.Series(pred)
    s.index = s.index + 1
    s.to_csv('pyprediction.csv', header=['Prediction'], index=True, index_label='ID')
Example 4: gradient_boosting
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def gradient_boosting(X, y, nf=2, lr=.1, ne=100):
    col_names = X.columns
    y = y.astype(float)
    Xs = X.astype(float)
    Xs_t, Xs_holdout, y_t, y_holdout = train_test_split(Xs, y, train_size=.8)
    Xs_t = Xs_t.set_index([range(len(Xs_t))])
    Xs_holdout = Xs_holdout.set_index([range(len(Xs_holdout))])
    y_t = pd.DataFrame(y_t).set_index([range(len(y_t))])
    y_holdout = pd.DataFrame(y_holdout).set_index([range(len(y_holdout))])
    kf = KFold(len(Xs_t), nf)
    output_table = []
    precisions = []
    accuracies = []
    F1s = []
    fold_count = 1

    for train_index, test_index in kf:
        results = []
        Xs_train, Xs_test = Xs_t.iloc[train_index, :], Xs_t.iloc[test_index, :]
        y_train, y_test = y_t.iloc[train_index, :], y_t.iloc[test_index, :]
        y_train = np.array(y_train)
        y_test = np.array(y_test)
        Gboost = GradientBoostingRegressor(learning_rate=lr, loss='ls', n_estimators=ne)
        Gboost.fit(Xs_train, y_train)
        # round the regression output so classification metrics apply
        pred = Gboost.predict(Xs_test)
        pred = np.array(pred)
        pred = pred.round()
        output_table.append(' ')
        output_table.append("Fold " + str(fold_count) + ':')
        output_table.append("Precision Score: " + str(precision_score(pred, y_test)))
        output_table.append("Accuracy Score: " + str(accuracy_score(pred, y_test)))
        output_table.append("F1 Score: " + str(f1_score(pred, y_test)))
        precisions.append(precision_score(pred, y_test))
        accuracies.append(accuracy_score(pred, y_test))
        F1s.append(f1_score(pred, y_test))
        fold_count += 1

    pred_holdout = Gboost.predict(Xs_holdout)
    pred_holdout = np.array(pred_holdout)
    pred_holdout = pred_holdout.round()
    cm = confusion_matrix(y_holdout, pred_holdout)
    # confusion_matrix rows are true labels, columns are predictions
    TN = cm[0][0]
    FP = cm[0][1]
    FN = cm[1][0]
    TP = cm[1][1]
    print "Mean Precision: ", np.mean(precisions)
    print "Mean F1s: ", np.mean(F1s)
    print "True Positive Rate (Sensitivity): ", TP * 1. / (TP + FN)
    print "True Negative Rate (Specificity): ", TN * 1. / (TN + FP)
    print "Precision: ", TP * 1. / (TP + FP),
    print "Accuracy: ", (TP + TN) * 1. / (TP + TN + FP + FN),

    indices = np.argsort(Gboost.feature_importances_)
    figure = plt.figure(figsize=(10, 7))
    plt.barh(np.arange(len(col_names)), Gboost.feature_importances_[indices],
             align='center', alpha=.5)
    plt.yticks(np.arange(len(col_names)), np.array(col_names)[indices], fontsize=14)
    plt.xticks(fontsize=14)
    _ = plt.xlabel('Relative importance', fontsize=18)
    return Gboost
Example 5: predict
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def predict(total_check):
    X_train, X_test = total_check[0:891, 1::], total_check[891::, 1::]
    y_train = total_check[0:891, 0]
    est = GradientBoostingRegressor(n_estimators=220, learning_rate=0.1,
                                    loss='ls').fit(X_train, y_train)
    print mean_squared_error(y_train, est.predict(X_train))
    output = est.predict(X_test)
    return output
Example 6: test1
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def test1():
    purchase_features = pd.read_csv('purchase_features.csv', index_col='report_date', parse_dates='report_date')
    redeem_features = pd.read_csv('redeem_features.csv', index_col='report_date', parse_dates='report_date')
    result = pd.read_csv('result.csv', index_col='time', parse_dates='time')

    purchase_train_y = result['20140331':'20140630']['purchase']
    redeem_train_y = result['20140331':'20140630']['redeem']
    purchase_x = purchase_features['20140401':'20140630']
    redeem_x = redeem_features['20140401':'20140630']
    purchase_test_y = result['20140701':'20140731']['purchase']
    redeem_test_y = result['20140701':'20140731']['redeem']
    purchase_test_x = purchase_features['20140701':'20140731']
    redeem_test_x = redeem_features['20140701':'20140731']

    # the models are trained on day-over-day deltas, not raw levels
    purchase_delta = delta(purchase_train_y)
    redeem_delta = delta(redeem_train_y)
    m1 = GradientBoostingRegressor(n_estimators=250, learning_rate=0.01, max_depth=3, random_state=0,
                                   loss='lad', min_samples_split=2).fit(purchase_x.values, purchase_delta)
    m2 = GradientBoostingRegressor(n_estimators=250, learning_rate=0.01, max_depth=3, random_state=0,
                                   loss='lad', min_samples_split=2).fit(redeem_x.values, redeem_delta)

    y_p_pre = list()
    y_r_pre = list()
    last_value_p = purchase_train_y[-1]
    last_value_r = redeem_train_y[-1]
    for i in range(31):
        # feed the previous day's predictions back into the lag features
        if i != 0:
            purchase_test_x.ix[i, 'yesterday_purchase'] = last_value_p
            purchase_test_x.ix[i, 'yesterday_redeem'] = last_value_r
            redeem_test_x.ix[i, 'yesterday_purchase'] = last_value_p
            redeem_test_x.ix[i, 'yesterday_redeem'] = last_value_r
        if i - 7 >= 0:
            purchase_test_x.ix[i, 'week1'] = y_p_pre[i - 7]
            redeem_test_x.ix[i, 'week1'] = y_r_pre[i - 7]
        if i - 14 >= 0:
            purchase_test_x.ix[i, 'week2'] = y_p_pre[i - 14]
            redeem_test_x.ix[i, 'week2'] = y_r_pre[i - 14]
        if i - 21 >= 0:
            purchase_test_x.ix[i, 'week3'] = y_p_pre[i - 21]
            redeem_test_x.ix[i, 'week3'] = y_r_pre[i - 21]
        if i - 28 >= 0:
            purchase_test_x.ix[i, 'week4'] = y_p_pre[i - 28]
            redeem_test_x.ix[i, 'week4'] = y_r_pre[i - 28]
        p_pre = m1.predict(purchase_test_x.ix[i].values)
        p_pre += last_value_p
        last_value_p = p_pre
        r_pre = m2.predict(redeem_test_x.ix[i].values)
        r_pre += last_value_r
        last_value_r = r_pre
        y_p_pre.append(p_pre)
        y_r_pre.append(r_pre)

    print "purchase mean/var error", error(purchase_test_y, y_p_pre)
    print "redeem mean/var error", error(redeem_test_y, y_r_pre)
Example 7: gbrt_training
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def gbrt_training(x, y):
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.5)
    t = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1)
    t.fit(x_train, y_train)
    p = t.predict(x_test)
    p = map(int, p)
    print ((p - y_test) ** 2).mean()
    p = t.predict(x)
    p = map(int, p)
    print ((p - y) ** 2).mean()
    print p[:10]
    print y[:10]
Example 8: test
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def test(min_day_train, max_day_train, min_day_test, max_day_test,
         line_num=[11], features=[0, 1, 2, 3, 4, 5, 6, 7, 9]):
    print " ", min_day_train, \
        " ", max_day_train, \
        " ", min_day_test, \
        " ", max_day_test, \
        " ", line_num[0],

    data = getData.get_train_data(min_day=min_day_train, max_day=max_day_train, line_num=line_num)
    train = np.array(data)
    xtrain = train[:, features]
    ytrain = train[:, -1]
    # data = getData.get_test_data()
    # test = np.array(data)
    # xtest = test[:, 1:]
    data = getData.get_train_data(min_day=min_day_test, max_day=max_day_test, line_num=line_num)
    test = np.array(data)
    xtest = test[:, features]
    ytest = test[:, -1]

    from sklearn import linear_model
    clf = linear_model.BayesianRidge(normalize=True)
    clf.fit(xtrain, ytrain)
    yHat = clf.predict(xtest)
    print " ", rssError(ytest, yHat),

    from sklearn import tree
    clf = tree.DecisionTreeRegressor()
    clf.fit(xtrain, ytrain)
    yHat = clf.predict(xtest)
    print " ", rssError(ytest, yHat),

    from sklearn.ensemble import GradientBoostingRegressor
    clf = GradientBoostingRegressor()
    clf.fit(xtrain, ytrain)
    yHat = clf.predict(xtest)
    print " ", rssError(ytest, yHat),

    from sklearn.neighbors import KNeighborsRegressor
    clf = KNeighborsRegressor(n_neighbors=1)
    clf.fit(xtrain, ytrain)
    yHat = clf.predict(xtest)
    print " ", rssError(ytest, yHat),

    clf = linear_model.LassoLars(alpha=.01, normalize=True)
    clf.fit(xtrain, ytrain)
    yHat = clf.predict(xtest)
    print " ", rssError(ytest, yHat)
Example 9: boosting_optimization
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def boosting_optimization(X_train, y_train, X_test, y_test):
    gbm = GradientBoostingRegressor(n_estimators=3000, max_depth=10)
    gbm.fit(X_train, y_train)
    pred = gbm.predict(X_test)
    print "feature importances: "
    print pd.Series(gbm.feature_importances_, index=datasets.load_boston().feature_names)
    print "staged predict: {}".format(gbm.staged_predict(X_train))
    print "predict: {}".format(gbm.predict(X_test))
    print y_test
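Note that staged_predict returns a generator, so the print above only shows the generator object. Below is a minimal sketch of its intended use, scoring the model after each boosting stage to pick the best number of trees; the variable names are reused from this example.

# Sketch: evaluate every boosting stage with staged_predict.
import numpy as np
from sklearn.metrics import mean_squared_error

stage_mse = [mean_squared_error(y_test, y_stage)
             for y_stage in gbm.staged_predict(X_test)]
best_n = int(np.argmin(stage_mse)) + 1  # stages are numbered from 1
print("best n_estimators: {}, mse: {:.4f}".format(best_n, stage_mse[best_n - 1]))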
Example 10: test_quantile_loss
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def test_quantile_loss():
    """Check if quantile loss with alpha=0.5 equals lad."""
    clf_quantile = GradientBoostingRegressor(n_estimators=100, loss="quantile",
                                             max_depth=4, alpha=0.5, random_state=7)
    clf_quantile.fit(boston.data, boston.target)
    y_quantile = clf_quantile.predict(boston.data)

    clf_lad = GradientBoostingRegressor(n_estimators=100, loss="lad",
                                        max_depth=4, random_state=7)
    clf_lad.fit(boston.data, boston.target)
    y_lad = clf_lad.predict(boston.data)

    assert_array_almost_equal(y_quantile, y_lad, decimal=4)
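The equivalence checked here follows from the pinball (quantile) loss used by loss="quantile":

L_\alpha(y, \hat{y}) = \alpha \, (y - \hat{y})_+ + (1 - \alpha) \, (\hat{y} - y)_+

At \alpha = 0.5 this reduces to \tfrac{1}{2} |y - \hat{y}|, half the absolute error, so it is minimized by the same predictions as loss="lad": both estimate the conditional median.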
Example 11: grid_search
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def grid_search(X, y, split, max_features=[4, 6, 8, None], learning_rate=[.002, .005, .05, .1]):
    for feat in max_features:
        for learn in learning_rate:
            model = GradientBoostingRegressor(n_estimators=2000,
                                              learning_rate=learn,
                                              max_features=feat,
                                              subsample=.3,
                                              min_samples_leaf=50,
                                              random_state=3)
            model.fit(X[:split], y[:split])
            in_samp_score = mean_squared_error(model.predict(X[:split]), y[:split])
            out_samp_score = mean_squared_error(model.predict(X[split:]), y[split:])
            print 'learn, max_features: {},{}'.format(learn, feat)
            print 'in-sample score, out-sample score: {}, {}'.format(in_samp_score, out_samp_score)
Example 12: gradient
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def gradient(X_train, y_train, y_test, X_test, file_loc, target):
    '''
    Runs the grid_search helper to pick the best parameters for the
    gradient-boosted model, depending on the target variable being predicted.
    '''
    grid = grid_search(file_loc, target)
    best_params = grid.best_params_
    learn_rate = best_params['learning_rate']
    n_estimators = best_params['n_estimators']
    max_feat = best_params['max_features']
    model = GradientBoostingRegressor(learning_rate=learn_rate,
                                      n_estimators=n_estimators,
                                      max_features=max_feat)
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)
    mean_squared_error = mse(y_test, model.predict(X_test))
    r2 = model.score(X_test, y_test)
    return (mean_squared_error, r2)
Example 13: process_one_file
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def process_one_file(f):
    print f
    csv_r = csv.reader(open(f))
    csv_r.next()  # skip header
    x, y = [], []
    for r in csv_r:
        tmp_t = datetime.datetime.strptime(r[1], '%Y-%m-%d %H:%M:%S')
        hour = r[2]
        minutes = r[3]
        v_occ_min = float(r[4])
        w_occ_min = float(r[5])
        v_occ = float(r[6])
        win = float(r[7])
        wout = float(r[8])
        raw_v_occ = float(r[9])
        _y = float(r[-1])
        x.append([hour, minutes, v_occ_min, w_occ_min, v_occ, win, wout, raw_v_occ])
        y.append(_y)
    x, y = np.array(x), np.array(y)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=233)

    t = GradientBoostingRegressor()
    t.fit(x_train, y_train)
    predict = t.predict(x_test)
    predict_all = t.predict(x)
    print 'gbrt', f_mae(predict, y_test), f_rms(predict, y_test), f_mae(predict_all, y), f_rms(predict_all, y)

    # define base models
    base_models = [GradientBoostingRegressor(n_estimators=100),
                   RandomForestRegressor(n_estimators=100, n_jobs=-1),
                   ExtraTreesRegressor(n_estimators=100, n_jobs=-1)]
    # define blending model
    blending_model = LinearRegression()
    # initialize multi-stage model
    sg = StackedGeneralizer(base_models, blending_model,
                            n_folds=N_FOLDS, verbose=VERBOSE)
    # fit model
    sg.fit(x_train, y_train)
    predict = sg.predict(x_test)
    predict_all = sg.predict(x)
    print 'stack', f_mae(predict, y_test), f_rms(predict, y_test), f_mae(predict_all, y), f_rms(predict_all, y)
    print ''
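The StackedGeneralizer used above is a third-party helper; scikit-learn 0.22+ ships the same idea as sklearn.ensemble.StackingRegressor. Below is a minimal sketch with the same base and blending models; cv=5 is an assumed choice, not taken from the example.

# Sketch: the same stacking setup with scikit-learn's built-in StackingRegressor.
from sklearn.ensemble import (ExtraTreesRegressor, GradientBoostingRegressor,
                              RandomForestRegressor, StackingRegressor)
from sklearn.linear_model import LinearRegression

stack = StackingRegressor(
    estimators=[('gbrt', GradientBoostingRegressor(n_estimators=100)),
                ('rf', RandomForestRegressor(n_estimators=100, n_jobs=-1)),
                ('et', ExtraTreesRegressor(n_estimators=100, n_jobs=-1))],
    final_estimator=LinearRegression(),
    cv=5)
stack.fit(x_train, y_train)
print('stack mse:', ((stack.predict(x_test) - y_test) ** 2).mean())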
Example 14: pipeline
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def pipeline():
    val = data[data.watch == 0]
    val_a_b = val[['item_id', 'store_code', 'a', 'b']]
    val_x = val.drop(['label', 'watch', 'item_id', 'store_code', 'a', 'b'], axis=1)

    train = data[data.watch != 0]
    train_y = train.label
    a = list(train.a)
    b = list(train.b)
    # per-sample weight is the smaller of the two bounds a and b
    train_weight = []
    for i in range(len(a)):
        train_weight.append(min(a[i], b[i]))
    train_weight = np.array(train_weight)
    train_x = train.drop(['label', 'watch', 'item_id', 'store_code', 'a', 'b'], axis=1)

    train_x.fillna(train_x.median(), inplace=True)
    val_x.fillna(val_x.median(), inplace=True)

    model = GradientBoostingRegressor(loss='lad', learning_rate=0.01, n_estimators=400,
                                      subsample=0.75, max_depth=6, random_state=1024,
                                      max_features=0.75)
    # train
    model.fit(train_x, train_y, sample_weight=train_weight)
    # predict val set
    val_a_b['pred'] = model.predict(val_x)
    val_a_b.to_csv('gbrt_3.csv', index=None)
Example 15: GBRModel
# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import predict [as alias]
def GBRModel(X_train, X_cv, y_train, y_cv):
    targets = get_target_array()
    #print len(train_features)
    #print train_features[0]
    #print len(test_features)
    n_estimators = [50, 100]  # , 1500, 5000]
    max_depth = [3, 8]
    best_GBR = None
    best_mse = float('inf')
    best_score = -float('inf')
    print "################# Performing Gradient Boosting Regression ####################### \n\n\n\n"
    for estm in n_estimators:
        for cur_depth in max_depth:
            #random_forest = RandomForestRegressor(n_estimators=estm)
            regr_GBR = GradientBoostingRegressor(n_estimators=estm, max_depth=cur_depth)
            predictor = regr_GBR.fit(X_train, y_train)
            score = regr_GBR.score(X_cv, y_cv)
            mse = np.mean((regr_GBR.predict(X_cv) - y_cv) ** 2)
            print "Number of estimators used: ", estm
            print "Tree depth used: ", cur_depth
            print "Residual sum of squares: %.2f " % mse
            print "Variance score: %.2f \n" % score
            if best_score <= score:
                if best_mse > mse:
                    best_mse = mse
                    best_score = score
                    best_GBR = predictor
    print "\nBest score: ", best_score
    print "Best mse: ", best_mse
    return best_GBR
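Examples 3, 11, and 15 hand-roll their parameter searches. scikit-learn's GridSearchCV covers the same pattern with cross-validation built in; below is a sketch over Example 15's grid, where the scoring choice and cv=3 are assumptions for illustration.

# Sketch: the same search with GridSearchCV instead of nested loops.
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {'n_estimators': [50, 100], 'max_depth': [3, 8]}
search = GridSearchCV(GradientBoostingRegressor(), param_grid,
                      scoring='neg_mean_squared_error', cv=3)
search.fit(X_train, y_train)
print(search.best_params_, -search.best_score_)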