This page collects typical usage examples of the score method of Python's sklearn.ensemble.GradientBoostingRegressor. If you have been wondering what GradientBoostingRegressor.score does, how to call it, or how it is used in practice, the curated code samples below may help. You can also explore the other methods of the sklearn.ensemble.GradientBoostingRegressor class.
The following shows 15 code examples of GradientBoostingRegressor.score, sorted by popularity by default.
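Before the examples, note that for any scikit-learn regressor, score(X, y) returns the coefficient of determination R² of the predictions on X against y (1.0 is a perfect fit; the value can be negative for arbitrarily bad models). A minimal sketch, assuming a recent scikit-learn; the synthetic data and parameters are illustrative assumptions, not taken from the examples below:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(200, 1))
y = np.sin(X).ravel() + rng.normal(scale=0.1, size=200)  # noisy sine

model = GradientBoostingRegressor(n_estimators=100, random_state=0)
model.fit(X, y)
print(model.score(X, y))  # R^2 on the training data, close to 1.0 here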
Example 1: anm_fit
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def anm_fit(xy):
    # Python 3: tuple parameters are no longer unpacked in the signature
    x, y = xy
    newX = np.array(x).reshape(len(x), 1)
    clf = GradientBoostingRegressor()
    clf.fit(newX, y)
    # Residuals of the fit; under an additive noise model they should be
    # independent of x in the causal direction
    err = y - clf.predict(newX)
    ret = [clf.score(newX, y)] + list(pearsonr(x, err))
    return ret
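A hedged usage sketch for anm_fit on synthetic data where y is generated from x with additive noise. The imports below are assumptions (the original module's imports are not shown); the function returns [R², residual correlation, p-value]:

import numpy as np
from scipy.stats import pearsonr
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.RandomState(0)
x = rng.uniform(-2, 2, 300)
y = x ** 2 + rng.normal(scale=0.2, size=300)  # y caused by x plus additive noise

r2, corr, pval = anm_fit((x, y))
print(r2, corr, pval)  # high R^2, residual correlation near zero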
Example 2: train_model_parallel
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def train_model_parallel(xtrain, ytrain, index=0):
    xTrain, xTest, yTrain, yTest = train_test_split(xtrain, ytrain[:, index],
                                                    test_size=0.25)
    # model = RandomForestRegressor()
    # model = LogisticRegression()
    model = GradientBoostingRegressor(verbose=1)
    n_est = [10, 50]
    m_dep = [5, 3]
    model = GridSearchCV(estimator=model,
                         param_grid=dict(n_estimators=n_est,
                                         max_depth=m_dep),
                         scoring=scorer,  # custom scorer defined elsewhere in the module
                         n_jobs=-1, verbose=1)
    model.fit(xTrain, yTrain)
    ypred = model.predict(xTest)
    if hasattr(model, 'best_params_'):
        print('best_params', model.best_params_)
    print('score %d %s' % (index, model.score(xTest, yTest)))
    # sqrt(MSE) is the RMSE; it is the RMSLE only if the targets are log-transformed upstream
    print('RMSLE %d %s' % (index, np.sqrt(mean_squared_error(yTest, ypred))))
    with gzip.open('model_%d.pkl.gz' % index, 'wb') as pklfile:
        pickle.dump(model, pklfile, protocol=2)
    return
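The snippet passes scoring=scorer, a scorer defined elsewhere in the original module and not shown here. A plausible reconstruction under the assumption of an RMSE objective (the exact definition in the source is unknown), using sklearn.metrics.make_scorer:

import numpy as np
from sklearn.metrics import make_scorer, mean_squared_error

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

# greater_is_better=False tells GridSearchCV to minimize the RMSE
scorer = make_scorer(rmse, greater_is_better=False)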
Example 3: GBRModel
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def GBRModel(X_train, X_cv, y_train, y_cv):
    targets = get_target_array()
    # print len(train_features)
    # print train_features[0]
    # print len(test_features)
    n_estimators = [50, 100]  # , 1500, 5000]
    max_depth = [3, 8]
    best_GBR = None
    best_mse = float('inf')
    best_score = -float('inf')
    print("################# Performing Gradient Boosting Regression #######################\n\n\n")
    for estm in n_estimators:
        for cur_depth in max_depth:
            # random_forest = RandomForestRegressor(n_estimators=estm)
            regr_GBR = GradientBoostingRegressor(n_estimators=estm, max_depth=cur_depth)
            predictor = regr_GBR.fit(X_train, y_train)
            score = regr_GBR.score(X_cv, y_cv)
            mse = np.mean((regr_GBR.predict(X_cv) - y_cv) ** 2)
            print("Number of estimators used:", estm)
            print("Tree depth used:", cur_depth)
            print("Residual sum of squares: %.2f" % mse)
            print("Variance score: %.2f\n" % score)
            # keep the model only if it improves both R^2 and MSE
            if best_score <= score:
                if best_mse > mse:
                    best_mse = mse
                    best_score = score
                    best_GBR = predictor
    print("\nBest score:", best_score)
    print("Best mse:", best_mse)
    return best_GBR
Example 4: kfold_cv
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def kfold_cv(self, n_folds=3):
    """
    Takes in: number of folds
    Prints out RMSE score and stores the results in self.results
    """
    cv = KFold(n=self.X_train.shape[0], n_folds=n_folds)  # pre-0.18 KFold API; see the sketch after this example
    gbr = GradientBoostingRegressor(**self.params)
    self.med_error = []
    self.rmse_cv = []
    self.pct_error = []
    self.r2 = []
    self.results = {'pred': [],
                    'real': []}
    for train, test in cv:
        gbr.fit(self.X_train[train], self.y_train[train])
        pred = gbr.predict(self.X_train[test])
        print("Score", gbr.score(self.X_train[test], self.y_train[test]))
        # targets are modeled on a log10 scale; transform back before computing errors
        predExp = np.power(10, pred)
        testExp = np.power(10, self.y_train[test])
        medError = median_absolute_error(testExp, predExp)
        percentError = np.median([np.fabs(p - t) / t for p, t in zip(predExp, testExp)])
        error = mean_squared_error(testExp, predExp) ** 0.5
        self.results['pred'] += list(pred)
        self.results['real'] += list(self.y_train[test])
        self.rmse_cv += [error]
        self.med_error += [medError]
        self.pct_error += [percentError]
        self.r2 += [r2_score(self.y_train[test], pred)]
    print('Abs Median Error:', np.mean(self.med_error))
    print('Abs Percent Error:', np.mean(self.pct_error))
    print('Mean RMSE:', np.mean(self.rmse_cv))
    print("R2", np.mean(self.r2))
Example 5: do_job_unit
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def do_job_unit(self, event, corpus, unit, **kwargs):
    assert unit == 0
    extractor = kwargs.get('extractor', "goose")
    thresh = kwargs.get('thresh', .8)
    delay = kwargs.get('delay', None)
    topk = kwargs.get('topk', 20)
    train_events = [e for e in cuttsum.events.get_events()
                    if e.query_num not in set([event.query_num, 7])]
    res = InputStreamResource()
    y = []
    X = []
    for train_event in train_events:
        y_e = []
        X_e = []
        istream = res.get_dataframes(
            train_event,
            cuttsum.corpora.get_raw_corpus(train_event),
            extractor, thresh, delay, topk)
        for df in istream:
            selector = (df["n conf"] == 1) & (df["nugget probs"].apply(len) == 0)
            df.loc[selector, "nugget probs"] = \
                df.loc[selector, "nuggets"].apply(lambda x: {n: 1 for n in x})
            df["probs"] = df["nugget probs"].apply(
                lambda x: [val for key, val in x.items()] + [0])
            df["probs"] = df["probs"].apply(lambda x: np.max(x))
            df.loc[(df["n conf"] == 1) & (df["nuggets"].apply(len) == 0), "probs"] = 0
            y_t = df["probs"].values
            y_t = y_t[:, np.newaxis]
            y_e.append(y_t)
            X_t = df[self.cols].values
            X_e.append(X_t)
        y_e = np.vstack(y_e)
        y.append(y_e)
        X_e = np.vstack(X_e)
        X.append(X_e)
    # print("WARNING NOT USING 2014 EVENTS")
    X = np.vstack(X)
    y = np.vstack(y)
    gbc = GradientBoostingRegressor(
        n_estimators=100, learning_rate=1.,
        max_depth=3, random_state=0)
    print("fitting", event)
    gbc.fit(X, y.ravel())
    print(event, "SCORE", gbc.score(X, y.ravel()))
    model_dir = self.get_model_dir(event)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    joblib.dump(gbc, self.get_model_path(event), compress=9)
Example 6: _random_search
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def _random_search(self, random_iter, x, y):
    # Default values
    max_features = x.shape[1]
    learning_rate = 0.1   # [0.1, 0.05, 0.02, 0.01]
    max_depth = 3         # [4, 6]
    min_samples_leaf = 1  # [3, 5, 9, 17]
    n_estimators = 100
    best_score = -sys.maxsize  # Python 3 spelling of sys.maxint
    if random_iter > 0:
        sys.stdout.write("Do a random search %d times" % random_iter)
        param_dist = {"max_features": numpy.linspace(0.1, 1, num=10),
                      "learning_rate": 2 ** numpy.linspace(-1, -10, num=10),
                      "max_depth": range(1, 11),
                      "min_samples_leaf": range(2, 20, 2),
                      "n_estimators": range(10, 110, 10)}
        # evaluate the defaults first, then random_iter - 1 sampled configurations
        param_list = [{"max_features": max_features,
                       "learning_rate": learning_rate,
                       "max_depth": max_depth,
                       "min_samples_leaf": min_samples_leaf,
                       "n_estimators": n_estimators}]
        param_list.extend(list(ParameterSampler(param_dist,
                                                n_iter=random_iter - 1,
                                                random_state=self._rng)))
        for idx, d in enumerate(param_list):
            gb = GradientBoostingRegressor(loss='ls',
                                           learning_rate=d["learning_rate"],
                                           n_estimators=d["n_estimators"],
                                           subsample=1.0,
                                           min_samples_split=2,
                                           min_samples_leaf=d["min_samples_leaf"],
                                           max_depth=d["max_depth"],
                                           init=None,
                                           random_state=self._rng,
                                           max_features=d["max_features"],
                                           alpha=0.9,
                                           verbose=0)
            train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.5,
                                                                random_state=self._rng)
            gb.fit(train_x, train_y)
            sc = gb.score(test_x, test_y)
            # Tiny progress output: '#' every 10th config, '<' on improvement, '.' otherwise
            m = "."
            if idx % 10 == 0:
                m = "#"
            if sc > best_score:
                m = "<"
                best_score = sc
                max_features = d["max_features"]
                learning_rate = d["learning_rate"]
                max_depth = d["max_depth"]
                min_samples_leaf = d["min_samples_leaf"]
                n_estimators = d["n_estimators"]
            sys.stdout.write(m)
            sys.stdout.flush()
    sys.stdout.write("Using max_features %f, learning_rate: %f, max_depth: %d, min_samples_leaf: %d, "
                     "and n_estimators: %d\n" %
                     (max_features, learning_rate, max_depth, min_samples_leaf, n_estimators))
    return max_features, learning_rate, max_depth, min_samples_leaf, n_estimators
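This hand-rolled loop is essentially what sklearn.model_selection.RandomizedSearchCV automates. A hedged sketch of a modern equivalent (cv=2 roughly mirrors the 50/50 split above; x and y are assumed to be the same arrays as in the example, and the default squared-error loss replaces the old 'ls' spelling):

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV

param_dist = {"max_features": np.linspace(0.1, 1, num=10),
              "learning_rate": 2.0 ** np.linspace(-1, -10, num=10),
              "max_depth": list(range(1, 11)),
              "min_samples_leaf": list(range(2, 20, 2)),
              "n_estimators": list(range(10, 110, 10))}

search = RandomizedSearchCV(GradientBoostingRegressor(random_state=0),
                            param_dist, n_iter=50, cv=2, random_state=0)
search.fit(x, y)  # x, y as in the example above
print(search.best_params_, search.best_score_)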
Example 7: modelTheData
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def modelTheData(dataSet, draw=False):
    dataSet = np.random.permutation(dataSet)
    myData, myTarget = dataSet[:, 1:-1], dataSet[:, -1]
    date = dataSet[:, 0]
    rat = 0.7
    ratio = int(len(myData) * rat)
    myMachine = GradientBoostingRegressor(n_estimators=100, learning_rate=1.0,
                                          max_depth=1, random_state=0, loss='ls')
    myMachine.fit(myData[:ratio], myTarget[:ratio])
    preData = myMachine.predict(myData[ratio:])
    myDate = date[ratio:]
    # plot the prediction error on the held-out samples
    error = preData - myTarget[ratio:]
    if draw:
        print(myMachine.score(myData[ratio:], myTarget[ratio:]))
        plt.scatter(myDate, error)
        plt.show()
    careError = []
    careMyDate = []
    for i in range(len(error)):
        if abs(error[i]) >= 50:
            careError += [error[i]]
            careMyDate += [myDate[i]]
    # print(careMyDate)
    # plt.scatter(careMyDate, careError)
    # plt.text()
    # plt.show()
    return careMyDate
Example 8: boosting
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def boosting(df1, features, pred_var, df2):
    # for x in [10, 100, 1000]:
    #     for y in [3, 5, 7]:
    lr = GradientBoostingRegressor(n_estimators=100, max_depth=7)
    lr.fit(df1[features], df1[pred_var])
    print('GradientBoostingRegressor Score: ', lr.score(df2[features], df2[pred_var]))
    # 0.727261516253
    return lr.predict(df2[features])
Example 9: gbrt_regressor
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def gbrt_regressor(X, y, weight):
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn import cross_validation  # deprecated module; see the note below
    X_train, X_test, y_train, y_test, weight_train, weight_test = cross_validation.train_test_split(
        X, y, weight, test_size=0.4, random_state=0)
    clf = GradientBoostingRegressor(n_estimators=100, max_features='sqrt')
    clf.fit(X_train, y_train, weight_train)  # third positional argument is sample_weight
    print(clf.score(X_test, y_test, weight_test))  # sample-weighted R^2
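The cross_validation module was removed in scikit-learn 0.20. On a modern version the same split works unchanged via sklearn.model_selection, which likewise passes extra arrays (here the weights) straight through:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test, weight_train, weight_test = train_test_split(
    X, y, weight, test_size=0.4, random_state=0)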
Example 10: grid_search
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def grid_search(X, y, split, learn=[.01], samples_leaf=[250, 350, 500],
                depth=[10, 15]):
    '''
    Runs a grid search for GBM on split data
    '''
    for l in learn:
        for s in samples_leaf:
            for d in depth:
                model = GradientBoostingRegressor(n_estimators=250,
                                                  learning_rate=l,
                                                  min_samples_leaf=s,
                                                  max_depth=d,
                                                  random_state=42)
                model.fit(X.values[:split], y.values[:split])
                in_score = model.score(X.values[:split], y.values[:split])
                out_score = model.score(X.values[split:], y.values[split:])
                print('learning_rate: {}, min_samples_leaf: {}, max_depth: {}'.format(l, s, d))
                print('in-sample score:', in_score)
                print('out-sample score:', out_score)
                print('')
# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingRegressor import score [as 别名]
def main():
train, train_loss = load_data()
train = impute_and_scale(train)
# Fit the regressor
train_regressor = []
train_loss_regressor = []
for i in range(len(train)):
if(train_loss[i]) > 0:
train_loss_regressor.append(train_loss[i])
#train_regressor.append(train[i][0:769])
train_regressor.append(train[i])
for percent in [.99, .9, .66, .5, .33, .1, .01]:
print percent
#replaceNAStrategy = ['mean', 'median', 'most_frequent'][0]
#train_classifier = train[['f527', 'f528', 'f271', 'f274']]
#train_loss_classifier = train.loss.apply(lambda x: 0 if x==0 else 1)
#train = impute_and_scale(train)
#train = impute_random(train, 1)
#train = train.values
#train = impute_random(train, 1)
#train = impute_to_zero(train)
#train = filterNullsWithZero(train)
#train_classifier = impute_and_scale(train_classifier)
# Fit the classifier
#clf = LogisticRegression(C=1e20,penalty='l2')
#clf.fit(train_classifier,train_loss_classifier)
#print "regressor.py - finished fitting classifier"
#train = train[['f527', 'f528', 'f271', 'f274']]
#print len(train[1])
#print train[1]
x_train, x_test, y_train, y_test = \
cross_validation.train_test_split(train_regressor, train_loss_regressor, test_size=percent, random_state=0)
print len(x_train)
reg4 = GradientBoostingRegressor(n_estimators=100, verbose=1)
reg4 = reg4.fit(x_train, y_train)
# print "zero: " + str(reg4.score(x_test, y_test))
print str(percent) + ": " + str(reg4.score(x_test, y_test))
print "regressor.py - finished fitting regressor"
示例12: Predictor
# 需要导入模块: from sklearn.ensemble import GradientBoostingRegressor [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingRegressor import score [as 别名]
class Predictor(object):
def __init__(self, n_estimators=50):
"""Constructor for the predictor object."""
self.score = -1
# use following code to test parameters for the regressor
# regressor2 = GradientBoostingRegressor()
# parameters = {'n_estimators': [50, 100], 'loss': ('ls', 'lad'),
# 'max_depth': [3, 5, 7]}
# self.regressor = GridSearchCV(regressor2, parameters, n_jobs=-1)
self.regressor = GradientBoostingRegressor(n_estimators=n_estimators)
def fit_algorithm(self, x, y):
"""Wrapper to the sklearn regressor fit function."""
self.regressor.fit(x, y)
print self.regressor.best_params_
def predict_outputs(self, inputs):
"""Wrapper to the sklearn regressor predict function."""
try:
a = self.regressor.feature_importances_
except sklearn.utils.validation.NotFittedError:
print "Please fit the algorithm before calling this function."
return
prediction = self.regressor.predict(inputs)
return prediction
def predictor_metrics(self, outputs, prediction):
"""This function calculates metrics byt measuring MSE."""
try:
a = self.regressor.feature_importances_
except sklearn.utils.validation.NotFittedError:
print "Please fit the algorithm before calling this function."
return
return mean_squared_error(outputs, prediction)
def score_predictor(self, x, y):
"""Wrapper to the score calculated with the predictor."""
try:
a = self.regressor.feature_importances_
except sklearn.utils.validation.NotFittedError:
print "Please fit the algorithm before calling this function."
return
return self.regressor.score(x, y)
def get_feature_importances(self):
"""Returns a copy of features importances vector."""
try:
fi = self.regressor.feature_importances_
except sklearn.utils.validation.NotFittedError:
print "Please fit the algorithm before calling this function."
return
return fi
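A hedged usage sketch for the Predictor class above on synthetic data; the data, shapes, and coefficients are assumptions made for illustration only:

import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(100, 4)
y = x @ np.array([1.0, -2.0, 0.5, 0.0]) + rng.normal(scale=0.1, size=100)

p = Predictor(n_estimators=50)
p.fit_algorithm(x, y)
pred = p.predict_outputs(x)
print(p.predictor_metrics(y, pred))   # MSE
print(p.score_predictor(x, y))        # R^2
print(p.get_feature_importances())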
Example 13: dummie_columns_gradient_boosting
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def dummie_columns_gradient_boosting(train, test):
    from sklearn.ensemble import GradientBoostingRegressor
    print("-- {} --".format("Gradient Boosting Regression using all but remarks"))
    predicting_columns = list(train._get_numeric_data().columns.values)
    predicting_columns.remove("LISTPRICE")
    predicting_columns.remove("SOLDPRICE")
    svr = GradientBoostingRegressor(n_estimators=300)
    svr.fit(train[predicting_columns], train["SOLDPRICE"])
    score = svr.score(test[predicting_columns], test["SOLDPRICE"])
    predictions = svr.predict(test[predicting_columns])
    sample_predictions(test, predictions)
    # note: score() returns R^2, not classification accuracy
    print("Accuracy: {}\n".format(score))
    return score, predictions
Example 14: train_model
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def train_model():
    fil = get_training_data()
    X = []
    y = []
    f = get_features()
    for query in fil:
        for feat in f.get_X(query):
            X.append(feat[1])
            if feat[0] in fil[query]:
                y.append(1)
            else:
                y.append(0)
    print(len(X), len(y), X, y)
    clf = GradientBoostingRegressor()
    clf.fit(X, y)
    print(clf)
    print(clf.score(X, y))
    filename = '/home/romil/Desktop/Model4/digits_classifier.joblib.pkl'
    _ = joblib.dump(clf, filename)
    # pickle the raw training data as well (cPickle in the original Python 2 code)
    with open('/home/romil/Desktop/Model4/X.pkl', 'wb') as fid:
        pickle.dump(X, fid)
    with open('/home/romil/Desktop/Model4/y.pkl', 'wb') as fid:
        pickle.dump(y, fid)
Example 15: main
# Required import: from sklearn.ensemble import GradientBoostingRegressor
# Method used: GradientBoostingRegressor.score
def main():
    xpath = '/Users/qiaotian/Downloads/dataset/sample1/feature.txt'
    ypath = '/Users/qiaotian/Downloads/dataset/sample1/label.txt'
    y = pd.read_csv(ypath, sep=',', header=None).iloc[:, 1]
    X = pd.read_csv(xpath, sep=',', header=None).iloc[0:len(y), :]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                        random_state=9)
    # 1. Linear Regressor
    lr_params = {}
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    train_acc = lr.score(X_train, y_train)
    test_acc = lr.score(X_test, y_test)
    y_pred = lr.predict(X_test)
    print('-> Done Linear Regression: ', train_acc, test_acc,
          len([elem for elem in y_pred - y_test if abs(elem) < 1.0]) / len(y_test))
    # 2. Random Forest Regressor
    rf_params = {'n_estimators': 100}
    rf = RandomForestRegressor(**rf_params)
    rf.fit(X_train, y_train)
    train_acc = rf.score(X_train, y_train)
    test_acc = rf.score(X_test, y_test)
    y_pred = rf.predict(X_test)
    print('-> Done Random Forest Regression: ', train_acc, test_acc,
          len([elem for elem in y_pred - y_test if abs(elem) < 1.0]) / len(y_test))
    # 3. Gradient Boosting Regressor
    gbdt_params = {'loss': 'ls', 'n_estimators': 100, 'max_depth': 3,
                   'subsample': 0.9, 'learning_rate': 0.1,
                   'min_samples_leaf': 1, 'random_state': 1234}
    gbdt = GradientBoostingRegressor(**gbdt_params)
    gbdt.fit(X_train, y_train)
    train_acc = gbdt.score(X_train, y_train)  # R^2, despite the "acc" name
    test_acc = gbdt.score(X_test, y_test)
    y_pred = gbdt.predict(X_test)
    print('-> Done Gradient Boosting Regression: ', train_acc, test_acc,
          len([elem for elem in y_pred - y_test if abs(elem) < 1.0]) / len(y_test))