This article collects typical usage examples of the loss_ attribute of Python's sklearn.ensemble.GradientBoostingClassifier. If you are wondering what GradientBoostingClassifier.loss_ is, how to call it, or what it is useful for, the curated code samples below should help. You can also read further about the containing class, sklearn.ensemble.GradientBoostingClassifier.
The following 6 code examples of GradientBoostingClassifier.loss_ are shown, sorted by popularity by default.
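All of the examples share one pattern: call the fitted model's loss_ object (a callable loss function, available in scikit-learn versions before the attribute was deprecated in 1.1) on held-out labels and raw predicted scores to track test deviance across boosting iterations. As a self-contained warm-up, here is a minimal sketch of that pattern; the synthetic dataset and parameter values are illustrative assumptions, not taken from the examples below.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Assumed setup: a synthetic binary problem for illustration only
X, y = make_classification(n_samples=2000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = GradientBoostingClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)

# loss_ expects raw decision-function scores, not class labels, so iterate
# over staged_decision_function rather than staged_predict
test_loss = np.empty(len(clf.estimators_))
for i, raw_score in enumerate(clf.staged_decision_function(X_test)):
    test_loss[i] = clf.loss_(y_test, raw_score)

print(test_loss[-1])  # binomial deviance after the final boosting iteration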
Example 1: __init__
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import loss_ [as alias]
def __init__(self, estimator,
             phase,
             n_jobs, cv_k_fold, parameters,
             X_train, y_train,
             X_test, y_test):
    # estimator : ensemble learner
    # cv : if phase is "train", grid-search for the best parameters
    if phase == "train":
        clf = GradientBoostingClassifier()
        gscv = GridSearchCV(clf, parameters,
                            verbose=10,
                            scoring="f1",  # alternatives: "precision", "recall"
                            n_jobs=n_jobs, cv=cv_k_fold)
        gscv.fit(X_train, y_train)
        self.best_params = gscv.best_params_
        clf.set_params(**gscv.best_params_)
        clf.fit(X_train, y_train)
        train_loss = clf.train_score_
        test_loss = np.empty(len(clf.estimators_))
        # loss_ expects raw scores, so use staged_decision_function rather
        # than staged_predict (class labels would give a wrong deviance)
        for i, pred in enumerate(clf.staged_decision_function(X_test)):
            test_loss[i] = clf.loss_(y_test, pred)
        plt.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
        plt.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
        plt.xlabel('Boosting iterations (number of weak learners)')
        plt.ylabel('Loss')
        plt.legend(loc="best")
        plt.savefig("loss_cv.png")
        plt.close()
        estimator.set_params(**gscv.best_params_)
    self.estimator = estimator
    self.one_hot_encoding = None
Example 2: test_max_feature_regression
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import loss_ [as alias]
def test_max_feature_regression():
    # Test to make sure random state is set properly.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]
    gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=5,
                                      max_depth=2, learning_rate=.1,
                                      max_features=2, random_state=1)
    gbrt.fit(X_train, y_train)
    deviance = gbrt.loss_(y_test, gbrt.decision_function(X_test))
    assert deviance < 0.5, "GB failed with deviance %.4f" % deviance
Example 3: __init__
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import loss_ [as alias]
def __init__(self, estimator,
             phase,
             n_jobs, cv_k_fold, parameters,
             X_train, y_train,
             X_test, y_test):
    # estimator : ensemble learner
    # cv : if phase is "train", grid-search for the best parameters
    if phase == "train":
        gscv = GridSearchCV(GradientBoostingClassifier(),
                            parameters,
                            verbose=10,
                            scoring="f1",  # alternatives: "precision", "recall"
                            n_jobs=n_jobs, cv=cv_k_fold)
        gscv.fit(X_train, y_train)
        best_params = gscv.best_params_
        print("[GBDT's Best Parameter]", gscv.best_params_)
        clf = GradientBoostingClassifier()
        clf.set_params(**gscv.best_params_)
        del gscv
        clf.fit(X_train, y_train)
        train_loss = clf.train_score_
        test_loss = np.empty(len(clf.estimators_))
        # loss_ expects raw scores, so use staged_decision_function here too
        for i, pred in enumerate(clf.staged_decision_function(X_test)):
            test_loss[i] = clf.loss_(y_test, pred)
        plt.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
        plt.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
        plt.xlabel('Boosting iterations (number of weak learners)')
        plt.ylabel('Loss')
        plt.legend(loc="best")
        plt.savefig("loss_cv.png")
        plt.close()
    else:
        # default parameters as scalars, so they can be passed to set_params
        best_params = {'loss': 'deviance',
                       'learning_rate': 0.1,
                       'max_depth': 2,
                       'min_samples_leaf': 8,
                       'max_features': 5,  # max_features must be in (0, n_features]
                       'max_leaf_nodes': 20,
                       'subsample': 0.1,
                       'n_estimators': 100,
                       'random_state': 0}
    estimator.set_params(**best_params)
    self.estimator = estimator
    self.one_hot_encoding = None
Example 4: GradientBoostingClassifier
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import loss_ [as alias]
    # cv : if phase is "train", grid-search for the best parameters
    if phase == "train":
        clf = GradientBoostingClassifier()
        gscv = GridSearchCV(clf, parameters,
                            verbose=10,
                            scoring="f1",  # alternatives: "precision", "recall"
                            n_jobs=n_jobs, cv=cv_k_fold)
        gscv.fit(X_train, y_train)
        self.best_params = gscv.best_params_
        clf.set_params(**gscv.best_params_)
        clf.fit(X_train, y_train)
        train_loss = clf.train_score_
        test_loss = np.empty(len(clf.estimators_))
        # loss_ expects raw scores, so use staged_decision_function
        for i, pred in enumerate(clf.staged_decision_function(X_test)):
            test_loss[i] = clf.loss_(y_test, pred)
        plt.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
        plt.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
        plt.xlabel('Boosting iterations (number of weak learners)')
        plt.ylabel('Loss')
        plt.legend(loc="best")
        plt.savefig("loss_cv.png")
        plt.close()
        estimator.set_params(**gscv.best_params_)
    self.estimator = estimator
    self.one_hot_encoding = None

def fit(self, X, y):
    self.fit_transform(X, y)
    return self
Example 5: gbdt_plus_liner_classifier_grid_search
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import loss_ [as alias]
def gbdt_plus_liner_classifier_grid_search(stack_setting_,
                                           upper_param_keys=None, upper_param_vals=None,
                                           lower_param_keys=None, lower_param_vals=None,
                                           num_proc=None):
    """
    upper model is GBDT or Random Forest
    lower model is a Linear Classifier
    """
    if stack_setting_ is None:
        sys.stderr.write('You have no setting Json file\n')
        sys.exit()

    if num_proc is None:
        num_proc = 6

    # 1. upper model
    if upper_param_keys is None:
        upper_param_keys = ['model_type', 'n_estimators', 'loss', 'random_state', 'subsample', 'max_features', 'max_leaf_nodes', 'learning_rate', 'max_depth', 'min_samples_leaf']
    if upper_param_vals is None:
        upper_param_vals = [[GradientBoostingClassifier], [100], ['deviance'], [0], [0.1], [5], [20], [0.1], [2], [8]]

    # grid search for the upper model : GBDT or Random Forest
    # ExperimentL1 is model-agnostic; the data, on the other hand, is fixed
    exp = ExperimentL1(data_folder=stack_setting_['0-Level']['folder'],
                       train_fname=stack_setting_['0-Level']['train'],
                       test_fname=stack_setting_['0-Level']['test'])

    # GridSearch wraps a single model; which model is determined by the params
    #gs = GridSearch(SklearnModel, exp, upper_param_keys, upper_param_vals,
    #                cv_folder=stack_setting_['1-Level']['gbdt_linear']['upper']['cv']['folder'],
    #                cv_out=stack_setting_['1-Level']['gbdt_linear']['upper']['cv']['cv_out'],
    #                cv_pred_out=stack_setting_['1-Level']['gbdt_linear']['upper']['cv']['cv_pred_out'],
    #                refit_pred_out=stack_setting_['1-Level']['gbdt_linear']['upper']['cv']['refit_pred_out'])
    #upper_best_param, upper_best_score = gs.search_by_cv()

    model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
    model_train_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['train']
    model_train_fname = os.path.join(Config.get_string('data.path'),
                                     model_folder,
                                     model_train_fname)
    model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
    model_test_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['test']
    model_test_fname = os.path.join(Config.get_string('data.path'),
                                    model_folder,
                                    model_test_fname)
    upper_param_dict = dict(zip(upper_param_keys, upper_param_vals))

    if not os.path.isfile(model_train_fname) and not os.path.isfile(model_test_fname):
        #upper_param_dict['model_type'] == [GradientBoostingClassifier]
        del upper_param_dict['model_type']
        clf = GradientBoostingClassifier()
        clf_cv = GridSearchCV(clf, upper_param_dict,
                              verbose=10,
                              scoring="f1",  # alternatives: "precision", "recall"
                              n_jobs=num_proc, cv=5)

        X_train, y_train = exp.get_train_data()
        clf_cv.fit(X_train, y_train)
        upper_best_params = clf_cv.best_params_
        print(upper_best_params)
        del clf_cv

        clf.set_params(**upper_best_params)
        clf.fit(X_train, y_train)
        train_loss = clf.train_score_
        test_loss = np.empty(len(clf.estimators_))
        X_test, y_test = exp.get_test_data()
        # loss_ expects raw scores, so use staged_decision_function rather
        # than staged_predict (which yields class labels)
        for i, pred in enumerate(clf.staged_decision_function(X_test)):
            test_loss[i] = clf.loss_(y_test, pred)

        graph_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['graph']['folder']
        graph_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['graph']['name']
        graph_fname = os.path.join(Config.get_string('data.path'),
                                   graph_folder,
                                   graph_fname)
        gs = GridSpec(2, 2)
        ax1 = plt.subplot(gs[0, 1])
        ax2 = plt.subplot(gs[1, 1])
        ax3 = plt.subplot(gs[:, 0])

        ax1.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
        ax1.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
        ax1.set_xlabel('Boosting iterations (number of weak learners)')
        ax1.set_ylabel('%s Loss' % (upper_best_params.get('loss', 'RMSE')))
        ax1.legend(loc="best")

        # dump the transformed (tree-leaf) features
        clf = TreeTransform(GradientBoostingClassifier(),
                            best_params_=upper_best_params)
        if isinstance(X_train, pd.core.frame.DataFrame):
            clf.fit(X_train.as_matrix().astype(np.float32), y_train)
        elif isinstance(X_train, np.ndarray):
            clf.fit(X_train.astype(np.float32), y_train)

        # train result
        train_loss = clf.estimator_.train_score_
#......... the rest of this function is omitted .........
Author: Quasi-quant2010 | Project: Stacking | Lines: 103 | Source file: run_gbdt_plus_liner_classifier_grid_search.20160414.py
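Example 5's docstring describes a stacking setup: a GBDT upper model whose output feeds a linear lower model. The TreeTransform class it relies on is not shown, but a common way to implement that hand-off is to one-hot encode the leaf indices that the classifier's apply() method returns for each tree. The sketch below is an illustrative assumption about what such a transform does, not the project's actual TreeTransform code.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Assumed data; the real pipeline reads ExperimentL1's train/test files
X, y = make_classification(n_samples=2000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

gbdt = GradientBoostingClassifier(n_estimators=100, random_state=0)
gbdt.fit(X_train, y_train)

# apply() gives the leaf index each sample reaches in every tree, with
# shape (n_samples, n_estimators, n_trees_per_stage); flatten per sample
leaves_train = gbdt.apply(X_train).reshape(len(X_train), -1)
leaves_test = gbdt.apply(X_test).reshape(len(X_test), -1)

# One-hot encode the leaf indices and fit the lower (linear) model on them
enc = OneHotEncoder(handle_unknown='ignore')
lower = LogisticRegression(max_iter=1000)
lower.fit(enc.fit_transform(leaves_train), y_train)
print(lower.score(enc.transform(leaves_test), y_test))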
Example 6: dict
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or alternatively: from sklearn.ensemble.GradientBoostingClassifier import loss_ [as alias]
# original_params, X_train, y_train, X_test, and y_test are defined earlier
# in the source file (this snippet is truncated)
for label, color, setting in [('learning_rate=1', 'orange',
                               {'learning_rate': 1.0}),
                              ('learning_rate=0.5', 'turquoise',
                               {'learning_rate': 0.5}),
                              ('learning_rate=0.3', 'blue',
                               {'learning_rate': 0.3}),
                              ('learning_rate=0.2', 'gray',
                               {'learning_rate': 0.2}),
                              ('learning_rate=0.1', 'magenta',
                               {'learning_rate': 0.1})]:
    params = dict(original_params)
    params.update(setting)

    clf = GradientBoostingClassifier(**params)
    clf.fit(X_train, y_train)

    # compute test set deviance
    test_deviance = np.zeros((params['n_estimators'],), dtype=np.float64)
    for i, y_pred in enumerate(clf.staged_decision_function(X_test)):
        # clf.loss_ assumes that each y_test[i] is in {0, 1}
        test_deviance[i] = clf.loss_(y_test, y_pred)

    plt.plot((np.arange(test_deviance.shape[0]) + 1)[::5], test_deviance[::5],
             '-', color=color, label=label)

plt.savefig(str(i) + 'example.png')  # note: i here is the last stage index
plt.show()