This article collects typical usage examples of the sklearn.ensemble.GradientBoostingClassifier.get_params method in Python. If you are wondering how GradientBoostingClassifier.get_params is used in practice, the curated code examples below may help. You can also explore further usage examples of the containing class, sklearn.ensemble.GradientBoostingClassifier.
The following shows 5 code examples of GradientBoostingClassifier.get_params, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
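Before the examples, here is a minimal standalone sketch (not taken from any example below) of what get_params does: it returns a dict of the estimator's constructor parameters, which can be printed or passed back to build an identical, unfitted estimator.

from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier(n_estimators=200, max_depth=3)
params = clf.get_params()            # dict such as {'n_estimators': 200, 'max_depth': 3, ...}
print(params['n_estimators'])        # 200
clone_clf = GradientBoostingClassifier(**params)   # rebuild an identical, unfitted estimator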
Example 1: gbClf
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
class gbClf(BaseModel):
"""Model using random forest classifier."""
def __init__(self, train_data_fname=None, nrows=None, **kwargs):
"""Initialize the data frame."""
super(gbClf, self).__init__(train_data_fname, nrows, **kwargs)
def set_model(self, **kwargs):
"""Set the classifier."""
verbose = kwargs.get('verbose', 0)
n_estimators = kwargs.get('n_estimators', 3000)
max_depth = kwargs.get('max_depth', 3)
min_samples_leaf = kwargs.get('min_samples_leaf', 1)
min_samples_split = kwargs.get('min_samples_split', 2)
max_features = kwargs.get('max_features', None)
learning_rate = kwargs.get('learning_rate', 0.1)
subsample = kwargs.get('subsample', 1.0)
random_state = kwargs.get('random_state', 24)
        self.learner = GradientBoostingClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            min_samples_leaf=min_samples_leaf,
            min_samples_split=min_samples_split,
            max_features=max_features,
            subsample=subsample,
            verbose=verbose,
            random_state=random_state)
print('\n\nGradient Boosting set with parameters:')
par_dict = self.learner.get_params()
for ipar in par_dict.keys():
print('{}: {}'.format(ipar, par_dict[ipar]))
print('\n\n')
def fitNscore(self, **kwargs):
"""Fit classifier and produce score and related plots."""
col2fit = kwargs.get('features')
# cleaning
bids_path = kwargs.get('bids_path', 'data/bids.csv')
if not self.iscleaned:
            print('Preparing the data...')
self.prepare_data(bids_path, **kwargs)
print('columns for fit=\n{}'.format(self.df_train.columns))
test_size = 0.2 # fraction kept for testing
rnd_seed = 24 # for reproducibility
#features_train, features_test, target_train, target_test =\
# train_test_split(self.df_train[col2fit].values,
# self.df_train['outcome'].values,
# test_size=test_size,
# random_state=rnd_seed)
sss = StratifiedShuffleSplit(self.df_train['outcome'].values,
n_iter=1,
test_size=test_size,
random_state=rnd_seed)
for train_index, test_index in sss:
features_train = self.df_train[col2fit].values[train_index]
features_test = self.df_train[col2fit].values[test_index]
target_train = self.df_train['outcome'].values[train_index]
target_test = self.df_train['outcome'].values[test_index]
# Fit Classifier
self.fitModel(features_train, target_train, **kwargs)
# Predict on the rest of the sample
print('\nPredicting...')
predictions = self.learner.predict(features_test)
probas = self.learner.predict_proba(features_test)
# Feature index ordered by importance
ord_idx = np.argsort(self.learner.feature_importances_)
print("Feature ranking:")
for ifeaturindex in ord_idx[::-1]:
print('{0} \t: {1}'.format(col2fit[ifeaturindex],
round(self.learner.feature_importances_[ifeaturindex], 2)))
# Score
print('(Self) Score={}'.format(self.learner.score(features_test, target_test)))
# Plots
# Feature importances
maxfeat2show = 30 # number of features to show in plots
importances = self.learner.feature_importances_
#std = np.std([tree.feature_importances_ for tree in self.learner.estimators_],axis=0)
indices = np.argsort(importances)[::-1]
indices = indices[:min(maxfeat2show, len(indices))] # truncate if > maxfeat2show
ordered_names = [col2fit[i] for i in indices]
fig_import = plt.figure(figsize=(10, 10))
plt.title("Feature importances, GB")
#plt.barh(range(len(indices)), importances[indices],
# color="b", xerr=std[indices], align="center",ecolor='r')
plt.barh(range(len(indices)), importances[indices],
color="b", align="center")
plt.yticks(range(len(indices)), ordered_names)
plt.ylim([-1, len(indices)])
plt.ylim(plt.ylim()[::-1])
#......... part of the code is omitted here .........
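The probabilities stored in probas above are not used in the code that is shown; a hypothetical continuation (an assumption, not the omitted original) could plot an ROC curve with sklearn.metrics:

        # hypothetical continuation (not the omitted original code): plot an ROC
        # curve from the predicted probabilities computed above
        from sklearn.metrics import roc_curve, auc
        fpr, tpr, _ = roc_curve(target_test, probas[:, 1])
        fig_roc = plt.figure(figsize=(8, 8))
        plt.plot(fpr, tpr, label='GB (AUC = {:.3f})'.format(auc(fpr, tpr)))
        plt.plot([0, 1], [0, 1], 'k--', label='random guess')
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')
        plt.legend(loc='lower right')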
Example 2: KFold
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
dire_flying_courier_time    71132 ----> the flying courier was not acquired by "dire" in part of the matches
dire_first_ward_time        95404 ----> the "ward" item was not used by "dire"
'''
# fill NA with 0, in accordance with task description #3
X_train = features.fillna(value = 0, axis = 'columns')
# definition of a target variable #4
y_train = train_df['radiant_win']
print('\nradiant_win')
print('\n', 'GradientBoostingClassifier:')
# using KFold by task description #5
kf = KFold(totalLenght, n_folds = 5, shuffle = True, random_state = 1013)
# training with default GradientBoostingClassifier settings
clf = GradientBoostingClassifier(random_state = 1013)
params = clf.get_params()
start_time = datetime.now()
score = cross_val_score(estimator = clf, X=X_train, y=y_train, scoring='roc_auc', cv=kf).mean()
print('\tn_estimators:', params['n_estimators'],
      '\tmax_depth:', params['max_depth'],
      '\tscore:', score,
      '\ttimeElapsed:', datetime.now() - start_time)
# looks like a good result, but it takes a very long time to compute
'''
n_estimators: 100 max_depth: 3 score: 0.70661221449 timeElapsed: 0:07:13.214000
'''
# try to find a compromise between a good score and execution time
for maxDepth in [1,2,3,5]:
for treeCount in [10, 20, 30, 40, 50, 80, 100, 200, 500]:
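        # hypothetical loop body (not shown in the original example), mirroring
        # the single-model timing above for each (max_depth, n_estimators) pair
        clf = GradientBoostingClassifier(n_estimators=treeCount,
                                         max_depth=maxDepth,
                                         random_state=1013)
        start_time = datetime.now()
        score = cross_val_score(estimator=clf, X=X_train, y=y_train,
                                scoring='roc_auc', cv=kf).mean()
        print('\tn_estimators:', treeCount,
              '\tmax_depth:', maxDepth,
              '\tscore:', score,
              '\ttimeElapsed:', datetime.now() - start_time)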
Example 3: print
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
# Print the feature ranking
print("Feature ranking:")
for f in range(x2.shape[1]):
print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
# Plot the feature importances of the forest
plt.figure()
plt.title("Feature importances")
plt.bar(range(x2.shape[1]), importances[indices],
color="r", align="center")
plt.xticks(range(x2.shape[1]), indices)
plt.xlim([-1, x2.shape[1]])
plt.show()
feature_imp(X)
clf.get_params()
param_grid = [
{'learning_rate': [0.05, 0.1, 0.2, 0.25], 'max_depth': [3,4,5,6], 'min_samples_leaf': [1,2], 'n_estimators': [100,200,300]},
]
svr = GradientBoostingClassifier()
from sklearn import grid_search
clf = grid_search.GridSearchCV(svr, param_grid)
clf.fit(x2,training_target)
print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
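As a follow-up sketch (not part of the original example): with GridSearchCV's default refit=True, the tuned model is exposed as clf.best_estimator_, and its complete parameter set can again be inspected with get_params():

best_gb = clf.best_estimator_
for name, value in sorted(best_gb.get_params().items()):
    print('{}: {}'.format(name, value))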
Example 4: load_obj
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
if __name__ == "__main__":
if load_sample:
header, x_array, label = load_obj(sample_save_file)
else:
header, x_array, label, _ = read_features(feature_file)
save = [header, x_array, label]
save_obj(save, sample_save_file)
train_set, test_set, _ = split_train_test(seed, x_array, label, 0.9)
train_x, train_y = train_set
# print "size of train_x is", train_x.shape
gbrt = GradientBoostingClassifier()
parameter_grid = {'n_estimators' : range(10, 110, 10),
'learning_rate': uniform(loc = 0.01, scale = 0.99),
'max_depth' : range(1, 6, 1),
'max_features' : uniform(loc = 0.1, scale = 0.89),
'subsample' : uniform(loc = 0.2, scale = 0.79),
'min_samples_leaf' : [min_samples_leaf],
'min_samples_split' : [min_samples_split]}
gbrt_gridsearch = grid_search.RandomizedSearchCV(gbrt, \
parameter_grid, scoring = auc, \
cv=4, n_jobs=2, n_iter=100, verbose=5, refit=False)
gbrt_gridsearch.fit(train_x, train_y)
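    # refit=False was passed to RandomizedSearchCV above, so the best model is
    # rebuilt and fitted manually from best_params_ below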
gbrt_best = GradientBoostingClassifier(verbose=2, **gbrt_gridsearch.best_params_)
gbrt_best.fit(train_x, train_y)
print "parameters:", gbrt_best.get_params()
save_obj(gbrt_best, model_save_file)
Example 5: log_loss
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
y_pred_gbdt = gbdt_model.predict_proba(X_valid)[:, 1]
log_loss_gbdt = log_loss(y_valid, y_pred_gbdt)
print('log loss of GBDT on valid set: %.5f' % log_loss_gbdt)
## store the pre-trained gbdt_model
pickle.dump(gbdt_model, open(fp_gbdt_model, 'wb'))
del X_train_gbdt
del y_train_gbdt
gc.collect()
gbdt_model = pickle.load(open(fp_gbdt_model, 'rb'))
#----- data for LR (one-hot encoding with GBDT output) -----#
id_cols = []
for i in range(1, gbdt_model.get_params()['n_estimators']+1):
id_cols.append('tree'+str(i))
oh_enc = OneHotEncoder(id_cols)
def chunker(seq, size):
return (seq[pos: pos + size] for pos in range(0, len(seq), size))
## fit oh_enc on the training set
df_train_id = pd.DataFrame(gbdt_model.apply(X_train_org)[:, :, 0], columns=id_cols, dtype=np.int8)
for chunk in chunker(df_train_id, 50000):
oh_enc.fit(chunk)
del df_train_id
del X_train_org
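Example 5 stops before the logistic regression step. As a standalone illustration of the GBDT-leaf + LR idea (an assumption about where the code is heading, using sklearn's own OneHotEncoder and synthetic data rather than the custom encoder above), it looks roughly like this:

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder as SkOneHotEncoder

X, y = make_classification(n_samples=2000, n_features=20, random_state=0)
gbdt = GradientBoostingClassifier(n_estimators=30, max_depth=3, random_state=0)
gbdt.fit(X, y)

leaves = gbdt.apply(X)[:, :, 0]           # (n_samples, n_estimators) leaf indices per tree
enc = SkOneHotEncoder(handle_unknown='ignore')
lr = LogisticRegression(max_iter=1000)
lr.fit(enc.fit_transform(leaves), y)      # LR trained on the one-hot encoded leaf indices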