本文整理汇总了Python中sklearn.ensemble.GradientBoostingClassifier的min_samples_leaf参数的典型用法代码示例。如果您正苦于以下问题:Python GradientBoostingClassifier.min_samples_leaf的具体用法?Python GradientBoostingClassifier.min_samples_leaf怎么用?Python GradientBoostingClassifier.min_samples_leaf使用的例子?那么, 这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该参数所在类sklearn.ensemble.GradientBoostingClassifier的用法示例。
在下文中一共展示了GradientBoostingClassifier.min_samples_leaf参数的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: GBM
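# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Note: min_samples_leaf is a constructor parameter (and instance attribute) of GradientBoostingClassifier, not an importable name; set it with GradientBoostingClassifier(min_samples_leaf=...).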
def GBM(x_train, y_train, x_test, udf_trees=100, udf_lr=0.01, udf_max_depth=5, udf_minsam=50, do_CV=False, names=None):
    """Tune or fit a GradientBoostingClassifier and write a submission CSV.

    The function has two modes, selected by ``do_CV``:

    * ``do_CV=True``: try every combination of ``max_depth`` in
      ``[2, 3, 4, 5]`` and ``min_samples_leaf`` in ``[50, 250, 1000, 2500]``
      (16 combinations) on a 100-tree, learning-rate-0.1 classifier, score
      each one with ``udf.cross_val_score_proba`` and print the mean score
      per combination. Nothing is fitted for prediction and no file is
      written in this mode.
    * ``do_CV=False``: fit a classifier with the user-supplied
      hyper-parameters, predict on ``x_test``, call
      ``plot_feature_importance`` and write ``GBM_Submission.csv``.

    The names ``time``, ``np``, ``csv``, ``udf`` and
    ``plot_feature_importance`` are not imported here and must be available
    at module scope.

    Args:
        x_train: Training features passed to ``fit`` / the CV helper.
        y_train: Training labels passed to ``fit`` / the CV helper.
        x_test: Features to predict on in the non-CV mode.
        udf_trees: ``n_estimators`` for the final model.
        udf_lr: ``learning_rate`` for the final model.
        udf_max_depth: ``max_depth`` for the final model.
        udf_minsam: ``min_samples_leaf`` for the final model. Previously this
            argument was ignored and a hard-coded 7500 was used instead; it
            is now honoured.
        do_CV: Run the grid search instead of fitting the final model.
        names: Forwarded unchanged to ``plot_feature_importance``.

    Returns:
        None. Results are printed and, in the non-CV mode, written to disk.
    """
    from sklearn.ensemble import GradientBoostingClassifier

    # NOTE(review): the original indentation was lost; the fit / predict /
    # write steps are assumed to belong to the ``else`` branch, since the CV
    # branch never selects parameters to fit with -- confirm against the
    # original file.
    if do_CV:
        param_grid = {'max_depth': [2, 3, 4, 5],
                      'min_samples_leaf': [50, 250, 1000, 2500]}
        est = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, verbose=1)
        cv_scores = []
        params_list = []
        start = time()
        for mdep in param_grid['max_depth']:
            for minSamples in param_grid['min_samples_leaf']:
                print('Trying parameter combination: (Max_Depth=%i, minSamples=%i)' % (mdep, minSamples))
                # The same estimator object is reconfigured for every
                # combination rather than rebuilt.
                est.min_samples_leaf = minSamples
                est.max_depth = mdep
                # Project helper; the literal 5 is presumably the number of
                # folds -- TODO confirm against udf.cross_val_score_proba.
                cv_score = udf.cross_val_score_proba(x_train, y_train, 5, est)
                cv_scores.append(np.mean(cv_score))
                # Keep the parameter pair alongside its score for display.
                params_list.append((mdep, minSamples))
        print('Took %.2f seconds for parameter tuning.' % (time() - start))
        # Despite the message, the results are only printed below.
        print('writing CV results to file...')
        # One row per combination: 4 depths x 4 leaf sizes = 16 rows.
        results = np.array([params_list, cv_scores]).T
        print('GBM Parameter tuning results........')
        print('Parameters (max_depth, min_samples_in_leaf), CV_Scores')
        for row in results:
            print(row)
    else:
        # Train the classifier with the user-defined parameters.
        print('Fitting GBM with optimal user-defined parameters....')
        est = GradientBoostingClassifier(n_estimators=udf_trees,
                                         learning_rate=udf_lr,
                                         max_depth=udf_max_depth,
                                         min_samples_leaf=udf_minsam,
                                         verbose=1)
        est.fit(x_train, y_train)
        # Second column of predict_proba: a probability, not a hard label.
        y_pred = est.predict_proba(x_test)[:, 1]
        # Plot feature importances.
        plot_feature_importance(est, names)
        print('Writing submission file....')
        # 'wb' is the mode the Python 2 csv module expects; under Python 3
        # this would have to be mode 'w' with newline=''.
        with open('GBM_Submission.csv', 'wb') as testfile:
            w = csv.writer(testfile)
            w.writerow(('Id', 'Probability'))
            # Ids are 1-based row numbers of the test set.
            for row_id, prob in enumerate(y_pred, 1):
                w.writerow((row_id, prob))
        # The with-block closes the file; no explicit close() is needed.
        print('File written to disk...')