本文整理汇总了Python中sklearn.ensemble.GradientBoostingClassifier.transform方法的典型用法代码示例。如果您正苦于以下问题:Python GradientBoostingClassifier.transform方法的具体用法?Python GradientBoostingClassifier.transform怎么用?Python GradientBoostingClassifier.transform使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.GradientBoostingClassifier
的用法示例。
在下文中一共展示了GradientBoostingClassifier.transform方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: gbdt_feature
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import transform [as 别名]
def gbdt_feature(X, y, Z):
    """Fit a GBDT on (X, y), derive importance-selected features from Z,
    and append them to a saved feature table.

    Parameters
    ----------
    X, y : training design matrix and labels for the booster.
    Z : data whose columns are transformed/selected by the fitted booster.

    Side effects
    ------------
    Reads 'data\\test\\feature_all_15.csv' via ``read_user`` (defined
    elsewhere in this project) and writes 'data\\test\\feature_all_16.csv'.
    """
    from sklearn.ensemble import GradientBoostingClassifier
    gbdt = GradientBoostingClassifier(n_estimators=200, max_depth=5)
    gbdt.fit(X, y)  # training -- grab a coffee, this can take a while
    # NOTE(review): estimator.transform (importance-based feature
    # selection) was deprecated in sklearn 0.17 and removed in 0.19;
    # newer code should wrap the estimator in SelectFromModel instead.
    f = gbdt.transform(Z, threshold=None)
    ff = pd.DataFrame(f)
    # Name columns f0..f{k-1}; generalizes the original hard-coded
    # 22-entry list ('f0'..'f21') to any number of selected features.
    ff.columns = ['f%d' % i for i in range(ff.shape[1])]
    feature_all = read_user('data\\test\\feature_all_15.csv')
    # Outer merge on the index so rows missing on either side are kept.
    feature_all1 = pd.merge(feature_all, ff, how='outer',
                            left_index=True, right_index=True)
    feature_all1.set_index('enrollment_id').to_csv('data\\test\\feature_all_16.csv')
示例2: len
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import transform [as 别名]
# NOTE(review): fragment of a larger function -- the enclosing def and the
# definitions of clf, plt, one_hot, X_train/X_test, y_train/y_test live in
# code omitted from this excerpt. Flow: finish the training-loss plot, then
# pickle the GBDT leaf-encoded train/test features for a downstream
# logistic regression. Indentation of continuations restored; logic untouched.
plt.ylabel('Loss')
plt.legend(loc="best")
plt.savefig("loss.png")
plt.close()
# tree ensembles
#print clf.estimator_.feature_importances_
#print clf.toarray().shape
# >(26049, 100)
# input_features = 26049, weak_learners = 100
#print len(one_hot.toarray()[:,0]), one_hot.toarray()[:,0]
#print len(one_hot.toarray()[0,:]), one_hot.toarray()[0,:]
# get test data from train trees
# Densify the one-hot leaf encoding that clf cached during fitting
# (presumably a scipy sparse matrix, hence .toarray() -- verify).
transformated_train_features = clf.one_hot_encoding.toarray()
# NOTE(review): clf here appears to be a project wrapper whose transform
# takes (X, y), unlike sklearn's one-argument transform -- confirm.
transformated_test_features = clf.transform(X_test, y_test)
#print transformated_train_features.shape, X_train.shape
#print transformated_test_features.shape, X_test.shape
out_fname = "%s/%s" % (os.environ["HOME"],
                       "data/gbdt/encoding_tree_cv.pkl.gz")
# Persist [[train_X, train_y], [test_X, test_y]] as a gzipped pickle.
with gzip.open(out_fname, "wb") as gf:
    cPickle.dump([[transformated_train_features, y_train], [transformated_test_features, y_test]],
                 gf,
                 cPickle.HIGHEST_PROTOCOL)
print clf.best_params
# 3. Logistic Regression using transformated features
# determine C by train data (grid for the downstream LR search)
parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
              'penalty' : ["l1","l2"]}
示例3: gbdt_plus_liner_classifier_grid_search
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import transform [as 别名]
#.........这里部分代码省略.........
# NOTE(review): fragment of gbdt_plus_liner_classifier_grid_search -- code
# before this point (defining clf, X_train/X_test, y_train/y_test, ax3,
# graph_fname, model_train_fname, model_test_fname, sns, plt, np, pd) is
# omitted from this excerpt. Flow: plot the top feature importances, then
# pickle GBDT-encoded train/test features for the downstream linear model.
# Indentation of continuations restored; logic untouched.
# tree ensembles
score_threshold=0.8
# Map column position -> column name for readable importance labels.
index2feature = dict(zip(np.arange(len(X_train.columns.values)), X_train.columns.values))
# Feature indices sorted by importance, descending (stored as strings).
feature_importances_index = [str(j) for j in clf.estimator_.feature_importances_.argsort()[::-1]]
feature_importances_score = [clf.estimator_.feature_importances_[int(j)] for j in feature_importances_index]
fis = pd.DataFrame(
    {'name':[index2feature.get(int(key),'Null') for key in feature_importances_index],
     'score':feature_importances_score}
)
# Reinterpret score_threshold: from a quantile level (0.8) to the actual
# importance value at that quantile among strictly positive scores.
score_threshold = fis['score'][fis['score'] > 0.0].quantile(score_threshold)
# where_str = 'score > %f & score > %f' % (score_threshold, 0.0)
where_str = 'score >= %f' % (score_threshold)
fis = fis.query(where_str)
sns.barplot(x = 'score', y = 'name',
            data = fis,
            ax=ax3,
            color="blue")
ax3.set_xlabel("Feature_Importance", fontsize=10)
plt.tight_layout()
plt.savefig(graph_fname)
plt.close()
#print clf.toarray().shape
# >(26049, 100)
#input_features = 26049, weak_learners = 100
#print len(one_hot.toarray()[:,0]), one_hot.toarray()[:,0]
#print len(one_hot.toarray()[0,:]), one_hot.toarray()[0,:]
## feature transformation : get test data from train trees
#print transformated_train_features.shape, X_train.shape
#print transformated_test_features.shape, X_test.shape
transformated_train_features = clf.one_hot_encoding
# clf.transform takes (X, y) here -- a project wrapper API, not sklearn's
# single-argument transform. DataFrame input is converted to float32
# ndarray first; as_matrix() is the (deprecated) pandas-era conversion.
if type(X_test) == pd.core.frame.DataFrame:
    transformated_test_features = clf.transform(X_test.as_matrix().astype(np.float32),
                                                y_test)
elif type(X_train) == np.ndarray:
    transformated_test_features = clf.transform(X_test, y_test)
#model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
#model_train_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['train']
#model_train_fname = os.path.join(Config.get_string('data.path'),
#                                 model_folder,
#                                 model_train_fname)
# Persist [train_features, y_train] as a gzipped pickle.
with gzip.open(model_train_fname, "wb") as gf:
    cPickle.dump([transformated_train_features, y_train],
                 gf,
                 cPickle.HIGHEST_PROTOCOL)
#model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
#model_test_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['test']
#model_test_fname = os.path.join(Config.get_string('data.path'),
#                                model_folder,
#                                model_test_fname)
# Persist [test_features, y_test] alongside the train pickle.
with gzip.open(model_test_fname, "wb") as gf:
    cPickle.dump([transformated_test_features, y_test],
                 gf,
                 cPickle.HIGHEST_PROTOCOL)
"""
# 2. lower model
if lower_param_keys is None:
lower_param_keys = ['model_type', 'n_neighbors', 'weights',
'algorithm', 'leaf_size', 'metric', 'p', 'n_jobs']
开发者ID:Quasi-quant2010,项目名称:Stacking,代码行数:70,代码来源:run_gbdt_plus_liner_classifier_grid_search.20160414.py