This article collects typical usage examples of the apply method of sklearn.ensemble.GradientBoostingClassifier in Python. If you are wondering what GradientBoostingClassifier.apply does, how to call it, or where to find worked examples, the curated code samples below may help. You can also explore other uses of the containing class, sklearn.ensemble.GradientBoostingClassifier.
The following shows 12 code examples of GradientBoostingClassifier.apply, sorted by popularity by default.
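Before the examples, a minimal sketch of what apply returns may help; the synthetic dataset and parameter values below are illustrative assumptions, not taken from any example on this page. For each input sample, apply reports the index of the leaf that sample reaches in every tree of the fitted ensemble.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

# Synthetic binary classification data (an assumption for illustration)
X, y = make_classification(n_samples=200, n_features=8, random_state=0)
clf = GradientBoostingClassifier(n_estimators=50, random_state=0).fit(X, y)

# Leaf index reached by each sample in each tree; the shape is
# (n_samples, n_estimators, n_trees_per_stage), and for binary problems
# the trailing axis has size 1, hence the ubiquitous "[:, :, 0]" below.
leaves = clf.apply(X)
print(leaves.shape)  # (200, 50, 1)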
Example 1: transform_with_gbm_to_categorical
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
import numpy as np
from sklearn.preprocessing import OneHotEncoder

def transform_with_gbm_to_categorical(header, tr_x, tr_y, ts_x, n_est=100, learning_rate=0.1, max_depth=5):
    clf = GradientBoostingClassifier(n_estimators=n_est, learning_rate=learning_rate, max_depth=max_depth)
    clf = clf.fit(tr_x, tr_y)
    """ # Node count
    estimators = clf.estimators_
    for row in estimators:
        for e in row:
            print(e.tree_.node_count)"""
    # Leaf indices per sample and tree, flattened to 2-D for encoding
    leaf_indices = clf.apply(tr_x)
    leaf_indices = leaf_indices.reshape(leaf_indices.shape[0], -1)
    ts_leaf_indices = clf.apply(ts_x)
    ts_leaf_indices = ts_leaf_indices.reshape(ts_leaf_indices.shape[0], -1)
    # Fit the encoder on train and test leaves together so both share columns
    enc = OneHotEncoder()
    enc.fit(np.append(leaf_indices, ts_leaf_indices, axis=0))
    tr_cat_features = enc.transform(leaf_indices).toarray()
    ts_cat_features = enc.transform(ts_leaf_indices).toarray()
    header = ["cat_" + str(i) for i in range(ts_cat_features.shape[1])]
    print("[gbm_cat] Features size: ", len(header))
    return header, tr_cat_features, ts_cat_features
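A hedged usage sketch for the function above; the synthetic arrays and argument values are assumptions chosen only to illustrate the expected input types:

# Hypothetical call to transform_with_gbm_to_categorical with synthetic data
import numpy as np
rng = np.random.RandomState(0)
tr_x, ts_x = rng.rand(100, 5), rng.rand(20, 5)
tr_y = rng.randint(0, 2, size=100)
header, tr_cat, ts_cat = transform_with_gbm_to_categorical([], tr_x, tr_y, ts_x, n_est=20)
# tr_cat and ts_cat are dense one-hot matrices over GBM leaf indices,
# with one column per (tree, leaf) combination seen in either split
print(tr_cat.shape, ts_cat.shape)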
Example 2: check_iris
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
def check_iris(presort, subsample, sample_weight):
    # Check consistency on the iris dataset.
    clf = GradientBoostingClassifier(n_estimators=100,
                                     loss='deviance',
                                     random_state=1,
                                     subsample=subsample,
                                     presort=presort)
    clf.fit(iris.data, iris.target, sample_weight=sample_weight)
    score = clf.score(iris.data, iris.target)
    assert_greater(score, 0.9)
    # 150 iris samples, 100 boosting stages, one tree per class (3)
    leaves = clf.apply(iris.data)
    assert_equal(leaves.shape, (150, 100, 3))
Example 3: test_iris
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
def test_iris():
    # Check consistency on the iris dataset.
    for subsample in (1.0, 0.5):
        for sample_weight in (None, np.ones(len(iris.target))):
            clf = GradientBoostingClassifier(n_estimators=100, loss='deviance',
                                             random_state=1, subsample=subsample)
            clf.fit(iris.data, iris.target, sample_weight=sample_weight)
            score = clf.score(iris.data, iris.target)
            assert score > 0.9, "Failed with subsample %.1f " \
                                "and score = %f" % (subsample, score)
            leaves = clf.apply(iris.data)
            assert_equal(leaves.shape, (150, 100, 3))
Example 4: test_gbm_classifier_backupsklearn
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
def test_gbm_classifier_backupsklearn(backend='auto'):
    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    import h2o4gpu
    Solver = h2o4gpu.GradientBoostingClassifier

    # Run the h2o4gpu version of gradient boosting classification
    gbm = Solver(backend=backend, random_state=1234)
    print("h2o4gpu fit()")
    gbm.fit(X, y)

    # Run the scikit-learn version of gradient boosting classification
    from sklearn.ensemble import GradientBoostingClassifier
    gbm_sk = GradientBoostingClassifier(random_state=1234, max_depth=3)
    print("Scikit fit()")
    gbm_sk.fit(X, y)

    if backend == "sklearn":
        assert (gbm.predict(X) == gbm_sk.predict(X)).all()
        assert (gbm.predict_log_proba(X) == gbm_sk.predict_log_proba(X)).all()
        assert (gbm.predict_proba(X) == gbm_sk.predict_proba(X)).all()
        assert gbm.score(X, y) == gbm_sk.score(X, y)
        assert (gbm.decision_function(X)[1] == gbm_sk.decision_function(X)[1]).all()
        assert np.allclose(list(gbm.staged_predict(X)), list(gbm_sk.staged_predict(X)))
        assert np.allclose(list(gbm.staged_predict_proba(X)), list(gbm_sk.staged_predict_proba(X)))
        assert (gbm.apply(X) == gbm_sk.apply(X)).all()

        print("Estimators")
        print(gbm.estimators_)
        print(gbm_sk.estimators_)

        print("loss")
        print(gbm.loss_)
        print(gbm_sk.loss_)
        assert gbm.loss_.__dict__ == gbm_sk.loss_.__dict__

        print("init_")
        print(gbm.init)
        print(gbm_sk.init)

        print("Feature importance")
        print(gbm.feature_importances_)
        print(gbm_sk.feature_importances_)
        assert (gbm.feature_importances_ == gbm_sk.feature_importances_).all()

        print("train_score_")
        print(gbm.train_score_)
        print(gbm_sk.train_score_)
        assert (gbm.train_score_ == gbm_sk.train_score_).all()
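A usage note: the strict equality assertions above only run when the sklearn backend is forced, so the test would presumably be invoked as follows (assuming the creditcard.csv data file is present):

# Hypothetical invocation; requires ./open_data/creditcard.csv
test_gbm_classifier_backupsklearn(backend='sklearn')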
Example 5: check_classification_toy
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
def check_classification_toy(presort, loss):
    # Check classification on a toy dataset.
    clf = GradientBoostingClassifier(loss=loss, n_estimators=10,
                                     random_state=1, presort=presort)
    assert_raises(ValueError, clf.predict, T)
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(10, len(clf.estimators_))
    # Train deviance should not increase between boosting stages
    deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:])
    assert np.any(deviance_decrease >= 0.0)
    leaves = clf.apply(X)
    assert_equal(leaves.shape, (6, 10, 1))
Example 6: test_classification_toy
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
def test_classification_toy():
    # Check classification on a toy dataset.
    for loss in ('deviance', 'exponential'):
        clf = GradientBoostingClassifier(loss=loss, n_estimators=10,
                                         random_state=1)
        assert_raises(ValueError, clf.predict, T)
        clf.fit(X, y)
        assert_array_equal(clf.predict(T), true_result)
        assert_equal(10, len(clf.estimators_))
        deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:])
        assert np.any(deviance_decrease >= 0.0), \
            "Train deviance does not monotonically decrease."
        leaves = clf.apply(X)
        assert_equal(leaves.shape, (6, 10, 1))
Example 7: train_test_split
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
# xt = xt[selected_feature[0:23]]
# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)
# x_train, x_train_lr, y_train, y_train_lr = train_test_split(x_train, y_train, test_size=0.5)
x_train, x_train_lr, y_train, y_train_lr = train_test_split(x, y, test_size=0.5)

params = {'n_estimators': 1800, 'max_leaf_nodes': 4, 'max_depth': 6, 'random_state': 2,  # None
          'min_samples_split': 5, 'learning_rate': 0.1, 'subsample': 0.83}
gb = GradientBoostingClassifier(**params)
gb_encoder = preprocessing.OneHotEncoder()
lr = LogisticRegression()

# GBDT+LR: fit the GBM, one-hot encode its leaf indices, then fit LR on them
gb.fit(x_train, y_train)
gb_encoder.fit(gb.apply(x_train)[:, :, 0])
lr.fit(gb_encoder.transform(gb.apply(x_train_lr)[:, :, 0]), y_train_lr)

# yhat = lr.predict_proba(gb_encoder.transform(gb.apply(x_test)[:, :, 0]))[:, 1]
yhat = lr.predict_proba(gb_encoder.transform(gb.apply(xt)[:, :, 0]))[:, 1]
yhat2 = gb.predict_proba(xt)[:, 1]
# Blend the stacked-LR and plain-GBM probabilities
yhat3 = (np.array(yhat) + np.array(yhat2)) / 2

# fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, yhat)
# plt.figure()
# plt.xlim(0, 1)
# plt.ylim(0, 1)
# plt.plot(fpr_grd_lm, tpr_grd_lm, label='GBT + LR')
# plt.show()
result_data = {'QuoteNumber': xt['QuoteNumber'], 'QuoteConversion_Flag': yhat3}
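The snippet ends after assembling result_data; a plausible continuation, assuming pandas and a Kaggle-style submission format (the file name is an assumption), would be:

# Hypothetical continuation: write the blended predictions to disk
import pandas as pd
pd.DataFrame(result_data).to_csv('submission.csv', index=False)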
Example 8: LR
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
del X_train_gbdt
del y_train_gbdt
gc.collect()
gbdt_model = pickle.load(open(fp_gbdt_model, 'rb'))

# ----- data for LR (one-hot encoding of GBDT leaf output) ----- #
# Note: OneHotEncoder here takes a column list and is fitted chunk by chunk,
# so it appears to be a custom incremental encoder rather than
# sklearn.preprocessing.OneHotEncoder.
id_cols = []
for i in range(1, gbdt_model.get_params()['n_estimators'] + 1):
    id_cols.append('tree' + str(i))
oh_enc = OneHotEncoder(id_cols)

def chunker(seq, size):
    return (seq[pos: pos + size] for pos in range(0, len(seq), size))

## fit oh_enc on the train set
df_train_id = pd.DataFrame(gbdt_model.apply(X_train_org)[:, :, 0], columns=id_cols, dtype=np.int8)
for chunk in chunker(df_train_id, 50000):
    oh_enc.fit(chunk)
del df_train_id
del X_train_org
del y_train_org
gc.collect()

## fit oh_enc on the test set
df_test_f = pd.read_csv(fp_test_f,
                        index_col=None, dtype={'id': str},
                        chunksize=50000, iterator=True)
Example 9: train_test_split
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.3)
# Hold out half of the remaining training data for the LR stage
X_train, X_train_lr, y_train, y_train_lr = train_test_split(X_train, y_train, test_size=0.5)
del train
"""
GBDT+LR
"""
print("Performing GBDT+LR")
random_state = np.random.RandomState(520)
lr = LogisticRegression(random_state=random_state)
grd = GradientBoostingClassifier(n_estimators=10, random_state=random_state)
grd_enc = OneHotEncoder()
grd.fit(X_train, y_train)
grd_enc.fit(grd.apply(X_train)[:, :, 0])
lr.fit(grd_enc.transform(grd.apply(X_train_lr)[:, :, 0]), y_train_lr)
# probas = lr.predict_proba(grd_enc.transform(grd.apply(X_test)[:, :, 0]))
print("Predicting...")
probas_train = lr.predict_proba(grd_enc.transform(grd.apply(X_train)[:, :, 0]))
# probas_validation = lr.predict_proba(grd_enc.transform(grd.apply(X_validation)[:, :, 0]))
# fpr, tpr, thresholds = roc_curve(y_test, probas[:, 1])
fpr_train, tpr_train, thresholds_train = roc_curve(y_train, probas_train[:, 1])
# fpr_validation, tpr_validation, thresholds_validation = roc_curve(y_validation, probas_validation[:, 1])
# roc_auc = auc(fpr, tpr)
roc_auc_train = auc(fpr_train, tpr_train)
# roc_auc_vldt = auc(fpr_validation, tpr_validation)
# plt.plot(fpr, tpr, lw=1, label='AUC = %0.4f' % roc_auc)
print("Plotting!...")
plt.plot(fpr_train, tpr_train, lw=1, label='AUC = %0.4f' % roc_auc_train)
# plt.plot(fpr_validation, tpr_validation, lw=1, label='AUC = %0.4f' % roc_auc_vldt)
Example 10: train_test_split
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
X_test = test_data
X_train, X_train_lr, y_train, y_train_lr = train_test_split(X_train,
                                                            y_train,
                                                            test_size=0.5)
# rf_lm.fit(rf_enc.transform(rf.apply(X_train_lr)), y_train_lr)
# y_pred_rf_lm = rf_lm.predict_proba(rf_enc.transform(rf.apply(X_test)))[:, 1]

# sixthforest
# Encoder and logistic regression combined with a gradient boosting classifier
n_estimator = 10
grd = GradientBoostingClassifier(n_estimators=n_estimator)
grd_enc = OneHotEncoder()
grd_lm = LogisticRegression()
grd.fit(X_train, y_train)
grd_enc.fit(grd.apply(X_train)[:, :, 0])
grd_lm.fit(grd_enc.transform(grd.apply(X_train_lr)[:, :, 0]), y_train_lr)
# output = grd_lm.predict(test_data).astype(int)
# output = rf_lm.predict(rf_enc.transform(rf.apply(X_test))).astype(int)
output = grd_lm.predict(grd_enc.transform(grd.apply(X_test)[:, :, 0])).astype(int)
'''
# secondforest (in git)
# Cross Validation
train_size = int(0.7 * (train_data.shape[0]))
validation_size = train_data.shape[0] - train_size
X_train = train_data[0:train_size, 1::]
Example 11: RandomForestClassifier
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
# Supervised transformation based on random forests
rf = RandomForestClassifier(max_depth=3, n_estimators=n_estimator)
rf_enc = OneHotEncoder()
rf_lm = LogisticRegression()
rf.fit(X_train, y_train)
rf_enc.fit(rf.apply(X_train))
rf_lm.fit(rf_enc.transform(rf.apply(X_train_lr)), y_train_lr)
y_pred_rf_lm = rf_lm.predict_proba(rf_enc.transform(rf.apply(X_test)))[:, 1]
fpr_rf_lm, tpr_rf_lm, _ = roc_curve(y_test, y_pred_rf_lm)

# Supervised transformation based on gradient boosted trees
grd = GradientBoostingClassifier(n_estimators=n_estimator, verbose=1)
grd_enc = OneHotEncoder()
grd_lm = LogisticRegression()
grd.fit(X_train, y_train)
grd_enc.fit(grd.apply(X_train)[:, :, 0])
grd_lm.fit(grd_enc.transform(grd.apply(X_train_lr)[:, :, 0]), y_train_lr)
y_pred_grd_lm = grd_lm.predict_proba(
    grd_enc.transform(grd.apply(X_test)[:, :, 0]))[:, 1]
fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred_grd_lm)

# The gradient boosted model by itself
y_pred_grd = grd.predict_proba(X_test)[:, 1]
fpr_grd, tpr_grd, _ = roc_curve(y_test, y_pred_grd)

# The random forest model by itself
y_pred_rf = rf.predict_proba(X_test)[:, 1]
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf)
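The snippet mirrors scikit-learn's feature-transformation example and stops just before the plot; a plausible plotting continuation (an assumption, following that example's conventions) would be:

# Hypothetical continuation: compare the four ROC curves
import matplotlib.pyplot as plt
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr_rf, tpr_rf, label='RF')
plt.plot(fpr_rf_lm, tpr_rf_lm, label='RF + LR')
plt.plot(fpr_grd, tpr_grd, label='GBT')
plt.plot(fpr_grd_lm, tpr_grd_lm, label='GBT + LR')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='best')
plt.show()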
Example 12: GradientBoostingClassifier
# Required module import: from sklearn.ensemble import GradientBoostingClassifier
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply
# Split the data set into roughly two halves
# so that the stacking stage does not overfit
train_x1 = train_X[:30000]
train_x2 = train_X[30000:]
train_y1 = train_y[:30000]
train_y2 = train_y[30000:]

# First use gradient boosting to transform the data, then one-hot
# encode the leaf indices, and finally fit a logistic regression
grd = GradientBoostingClassifier()
grd_enc = OneHotEncoder()
grd.fit(train_x1, train_y1)
grd_enc.fit(grd.apply(train_x1)[:, :, 0])
grd_lm = LogisticRegression(penalty='l2', C=.0115)
grd_lm.fit(grd_enc.transform(grd.apply(train_x2)[:, :, 0]), train_y2)

# Import test data
test_x = []
with open('test_2012.csv', 'r') as f:
    first_row = f.readline()
    headers = first_row.split(',')
    for row in f:
        ints = [int(elem) for elem in row.split(',')]
        test_x.append(ints)
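The example is cut off while reading the test rows; a hypothetical continuation (assuming the test columns match the training features) would score the held-out rows with the stacked pipeline:

# Hypothetical continuation: GBDT -> one-hot -> logistic regression scoring
import numpy as np
test_proba = grd_lm.predict_proba(
    grd_enc.transform(grd.apply(np.array(test_x))[:, :, 0]))[:, 1]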