

Python GradientBoostingClassifier.apply Method Code Examples

This article collects typical usage examples of the Python method sklearn.ensemble.GradientBoostingClassifier.apply, drawn from open-source projects. If you are asking what GradientBoostingClassifier.apply does, how to use it, or want to see it in context, the curated examples below should help. You can also browse further usage examples of sklearn.ensemble.GradientBoostingClassifier, the class this method belongs to.


The 12 code examples of GradientBoostingClassifier.apply below are sorted by popularity by default.
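For orientation before the examples: apply(X) maps every sample to the index of the leaf it lands in, one index per tree. The returned array has shape (n_samples, n_estimators, k), where k equals the number of classes for multiclass problems and 1 for binary ones. A minimal self-contained sketch (illustrative, not taken from the examples below):

from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier

iris = load_iris()
clf = GradientBoostingClassifier(n_estimators=10, random_state=0)
clf.fit(iris.data, iris.target)

leaves = clf.apply(iris.data)
print(leaves.shape)  # (150, 10, 3): 150 samples, 10 stages, 3 class-wise trees
print(leaves[0, 0])  # leaf ids reached by sample 0 in the first boosting stage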

Example 1: transform_with_gbm_to_categorical

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
def transform_with_gbm_to_categorical(header, tr_x, tr_y, ts_x, n_est=100, learning_rate=0.1, max_depth=5):

    clf = GradientBoostingClassifier(n_estimators=n_est, learning_rate=learning_rate, max_depth=max_depth)
    clf = clf.fit(tr_x, tr_y)

    # Debugging aid: print the node count of every tree in the ensemble
    # for row in clf.estimators_:
    #     for e in row:
    #         print(e.tree_.node_count)

    # apply() returns one leaf index per sample per tree; flatten the
    # (n_samples, n_estimators, n_classes) array to 2-D for the encoder
    leaf_indices = clf.apply(tr_x)
    leaf_indices = leaf_indices.reshape(leaf_indices.shape[0], -1)

    ts_leaf_indices = clf.apply(ts_x)
    ts_leaf_indices = ts_leaf_indices.reshape(ts_leaf_indices.shape[0], -1)

    # Fit the encoder on train and test leaves together so every leaf id is known
    enc = OneHotEncoder()
    enc.fit(np.append(leaf_indices, ts_leaf_indices, axis=0))

    tr_cat_features = enc.transform(leaf_indices).toarray()
    ts_cat_features = enc.transform(ts_leaf_indices).toarray()

    header = ["cat_" + str(i) for i in range(ts_cat_features.shape[1])]
    print("[gbm_cat] Features size: ", len(header))
    return header, tr_cat_features, ts_cat_features
Developer: thushv89 | Project: kaggle_tel | Lines: 27 | Source: analyse_features.py
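A hypothetical call to the function above on synthetic data (assumes the function and its imports, numpy as np and sklearn's OneHotEncoder, are in scope; the variable names here are illustrative):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
tr_x, ts_x, tr_y, ts_y = train_test_split(X, y, test_size=0.3, random_state=0)

header, tr_cat, ts_cat = transform_with_gbm_to_categorical(
    None, tr_x, tr_y, ts_x, n_est=20, max_depth=3)
print(tr_cat.shape, ts_cat.shape)  # both have one column per one-hot leaf id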

Example 2: check_iris

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
def check_iris(presort, subsample, sample_weight):
    # Check consistency on dataset iris.
    clf = GradientBoostingClassifier(n_estimators=100,
                                     loss='deviance',
                                     random_state=1,
                                     subsample=subsample,
                                     presort=presort)
    clf.fit(iris.data, iris.target, sample_weight=sample_weight)
    score = clf.score(iris.data, iris.target)
    assert_greater(score, 0.9)

    leaves = clf.apply(iris.data)
    assert_equal(leaves.shape, (150, 100, 3))
Developer: amueller | Project: scikit-learn | Lines: 15 | Source: test_gradient_boosting.py

Example 3: test_iris

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
def test_iris():
    # Check consistency on dataset iris.
    for subsample in (1.0, 0.5):
        for sample_weight in (None, np.ones(len(iris.target))):
            clf = GradientBoostingClassifier(n_estimators=100, loss='deviance',
                                             random_state=1, subsample=subsample)
            clf.fit(iris.data, iris.target, sample_weight=sample_weight)
            score = clf.score(iris.data, iris.target)
            assert score > 0.9, "Failed with subsample %.1f " \
                "and score = %f" % (subsample, score)

            leaves = clf.apply(iris.data)
            assert_equal(leaves.shape, (150, 100, 3))
Developer: AtonLerin | Project: maya_python_packages | Lines: 15 | Source: test_gradient_boosting.py

Example 4: test_gbm_classifier_backupsklearn

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
def test_gbm_classifier_backupsklearn(backend='auto'):
    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    import h2o4gpu
    Solver = h2o4gpu.GradientBoostingClassifier

    # Run the h2o4gpu version of GradientBoostingClassifier
    gbm = Solver(backend=backend, random_state=1234)
    print("h2o4gpu fit()")
    gbm.fit(X, y)

    # Run the scikit-learn version for comparison
    from sklearn.ensemble import GradientBoostingClassifier
    gbm_sk = GradientBoostingClassifier(random_state=1234, max_depth=3)
    print("Scikit fit()")
    gbm_sk.fit(X, y)

    if backend == "sklearn":
        assert (gbm.predict(X) == gbm_sk.predict(X)).all()
        assert (gbm.predict_log_proba(X) == gbm_sk.predict_log_proba(X)).all()
        assert (gbm.predict_proba(X) == gbm_sk.predict_proba(X)).all()
        assert gbm.score(X, y) == gbm_sk.score(X, y)
        assert (gbm.decision_function(X)[1] == gbm_sk.decision_function(X)[1]).all()
        assert np.allclose(list(gbm.staged_predict(X)), list(gbm_sk.staged_predict(X)))
        assert np.allclose(list(gbm.staged_predict_proba(X)), list(gbm_sk.staged_predict_proba(X)))
        assert (gbm.apply(X) == gbm_sk.apply(X)).all()

        print("Estimators")
        print(gbm.estimators_)
        print(gbm_sk.estimators_)

        print("loss")
        print(gbm.loss_)
        print(gbm_sk.loss_)
        assert gbm.loss_.__dict__ == gbm_sk.loss_.__dict__

        print("init_")
        print(gbm.init)
        print(gbm_sk.init)

        print("Feature importance")
        print(gbm.feature_importances_)
        print(gbm_sk.feature_importances_)
        assert (gbm.feature_importances_ == gbm_sk.feature_importances_).all()

        print("train_score_")
        print(gbm.train_score_)
        print(gbm_sk.train_score_)
        assert (gbm.train_score_ == gbm_sk.train_score_).all()
Developer: wamsiv | Project: h2o4gpu | Lines: 52 | Source: test_xgb_sklearn_wrapper.py

Example 5: check_classification_toy

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
def check_classification_toy(presort, loss):
    # Check classification on a toy dataset.
    clf = GradientBoostingClassifier(loss=loss, n_estimators=10,
                                     random_state=1, presort=presort)

    assert_raises(ValueError, clf.predict, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(10, len(clf.estimators_))

    deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:])
    assert np.any(deviance_decrease >= 0.0)

    leaves = clf.apply(X)
    assert_equal(leaves.shape, (6, 10, 1))
Developer: amueller | Project: scikit-learn | Lines: 18 | Source: test_gradient_boosting.py

Example 6: test_classification_toy

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
def test_classification_toy():
    # Check classification on a toy dataset.

    for loss in ('deviance', 'exponential'):
        clf = GradientBoostingClassifier(loss=loss, n_estimators=10,
                                         random_state=1)

        assert_raises(ValueError, clf.predict, T)

        clf.fit(X, y)
        assert_array_equal(clf.predict(T), true_result)
        assert_equal(10, len(clf.estimators_))

        deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:])
        assert np.any(deviance_decrease >= 0.0), \
            "Train deviance does not monotonically decrease."

        leaves = clf.apply(X)
        assert_equal(leaves.shape, (6, 10, 1))
Developer: AtonLerin | Project: maya_python_packages | Lines: 21 | Source: test_gradient_boosting.py

Example 7: train_test_split

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
# xt = xt[selected_feature[0:23]]

# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)

# x_train, x_train_lr, y_train, y_train_lr = train_test_split(x_train, y_train, test_size=0.5)
x_train, x_train_lr, y_train, y_train_lr = train_test_split(x, y, test_size=0.5)

params = {'n_estimators': 1800, 'max_leaf_nodes': 4, 'max_depth': 6, 'random_state': 2,  # None
          'min_samples_split': 5, 'learning_rate': 0.1, 'subsample': 0.83}
gb = GradientBoostingClassifier(**params)
gb_encoder = preprocessing.OneHotEncoder()
lr = LogisticRegression()

# Fit the GBDT on the first half of the training data
gb.fit(x_train, y_train)

# One-hot encode the leaf indices; [:, :, 0] drops the class axis (binary problem)
gb_encoder.fit(gb.apply(x_train)[:, :, 0])

# Fit the logistic regression on the encoded leaves of the held-out half
lr.fit(gb_encoder.transform(gb.apply(x_train_lr)[:, :, 0]), y_train_lr)

# yhat = lr.predict_proba(gb_encoder.transform(gb.apply(x_test)[:, :, 0]))[:, 1]
yhat = lr.predict_proba(gb_encoder.transform(gb.apply(xt)[:, :, 0]))[:, 1]
yhat2 = gb.predict_proba(xt)[:, 1]
# Average the stacked (GBDT+LR) and plain GBDT probabilities
yhat3 = (np.array(yhat) + np.array(yhat2)) / 2
# fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, yhat)

# plt.figure()
# plt.xlim(0, 1)
# plt.ylim(0, 1)
# plt.plot(fpr_grd_lm, tpr_grd_lm, label='GBT + LR')
# plt.show()
result_data = {'QuoteNumber': xt['QuoteNumber'], 'QuoteConversion_Flag': yhat3}
Developer: Jilinl66 | Project: Data-Analysis-Matlab | Lines: 33 | Source: HQC2.py
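Examples 7, 9, 10 and 11 all follow the same GBDT-to-LR stacking pattern: fit the booster on one part of the data, one-hot encode its leaf indices, then fit a logistic regression on the encoded leaves of a held-out part. For reference, a minimal self-contained version of that pipeline on synthetic data (a sketch, not taken from any one of the projects above):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

X, y = make_classification(n_samples=2000, random_state=0)
# One half trains the GBDT; a quarter trains the LR; a quarter is held out
X_gb, X_rest, y_gb, y_rest = train_test_split(X, y, test_size=0.5, random_state=0)
X_lr, X_test, y_lr, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

gb = GradientBoostingClassifier(n_estimators=50, random_state=0)
gb.fit(X_gb, y_gb)

# handle_unknown='ignore' so leaf ids unseen at fit time do not raise
enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(gb.apply(X_gb)[:, :, 0])

lr = LogisticRegression(max_iter=1000)
lr.fit(enc.transform(gb.apply(X_lr)[:, :, 0]), y_lr)

proba = lr.predict_proba(enc.transform(gb.apply(X_test)[:, :, 0]))[:, 1]
print("GBDT+LR AUC: %.4f" % roc_auc_score(y_test, proba))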

Example 8: LR

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
del X_train_gbdt
del y_train_gbdt
gc.collect()

gbdt_model = pickle.load(open(fp_gbdt_model, 'rb'))
#----- data for LR (one-hot encoding of the GBDT output) -----#
id_cols = []
for i in range(1, gbdt_model.get_params()['n_estimators']+1):
    id_cols.append('tree'+str(i))
# Note: this encoder takes column names, so it is a project-specific encoder,
# not sklearn.preprocessing.OneHotEncoder
oh_enc = OneHotEncoder(id_cols)

def chunker(seq, size):
    return (seq[pos: pos + size] for pos in range(0, len(seq), size))

## Fit oh_enc on the training-set leaf indices, chunk by chunk
df_train_id = pd.DataFrame(gbdt_model.apply(X_train_org)[:, :, 0], columns=id_cols, dtype=np.int8)

# Incremental fitting assumes this encoder accumulates categories across
# fit() calls; sklearn's OneHotEncoder would instead be refit on every chunk
for chunk in chunker(df_train_id, 50000):
    oh_enc.fit(chunk)
    
del df_train_id

del X_train_org
del y_train_org
gc.collect()

## Fit oh_enc on the test set, read in chunks
df_test_f = pd.read_csv(fp_test_f, 
                        index_col=None,  dtype={'id':str}, 
                        chunksize=50000, iterator=True)
Developer: xiaowanzi123 | Project: Practice-of-Machine-Learning | Lines: 32 | Source: gbdt-lr.py
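Note that scikit-learn's own OneHotEncoder does not accumulate categories across repeated fit() calls, so the chunked fitting above presumably relies on the project's own encoder. With plain scikit-learn, memory can instead be kept bounded by fitting once on the training leaves and streaming only the transform step. A sketch under that assumption (names are illustrative):

from sklearn.preprocessing import OneHotEncoder

def stream_onehot(gbdt_model, X_train, X_test, chunk=50000):
    """Fit once on the train leaves, then transform the test set chunk by chunk."""
    enc = OneHotEncoder(handle_unknown='ignore')
    enc.fit(gbdt_model.apply(X_train)[:, :, 0])
    for pos in range(0, len(X_test), chunk):
        # each yielded block is sparse, so peak memory stays bounded
        yield enc.transform(gbdt_model.apply(X_test[pos:pos + chunk])[:, :, 0])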

Example 9: train_test_split

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size = 0.3)
# The second split is needed below: X_train_lr/y_train_lr feed the LR stage
X_train, X_train_lr, y_train, y_train_lr = train_test_split(X_train, y_train, test_size = 0.5)
del train


"""
GBDT+LR
"""
print "Performing GBDT+LR"
random_state = np.random.RandomState(520)
lr = LogisticRegression(random_state = random_state)
grd = GradientBoostingClassifier(n_estimators = 10, random_state = random_state)

grd_enc = OneHotEncoder()
grd.fit(X_train, y_train)
grd_enc.fit(grd.apply(X_train)[:, :, 0])
lr.fit(grd_enc.transform(grd.apply(X_train_lr)[:, :, 0]), y_train_lr)
#probas = lr.predict_proba(grd_enc.transform(grd.apply(X_test)[:, :, 0]))
print "Predicting..."
probas_train = lr.predict_proba(grd_enc.transform(grd.apply(X_train)[:, :, 0]))
#probas_validation = lr.predict_proba(grd_enc.transform(grd.apply(X_validation)[:, :, 0]))
#fpr, tpr, thresholds = roc_curve(y_test, probas[:, 1])
fpr_train, tpr_train, thresholds_train = roc_curve(y_train, probas_train[:, 1])
#fpr_validation, tpr_validation, thresholds_validation = roc_curve(y_validation, probas_validation[:, 1])
#roc_auc = auc(fpr, tpr)
roc_auc_train = auc(fpr_train, tpr_train)
#roc_auc_vldt = auc(fpr_validation, tpr_validation)
#plt.plot(fpr, tpr, lw = 1, label = 'AUC = %0.4f' % (roc_auc))
print("Plotting!...")
plt.plot(fpr_train, tpr_train, lw = 1, label = 'AUC = %0.4f' % (roc_auc_train))
#plt.plot(fpr_validation, tpr_validation, lw = 1, label = 'AUC = %0.4f' % (roc_auc_vldt))
Developer: RamonYeung | Project: ctrip-game6 | Lines: 33 | Source: GBDT_LR.py

Example 10: train_test_split

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
X_test = test_data
X_train, X_train_lr, y_train, y_train_lr = train_test_split(X_train,
                                                            y_train,
                                                            test_size=0.5)
# rf_lm.fit(rf_enc.transform(rf.apply(X_train_lr)), y_train_lr)

# y_pred_rf_lm = rf_lm.predict_proba(rf_enc.transform(rf.apply(X_test)))[:, 1]

#sixthforestt
#Encoder and Logistic Regression combined with Gradient Boosting Classifier
n_estimator = 10
grd = GradientBoostingClassifier(n_estimators=n_estimator)
grd_enc = OneHotEncoder()
grd_lm = LogisticRegression()
grd.fit(X_train, y_train)
grd_enc.fit(grd.apply(X_train)[:, :, 0])
grd_lm.fit(grd_enc.transform(grd.apply(X_train_lr)[:, :, 0]), y_train_lr)


# output = grd_lm.predict(test_data).astype(int)
#output = rf_lm.predict(rf_enc.transform(rf.apply(X_test))).astype(int)
output = grd_lm.predict(grd_enc.transform(grd.apply(X_test)[:, :, 0])).astype(int)


'''
#secondforest (in git)
#Cross Validation 
train_size = int(0.7*(train_data.shape[0]))
validation_size = train_data.shape[0] - train_size

X_train = train_data[0:train_size, 1::]
Developer: Aliases | Project: KaggleProjects | Lines: 33 | Source: randomforest.py

Example 11: RandomForestClassifier

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
# Supervised transformation based on random forests
rf = RandomForestClassifier(max_depth=3, n_estimators=n_estimator)
rf_enc = OneHotEncoder()
rf_lm = LogisticRegression()
rf.fit(X_train, y_train)
rf_enc.fit(rf.apply(X_train))
rf_lm.fit(rf_enc.transform(rf.apply(X_train_lr)), y_train_lr)

y_pred_rf_lm = rf_lm.predict_proba(rf_enc.transform(rf.apply(X_test)))[:, 1]
fpr_rf_lm, tpr_rf_lm, _ = roc_curve(y_test, y_pred_rf_lm)

grd = GradientBoostingClassifier(n_estimators=n_estimator, verbose=1)
grd_enc = OneHotEncoder()
grd_lm = LogisticRegression()
grd.fit(X_train, y_train)
grd_enc.fit(grd.apply(X_train)[:, :, 0])
grd_lm.fit(grd_enc.transform(grd.apply(X_train_lr)[:, :, 0]), y_train_lr)

y_pred_grd_lm = grd_lm.predict_proba(
    grd_enc.transform(grd.apply(X_test)[:, :, 0]))[:, 1]
fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred_grd_lm)


# The gradient boosted model by itself
y_pred_grd = grd.predict_proba(X_test)[:, 1]
fpr_grd, tpr_grd, _ = roc_curve(y_test, y_pred_grd)


# The random forest model by itself
y_pred_rf = rf.predict_proba(X_test)[:, 1]
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf)
Developer: cedricoeldorf | Project: Binary_classification | Lines: 33 | Source: random_tree_embedding+all.py
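The fragment stops after computing the ROC points; the plotting step of the original scikit-learn comparison looks roughly like this (a sketch, assuming the fpr/tpr arrays defined above):

import matplotlib.pyplot as plt

plt.figure()
plt.plot([0, 1], [0, 1], 'k--')  # chance line
plt.plot(fpr_rf, tpr_rf, label='RF')
plt.plot(fpr_rf_lm, tpr_rf_lm, label='RF + LR')
plt.plot(fpr_grd, tpr_grd, label='GBT')
plt.plot(fpr_grd_lm, tpr_grd_lm, label='GBT + LR')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='best')
plt.show()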

Example 12: GradientBoostingClassifier

# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import apply [as alias]
# Split the data set into roughly two halves
# so that stacking does not overfit
train_x1 = train_X[:30000]
train_x2 = train_X[30000:]


train_y1 = train_y[:30000]
train_y2 = train_y[30000:]

# First use gradient boosting to transform the data, then
# one-hot encode the leaf indices.
# After that, fit a logistic regression on the encoding.
grd = GradientBoostingClassifier()
grd_enc = OneHotEncoder()
grd.fit(train_x1, train_y1)
grd_enc.fit(grd.apply(train_x1)[:,:, 0])

grd_lm = LogisticRegression(penalty = 'l2', C = .0115)


grd_lm.fit(grd_enc.transform(grd.apply(train_x2)[:,:, 0]), train_y2)



#Import test data
test_x = []
with open('test_2012.csv', 'r') as f:
	first_row = f.readline()
	headers = first_row.split(',')
	for row in f:
		ints = [int(elem) for elem in row.split(',')]
Developer: SpencerStrumwasser | Project: CS-155-2017---Miniproject-1 | Lines: 33 | Source: GBC_LR.py
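The file-reading loop is truncated above. A hedged guess at the remainder (an assumption, mirroring the prediction step of Example 10: collect the parsed rows, then score them through the encoder and the logistic regression):

import numpy as np

test_x = []
with open('test_2012.csv', 'r') as f:
    f.readline()  # skip the header row
    for row in f:
        test_x.append([int(elem) for elem in row.split(',')])

test_x = np.array(test_x)
# grd, grd_enc and grd_lm are the fitted objects from the fragment above
preds = grd_lm.predict(grd_enc.transform(grd.apply(test_x)[:, :, 0])).astype(int)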


Note: The sklearn.ensemble.GradientBoostingClassifier.apply examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by many developers, and copyright remains with the original authors; consult each project's license before distributing or reusing the code. Do not repost without permission.