

Python GradientBoostingClassifier.predict_proba Method Code Examples

This article compiles typical usage examples of the Python method sklearn.ensemble.GradientBoostingClassifier.predict_proba. If you have been wondering what GradientBoostingClassifier.predict_proba does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the class it belongs to, sklearn.ensemble.GradientBoostingClassifier.


Fifteen code examples of GradientBoostingClassifier.predict_proba are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
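Before diving into the examples, here is a minimal, self-contained sketch of what predict_proba returns (the toy dataset below is illustrative, not from any of the projects): the method yields an array of shape (n_samples, n_classes) whose columns follow the order of clf.classes_, so in the binary case [:, 1] is the probability of the positive class.

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

# Toy binary classification data (illustrative only)
X, y = make_classification(n_samples=200, n_features=10, random_state=0)

clf = GradientBoostingClassifier(n_estimators=50, random_state=0)
clf.fit(X, y)

proba = clf.predict_proba(X)   # shape: (n_samples, n_classes)
print(clf.classes_)            # column order of proba, e.g. [0 1]
print(proba[:5, 1])            # probability of the positive class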

Example 1: ensembleGBM

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def ensembleGBM(derived_data_path, X_train, Y_train, X_test, seed=60):
    random.seed(seed)
    GBM1 = GradientBoostingClassifier(n_estimators = 1500, learning_rate = 0.008, min_samples_leaf = 5, max_features=0.2, max_depth=7)
    GBM2 = GradientBoostingClassifier(n_estimators = 1700, learning_rate = 0.007, min_samples_leaf = 5, max_features=0.2, max_depth=7)
    GBM3 = GradientBoostingClassifier(n_estimators = 1600, learning_rate = 0.0075, min_samples_leaf = 5, max_features=0.2, max_depth=7)
    GBM4 = GradientBoostingClassifier(n_estimators = 1650, learning_rate = 0.007, min_samples_leaf = 5, max_features=0.2, max_depth=8)
    GBM5 = GradientBoostingClassifier(n_estimators = 1750, learning_rate = 0.00725, min_samples_leaf = 6, max_features=0.2, max_depth=7)
    GBM6 = GradientBoostingClassifier(n_estimators = 1550, learning_rate = 0.00775, min_samples_leaf = 4, max_features=0.2, max_depth=7)
    GBM7 = GradientBoostingClassifier(n_estimators = 1850, learning_rate = 0.00725, min_samples_leaf = 5, max_features=0.2, max_depth=6)

    print "Running Model 1"
    GBM1.fit(X_train, Y_train)
    print "Running Model 2"
    GBM2.fit(X_train, Y_train)
    print "Running Model 3"
    GBM3.fit(X_train, Y_train)
    print "Running Model 4"
    GBM4.fit(X_train, Y_train)
    print "Running Model 5"
    GBM5.fit(X_train, Y_train)
    print "Running Model 6"
    GBM6.fit(X_train, Y_train)
    print "Running Model 7"
    GBM7.fit(X_train, Y_train)
    
    GBMClassifiers = [GBM1, GBM2, GBM3, GBM4, GBM5, GBM6, GBM7]
    saveObject(derived_data_path, 'GBM_classifiers.obj', GBMClassifiers)
    
    # Average the seven models' positive-class probabilities
    combine = (GBM1.predict_proba(X_test)[:, 1] + GBM2.predict_proba(X_test)[:, 1] +
               GBM3.predict_proba(X_test)[:, 1] + GBM4.predict_proba(X_test)[:, 1] +
               GBM5.predict_proba(X_test)[:, 1] + GBM6.predict_proba(X_test)[:, 1] +
               GBM7.predict_proba(X_test)[:, 1]) / 7.0

    return combine
Developer: pkravik, Project: kaggle, Lines of code: 33, Source file: main.py
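The seven-way average above can also be written as a loop over the model list, which is handy when the number of models changes; a minimal sketch reusing the names from the example (np is numpy, assumed imported):

import numpy as np

# Stack each model's positive-class probabilities and average column-wise
probs = np.column_stack([clf.predict_proba(X_test)[:, 1] for clf in GBMClassifiers])
combine = probs.mean(axis=1)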

Example 2: gbdt_solver

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def gbdt_solver(train_data, train_label, validation, test, unlabel, dimreduce=decomposition.undo):
    """
    """
    # train_data = train_data[:100,:]
    # train_label = train_label[:100]

    logging.info("begin to train the gbdt classifier")
    new_train_data, new_val, new_test, new_unlabel = dimreduce(train_data, train_label, validation, test, unlabel)
    logging.info("finished feature extracting")

    """
    gb = GradientBoostingClassifier ()
    params_gbdt = {"n_estimators":[100,200,500,1000],
                 "learning_rate":[0.02,0.03,0.05,0.1],
                 "max_depth":[3,5,7,9],
                 "random_state":[1000000007]}"""

    # rand_search_result = GridSearchCV (gb, param_grid = params_gbdt , n_jobs = 3  , cv = 3, scoring = 'roc_auc')
    # rand_search_result = RandomizedSearchCV (gb, param_distributions = params_gbdt, n_jobs = 3, cv = 3, n_iter = 100, scoring = 'roc_auc')
    # rand_search_result.fit (new_train_data , train_label)
    # params = tools.report (rand_search_result.grid_scores_)

    params = {
        "n_estimators": 600,
        "learning_rate": 0.03,
        "random_state": 1000000007,
        "max_depth": 2,
        "warm_start": True,
    }
    gb = GradientBoostingClassifier(**params)
    gb.fit(new_train_data, train_label)
    joblib.dump(gb, ROOT + "/result/gbdt.pkl")
    evaluate.get_auc(gb.predict_proba(new_val)[:, 1])
    return gb.predict_proba(new_test)[:, 1]
Developer: cxlove, Project: RPPredict, Lines of code: 36, Source file: gbdt.py
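The commented-out block in this example hints at a hyperparameter search; a minimal sketch of how it could be wired up with the modern sklearn API (grid values copied from the comment, import path assumes sklearn >= 0.18; new_train_data and train_label come from the example):

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

params_gbdt = {
    "n_estimators": [100, 200, 500, 1000],
    "learning_rate": [0.02, 0.03, 0.05, 0.1],
    "max_depth": [3, 5, 7, 9],
}
# random_state is constant, so it goes on the estimator rather than in the grid
search = GridSearchCV(GradientBoostingClassifier(random_state=1000000007),
                      param_grid=params_gbdt, n_jobs=3, cv=3, scoring='roc_auc')
search.fit(new_train_data, train_label)
print(search.best_params_, search.best_score_)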

Example 3: ctr_gbdt

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def ctr_gbdt(model='sklearn-clicklog', from_cache=False, train_dataset_length=100000, test_dataset_length=100000):
    TRAIN_FILE, TEST_FILE = create_dataset(model, from_cache, train_dataset_length, test_dataset_length)

    prediction_model = GradientBoostingClassifier(
        loss='deviance',
        learning_rate=0.1,
        n_estimators=30,
        subsample=1.0,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=5,
    )

    x_train, y_train = clean_data(TRAIN_FILE)
    x_test, y_test = clean_data(TEST_FILE)

    with Timer('fit model'):
        prediction_model.fit(x_train, y_train)

    with Timer('evaluate model'):
        y_prediction_train = prediction_model.predict_proba(x_train)
        y_prediction_test = prediction_model.predict_proba(x_test)

        loss_train = log_loss(y_train, y_prediction_train)
        loss_test = log_loss(y_test, y_prediction_test)

    print('loss_train: %s' % loss_train)
    print('loss_test: %s' % loss_test)
Developer: kazarinov, Project: hccf, Lines of code: 31, Source file: sklearn_experiments.py
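Note that log_loss accepts the full (n_samples, n_classes) matrix that predict_proba returns, which is exactly what this example passes; for a binary problem, the positive-class column alone gives the same result. A tiny sketch with the example's names:

from sklearn.metrics import log_loss

proba = prediction_model.predict_proba(x_test)   # shape (n_samples, 2)
# Both calls compute the same binary log loss
loss_full = log_loss(y_test, proba)
loss_pos = log_loss(y_test, proba[:, 1])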

Example 4: predict

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def predict(fea, df, t, t9):
    Un = df.columns == 'Blank'
    for f in fea:
        '''        
        try:
            df[(f+'_y')] = df[(f+'_x')] - df[(f+'_y')]
            print(1)
        except:
            pass
        '''
        Un = Un | (df.columns == f)
        Un = Un | (df.columns == (f+'_x'))
        Un = Un | (df.columns == (f+'_y'))
    Un = Un & (df.columns != 'New_y')    
    clf = GradientBoostingClassifier()
    y = df[t].label
    X = df[t].loc[:, Un]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=1)
    clf.fit(X_train, y_train)
    re = 'Testing AUC: \t' + str(roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]))
    print(re)
    re = 'September AUC: \t' + str(roc_auc_score(df[t9].label, clf.predict_proba(df[t9].loc[:, Un])[:, 1]))
    print(re)
    print(X.columns)
    print(clf.feature_importances_)
    return Un, clf
Developer: duxuhao, Project: wo_plus, Lines of code: 28, Source file: ThreeMonth2.py

Example 5: TestGradientBoostingClassifierConverter

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
class TestGradientBoostingClassifierConverter(TestCase):
    def setUp(self):
        np.random.seed(1)
        self.est = GradientBoostingClassifier(max_depth=2, n_estimators=10)
        self.est.fit([[0, 0], [0, 1], [1, 0], [1, 1]], [0, 1, 1, 1])
        self.ctx = TransformationContext(
            {
                Schema.INPUT: [IntegerNumericFeature("x1"), StringCategoricalFeature("x2", ["zero", "one"])],
                Schema.MODEL: [IntegerNumericFeature("x1"), StringCategoricalFeature("x2", ["zero", "one"])],
                Schema.DERIVED: [],
                Schema.OUTPUT: [IntegerCategoricalFeature("output", [0, 1])],
            }
        )
        self.converter = GradientBoostingConverter(estimator=self.est, context=self.ctx)

    def test_transform(self):
        p = self.converter.pmml()
        mm = p.MiningModel[0]
        assert mm.MiningSchema is not None, "Missing mining schema"
        assert len(mm.MiningSchema.MiningField) == 2, "Wrong number of mining fields"
        assert mm.Segmentation is not None, "Missing segmentation root"

    def test_transform_with_verification(self):
        p = self.converter.pmml(
            [
                {"x1": 0, "x2": "zero", "output": self.est.predict_proba([[0, 0]])[0, 1]},
                {"x1": 0, "x2": "one", "output": self.est.predict_proba([[0, 1]])[0, 1]},
                {"x1": 1, "x2": "zero", "output": self.est.predict_proba([[1, 0]])[0, 1]},
                {"x1": 1, "x2": "one", "output": self.est.predict_proba([[1, 1]])[0, 1]},
            ]
        )
        mm = p.MiningModel[0]
        assert mm.MiningSchema is not None, "Missing mining schema"
        assert len(mm.MiningSchema.MiningField) == 2, "Wrong number of mining fields"
        assert mm.Segmentation is not None, "Missing segmentation root"
Developer: gitter-badger, Project: sklearn-pmml, Lines of code: 37, Source file: test_gradientBoostingConverter.py

Example 6: GB_Classifier

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def GB_Classifier(X_train, X_cv, X_test, Y_train,Y_cv,Y_test, Actual_DS):
    print("***************Starting Gradient Boosting***************")
    t0 = time()
    clf = GradientBoostingClassifier(n_estimators=500,learning_rate=0.01)
    clf.fit(X_train, Y_train)
    preds = clf.predict(X_cv)
    score = clf.score(X_cv,Y_cv)

    print("Gradient Boosting - {0:.2f}%".format(100 * score))
    Summary = pd.crosstab(label_enc.inverse_transform(Y_cv), label_enc.inverse_transform(preds),
                      rownames=['actual'], colnames=['preds'])
    Summary['pct'] = (Summary.divide(Summary.sum(axis=1), axis=1)).max(axis=1)*100
    print(Summary)

    #Check with log loss function
    epsilon = 1e-15
    #ll_output = log_loss_func(Y_cv, preds, epsilon)
    preds2 = clf.predict_proba(X_cv)
    ll_output2= log_loss(Y_cv, preds2, eps=1e-15, normalize=True)
    print(ll_output2)

    print("done in %0.3fs" % (time() - t0))

    preds3 = clf.predict_proba(X_test)
    #preds4 = clf.predict_proba((Actual_DS.ix[:,'feat_1':]))
    preds4 = clf.predict_proba(Actual_DS)

    print("***************Ending Gradient Boosting***************")
    return pd.DataFrame(preds2),pd.DataFrame(preds3),pd.DataFrame(preds4)
Developer: roshankr, Project: DS_Competition, Lines of code: 31, Source file: Otto_Classification.py
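Since this example returns raw multi-class probability matrices, it is worth remembering that the columns of predict_proba follow clf.classes_; a short sketch of labeling the output DataFrame accordingly (pd is pandas, already used in the example; other names reuse the example's):

proba = clf.predict_proba(X_test)
# Column order of predict_proba follows clf.classes_
submission = pd.DataFrame(proba, columns=clf.classes_)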

Example 7: ensembleGBMTest

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def ensembleGBMTest(derived_data_path, X_train, Y_train, X_test, Y_test):
    random.seed(60)
    GBM1 = GradientBoostingClassifier(n_estimators = 1500, learning_rate = 0.008, min_samples_leaf = 5, max_features=0.2, max_depth=7)
    GBM2 = GradientBoostingClassifier(n_estimators = 1700, learning_rate = 0.007, min_samples_leaf = 5, max_features=0.2, max_depth=7)
    GBM3 = GradientBoostingClassifier(n_estimators = 1600, learning_rate = 0.0075, min_samples_leaf = 5, max_features=0.2, max_depth=7)
    GBM4 = GradientBoostingClassifier(n_estimators = 1650, learning_rate = 0.007, min_samples_leaf = 5, max_features=0.2, max_depth=8)
    GBM5 = GradientBoostingClassifier(n_estimators = 1750, learning_rate = 0.00725, min_samples_leaf = 6, max_features=0.2, max_depth=7)
    GBM6 = GradientBoostingClassifier(n_estimators = 1550, learning_rate = 0.00775, min_samples_leaf = 4, max_features=0.2, max_depth=7)
    GBM7 = GradientBoostingClassifier(n_estimators = 1850, learning_rate = 0.00725, min_samples_leaf = 5, max_features=0.2, max_depth=6)

    GBM1.fit(X_train, Y_train)
    GBM2.fit(X_train, Y_train)
    GBM3.fit(X_train, Y_train)
    GBM4.fit(X_train, Y_train)
    GBM5.fit(X_train, Y_train)
    GBM6.fit(X_train, Y_train)
    GBM7.fit(X_train, Y_train)
    
    print "GBM1: %f" % (gini(GBM1, X_test, Y_test))
    print "GBM2: %f" % (gini(GBM2, X_test, Y_test))
    print "GBM3: %f" % (gini(GBM3, X_test, Y_test))
    print "GBM4: %f" % (gini(GBM4, X_test, Y_test))
    print "GBM5: %f" % (gini(GBM5, X_test, Y_test))
    print "GBM6: %f" % (gini(GBM6, X_test, Y_test))
    print "GBM7: %f" % (gini(GBM7, X_test, Y_test))
    
    # Now combine: sum the positive-class probabilities of all seven models
    combine = (GBM1.predict_proba(X_test)[:, 1] + GBM2.predict_proba(X_test)[:, 1] +
               GBM3.predict_proba(X_test)[:, 1] + GBM4.predict_proba(X_test)[:, 1] +
               GBM5.predict_proba(X_test)[:, 1] + GBM6.predict_proba(X_test)[:, 1] +
               GBM7.predict_proba(X_test)[:, 1])
    print("With our powers combined: %f" % giniNoEstimator(Y_test, combine))

    GBMClassifiers = [GBM1, GBM2, GBM3, GBM4, GBM5, GBM6, GBM7]
    saveObject(derived_data_path, 'GBM_classifiers.obj', GBMClassifiers)
Developer: pkravik, Project: kaggle, Lines of code: 35, Source file: main.py

Example 8: predict

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def predict(fea1,fea2, df, t, t9):
    n = 0
    weight = [0.73,0.27]
    tave = np.zeros(len(df[t9]))
    y = df[t].label
    X_1 = df[t]
    df9 = df[t9]
    for fea in [fea1,fea2]:
        Un = df.columns == 'Blank'
        for f in fea:
            Un = Un | (df.columns == f)
            Un = Un | (df.columns == (f+'_x'))
            Un = Un | (df.columns == (f+'_y'))
        Un = Un & (df.columns != 'quarterly_attrition_rate_y')
        clf = GradientBoostingClassifier()
        X = X_1.loc[:, Un]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=1)
        min_max_scaler = preprocessing.MinMaxScaler()
        clf.fit(min_max_scaler.fit_transform(X_train), y_train)
        re = 'Testing AUC: \t' + str(roc_auc_score(y_test, clf.predict_proba(min_max_scaler.transform(X_test))[:, 1]))
        print(re)
        # transform (not fit_transform): reuse the scaler fitted on the training split
        prob9 = clf.predict_proba(min_max_scaler.transform(df9.loc[:, Un]))[:, 1]
        re = 'September AUC: \t' + str(roc_auc_score(df9.label, prob9))
        print(re)
        tave = prob9 * weight[n] + tave
        n += 1
        
    
    print('-' * 30)
    print(weight)
    print('Total AUC')
    re = 'September AUC: \t' + str(roc_auc_score(df9.label, tave))
    print(re)
    return Un, clf
Developer: duxuhao, Project: wo_plus, Lines of code: 36, Source file: ThreeMonth.py
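One detail worth calling out in this example: a scaler should be fitted on the training split only and then applied with transform to any held-out data, otherwise evaluation statistics leak into preprocessing. A minimal sketch of the pattern (variable names illustrative):

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)   # fit on training data only
X_test_scaled = scaler.transform(X_test)         # reuse the training statistics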

Example 9: main

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def main():
    
    train_f = pd.read_csv(train_path, header=0, parse_dates=['Dates'])
    print(train_f.dtypes)

    X, Y = get_feature(train_f, "training_set")
    

    ### TRAINING
    clf = GradientBoostingClassifier(n_estimators=50)
    # clf = RandomForestClassifier(n_estimators=2)
    # clf = LogisticRegression(n_jobs=4)

    X, Y = shuffle_XY(X, Y)
    data_len = len(X)
    train_len = data_len * 95 // 100  # integer division so the slices below get an int
    val_len = data_len - train_len
    X_train = X[:train_len]
    X_val = X[train_len:]
    Y_train = Y[:train_len]
    Y_val = Y[train_len:]
    
    clf = clf.fit(X_train, Y_train)
    print "Training done"

    
    val_acc = clf.score(X_val, Y_val)
    print "Val acc:", val_acc

    val_pred = clf.predict_proba(X_val)
    

    # print max(Y_val), min(Y_val)
    # print Y_val, Y_val + 1
    val_log = 0.0
    cnt = 0
    for y in Y_val:
        val_log += math.log(val_pred[cnt, y]+0.0000001)
        cnt += 1
    val_log = -val_log / len(Y_val)
    print("Val log loss:", val_log)
 
    # print "Val loss:", log_loss(Y_val+1, val_pred) # Note the +1 here!
    """
    # scores = cross_val_score(clf, X, Y)
    # print "Cross val acc:", scores.mean()
    """

    ### Testing

    test_f = pd.read_csv(test_path, header=0, parse_dates=['Dates'])
    # print test_f.dtypes

    X_test, _ = get_feature(test_f, "test_set")
    Y_test = clf.predict_proba(X_test)

    ### Write results
    # write_results(Y_test)
    write_results_prob(Y_test)
Developer: ruoyanwang, Project: datasci, Lines of code: 61, Source file: gradient_boosting_tree.py
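The hand-rolled loop above computes the multi-class log loss; for reference, a sketch of the equivalent sklearn call (equal up to the +1e-7 smoothing term in the loop; names come from the example):

from sklearn.metrics import log_loss

# labels pins the class order to match the columns of val_pred
val_log = log_loss(Y_val, val_pred, labels=clf.classes_)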

Example 10: MyGradientBoost

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
class MyGradientBoost(MyClassifier):
    def __init__(self, params=dict()):
        self._params = params
        self._gb = GradientBoostingClassifier(**(self._params))

    def update_params(self, updates):
        self._params.update(updates)
        self._gb = GradientBoostingClassifier(**(self._params))

    def fit(self, Xtrain, ytrain):
        self._gb.fit(Xtrain, ytrain)

    # def predict(self, Xtest, option = None):
    #   return self._gb.predict(Xtest)

    def predict_proba(self, Xtest, option = None):
        return self._gb.predict_proba(Xtest)[:, 1]

    def predict_proba_multi(self, Xtest, option = None):
        return self._gb.predict_proba(Xtest)

    def plt_feature_importance(self, fname_list, f_range = list()):
        importances = self._gb.feature_importances_

        std = np.std([tree[0].feature_importances_ for tree in self._gb.estimators_], axis=0)
        indices = np.argsort(importances)[::-1]

        fname_array = np.array(fname_list)

        if not f_range:
            f_range = range(indices.shape[0])

        n_f = len(f_range)

        plt.figure()
        plt.title("Gradient Boost Feature importances")
        plt.barh(range(n_f), importances[indices[f_range]],
               color="b", xerr=std[indices[f_range]], ecolor='k',align="center")
        plt.yticks(range(n_f), fname_array[indices[f_range]])
        plt.ylim([-1, n_f])
        plt.show()    

    def list_feature_importance(self, fname_list, f_range = list(), return_list = False):
        importances = self._gb.feature_importances_
        indices = np.argsort(importances)[::-1]

        print('Gradient Boost feature ranking:')

        if not f_range :
            f_range = range(indices.shape[0])

        n_f = len(f_range)

        for i in range(n_f):
            f = f_range[i]
            print('{0:d}. feature[{1:d}]  {2:s}  ({3:f})'.format(f + 1, indices[f], fname_list[indices[f]], importances[indices[f]]))

        if return_list:
            return [indices[f_range[i]] for i in range(n_f)]
Developer: tonyzhangrt, Project: wklearn, Lines of code: 61, Source file: learner.py
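A minimal usage sketch for the wrapper class above (Xtrain, ytrain, Xtest, and feature_names are illustrative placeholders, not from the project):

gb = MyGradientBoost(params={'n_estimators': 200, 'max_depth': 3})
gb.fit(Xtrain, ytrain)
p_pos = gb.predict_proba(Xtest)          # positive-class probabilities only
p_all = gb.predict_proba_multi(Xtest)    # full (n_samples, n_classes) matrix
gb.list_feature_importance(feature_names, f_range=list(range(10)))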

Example 11: do_gbdt4

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def do_gbdt4(train_x, train_y, test_x=None, test_y=None, learning_rate=0.03, max_depth=8, max_features=25,
            n_estimators=600, load=False, save=True, outfile=None, search=False, log=False):
    if not search:
        if log:
            mdl_name = 'gbdt_log_train_lr' + str(learning_rate) + '_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '.pkl'
        else:
            mdl_name = 'gbdt_train_lr' + str(learning_rate) + '_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '.pkl'
        if os.path.exists(mdl_name):
            clf_gbdt = joblib.load(mdl_name)
        else:
            # create gradient boosting
            clf_gbdt = GradientBoostingClassifier(learning_rate=learning_rate, max_depth=max_depth,
                                                  max_features=max_features, n_estimators=n_estimators)
            #n_estimators=500, learning_rate=0.5, max_depth=3)
            clf_gbdt.fit(train_x, train_y)
            if save:
                try:
                    _ = joblib.dump(clf_gbdt, mdl_name, compress=1)
                except Exception:
                    print("*** Save GBM model to pickle failed!!!")
                    if outfile is not None:
                        outfile.write("*** Save GBM model to pickle failed!!!")
        if test_x is not None and test_y is not None:
            probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
            score_gbdt = roc_auc_score(test_y, probas_gbdt)
            print("GBDT ROC score", score_gbdt)
        return clf_gbdt
    else:
        max_depth_list = [ 6, 7, 8, 9, 10]
        n_list = [2000]
        lr_list = [0.005,0.003]
        max_feat_list = [15, 16, 17, 18, 20]
        info = {}
        for md in max_depth_list:
            for n in n_list:
                for lr in lr_list:
                    for mf in max_feat_list:
                        print('max_depth =', md)
                        print('n =', n)
                        print('learning rate =', lr)
                        print('max feature =', mf)
                        mdl_name = 'gbdt_n' + str(n) + '_lr' + str(lr) + '_md' + str(md) + 'mf' + str(mf) + '.pkl'
                        if os.path.exists(mdl_name):
                            clf_gbdt = joblib.load(mdl_name)
                        else:
                            # use the loop variables, not the function arguments
                            clf_gbdt = GradientBoostingClassifier(learning_rate=lr, max_depth=md,
                                                                  max_features=mf, n_estimators=n)
                            clf_gbdt.fit(train_x, train_y)
                            _ = joblib.dump(clf_gbdt, mdl_name, compress=1)
                        probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
                        score_gbdt = roc_auc_score(test_y, probas_gbdt)
                        info[md, n, lr, mf] = score_gbdt
        for key in info:
            print('GBDT max_depth = %d, n = %d, lr = %.5f, max_feature = %d, ROC score = %.5f' % (
                key[0], key[1], key[2], key[3], info[key]))
Developer: qi-feng, Project: ClassificationUsingScikitLearn, Lines of code: 58, Source file: kdd_model.py

Example 12: gb

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def gb(train_data, train_label, val_data, val_label, test_data, name="GradientBoosting_submission.csv"):
    print("start training GradientBoosting...")
    gbClf = GradientBoostingClassifier()       # params: by default
    gbClf.fit(train_data, train_label)
    # evaluate on validation set
    val_pred_label = gbClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set:", logloss)

    print("Start classify test set...")
    test_label = gbClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Developer: 9627872, Project: OpenDL, Lines of code: 14, Source file: GradientBoosting.py

Example 13: classify2

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def classify2(dis_data, numeric_data, t_label):
    fold = 5
    skf = StratifiedKFold(n_splits=fold)
    roc_auc = 0  
    f1_score_value = 0

    clf1 = LogisticRegression()
    clf2 = GradientBoostingClassifier()
#    clf3 = tree.DecisionTreeClassifier(max_depth=500, max_leaf_nodes= 500, class_weight={1:12})
    clf3 = GradientBoostingClassifier()
    
    for train, test in skf.split(dis_data, t_label):
        clf3 = clf3.fit(dis_data.iloc[train], t_label.iloc[train])
        
        # compute AUC from the positive-class column of predict_proba
        probas_ = clf3.predict_proba(dis_data.iloc[test])
        fpr, tpr, thresholds = roc_curve(t_label.iloc[test], probas_[:, 1])
        roc_auc += auc(fpr, tpr)
        
        #compute f1_score
        label_pred = clf3.predict(dis_data.iloc[test])
        
        f1_score_value += f1_score(t_label.iloc[test], label_pred, pos_label= 1)
        
    return roc_auc / fold, f1_score_value / fold     
Developer: HY201, Project: KDD-CUP, Lines of code: 27, Source file: FinalHomework.py

Example 14: machineLearning

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def machineLearning(X, Y_parameters,  predict_value, writer):
    X_parameters = X
    clf1 = LinearSVR()
    clf2 = LinearRegression()
    clf3 = RandomForestClassifier()
    clf4 = LogisticRegression()
    clf5 = DecisionTreeClassifier()
    clf6 = GradientBoostingClassifier()
    ##clf1.fit(X_parameters, Y_parameters)
    #clf2.fit(X_parameters, Y_parameters)
    #clf3.fit(X_parameters, Y_parameters)
    clf4.fit(X_parameters, Y_parameters)
    #clf5.fit(X_parameters, Y_parameters)
    clf6.fit(X_parameters, Y_parameters)
    print "finish fitting"
    answer = []
    for line in predict_value:
        line1 = line[1:]
        #predict_outcome1 = clf1.predict(line1)
        #predict_outcome2 = clf2.predict(line1)
        #predict_outcome3 = clf3.predict_proba([line1])
        predict_outcome4 = clf4.predict_proba([line1])   # predict_proba expects a 2-D array
        #predict_outcome5 = clf5.predict_proba([line1])
        predict_outcome6 = clf6.predict_proba([line1])
        #value1 = predict_outcome1[0]
        #value2 = predict_outcome2[0]
        #value3 = predict_outcome3[0][1]
        value4 = predict_outcome4[0][1]
        #value5 = predict_outcome5[0][1]
        value6 = predict_outcome6[0][1]
        data = (value4 + value6) / 2
        writer.writerow([line[0],data])
    print "finish learning"
Developer: frankxu2004, Project: dropout-mooc-kddcup2015, Lines of code: 35, Source file: train.py

Example 15: train

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def train():
    posi_result = {}
    train_feature, test_feature, train_id_list, test_id_list, train_tar_list = merge_feature(feature_str)
    tmp1 = np.array([m < 32 for m in train_tar_list])
    # keep features and targets aligned after filtering
    train_feature = train_feature[tmp1]
    target_list = np.array(train_tar_list)[tmp1]
    # train_id_list = np.array(train_id_list)
    # train_id_list = train_id_list[tmp1]
    c_feature = train_feature.columns[:]
    clf1 = RandomForestClassifier(n_estimators=200, min_samples_split=17)
    clf1.fit(train_feature[c_feature], target_list)
    # rf_preds = clf1.predict(test_feature)
    rf_prob = clf1.predict_proba(test_feature)
    gbdt1 = GradientBoostingClassifier(n_estimators=150, min_samples_split=17)
    gbdt1.fit(train_feature[c_feature], target_list)
    # gbdt_preds = gbdt1.predict(test_feature)
    gbdt_prob = gbdt1.predict_proba(test_feature)
    all_prob = rf_prob + gbdt_prob
    all_preds = []
    print(all_prob.shape)
    for k in range(all_prob.shape[0]):
        prob1 = list(all_prob[k, :])
        ind1 = prob1.index(max(prob1))
        all_preds.append(ind1)
    for j in range(len(all_preds)):
        all_pre_name = dl.get_num_position(all_preds[j])
        posi_result[test_id_list[j]] = all_pre_name
    return posi_result
Developer: yinzhao0312, Project: position-predict, Lines of code: 32, Source file: posi_predict.py


Note: the sklearn.ensemble.GradientBoostingClassifier.predict_proba examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers, and copyright remains with the original authors; consult each project's license before distributing or using the code, and do not reproduce this page without permission.