Python XGBClassifier.fit方法代码示例

本文整理汇总了Python中xgboost.XGBClassifier.fit方法的典型用法代码示例。如果您正苦于以下问题：Python XGBClassifier.fit方法的具体用法？Python XGBClassifier.fit怎么用？Python XGBClassifier.fit使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类xgboost.XGBClassifier的用法示例。

在下文中一共展示了XGBClassifier.fit方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: kfold_cv

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def kfold_cv(X_train, y_train, idx, k):
    """Fit an XGBClassifier on the first stratified fold and score it.

    Although ``k`` folds are requested, the loop leaves via ``break`` after
    the first one, so exactly one train/held-out split is evaluated.

    Args:
        X_train: 2-D feature array, indexed as ``X_train[rows, :]``.
        y_train: label array aligned with ``X_train``; also drives the
            stratification.
        idx: array of row identifiers; it is indexed with the held-out
            positions of the fold.
        k: number of folds handed to ``StratifiedKFold``.

    Returns:
        Tuple ``(ypred, yreal, idx)`` for the held-out part of the fold:
        column 1 of ``predict_proba``, the true labels, and the matching
        identifiers.
    """
    # NOTE(review): this is the legacy StratifiedKFold(y, n_folds=...) call
    # style where the object itself is iterable - confirm the installed
    # scikit-learn still provides it.
    kf = StratifiedKFold(y_train, n_folds=k)
    scores = []
    for train_index, test_index in kf:
        X_train_cv, X_test_cv = X_train[train_index, :], X_train[test_index, :]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]

        clf = XGBClassifier(max_depth=10, colsample_bytree=0.8, learning_rate=0.02,
                            n_estimators=500, nthread=-1)
        clf.fit(X_train_cv, y_train_cv, eval_metric="logloss",
                eval_set=[(X_test_cv, y_test_cv)])
        y_pred = clf.predict_proba(X_test_cv).T[1]
        print(y_pred.shape)
        scores.append(llfun(y_test_cv, y_pred))
        ypred = y_pred
        yreal = y_test_cv
        idx = idx[test_index]
        print(scores[-1])
        # Only the first fold is ever evaluated.
        break

    # Single pre-formatted argument: same text under Python 2 and Python 3.
    print("%s average: %s std %s" % (scores, np.mean(scores), np.std(scores)))
    return ypred, yreal, idx

开发者ID:daxiongshu，项目名称:bnp，代码行数:37，代码来源:ada7.py

示例2: cv

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def cv(X_train, y_train, features_inner):
    """Cross-validate a default XGBClassifier with shuffled 5-fold stratification.

    For each fold the F1, precision and recall on the held-out rows are
    printed; after the last fold the mean of each metric is printed.
    Nothing is returned.

    Args:
        X_train: feature frame, read through ``.values`` and ``.columns``.
        y_train: target, read through ``.values``.
        features_inner: accepted but not used by this function.
    """
    splitter = StratifiedKFold(n_splits=5, shuffle=True)
    f1_values, precision_values, recall_values = [], [], []

    for fit_rows, held_out_rows in splitter.split(X_train, y_train):
        classifier = XGBClassifier()

        # Rebuild per-fold frames from the raw values of the selected rows.
        fold_features = pd.DataFrame(X_train.values[fit_rows], columns=X_train.columns)
        fold_target = pd.DataFrame(y_train.values[fit_rows], columns=["tred_cutoff"])
        held_out_features = pd.DataFrame(X_train.values[held_out_rows], columns=X_train.columns)
        held_out_target = pd.DataFrame(y_train.values[held_out_rows], columns=["tred_cutoff"])
        classifier.fit(fold_features, fold_target)

        predicted = classifier.predict(held_out_features)

        fold_f1 = f1_score(held_out_target, predicted)
        fold_precision = precision_score(held_out_target, predicted)
        fold_recall = recall_score(held_out_target, predicted)
        print("\tscores f1", (fold_f1))
        print("\tscores p", (fold_precision))
        print("\tscores r", (fold_recall))
        f1_values.append(fold_f1)
        precision_values.append(fold_precision)
        recall_values.append(fold_recall)

    print("mean scores f1", np.mean(f1_values))
    print("mean scores p", np.mean(precision_values))
    print("mean scores r", np.mean(recall_values))

开发者ID:nogur9，项目名称:PTSD，代码行数:34，代码来源:tred+as+target.py

示例3: xgboostcv

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def xgboostcv(max_depth,
              learning_rate,
              n_estimators,
              subsample,
              colsample_bytree,
              gamma,
              min_child_weight,
              silent=True,
              nthread=-1,
              seed=1234):
    """Train an XGBClassifier with the given hyper-parameters and score it.

    The data is not passed in: the function reads ``x0``/``y0`` (fit data)
    and ``x1``/``y1`` (evaluation data) from the enclosing scope.
    ``max_depth`` and ``n_estimators`` are truncated to ``int`` before use.

    Returns:
        The negated log loss of the fitted model on ``(x1, y1)``.
    """
    estimator_params = dict(
        max_depth=int(max_depth),
        learning_rate=learning_rate,
        n_estimators=int(n_estimators),
        silent=silent,
        nthread=nthread,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma=gamma,
        min_child_weight=min_child_weight,
        seed=seed,
        objective="binary:logistic",
    )
    model = XGBClassifier(**estimator_params)

    # Monitor log loss on (x1, y1); stop after 25 rounds without improvement.
    model.fit(x0, y0, eval_metric="logloss", eval_set=[(x1, y1)], early_stopping_rounds=25)
    evaluation_proba = model.predict_proba(x1)
    return -log_loss(y1, evaluation_proba)

开发者ID:mpearmain，项目名称:bnp，代码行数:28，代码来源:xgb_autotune.py

示例4: feature_selection

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def feature_selection(model, X_train, X_test, y_train, y_test, eval_metric='auc'):
    """Choose the feature-importance threshold with the best test ROC AUC.

    Every distinct non-zero value in ``model.feature_importances_`` is tried
    as a ``SelectFromModel`` threshold: a fresh default XGBClassifier is
    trained on the reduced training set and scored on the reduced test set.
    The selector built with the best threshold is pickled under the name
    ``'feature.select'`` and applied to both inputs.

    Args:
        model: an already fitted estimator exposing ``feature_importances_``.
        X_train, X_test: feature matrices to reduce.
        y_train, y_test: matching targets.
        eval_metric: forwarded to ``XGBClassifier.fit``.

    Returns:
        Tuple ``(X_train_trans_, X_test_trans_)`` of the reduced matrices.

    Raises:
        ValueError: from ``max`` when no importance is non-zero, because no
            threshold is evaluated in that case.
    """
    # Candidate thresholds are the distinct non-zero importances. A repeated
    # value would only retrain on the same selection and overwrite the same
    # dictionary key, so duplicates are dropped up front.
    thresholds = sorted({thres for thres in model.feature_importances_ if thres != 0})

    roc_scores = {}
    for thresh in thresholds:
        selection = SelectFromModel(model, threshold=thresh, prefit=True)
        select_X_train = selection.transform(X_train)

        selection_model = XGBClassifier()
        selection_model.fit(select_X_train, y_train, eval_metric=eval_metric)

        select_X_test = selection.transform(X_test)
        y_pred = selection_model.predict(select_X_test)

        # NOTE(review): the AUC is computed from predict() output rather than
        # predict_proba() - confirm this is intended.
        roc_scores[selection.threshold] = roc_auc_score(y_test, y_pred)

    best_thresh = max(roc_scores, key=roc_scores.get)

    fs = SelectFromModel(model, threshold=best_thresh, prefit=True)
    pickle_model(fs, 'feature.select')
    X_train_trans_ = fs.transform(X_train)
    X_test_trans_ = fs.transform(X_test)
    # Parenthesised single argument: valid and identical on Python 2 and 3.
    print('total features kept: {}'.format(X_train_trans_.shape[1]))

    return X_train_trans_, X_test_trans_

开发者ID:jmc856，项目名称:Webpage，代码行数:29，代码来源:predict.py

示例5: xgboost_classifier

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
 def xgboost_classifier(self):
     """Benchmark a default XGBClassifier on this object's data.

     Prints the ``cross_val_score`` result on ``self.x_data``/``self.y_data``,
     then fits on ``self.x_train``/``self.y_train`` and prints the score on
     ``self.x_test``/``self.y_test`` together with the elapsed wall time,
     which covers both the fit and the scoring call.
     """
     clf = XGBClassifier()
     # Each message is one pre-formatted string so the output is the same
     # under Python 2 and Python 3 (print statements fail to parse on 3).
     print('xgboost cross validation score %s'
           % (cross_val_score(clf, self.x_data, self.y_data),))
     start_time = time.time()
     clf.fit(self.x_train, self.y_train)
     print('score %s' % (clf.score(self.x_test, self.y_test),))
     print('time cost %s' % (time.time() - start_time,))

开发者ID:AloneGu，项目名称:ml_algo_box，代码行数:9，代码来源:classifier_benchmark.py

示例6: test_predict_sklearn_pickle

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
    def test_predict_sklearn_pickle(self):
        """Check margin predictions of an unpickled model before and after a predictor switch.

        A model configured with ``gpu_hist``/``gpu_predictor`` is fitted,
        pickled to ``model.pkl``, deleted and reloaded. Margin predictions
        are taken once as loaded and once after the booster's ``predictor``
        parameter is set to ``cpu_predictor``; both must agree to
        ``rtol=1e-5``.
        """
        x, y = build_dataset()

        params = dict(tree_method='gpu_hist',
                      predictor='gpu_predictor',
                      verbosity=2,
                      objective='binary:logistic',
                      n_estimators=10)

        trained = XGBClassifier(**params)
        trained.fit(x, y)

        pickle_path = "model.pkl"
        save_pickle(trained, pickle_path)
        del trained

        # Round-trip through the pickle, then clean up the temporary file.
        restored: xgb.XGBClassifier = load_pickle(pickle_path)
        os.remove(pickle_path)

        margin_before_switch = restored.predict(x, output_margin=True)

        # Switch the underlying booster to the CPU predictor and predict again.
        booster = restored.get_booster()
        booster.set_param({'predictor': 'cpu_predictor'})
        margin_after_switch = restored.predict(x, output_margin=True)
        np.testing.assert_allclose(margin_after_switch, margin_before_switch, rtol=1e-5)

开发者ID:dmlc，项目名称:xgboost，代码行数:28，代码来源:test_pickling.py

示例7: test_predict_sklearn_pickle

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
    def test_predict_sklearn_pickle(self):
        """Fit a GPU-configured XGBClassifier, pickle it, reload it and predict.

        The test makes no assertions. It prints the fitted model, the margin
        predictions of the reloaded model on the test matrix, the count of
        non-zero predictions and the prediction time.
        """
        X, y = makeXy()
        Xtest = makeXtest()

        from xgboost import XGBClassifier
        params = {
            'tree_method': 'gpu_hist',
            'predictor': 'gpu_predictor',
            'silent': 0,
            'objective': 'binary:logistic',
        }

        fitted = XGBClassifier(**params)
        fitted.fit(X, y)
        print(fitted)

        # Pickle the model, drop the in-memory object, then restore it from
        # disk and remove the temporary file.
        pickle_path = "model.pkl"
        save_obj(fitted, pickle_path)
        del fitted
        reloaded = load_obj(pickle_path)
        os.remove(pickle_path)

        # Predict with the restored model and report timing.
        print("Before model.predict")
        sys.stdout.flush()
        started = time.time()
        gpu_pred = reloaded.predict(Xtest, output_margin=True)
        print(gpu_pred)
        print("E non-zeroes: %d:" % (np.count_nonzero(gpu_pred)))
        print("E GPU Time to predict = %g" % (time.time() - started))

开发者ID:wamsiv，项目名称:h2o4gpu，代码行数:33，代码来源:test_gpu_prediction_pickledmodel.py

示例8: get_xgb_feature_importance_plot

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def get_xgb_feature_importance_plot(best_param_, experiment_,
                                    png_folder,
                                    png_fname,
                                    score_threshold=0.8):
    """Fit an XGBClassifier with tuned parameters and save a feature-score bar chart.

    Args:
        best_param_: dict of estimator parameters. A ``'model_type'`` entry,
            if present, is removed from this dict in place; the rest is
            passed to ``set_params``.
        experiment_: object whose ``get_train_data()`` returns
            ``(train_X, train_y)``.
        png_folder: accepted but not used; the output directory is built from
            ``Config.get_string('data.path')`` and ``'graph'``.
        png_fname: file name of the PNG written into that directory.
        score_threshold: quantile of the positive scores. When more than 20
            features have a score, only features at or above this quantile
            are plotted.

    Returns:
        ``True`` once the figure has been saved and closed.
    """
    # 1. fit the model and collect the per-feature scores
    train_X, train_y = experiment_.get_train_data()
    clf = XGBClassifier()
    # 'model_type' is not forwarded to the estimator. pop() with a default
    # tolerates a missing key without a bare ``except`` that would also hide
    # unrelated errors.
    best_param_.pop('model_type', None)
    clf.set_params(**best_param_)
    clf.fit(train_X, train_y)
    # NOTE(review): ``clf.booster()`` and ``DataFrame.sort`` are legacy
    # xgboost / pandas spellings - confirm against the installed versions.
    index2feature = clf.booster().get_fscore()
    # Materialise the dict views so the frame is built the same way on
    # Python 2 and Python 3.
    fis = pd.DataFrame({'name': list(index2feature.keys()),
                        'score': list(index2feature.values())})
    fis = fis.sort('score', ascending=False)
    if len(fis.index) > 20:
        score_threshold = fis['score'][fis['score'] > 0.0].quantile(score_threshold)
        where_str = 'score >= %f' % (score_threshold)
        fis = fis.query(where_str)

    # 2. plot the feature scores as a horizontal bar chart
    sns.barplot(x = 'score', y = 'name',
                data = fis,
                color="blue")
    plt.ylabel("Feature", fontsize=10)
    plt.xlabel("Feature_Importance : f-Score", fontsize=10)

    # The PDF / CDF panels that used to follow were disabled (they lived in
    # a no-op string literal) and have been removed.

    # 3. write the figure to <data.path>/graph/<png_fname>
    png_fname = os.path.join(Config.get_string('data.path'), 'graph', png_fname)
    plt.tight_layout()
    plt.savefig(png_fname)
    plt.close()

    return True

开发者ID:Quasi-quant2010，项目名称:Stacking，代码行数:62，代码来源:run_xgb_param_search.py

示例9: XGB_model

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def XGB_model(train,y):
    """Fit an XGBClassifier over 5 KFold training subsets and predict on ``test``.

    Args:
        train: feature frame, sliced positionally with ``.iloc``.
        y: target, sliced positionally with ``.iloc``.

    Returns:
        Predictions of the classifier for ``test``. ``test`` is not a
        parameter and is read from the enclosing scope.
    """
    classifier = XGBClassifier(n_estimators=150, learning_rate=0.01)
    from sklearn import cross_validation
    folds = cross_validation.KFold(len(train), n_folds=5, random_state=7)
    # NOTE(review): fit() is called once per fold on the same estimator and
    # the held-out indices are never used; presumably only the last fit is
    # retained - confirm this is intended.
    for fit_rows, _held_out_rows in folds:
        classifier.fit(train.iloc[fit_rows], y.iloc[fit_rows])
    return classifier.predict(test)

开发者ID:99sbr，项目名称:Machine-Learning，代码行数:10，代码来源:Predict_Growth.py

示例10: main

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def _encode_click_features(frame):
    """Encode the categorical columns of a click-log frame in place."""
    frame['countrycode'] = frame['countrycode'].apply(lambda x: ord(x))
    # Missing browser / device ids are mapped through ``myfunc`` using a
    # fixed placeholder string.
    frame['browserid'] = frame['browserid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("unknown"))
    frame['devid'] = frame['devid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("none"))


def main(train_path='/home/vipin/Videos/train.csv',
         test_path='/home/vipin/Videos/test.csv',
         output_path='/home/vipin/Videos/predictions.csv'):
    """Train a click classifier, report hold-out accuracy and write predictions.

    Accuracy is measured with a separate model fitted on the training part
    of a 67/33 split, so it is evaluated on rows that model has not seen.
    The predictions written to disk come from a model fitted on all rows.

    Args:
        train_path: CSV with the feature columns and the ``click`` target.
        test_path: CSV with the feature columns and an ``ID`` column.
        output_path: destination CSV for the ``ID`` / ``probability`` table.
    """
    # Set seed for reproducibility
    np.random.seed(0)

    print("Loading data...")
    training_data = pd.read_csv(train_path, header=0)
    prediction_data = pd.read_csv(test_path, header=0)

    # Both frames go through the same encoding.
    _encode_click_features(training_data)
    _encode_click_features(prediction_data)

    features = ['siteid', 'offerid', 'category', 'merchant', 'countrycode', 'browserid', 'devid']
    target = "click"
    X = training_data[features]
    x_prediction = prediction_data[features]
    Y = training_data[target]
    ids = prediction_data["ID"]

    print("Training...")
    # Hold-out evaluation: split first and fit only on the training part,
    # otherwise the accuracy would be measured on data the model was fitted on.
    seed = 7
    test_size = 0.33
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=seed)
    eval_model = XGBClassifier()
    eval_model.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, eval_model.predict(X_test))

    # The model used for the submission is trained on every available row.
    model = XGBClassifier()
    model.fit(X, Y)

    print("Predicting...")
    y_prediction = model.predict_proba(x_prediction)
    results = y_prediction[:, 1]
    results_df = pd.DataFrame(data={'probability': results})
    joined = pd.DataFrame(ids).join(results_df)

    print("Accuracy: %.2f%%" % (accuracy * 100.0))
    print("Writing predictions to predictions.csv")
    # Save the predictions out to a CSV file
    joined.to_csv(output_path, index=False)

开发者ID:vipinkumar7，项目名称:Machine-Learning，代码行数:58，代码来源:clickdata.py

示例11: test_xgboost

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def test_xgboost():
    """Ensure that the TPOT xgboost method outputs the same as the xgboost classifier method"""
    tpot_obj = TPOT()
    # NOTE(review): TPOT is called with learning_rate=0 while the reference
    # classifier uses 0.0001; presumably TPOT raises the value to that
    # minimum - confirm.
    tpot_frame = tpot_obj._xgradient_boosting(training_testing_data, n_estimators=100, learning_rate=0, max_depth=3)
    testing_rows = tpot_frame[tpot_frame['group'] == 'testing']

    reference = XGBClassifier(n_estimators=100, learning_rate=0.0001, max_depth=3, seed=42)
    reference.fit(training_features, training_classes)
    expected_guesses = reference.predict(testing_features)

    assert np.array_equal(testing_rows['guess'].values, expected_guesses)

开发者ID:booleancandy，项目名称:tpot，代码行数:13，代码来源:tests.py

示例12: update_model

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def update_model(current_year):
    """Retrain the match-outcome model and pickle its artefacts.

    Steps, in order: load scoreboard rows from ``data/fantasy_football.db``
    under the current working directory, build features, keep rows whose
    managers are both in ``unique_managers(current_year)``, standardise,
    split 75/25, select features with ``feature_selection``, grid-search
    ``n_estimators`` and ``learning_rate`` with 10-fold CV, print train and
    validation ROC AUC, and pickle the best estimator.

    Pickled artefacts: ``'standard.scaler'`` and ``'fantasy.predict'``.
    ``feature_selection`` is also called here.

    Args:
        current_year: passed to ``unique_managers`` to obtain the manager
            names that rows are filtered on.
    """
    # Every print below passes one pre-formatted string, so the output is the
    # same on Python 2 and Python 3.
    print('Creating model...\nDate: {}'.format(datetime.now().strftime('%Y-%m-%d_%H:%M:%S')))

    managers = tuple(unique_managers(current_year))

    # Each game appears twice: once as stored and once with the two sides
    # swapped (the UNION branch).
    sql = "select * from (select week, year, manager1_name, manager2_name, team1_points, team1_projected, team2_points, team2_projected, type \
         from scoreboard_all WHERE team1_points > 0 and week<=13 \
        UNION select week, year, manager2_name AS manager1_name, manager1_name as manager2_name, team2_points AS team1_points, \
        team2_projected AS team1_projected, team1_points as team2_points, team1_projected AS team2_projected, type FROM scoreboard_all \
        where team1_points>0 and week<=13) order by year, week, type;"

    ff1 = download_data(os.path.join(os.getcwd(), 'data/fantasy_football.db'), sql)

    data_features = custom_features(ff1)
    data_features = data_features[(data_features.manager1_name.isin(managers)) & (data_features.manager2_name.isin(managers))]
    X, y, managers, league_type = dummy_and_interaction(data_features)
    sc = StandardScaler()
    X_std = sc.fit_transform(X)
    pickle_model(sc, 'standard.scaler')

    # Select best features
    X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size=0.25, random_state=None)

    model = XGBClassifier()
    model.fit(X_train, y_train)
    X_train_trans, X_test_trans = feature_selection(model, X_train, X_test, y_train, y_test, eval_metric='auc')

    # Select best params
    model = XGBClassifier()
    learning_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
    n_estimators = [50, 100, 150, 200, 250, 300]
    param_grid = dict(n_estimators=n_estimators, learning_rate=learning_rate)

    # NOTE(review): newer scikit-learn releases appear to name this scorer
    # "neg_log_loss" - confirm against the installed version.
    grid_search = GridSearchCV(model, param_grid, scoring="log_loss", cv=10, verbose=1)
    result = grid_search.fit(X_train_trans, y_train)

    print("Best: {0} using {1}".format(result.best_score_, result.best_params_))
    print('Best params:  {}'.format(result.best_params_))
    best_est = result.best_estimator_
    validation = best_est.predict_proba(X_train_trans)
    # Formatted explicitly: ``print("label", value)`` shows a tuple repr when
    # run as a Python 2 print statement.
    print("Roc AUC Train: {}".format(roc_auc_score(y_train, validation[:, 1], average='macro')))

    probs = best_est.predict_proba(X_test_trans)
    print("Roc AUC Validation: {}".format(roc_auc_score(y_test, probs[:, 1], average='macro')))

    pickle_model(best_est, 'fantasy.predict')

开发者ID:jmc856，项目名称:Webpage，代码行数:53，代码来源:predict.py

示例13: train_model_xgb_meta

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def train_model_xgb_meta(train_x, train_y, xgb_features):
    """Fit an XGBClassifier with early stopping on a stratified hold-out split.

    Args:
        train_x: feature frame, sliced with ``.ix``.
        train_y: target series, sliced with ``.ix``; also drives the
            stratified shuffle split (20% held out, ``random_state=1``).
        xgb_features: mapping that supplies ``max_depth``, ``learning_rate``,
            ``n_estimators`` (cast to ``int``), ``subsample``,
            ``colsample_bytree`` and ``min_child_weight``.

    Returns:
        The value returned by ``XGBClassifier.fit``.
    """
    splitter = StratifiedShuffleSplit(train_y, random_state=1, test_size=0.2)

    # Each split overwrites the previous one, so the data of the final split
    # produced by the splitter is what gets used below.
    for fit_rows, eval_rows in splitter:
        x_train = train_x.ix[fit_rows, :]
        y_train = train_y.ix[fit_rows]

        x_eval = train_x.ix[eval_rows, :]
        y_eval = train_y.ix[eval_rows]

    classifier = XGBClassifier(
        max_depth=xgb_features['max_depth'],
        learning_rate=xgb_features['learning_rate'],
        n_estimators=int(xgb_features['n_estimators']),
        objective='binary:logistic',
        subsample=xgb_features['subsample'],
        colsample_bytree=xgb_features['colsample_bytree'],
        min_child_weight=xgb_features['min_child_weight'],
    )

    # Log loss on the hold-out rows is monitored; training stops after 10
    # rounds without improvement.
    return classifier.fit(x_train, y_train, verbose=True, eval_metric='logloss',
                          eval_set=[(x_eval, y_eval)], early_stopping_rounds=10)

开发者ID:drawer87，项目名称:kaggle_bnp，代码行数:30，代码来源:XGBOOST.py

示例14: xgboostcv

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def xgboostcv(max_depth,
              learning_rate,
              n_estimators,
              gamma,
              min_child_weight,
              max_delta_step,
              subsample,
              colsample_bytree,
              silent=True,
              nthread=-1,
              seed=1234):
    """Score one hyper-parameter setting of XGBClassifier on a hold-out split.

    The data is not passed in: ``train`` and ``train_labels`` are read from
    the enclosing scope and split 90/10 with ``random_state=seed``.
    ``max_depth`` and ``n_estimators`` are truncated to ``int`` before use.

    Returns:
        ``auc(y_valid, y_pred)`` where ``y_pred`` is column 1 of
        ``predict_proba`` on the validation part.
    """
    estimator_params = dict(
        max_depth=int(max_depth),
        learning_rate=learning_rate,
        n_estimators=int(n_estimators),
        silent=silent,
        nthread=nthread,
        gamma=gamma,
        min_child_weight=min_child_weight,
        max_delta_step=max_delta_step,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        seed=seed,
        objective="binary:logistic",
    )
    clf = XGBClassifier(**estimator_params)

    # A single hold-out split (not K-fold) provides the early-stopping set.
    X_train, X_valid, y_train, y_valid = train_test_split(
        train, train_labels, test_size=0.1, random_state=seed)
    fitted = clf.fit(X_train, y_train, eval_metric="auc",
                     eval_set=[(X_valid, y_valid)], early_stopping_rounds=20)
    valid_scores = fitted.predict_proba(X_valid)[:,1]

    # NOTE(review): ``auc`` is defined outside this function; if it is
    # sklearn.metrics.auc rather than an alias of roc_auc_score, this is not
    # a ROC AUC - confirm.
    return auc(y_valid, valid_scores)

开发者ID:mpearmain，项目名称:springleaf，代码行数:36，代码来源:xgb_autotune.py

示例15: runner

# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import fit [as 别名]
def runner():
    """Cross-validate, then fit on all rows of ``Model().df`` and print test metrics.

    The target column is ``"tred_cutoff"`` and every other column of the
    frame is a feature. After ``cv`` has run, a default XGBClassifier is
    fitted on the full frame and F1, precision and recall are printed for
    its predictions on ``m.X_test`` against ``m.y_test``.
    """
    m = Model()
    target_column = "tred_cutoff"
    X = m.df.drop(target_column, axis=1)
    Y = m.df[target_column]
    cv(X, Y, m.features + m.features_2)

    final_model = XGBClassifier()
    final_model.fit(X, Y)
    predictions = final_model.predict(m.X_test)

    # Compute all three metrics before printing any of them.
    metrics = (
        ("test f1", f1_score),
        ("test precision", precision_score),
        ("test recall", recall_score),
    )
    results = [(label, metric(m.y_test, predictions)) for label, metric in metrics]
    for label, value in results:
        print(label, value)

开发者ID:nogur9，项目名称:PTSD，代码行数:19，代码来源:tred+as+target.py

注：本文中的xgboost.XGBClassifier.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。