当前位置: 首页>>代码示例>>Python>>正文


Python xgboost.XGBClassifier类代码示例

本文整理汇总了 Python 中 xgboost.XGBClassifier 的典型用法代码示例。如果您正苦于以下问题：Python XGBClassifier 类的具体用法？Python XGBClassifier 怎么用？Python XGBClassifier 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了 XGBClassifier 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: kfold_cv

def kfold_cv(X_train, y_train, idx, k):
    """Fit an XGBClassifier on the first stratified fold and score it.

    The data is split into ``k`` stratified folds, but only the first fold is
    actually trained and evaluated (the loop exits after one iteration).

    Args:
        X_train: 2-D array indexable as ``X_train[rows, :]``.
        y_train: label array indexable by an index array.
        idx: array of row identifiers, indexable by an index array.
        k: number of stratified folds to create.

    Returns:
        A tuple ``(ypred, yreal, idx)``: column 1 of ``predict_proba`` for the
        held-out rows (presumably the positive-class probability), the true
        labels of those rows, and the identifiers of those rows.
    """
    kf = StratifiedKFold(y_train, n_folds=k)
    xx = []  # per-fold scores returned by llfun
    for train_index, test_index in kf:
        X_train_cv, X_test_cv = X_train[train_index, :], X_train[test_index, :]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]

        clf = XGBClassifier(max_depth=10, colsample_bytree=0.8, learning_rate=0.02,
                            n_estimators=500, nthread=-1)
        # The held-out fold doubles as the evaluation set reported during fitting.
        clf.fit(X_train_cv, y_train_cv, eval_metric="logloss",
                eval_set=[(X_test_cv, y_test_cv)])
        y_pred = clf.predict_proba(X_test_cv).T[1]
        print(y_pred.shape)
        xx.append(llfun(y_test_cv, y_pred))
        ypred = y_pred
        yreal = y_test_cv
        idx = idx[test_index]
        print(xx[-1])
        # Deliberately stop after the first fold; the remaining folds are unused.
        break

    # Single-argument print keeps the output identical on Python 2 and 3.
    print("%s average: %s std %s" % (xx, np.mean(xx), np.std(xx)))
    return ypred, yreal, idx
开发者ID:daxiongshu,项目名称:bnp,代码行数:35,代码来源:ada7.py

示例2: xgboostcv

def xgboostcv(max_depth,
              learning_rate,
              n_estimators,
              subsample,
              colsample_bytree,
              gamma,
              min_child_weight,
              silent=True,
              nthread=-1,
              seed=1234):
    """Train an XGBClassifier with the given hyper-parameters and score it.

    The model is fitted on the module-level ``x0``/``y0`` data with early
    stopping monitored on ``x1``/``y1``. ``max_depth`` and ``n_estimators``
    are truncated to integers before use.

    Returns:
        The negated log loss of the fitted model on ``x1``/``y1`` (so larger
        is better).
    """
    params = {
        "max_depth": int(max_depth),
        "learning_rate": learning_rate,
        "n_estimators": int(n_estimators),
        "silent": silent,
        "nthread": nthread,
        "subsample": subsample,
        "colsample_bytree": colsample_bytree,
        "gamma": gamma,
        "min_child_weight": min_child_weight,
        "seed": seed,
        "objective": "binary:logistic",
    }
    booster = XGBClassifier(**params)

    booster.fit(x0, y0,
                eval_metric="logloss",
                eval_set=[(x1, y1)],
                early_stopping_rounds=25)
    holdout_proba = booster.predict_proba(x1)
    return -log_loss(y1, holdout_proba)
开发者ID:mpearmain,项目名称:bnp,代码行数:26,代码来源:xgb_autotune.py

示例3: xgboostcv

def xgboostcv(max_depth,
              learning_rate,
              n_estimators,
              gamma,
              min_child_weight,
              max_delta_step,
              subsample,
              colsample_bytree,
              silent=True,
              nthread=-1,
              seed=1234):
    """Train an XGBClassifier on a 90/10 split and score the held-out part.

    The module-level ``train`` / ``train_labels`` data is split once with
    ``train_test_split`` (seeded by ``seed``); the 10% validation part is used
    both for early stopping and for the returned score. ``max_depth`` and
    ``n_estimators`` are truncated to integers before use.

    Returns:
        ``auc(y_valid, y_pred)`` where ``y_pred`` is column 1 of
        ``predict_proba`` on the validation rows.
        NOTE(review): ``auc`` is whatever the module binds to that name --
        confirm it is a ROC-AUC scorer.
    """
    params = {
        "max_depth": int(max_depth),
        "learning_rate": learning_rate,
        "n_estimators": int(n_estimators),
        "silent": silent,
        "nthread": nthread,
        "gamma": gamma,
        "min_child_weight": min_child_weight,
        "max_delta_step": max_delta_step,
        "subsample": subsample,
        "colsample_bytree": colsample_bytree,
        "seed": seed,
        "objective": "binary:logistic",
    }
    estimator = XGBClassifier(**params)

    # Single hold-out split: the validation part drives early stopping.
    X_train, X_valid, y_train, y_valid = train_test_split(
        train, train_labels, test_size=0.1, random_state=seed)
    fitted = estimator.fit(X_train, y_train,
                           eval_metric="auc",
                           eval_set=[(X_valid, y_valid)],
                           early_stopping_rounds=20)
    valid_scores = fitted.predict_proba(X_valid)[:, 1]

    return auc(y_valid, valid_scores)
开发者ID:mpearmain,项目名称:springleaf,代码行数:34,代码来源:xgb_autotune.py

示例4: test_predict_sklearn_pickle

    def test_predict_sklearn_pickle(self):
        """Fit a GPU XGBClassifier, round-trip it through a pickle file, then predict.

        The model is saved with ``save_obj``, dropped, reloaded with
        ``load_obj`` and the temporary file is removed before the reloaded
        model is asked for margin predictions on the test matrix.
        """
        from xgboost import XGBClassifier

        X, y = makeXy()
        Xtest = makeXtest()

        gpu_params = {
            'tree_method': 'gpu_hist',
            'predictor': 'gpu_predictor',
            'silent': 0,
            'objective': 'binary:logistic',
        }

        model = XGBClassifier(**gpu_params)
        model.fit(X, y)
        print(model)

        # Persist the fitted model, discard the in-memory copy, and reload it.
        pickle_path = "model.pkl"
        save_obj(model, pickle_path)
        del model
        model = load_obj(pickle_path)
        os.remove(pickle_path)

        # From here on only the reloaded model is used.
        print("Before model.predict")
        sys.stdout.flush()
        started = time.time()
        gpu_pred = model.predict(Xtest, output_margin=True)
        print(gpu_pred)
        print("E non-zeroes: %d:" % (np.count_nonzero(gpu_pred)))
        print("E GPU Time to predict = %g" % (time.time() - started))
开发者ID:wamsiv,项目名称:h2o4gpu,代码行数:31,代码来源:test_gpu_prediction_pickledmodel.py

示例5: cv

def cv(X_train, y_train, features_inner):
    """Run shuffled 5-fold stratified cross-validation with a default XGBClassifier.

    For each fold the F1, precision and recall on the held-out rows are
    printed; the means over all folds are printed at the end. Nothing is
    returned.

    Args:
        X_train: feature DataFrame (``.values`` and ``.columns`` are used).
        y_train: target with a ``.values`` attribute; each fold's slice is
            wrapped in a one-column frame named ``"tred_cutoff"``.
        features_inner: accepted but not used by this function.
    """
    splitter = StratifiedKFold(n_splits=5, shuffle=True)
    feature_names = X_train.columns

    f1_history, precision_history, recall_history = [], [], []

    for fit_rows, eval_rows in splitter.split(X_train, y_train):
        fold_model = XGBClassifier()

        # Rebuild fresh frames so each fold starts from a clean 0..n-1 index.
        fold_X_fit = pd.DataFrame(X_train.values[fit_rows], columns=feature_names)
        fold_y_fit = pd.DataFrame(y_train.values[fit_rows], columns=["tred_cutoff"])
        fold_X_eval = pd.DataFrame(X_train.values[eval_rows], columns=feature_names)
        fold_y_eval = pd.DataFrame(y_train.values[eval_rows], columns=["tred_cutoff"])

        fold_model.fit(fold_X_fit, fold_y_fit)
        fold_pred = fold_model.predict(fold_X_eval)

        fold_f1 = f1_score(fold_y_eval, fold_pred)
        fold_precision = precision_score(fold_y_eval, fold_pred)
        fold_recall = recall_score(fold_y_eval, fold_pred)

        print("\tscores f1", fold_f1)
        print("\tscores p", fold_precision)
        print("\tscores r", fold_recall)

        f1_history.append(fold_f1)
        precision_history.append(fold_precision)
        recall_history.append(fold_recall)

    print("mean scores f1", np.mean(f1_history))
    print("mean scores p", np.mean(precision_history))
    print("mean scores r", np.mean(recall_history))
开发者ID:nogur9,项目名称:PTSD,代码行数:32,代码来源:tred+as+target.py

示例6: xgboost_classifier

 def xgboost_classifier(self):
     """Benchmark a default XGBClassifier on this object's data.

     Prints the cross-validation scores on ``x_data``/``y_data``, then fits on
     ``x_train``/``y_train`` and prints the score on ``x_test``/``y_test``
     together with the wall-clock time taken by fitting and scoring.
     """
     cls = XGBClassifier()
     cv_scores = cross_val_score(cls, self.x_data, self.y_data)
     # Single-argument prints produce the same text on Python 2 and 3.
     print('xgboost cross validation score %s' % (cv_scores,))
     start_time = time.time()
     cls.fit(self.x_train, self.y_train)
     test_score = cls.score(self.x_test, self.y_test)
     print('score %s' % (test_score,))
     print('time cost %s' % (time.time() - start_time,))
开发者ID:AloneGu,项目名称:ml_algo_box,代码行数:7,代码来源:classifier_benchmark.py

示例7: feature_selection

def feature_selection(model, X_train, X_test, y_train, y_test, eval_metric='auc'):
    """Pick the importance threshold that maximises test ROC AUC and apply it.

    Every distinct non-zero value in ``model.feature_importances_`` is tried
    as a ``SelectFromModel`` threshold. For each one a fresh ``XGBClassifier``
    is trained on the reduced training matrix and scored on the reduced test
    matrix. The best selector is persisted via ``pickle_model`` under the name
    ``'feature.select'`` and used to transform both matrices.

    Args:
        model: an already fitted estimator exposing ``feature_importances_``.
        X_train, X_test: feature matrices accepted by ``SelectFromModel.transform``.
        y_train, y_test: binary targets (column 1 of ``predict_proba`` is
            scored, matching how the rest of this module computes ROC AUC).
        eval_metric: metric name forwarded to ``XGBClassifier.fit``.

    Returns:
        ``(X_train_trans_, X_test_trans_)`` -- the two matrices restricted to
        the selected features.

    Raises:
        ValueError: if no feature has a non-zero importance.
    """
    # Distinct values only: repeated importances would retrain the same model.
    thresholds = sorted({thres for thres in model.feature_importances_ if thres != 0})
    if not thresholds:
        raise ValueError('model has no feature with non-zero importance')

    roc_scores = {}
    for thresh in thresholds:
        selection = SelectFromModel(model, threshold=thresh, prefit=True)
        select_X_train = selection.transform(X_train)

        selection_model = XGBClassifier()
        selection_model.fit(select_X_train, y_train, eval_metric=eval_metric)

        select_X_test = selection.transform(X_test)
        # ROC AUC ranks continuous scores; hard class labels collapse the
        # curve to a single operating point, so score the probabilities.
        y_score = selection_model.predict_proba(select_X_test)[:, 1]

        roc_scores[selection.threshold] = roc_auc_score(y_test, y_score)

    best_thresh = max(roc_scores, key=roc_scores.get)

    fs = SelectFromModel(model, threshold=best_thresh, prefit=True)
    pickle_model(fs, 'feature.select')
    X_train_trans_ = fs.transform(X_train)
    X_test_trans_ = fs.transform(X_test)
    print('total features kept: {}'.format(X_train_trans_.shape[1]))

    return X_train_trans_, X_test_trans_
开发者ID:jmc856,项目名称:Webpage,代码行数:27,代码来源:predict.py

示例8: train_model_xgb_meta

def train_model_xgb_meta(train_x, train_y, xgb_features):
    """Fit an XGBClassifier on a stratified 80/20 split with early stopping.

    Args:
        train_x: feature DataFrame.
        train_y: target Series aligned row-for-row with ``train_x``.
        xgb_features: mapping providing ``max_depth``, ``learning_rate``,
            ``n_estimators`` (cast to int), ``subsample``,
            ``colsample_bytree`` and ``min_child_weight``.

    Returns:
        The fitted classifier (the value returned by ``fit``).
    """
    train_ind = StratifiedShuffleSplit(train_y, random_state=1, test_size=0.2)

    # The splitter yields several splits but only the last one is ever used,
    # so exhaust the iterator first and slice the data once afterwards.
    for train_index, test_index in train_ind:
        pass

    # The split indices are row positions, so select by position. ``.ix`` is
    # label-based on integer indexes (and was removed in pandas 1.0), which
    # picks the wrong rows whenever the index is not 0..n-1.
    x_train = train_x.iloc[train_index, :]
    y_train = train_y.iloc[train_index]

    x_eval = train_x.iloc[test_index, :]
    y_eval = train_y.iloc[test_index]

    # NOTE: the original author recorded "gives 0.458" for this configuration.
    clf = XGBClassifier(max_depth=xgb_features['max_depth'],
                        learning_rate=xgb_features['learning_rate'],
                        n_estimators=int(xgb_features['n_estimators']),
                        objective='binary:logistic',
                        subsample=xgb_features['subsample'],
                        colsample_bytree=xgb_features['colsample_bytree'],
                        min_child_weight=xgb_features['min_child_weight'])

    # The 20% hold-out is the evaluation set that drives early stopping.
    clf = clf.fit(x_train, y_train, verbose=True, eval_metric='logloss',
                  eval_set=[(x_eval, y_eval)], early_stopping_rounds=10)

    return clf
开发者ID:drawer87,项目名称:kaggle_bnp,代码行数:28,代码来源:XGBOOST.py

示例9: XGB_model

def XGB_model(train,y):
    """Fit an XGBClassifier fold by fold and predict the module-level ``test`` set.

    The classifier is fitted once per fold of a 5-fold ``KFold`` over the rows
    of ``train``, each time on that fold's training rows only; the held-out
    rows of each fold are not used.

    NOTE(review): each ``fit`` call presumably replaces the previous fit, so
    only the last fold's training rows would shape the returned predictions
    -- confirm this is intended.

    Returns:
        The predictions of the classifier for ``test``.
    """
    classifier = XGBClassifier(n_estimators=150, learning_rate=0.01)
    from sklearn import cross_validation
    folds = cross_validation.KFold(len(train), n_folds=5, random_state=7)
    for fit_rows, _held_out_rows in folds:
        fold_features = train.iloc[fit_rows]
        fold_target = y.iloc[fit_rows]
        classifier.fit(fold_features, fold_target)
    return classifier.predict(test)
开发者ID:99sbr,项目名称:Machine-Learning,代码行数:8,代码来源:Predict_Growth.py

示例10: main

def _encode_click_features(frame):
    """Encode the ``countrycode``, ``browserid`` and ``devid`` columns of ``frame`` in place."""
    frame['countrycode'] = frame['countrycode'].apply(ord)
    # Missing values are mapped through myfunc using a fixed placeholder string.
    frame['browserid'] = frame['browserid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("unknown"))
    frame['devid'] = frame['devid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("none"))


def main():
    """Train an XGBClassifier on the click data and write test-set probabilities.

    Reads ``train.csv`` and ``test.csv``, encodes the categorical columns,
    reports accuracy on a 33% hold-out, then fits a model on all training rows
    and writes column 1 of its ``predict_proba`` output for every test ``ID``
    to ``predictions.csv``.
    """
    # Set seed for reproducibility
    np.random.seed(0)

    print("Loading data...")
    training_data = pd.read_csv('/home/vipin/Videos/train.csv', header=0)
    prediction_data = pd.read_csv('/home/vipin/Videos/test.csv', header=0)

    _encode_click_features(training_data)
    _encode_click_features(prediction_data)

    features = ['siteid', 'offerid', 'category', 'merchant', 'countrycode', 'browserid', 'devid']
    target = "click"
    X = training_data[features]
    x_prediction = prediction_data[features]
    Y = training_data[target]
    ids = prediction_data["ID"]

    seed = 7
    test_size = 0.33
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    print("Training...")
    # Measure accuracy with a model that has never seen the hold-out rows;
    # scoring the final model on a subset of its own training data would
    # report training accuracy rather than generalisation.
    eval_model = XGBClassifier()
    eval_model.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, eval_model.predict(X_test))

    # The model used for the submission is trained on every labelled row.
    model = XGBClassifier()
    model.fit(X, Y)

    print("Predicting...")
    y_prediction = model.predict_proba(x_prediction)
    results = y_prediction[:, 1]
    results_df = pd.DataFrame(data={'probability': results})
    joined = pd.DataFrame(ids).join(results_df)

    print("Accuracy: %.2f%%" % (accuracy * 100.0))
    print("Writing predictions to predictions.csv")
    # Save the predictions out to a CSV file
    joined.to_csv("/home/vipin/Videos/predictions.csv", index=False)
开发者ID:vipinkumar7,项目名称:Machine-Learning,代码行数:56,代码来源:clickdata.py

示例11: test_xgboost

def test_xgboost():
    """Ensure that the TPOT xgboost method outputs the same as the xgboost classifier method"""

    # Predictions produced through the TPOT wrapper, restricted to the testing rows.
    tpot_instance = TPOT()
    tpot_output = tpot_instance._xgradient_boosting(
        training_testing_data, n_estimators=100, learning_rate=0, max_depth=3)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']
    tpot_guesses = testing_rows['guess'].values

    # Reference predictions from a directly configured classifier.
    # NOTE(review): the wrapper is called with learning_rate=0 while the
    # reference uses 0.0001 -- presumably the wrapper clamps the value; confirm.
    reference = XGBClassifier(n_estimators=100, learning_rate=0.0001, max_depth=3, seed=42)
    reference.fit(training_features, training_classes)
    reference_guesses = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, reference_guesses)
开发者ID:booleancandy,项目名称:tpot,代码行数:11,代码来源:tests.py

示例12: update_model

def update_model(current_year):
    """Rebuild and persist the fantasy-football prediction model.

    Steps: load the score history from ``data/fantasy_football.db``, build
    features for games between the managers returned by
    ``unique_managers(current_year)``, standardise them, select features with
    ``feature_selection``, grid-search ``n_estimators`` / ``learning_rate``
    and print ROC AUC on the training and hold-out parts. The scaler and the
    best estimator are stored with ``pickle_model`` as ``'standard.scaler'``
    and ``'fantasy.predict'``.

    Args:
        current_year: passed to ``unique_managers`` to pick the managers.
    """
    # All prints take one pre-formatted string so the output is the same on
    # Python 2 and 3 (print("label", value) shows a tuple on Python 2).
    print('Creating model...\nDate: {}'.format(datetime.now().strftime('%Y-%m-%d_%H:%M:%S')))

    managers = tuple(unique_managers(current_year))

    # Each game appears twice: once from each manager's point of view.
    sql = "select * from (select week, year, manager1_name, manager2_name, team1_points, team1_projected, team2_points, team2_projected, type \
         from scoreboard_all WHERE team1_points > 0 and week<=13 \
        UNION select week, year, manager2_name AS manager1_name, manager1_name as manager2_name, team2_points AS team1_points, \
        team2_projected AS team1_projected, team1_points as team2_points, team1_projected AS team2_projected, type FROM scoreboard_all \
        where team1_points>0 and week<=13) order by year, week, type;"

    ff1 = download_data(os.path.join(os.getcwd(), 'data/fantasy_football.db'), sql)

    data_features = custom_features(ff1)
    data_features = data_features[(data_features.manager1_name.isin(managers)) & (data_features.manager2_name.isin(managers))]
    X, y, managers, league_type = dummy_and_interaction(data_features)
    sc = StandardScaler()
    X_std = sc.fit_transform(X)
    pickle_model(sc, 'standard.scaler')

    # Select best features
    X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size=0.25, random_state=None)

    model = XGBClassifier()
    model.fit(X_train, y_train)
    X_train_trans, X_test_trans = feature_selection(model, X_train, X_test, y_train, y_test, eval_metric='auc')

    # Select best params
    model = XGBClassifier()
    learning_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
    n_estimators = [50, 100, 150, 200, 250, 300]
    param_grid = dict(n_estimators=n_estimators, learning_rate=learning_rate)

    # NOTE(review): "log_loss" is the scorer name this code was written
    # against; confirm it is still accepted by the installed scikit-learn.
    grid_search = GridSearchCV(model, param_grid, scoring="log_loss", cv=10, verbose=1)
    result = grid_search.fit(X_train_trans, y_train)

    print("Best: {0} using {1}".format(result.best_score_, result.best_params_))
    print('Best params:  {}'.format(result.best_params_))
    best_est = result.best_estimator_
    validation = best_est.predict_proba(X_train_trans)
    print("Roc AUC Train:  {}".format(roc_auc_score(y_train, validation[:, 1], average='macro')))

    probs = best_est.predict_proba(X_test_trans)
    print("Roc AUC Validation:  {}".format(roc_auc_score(y_test, probs[:, 1], average='macro')))

    pickle_model(best_est, 'fantasy.predict')
开发者ID:jmc856,项目名称:Webpage,代码行数:51,代码来源:predict.py

示例13: train

def train(imgfile='img/segmentation', modelfile='segmentation.pkl'):
    """Train a multi-class XGBClassifier on segmented images and pickle it.

    Args:
        imgfile: location handed to ``getFiles`` to enumerate the inputs.
        modelfile: path of the pickle file the fitted model is written to.
    """
    import pickle

    from xgboost import XGBClassifier

    filelabel = getFiles(imgfile)
    row = 120
    col = 40

    # Flatten the per-image sample lists in one linear pass, skipping images
    # without an ``imgmap`` and samples whose first element is None.
    samples = []
    for entry in filelabel:
        imgmap = Img(entry[1], row, col, entry[0]).imgmap
        if imgmap is None:
            continue
        samples.extend(sample for sample in imgmap if sample[0] is not None)

    # Real lists (not lazy map/filter objects) so numpy builds proper arrays
    # on Python 3 as well as Python 2.
    label = np.array([CHARACTER.get(sample[0]) for sample in samples])
    feature = np.array([np.array(sample[1]) for sample in samples])

    xgb = XGBClassifier(objective='multi:softmax', reg_alpha=1.0, reg_lambda=0.0,
                        subsample=0.7, n_estimators=100, learning_rate=0.3)
    # The training data is also passed as the evaluation set for fit.
    model = xgb.fit(feature, label, eval_set=[(feature, label)], eval_metric='mlogloss')

    # Pickle data is binary: text mode fails on Python 3 and can corrupt the
    # stream through newline translation on some platforms.
    with open(modelfile, 'wb') as f:
        pickle.dump(model, f)
开发者ID:wenlihaoyu,项目名称:model,代码行数:16,代码来源:segmentation.py

示例14: runner

def runner():
    """Cross-validate on the model frame, then fit on all of it and score the test split.

    Builds a ``Model``, uses its ``df`` with ``"tred_cutoff"`` as the target,
    runs ``cv`` on it, fits a default ``XGBClassifier`` on the full frame and
    prints F1, precision and recall against ``m.X_test`` / ``m.y_test``.
    """
    m = Model()
    frame = m.df
    X = frame.drop("tred_cutoff", axis=1)
    Y = frame["tred_cutoff"]
    cv(X, Y, m.features + m.features_2)

    final_model = XGBClassifier()
    final_model.fit(X, Y)
    test_pred = final_model.predict(m.X_test)

    # Compute every metric before printing any of them.
    test_f1 = f1_score(m.y_test, test_pred)
    test_precision = precision_score(m.y_test, test_pred)
    test_recall = recall_score(m.y_test, test_pred)

    print("test f1", test_f1)
    print("test precision", test_precision)
    print("test recall", test_recall)
开发者ID:nogur9,项目名称:PTSD,代码行数:17,代码来源:tred+as+target.py

示例15: main

def main():
    """Compare a decision tree and an XGBClassifier on the titanic dataset.

    Reads ``dataset/titanic.csv``, uses ``pclass``, ``age`` and ``sex`` to
    predict ``survived``, one-hot encodes the features with a
    ``DictVectorizer`` and reports both classifiers through
    ``utils.get_trained_result``.
    """
    titanic = pandas.read_csv('dataset/titanic.csv')

    y_set = titanic['survived']
    # Fill missing values with the mean age on a new frame instead of
    # mutating a column selection in place, which triggers pandas'
    # chained-assignment warnings.
    x_set = titanic[['pclass', 'age', 'sex']].fillna(titanic['age'].mean())
    x_train, x_test, y_train, y_test = utils.prepare_train_and_test_sets(x_set, y_set)

    dict_vectorizer = DictVectorizer(sparse=False)
    # 'records' is the documented spelling; the abbreviation 'record' is
    # rejected by current pandas releases.
    x_train = dict_vectorizer.fit_transform(x_train.to_dict(orient='records'))
    x_test = dict_vectorizer.transform(x_test.to_dict(orient='records'))

    decision_tree_classifier = DecisionTreeClassifier()
    utils.get_trained_result(decision_tree_classifier, x_test, x_train, y_test, y_train)

    xgb_classifier = XGBClassifier()
    xgb_classifier.fit(x_train, y_train)
    utils.get_trained_result(xgb_classifier, x_test, x_train, y_test, y_train)
开发者ID:ACEGuiPeng,项目名称:kaggle_demo_tests,代码行数:18,代码来源:example_titanic.py


注:本文中的xgboost.XGBClassifier类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。