

Python XGBClassifier.predict_proba Method Code Examples

This article collects typical usage examples of the Python method xgboost.XGBClassifier.predict_proba. If you are struggling with questions such as what exactly XGBClassifier.predict_proba does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of the class it belongs to, xgboost.XGBClassifier.


Below are 15 code examples of the XGBClassifier.predict_proba method, sorted by popularity by default.
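Before the collected examples, here is a minimal, self-contained sketch of the basic call pattern. The toy data and parameter values are illustrative assumptions, not taken from any project below; the key point is that predict_proba returns an array of shape (n_samples, n_classes), so in a binary problem the positive-class probability is column 1.

import numpy as np
from xgboost import XGBClassifier

# Illustrative toy data: 100 samples, 5 features, binary labels (assumed, not from any example below).
rng = np.random.RandomState(0)
X = rng.rand(100, 5)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)

clf = XGBClassifier(n_estimators=50, max_depth=3, learning_rate=0.1)
clf.fit(X, y)

proba = clf.predict_proba(X)   # shape (100, 2): columns are P(class 0), P(class 1)
positive_proba = proba[:, 1]   # probability of the positive class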

Example 1: kfold_cv

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
def kfold_cv(X_train, y_train,idx,k):

    kf = StratifiedKFold(y_train,n_folds=k)
    xx=[]
    count=0
    for train_index, test_index in kf:
        count+=1
        X_train_cv, X_test_cv = X_train[train_index,:],X_train[test_index,:]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index],y_train[test_index]
        y_pred=np.zeros(X_test_cv.shape[0])
        m=0  # m=0 disables the custom xgb_classifier averaging loop below

        for j in range(m):
            clf=xgb_classifier(eta=0.05,min_child_weight=20,col=0.5,subsample=0.7,depth=7,num_round=400,seed=j*77,gamma=0.1)
            y_pred+=clf.train_predict(X_train_cv,y_train_cv,X_test_cv,y_test=y_test_cv)
            yqq=y_pred*(1.0/(j+1))

            print(j, llfun(y_test_cv, yqq))

        #y_pred/=m;
        clf=XGBClassifier(max_depth=10,colsample_bytree=0.8,learning_rate=0.02,n_estimators=500,nthread=-1)
        #clf=RandomForestClassifier(n_jobs=-1,n_estimators=100,max_depth=100)
        clf.fit(X_train_cv, y_train_cv, eval_metric="logloss", eval_set=[(X_test_cv, y_test_cv)])
        y_pred=clf.predict_proba(X_test_cv).T[1]
        print(y_pred.shape)
        xx.append(llfun(y_test_cv, y_pred))
        ypred=y_pred
        yreal=y_test_cv
        idx=idx[test_index]
        print(xx[-1])  # ,y_pred.shape
        break  # only the first fold is evaluated

    print(xx, 'average:', np.mean(xx), 'std', np.std(xx))
    return ypred,yreal,idx  # np.mean(xx)
Author: daxiongshu, Project: bnp, Lines: 37, Source: ada7.py

Example 2: LabelEncoder

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
ohe_feats = ['gender', 'signup_method', 'signup_flow', 'language', 'affiliate_channel', 'affiliate_provider', 'first_affiliate_tracked', 'signup_app', 'first_device_type', 'first_browser']
for f in ohe_feats:
    df_all_dummy = pd.get_dummies(df_all[f], prefix=f)
    df_all = df_all.drop([f], axis=1)
    df_all = pd.concat((df_all, df_all_dummy), axis=1)

# split df into test and training data
vals = df_all.values
X = vals[:piv_train]
le = LabelEncoder()
y = le.fit_transform(labels)   
X_test = vals[piv_train:]

# use xgboost XGBClassifier 
xgb = XGBClassifier(max_depth=8, learning_rate=0.075, n_estimators=250,
                    objective='multi:softprob', subsample=0.75, colsample_bytree=0.85, seed=13)                  
xgb.fit(X, y)
y_pred = xgb.predict_proba(X_test)  

# select the 5 highest probability classes
ids = []  # list ids
cts = []  # list countries
for i in range(len(id_test)):
    idx = id_test[i]
    ids += [idx] * 5
    cts += le.inverse_transform(np.argsort(y_pred[i])[::-1])[:5].tolist()

# generate output 'pysub.csv'
sub = pd.DataFrame(np.column_stack((ids, cts)), columns=['id', 'country'])
sub.to_csv('/Users/ianmurray/Documents/kaggle/airbnb/output/pysub.csv',index=False)
Author: advnturecaptlst, Project: kaggle_archive, Lines: 32, Source: airbnb.py

Example 3: xgboostcv

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
def xgboostcv(max_depth,
              learning_rate,
              n_estimators,
              subsample,
              colsample_bytree,
              gamma,
              min_child_weight,
              silent=True,
              nthread=-1,
              seed=1234):

    clf = XGBClassifier(max_depth=int(max_depth),
                        learning_rate=learning_rate,
                        n_estimators=int(n_estimators),
                        silent=silent,
                        nthread=nthread,
                        subsample=subsample,
                        colsample_bytree=colsample_bytree,
                        gamma=gamma,
                        min_child_weight = min_child_weight,
                        seed=seed,
                        objective="binary:logistic")

    clf.fit(x0, y0, eval_metric="logloss", eval_set=[(x1, y1)], early_stopping_rounds=25)
    ll = -log_loss(y1, clf.predict_proba(x1))  # negated so the Bayesian optimizer can maximize it
    return ll
Author: mpearmain, Project: bnp, Lines: 28, Source: xgb_autotune.py

Example 4: main

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
def main():
    # Set seed for reproducibility
    np.random.seed(0)

    print("Loading data...")
    # Load the data from the CSV files
    
    training_data = pd.read_csv('/home/vipin/Videos/train.csv', header=0)
    prediction_data = pd.read_csv('/home/vipin/Videos/test.csv', header=0)
     
     
    training_data['countrycode']=training_data['countrycode'].apply(lambda x:ord(x))
    training_data['browserid']=training_data['browserid'].apply(lambda x: myfunc (x) if np.all(pd.notnull(x)) else myfunc("unknown") )
    training_data['devid']=training_data['devid'].apply(lambda x: myfunc (x) if np.all(pd.notnull(x)) else myfunc("none"))
    
    
    #pd.to_csv('/home/vipin/Videos/train11.csv', sep=',', encoding='utf-8')
    #exit(0)
    prediction_data['countrycode']=prediction_data['countrycode'].apply(lambda x:ord(x))
    prediction_data['browserid']=prediction_data['browserid'].apply(lambda x:myfunc (x) if np.all(pd.notnull(x)) else myfunc("unknown") )
    prediction_data['devid']=prediction_data['devid'].apply(lambda x:myfunc (x) if np.all(pd.notnull(x)) else myfunc("none") )
    
    
    features=['siteid','offerid','category','merchant','countrycode','browserid','devid']
    target="click"
    X = training_data[features]
    x_prediction = prediction_data[features]
    Y= training_data[target]
    ids = prediction_data["ID"]
    model = XGBClassifier()
    # model = linear_model.LogisticRegression(n_jobs=-1)

    print("Training...")
    # Your model is trained on the training_data
    model.fit(X, Y)
        
    print("Predicting...")
    
    seed = 7
    test_size = 0.33
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
    y_prediction = model.predict_proba(x_prediction)
    results = y_prediction[:, 1]
    results_df = pd.DataFrame(data={'probability': results})
    joined = pd.DataFrame(ids).join(results_df)

    # Note: the model was fit on all of X above, so X_test overlaps the
    # training data and this accuracy estimate is optimistic.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    

    print("Accuracy: %.2f%%" % (accuracy * 100.0))
    print("Writing predictions to predictions.csv")
        # Save the predictions out to a CSV file
    joined.to_csv("/home/vipin/Videos/predictions.csv", index=False)
Author: vipinkumar7, Project: Machine-Learning, Lines: 58, Source: clickdata.py

Example 5: train_model_xgb

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
def train_model_xgb(train_x, train_y, xgb_features):

    train_ind = StratifiedShuffleSplit(train_y, random_state=1, test_size=0.1)

    for train_index, test_index in train_ind:
        x_train = train_x.iloc[train_index, :]
        y_train = train_y.iloc[train_index]

        x_eval = train_x.iloc[test_index, :]
        y_eval = train_y.iloc[test_index]

    #Classifier
    xgb = XGBClassifier(max_depth=xgb_features['max_depth'], learning_rate=xgb_features['learning_rate'], n_estimators=int(xgb_features['n_estimators']), objective='binary:logistic',
                        subsample=xgb_features['subsample'], colsample_bytree=xgb_features['colsample_bytree'], min_child_weight=xgb_features['min_child_weight'])
    # gives 0.458
    xgb = xgb.fit(x_train, y_train, verbose=True, eval_metric='logloss',  eval_set=[(x_eval, y_eval)], early_stopping_rounds=10)

    predictions = pd.Series(xgb.predict_proba(x_train, ntree_limit=xgb.best_iteration)[:, 1], name='PredictedProb')

    return xgb, predictions
Author: drawer87, Project: kaggle_bnp, Lines: 22, Source: XGBOOST.py

Example 6:

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
                  Xg_train,
                  num_boost_round=clf.get_params()['n_estimators'],
                  nfold=5,
                  show_progress=True,
                  early_stopping_rounds=100)
clf.set_params(n_estimators=cvresult.shape[0])
clf.fit(X_train, y_train)
best_outcome_params = clf.get_params()
best_outcome_score = cvresult.min()

try:
    # predict the outcome probabilities with the grid search object, if one exists
    y_pred = grid.predict_proba(X_test)
except:
    # fall back to the plain classifier
    y_pred = clf.predict_proba(X_test)


# Create a data frame
column_names = possible_outcomes[:]
idx = pd.Int64Index(np.arange(1,11457, dtype='int64'))
idx.rename('ID', inplace=True)
df = pd.DataFrame(index = idx, data=y_pred, columns=column_names)

# write it to file, timestamp it
import time, datetime
ts = time.time()
submission_time_stamp = datetime.datetime.fromtimestamp(ts).strftime('%Y.%m.%d.%H.%M.%S')
df.to_csv('./Data/xgb_submission_'+submission_time_stamp+'.csv',header=True)

# save parameters to file:
Author: pkepley, Project: kaggle.austin.animals, Lines: 33, Source: XGBtmp.py

Example 7: main

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
def main():
    args = parse_args()
    config = parse_config(args.config_file)
    if config is None:
        print('No configuration file is defined. '
              'Define one with `--config-file`.')
        sys.exit(1)

    if args.plot_dir is not None:
        if not os.path.isdir(args.plot_dir):
            os.mkdir(args.plot_dir)

    index_cols = config['index_features']
    event_cols = config['unique_event_features']

    # this will be the training dataframe
    if args.input_file:
        merged_training_df = read_root(args.input_file, stop=args.stop)
        merged_training_df.set_index(index_cols, inplace=True)
        # duplicates may have ended up in the root file
        len_before = len(merged_training_df)
        merged_training_df.drop_duplicates(inplace=True)
        print(f'Dropped {(1 - len(merged_training_df) / len_before) * 100:.5f}%'
              ' duplicated entries in dataframe')
    else:
        merged_training_df = read_full_files(args, config)

    # in every case, define a proper target
    merged_training_df['target'] = merged_training_df.eval(config['target_eval'])

    # sort for performance
    merged_training_df.sort_index(inplace=True)

    print_avg_tagging_info(merged_training_df, config)

    mva_features = config['mva_features']
    total_event_number = get_event_number(config)
    selected_event_number = (merged_training_df.groupby(
        event_cols).SigYield_sw.head(1).sum())

    # build BDT model and train the classifier nBootstrap x 3 times
    xgb_kwargs = config['xgb_kwargs']
    n_jobs = config['n_jobs']

    sorting_feature = config['sorting_feature']

    bootstrap_roc_aucs = []
    bootstrap_scores = []
    bootstrap_d2s = []
    bootstrap_roc_curves = []
    bootstrap_calibration_params = []
    nBootstrap = args.n_bootstrap or config['n_bootstrap']
    print('Starting bootstrapping.')
    pbar = tqdm(total=nBootstrap * 6)
    for _ in range(nBootstrap):
        # yield 3-fold split for CV
        df_sets = [merged_training_df.iloc[indices]
                   for indices in NSplit(merged_training_df)]
        # try to compensate for slow subset creation
        pbar.update(3)

        for i in range(3):
            df1, df2, df3 = (df_sets[i % 3],
                             df_sets[(i + 1) % 3],
                             df_sets[(i + 2) % 3])
            model = XGBClassifier(nthread=n_jobs, **xgb_kwargs)
            model.fit(df1[mva_features], df1.target,
                      sample_weight=df1.SigYield_sw)
            roc1 = roc_auc_score(df1.target,
                                 model.predict_proba(df1[mva_features])[:, 1])

            probas = model.predict_proba(df2[mva_features])[:, 1]
            roc2 = roc_auc_score(df2.target, probas)

            # calibrate
            calibrator = PolynomialLogisticRegression(power=3,
                                                      solver='lbfgs',
                                                      n_jobs=n_jobs)
            calibrator.fit(probas.reshape(-1, 1), df2.target,
                           sample_weight=df2.SigYield_sw)
            bootstrap_calibration_params.append(calibrator.lr.coef_)

            probas = model.predict_proba(df3[mva_features])[:, 1]
            calib_probas = calibrator.predict_proba(probas)[:, 1]
            roc3 = roc_auc_score(df3.target, calib_probas)

            # concatenating here, since df3 is a view on the main df and will
            # throw warnings when adding any columns to it
            df3 = pd.concat([
                    df3.reset_index(),
                    pd.Series(calib_probas, name='calib_probas'),
                ], axis=1)
            best_indices = df3.groupby(event_cols)[sorting_feature].idxmax()
            best_particles = df3.loc[best_indices]

            bootstrap_roc_aucs.append([roc1, roc2, roc3])
            score = tagging_power_score(best_particles, config,
                efficiency=selected_event_number/total_event_number,
                etas='calib_probas')
            if args.plot_dir is not None:
#......... remainder of the code omitted .........
Author: bixel, Project: fttrainer, Lines: 103, Source: crossval_training.py

Example 8: len

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
            train.loc[train_series.isnull(), train_name] = -9999 #train_series.mean()
        #and Test
        tmp_len = len(test[test_series.isnull()])
        if tmp_len>0:
            test.loc[test_series.isnull(), test_name] = -9999 #train_series.mean()  #TODO

X_train = train
X_test = test

extc = XGBClassifier(max_depth=10,colsample_bytree=0.8,learning_rate=0.02,n_estimators=500,nthread=-1)#max_features= 50,criterion= 'entropy',min_samples_split= 4,
                            #max_depth= 50, min_samples_leaf= 4)      
y_test=pd.read_csv('good/xgb4.csv')['real'].values

extc.fit(X_train,target,eval_metric="logloss",eval_set=[(X_test, y_test)]) 

print('Predict...')
y_pred = extc.predict_proba(X_test)
#print y_pred

pd.DataFrame({"ID": id_test, "PredictedProb": y_pred[:,1]}).to_csv('mycv1.csv',index=False)
y=pd.read_csv('good/xgb4.csv')['real'].values
yp=y_pred[:,1]
score=str(llfun(y,yp))[2:]
print(sys.argv[0], score)
import subprocess
cmd='cp mycv1.csv vabackup/mycv%s.csv'%score
subprocess.call(cmd,shell=True)
cmd='cp mycv.py vabackup/mycv%s.py'%score
subprocess.call(cmd,shell=True)

Author: daxiongshu, Project: bnp, Lines: 31, Source: xgb7.py

Example 9: XGBClassifier

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
     biter.append(best_iter)
     print("---log_loss: %0.6f\n" % ll)
     print("---best_iter: %d\n" % best_iter)
     gc.collect()

 best_i = int(np.mean(biter) + 50)  # n_estimators must be an integer
 # train on whole data
 gbm = XGBClassifier(max_depth=8,
                     learning_rate = 0.01,
                     n_estimators=best_i,
                     subsample=0.9,
                     colsample_bytree=0.45,
                     objective="binary:logistic",
                     silent = False,
                     min_child_weight=1,                       
                     nthread=-1)
 
 gbm.fit(train_processed, target, eval_metric="logloss",
         eval_set = [(train_processed, target)],
                     verbose=20)                        
 
 tid = test_processed["ID"].copy()
 assert (len(tid) == 114393), "test length does not match!"
 test_processed.drop(["ID", "target", "train_flag"], axis = 1, inplace = True)
 tpreds = gbm.predict_proba(test_processed)[:, 1]
 sub = pd.DataFrame({"ID" : tid, "PredictedProb" : tpreds})
 submission_file = os.path.join(submission_dir, "xgb_denormalized.csv")
 sub.to_csv(submission_file, index = False)
 
 end_time = datetime.now()
 print('elapsed time: {}'.format(end_time - start_time))
Author: bishwarup307, Project: BNP_Paribas_Cardiff_Claim_Management, Lines: 33, Source: xgb_denormalized.py

Example 10: print

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
print('We have %d classes and %d models TOTAL so in resulting arrays \
we expect to see %d columns.' % (n_classes, len(models_1) + len(models_2), 
                                 n_classes * (len(models_1) + len(models_2))))

# Create empty arrays
S_train_all = np.zeros((X_train.shape[0], 0))
S_test_all = np.zeros((X_test.shape[0], 0))

# Load results
for name in sorted(glob('*.npy')):
    print('Loading: %s' % name)
    S = np.load(name)
    S_train_all = np.c_[S_train_all, S[0]]
    S_test_all = np.c_[S_test_all, S[1]]
    
print('\nS_train_all shape:', S_train_all.shape)
print('S_test_all shape: ', S_test_all.shape)

# Initialize 2nd level model
model = XGBClassifier(random_state=0, n_jobs=-1, learning_rate=0.1, 
                      n_estimators=100, max_depth=3)
    
# Fit 2nd level model
model = model.fit(S_train_all, y_train)

# Predict
y_pred = model.predict_proba(S_test_all)

# Final prediction score
print('Final prediction score: %.8f' % log_loss(y_test, y_pred))
Author: Ewen2015, Project: Kaggle, Lines: 32, Source: Stacking.py

Example 11: XGBClassifier

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
                eval_set=[(trainingSet[feature_names], np.array(trainingSet["TARGET"])), (validationSet[feature_names], np.array(validationSet["TARGET"]))],
                         early_stopping_rounds=200,verbose=20)    
                          
        ll = gbm.best_score
        best_iter = gbm.best_iteration
        cv.append(ll)
        biter.append(best_iter)
        print "---auc : %0.6f\n" %ll
        print "---best_iter: %d\n" %best_iter
        gc.collect()
    
    gbm = XGBClassifier(max_depth=4,
                            learning_rate = 0.01,
                            n_estimators=370,
                            subsample=0.8,
                            colsample_bytree=0.5,
                            objective="binary:logistic",
                            silent = False,
                            min_child_weight=5,                       
                            nthread=-1)
                            
    gbm.fit(train[feature_names], np.array(train["TARGET"]),
            eval_metric = "auc",
            eval_set = [(train[feature_names], np.array(train["TARGET"]))],
                        verbose=20)                            
                        
    tpreds = gbm.predict_proba(test[feature_names])[:, 1]
    df = pd.DataFrame({"ID" : test["ID"], "TARGET" : tpreds })
    submission_name = "stacked_xgb_3.csv"
    df.to_csv(os.path.join(output_dir, submission_name), index = False)
Author: bishwarup307, Project: Santander_Customer_Satisfaction, Lines: 32, Source: L2_xgb.py

Example 12: XGBClassifier

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
n_estimators = 600
max_depth = 6
subsample = 0.9
colsample_bytree = 0.85
min_child_weight = 1  # default

eval_metrics = ['auc']
eval_sets = [(X_train, y_train), (X_test, y_test)]
xgb = XGBClassifier(seed=0, learning_rate=learning_rate, n_estimators=n_estimators,
                    min_child_weight=min_child_weight, max_depth=max_depth,
                    colsample_bytree=colsample_bytree, subsample=subsample)
print("Fitting the model")
xgb = xgb.fit(X_train, y_train, eval_metric=eval_metrics, eval_set=eval_sets, verbose=False)
    
print("Predicting Probabilities")
probs['xgb'] = xgb.predict_proba(X_test)[:, -1]

print("Computing AUC")
auc_test = [xgb.evals_result_['validation_%d' % i]['auc'] for i in range(len(eval_sets))]
auc_test = np.array(auc_test, dtype=float).T

auc_best_round = np.argmax(auc_test, axis=0)
auc_best = [auc_test[auc_best_round[0], 0], auc_test[auc_best_round[1], 1]]

print('Best AUC train=%f (round=%d), test=%f (round=%d)' % (auc_best[0], auc_best_round[0], auc_best[1], auc_best_round[1]))
print('Validation')
test_probs = pd.DataFrame()
test_probs['xgb_valid'] = xgb.predict_proba(df_test)[:,-1]
print(test_probs['xgb_valid'].head())

fpr, tpr, thresholds = metrics.roc_curve(df_test_target, test_probs['xgb_valid'], pos_label=1)  # pass the 1-d column, not the DataFrame
Author: nareshshah139, Project: IE-Group-D-Term3, Lines: 33, Source: FATrial1.py

Example 13: main

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
def main():
    args = parse_args()
    config = parse_config(args.config_file)
    if config is None:
        print('No configuration file is defined. '
              'Define one with `--config-file`.')
        sys.exit(1)

    # read dataset
    files = config['files']
    if 'filepath' in config:
        files = [config['filepath'] + f for f in files]
    kwargs = config['pandas_kwargs']

    print('Reading ', end='')
    entries = 0
    for f in files:
        rootfile = ROOT.TFile(f)
        tree = rootfile.Get(kwargs['key'])
        entries += tree.GetEntries()
    maxslices = args.max_slices
    chunksize = kwargs['chunksize']
    total = (maxslices
             if maxslices is not None and maxslices < (entries / chunksize)
             else (entries / chunksize))
    print(total * chunksize, 'events.')
    df = pd.concat([
        df for df in tqdm(
            islice(
                read_root(files, flatten=True, **kwargs), maxslices),
            total=total)])

    # rename the tagging particle branches
    df.rename(columns=dict(zip(df.columns,
        [c.replace(config['tagging_particle_prefix'], 'tp').replace('-', '_')
            for c in df.columns])),
        inplace=True)
    df['event_id'] = df.runNumber.apply(str) + '_' + df.eventNumber.apply(str)
    if 'invert_target' in config and config['invert_target']:
        df['target'] = np.sign(df.B_ID) != np.sign(df.tp_ID)
    else:
        df['target'] = np.sign(df.B_ID) == np.sign(df.tp_ID)

    # read features and selections
    try:
        if 'inclusive_mva_features' in config:
            mva_features = ['tp_' + f for f in config['inclusive_mva_features']]
        else:
            mva_features = ['tp_' + f.split(' ')[0] for f in config['selections']]
    except:
        raise ValueError('Tried to parse features for the BDT.'
                         ' Either provide well-formatted `selections` or'
                         ' define a `inclusive_mva_features` set.')

    # build BDT model and train the classifier n_cv x 3 times
    xgb_kwargs = config['xgb_kwargs']
    n_jobs = config['n_jobs']

    bootstrap_scores = []
    bootstrap_d2s = []
    nfold = (args.bootstrap_folds
             if args.bootstrap_folds is not None
             else config['n_cv'])
    print('Starting bootstrapping.')
    pbar = tqdm(total=nfold * 3)
    for _ in range(nfold):
        # yield 3-fold split for CV
        df_sets = [df.iloc[indices] for indices in NSplit(df)]

        cv_scores = []
        for i in range(3):
            df1, df2, df3 = (df_sets[i % 3].copy(),
                             df_sets[(i + 1) % 3].copy(),
                             df_sets[(i + 2) % 3].copy())
            model = XGBClassifier(nthread=n_jobs, **xgb_kwargs)
            sample_weight = (df1.target
                             if 'training_weights' in config
                                and config['training_weights']
                             else None)
            model.fit(df1[mva_features], df1.target,
                      sample_weight=df1.SigYield_sw)

            df2['probas'] = model.predict_proba(df2[mva_features])[:, 1]
            df2.reset_index(inplace=True, drop=True)
            df2_max = df2.iloc[df2.groupby('event_id')['probas'].idxmax()].copy()
            df3['probas'] = model.predict_proba(df3[mva_features])[:, 1]
            df3.reset_index(inplace=True, drop=True)
            df3_max = df3.iloc[df3.groupby('event_id')['probas'].idxmax()].copy()

            # calibrate
            calibrator = PolynomialLogisticRegression(power=4,
                                                      solver='lbfgs',
                                                      n_jobs=n_jobs)
            calibrator.fit(df2_max.probas.values.reshape(-1, 1), df2_max.target,
                           sample_weight=df2_max.SigYield_sw)

            df3_max['calib_probas'] = calibrator.predict_proba(df3_max.probas)[:, 1]

            score = tagging_power_score(df3_max.calib_probas,
                                        tot_event_number=get_event_number(df3_max),
#......... remainder of the code omitted .........
Author: bixel, Project: fttrainer, Lines: 103, Source: bdt_training.py

Example 14: range

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
max_score = 0
for i in range(5,10):
    model = XGBClassifier(max_depth=i)
    kf = KFold(len(y),n_folds=5,random_state=42, shuffle=True)
    #Using accuracy because of final table using it measure
    score = cross_val_score(model, X, y, cv=kf, scoring='accuracy').mean()
    print('Cross validation score =', score)
    print('max_depth =', i)
    if score > max_score:
        max_score = score
        max_n = i
print('Max Cross validation score =',max_score)
print('Max max_depth =', max_n)
model = XGBClassifier(max_depth=max_n)
model.fit(X,y)
prediction = model.predict_proba(test_pred)

#Just to see what features are important and what are not
print(model.feature_importances_)

#Step 3. Save data to file.
submission = pd.DataFrame({
    "ID": test["ID"],
    "Adoption": prediction[:,0],
    "Died": prediction[:,1],
    "Euthanasia": prediction[:,2],
    "Return_to_owner": prediction[:,3],
    "Transfer": prediction[:,4]

})
Author: sann05, Project: ShelterAnimalOutcomes, Lines: 32, Source: animals.py

Example 15: XGBClassifier

# Module to import: from xgboost import XGBClassifier [as alias]
# Or: from xgboost.XGBClassifier import predict_proba [as alias]
     
     bst = XGBClassifier(max_depth=8,
                         learning_rate = 0.01,
                         n_estimators=2100,
                         subsample=0.9,
                         colsample_bytree=0.45,
                         objective="binary:logistic",
                         silent = False,
                         min_child_weight=1,                       
                         nthread=-1)
                             
     bst.fit(X_train, y_train, eval_metric= "logloss",
             eval_set=[(X_train, y_train), (X_valid, y_valid)],
                       verbose=200)
                      
     preds = bst.predict_proba(X_valid)[:, 1]
     ll = log_loss(validationSet["target"], preds)
     df = pd.DataFrame({"ID" : validationSet["ID"], pred_name : preds})
     eval_matrix = eval_matrix.append(df, ignore_index = True)
     print "fold : {} | logloss: {}".format(i+1, ll)        
     del trainingSet, validationSet, bst, preds, ll, X_train, X_valid, y_train, y_valid
     gc.collect()
 
 X_train = train[feature_names].copy()
 y_train = np.array(train["target"].copy())
 bst = XGBClassifier(max_depth=8,
                         learning_rate = 0.01,
                         n_estimators=2100,
                         subsample=0.9,
                         colsample_bytree=0.45,
                         objective="binary:logistic",
Author: bishwarup307, Project: BNP_Paribas_Cardiff_Claim_Management, Lines: 33, Source: xgb_denormalized_layer_1.py


Note: the xgboost.XGBClassifier.predict_proba method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and redistribution or use should follow the corresponding project's License. Do not reproduce without permission.