当前位置: 首页>>代码示例>>Python>>正文


Python XGBClassifier.predict_proba方法代码示例

本文整理汇总了Python中xgboost.sklearn.XGBClassifier.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python XGBClassifier.predict_proba方法的具体用法?Python XGBClassifier.predict_proba怎么用?Python XGBClassifier.predict_proba使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类xgboost.sklearn.XGBClassifier的用法示例。


在下文中一共展示了XGBClassifier.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: job_function

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def job_function(params):
	"""Evaluate one hyper-parameter vector and return its mean score.

	``params`` is read positionally: 0 learning_rate, 1 max_depth, 2 ss_cs
	(passed as both ``subsample`` and ``colsample_bytree``), 3 gamma,
	4 min_child_weight, 5 reg_lambda, 6 reg_alpha.

	For each of the first ``iterations_per_job`` entries of the module-level
	``Xy`` (positions 0-3 hold X_train, X_test, y_train, y_test) a classifier
	is fitted with early stopping against the test split and scored with
	``calculate_score``. The mean score is printed together with ``params``
	and returned.
	"""
	learning_rate, max_depth, ss_cs, gamma, min_child_weight, reg_lambda, reg_alpha = (
		params[pos] for pos in range(7)
	)

	# The early-stopping window depends on the learning rate.
	if learning_rate <= 0.03:
		early_stopping_rounds = 50
	elif learning_rate >= 0.3:
		early_stopping_rounds = 5
	else:
		early_stopping_rounds = 25

	scores = []
	for fold_no in range(iterations_per_job):
		fold = Xy[fold_no]
		X_train, X_test, y_train, y_test = fold[0], fold[1], fold[2], fold[3]

		# NOTE(review): the encoded training labels are computed but fit()
		# below receives the raw y_train -- confirm this is intended.
		y_train2 = le.transform(y_train)
		y_test2 = le.transform(y_test)

		clf = XGBClassifier(
			max_depth=max_depth,
			learning_rate=learning_rate,
			n_estimators=5000,
			objective='multi:softprob',
			subsample=ss_cs,
			colsample_bytree=ss_cs,
			gamma=gamma,
			min_child_weight=min_child_weight,
			seed=0,
			silent=True,
			reg_lambda=reg_lambda,
			reg_alpha=reg_alpha,
		)
		clf.fit(
			X_train,
			y_train,
			eval_set=[(X_test, y_test2)],
			eval_metric=calculate_score_2,
			early_stopping_rounds=early_stopping_rounds,
			verbose=False,
		)
		# Predict with the tree count selected by early stopping.
		best_limit = clf.booster().best_ntree_limit
		y_predicted = clf.predict_proba(X_test, ntree_limit=best_limit)
		scores.append(calculate_score(y_predicted, y_test2))

	avg_score = np.array(scores).mean()
	print(avg_score, params)
	return avg_score
开发者ID:mircean,项目名称:ML,代码行数:36,代码来源:module6_boost_cv.py

示例2: eval_fn

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
 def eval_fn(params):
     """Cross-validate ``params`` and return the mean early-stopping score.

     For every (train, validation) index pair in ``skf`` the model is refitted
     with ``params`` applied and early stopping on log loss; the per-fold
     ``best_score`` and ``best_iteration`` are averaged over ``n_folds``.
     The averaged tree count is appended to ``n_estimators_lst``.

     When ``X_valid`` is not None the model is refitted on the whole
     ``X_train`` with the averaged tree count, and its log loss on the
     validation data is appended to ``score_valid``.

     Returns the mean of the per-fold ``best_score`` values.
     """
     model = XGBClassifier(n_estimators=n_estimators_max, learning_rate=learning_rate, seed=seed)
     score = 0
     n_estimators = 0
     for tr, va in skf:
         X_tr, y_tr = X_train[tr], y_train[tr]
         X_va, y_va = X_train[va], y_train[va]
         model.set_params(**params)
         model.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], eval_metric='logloss',
                   early_stopping_rounds=50, verbose=False)
         score += model.best_score
         n_estimators += model.best_iteration
     score /= n_folds
     # Floor division keeps the tree count an integer on Python 3 as well
     # (plain "/" would produce a float there); assumes n_folds is an int.
     n_estimators //= n_folds
     n_estimators_lst.append(n_estimators)
     result_str = "train:%.4f ntree:%5d  " % (score, n_estimators)
     if X_valid is not None:
         model.n_estimators = n_estimators
         model.fit(X_train, y_train)
         pr = model.predict_proba(X_valid)[:,1]
         sc_valid = log_loss(y_valid, pr)
         score_valid.append(sc_valid)
         result_str += "valid:%.4f" % sc_valid
     if verbose:
         # Call form prints the same text on Python 2 and Python 3.
         print(result_str)
     return score
开发者ID:tks0123456789,项目名称:ParamTune_experiments,代码行数:28,代码来源:utility.py

示例3: main

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def main(training_data, test_data):
    """Train an XGBoost model and return the five most likely countries per test row.

    Args:
        training_data: DataFrame containing a ``country_destination`` column
            plus the features expected by ``data_cleaner``.
        test_data: DataFrame with the rows to predict.

    Returns:
        DataFrame with columns ``id`` (the test row's index label) and
        ``country``, five rows per test row ordered from most to least
        likely. An empty frame with those columns is returned when there
        are no test rows.

    The input frames are not modified.
    """
    # Merge both sources so they are cleaned consistently; the marker column
    # separates them again afterwards. assign() returns copies, so the
    # caller's frames do not gain a 'source' column.
    merged_data = pd.concat([
        training_data.assign(source='training'),
        test_data.assign(source='test'),
    ])

    # Cleaning data
    cleaned_data = data_cleaner(merged_data)

    # Separating data, removing marker
    pred_df = cleaned_data[cleaned_data['source'] == 'training'].drop('source', axis=1)
    test_pred = cleaned_data[cleaned_data['source'] == 'test'].drop('source', axis=1)

    # Transforming target into ints, saving the encoder for the column labels below.
    # NOTE(review): assumes data_cleaner keeps the training rows in their
    # original order and count, so targets line up with pred_df -- confirm.
    labels = LabelEncoder().fit(training_data['country_destination'])
    target_df = pd.Series(labels.transform(training_data['country_destination']), index=training_data.index)

    # Training model
    xgb_model = XGBClassifier(max_depth=6, learning_rate=0.3, n_estimators=25, objective='multi:softprob',
                              subsample=0.5, colsample_bytree=0.5, seed=0)
    # .values replaces DataFrame.as_matrix(), which newer pandas no longer provides.
    xgb_model.fit(pred_df.values, target_df.tolist())

    # Running the model
    preds = xgb_model.predict_proba(test_pred.values)
    model_probs = pd.DataFrame(preds, index=test_pred.index, columns=labels.classes_)

    # Collect the per-row top-5 frames and concatenate once at the end;
    # concatenating inside the loop re-copies everything on each iteration.
    top_frames = []
    for row_id, row in model_probs.iterrows():
        top5 = pd.DataFrame({'country': row.sort_values(ascending=False).index[:5]})
        top5['id'] = row_id
        top_frames.append(top5)

    if not top_frames:
        return pd.DataFrame(columns=['id', 'country'])

    # Cleaning output and returning it
    stacked_probs = pd.concat(top_frames)
    return stacked_probs[['id', 'country']]
开发者ID:paperparrot,项目名称:Kaggle-scripts,代码行数:58,代码来源:airbnb+xgboost+model.py

示例4: xgboostinitial_predictor

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def xgboostinitial_predictor(train_path, test_path, eval_path):
    """Fit a binary XGBoost model from CSV files and predict two other CSV files.

    Args:
        train_path: CSV whose first column is the index and which contains a
            ``target`` column.
        test_path: CSV of rows to predict (first column is the index).
        eval_path: CSV of evaluation rows to predict (first column is the index).

    Returns:
        Tuple ``(test_output, eval_output)`` of Series named
        ``PredictedProb`` holding column 1 of ``predict_proba`` for each
        file, indexed like the corresponding input.
    """
    # Loading the data
    print('Loading the data...')
    train = pd.read_csv(train_path, index_col=0)
    test = pd.read_csv(test_path, index_col=0)
    eval_df = pd.read_csv(eval_path, index_col=0)
    target = train['target'].copy()
    train.drop('target', axis=1, inplace=True)

    # Training model
    print('Model training begins...')
    xgb_model = XGBClassifier(max_depth=6, learning_rate=0.3, n_estimators=25, objective='binary:logistic',
                              subsample=0.5, colsample_bytree=0.5, seed=0)
    # .values replaces DataFrame.as_matrix(), which newer pandas no longer provides.
    xgb_model.fit(train.values, target.tolist())

    # Running the model
    print('Making predictions....')
    test_preds = xgb_model.predict_proba(test.values)
    eval_preds = xgb_model.predict_proba(eval_df.values)

    print('Cleaning predictions to match expected format....')
    test_frame = pd.DataFrame(test_preds, index=test.index)
    print(test_frame.columns)
    # Selecting one column yields a Series; a Series is labelled through
    # .name (assigning .columns on it does not rename anything).
    test_output = test_frame[1].copy()
    test_output.name = 'PredictedProb'

    eval_output = pd.DataFrame(eval_preds, index=eval_df.index)[1].copy()
    eval_output.name = 'PredictedProb'

    return test_output, eval_output
开发者ID:paperparrot,项目名称:BNP_kaggle,代码行数:42,代码来源:bnp_xgb_init.py

示例5: main

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def main():
    """Fit the tuned classifier on the training CSV and predict the test CSV.

    Reads ``args.train_dataset`` and ``args.test_dataset``, configures the
    classifier from ``best_dicts`` and returns ``(clf, results)`` where
    ``results`` holds the test ``Id`` column next to nine probability
    columns named ``Prediction1`` .. ``Prediction9``.
    """
    train_frame = pd.read_csv(args.train_dataset)
    features = train_frame.drop(['Id', 'Class'], axis=1)
    target = train_frame.loc[:, 'Class']

    test_frame = pd.read_csv(args.test_dataset)
    test_features = test_frame.drop(['Id'], axis=1)
    test_ids = test_frame.loc[:, 'Id']

    clf = XGBClassifier()
    clf.set_params(**best_dicts)
    clf.fit(features, target)

    proba_columns = ['Prediction%d' % k for k in range(1, 10)]
    proba_frame = pd.DataFrame(clf.predict_proba(test_features), columns=proba_columns)
    results = pd.concat([test_ids, proba_frame], axis=1)
    return (clf, results)
开发者ID:Chris19920210,项目名称:Microsoft_malware,代码行数:17,代码来源:final_predictor.py

示例6: objective

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def objective(space):
    """Return the validation log loss for one hyper-parameter sample.

    ``space`` supplies ``n_estimators`` (cast to int), ``learning_rate``,
    ``max_depth``, ``min_child_weight``, ``colsample_bytree`` and
    ``subsample``. The classifier is fitted on the module-level
    ``xTrain``/``yTrain`` and scored on ``xValid``/``yValid``.

    Returns a dict with the keys ``loss`` and ``status``.
    """
    model_kwargs = {
        'n_estimators': int(space['n_estimators']),
        'objective': 'binary:logistic',
        'seed': 37,
        'learning_rate': space['learning_rate'],
        'max_depth': space['max_depth'],
        'min_child_weight': space['min_child_weight'],
        'colsample_bytree': space['colsample_bytree'],
        'subsample': space['subsample'],
    }
    clf = XGBClassifier(**model_kwargs)
    clf.fit(xTrain, yTrain, eval_metric="logloss")

    # Column 1 of the probability matrix is what gets scored.
    valid_proba = clf.predict_proba(xValid)
    loss = log_loss(yValid, valid_proba[:, 1])
    return {'loss': loss, 'status': STATUS_OK}
开发者ID:SeanBE,项目名称:numerai,代码行数:17,代码来源:xgbModel.py

示例7: myThreadFunc

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def myThreadFunc(ThreadID):
	"""Fit and score one classifier on the split stored at ``Xy[ThreadID]``.

	Positions 0-3 of the split hold X_train, X_test, y_train, y_test. The
	hyper-parameters are read from module-level names. The score and the
	early-stopping tree limit are printed, and the score is stored in
	``train_and_test_scores[ThreadID]``.
	"""
	split = Xy[ThreadID]
	X_train, X_test, y_train, y_test = split[0], split[1], split[2], split[3]

	# NOTE(review): the encoded training labels are computed but fit() below
	# receives the raw y_train -- confirm this is intended.
	y_train2 = le.transform(y_train)
	y_test2 = le.transform(y_test)

	clf = XGBClassifier(
		max_depth=max_depth,
		learning_rate=learning_rate,
		n_estimators=5000,
		objective='multi:softprob',
		subsample=ss_cs,
		colsample_bytree=ss_cs,
		gamma=gamma,
		min_child_weight=min_child_weight,
		seed=0,
		silent=True,
		reg_lambda=reg_lambda,
		reg_alpha=reg_alpha,
	)
	clf.fit(
		X_train,
		y_train,
		eval_set=[(X_test, y_test2)],
		eval_metric=calculate_score_2,
		early_stopping_rounds=early_stopping_rounds,
		verbose=False,
	)
	# Predict with the tree count selected by early stopping.
	y_predicted = clf.predict_proba(X_test, ntree_limit=clf.booster().best_ntree_limit)
	score = calculate_score(y_predicted, y_test2)
	print(score, clf.booster().best_ntree_limit)

	train_and_test_scores[ThreadID] = score

示例8: apply_xgb_ens

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def apply_xgb_ens(y_valid, valid_folder='Valid', test_folder='Test'):
    """
    Ensemble stored predictions with an xgboost gradient-boosting classifier.

    The feature matrices come from ``get_X_X_Test(valid_folder, test_folder)``;
    the classifier is fitted on the first matrix against ``y_valid`` and the
    class probabilities for the second matrix are returned.
    """
    # Only the two matrices are used; the remaining two return values are ignored.
    X, X_test, _n_preds, _n_class = get_X_X_Test(valid_folder, test_folder)

    ensembler = XGBClassifier(
        max_depth=4,
        learning_rate=0.05,
        n_estimators=200,
        objective='multi:softprob',
        gamma=0.,
        max_delta_step=0.,
        subsample=0.9,
        colsample_bytree=0.9,
        seed=0,
    )
    ensembler.fit(X, y_valid)
    return ensembler.predict_proba(X_test)
    
    
    
开发者ID:BabelTower,项目名称:kaggle_airbnb,代码行数:18,代码来源:ensemble.py

示例9: perform_prediction

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def perform_prediction(training, labels, testing, xgb_votes, rf_votes):
    """Sum class probabilities from repeated XGBoost and RandomForest fits.

    Args:
        training: Training feature matrix.
        labels: Training labels; the number of distinct values sets the
            number of probability columns.
        testing: Feature matrix to predict; ``len(testing)`` sets the number
            of rows.
        xgb_votes: Number of XGBoost models to fit and add.
        rf_votes: Number of RandomForest models to fit and add.

    Returns:
        Array of shape ``(len(testing), len(set(labels)))`` holding the
        summed (not averaged) ``predict_proba`` outputs; all zeros when both
        vote counts are 0.
    """
    predictions = np.zeros((len(testing), len(set(labels))))
    # Predictions using xgboost.
    for i in range(xgb_votes):
        # Call form prints the same text on Python 2 and Python 3.
        print('XGB vote %d' % i)
        xgb = XGBClassifier(
            max_depth=DEPTH_XGB, learning_rate=LEARNING_XGB,
            n_estimators=ESTIMATORS_XGB, objective='multi:softprob',
            subsample=SUBSAMPLE_XGB, colsample_bytree=COLSAMPLE_XGB)
        xgb.fit(training, labels)
        predictions += xgb.predict_proba(testing)
    # Predictions using RandomForestClassifier.
    for i in range(rf_votes):
        print('RandomForest vote %d' % i)
        rand_forest = RandomForestClassifier(
            n_estimators=ESTIMATORS_RF, criterion=CRITERION_RF, n_jobs=JOBS_RF,
            max_depth=DEPTH_RF, min_samples_leaf=MIN_LEAF_RF, bootstrap=True)
        rand_forest.fit(training, labels)
        predictions += rand_forest.predict_proba(testing)
    return predictions
开发者ID:Zhongjiong,项目名称:kaggle_airbnb_new_user_bookings,代码行数:23,代码来源:prediction.py

示例10: XGBClassifier

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
      "shot_type","shot_zone_area","shot_zone_basic","shot_zone_range",
      "matchup","opponent","game_date","shot_distance","minutes_remaining","seconds_remaining",
      "loc_x","loc_y"]
# Append one-hot columns for every categorical column listed in `cols`.
for col in cols:
    data_x = pd.concat([data_x, pd.get_dummies(data[col], prefix=col)], axis=1)

# Rows with a known shot_made_flag are used for training, the rest for prediction.
# pd.notnull replaces the unary minus on a boolean mask, which current
# NumPy/pandas reject for boolean data.
train_x = data_x[pd.notnull(data.shot_made_flag)]
test_x = data_x[pd.isnull(data.shot_made_flag)]
train_y = data.shot_made_flag[pd.notnull(data.shot_made_flag)]

clf = XGBClassifier(max_depth=6, learning_rate=0.01, n_estimators=550,
                     subsample=0.5, colsample_bytree=0.5, seed=0)
clf.fit(train_x, train_y)
# Training-set error count (measured on the same rows the model was fitted on).
y_pred = clf.predict(train_x)
print("Number of mislabeled points out of a total %d points : %d"  % (train_x.shape[0],(train_y != y_pred).sum()))

def logloss(act, pred):
    """Print and return the binary log loss of ``pred`` against ``act``.

    Args:
        act: Sequence of actual outcomes (0 or 1).
        pred: Sequence of predicted probabilities, same length as ``act``.
            Values are clipped to ``[1e-15, 1 - 1e-15]`` so the logarithm
            stays finite.

    Returns:
        The mean negative log-likelihood.

    Raises:
        ValueError: If ``act`` is empty.
    """
    # Local import: the original relied on NumPy functions re-exported from
    # the top-level scipy namespace (sp.log, sp.maximum, ...), which SciPy
    # has deprecated.
    import numpy as np

    act = np.asarray(act, dtype=float)
    pred = np.asarray(pred, dtype=float)
    if len(act) == 0:
        raise ValueError("logloss requires at least one observation")

    epsilon = 1e-15
    pred = np.clip(pred, epsilon, 1 - epsilon)
    ll = np.sum(act * np.log(pred) + (1 - act) * np.log(1 - pred), axis=0)
    ll = ll * -1.0 / len(act)
    print(ll)
    return ll
    
# Report the log loss on the training rows (column 1 of the probability matrix).
train_proba = clf.predict_proba(train_x)
logloss(train_y, train_proba[:, 1])

# Predict the rows whose shot_made_flag is missing and write them out.
test_y = clf.predict_proba(test_x)[:, 1]
test_id = data.loc[pd.isnull(data.shot_made_flag), "shot_id"]
submission = pd.DataFrame({"shot_id": test_id, "shot_made_flag": test_y})
submission.to_csv("submissson_1.csv", index=False)
开发者ID:Helen-n,项目名称:kaggle,代码行数:32,代码来源:xgboost.py

示例11: xgbost

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def xgbost(x,y,targetx):
    """Fit an XGBoost classifier on ``x``/``y`` and return column 1 of its
    predicted probabilities for ``targetx``."""
    model = XGBClassifier(
        n_estimators=1000,
        max_depth=6,
        learning_rate=0.0075,
        subsample=0.7,
        colsample_bytree=0.7,
        seed=4,
    )
    model.fit(x, y)
    proba = model.predict_proba(targetx)
    return proba[:, 1]
开发者ID:guohuiGH,项目名称:kaggle,代码行数:6,代码来源:train.py

示例12: xgboost_algorithm

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
def xgboost_algorithm(XTrain,YTrain,XTest):
    """Fit a multi-class XGBoost model on the training data and return the
    full ``predict_proba`` matrix for ``XTest``."""
    model = XGBClassifier(
        max_depth=6,
        learning_rate=0.3,
        n_estimators=25,
        objective='multi:softprob',
        subsample=0.5,
        colsample_bytree=0.5,
        seed=0,
    )
    model.fit(XTrain, YTrain)
    return model.predict_proba(XTest)
开发者ID:parthrparekh93,项目名称:AML,代码行数:8,代码来源:ensemble_methods.py

示例13: LabelEncoder

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
# Ages below 14 or above 100 are replaced with 0.
av = train_file.age.values
train_file['age'] = np.where((av < 14) | (av > 100), 0, av)

# One-hot encode each listed feature, placing the new dummy columns first.
train_file_dummy = []
features = [
    'gender', 'age', 'signup_method', 'signup_flow', 'language',
    'affiliate_channel', 'affiliate_provider', 'first_affiliate_tracked',
    'signup_app', 'first_device_type', 'first_browser',
]
for feature in features:
    train_dummy = pd.get_dummies(train_file[feature], prefix=feature)
    train_file = pd.concat((train_dummy, train_file.drop(feature, axis=1)), axis=1)

# Rows before piv_train are training rows, the remainder are test rows.
vals = train_file.values
train_data, test_data = vals[:piv_train], vals[piv_train:]
le = LabelEncoder()
train_labels = le.fit_transform(labels)

# Train the classifier and predict class probabilities for the test rows.
xgb = XGBClassifier(
    max_depth=6, learning_rate=0.3, n_estimators=25,
    objective='multi:softprob', subsample=0.5, colsample_bytree=0.5, seed=0,
)
xgb.fit(train_data, train_labels)
y_pred = xgb.predict_proba(test_data)

# For every test id keep the five classes with the highest probability.
ids = []  # list of ids
cts = []  # list of countries
for i in range(len(id_test)):
    ids.extend([id_test[i]] * 5)
    top5 = np.argsort(y_pred[i])[::-1][:5]
    cts.extend(le.inverse_transform(top5).tolist())

# Generate submission
sub = pd.DataFrame(np.column_stack((ids, cts)), columns=['id', 'country'])
sub.to_csv('sub.csv', index=False)
开发者ID:SidharthGulati,项目名称:Airbnb-New-User-Bookings,代码行数:32,代码来源:Code.py

示例14: XGBClassifier

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
# Write the RandomForest class probabilities, using the class labels as header.
test_predictions.to_csv("/Users/grazim/Documents/Kaggle_Local/Shelter Animal Outcomes/RF_pred.csv",header = RF.classes_ )

# NOTE(review): a different dummy column is dropped from each frame,
# presumably to give both the same column set -- confirm.
pred = pred.drop(['hour_5'], axis = 1)
test_pred = test_pred.drop(['hour_3'], axis = 1)

#XGBoost
XGB = XGBClassifier(n_estimators=15000)
XGB.fit(pred, resp)

# Shift the index by one so the written row labels start at 1.
test_predictions_XGB = pd.DataFrame(XGB.predict_proba(test_pred))
test_predictions_XGB.index += 1

# Write the XGBoost probabilities; the original wrote the RandomForest frame
# (test_predictions) to this file, so XGB_pred.csv never held XGBoost output.
test_predictions_XGB.to_csv("/Users/grazim/Documents/Kaggle_Local/Shelter Animal Outcomes/XGB_pred.csv",header = XGB.classes_ )

# Dump the column names of both feature frames for inspection.
output = pd.DataFrame(pred.columns)
output.to_csv("/Users/grazim/Documents/Kaggle_Local/Shelter Animal Outcomes/pred.csv")

test_output = pd.DataFrame(test_pred.columns)
test_output.to_csv("/Users/grazim/Documents/Kaggle_Local/Shelter Animal Outcomes/test_pred.csv")


开发者ID:MattGraz,项目名称:Kaggle,代码行数:30,代码来源:Initial_Models.py

示例15: range

# 需要导入模块: from xgboost.sklearn import XGBClassifier [as 别名]
# 注意: predict_proba 是 XGBClassifier 的实例方法,不能单独导入;请在已训练的模型实例上调用,例如 clf.predict_proba(X)
        scores2 = []
        for i in range(10):
            folds = StratifiedKFold(y_train, n_folds=5, shuffle=True)
            scores = []
            for train_index, test_index in folds:
                X_train2, X_test2 = X_train.loc[train_index], X_train.loc[test_index]
                y_train2, y_test2 = y_train[train_index], y_train[test_index]

                X_train2, X_test2 = feature_engineering_extra(X_train2, X_test2, y_train2)

                X_train2 = csr_matrix(X_train2.values)
                X_test2 = csr_matrix(X_test2.values)

                clf.fit(X_train2, y_train2)
                y_pred = clf.predict_proba(X_test2)
                score = log_loss(y_test2, y_pred)
                scores.append(round(score, 6))

            scores = np.array(scores)
            score = scores.mean()
            scores2.append(score)
            print('score, std', score, scores.std())

        scores = np.array(scores2)
        scores = np.delete(scores, [scores.argmax(), scores.argmin()])
        print('score, std', scores.mean(), scores.std())

    if is_tt_rf_1 == 1:
        X_train, X_test = feature_engineering(df_train, df_test, y_train)
    
开发者ID:mircean,项目名称:ML,代码行数:31,代码来源:module1.py


注:本文中的xgboost.sklearn.XGBClassifier.predict_proba方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。