本文整理汇总了Python中xgboost.XGBClassifier.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python XGBClassifier.predict_proba方法的具体用法?Python XGBClassifier.predict_proba怎么用?Python XGBClassifier.predict_proba使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类xgboost.XGBClassifier
的用法示例。
在下文中一共展示了XGBClassifier.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: kfold_cv
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
def kfold_cv(X_train, y_train, idx, k):
    """Evaluate an XGBClassifier with stratified k-fold CV on log-loss.

    Only the FIRST fold is actually run (note the ``break``); the function
    returns that fold's predicted probabilities, true labels and row ids.

    Parameters
    ----------
    X_train : 2-d array of features
    y_train : 1-d array of binary labels
    idx     : row identifiers aligned with X_train
    k       : number of stratified folds

    Returns
    -------
    (ypred, yreal, idx) for the first fold.

    NOTE(review): the original source lost its indentation during scraping;
    structure reconstructed here. Print statements converted to Python 3.
    """
    kf = StratifiedKFold(y_train, n_folds=k)  # old sklearn (<0.18) API
    xx = []           # per-fold log-loss scores
    count = 0
    for train_index, test_index in kf:
        count += 1
        X_train_cv, X_test_cv = X_train[train_index, :], X_train[test_index, :]
        gc.collect()  # free memory between folds; folds can be large
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
        y_pred = np.zeros(X_test_cv.shape[0])
        # m = 0 disables this hand-rolled bagging loop — kept as a
        # deliberately switched-off experiment, as in the original.
        m = 0
        for j in range(m):
            clf = xgb_classifier(eta=0.05, min_child_weight=20, col=0.5,
                                 subsample=0.7, depth=7, num_round=400,
                                 seed=j * 77, gamma=0.1)
            y_pred += clf.train_predict(X_train_cv, (y_train_cv), X_test_cv,
                                        y_test=(y_test_cv))
            yqq = y_pred * (1.0 / (j + 1))  # running average of j+1 models
            print(j, llfun(y_test_cv, yqq))
        # y_pred/=m;
        clf = XGBClassifier(max_depth=10, colsample_bytree=0.8,
                            learning_rate=0.02, n_estimators=500, nthread=-1)
        # clf=RandomForestClassifier(n_jobs=-1,n_estimators=100,max_depth=100)
        clf.fit(X_train_cv, (y_train_cv), eval_metric="logloss",
                eval_set=[(X_test_cv, y_test_cv)])
        # column 1 of predict_proba = P(class == 1)
        y_pred = clf.predict_proba(X_test_cv).T[1]
        print(y_pred.shape)
        xx.append(llfun(y_test_cv, (y_pred)))
        ypred = y_pred
        yreal = y_test_cv
        idx = idx[test_index]
        print(xx[-1])  # ,y_pred.shape
        break  # evaluate the first fold only
    print(xx, 'average:', np.mean(xx), 'std', np.std(xx))
    return ypred, yreal, idx  # np.mean(xx)
示例2: LabelEncoder
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
# One-hot encode the categorical user features, train an XGBClassifier on
# the labelled rows, and write the 5 most probable destination countries
# per test user (Airbnb "New User Bookings" submission format).
# NOTE(review): the scraped source lost loop indentation; reconstructed here.
ohe_feats = ['gender', 'signup_method', 'signup_flow', 'language',
             'affiliate_channel', 'affiliate_provider',
             'first_affiliate_tracked', 'signup_app', 'first_device_type',
             'first_browser']
for f in ohe_feats:
    # replace each raw categorical column with its dummy-indicator columns
    df_all_dummy = pd.get_dummies(df_all[f], prefix=f)
    df_all = df_all.drop([f], axis=1)
    df_all = pd.concat((df_all, df_all_dummy), axis=1)
# split df into test and training data (first piv_train rows are labelled)
vals = df_all.values
X = vals[:piv_train]
le = LabelEncoder()
y = le.fit_transform(labels)
X_test = vals[piv_train:]
# use xgboost XGBClassifier; multi:softprob yields one probability per class
xgb = XGBClassifier(max_depth=8, learning_rate=0.075, n_estimators=250,
                    objective='multi:softprob', subsample=0.75,
                    colsample_bytree=0.85, seed=13)
xgb.fit(X, y)
y_pred = xgb.predict_proba(X_test)
# select the 5 highest probability classes for each test user
ids = []  # list ids (each repeated 5 times)
cts = []  # list countries (top-5 per user, most probable first)
for i in range(len(id_test)):
    idx = id_test[i]
    ids += [idx] * 5
    cts += le.inverse_transform(np.argsort(y_pred[i])[::-1])[:5].tolist()
# generate output 'pysub.csv'
sub = pd.DataFrame(np.column_stack((ids, cts)), columns=['id', 'country'])
sub.to_csv('/Users/ianmurray/Documents/kaggle/airbnb/output/pysub.csv',
           index=False)
示例3: xgboostcv
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
def xgboostcv(max_depth,
              learning_rate,
              n_estimators,
              subsample,
              colsample_bytree,
              gamma,
              min_child_weight,
              silent=True,
              nthread=-1,
              seed=1234):
    """Objective function for hyper-parameter search (e.g. BayesianOptimization).

    Fits an XGBClassifier on the module-level training split (x0, y0) with
    early stopping on the validation split (x1, y1), and returns the
    NEGATED validation log-loss so that a maximiser minimises log-loss.

    max_depth / n_estimators are cast to int because optimisers propose floats.

    NOTE(review): indentation reconstructed; the original scrape flattened it.
    """
    clf = XGBClassifier(max_depth=int(max_depth),
                        learning_rate=learning_rate,
                        n_estimators=int(n_estimators),
                        silent=silent,
                        nthread=nthread,
                        subsample=subsample,
                        colsample_bytree=colsample_bytree,
                        gamma=gamma,
                        min_child_weight=min_child_weight,
                        seed=seed,
                        objective="binary:logistic")
    clf.fit(x0, y0, eval_metric="logloss", eval_set=[(x1, y1)],
            early_stopping_rounds=25)
    # negate: higher return value == lower log-loss
    ll = -log_loss(y1, clf.predict_proba(x1))
    return ll
示例4: main
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
def main():
    """Train an XGB click model on train.csv and write test-set
    click probabilities to predictions.csv.

    NOTE(review): indentation reconstructed — the scraped source flattened it.
    NOTE(review): the model is fitted on ALL of X before train_test_split, so
    the reported accuracy is measured on data the model has already seen and
    is optimistic; kept as in the original to preserve behaviour.
    """
    # Set seed for reproducibility
    np.random.seed(0)
    print("Loading data...")
    # Load the data from the CSV files
    training_data = pd.read_csv('/home/vipin/Videos/train.csv', header=0)
    prediction_data = pd.read_csv('/home/vipin/Videos/test.csv', header=0)
    # encode country code as its ordinal; map browser/device through myfunc,
    # substituting placeholder categories for missing values
    training_data['countrycode'] = training_data['countrycode'].apply(lambda x: ord(x))
    training_data['browserid'] = training_data['browserid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("unknown"))
    training_data['devid'] = training_data['devid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("none"))
    # pd.to_csv('/home/vipin/Videos/train11.csv', sep=',', encoding='utf-8')
    # exit(0)
    prediction_data['countrycode'] = prediction_data['countrycode'].apply(lambda x: ord(x))
    prediction_data['browserid'] = prediction_data['browserid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("unknown"))
    prediction_data['devid'] = prediction_data['devid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("none"))
    features = ['siteid', 'offerid', 'category', 'merchant', 'countrycode',
                'browserid', 'devid']
    target = "click"
    X = training_data[features]
    x_prediction = prediction_data[features]
    Y = training_data[target]
    ids = prediction_data["ID"]
    model = XGBClassifier()
    # linear_model.LogisticRegression(n_jobs=-1)
    print("Training...")
    # Your model is trained on the training_data
    model.fit(X, Y)
    print("Predicting...")
    seed = 7
    test_size = 0.33
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=seed)
    y_prediction = model.predict_proba(x_prediction)
    results = y_prediction[:, 1]  # P(click == 1)
    results_df = pd.DataFrame(data={'probability': results})
    joined = pd.DataFrame(ids).join(results_df)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))
    print("Writing predictions to predictions.csv")
    # Save the predictions out to a CSV file
    joined.to_csv("/home/vipin/Videos/predictions.csv", index=False)
示例5: train_model_xgb
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
def train_model_xgb(train_x, train_y, xgb_features):
    """Fit an XGBClassifier on a stratified 90/10 train/eval split.

    Parameters
    ----------
    train_x, train_y : pandas DataFrame / Series of features and labels
    xgb_features     : dict of hyper-parameters (max_depth, learning_rate,
                       n_estimators, subsample, colsample_bytree,
                       min_child_weight)

    Returns
    -------
    (fitted model, Series 'PredictedProb' of P(class==1) on the train split)

    NOTE(review): indentation reconstructed from a scrape that flattened it;
    the loop iterates the (old-API) StratifiedShuffleSplit and the model is
    trained on the split left by the last iteration — confirm against caller.
    """
    train_ind = StratifiedShuffleSplit(train_y, random_state=1, test_size=0.1)
    for train_index, test_index in train_ind:
        # indices are positional -> .iloc (.ix was removed from pandas)
        x_train = train_x.iloc[train_index, :]
        y_train = train_y.iloc[train_index]
        x_eval = train_x.iloc[test_index, :]
        y_eval = train_y.iloc[test_index]
    # Classifier
    xgb = XGBClassifier(max_depth=xgb_features['max_depth'],
                        learning_rate=xgb_features['learning_rate'],
                        n_estimators=int(xgb_features['n_estimators']),
                        objective='binary:logistic',
                        subsample=xgb_features['subsample'],
                        colsample_bytree=xgb_features['colsample_bytree'],
                        min_child_weight=xgb_features['min_child_weight'])
    # gives 0.458
    xgb = xgb.fit(x_train, y_train, verbose=True, eval_metric='logloss',
                  eval_set=[(x_eval, y_eval)], early_stopping_rounds=10)
    # probabilities at the early-stopped best iteration, on the train split
    predictions = pd.Series(
        xgb.predict_proba(x_train, ntree_limit=xgb.best_iteration)[:, 1],
        name='PredictedProb')
    return xgb, predictions
示例6:
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
# NOTE(review): fragment — the xgb.cv(...) call these first arguments belong
# to begins before this excerpt, and all indentation was flattened by the
# scraper. Left byte-identical; comments only.
Xg_train,
num_boost_round = clf.get_params()['n_estimators'],
nfold = 5,
show_progress = True,
early_stopping_rounds = 100)
# use the CV-determined number of rounds, then refit on the full train set
clf.set_params(n_estimators=cvresult.shape[0])
clf.fit(X_train, y_train)
best_outcome_params = clf.get_params()
best_outcome_score = cvresult.min()
# prefer the grid-search object if one exists; otherwise fall back to clf
try:
# predict the outcome probabilities
y_pred = grid.predict_proba(X_test)
except:
# predict the outcome probabilities
y_pred = clf.predict_proba(X_test)
# Create a data frame indexed by a 1-based 'ID' column
column_names = possible_outcomes[:]
idx = pd.Int64Index(np.arange(1,11457, dtype='int64'))
idx.rename('ID', inplace=True)
df = pd.DataFrame(index = idx, data=y_pred, columns=column_names)
# write it to file, timestamp it
import time, datetime
ts = time.time()
submission_time_stamp = datetime.datetime.fromtimestamp(ts).strftime('%Y.%m.%d.%H.%M.%S')
df.to_csv('./Data/xgb_submission_'+submission_time_stamp+'.csv',header=True)
# save parameters to file:
示例7: main
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
def main():
# NOTE(review): truncated excerpt (tail omitted by the source page) with all
# indentation flattened by the scraper. Left byte-identical; comments only.
# Purpose per the visible code: read a flavour-tagging training dataframe,
# bootstrap nBootstrap times over 3-fold CV, train XGB models, calibrate the
# fold-3 probabilities and collect ROC AUCs / tagging-power scores.
args = parse_args()
config = parse_config(args.config_file)
if config is None:
print('No configuration file is defined. '
'Define one with `--config-file`.')
sys.exit(1)
if args.plot_dir is not None:
if not os.path.isdir(args.plot_dir):
os.mkdir(args.plot_dir)
index_cols = config['index_features']
event_cols = config['unique_event_features']
# this will be the training dataframe
if args.input_file:
merged_training_df = read_root(args.input_file, stop=args.stop)
merged_training_df.set_index(index_cols, inplace=True)
# duplicates may have ended up in the root file
len_before = len(merged_training_df)
merged_training_df.drop_duplicates(inplace=True)
print(f'Dropped {(1 - len(merged_training_df) / len_before) * 100:.5f}%'
' duplicated entries in dataframe')
else:
merged_training_df = read_full_files(args, config)
# in every case, define a proper target
merged_training_df['target'] = merged_training_df.eval(config['target_eval'])
# sort for performance
merged_training_df.sort_index(inplace=True)
print_avg_tagging_info(merged_training_df, config)
mva_features = config['mva_features']
total_event_number = get_event_number(config)
# sum of per-event sWeights, counting each event once (head(1))
selected_event_number = (merged_training_df.groupby(
event_cols).SigYield_sw.head(1).sum())
# build BDT model and train the classifier nBootstrap x 3 times
xgb_kwargs = config['xgb_kwargs']
n_jobs = config['n_jobs']
sorting_feature = config['sorting_feature']
bootstrap_roc_aucs = []
bootstrap_scores = []
bootstrap_d2s = []
bootstrap_roc_curves = []
bootstrap_calibration_params = []
# CLI flag overrides the config value
nBootstrap = args.n_bootstrap or config['n_bootstrap']
print('Starting bootstrapping.')
pbar = tqdm(total=nBootstrap * 6)
for _ in range(nBootstrap):
# yield 3-fold split for CV
df_sets = [merged_training_df.iloc[indices]
for indices in NSplit(merged_training_df)]
# try to compensate for slow subset creation
pbar.update(3)
for i in range(3):
# rotate the roles of the three folds: train / calibrate / evaluate
df1, df2, df3 = (df_sets[i % 3],
df_sets[(i + 1) % 3],
df_sets[(i + 2) % 3])
model = XGBClassifier(nthread=n_jobs, **xgb_kwargs)
model.fit(df1[mva_features], df1.target,
sample_weight=df1.SigYield_sw)
# in-sample AUC on the training fold
roc1 = roc_auc_score(df1.target,
model.predict_proba(df1[mva_features])[:, 1])
probas = model.predict_proba(df2[mva_features])[:, 1]
roc2 = roc_auc_score(df2.target, probas)
# calibrate
calibrator = PolynomialLogisticRegression(power=3,
solver='lbfgs',
n_jobs=n_jobs)
calibrator.fit(probas.reshape(-1, 1), df2.target,
sample_weight=df2.SigYield_sw)
bootstrap_calibration_params.append(calibrator.lr.coef_)
probas = model.predict_proba(df3[mva_features])[:, 1]
calib_probas = calibrator.predict_proba(probas)[:, 1]
roc3 = roc_auc_score(df3.target, calib_probas)
# concatenating here, since df3 is a view on the main df and will
# throw warnings when adding any columns to it
df3 = pd.concat([
df3.reset_index(),
pd.Series(calib_probas, name='calib_probas'),
], axis=1)
# keep only the best tagging particle per event
best_indices = df3.groupby(event_cols)[sorting_feature].idxmax()
best_particles = df3.loc[best_indices]
bootstrap_roc_aucs.append([roc1, roc2, roc3])
score = tagging_power_score(best_particles, config,
efficiency=selected_event_number/total_event_number,
etas='calib_probas')
if args.plot_dir is not None:
#......... remainder of this example omitted by the source page .........
示例8: len
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
# NOTE(review): fragment — begins inside a per-column missing-value loop that
# starts before this excerpt; indentation flattened by the scraper and the
# code mixes Python 2 print statements. Left byte-identical; comments only.
# Missing values are imputed with the sentinel -9999 rather than the mean.
train.loc[train_series.isnull(), train_name] = -9999 #train_series.mean()
#and Test
tmp_len = len(test[test_series.isnull()])
if tmp_len>0:
test.loc[test_series.isnull(), test_name] = -9999 #train_series.mean() #TODO
X_train = train
X_test = test
extc = XGBClassifier(max_depth=10,colsample_bytree=0.8,learning_rate=0.02,n_estimators=500,nthread=-1)#max_features= 50,criterion= 'entropy',min_samples_split= 4,
#max_depth= 50, min_samples_leaf= 4)
# NOTE(review): the "test" labels are read from a previous submission file,
# i.e. the eval_set target is itself a prediction — confirm intent.
y_test=pd.read_csv('good/xgb4.csv')['real'].values
extc.fit(X_train,target,eval_metric="logloss",eval_set=[(X_test, y_test)])
print('Predict...')
y_pred = extc.predict_proba(X_test)
#print y_pred
pd.DataFrame({"ID": id_test, "PredictedProb": y_pred[:,1]}).to_csv('mycv1.csv',index=False)
y=pd.read_csv('good/xgb4.csv')['real'].values
yp=y_pred[:,1]
# drop the leading "0." of the score to build a file-name suffix
score=str(llfun(y,yp))[2:]
print sys.argv[0],score
import subprocess
# back up the submission and this script, tagged with the score
cmd='cp mycv1.csv vabackup/mycv%s.csv'%score
subprocess.call(cmd,shell=True)
cmd='cp mycv.py vabackup/mycv%s.py'%score
subprocess.call(cmd,shell=True)
示例9: XGBClassifier
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
# NOTE(review): fragment — begins inside a CV loop that starts before this
# excerpt (Python 2 prints, indentation flattened by the scraper). Left
# byte-identical; comments only.
biter.append(best_iter)
print "---log_loss: %0.6f\n" %ll
print "---best_iter: %d\n" %best_iter
gc.collect()
# refit budget = mean early-stopped iteration across folds, plus margin
# NOTE(review): np.mean returns a float; n_estimators normally expects an
# int — confirm xgboost version tolerance.
best_i = np.mean(biter) + 50
# train on whole data
gbm = XGBClassifier(max_depth=8,
learning_rate = 0.01,
n_estimators=best_i,
subsample=0.9,
colsample_bytree=0.45,
objective="binary:logistic",
silent = False,
min_child_weight=1,
nthread=-1)
gbm.fit(train_processed, target, eval_metric="logloss",
eval_set = [(train_processed, target)],
verbose=20)
tid = test_processed["ID"].copy()
assert (len(tid) == 114393), "test length does not match!"
test_processed.drop(["ID", "target", "train_flag"], axis = 1, inplace = True)
# P(class==1) for the submission
tpreds = gbm.predict_proba(test_processed)[:, 1]
sub = pd.DataFrame({"ID" : tid, "PredictedProb" : tpreds})
submission_file = os.path.join(submission_dir, "xgb_denormalized.csv")
sub.to_csv(submission_file, index = False)
end_time = datetime.now()
print 'elapsed time: {}'.format(end_time - start_time)
示例10: print
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
# Second-level (stacking) model: load the per-model out-of-fold predictions
# saved as .npy files, concatenate them column-wise, fit an XGBClassifier on
# the stacked train matrix and score log-loss on the stacked test matrix.
# NOTE(review): for-loop indentation reconstructed; the scrape flattened it.
print('We have %d classes and %d models TOTAL so in resulting arrays '
      'we expect to see %d columns.'
      % (n_classes, len(models_1) + len(models_2),
         n_classes * (len(models_1) + len(models_2))))
# Create empty arrays (0 columns; columns are appended per loaded model)
S_train_all = np.zeros((X_train.shape[0], 0))
S_test_all = np.zeros((X_test.shape[0], 0))
# Load results: each .npy holds (S_train, S_test) for one model
for name in sorted(glob('*.npy')):
    print('Loading: %s' % name)
    S = np.load(name)
    S_train_all = np.c_[S_train_all, S[0]]
    S_test_all = np.c_[S_test_all, S[1]]
print('\nS_train_all shape:', S_train_all.shape)
print('S_test_all shape: ', S_test_all.shape)
# Initialize 2nd level model
model = XGBClassifier(random_state=0, n_jobs=-1, learning_rate=0.1,
                      n_estimators=100, max_depth=3)
# Fit 2nd level model
model = model.fit(S_train_all, y_train)
# Predict
y_pred = model.predict_proba(S_test_all)
# Final prediction score
print('Final prediction score: %.8f' % log_loss(y_test, y_pred))
示例11: XGBClassifier
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
# NOTE(review): fragment — the gbm.fit(...) call these arguments belong to
# begins before this excerpt (Python 2 prints, indentation flattened by the
# scraper). Left byte-identical; comments only.
eval_set=[(trainingSet[feature_names], np.array(trainingSet["TARGET"])), (validationSet[feature_names], np.array(validationSet["TARGET"]))],
early_stopping_rounds=200,verbose=20)
# record this fold's best AUC and early-stopped iteration
ll = gbm.best_score
best_iter = gbm.best_iteration
cv.append(ll)
biter.append(best_iter)
print "---auc : %0.6f\n" %ll
print "---best_iter: %d\n" %best_iter
gc.collect()
# refit on the full training set with a fixed iteration budget
gbm = XGBClassifier(max_depth=4,
learning_rate = 0.01,
n_estimators=370,
subsample=0.8,
colsample_bytree=0.5,
objective="binary:logistic",
silent = False,
min_child_weight=5,
nthread=-1)
gbm.fit(train[feature_names], np.array(train["TARGET"]),
eval_metric = "auc",
eval_set = [(train[feature_names], np.array(train["TARGET"]))],
verbose=20)
# P(class==1) for the submission
tpreds = gbm.predict_proba(test[feature_names])[:, 1]
df = pd.DataFrame({"ID" : test["ID"], "TARGET" : tpreds })
submission_name = "stacked_xgb_3.csv"
df.to_csv(os.path.join(output_dir, submission_name), index = False)
示例12: XGBClassifier
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
# Fit an XGBClassifier with AUC tracking on (train, test) eval sets, report
# the best round per set, then score a validation frame.
# NOTE(review): `learning_rate` and `probs` are defined before this excerpt.
n_estimators = 600
max_depth = 6
subsample = 0.9
colsample_bytree = 0.85
min_child_weight = 1  # default
eval_metrics = ['auc']
eval_sets = [(X_train, y_train), (X_test, y_test)]
xgb = XGBClassifier(seed=0, learning_rate=learning_rate, n_estimators=n_estimators,
                    min_child_weight=min_child_weight, max_depth=max_depth,
                    colsample_bytree=colsample_bytree, subsample=subsample)
print("Fitting the model")
xgb = xgb.fit(X_train, y_train, eval_metric=eval_metrics, eval_set=eval_sets, verbose=False)
print("Predicting Probabilities")
probs['xgb'] = xgb.predict_proba(X_test)[:, -1]  # last column = positive class
print("Computing AUC")
# per-round AUC for each eval set -> rows: rounds, cols: eval sets
auc_test = [xgb.evals_result_['validation_%d' % i]['auc'] for i in range(len(eval_sets))]
auc_test = np.array(auc_test, dtype=float).T
auc_best_round = np.argmax(auc_test, axis=0)
auc_best = [auc_test[auc_best_round[0], 0], auc_test[auc_best_round[1], 1]]
print('Best AUC train=%f (round=%d), test=%f (round=%d)'
      % (auc_best[0], auc_best_round[0], auc_best[1], auc_best_round[1]))
print('Validation')
test_probs = pd.DataFrame()
test_probs['xgb_valid'] = xgb.predict_proba(df_test)[:, -1]
print(test_probs['xgb_valid'].head())
# FIX: pass the 1-d probability column, not the whole DataFrame — roc_curve
# expects a 1-d y_score, and passing the frame breaks if columns are added.
fpr, tpr, thresholds = metrics.roc_curve(df_test_target, test_probs['xgb_valid'], pos_label=1)
示例13: main
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
def main():
# NOTE(review): truncated excerpt (tail omitted by the source page) with all
# indentation flattened by the scraper. Left byte-identical; comments only.
# Purpose per the visible code: read ROOT tagging data in chunks, build a
# target from B/tagging-particle charge signs, then bootstrap 3-fold CV with
# XGB models and probability calibration.
args = parse_args()
config = parse_config(args.config_file)
if config is None:
print('No configuration file is defined. '
'Define one with `--config-file`.')
sys.exit(1)
# read dataset
files = config['files']
if 'filepath' in config:
files = [config['filepath'] + f for f in files]
kwargs = config['pandas_kwargs']
print('Reading ', end='')
# count total entries across all ROOT trees to size the progress bar
entries = 0
for f in files:
rootfile = ROOT.TFile(f)
tree = rootfile.Get(kwargs['key'])
entries += tree.GetEntries()
maxslices = args.max_slices
chunksize = kwargs['chunksize']
total = (maxslices
if maxslices is not None and maxslices < (entries / chunksize)
else (entries / chunksize))
print(total * chunksize, 'events.')
# stream at most `maxslices` chunks and concatenate into one dataframe
df = pd.concat([
df for df in tqdm(
islice(
read_root(files, flatten=True, **kwargs), maxslices),
total=total)])
# rename the tagging particle branches
df.rename(columns=dict(zip(df.columns,
[c.replace(config['tagging_particle_prefix'], 'tp').replace('-', '_')
for c in df.columns])),
inplace=True)
df['event_id'] = df.runNumber.apply(str) + '_' + df.eventNumber.apply(str)
# target: whether the tagging particle charge matches the B meson charge
if 'invert_target' in config and config['invert_target']:
df['target'] = np.sign(df.B_ID) != np.sign(df.tp_ID)
else:
df['target'] = np.sign(df.B_ID) == np.sign(df.tp_ID)
# read features and selections
try:
if 'inclusive_mva_features' in config:
mva_features = ['tp_' + f for f in config['inclusive_mva_features']]
else:
mva_features = ['tp_' + f.split(' ')[0] for f in config['selections']]
except:
raise ValueError('Tried to parse features for the BDT.'
' Either provide well-formatted `selections` or'
' define a `inclusive_mva_features` set.')
# build BDT model and train the classifier n_cv x 3 times
xgb_kwargs = config['xgb_kwargs']
n_jobs = config['n_jobs']
bootstrap_scores = []
bootstrap_d2s = []
# CLI flag overrides the config value
nfold = (args.bootstrap_folds
if args.bootstrap_folds is not None
else config['n_cv'])
print('Starting bootstrapping.')
pbar = tqdm(total=nfold * 3)
for _ in range(nfold):
# yield 3-fold split for CV
df_sets = [df.iloc[indices] for indices in NSplit(df)]
cv_scores = []
for i in range(3):
# rotate fold roles: train / calibrate / evaluate
df1, df2, df3 = (df_sets[i % 3].copy(),
df_sets[(i + 1) % 3].copy(),
df_sets[(i + 2) % 3].copy())
model = XGBClassifier(nthread=n_jobs, **xgb_kwargs)
# NOTE(review): this `sample_weight` is computed but never passed to
# fit() below (SigYield_sw is used instead) — confirm intent.
sample_weight = (df1.target
if 'training_weights' in config
and config['training_weights']
else None)
model.fit(df1[mva_features], df1.target,
sample_weight=df1.SigYield_sw)
df2['probas'] = model.predict_proba(df2[mva_features])[:, 1]
df2.reset_index(inplace=True, drop=True)
# best (highest-proba) tagging particle per event
df2_max = df2.iloc[df2.groupby('event_id')['probas'].idxmax()].copy()
df3['probas'] = model.predict_proba(df3[mva_features])[:, 1]
df3.reset_index(inplace=True, drop=True)
df3_max = df3.iloc[df3.groupby('event_id')['probas'].idxmax()].copy()
# calibrate
calibrator = PolynomialLogisticRegression(power=4,
solver='lbfgs',
n_jobs=n_jobs)
calibrator.fit(df2_max.probas.reshape(-1, 1), df2_max.target,
sample_weight=df2_max.SigYield_sw)
df3_max['calib_probas'] = calibrator.predict_proba(df3_max.probas)[:, 1]
score = tagging_power_score(df3_max.calib_probas,
tot_event_number=get_event_number(df3_max),
#......... remainder of this example omitted by the source page .........
示例14: range
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
# Search max_depth in [5, 10) by 5-fold CV accuracy, refit the best model on
# all data, and build the shelter-outcome submission frame.
# NOTE(review): loop indentation reconstructed; the scrape flattened it.
max_score = 0
max_n = 5  # FIX: initialise so max_n is defined even if no score beats 0
for i in range(5, 10):
    model = XGBClassifier(max_depth=i)
    kf = KFold(len(y), n_folds=5, random_state=42, shuffle=True)  # old sklearn API
    # Using accuracy because of final table using it measure
    score = cross_val_score(model, X, y, cv=kf, scoring='accuracy').mean()
    print('Cross validation score =', score)
    print('max_depth =', i)
    if score > max_score:
        max_score = score
        max_n = i
print('Max Cross validation score =', max_score)
print('Max max_depth =', max_n)
# refit the winning depth on the full data set
model = XGBClassifier(max_depth=max_n)
model.fit(X, y)
prediction = model.predict_proba(test_pred)
# Just to see what features are important and what are not
print(model.feature_importances_)
# Step 3. Save data to file. Column order follows LabelEncoder's class order.
submission = pd.DataFrame({
    "ID": test["ID"],
    "Adoption": prediction[:, 0],
    "Died": prediction[:, 1],
    "Euthanasia": prediction[:, 2],
    "Return_to_owner": prediction[:, 3],
    "Transfer": prediction[:, 4]
})
示例15: XGBClassifier
# 需要导入模块: from xgboost import XGBClassifier [as 别名]
# 或者: from xgboost.XGBClassifier import predict_proba [as 别名]
# NOTE(review): truncated fragment — part of a per-fold loop; the final
# XGBClassifier(...) call is cut off by the source page, and indentation was
# flattened by the scraper (Python 2 print). Left byte-identical; comments only.
bst = XGBClassifier(max_depth=8,
learning_rate = 0.01,
n_estimators=2100,
subsample=0.9,
colsample_bytree=0.45,
objective="binary:logistic",
silent = False,
min_child_weight=1,
nthread=-1)
bst.fit(X_train, y_train, eval_metric= "logloss",
eval_set=[(X_train, y_train), (X_valid, y_valid)],
verbose=200)
# fold log-loss on the held-out validation set
preds = bst.predict_proba(X_valid)[:, 1]
ll = log_loss(validationSet["target"], preds)
df = pd.DataFrame({"ID" : validationSet["ID"], pred_name : preds})
# accumulate out-of-fold predictions for later stacking
eval_matrix = eval_matrix.append(df, ignore_index = True)
print "fold : {} | logloss: {}".format(i+1, ll)
# free per-fold objects before the next iteration
del trainingSet, validationSet, bst, preds, ll, X_train, X_valid, y_train, y_valid
gc.collect()
# refit on the whole training set (constructor call truncated below)
X_train = train[feature_names].copy()
y_train = np.array(train["target"].copy())
bst = XGBClassifier(max_depth=8,
learning_rate = 0.01,
n_estimators=2100,
subsample=0.9,
colsample_bytree=0.45,
objective="binary:logistic",
开发者ID:bishwarup307,项目名称:BNP_Paribas_Cardiff_Claim_Management,代码行数:33,代码来源:xgb_denormalized_layer_1.py