本文整理汇总了Python中xgboost.XGBClassifier类的典型用法代码示例。如果您正苦于以下问题:Python XGBClassifier类的具体用法?Python XGBClassifier怎么用?Python XGBClassifier使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了XGBClassifier类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: kfold_cv
def kfold_cv(X_train, y_train, idx, k):
    """Stratified k-fold CV of an XGBClassifier, stopping after the first fold.

    Fits on the fold's train split, scores log-loss (``llfun``) on the held-out
    split, and returns ``(ypred, yreal, idx)`` for that single fold, where
    ``idx`` is re-indexed to the fold's test rows.
    """
    kf = StratifiedKFold(y_train, n_folds=k)  # old sklearn cross_validation API
    xx = []
    count = 0
    for train_index, test_index in kf:
        count += 1
        X_train_cv, X_test_cv = X_train[train_index, :], X_train[test_index, :]
        gc.collect()  # free memory between folds before the next big slice
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
        y_pred = np.zeros(X_test_cv.shape[0])
        # NOTE(review): m == 0 makes the seed-averaging loop below dead code;
        # kept for parity with the original — confirm before deleting.
        m = 0
        for j in range(m):
            clf = xgb_classifier(eta=0.05, min_child_weight=20, col=0.5,
                                 subsample=0.7, depth=7, num_round=400,
                                 seed=j * 77, gamma=0.1)
            y_pred += clf.train_predict(X_train_cv, y_train_cv, X_test_cv,
                                        y_test=y_test_cv)
            yqq = y_pred * (1.0 / (j + 1))  # running average over seeds
            print(j, llfun(y_test_cv, yqq))
        # y_pred/=m;
        clf = XGBClassifier(max_depth=10, colsample_bytree=0.8,
                            learning_rate=0.02, n_estimators=500, nthread=-1)
        clf.fit(X_train_cv, y_train_cv, eval_metric="logloss",
                eval_set=[(X_test_cv, y_test_cv)])
        y_pred = clf.predict_proba(X_test_cv).T[1]  # positive-class probability
        print(y_pred.shape)
        xx.append(llfun(y_test_cv, y_pred))
        ypred = y_pred
        yreal = y_test_cv
        idx = idx[test_index]
        print(xx[-1])
        break  # deliberately evaluate only the first fold
    print(xx, 'average:', np.mean(xx), 'std', np.std(xx))
    return ypred, yreal, idx
示例2: xgboostcv
def xgboostcv(max_depth,
              learning_rate,
              n_estimators,
              subsample,
              colsample_bytree,
              gamma,
              min_child_weight,
              silent=True,
              nthread=-1,
              seed=1234):
    """Fit an XGBClassifier on the module-level (x0, y0) train split with
    early stopping on (x1, y1), and return the *negated* validation log-loss
    so a maximizing optimizer (e.g. Bayesian optimization) can consume it.
    """
    params = dict(
        max_depth=int(max_depth),
        learning_rate=learning_rate,
        n_estimators=int(n_estimators),
        silent=silent,
        nthread=nthread,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma=gamma,
        min_child_weight=min_child_weight,
        seed=seed,
        objective="binary:logistic",
    )
    clf = XGBClassifier(**params)
    clf.fit(x0, y0, eval_metric="logloss",
            eval_set=[(x1, y1)], early_stopping_rounds=25)
    return -log_loss(y1, clf.predict_proba(x1))
示例3: xgboostcv
def xgboostcv(max_depth,
              learning_rate,
              n_estimators,
              gamma,
              min_child_weight,
              max_delta_step,
              subsample,
              colsample_bytree,
              silent=True,
              nthread=-1,
              seed=1234):
    """Fit an XGBClassifier on a fresh 90/10 split of the module-level
    (train, train_labels) data with AUC early stopping, and return the
    validation AUC (``auc`` is presumably roc_auc_score — defined elsewhere).
    """
    clf = XGBClassifier(max_depth=int(max_depth),
                        learning_rate=learning_rate,
                        n_estimators=int(n_estimators),
                        silent=silent,
                        nthread=nthread,
                        gamma=gamma,
                        min_child_weight=min_child_weight,
                        max_delta_step=max_delta_step,
                        subsample=subsample,
                        colsample_bytree=colsample_bytree,
                        seed=seed,
                        objective="binary:logistic")
    # Hold out 10% as a validation fold to curb over-fitting via early stopping.
    X_train, X_valid, y_train, y_valid = train_test_split(train,
                                                          train_labels,
                                                          test_size=0.1,
                                                          random_state=seed)
    fitted = clf.fit(X_train, y_train, eval_metric="auc",
                     eval_set=[(X_valid, y_valid)], early_stopping_rounds=20)
    probs = fitted.predict_proba(X_valid)[:, 1]
    return auc(y_valid, probs)
示例4: test_predict_sklearn_pickle
def test_predict_sklearn_pickle(self):
    """Train a GPU-backed XGBClassifier, round-trip it through pickle, and
    time margin prediction on the reloaded model to verify GPU predict still
    works after deserialization."""
    X, y = makeXy()
    Xtest = makeXtest()
    from xgboost import XGBClassifier
    params = {
        'tree_method': 'gpu_hist',
        'predictor': 'gpu_predictor',
        'silent': 0,
        'objective': 'binary:logistic',
    }
    model = XGBClassifier(**params)
    model.fit(X, y)
    print(model)
    # Persist, drop the in-memory model, then reload so only the pickled
    # copy is exercised below.
    save_obj(model, "model.pkl")
    del model
    model = load_obj("model.pkl")
    os.remove("model.pkl")
    print("Before model.predict")
    sys.stdout.flush()
    started = time.time()
    gpu_pred = model.predict(Xtest, output_margin=True)
    print(gpu_pred)
    print("E non-zeroes: %d:" % (np.count_nonzero(gpu_pred)))
    print("E GPU Time to predict = %g" % (time.time() - started))
示例5: cv
def cv(X_train, y_train, features_inner):
    """Run 5-fold stratified CV of a default XGBClassifier on the
    'tred_cutoff' target, printing per-fold and mean f1/precision/recall.

    ``features_inner`` is accepted but unused here (kept for the caller's
    signature).
    """
    kfold = StratifiedKFold(n_splits=5, shuffle=True)
    scores_f, scores_p, scores_r = [], [], []
    for train_idx, test_idx in kfold.split(X_train, y_train):
        model = XGBClassifier()
        fold_train_X = pd.DataFrame(X_train.values[train_idx], columns=X_train.columns)
        fold_train_y = pd.DataFrame(y_train.values[train_idx], columns=["tred_cutoff"])
        fold_test_X = pd.DataFrame(X_train.values[test_idx], columns=X_train.columns)
        fold_test_y = pd.DataFrame(y_train.values[test_idx], columns=["tred_cutoff"])
        model.fit(fold_train_X, fold_train_y)
        preds = model.predict(fold_test_X)
        s_f = f1_score(fold_test_y, preds)
        s_p = precision_score(fold_test_y, preds)
        s_r = recall_score(fold_test_y, preds)
        print("\tscores f1", s_f)
        print("\tscores p", s_p)
        print("\tscores r", s_r)
        scores_f.append(s_f)
        scores_p.append(s_p)
        scores_r.append(s_r)
    print("mean scores f1", np.mean(scores_f))
    print("mean scores p", np.mean(scores_p))
    print("mean scores r", np.mean(scores_r))
示例6: xgboost_classifier
def xgboost_classifier(self):
    """Report cross-validation score of a default XGBClassifier on the full
    (x_data, y_data), then fit on the train split and print test accuracy
    and wall-clock training time.

    Fix: original used Python-2 print statements, which are a syntax error
    on Python 3; converted to the print() function (same output).
    """
    cls = XGBClassifier()
    print('xgboost cross validation score',
          cross_val_score(cls, self.x_data, self.y_data))
    start_time = time.time()
    cls.fit(self.x_train, self.y_train)
    print('score', cls.score(self.x_test, self.y_test))
    print('time cost', time.time() - start_time)
示例7: feature_selection
def feature_selection(model, X_train, X_test, y_train, y_test, eval_metric='auc'):
    """Search feature-importance thresholds for the one maximizing test ROC
    AUC, pickle the winning SelectFromModel, and return the transformed
    train/test matrices.

    Fixes: Python-2 print statement converted to print(); the ROC dict is now
    keyed by the loop's ``thresh`` directly instead of ``selection.threshold``
    (the same value, read back off the selector) for clarity.
    """
    # Only consider features that carried non-zero importance.
    thresholds = [thres for thres in sorted(model.feature_importances_) if thres != 0]
    roc_scores = {}
    for thresh in thresholds:
        # Select features at this importance threshold and evaluate a fresh model.
        selection = SelectFromModel(model, threshold=thresh, prefit=True)
        select_X_train = selection.transform(X_train)
        selection_model = XGBClassifier()
        selection_model.fit(select_X_train, y_train, eval_metric=eval_metric)
        select_X_test = selection.transform(X_test)
        y_pred = selection_model.predict(select_X_test)
        roc_scores[thresh] = roc_auc_score(y_test, y_pred)
    best_thresh = max(roc_scores, key=roc_scores.get)
    fs = SelectFromModel(model, threshold=best_thresh, prefit=True)
    pickle_model(fs, 'feature.select')
    X_train_trans_ = fs.transform(X_train)
    X_test_trans_ = fs.transform(X_test)
    print('total features kept: {}'.format(X_train_trans_.shape[1]))
    return X_train_trans_, X_test_trans_
示例8: train_model_xgb_meta
def train_model_xgb_meta(train_x, train_y, xgb_features):
    """Fit an XGB meta-classifier on a stratified 80/20 split (old
    StratifiedShuffleSplit API) with log-loss early stopping; returns the
    fitted classifier."""
    splitter = StratifiedShuffleSplit(train_y, random_state=1, test_size=0.2)
    for train_index, test_index in splitter:
        # NOTE(review): DataFrame.ix was removed in modern pandas — confirm the
        # indices are positional and migrate to .iloc before upgrading.
        x_train = train_x.ix[train_index, :]
        y_train = train_y.ix[train_index]
        x_eval = train_x.ix[test_index, :]
        y_eval = train_y.ix[test_index]
    xgb = XGBClassifier(max_depth=xgb_features['max_depth'],
                        learning_rate=xgb_features['learning_rate'],
                        n_estimators=int(xgb_features['n_estimators']),
                        objective='binary:logistic',
                        subsample=xgb_features['subsample'],
                        colsample_bytree=xgb_features['colsample_bytree'],
                        min_child_weight=xgb_features['min_child_weight'])
    xgb = xgb.fit(x_train, y_train, verbose=True, eval_metric='logloss',
                  eval_set=[(x_eval, y_eval)], early_stopping_rounds=10)
    return xgb
示例9: XGB_model
def XGB_model(train, y):
    """Fit an XGBClassifier per KFold split (each fit overwrites the last)
    and predict the module-level ``test`` set with the final fit."""
    model = XGBClassifier(n_estimators=150, learning_rate=0.01)
    from sklearn import cross_validation
    folds = cross_validation.KFold(len(train), n_folds=5, random_state=7)
    for train_rows, _test_rows in folds:
        model.fit(train.iloc[train_rows], y.iloc[train_rows])
    # NOTE(review): `test` is not a parameter — this relies on a module-level
    # global; confirm it exists at call time.
    y_XGB = model.predict(test)
    return y_XGB
示例10: main
def main():
    """Train an XGBClassifier on click data, write class-1 probabilities for
    the test set to predictions.csv, and report holdout accuracy."""
    # Fixed seed for reproducibility of numpy-based randomness.
    np.random.seed(0)
    print("Loading data...")
    training_data = pd.read_csv('/home/vipin/Videos/train.csv', header=0)
    prediction_data = pd.read_csv('/home/vipin/Videos/test.csv', header=0)
    # Encode categoricals numerically; missing browser/device fall back to
    # sentinel labels before encoding via the module-level myfunc.
    training_data['countrycode'] = training_data['countrycode'].apply(lambda x: ord(x))
    training_data['browserid'] = training_data['browserid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("unknown"))
    training_data['devid'] = training_data['devid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("none"))
    prediction_data['countrycode'] = prediction_data['countrycode'].apply(lambda x: ord(x))
    prediction_data['browserid'] = prediction_data['browserid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("unknown"))
    prediction_data['devid'] = prediction_data['devid'].apply(
        lambda x: myfunc(x) if np.all(pd.notnull(x)) else myfunc("none"))
    features = ['siteid', 'offerid', 'category', 'merchant', 'countrycode',
                'browserid', 'devid']
    target = "click"
    X = training_data[features]
    x_prediction = prediction_data[features]
    Y = training_data[target]
    ids = prediction_data["ID"]
    model = XGBClassifier()
    print("Training...")
    # Model is trained on the full training data; the split below is only
    # used for the accuracy report.
    model.fit(X, Y)
    print("Predicting...")
    seed = 7
    test_size = 0.33
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=seed)
    y_prediction = model.predict_proba(x_prediction)
    results = y_prediction[:, 1]
    results_df = pd.DataFrame(data={'probability': results})
    joined = pd.DataFrame(ids).join(results_df)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))
    print("Writing predictions to predictions.csv")
    joined.to_csv("/home/vipin/Videos/predictions.csv", index=False)
示例11: test_xgboost
def test_xgboost():
    """Ensure that the TPOT xgboost method outputs the same as the xgboost
    classifier method."""
    tpot_obj = TPOT()
    # NOTE(review): learning_rate=0 here vs 0.0001 below — presumably TPOT
    # floors non-positive learning rates to 0.0001 internally; confirm
    # against the TPOT source before changing either value.
    result = tpot_obj._xgradient_boosting(training_testing_data,
                                          n_estimators=100,
                                          learning_rate=0,
                                          max_depth=3)
    result = result[result['group'] == 'testing']
    reference = XGBClassifier(n_estimators=100, learning_rate=0.0001,
                              max_depth=3, seed=42)
    reference.fit(training_features, training_classes)
    assert np.array_equal(result['guess'].values,
                          reference.predict(testing_features))
示例12: update_model
def update_model(current_year):
    """Rebuild the fantasy-football win predictor end to end: load matchup
    history from SQLite, engineer features, scale, select features, grid-search
    an XGBClassifier, report train/validation ROC AUC, and pickle the scaler,
    feature selector, and best estimator.

    Fix: the block mixed Python-2 print statements with Python-3 print()
    calls; unified on print() (identical output) so it runs on Python 3.
    """
    print('Creating model...\nDate: {}'.format(datetime.now().strftime('%Y-%m-%d_%H:%M:%S')))
    managers = tuple(unique_managers(current_year))
    # Union mirrors each matchup so every game appears once per manager side.
    sql = "select * from (select week, year, manager1_name, manager2_name, team1_points, team1_projected, team2_points, team2_projected, type \
            from scoreboard_all WHERE team1_points > 0 and week<=13 \
            UNION select week, year, manager2_name AS manager1_name, manager1_name as manager2_name, team2_points AS team1_points, \
            team2_projected AS team1_projected, team1_points as team2_points, team1_projected AS team2_projected, type FROM scoreboard_all \
            where team1_points>0 and week<=13) order by year, week, type;"
    ff1 = download_data(os.path.join(os.getcwd(), 'data/fantasy_football.db'), sql)
    data_features = custom_features(ff1)
    # Keep only matchups between managers active in the current year.
    data_features = data_features[(data_features.manager1_name.isin(managers)) &
                                  (data_features.manager2_name.isin(managers))]
    X, y, managers, league_type = dummy_and_interaction(data_features)
    sc = StandardScaler()
    X_std = sc.fit_transform(X)
    pickle_model(sc, 'standard.scaler')
    # Feature selection on a 75/25 split using a baseline XGB model.
    X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size=0.25,
                                                        random_state=None)
    model = XGBClassifier()
    model.fit(X_train, y_train)
    X_train_trans, X_test_trans = feature_selection(model, X_train, X_test,
                                                    y_train, y_test,
                                                    eval_metric='auc')
    # Grid-search learning rate / tree count on the selected features.
    model = XGBClassifier()
    learning_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
    n_estimators = [50, 100, 150, 200, 250, 300]
    param_grid = dict(n_estimators=n_estimators, learning_rate=learning_rate)
    grid_search = GridSearchCV(model, param_grid, scoring="log_loss", cv=10, verbose=1)
    result = grid_search.fit(X_train_trans, y_train)
    print("Best: {0} using {1}".format(result.best_score_, result.best_params_))
    print('Best params: ', result.best_params_)
    best_est = result.best_estimator_
    validation = best_est.predict_proba(X_train_trans)
    print("Roc AUC Train: ", roc_auc_score(y_train, validation[:, 1], average='macro'))
    probs = best_est.predict_proba(X_test_trans)
    print("Roc AUC Validation: ", roc_auc_score(y_test, probs[:, 1], average='macro'))
    pickle_model(best_est, 'fantasy.predict')
示例13: train
def train(imgfile='img/segmentation', modelfile='segmentation.pkl'):
    """Train a multi-class XGB character model from labelled segmentation
    images and pickle the fitted model to ``modelfile``.

    Fixes: ``map``/``filter`` results were passed to ``np.array``/``sum`` —
    these are lazy iterators on Python 3, so the originals break there;
    replaced with comprehensions (identical results on Python 2). The pickle
    file is now opened in binary mode ('wb'), which pickle requires on
    Python 3 and which avoids newline corruption on Windows Python 2.
    """
    filelabel = getFiles(imgfile)
    row = 120
    col = 40
    # Build per-image character maps, dropping images that failed to load.
    imgmaps = [Img(x[1], row, col, x[0]).imgmap for x in filelabel]
    data = [z for z in imgmaps if z is not None]
    # Flatten to (char, feature-vector) pairs and drop unlabeled entries.
    data = [x for x in sum(data, []) if x[0] is not None]
    label = np.array([CHARACTER.get(x[0]) for x in data])
    feature = np.array([np.array(x[1]) for x in data])
    from xgboost import XGBClassifier
    xgb = XGBClassifier(objective='multi:softmax', reg_alpha=1.0, reg_lambda=0.0,
                        subsample=0.7, n_estimators=100, learning_rate=0.3)
    model = xgb.fit(feature, label, eval_set=[(feature, label)],
                    eval_metric='mlogloss')
    import pickle
    with open(modelfile, 'wb') as f:  # binary mode required for pickle
        pickle.dump(model, f)
示例14: runner
def runner():
    """Cross-validate, then fit an XGBClassifier on the full 'tred_cutoff'
    data and print f1/precision/recall on the model's held-out test set."""
    m = Model()
    X = m.df.drop("tred_cutoff", axis=1)
    Y = m.df["tred_cutoff"]
    features_inner = m.features + m.features_2
    cv(X, Y, features_inner)
    model = XGBClassifier()
    model.fit(X, Y)
    predictions = model.predict(m.X_test)
    s_f = f1_score(m.y_test, predictions)
    s_p = precision_score(m.y_test, predictions)
    s_r = recall_score(m.y_test, predictions)
    print("test f1", s_f)
    print("test precision", s_p)
    print("test recall", s_r)
示例15: main
def main():
    """Compare a DecisionTreeClassifier and an XGBClassifier on the Titanic
    dataset using the shared utils train/eval helpers."""
    titanic = pandas.read_csv('dataset/titanic.csv')
    x_set = titanic[['pclass', 'age', 'sex']]
    y_set = titanic['survived']
    # Impute missing values with the mean age (in place on the slice).
    x_set.fillna(x_set['age'].mean(), inplace=True)
    x_train, x_test, y_train, y_test = utils.prepare_train_and_test_sets(x_set, y_set)
    vectorizer = DictVectorizer(sparse=False)
    # NOTE(review): orient='record' relies on old pandas prefix-matching;
    # 'records' is the canonical spelling — verify before upgrading pandas.
    x_train = vectorizer.fit_transform(x_train.to_dict(orient='record'))
    x_test = vectorizer.transform(x_test.to_dict(orient='record'))
    tree_model = DecisionTreeClassifier()
    utils.get_trained_result(tree_model, x_test, x_train, y_test, y_train)
    boosted_model = XGBClassifier()
    boosted_model.fit(x_train, y_train)
    utils.get_trained_result(boosted_model, x_test, x_train, y_test, y_train)