本文整理匯總了Python中xgboost.sklearn.XGBClassifier.fit方法的典型用法代碼示例。如果您正苦於以下問題:Python XGBClassifier.fit方法的具體用法?Python XGBClassifier.fit怎麼用?Python XGBClassifier.fit使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類xgboost.sklearn.XGBClassifier的用法示例。
在下文中一共展示了XGBClassifier.fit方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: _distributor
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def _distributor(self, label, cv, param, eval_metric, early_stopping_rounds=50):
    """Fit an XGBoost estimator with early stopping and pickle the result record.

    The training data held on ``self`` is split 60/40 into a "visible" part
    used for fitting and a "blind" part used as the early-stopping set.

    Args:
        label: Ignored; the result key is always derived from
            ``self.is_classifier`` (kept for interface compatibility).
        cv: Unused by this method (kept for interface compatibility).
        param: Estimator hyper-parameters. A dict is unpacked as keyword
            arguments; any other value is forwarded positionally as before.
        eval_metric: Evaluation metric forwarded to ``fit``.
        early_stopping_rounds: Early-stopping patience forwarded to ``fit``.

    Side effects:
        Stores ``{'clf', 'time'}`` under ``self.result[label]``, pretty-prints
        that record, and pickles ``self.result`` to ``self.result_address``.
    """
    start = time()
    if self.is_classifier:
        label = 'XGBClassifier'
        estimator_cls = XGBClassifier
    else:
        label = 'XGBRegressor'
        estimator_cls = XGBRegressor
    # A hyper-parameter dict has to be unpacked; passed positionally it would
    # land in the estimator's first positional parameter.
    if isinstance(param, dict):
        rs = estimator_cls(**param)
    else:
        rs = estimator_cls(param)

    X_visible, X_blind, y_visible, y_blind = train_test_split(
        self.X_train, self.y_train, random_state=1301,
        stratify=self.y_train, test_size=0.4)

    # Fit on the local split (there is no self.X_visible), pass eval_metric by
    # keyword (the third positional slot of fit() is sample_weight), and use
    # the caller's early_stopping_rounds rather than a hard-coded 50.
    rs.fit(X_visible, y_visible,
           eval_metric=eval_metric,
           early_stopping_rounds=early_stopping_rounds,
           eval_set=[(X_visible, y_visible), (X_blind, y_blind)])

    self.result[label] = {}
    self.result[label]['clf'] = rs
    self.result[label]['time'] = time() - start
    pprint.pprint(self.result[label])

    # The context manager closes the file even if pickling fails.
    with open(self.result_address, 'wb') as out_result:
        pickle.dump(self.result, out_result)
示例2: job_function
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def job_function(params):
    """Score one hyper-parameter vector, averaged over the prepared splits.

    Args:
        params: Sequence indexed as ``[learning_rate, max_depth, ss_cs, gamma,
            min_child_weight, reg_lambda, reg_alpha]``. ``ss_cs`` is used for
            both ``subsample`` and ``colsample_bytree``.

    Returns:
        Mean of ``calculate_score`` over the first ``iterations_per_job``
        entries of the module-level ``Xy``.

    Reads the module-level ``Xy``, ``le``, ``iterations_per_job``,
    ``calculate_score`` and ``calculate_score_2``.
    """
    learning_rate = params[0]
    max_depth = params[1]
    ss_cs = params[2]
    gamma = params[3]
    min_child_weight = params[4]
    reg_lambda = params[5]
    reg_alpha = params[6]

    # Patience scales inversely with the learning rate.
    if learning_rate >= 0.3:
        early_stopping_rounds = 5
    elif learning_rate <= 0.03:
        early_stopping_rounds = 50
    else:
        early_stopping_rounds = 25

    scores = []
    for i in range(iterations_per_job):
        split = Xy[i]
        X_train, X_test = split[0], split[1]
        y_train2 = le.transform(split[2])
        y_test2 = le.transform(split[3])

        clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate,
                            n_estimators=5000, objective='multi:softprob',
                            subsample=ss_cs, colsample_bytree=ss_cs,
                            gamma=gamma, min_child_weight=min_child_weight,
                            seed=0, silent=True,
                            reg_lambda=reg_lambda, reg_alpha=reg_alpha)
        # Train on the encoded labels so they match the encoded labels used in
        # eval_set; the original fitted on raw y_train and left y_train2 unused.
        clf.fit(X_train, y_train2, eval_set=[(X_test, y_test2)],
                eval_metric=calculate_score_2,
                early_stopping_rounds=early_stopping_rounds, verbose=False)

        y_predicted = clf.predict_proba(
            X_test, ntree_limit=clf.booster().best_ntree_limit)
        scores.append(calculate_score(y_predicted, y_test2))

    avg_score = np.array(scores).mean()
    print(avg_score, params)
    return avg_score
示例3: eval_fn
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def eval_fn(params):
    """Cross-validate ``params`` with early stopping and return the mean score.

    For every ``(train, validation)`` index pair in the module-level ``skf``
    the model is refitted with early stopping on log-loss. The per-fold
    ``best_score`` and ``best_iteration`` are averaged over ``n_folds``.

    Args:
        params: Keyword parameters applied to the classifier via ``set_params``.

    Returns:
        The mean per-fold ``best_score``.

    Side effects:
        Appends the averaged tree count to ``n_estimators_lst``. When
        ``X_valid`` is not None, refits on the full training set with that tree
        count and appends the hold-out log-loss to ``score_valid``. Prints a
        summary line when ``verbose`` is truthy.
    """
    model = XGBClassifier(n_estimators=n_estimators_max,
                          learning_rate=learning_rate, seed=seed)
    score = 0
    n_estimators = 0
    for tr, va in skf:
        X_tr, y_tr = X_train[tr], y_train[tr]
        X_va, y_va = X_train[va], y_train[va]
        model.set_params(**params)
        model.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], eval_metric='logloss',
                  early_stopping_rounds=50, verbose=False)
        score += model.best_score
        n_estimators += model.best_iteration

    score /= n_folds
    # Floor division keeps the tree count an int on Python 3 as well (this is
    # what ``/`` did for ints on Python 2); it is assigned to
    # model.n_estimators below, which must be integral.
    n_estimators //= n_folds
    n_estimators_lst.append(n_estimators)

    result_str = "train:%.4f ntree:%5d " % (score, n_estimators)
    if X_valid is not None:
        model.n_estimators = n_estimators
        model.fit(X_train, y_train)
        pr = model.predict_proba(X_valid)[:, 1]
        sc_valid = log_loss(y_valid, pr)
        score_valid.append(sc_valid)
        result_str += "valid:%.4f" % sc_valid

    if verbose:
        # Single-argument call form prints identically on Python 2 and 3.
        print(result_str)
    return score
示例4: extract_leaf_feature
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def extract_leaf_feature(features, targets, train_indexes, params):
    """Return one-hot encoded leaf indices of a boosted model for every row.

    The classifier is fitted only on the rows selected by ``train_indexes``;
    leaf indices are then predicted for all of ``features`` and one-hot
    encoded.

    Args:
        features: Feature array, indexable by ``train_indexes``.
        targets: Label array, indexable by ``train_indexes``.
        train_indexes: Index of the rows used for fitting.
        params: Keyword arguments for ``XGBClassifier``.

    Returns:
        The result of ``OneHotEncoder().fit_transform`` on the leaf indices.
    """
    classifier = XGBClassifier(**params)
    classifier.fit(features[train_indexes], targets[train_indexes])
    # pred_leaf=True makes the booster return leaf indices instead of scores.
    leaf_indices = classifier.booster().predict(
        xgb.DMatrix(features), pred_leaf=True)
    return sklearn.preprocessing.OneHotEncoder().fit_transform(leaf_indices)
示例5: main
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def main(training_data, test_data):
    """Train a multi-class model and return the five likeliest countries per test row.

    Training and test frames are cleaned together via ``data_cleaner`` so both
    get identical preprocessing, then split apart again by a temporary
    ``source`` marker column.

    Args:
        training_data: DataFrame containing a ``country_destination`` column.
        test_data: DataFrame of rows to predict.

    Returns:
        DataFrame with columns ``['id', 'country']`` holding up to five rows
        per test index value, ordered from most to least likely.
    """
    # assign() returns copies, so the caller's frames are not given a stray
    # 'source' column.
    merged_data = pd.concat([
        training_data.assign(source='training'),
        test_data.assign(source='test'),
    ])

    cleaned_data = data_cleaner(merged_data)

    # Separate the two sets again and drop the marker.
    is_training = cleaned_data['source'] == 'training'
    pred_df = cleaned_data[is_training].drop('source', axis=1)
    test_pred = cleaned_data[cleaned_data['source'] == 'test'].drop('source', axis=1)

    # Encode the target as ints; the encoder maps predictions back to names.
    labels = LabelEncoder().fit(training_data['country_destination'])
    target_df = pd.Series(labels.transform(training_data['country_destination']),
                          index=training_data.index)

    xgb_model = XGBClassifier(max_depth=6, learning_rate=0.3, n_estimators=25,
                              objective='multi:softprob',
                              subsample=0.5, colsample_bytree=0.5, seed=0)
    # .values replaces DataFrame.as_matrix(), which newer pandas removed.
    xgb_model.fit(pred_df.values, target_df.tolist())
    preds = xgb_model.predict_proba(test_pred.values)

    model_probs = pd.DataFrame(preds, index=test_pred.index, columns=labels.classes_)

    # Collect the per-row frames and concatenate once; concatenating inside
    # the loop re-copies the accumulated result on every iteration.
    top_frames = []
    for row_id, row in model_probs.iterrows():
        top_countries = list(row.sort_values(ascending=False).index[:5])
        top_frames.append(
            pd.DataFrame({'id': row_id, 'country': top_countries},
                         columns=['id', 'country']))

    if not top_frames:
        # pd.concat rejects an empty list; return an empty, well-formed frame.
        return pd.DataFrame(columns=['id', 'country'])
    return pd.concat(top_frames)
示例6: main
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def main():
    """Fit a classifier on the training CSV and predict class probabilities.

    Reads the CSV paths from the module-level ``args`` and the classifier
    parameters from the module-level ``best_dicts``.

    Returns:
        Tuple ``(clf, results)``: the fitted classifier and a DataFrame made of
        the test ``Id`` column followed by ``Prediction1`` .. ``Prediction9``.
    """
    train_frame = pd.read_csv(args.train_dataset)
    features = train_frame.drop(['Id', 'Class'], axis=1)
    target = train_frame.loc[:, 'Class']

    test_frame = pd.read_csv(args.test_dataset)
    test_features = test_frame.drop(['Id'], axis=1)
    test_ids = test_frame.loc[:, 'Id']

    clf = XGBClassifier()
    clf.set_params(**best_dicts)
    clf.fit(features, target)

    # One probability column per class; nine classes are expected.
    names = []
    for class_no in range(1, 10):
        names.append('Prediction' + str(class_no))
    probabilities = pd.DataFrame(clf.predict_proba(test_features), columns=names)

    return (clf, pd.concat([test_ids, probabilities], axis=1))
示例7: objective
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def objective(space):
    """Evaluate one hyper-parameter sample and report its validation log-loss.

    Args:
        space: Mapping with keys ``n_estimators``, ``learning_rate``,
            ``max_depth``, ``min_child_weight``, ``colsample_bytree`` and
            ``subsample``.

    Returns:
        Dict ``{'loss': <log-loss on xValid/yValid>, 'status': STATUS_OK}``.

    Fits on the module-level ``xTrain``/``yTrain``.
    """
    hyperparams = dict(
        n_estimators=int(space['n_estimators']),
        objective='binary:logistic',
        seed=37,
        learning_rate=space['learning_rate'],
        max_depth=space['max_depth'],
        min_child_weight=space['min_child_weight'],
        colsample_bytree=space['colsample_bytree'],
        subsample=space['subsample'],
    )
    model = XGBClassifier(**hyperparams)
    model.fit(xTrain, yTrain, eval_metric="logloss")

    # Column 1 holds the positive-class probability.
    positive_proba = model.predict_proba(xValid)[:, 1]
    return {'loss': log_loss(yValid, positive_proba), 'status': STATUS_OK}
示例8: myThreadFunc
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def myThreadFunc(ThreadID):
    """Train and score one classifier on the split selected by ``ThreadID``.

    Args:
        ThreadID: Key into the module-level ``Xy``; each entry is indexed as
            ``[X_train, X_test, y_train, y_test]``.

    Side effects:
        Prints the score together with the best tree limit and stores the
        score in ``train_and_test_scores[ThreadID]``.

    Hyper-parameters (``max_depth``, ``learning_rate``, ``ss_cs``, ``gamma``,
    ``min_child_weight``, ``reg_lambda``, ``reg_alpha``,
    ``early_stopping_rounds``) are read from module-level names.
    """
    split = Xy[ThreadID]
    X_train, X_test = split[0], split[1]
    y_train2 = le.transform(split[2])
    y_test2 = le.transform(split[3])

    clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate,
                        n_estimators=5000, objective='multi:softprob',
                        subsample=ss_cs, colsample_bytree=ss_cs,
                        gamma=gamma, min_child_weight=min_child_weight,
                        seed=0, silent=True,
                        reg_lambda=reg_lambda, reg_alpha=reg_alpha)
    # Train on the encoded labels so they match the encoded labels used in
    # eval_set; the original fitted on raw y_train and left y_train2 unused.
    clf.fit(X_train, y_train2, eval_set=[(X_test, y_test2)],
            eval_metric=calculate_score_2,
            early_stopping_rounds=early_stopping_rounds, verbose=False)

    best_ntree_limit = clf.booster().best_ntree_limit
    y_predicted = clf.predict_proba(X_test, ntree_limit=best_ntree_limit)
    score = calculate_score(y_predicted, y_test2)
    print(score, best_ntree_limit)
    train_and_test_scores[ThreadID] = score
示例9: apply_xgb_ens
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def apply_xgb_ens(y_valid, valid_folder='Valid', test_folder='Test'):
    """
    Ensembler based on xgboost gradient boosting.

    Fits a multi-class XGBClassifier on the matrix that ``get_X_X_Test``
    returns for ``valid_folder`` with ``y_valid`` as target, and returns the
    predicted class probabilities for the matrix of ``test_folder``.
    """
    # The last two returned values are not needed here.
    X, X_test, _n_preds, _n_class = get_X_X_Test(valid_folder, test_folder)

    ensembler = XGBClassifier(
        max_depth=4,
        learning_rate=0.05,
        n_estimators=200,
        objective='multi:softprob',
        gamma=0.,
        max_delta_step=0.,
        subsample=0.9,
        colsample_bytree=0.9,
        seed=0,
    )
    ensembler.fit(X, y_valid)
    return ensembler.predict_proba(X_test)
示例10: perform_prediction
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def perform_prediction(training, labels, testing, xgb_votes, rf_votes):
    """Perform prediction using a combination of XGB and RandomForests.

    Args:
        training: Training feature matrix.
        labels: Training labels; the number of distinct values sets the width
            of the returned array.
        testing: Feature matrix to predict.
        xgb_votes: Number of XGBoost models to fit and sum.
        rf_votes: Number of random forests to fit and sum.

    Returns:
        Array of shape ``(len(testing), n_classes)`` holding the summed (not
        averaged) class probabilities of all votes.
    """
    predictions = np.zeros((len(testing), len(set(labels))))

    # Predictions using xgboost.
    for i in range(xgb_votes):
        print('XGB vote %d' % i)
        # Each vote gets its own seed. With one fixed seed every vote would
        # draw the same row/column subsamples and fit the same model, so
        # extra votes would only rescale the sum.
        xgb = XGBClassifier(
            max_depth=DEPTH_XGB, learning_rate=LEARNING_XGB,
            n_estimators=ESTIMATORS_XGB, objective='multi:softprob',
            subsample=SUBSAMPLE_XGB, colsample_bytree=COLSAMPLE_XGB,
            seed=i)
        xgb.fit(training, labels)
        predictions += xgb.predict_proba(testing)

    # Predictions using RandomForestClassifier.
    for i in range(rf_votes):
        print('RandomForest vote %d' % i)
        rand_forest = RandomForestClassifier(
            n_estimators=ESTIMATORS_RF, criterion=CRITERION_RF, n_jobs=JOBS_RF,
            max_depth=DEPTH_RF, min_samples_leaf=MIN_LEAF_RF, bootstrap=True)
        rand_forest.fit(training, labels)
        predictions += rand_forest.predict_proba(testing)

    return predictions
示例11: xgboostinitial_predictor
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def xgboostinitial_predictor(train_path, test_path, eval_path):
    """Train a binary XGBoost model and score the test and evaluation sets.

    Args:
        train_path: CSV with a ``target`` column; the first column is the index.
        test_path: CSV of rows to score; the first column is the index.
        eval_path: CSV of evaluation rows; the first column is the index.

    Returns:
        Tuple ``(test_output, eval_output)`` of Series named ``PredictedProb``
        holding the probability of class 1, indexed like the respective input.
    """
    print('Loading the data...')
    train = pd.read_csv(train_path, index_col=0)
    test = pd.read_csv(test_path, index_col=0)
    eval_df = pd.read_csv(eval_path, index_col=0)
    target = train['target'].copy()
    train.drop('target', axis=1, inplace=True)

    print('Model training begins...')
    xgb_model = XGBClassifier(max_depth=6, learning_rate=0.3, n_estimators=25,
                              objective='binary:logistic',
                              subsample=0.5, colsample_bytree=0.5, seed=0)
    # .values replaces DataFrame.as_matrix(), which newer pandas removed.
    xgb_model.fit(train.values, target.tolist())

    print('Making predictions....')
    test_preds = xgb_model.predict_proba(test.values)
    eval_preds = xgb_model.predict_proba(eval_df.values)

    print('Cleaning predictions to match expected format....')
    test_frame = pd.DataFrame(test_preds, index=test.index)
    print(test_frame.columns)
    # Selecting column 1 yields a Series. Assigning ``.columns`` to a Series
    # labels nothing, so set its name to get the intended header.
    test_output = test_frame[1]
    test_output.name = 'PredictedProb'

    eval_output = pd.DataFrame(eval_preds, index=eval_df.index)[1]
    eval_output.name = 'PredictedProb'
    return test_output, eval_output
示例12: get_xgboost_classifier
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def get_xgboost_classifier(X_train, y_train, X_val, y_val,params=None, tag=""):
    """Return a fitted XGBoost classifier, grid-searching when no params are given.

    Args:
        X_train, y_train: Training data.
        X_val, y_val: Validation data, used as the second evaluation set when
            ``params`` is supplied.
        params: Keyword arguments for ``XGBClassifier``. When None, a grid
            search over ``max_depth``/``min_child_weight`` is run instead.
        tag: Label forwarded to the plotting helpers.

    Returns:
        The fitted ``GridSearchCV`` object when ``params`` is None, otherwise
        the fitted ``XGBClassifier``.
    """
    param_grid = {'max_depth':[3,5,7], 'min_child_weight': [1,3,5], 'n_estimators': [50]}

    if params is not None:
        clf = XGBClassifier(**params)
        clf.fit(X_train, y_train, eval_set = [(X_train,y_train),(X_val,y_val)], eval_metric='auc', verbose=False)
        # NOTE(review): the source lost its indentation; the two plotting hooks
        # are assumed to belong to this branch because evals_result() is only
        # populated by the fit call above - confirm against the original file.
        if plot_cv_curves:
            history = clf.evals_result()
            train = history['validation_0']['auc']
            val = clf.evals_result()['validation_1']['auc']
            plot_cv_curve(train, val, tag)
        if plot_feature_importance:
            plot_feature_importance(clf, tag)
        return clf

    xgb = XGBClassifier(
        learning_rate =0.2,
        objective= 'binary:logistic',
        seed=27)
    t = start("training xgboost ")
    cv = cross_validation.ShuffleSplit(X_train.shape[0], n_iter=10,test_size=0.2, random_state=123)
    searcher = grid_search.GridSearchCV(xgb, param_grid, cv=cv, n_jobs=1, scoring='roc_auc')
    clf = searcher.fit(X_train,y_train)
    report(t, nitems=10*len(param_grid))

    print("Best score:{} with scorer {}".format(clf.best_score_, clf.scorer_))
    print("With parameters:")
    best_parameters = clf.best_estimator_.get_params()
    for param_name in sorted(param_grid.keys()):
        print('\t%s: %r' % (param_name, best_parameters[param_name]))
    return clf
示例13: train_classifier
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
def train_classifier(X, y, clf_name='xgb'):
    """Fit and return the classifier selected by ``clf_name``.

    Args:
        X: Training feature matrix.
        y: Training labels.
        clf_name: ``'xgb'`` selects an ``XGBClassifier``; any other value
            selects a ``RandomForestClassifier``.

    Returns:
        The fitted classifier, configured from the module-level ``*_XG`` or
        ``*_RF`` constants.
    """
    if clf_name != 'xgb':
        model = RandomForestClassifier(
            n_estimators=ESTIMATORS_RF,
            criterion=CRITERION_RF,
            n_jobs=JOBS_RF,
            max_depth=DEPTH_RF,
            min_samples_leaf=MIN_LEAF_RF,
            min_samples_split=MIN_SPLIT_RF,
            max_features=MAX_FEATURES_RF,
            bootstrap=True,
        )
    else:
        model = XGBClassifier(
            n_estimators=ESTIMATORS_XG,
            objective=OBJECTIVE_XG,
            max_depth=DEPTH_XG,
            learning_rate=LEARNING_RATE_XG,
            subsample=SUBSAMPLE_XG,
            colsample_bytree=COLSAMPLE_BYTREE_XG,
            seed=0,
        )
    model.fit(X, y)
    return model
示例14: LabelEncoder
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
# Split the combined feature matrix back into train and test parts;
# piv_train is the number of training rows.
vals = df_all.values
X = vals[:piv_train]
le = LabelEncoder()
y = le.fit_transform(labels)
X_test = vals[piv_train:]

# In[ ]:

# Classifier
xgb = XGBClassifier(max_depth=6, learning_rate=0.3, n_estimators=25,
                    objective='multi:softprob', subsample=0.5, colsample_bytree=0.5, seed=0)
xgb.fit(X, y)
y_pred = xgb.predict_proba(X_test)

# In[ ]:

# Take the five most probable classes per test row in one vectorized pass
# instead of an index loop that grows two Python lists. Capping at the number
# of classes keeps ids and countries the same length when there are fewer
# than five classes.
top_k = min(5, y_pred.shape[1])
top_idx = np.argsort(y_pred, axis=1)[:, ::-1][:, :top_k]
ids = np.repeat(np.asarray(id_test), top_k)        # each id repeated top_k times
cts = le.inverse_transform(top_idx.ravel())        # matching country labels

# Generate submission
sub = pd.DataFrame(np.column_stack((ids, cts)), columns=['id', 'country'])
sub.to_csv('sub0.csv',index=False)
示例15: train_test_split
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import fit [as 別名]
plt.xlabel('Predicted label')
# Define X and y: every column except 'state' is a feature; 'state' is the
# target. Selecting the column by name yields a 1-D label vector rather than
# an (n, 1) column.
X = data.loc[:, data.columns != 'state'].values
y = data['state'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Under-sample the training split with ClusterCentroids.
cc = ClusterCentroids(random_state=0)
os_X, os_y = cc.fit_sample(X_train, y_train)

# XGBoost
clf_XG = XGBClassifier(learning_rate=0.3, min_child_weight=1,
                       max_depth=6, gamma=0, subsample=1, max_delta_step=0, colsample_bytree=1,
                       reg_lambda=1, n_estimators=100, seed=1000, scale_pos_weight=1000)
clf_XG.fit(os_X, os_y, eval_set=[(os_X, os_y), (X_test, y_test)], eval_metric='auc', verbose=False)
evals_result = clf_XG.evals_result()
y_true, y_pred = y_test, clf_XG.predict(X_test)

# F1 score, recall, precision, specificity, G score.
# Single-argument print calls emit the same text on Python 2 and Python 3.
recall = metrics.recall_score(y_true, y_pred)
print("F1_score : %.4g" % metrics.f1_score(y_true, y_pred))
print("Recall : %.4g" % recall)
print("Precision : %.4g" % metrics.precision_score(y_true, y_pred))

# Compute confusion matrix; row 0 is the negative class, so
# specificity = TN / (TN + FP).
cnf_matrix = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)
specifity = float(cnf_matrix[0, 0]) / (cnf_matrix[0, 0] + cnf_matrix[0, 1])
print("Specifity:  %s" % specifity)

# The G score (geometric mean) is sqrt(recall * specificity). The original
# divided the two, which is not the G-mean and fails when specificity is 0.
print("G score:  %s" % math.sqrt(recall * specifity))