This article collects typical usage examples of the xgboost.sklearn.XGBClassifier.booster method in Python. If you are wondering what exactly XGBClassifier.booster does, how to call it, and what real uses look like, the hand-picked method examples below may help. You can also read further about the class this method belongs to, xgboost.sklearn.XGBClassifier.
The following shows 7 code examples of XGBClassifier.booster, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
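Before the examples, here is a minimal sketch of the basic pattern this page documents: fit an XGBClassifier, then call booster() to reach the underlying low-level Booster object. The sketch is not taken from any example below; the data and parameters are illustrative assumptions. Also note that booster() is the accessor used by the older xgboost releases these examples target; newer releases expose the same object through get_booster().

import numpy as np
from xgboost.sklearn import XGBClassifier

X = np.random.rand(200, 5)               # toy feature matrix (assumption)
y = np.random.randint(0, 2, size=200)    # toy binary labels (assumption)

clf = XGBClassifier(n_estimators=50, max_depth=3)
clf.fit(X, y)

booster = clf.booster()                  # the xgboost.Booster behind the sklearn wrapper
print(booster.get_fscore())              # per-feature split counts, commonly read as importances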
Example 1: myThreadFunc
# Module to import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
def myThreadFunc(ThreadID):
    X_train = Xy[ThreadID][0]
    X_test = Xy[ThreadID][1]
    y_train = Xy[ThreadID][2]
    y_test = Xy[ThreadID][3]
    y_train2 = le.transform(y_train)
    y_test2 = le.transform(y_test)
    clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=5000, objective='multi:softprob', subsample=ss_cs, colsample_bytree=ss_cs, gamma=gamma, min_child_weight=min_child_weight, seed=0, silent=True, reg_lambda=reg_lambda, reg_alpha=reg_alpha)
    clf.fit(X_train, y_train, eval_set=[(X_test, y_test2)], eval_metric=calculate_score_2, early_stopping_rounds=early_stopping_rounds, verbose=False)
    y_predicted = clf.predict_proba(X_test, ntree_limit=clf.booster().best_ntree_limit)
    score = calculate_score(y_predicted, y_test2)
    print(score, clf.booster().best_ntree_limit)
    train_and_test_scores[ThreadID] = score
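Example 1 depends on module-level globals (Xy with pre-split folds, le, the hyperparameters, and the train_and_test_scores result list), none of which are shown here. A hypothetical driver, not part of the original code, might launch one thread per fold like this:

import threading

threads = [threading.Thread(target=myThreadFunc, args=(i,)) for i in range(len(Xy))]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(train_and_test_scores)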
Example 2: job_function
# Module to import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
def job_function(params):
    learning_rate = params[0]
    max_depth = params[1]
    ss_cs = params[2]
    gamma = params[3]
    min_child_weight = params[4]
    reg_lambda = params[5]
    reg_alpha = params[6]
    early_stopping_rounds = 25
    if learning_rate >= 0.3:
        early_stopping_rounds = 5
    if learning_rate <= 0.03:
        early_stopping_rounds = 50
    scores = []
    for i in range(iterations_per_job):
        X_train = Xy[i][0]
        X_test = Xy[i][1]
        y_train = Xy[i][2]
        y_test = Xy[i][3]
        y_train2 = le.transform(y_train)
        y_test2 = le.transform(y_test)
        clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=5000, objective='multi:softprob', subsample=ss_cs, colsample_bytree=ss_cs, gamma=gamma, min_child_weight=min_child_weight, seed=0, silent=True, reg_lambda=reg_lambda, reg_alpha=reg_alpha)
        clf.fit(X_train, y_train, eval_set=[(X_test, y_test2)], eval_metric=calculate_score_2, early_stopping_rounds=early_stopping_rounds, verbose=False)
        y_predicted = clf.predict_proba(X_test, ntree_limit=clf.booster().best_ntree_limit)
        score = calculate_score(y_predicted, y_test2)
        scores.append(score)
    avg_score = np.array(scores).mean()
    print(avg_score, params)
    return avg_score
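Example 2 evaluates one hyperparameter vector and returns its average score, which makes it usable as the objective of an outer search loop. A hypothetical way to drive it over a couple of hand-picked settings (the original search code is not shown, and these values are assumptions):

candidate_params = [
    # learning_rate, max_depth, ss_cs, gamma, min_child_weight, reg_lambda, reg_alpha
    [0.10, 6, 0.8, 0.0, 1, 1.0, 0.0],
    [0.05, 8, 0.7, 0.1, 3, 2.0, 0.1],
]
results = [(job_function(p), p) for p in candidate_params]
print(sorted(results))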
Example 3: extract_leaf_feature
# Module to import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
def extract_leaf_feature(features, targets, train_indexes, params):
    model = XGBClassifier(**params)
    model.fit(features[train_indexes], targets[train_indexes])
    booster = model.booster()
    dmatrix = xgb.DMatrix(features)
    leaf = booster.predict(dmatrix, pred_leaf=True)
    encoder = sklearn.preprocessing.OneHotEncoder()
    leaf_feature = encoder.fit_transform(leaf)
    return leaf_feature
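Example 3 is the classic GBDT-to-linear-model feature transformation: booster().predict(..., pred_leaf=True) encodes each sample by the leaf it falls into in every tree, and the result is one-hot encoded. A hypothetical follow-up, not part of the original code, would feed that sparse leaf matrix into a linear classifier:

from sklearn.linear_model import LogisticRegression

leaf_feature = extract_leaf_feature(features, targets, train_indexes, params)
lr = LogisticRegression()
lr.fit(leaf_feature[train_indexes], targets[train_indexes])  # sparse input is supported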
Example 4: do_cell
# Module to import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
def do_cell(task):
    df_train, df_test, x_start, y_start = task[0], task[1], task[2], task[3]
    #print('do_cell', df_train.shape, df_test.shape, x_start, y_start)
    #train
    n_places_th_local = n_places_th
    n_places_local = n_places
    if n_places != 0:
        tmp = df_train.shape[0]
        value_counts = df_train.place_id.value_counts()[0:n_places]
        df_train = pd.merge(df_train, pd.DataFrame(value_counts), left_on='place_id', right_index=True)[df_train.columns]
        n_places_th_local = value_counts.values[n_places - 1]
        percentage = df_train.shape[0]/tmp
    elif n_places_th != 0:
        value_counts = df_train.place_id.value_counts()
        n_places_local = value_counts[value_counts >= n_places_th_local].count()
        mask = value_counts[df_train.place_id.values] >= n_places_th_local
        percentage = mask.value_counts()[True]/df_train.shape[0]
        df_train = df_train.loc[mask.values]
    else:
        n_places_th_local = 2
        value_counts = df_train.place_id.value_counts()
        n_places_local = value_counts[value_counts >= n_places_th_local].count()
        mask = value_counts[df_train.place_id.values] >= n_places_th_local
        percentage = mask.value_counts()[True]/df_train.shape[0]
        while percentage > n_places_percentage:
            n_places_th_local += 1
            n_places_local = value_counts[value_counts >= n_places_th_local].count()
            mask = value_counts[df_train.place_id.values] >= n_places_th_local
            percentage = mask.value_counts()[True]/df_train.shape[0]
        n_places_th_local -= 1
        n_places_local = value_counts[value_counts >= n_places_th_local].count()
        mask = value_counts[df_train.place_id.values] >= n_places_th_local
        percentage = mask.value_counts()[True]/df_train.shape[0]
        df_train = df_train.loc[mask.values]
    #print(x_start, y_start, n_places_local, n_places_th_local, percentage)
    #test
    row_ids = df_test.index
    if 'place_id' in df_test.columns:
        df_test = df_test.drop(['place_id'], axis=1)
    le = LabelEncoder()
    y = le.fit_transform(df_train.place_id.values)
    X = df_train.drop(['place_id'], axis=1).values
    X_predict = df_test.values
    score = 0
    n_estimators = 0
    if xgb == 1:
        if xgb_calculate_n_estimators == True:
            clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=5000, objective='multi:softprob', subsample=ss, colsample_bytree=cs, gamma=gamma, min_child_weight=min_child_weight, reg_lambda=reg_lambda, reg_alpha=reg_alpha)
            if train_test == 1:
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
                clf.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric=calculate_score, early_stopping_rounds=early_stopping_rounds, verbose=10 if one_cell == 1 else False)
                score = round(1 - clf.booster().best_score, 6)
                n_estimators = clf.booster().best_ntree_limit
            else:
                abc += 1
                xgb_options = clf.get_xgb_params()
                xgb_options['num_class'] = n_places + 1
                train_dmatrix = DMatrix(X, label=y)
                #some of the classes have less than n_folds, cannot use stratified KFold
                #folds = StratifiedKFold(y, n_folds=n_folds, shuffle=True)
                folds = KFold(len(y), n_folds=n_folds, shuffle=True)
                cv_results = cv(xgb_options, train_dmatrix, clf.n_estimators, early_stopping_rounds=early_stopping_rounds, verbose_eval=10 if one_cell == 1 else False, show_stdv=False, folds=folds, feval=calculate_score)
                n_estimators = cv_results.shape[0]
                score = round(1 - cv_results.values[-1][0], 6)
                std = round(cv_results.values[-1][1], 6)
        else:
            n_estimators = n_estimators_fixed
            clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=n_estimators, objective='multi:softprob', subsample=ss, colsample_bytree=cs, gamma=gamma, min_child_weight=min_child_weight, reg_lambda=reg_lambda, reg_alpha=reg_alpha)
    else:
        clf = RandomForestClassifier(n_estimators = 300, n_jobs = -1)
        if rf_calculate_score == True:
            if train_test == 1:
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
                y_train2 = le.transform(y_train)
                y_test2 = le.transform(y_test)
                clf.fit(X_train, y_train2)
                y_predict = clf.predict_proba(X_test)
                scores_local = []
                for i in range(X_test.shape[0]):
#.........rest of the code omitted here.........
Example 5: range
# Module to import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
for i in range(10):
    folds = StratifiedKFold(y_train, n_folds=5, shuffle=True)
    scores = []
    iterations = []
    for train_index, test_index in folds:
        X_train2, X_test2 = X_train.loc[train_index], X_train.loc[test_index]
        y_train2, y_test2 = y_train[train_index], y_train[test_index]
        X_train2, X_test2 = feature_engineering_extra(X_train2, X_test2, y_train2)
        X_train2 = csr_matrix(X_train2.values)
        X_test2 = csr_matrix(X_test2.values)
        clf.fit(X_train2, y_train2, eval_set=[(X_test2, y_test2)], eval_metric='mlogloss', early_stopping_rounds=early_stopping_rounds, verbose=False)
        #print(round(clf.booster().best_score, 6), int(clf.booster().best_ntree_limit))
        scores.append(round(clf.booster().best_score, 6))
        iterations.append(int(clf.booster().best_ntree_limit))
    scores = np.array(scores)
    iterations = np.array(iterations)
    score = scores.mean()
    scores2.append(score)
    print('score, std, iterations', score, scores.std(), iterations.mean())
scores = np.array(scores2)
scores = np.delete(scores, [scores.argmax(), scores.argmin()])
print('score, std', scores.mean(), scores.std())

if is_tt_rf == 1:
    X_train, X_test = feature_engineering(df_train, df_test, y_train)
Example 6: print
# Module to import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
reg_alpha=0.05,
reg_lambda=2,
subsample=1.0,
colsample_bytree=1.0,
max_delta_step=1,
scale_pos_weight=1,
objective='multi:softprob',
nthread=8,
seed=0 # ,
# silent = False
)
print('training...')
xgb_model.fit(training, label)
print('predicting...')
predicted = xgb_model.predict_proba(testing)
predicted = pandas.DataFrame(predicted)
predicted.columns = xgb_model.classes_
# Name index column.
predicted.index.name = 'Id'
# Write csv.
print('Saving prediction...')
predicted.to_csv('Prediction.csv')
# feature importance
feat_imp = pandas.Series(xgb_model.booster().get_fscore()).sort_values(ascending=False)
feat_imp.plot(kind='bar', title='Feature Importances')
matplotlib.pyplot.show()
plot_importance(xgb_model, title='Feature importance')
matplotlib.pyplot.show()
plot_tree(xgb_model, num_trees=0)
matplotlib.pyplot.show()
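As a side note to example 6 (not part of the original code), reasonably recent versions of the sklearn wrapper also expose a normalized importance score per feature through the feature_importances_ attribute, which avoids going through booster().get_fscore():

feat_imp2 = pandas.Series(xgb_model.feature_importances_).sort_values(ascending=False)
feat_imp2.plot(kind='bar', title='Feature Importances (sklearn attribute)')
matplotlib.pyplot.show()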
Example 7: XGBClassifier
# Module to import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
X_val = Xfold1
y_val = fold1.loc[:, 'Category']
# Now comes the time-consuming step of training xgb.
# In[3]:
xgb = XGBClassifier(**HYPER_PARAMS)
xgb.fit(X_train, y_train, eval_set = [(X_val, y_val)], eval_metric = SCORING, verbose = 10)
# Now, we can gaze at the important features.
# In[4]:
gbdt = xgb.booster()
importance = gbdt.get_fscore()
importance = sorted(importance.items(), key = operator.itemgetter(1), reverse = True)
df=pd.DataFrame(importance, columns = ['feature', 'fscore'])
print(df)
# This provides us with a good idea as to which features are particularly relevant.
#
# - clearly, the timing in terms of minute, hour and year are critical
# - the collocated-crime feature scores surprisingly high
# - the spatial coordinates are useful
# - the total number of crimes in a street is an important indicator, as well as some of the log-ratios
# - the month is not particularly essential, presumably as seasonal information can be recovered from the week