This article collects and summarizes typical usage examples of Python's xgboost.plot_importance function. If you have been wondering what plot_importance does and how it is used in practice, the curated examples below should help.
Below are 15 code examples of the plot_importance function, ordered by popularity by default.
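Before diving into the examples, here is a minimal, self-contained sketch of the basic call (the synthetic data, feature names, and hyperparameters are illustrative only):

import numpy as np
import xgboost as xgb
import matplotlib
matplotlib.use('Agg')  # headless backend; drop this line to display interactively
import matplotlib.pyplot as plt

# Synthetic binary-classification data: 100 rows, 5 named features.
X = np.random.randn(100, 5)
y = np.random.randint(0, 2, size=100)
dtrain = xgb.DMatrix(X, label=y, feature_names=['feat_%d' % i for i in range(5)])

bst = xgb.train({'objective': 'binary:logistic'}, dtrain, num_boost_round=10)
ax = xgb.plot_importance(bst)  # returns a matplotlib Axes with one bar per used feature
plt.tight_layout()
plt.savefig('importance.png')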
Example 1: train_helper
def train_helper(X_train, X_test, y_train, y_test, model_name):
    xg_train = xgboost.DMatrix(X_train, label=y_train)
    xg_test = xgboost.DMatrix(X_test, label=y_test)
    le = load_label_encoder(model_name)
    param = {}
    # use softmax multi-class classification
    param['objective'] = 'multi:softprob'
    param['eta'] = 0.002
    param['max_depth'] = 7
    param['nthread'] = 7
    param['num_class'] = len(le.classes_)
    param['eval_metric'] = 'merror'
    evals = [(xg_train, 'train'), (xg_test, 'eval')]
    # Train xgboost
    print("Training classifier...")
    t1 = time.time()
    bst = xgboost.train(param, xg_train, 500, evals, early_stopping_rounds=10)
    xgboost.plot_importance(bst)
    t2 = time.time()
    print(t2 - t1)
    bst.save_model(classifier_filename(model_name))
    return bst
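Note that plot_importance only draws onto an Axes; in a plain (non-interactive) script nothing appears on screen unless the figure is shown or saved afterwards, e.g.:

import matplotlib.pyplot as plt
xgboost.plot_importance(bst)
plt.show()  # or plt.savefig('importance.png') in headless environments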
Example 2: test_importance_plot_lim
def test_importance_plot_lim(self):
    np.random.seed(1)
    dm = xgb.DMatrix(np.random.randn(100, 100), label=[0, 1] * 50)
    bst = xgb.train({}, dm)
    assert len(bst.get_fscore()) == 71
    ax = xgb.plot_importance(bst)
    assert ax.get_xlim() == (0., 11.)
    assert ax.get_ylim() == (-1., 71.)
    ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
    assert ax.get_xlim() == (0., 5.)
    assert ax.get_ylim() == (10., 71.)
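Besides xlim/ylim, more recent xgboost releases also accept an importance_type keyword ('weight', 'gain', or 'cover'); treat the exact availability as version-dependent. A sketch:

ax = xgb.plot_importance(bst, importance_type='gain', show_values=False)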
Example 3: run_xgb
def run_xgb(train, test, features, target, random_state=0):
    eta = 0.02
    max_depth = 5
    subsample = 0.75
    colsample_bytree = 0.7
    start_time = time.time()
    print('XGBoost params. ETA: {}, MAX_DEPTH: {}, SUBSAMPLE: {}, COLSAMPLE_BY_TREE: {}'.format(eta, max_depth, subsample, colsample_bytree))
    params = {
        "objective": "multi:softprob",
        "num_class": 12,
        "booster": "gbtree",
        "eval_metric": "mlogloss",
        "eta": eta,
        "max_depth": max_depth,
        "subsample": subsample,
        "colsample_bytree": colsample_bytree,
        "silent": 1,
        "seed": random_state,
    }
    num_boost_round = 500 * 2
    early_stopping_rounds = 50
    test_size = 0.3
    X_train, X_valid = train_test_split(train, test_size=test_size, random_state=random_state)
    print('Length train:', len(X_train.index))
    print('Length valid:', len(X_valid.index))
    y_train = X_train[target]
    y_valid = X_valid[target]
    dtrain = xgb.DMatrix(X_train[features], y_train)
    dvalid = xgb.DMatrix(X_valid[features], y_valid)
    watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
    gbm = xgb.train(params, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=early_stopping_rounds, verbose_eval=True)
    print("Importance of features:")
    xgb.plot_importance(gbm)
    plt.show()
    # time.sleep(60*5)
    print("Validating...")
    check = gbm.predict(xgb.DMatrix(X_valid[features]), ntree_limit=gbm.best_iteration)
    score = log_loss(y_valid.tolist(), check)
    print("Predict test set...")
    test_prediction = gbm.predict(xgb.DMatrix(test[features]), ntree_limit=gbm.best_iteration)
    print('Training time: {} minutes'.format(round((time.time() - start_time) / 60, 2)))
    return test_prediction.tolist(), score
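One subtlety worth flagging: with early stopping, classic xgboost exposes both best_iteration (the 0-based index of the best round) and best_ntree_limit; capping predictions at best_iteration, as above, likely excludes the best round itself. A hedged fix:

check = gbm.predict(xgb.DMatrix(X_valid[features]), ntree_limit=gbm.best_ntree_limit)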
Example 4: run_train_validation
def run_train_validation(self):
    x_train, y_train, x_validation, y_validation = self.get_train_validationset()
    dtrain = xgb.DMatrix(x_train, label=y_train, feature_names=x_train.columns)
    dvalidation = xgb.DMatrix(x_validation, label=y_validation, feature_names=x_validation.columns)
    self.set_xgb_parameters()
    evals = [(dtrain, 'train'), (dvalidation, 'eval')]
    model = xgb.train(self.xgb_params, dtrain, evals=evals, **self.xgb_learning_params)
    xgb.plot_importance(model)
    plt.show()
    print("features used:\n {}".format(self.get_used_features()))
    return
Example 5: test_sklearn_plotting
def test_sklearn_plotting():
    tm._skip_if_no_sklearn()
    from sklearn.datasets import load_iris

    iris = load_iris()
    classifier = xgb.XGBClassifier()
    classifier.fit(iris.data, iris.target)

    import matplotlib
    matplotlib.use('Agg')
    from matplotlib.axes import Axes
    from graphviz import Digraph

    ax = xgb.plot_importance(classifier)
    assert isinstance(ax, Axes)
    assert ax.get_title() == 'Feature importance'
    assert ax.get_xlabel() == 'F score'
    assert ax.get_ylabel() == 'Features'
    assert len(ax.patches) == 4

    g = xgb.to_graphviz(classifier, num_trees=0)
    assert isinstance(g, Digraph)

    ax = xgb.plot_tree(classifier, num_trees=0)
    assert isinstance(ax, Axes)
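to_graphviz and plot_tree require the graphviz Python package plus the system Graphviz binaries. The returned Digraph can also be rendered to a file, e.g. (a sketch):

g = xgb.to_graphviz(classifier, num_trees=0)
g.render('tree0', format='png')  # writes the source as 'tree0' and the image as 'tree0.png'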
Example 6: test_plotting
def test_plotting(self):
    bst2 = xgb.Booster(model_file='xgb.model')
    # plotting
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib.axes import Axes
    from graphviz import Digraph

    ax = xgb.plot_importance(bst2)
    assert isinstance(ax, Axes)
    assert ax.get_title() == 'Feature importance'
    assert ax.get_xlabel() == 'F score'
    assert ax.get_ylabel() == 'Features'
    assert len(ax.patches) == 4

    ax = xgb.plot_importance(bst2, color='r',
                             title='t', xlabel='x', ylabel='y')
    assert isinstance(ax, Axes)
    assert ax.get_title() == 't'
    assert ax.get_xlabel() == 'x'
    assert ax.get_ylabel() == 'y'
    assert len(ax.patches) == 4
    for p in ax.patches:
        assert p.get_facecolor() == (1.0, 0, 0, 1.0)  # red

    ax = xgb.plot_importance(bst2, color=['r', 'r', 'b', 'b'],
                             title=None, xlabel=None, ylabel=None)
    assert isinstance(ax, Axes)
    assert ax.get_title() == ''
    assert ax.get_xlabel() == ''
    assert ax.get_ylabel() == ''
    assert len(ax.patches) == 4
    assert ax.patches[0].get_facecolor() == (1.0, 0, 0, 1.0)  # red
    assert ax.patches[1].get_facecolor() == (1.0, 0, 0, 1.0)  # red
    assert ax.patches[2].get_facecolor() == (0, 0, 1.0, 1.0)  # blue
    assert ax.patches[3].get_facecolor() == (0, 0, 1.0, 1.0)  # blue

    g = xgb.to_graphviz(bst2, num_trees=0)
    assert isinstance(g, Digraph)

    ax = xgb.plot_tree(bst2, num_trees=0)
    assert isinstance(ax, Axes)
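The 'xgb.model' file loaded above would have been produced by an earlier training step, for instance (a sketch; params and dtrain are assumed from surrounding context):

bst = xgb.train(params, dtrain, num_boost_round=10)
bst.save_model('xgb.model')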
Example 7: save_topn_features
def save_topn_features(self, fname="XGBRegressor_topn_features.txt", topn=-1):
    ax = xgb.plot_importance(self.model)
    yticklabels = ax.get_yticklabels()[::-1]
    if topn == -1:
        topn = len(yticklabels)
    else:
        topn = min(topn, len(yticklabels))
    with open(fname, "w") as f:
        for i in range(topn):
            f.write("%s\n" % yticklabels[i].get_text())
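Reading tick labels off the plot works, but the same ranking is available directly from the booster, without going through matplotlib (a sketch; recent sklearn-wrapper versions expose the booster via get_booster(), older ones via booster()):

booster = model.get_booster()  # `model` is a fitted XGBRegressor
scores = booster.get_score(importance_type='weight')
top_features = sorted(scores, key=scores.get, reverse=True)[:10]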
Example 8: plot_feat_importances
def plot_feat_importances():
    # X_train, y_train, df_xgb and the fitted random forest `rf` are assumed
    # to be defined elsewhere in the original script.
    gbm = xgboost.XGBClassifier(silent=False, seed=8).fit(X_train, y_train)
    plot = xgboost.plot_importance(gbm)
    ticks = plot.set_yticklabels(df_xgb.columns)

    # Companion plot: random forest importances with error bars for comparison.
    importances = rf.feature_importances_
    std = np.std([tree.feature_importances_ for tree in rf.estimators_],
                 axis=0)
    indices = np.argsort(importances)
    plt.barh(range(len(indices)), importances[indices], yerr=std[indices], color='lightblue')
    ticks = plt.yticks(range(len(indices)), df_xgb.columns)
Example 9: plot_importance
def plot_importance(self, ax=None, height=0.2,
                    xlim=None, title='Feature importance',
                    xlabel='F score', ylabel='Features',
                    grid=True, **kwargs):
    """Plot importance based on fitted trees.

    Parameters
    ----------
    ax : matplotlib Axes, default None
        Target axes instance. If None, new figure and axes will be created.
    height : float, default 0.2
        Bar height, passed to ax.barh()
    xlim : tuple, default None
        Tuple passed to axes.xlim()
    title : str, default "Feature importance"
        Axes title. To disable, pass None.
    xlabel : str, default "F score"
        X axis title label. To disable, pass None.
    ylabel : str, default "Features"
        Y axis title label. To disable, pass None.
    kwargs :
        Other keywords passed to ax.barh()

    Returns
    -------
    ax : matplotlib Axes
    """
    import xgboost as xgb
    if not isinstance(self._df.estimator, xgb.XGBModel):
        raise ValueError('estimator must be XGBRegressor or XGBClassifier')
    # Forward `grid` rather than hard-coding True, so the parameter is honored.
    return xgb.plot_importance(self._df.estimator.booster(),
                               ax=ax, height=height, xlim=xlim, title=title,
                               xlabel=xlabel, ylabel=ylabel, grid=grid, **kwargs)
Example 10: range
fscore_lo = np.percentile(fscore, 2.5, axis=0)
fscore_hi = np.percentile(fscore, 97.5, axis=0)
ind_sort = np.array(np.argsort(fscore_mean))
fscore_mean_sorted = fscore_mean[ind_sort]
# ci_sorted = fscore_ci[ind_sort]
fscore_lo_sorted = fscore_lo[ind_sort]
fscore_hi_sorted = fscore_hi[ind_sort]
feature_label_sorted = feature_label[ind_sort]
feature_label_short = []
for i in range(feature_label_sorted.size):
    feature_label_short.append(dic[feature_label_sorted[i]])

get_ipython().magic(u'matplotlib inline')  # notebook magic: %matplotlib inline
plt.figure(figsize=(4, 12))
axes = plt.gca()
# plt.barh(np.arange(val_sorted.size), val_sorted, xerr=ci_sorted, height=.7, color=(.4,.4,.8), align='center', ecolor=(0,0,0))
plt.barh(np.arange(fscore_mean_sorted.size), fscore_mean_sorted,
         xerr=np.array([fscore_mean_sorted - fscore_lo_sorted, fscore_hi_sorted - fscore_mean_sorted]),
         height=.7, color=(.4, .4, .8), align='center', ecolor=(0, 0, 0))
plt.yticks(np.arange(len(feature_label_short)), feature_label_short, fontsize=12, color=(0, 0, 0))
# axes.set_ylim([3.5, len(feature_label_short)-9.5])
# axes.set_xlim([0, 0.04])
plt.box(on=False)
plt.xlabel('Gini Importance', fontsize=14)
plt.grid()

# In[ ]:
np.percentile(fscore, 2.5, axis=0)
xgb.plot_importance(bst)  # plot_importance needs a fitted Booster; `bst` is assumed from an earlier notebook cell
Example 11: print
del xgb_train, xgb_val
gc.collect()
cv_scores.append(roc_auc_score(y_val, bst.predict(xgb.DMatrix(X_val), ntree_limit=bst.best_ntree_limit)))
print(cv_scores)
print('predicting...')
if i == 0:
    pred = bst.predict(xgb.DMatrix(np.array(test_x)),
                       ntree_limit=bst.best_ntree_limit)
else:
    pred += bst.predict(xgb.DMatrix(np.array(test_x)),
                        ntree_limit=bst.best_ntree_limit)
del train_x, train_y
gc.collect()

print('mean_score:', np.mean(cv_scores))
pred /= folds
df_test['is_churn'] = pred.clip(0.0000001, 0.999999)
df_test = df_test[['msno', 'is_churn']]
# df_test.to_csv(out_path + 'stack_submissions{}.csv'.format(datetime.now().strftime("%Y%m%d-%H%M%S")), index=False)
df_test = []

plt.rcParams['figure.figsize'] = (7.0, 7.0)
xgb.plot_importance(booster=bst)
plt.show()
# plt.savefig('./feature_importance.png', dpi=100)
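If the commented-out savefig is re-enabled, it should run before plt.show(): in a non-interactive script, show() blocks until the window closes and the figure is torn down, so saving afterwards writes an empty image. A sketch:

xgb.plot_importance(booster=bst)
plt.savefig('./feature_importance.png', dpi=100)  # save first
plt.show()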
Example 12: print
cv_xgb = xgb.cv(params=our_params, dtrain=xgdmat, num_boost_round=3000, nfold=5,
                metrics=['error'],  # make sure metrics is passed inside a list, or you may encounter issues
                early_stopping_rounds=100)  # look for the early stopping point that minimizes error
print('Tail:\n')
print(cv_xgb.tail(5))

our_params = {'eta': 0.1, 'seed': 0, 'subsample': 0.8, 'colsample_bytree': 0.8,
              'objective': 'binary:logistic', 'max_depth': 3, 'min_child_weight': 1}
print('Final Train: \n')
final_gb = xgb.train(our_params, xgdmat, num_boost_round=432)
xgb.plot_importance(final_gb)
plt.show()

# Predicting:
testdmat = xgb.DMatrix(X_pred)
y_pred = final_gb.predict(testdmat)
y_pred[y_pred > 0.5] = 1
y_pred[y_pred <= 0.5] = 0
y_pred = y_pred.astype(np.int64)

# Submission
submission = pd.DataFrame({
    "PassengerId": test_df["PassengerId"],
    "Survived": y_pred
})
Example 13: print
# xgboost.train(params, dtrain, num_boost_round=10, evals=(), obj=None,
#               feval=None, maximize=False, early_stopping_rounds=None,
#               evals_result=None, verbose_eval=True, learning_rates=None,
#               xgb_model=None)

evallist = [(dtest, 'eval'), (dtrain, 'train')]
watchlist = [(xg_train, 'train'), (xg_test, 'test')]
evals_result = {}
num_round = 10
# Note: evals_result is only populated when an evals list (e.g. watchlist) is passed to train().
bst = xgb.train(param, xg_train, num_round, evals_result=evals_result)
pred = bst.predict(xg_test)
print('predicting, classification error=%f' % (sum(int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y))))
xgb.plot_importance(bst)
xgb.plot_tree(bst, num_trees=2)

# ============= Logistic Regression =============================================================
# Define sigmoid function
def sigmoid(z):
    # assumes e (and log below) were imported earlier, e.g. from numpy import e, log
    return 1 / (1 + e ** (-z))

# Calculate the cost to be minimized -- using the sigmoid function
def cost(theta, X, y, l):
    m = X.shape[0]  # number of rows in the data
    z = X.dot(theta)
    O = (-1 / m) * (log(sigmoid(z)).T.dot(y) + log(1 - sigmoid(z)).T.dot((1 - y)))
    # print(m)
    # print(theta)
Example 14: format
# test = []
pred2 = model.predict(dtest)
df2 = pd.DataFrame()
df2["Original"] = testDelay
df2["Predicted"] = pred2
df2.to_csv('compareDelay.csv', index=False)

import matplotlib.pyplot as plt
plt.style.use("ggplot")

# Map xgboost's generated feature names (f0, f1, ...) back to the real column names.
mapper = {'f{0}'.format(i): v for i, v in enumerate(train.columns)}
mapped = {mapper[k]: v for k, v in model.get_fscore().items()}

import operator
# plot_importance accepts a dict of feature -> score; sort separately for the DataFrame below.
xgb.plot_importance(mapped)
plt.show()

mapped_sorted = sorted(mapped.items(), key=operator.itemgetter(1))
df = pd.DataFrame(mapped_sorted, columns=['feature', 'fscore'])
df['fscore'] = df['fscore'] / df['fscore'].sum()
df.plot(kind='barh', x='feature', y='fscore', legend=False, figsize=(25, 15))
plt.title('XGBoost Feature Importance')
plt.xlabel('relative importance')
plt.gcf().savefig('feature_importance_xgb.png')

xx = np.linspace(-10, 500)
yy = xx
h0 = plt.plot(xx, yy, 'k-', label="ideal Values")
plt.scatter(df2.Original, df2.Predicted, c='y')
plt.legend()
plt.show()
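The f0/f1 renaming above is only needed because the DMatrix was presumably built from a bare numpy array; supplying feature names up front avoids it (a sketch, where train_values and labels stand in for the arrays used in the original script):

dtrain = xgb.DMatrix(train_values, label=labels, feature_names=list(train.columns))
# get_fscore() and plot_importance() will then report the real column names directly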
Example 15: plot_importance
def plot_importance(self):
    ax = xgb.plot_importance(self.model)
    self.save_topn_features()
    return ax