

Python xgboost.plot_importance Function Code Examples

This article collects typical usage examples of the xgboost.plot_importance function in Python. If you have been wondering what exactly plot_importance does, how to call it, or what real-world usage looks like, the curated examples below should help.


Below are 15 code examples of the plot_importance function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
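
Before diving into the examples, here is a minimal, self-contained sketch of the typical call pattern. The toy dataset and parameter values are illustrative assumptions, not taken from any example below:

import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb

# Toy data: any numeric feature matrix with labels will do.
X = np.random.randn(200, 5)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

dtrain = xgb.DMatrix(X, label=y)
bst = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=20)

# plot_importance returns a matplotlib Axes with one horizontal bar per feature,
# ranked by how often each feature is used to split (the "F score").
ax = xgb.plot_importance(bst)
plt.show()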

Example 1: train_helper

def train_helper(X_train, X_test, y_train, y_test, model_name):
    xg_train = xgboost.DMatrix(X_train, label=y_train)
    xg_test = xgboost.DMatrix(X_test, label=y_test)

    le = load_label_encoder(model_name)

    param = {}
    # use softmax multi-class classification
    param['objective'] = 'multi:softprob'
    param['eta'] = 0.002
    param['max_depth'] = 7
    param['nthread'] = 7
    param['num_class'] = len(le.classes_)
    param['eval_metric'] = 'merror'

    evals = [ (xg_train, 'train'), (xg_test, 'eval') ]

    # Train xgboost
    print "Training classifier..."
    t1 = time.time()
    bst = xgboost.train(param, xg_train, 500, evals, early_stopping_rounds=10)
    xgboost.plot_importance(bst)
    t2 = time.time()
    print(t2 - t1)
    bst.save_model(classifier_filename(model_name))
    return bst
Developer: smurching, Project: pokemon_ai, Lines of code: 26, Source file: model_xgb_tiered.py

Example 2: test_importance_plot_lim

    def test_importance_plot_lim(self):
        np.random.seed(1)
        dm = xgb.DMatrix(np.random.randn(100, 100), label=[0, 1] * 50)
        bst = xgb.train({}, dm)
        assert len(bst.get_fscore()) == 71
        ax = xgb.plot_importance(bst)
        assert ax.get_xlim() == (0., 11.)
        assert ax.get_ylim() == (-1., 71.)

        ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
        assert ax.get_xlim() == (0., 5.)
        assert ax.get_ylim() == (10., 71.)
Developer: BayronP, Project: xgboost, Lines of code: 12, Source file: test_plotting.py

Example 3: run_xgb

def run_xgb(train, test, features, target, random_state=0):
    eta = 0.02
    max_depth = 5 
    subsample = 0.75
    colsample_bytree = 0.7
    start_time = time.time()

    print('XGBoost params. ETA: {}, MAX_DEPTH: {}, SUBSAMPLE: {}, COLSAMPLE_BY_TREE: {}'.format(eta, max_depth, subsample, colsample_bytree))
    params = {
        "objective": "multi:softprob",
        "num_class": 12,
        "booster" : "gbtree",
        "eval_metric": "mlogloss",
        "eta": eta,
        "max_depth": max_depth,
        "subsample": subsample,
        "colsample_bytree": colsample_bytree,
        "silent": 1,
        "seed": random_state,
    }
    num_boost_round = 500*2
    early_stopping_rounds = 50
    test_size = 0.3

    X_train, X_valid = train_test_split(train, test_size=test_size, random_state=random_state)
    print('Length train:', len(X_train.index))
    print('Length valid:', len(X_valid.index))
    y_train = X_train[target]
    y_valid = X_valid[target]
    dtrain = xgb.DMatrix(X_train[features], y_train)
    dvalid = xgb.DMatrix(X_valid[features], y_valid)

    watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
    gbm = xgb.train(params, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=early_stopping_rounds, verbose_eval=True)

    print "importance of feathure"
    xgb.plot_importance(gbm)
    show()


    #time.sleep(60*5)

    print("Validating...")
    check = gbm.predict(xgb.DMatrix(X_valid[features]), ntree_limit=gbm.best_iteration)
    score = log_loss(y_valid.tolist(), check)

    print("Predict test set...")
    test_prediction = gbm.predict(xgb.DMatrix(test[features]), ntree_limit=gbm.best_iteration)

    print('Training time: {} minutes'.format(round((time.time() - start_time)/60, 2)))
    return test_prediction.tolist(), score
Developer: worldwar2008, Project: kg, Lines of code: 51, Source file: gq.py

Example 4: run_train_validation

    def run_train_validation(self):
        x_train, y_train, x_validation, y_validation = self.get_train_validationset()
        dtrain = xgb.DMatrix(x_train, label=y_train, feature_names=x_train.columns)
        dvalidation = xgb.DMatrix(x_validation, label=y_validation, feature_names=x_validation.columns)
        self.set_xgb_parameters()

        evals = [(dtrain, 'train'), (dvalidation, 'eval')]
        model = xgb.train(self.xgb_params, dtrain, evals=evals, **self.xgb_learning_params)
        xgb.plot_importance(model)
        plt.show()

        print("features used:\n {}".format(self.get_used_features()))

        return
Developer: LevinJ, Project: Supply-demand-forecasting, Lines of code: 14, Source file: xgbbasemodel.py

Example 5: test_sklearn_plotting

def test_sklearn_plotting():
    tm._skip_if_no_sklearn()
    from sklearn.datasets import load_iris

    iris = load_iris()

    classifier = xgb.XGBClassifier()
    classifier.fit(iris.data, iris.target)

    import matplotlib
    matplotlib.use('Agg')

    from matplotlib.axes import Axes
    from graphviz import Digraph

    ax = xgb.plot_importance(classifier)
    assert isinstance(ax, Axes)
    assert ax.get_title() == 'Feature importance'
    assert ax.get_xlabel() == 'F score'
    assert ax.get_ylabel() == 'Features'
    assert len(ax.patches) == 4

    g = xgb.to_graphviz(classifier, num_trees=0)
    assert isinstance(g, Digraph)

    ax = xgb.plot_tree(classifier, num_trees=0)
    assert isinstance(ax, Axes)
Developer: ChangXiaodong, Project: xgboost-withcomments, Lines of code: 27, Source file: test_with_sklearn.py

Example 6: test_plotting

    def test_plotting(self):
        bst2 = xgb.Booster(model_file='xgb.model')
        # plotting

        import matplotlib
        matplotlib.use('Agg')

        from matplotlib.axes import Axes
        from graphviz import Digraph

        ax = xgb.plot_importance(bst2)
        assert isinstance(ax, Axes)
        assert ax.get_title() == 'Feature importance'
        assert ax.get_xlabel() == 'F score'
        assert ax.get_ylabel() == 'Features'
        assert len(ax.patches) == 4

        ax = xgb.plot_importance(bst2, color='r',
                                 title='t', xlabel='x', ylabel='y')
        assert isinstance(ax, Axes)
        assert ax.get_title() == 't'
        assert ax.get_xlabel() == 'x'
        assert ax.get_ylabel() == 'y'
        assert len(ax.patches) == 4
        for p in ax.patches:
            assert p.get_facecolor() == (1.0, 0, 0, 1.0) # red


        ax = xgb.plot_importance(bst2, color=['r', 'r', 'b', 'b'],
                                 title=None, xlabel=None, ylabel=None)
        assert isinstance(ax, Axes)
        assert ax.get_title() == ''
        assert ax.get_xlabel() == ''
        assert ax.get_ylabel() == ''
        assert len(ax.patches) == 4
        assert ax.patches[0].get_facecolor() == (1.0, 0, 0, 1.0) # red
        assert ax.patches[1].get_facecolor() == (1.0, 0, 0, 1.0) # red
        assert ax.patches[2].get_facecolor() == (0, 0, 1.0, 1.0) # blue
        assert ax.patches[3].get_facecolor() == (0, 0, 1.0, 1.0) # blue

        g = xgb.to_graphviz(bst2, num_trees=0)
        assert isinstance(g, Digraph)
        ax = xgb.plot_tree(bst2, num_trees=0)
        assert isinstance(ax, Axes)
Developer: ndingwall, Project: xgboost, Lines of code: 44, Source file: test_basic.py

Example 7: save_topn_features

    def save_topn_features(self, fname="XGBRegressor_topn_features.txt", topn=-1):
        ax = xgb.plot_importance(self.model)
        yticklabels = ax.get_yticklabels()[::-1]
        if topn == -1:
            topn = len(yticklabels)
        else:
            topn = min(topn, len(yticklabels))
        with open(fname, "w") as f:
            for i in range(topn):
                f.write("%s\n" % yticklabels[i].get_text())
Developer: Anhmike, Project: Kaggle_HomeDepot, Lines of code: 10, Source file: xgb_utils.py

Example 8: plot_feat_importances

def plot_feat_importances():
    gbm = xgboost.XGBClassifier(silent=False, seed=8).fit(X_train, y_train)
    plot = xgboost.plot_importance(gbm)
    ticks = plot.set_yticklabels(df_xgb.columns)

    importances = rf.feature_importances_
    std = np.std([tree.feature_importances_ for tree in rf.estimators_],
                 axis=0)
    indices = np.argsort(importances)
    plt.barh(range(len(indices)), importances[indices], yerr=std[indices], color='lightblue')
    ticks = plt.yticks(range(len(indices)), df_xgb.columns)
Developer: Nathx, Project: ride_sharing_churn, Lines of code: 11, Source file: churn.py

Example 9: plot_importance

    def plot_importance(self, ax=None, height=0.2,
                        xlim=None, title='Feature importance',
                        xlabel='F score', ylabel='Features',
                        grid=True, **kwargs):

        """Plot importance based on fitted trees.

        Parameters
        ----------
        ax : matplotlib Axes, default None
            Target axes instance. If None, new figure and axes will be created.
        height : float, default 0.2
            Bar height, passed to ax.barh()
        xlim : tuple, default None
            Tuple passed to axes.xlim()
        title : str, default "Feature importance"
            Axes title. To disable, pass None.
        xlabel : str, default "F score"
            X axis title label. To disable, pass None.
        ylabel : str, default "Features"
            Y axis title label. To disable, pass None.
        grid : bool, default True
            Whether to draw axes grid lines.
        kwargs :
            Other keywords passed to ax.barh()

        Returns
        -------
        ax : matplotlib Axes
        """

        import xgboost as xgb

        if not isinstance(self._df.estimator, xgb.XGBModel):
            raise ValueError('estimator must be XGBRegressor or XGBClassifier')
        # Pass the grid argument through rather than hard-coding True.
        return xgb.plot_importance(self._df.estimator.booster(),
                                   ax=ax, height=height, xlim=xlim, title=title,
                                   xlabel=xlabel, ylabel=ylabel, grid=grid, **kwargs)
Developer: Sandy4321, Project: pandas-ml, Lines of code: 36, Source file: base.py
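
For context, a quick sketch of how these pass-through keywords behave when calling xgb.plot_importance directly; the booster and data below are illustrative stand-ins, not part of the pandas-ml example above:

import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb

# Illustrative booster; pandas-ml would supply the wrapped estimator's booster.
dtrain = xgb.DMatrix(np.random.randn(100, 4), label=np.random.randint(0, 2, 100))
bst = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=10)

# ax, height, xlim, title, xlabel, ylabel, and grid are forwarded to the plot.
fig, ax = plt.subplots(figsize=(6, 4))
xgb.plot_importance(bst, ax=ax, height=0.4, xlim=(0, 20),
                    title='Feature importance', xlabel='F score',
                    ylabel='Features', grid=False)
plt.show()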

Example 10: range

fscore_lo = np.percentile(fscore, 2.5, axis=0)
fscore_hi = np.percentile(fscore, 97.5, axis=0)
ind_sort = np.array(np.argsort(fscore_mean))
fscore_mean_sorted = fscore_mean[ind_sort]
# ci_sorted = fscore_ci[ind_sort]
fscore_lo_sorted = fscore_lo[ind_sort]
fscore_hi_sorted = fscore_hi[ind_sort]
feature_label_sorted = feature_label[ind_sort]
feature_label_short = []
for i in range(feature_label_sorted.size):
    feature_label_short.append(dic[feature_label_sorted[i]])
    
get_ipython().magic(u'matplotlib inline')
plt.figure(figsize=(4,12))
axes = plt.gca()
# plt.barh(np.arange(val_sorted.size), val_sorted, xerr=ci_sorted, height=.7, color=(.4,.4,.8), align='center', ecolor=(0,0,0))
plt.barh(np.arange(fscore_mean_sorted.size), fscore_mean_sorted,
         xerr=np.array([fscore_mean_sorted - fscore_lo_sorted,
                        fscore_hi_sorted - fscore_mean_sorted]),
         height=.7, color=(.4, .4, .8), align='center', ecolor=(0, 0, 0))
plt.yticks(np.arange(len(feature_label_short)), feature_label_short, fontsize=12, color=(0,0,0));
# axes.set_ylim([3.5, len(feature_label_short)-9.5])
# axes.set_xlim([0, 0.04])
plt.box(on=False)
plt.xlabel('Gini Importance',fontsize=14)
plt.grid()


# In[ ]:

np.percentile(fscore, 2.5, axis=0)
xgb.plot_importance()  # incomplete as scraped: plot_importance requires a Booster (or dict) argument

Developer: sosata, Project: CS120DataAnalysis, Lines of code: 29, Source file: show_importance.py

Example 11: print

    del xgb_train, xgb_val
    gc.collect()

    cv_scores.append(roc_auc_score(y_val, bst.predict(xgb.DMatrix(X_val), ntree_limit=bst.best_ntree_limit)))
    print(cv_scores)

    print('predicting...')
    if i == 0:
        pred = bst.predict(xgb.DMatrix(np.array(test_x)),
                               ntree_limit=bst.best_ntree_limit)
    else:
        pred += bst.predict(xgb.DMatrix(np.array(test_x)),
                                ntree_limit=bst.best_ntree_limit)

del train_x, train_y
gc.collect()

print('mean_score:', np.mean(cv_scores))

pred /= folds
df_test['is_churn'] = pred.clip(0.0000001, 0.999999)
df_test = df_test[['msno', 'is_churn']]

# df_test.to_csv(out_path + 'stack_submissions{}.csv'.format(datetime.now().strftime("%Y%m%d-%H%M%S")), index=False)
df_test = []

plt.rcParams['figure.figsize'] = (7.0, 7.0)
xgb.plot_importance(booster=bst)
plt.show()
# plt.savefig('./feature_importance.png', dpi=100)
Developer: zgcgreat, Project: WSDM, Lines of code: 30, Source file: stacking_fit.py

Example 12: print

cv_xgb = xgb.cv(params = our_params, dtrain = xgdmat, num_boost_round = 3000, nfold = 5,
                metrics = ['error'], # Make sure you enter metrics inside a list or you may encounter issues!
                early_stopping_rounds = 100) # Look for early stopping that minimizes error

print('Tail:\n')
print(cv_xgb.tail(5))


our_params = {'eta': 0.1, 'seed':0, 'subsample': 0.8, 'colsample_bytree': 0.8,
             'objective': 'binary:logistic', 'max_depth':3, 'min_child_weight':1}

print('Final Train: \n')
final_gb = xgb.train(our_params, xgdmat, num_boost_round = 432)

xgb.plot_importance(final_gb)
plt.show()
#Predicting:

testdmat = xgb.DMatrix(X_pred)
y_pred = final_gb.predict(testdmat)

y_pred[y_pred > 0.5] = 1
y_pred[y_pred <= 0.5] = 0

y_pred = y_pred.astype(np.int64)
#Submission

submission = pd.DataFrame({
        "PassengerId": test_df["PassengerId"],
        "Survived": y_pred
})
Developer: grixxy, Project: ml_python, Lines of code: 30, Source file: TitanicXGBoost.py

Example 13: print

##xgboost.train(params, dtrain, num_boost_round=10, evals=(), obj=None, 
##              feval=None, maximize=False, early_stopping_rounds=None, 
##              evals_result=None, verbose_eval=True, learning_rates=None, 
##              xgb_model=None)
#
evallist  = [(dtest,'eval'), (dtrain,'train')]

watchlist = [ (xg_train,'train'), (xg_test, 'test') ]
evals_result = {}
num_round = 10
bst = xgb.train(param,xg_train, num_round, evals_result=evals_result)
pred = bst.predict(xg_test)

print('predicting, classification error=%f' % (sum(int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y))))

xgb.plot_importance(bst)
xgb.plot_tree(bst, num_trees=2)

#=============Logistic Regression==============================================================

#Define sigmoid function (assumes numpy's e and log are in scope, e.g. from numpy import e, log)
def sigmoid(z):
    return 1 / (1 + e**(-z))

#Calcualte the cost to be minimized -- using the sigmoid function
def cost(theta, X, y, l):
    m = X.shape[0] #Number of rows in the data
    z = X.dot(theta)
    O = (-1 / m) * (log(sigmoid(z)).T.dot(y)  +  log(1-sigmoid(z)).T.dot((1-y)))
#    print(m)
#    print(theta)
Developer: tijohnso, Project: Usyd_masters, Lines of code: 31, Source file: mlass1_9.py

Example 14: format

#test = []
pred2 = model.predict(dtest)

df2 = pd.DataFrame()
df2["Original"] = testDelay
df2["Predicted"] = pred2
df2.to_csv('compareDelay.csv', index=False)


import matplotlib.pyplot as plt
plt.style.use("ggplot")
mapper = {'f{0}'.format(i): v for i, v in enumerate(train.columns)}
mapped = {mapper[k]: v for k, v in model.get_fscore().items()}
import operator
mapped = sorted(mapped.items(), key=operator.itemgetter(1))
xgb.plot_importance(dict(mapped))  # plot_importance accepts a dict of feature scores
plt.show()
df = pd.DataFrame(mapped, columns=['feature', 'fscore'])
df['fscore'] = df['fscore'] / df['fscore'].sum()
df.plot(kind='barh', x='feature', y='fscore', legend=False, figsize=(25, 15))
plt.title('XGBoost Feature Importance')
plt.xlabel('relative importance')
plt.gcf().savefig('feature_importance_xgb.png')


xx = np.linspace(-10,500)
yy = xx
h0 = plt.plot(xx, yy, 'k-', label="ideal Values")
plt.scatter(df2.Original, df2.Predicted, c='y')
plt.legend()
plt.show()
Developer: DEK11, Project: Predicting-EOB-delay, Lines of code: 31, Source file: linear.py
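
Example 14 manually remaps xgboost's default f0, f1, ... feature keys back to column names via get_fscore(). As a side note, here is a sketch of an alternative, assuming the training data is a pandas DataFrame: building the DMatrix with feature_names makes plot_importance label the bars with real column names directly (the DataFrame, column names, and objective below are illustrative):

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb

# Illustrative stand-ins for the training data used in the example.
train = pd.DataFrame(np.random.randn(100, 3), columns=['dow', 'hour', 'distance'])
labels = np.random.randn(100)

# With feature_names set, get_fscore() and plot_importance() report real column
# names instead of the generic f0, f1, ... keys, so no manual mapper is needed.
dtrain = xgb.DMatrix(train.values, label=labels, feature_names=list(train.columns))
model = xgb.train({'objective': 'reg:linear'}, dtrain, num_boost_round=20)

xgb.plot_importance(model)
plt.show()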

Example 15: plot_importance

    def plot_importance(self):
        ax = xgb.plot_importance(self.model)
        self.save_topn_features()
        return ax
Developer: Anhmike, Project: Kaggle_HomeDepot, Lines of code: 4, Source file: xgb_utils.py


Note: The xgboost.plot_importance examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by many developers; copyright in the source code belongs to the original authors, and redistribution or use should follow each project's license. Do not reproduce without permission.