当前位置: 首页>>代码示例>>Python>>正文


Python xgboost.cv函数代码示例

本文整理汇总了Python中xgboost.cv函数的典型用法代码示例。如果您正苦于以下问题:Python cv函数的具体用法?Python cv怎么用?Python cv使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了cv函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train

    def train(self):
        """Cross-validate to size n_estimators, then fit self.clf on self.df.

        Preprocesses the frame, uses every column except the target/id
        columns as predictors, runs 5-fold xgb.cv with early stopping to
        pick n_estimators, fits the sklearn-style classifier, and prints
        train-set metrics for binary or linear targets.
        """
        print('#### preprocessing ####')
        self.df = self.preprocess(self.df)

        print('#### training ####')
        self.predictors = [x for x in self.df.columns if x not in [self.target_column, self.id_column]]
        xgb_param = self.clf.get_xgb_params()

        xgtrain = xgb.DMatrix(self.df[self.predictors], label=self.df[self.target_column], missing=np.nan)
        # The verbosity keyword changed name across xgboost releases
        # (show_progress -> verbose_eval), so fall back through the known
        # signatures. Catch TypeError only: the previous bare `except:` also
        # swallowed genuine CV failures (bad params, data errors) and even
        # KeyboardInterrupt, then pointlessly retried.
        try:
            cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=self.clf.get_params()['n_estimators'], nfold=5,
                metrics=[self.scoring], early_stopping_rounds=self.early_stopping_rounds, show_progress=self.verbose)
        except TypeError:
            try:
                cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=self.clf.get_params()['n_estimators'], nfold=5,
                    metrics=[self.scoring], early_stopping_rounds=self.early_stopping_rounds, verbose_eval=self.verbose)
            except TypeError:
                cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=self.clf.get_params()['n_estimators'], nfold=5,
                    metrics=[self.scoring], early_stopping_rounds=self.early_stopping_rounds)
        # CV stopped early at the best round count; adopt it for the final fit.
        self.clf.set_params(n_estimators=cvresult.shape[0])
        self.clf.fit(self.df[self.predictors], self.df[self.target_column], eval_metric=self.scoring)

        # Predict training set:
        train_df_predictions = self.clf.predict(self.df[self.predictors])

        if self.target_type == 'binary':
            train_df_predprob = self.clf.predict_proba(self.df[self.predictors])[:, 1]
            print("Accuracy : %.4g" % metrics.accuracy_score(self.df[self.target_column].values, train_df_predictions))
            print("AUC Score (Train): %f" % metrics.roc_auc_score(self.df[self.target_column], train_df_predprob))
        elif self.target_type == 'linear':
            print("Mean squared error: %f" % metrics.mean_squared_error(self.df[self.target_column].values, train_df_predictions))
            print("Root mean squared error: %f" % np.sqrt(metrics.mean_squared_error(self.df[self.target_column].values, train_df_predictions)))
开发者ID:softman123g,项目名称:xgbmagic,代码行数:32,代码来源:__init__.py

示例2: test_cv_explicit_fold_indices_labels

    def test_cv_explicit_fold_indices_labels(self):
        """CV run with user-supplied fold indices exposes the right test labels."""
        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
                  'objective': 'reg:linear'}
        n_rows, n_cols = 100, 3
        dm = xgb.DMatrix(data=np.random.randn(n_rows, n_cols),
                         label=np.arange(n_rows))
        folds = [
            # Train        Test
            ([1, 3], [5, 8]),
            ([7, 9], [23, 43, 11]),
        ]

        # Callback that prints the test labels of every fold.
        def log_test_labels(env):
            print([fold.dtest.get_label() for fold in env.cvfolds])

        # Capture stdout so the callback's output can be verified.
        with captured_output() as (out, err):
            xgb.cv(params, dm, num_boost_round=1, folds=folds,
                   callbacks=[log_test_labels], as_pandas=False)
            output = out.getvalue().strip()
        solution = ('[array([5., 8.], dtype=float32), array([23., 43., 11.],' +
                    ' dtype=float32)]')
        assert output == solution
开发者ID:zhengruifeng,项目名称:xgboost,代码行数:26,代码来源:test_basic.py

示例3: test_sklearn_nfolds_cv

def test_sklearn_nfolds_cv():
    """nfold, explicit sklearn folds, and stratified CV agree on round counts."""
    tm._skip_if_no_sklearn()
    from sklearn.datasets import load_digits
    from sklearn.model_selection import StratifiedKFold

    digits = load_digits(3)
    X, y = digits['data'], digits['target']
    dm = xgb.DMatrix(X, label=y)

    params = {
        'max_depth': 2,
        'eta': 1,
        'silent': 1,
        'objective': 'multi:softprob',
        'num_class': 3,
    }

    seed = 2016
    nfolds = 5
    skf = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=seed)

    by_nfold = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
    by_folds = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, folds=skf, seed=seed)
    by_strat = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed)
    # Same number of boosting rounds in every mode; identical final metric
    # for the two stratified variants.
    assert by_nfold.shape[0] == by_folds.shape[0] == by_strat.shape[0]
    assert by_folds.iloc[-1, 0] == by_strat.iloc[-1, 0]

示例4: test_cv

    def test_cv(self):
        """xgb.cv returns a DataFrame with mean/std columns, or an ndarray."""
        dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
        params = {'max_depth': 2, 'eta': 1, 'silent': 1,
                  'objective': 'binary:logistic'}

        import pandas as pd
        expected_columns = pd.Index([u'test-error-mean', u'test-error-std',
                                     u'train-error-mean', u'train-error-std'])

        def check_frame(result):
            # Every pandas-mode run yields the same four-column frame.
            assert isinstance(result, pd.DataFrame)
            assert result.columns.equals(expected_columns)

        check_frame(xgb.cv(params, dm, num_boost_round=10, nfold=10))

        # show progress log (result is the same as above)
        check_frame(xgb.cv(params, dm, num_boost_round=10, nfold=10,
                           show_progress=True))
        check_frame(xgb.cv(params, dm, num_boost_round=10, nfold=10,
                           show_progress=True, show_stdv=False))

        # return np.ndarray
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)
        assert isinstance(cv, np.ndarray)
        assert cv.shape == (10, 4)
开发者ID:ndingwall,项目名称:xgboost,代码行数:29,代码来源:test_basic.py

示例5: test_custom_objective

	def test_custom_objective(self):
		"""A custom objective/metric pair works in both train() and cv()."""
		param = {'max_depth': 2, 'eta': 1, 'silent': 1}
		watchlist = [(dtest, 'eval'), (dtrain, 'train')]
		num_round = 2

		def logregobj(preds, dtrain):
			# Gradient and hessian of the logistic loss on raw margins.
			labels = dtrain.get_label()
			probs = 1.0 / (1.0 + np.exp(-preds))
			return probs - labels, probs * (1.0 - probs)

		def evalerror(preds, dtrain):
			# Error rate with the decision threshold at margin 0.0.
			labels = dtrain.get_label()
			return 'error', float(sum(labels != (preds > 0.0))) / len(labels)

		# test custom_objective in training
		bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
		assert isinstance(bst, xgb.core.Booster)
		preds = bst.predict(dtest)
		labels = dtest.get_label()
		wrong = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i])
		err = wrong / float(len(preds))
		assert err < 0.1

		# test custom_objective in cross-validation
		xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
		       obj=logregobj, feval=evalerror)
开发者ID:GongliDuan,项目名称:xgboost,代码行数:25,代码来源:test_models.py

示例6: test_sklearn_nfolds_cv

def test_sklearn_nfolds_cv():
    """Round counts match across nfold, pre-built folds, and stratified CV."""
    digits = load_digits(3)
    X = digits['data']
    y = digits['target']
    dm = xgb.DMatrix(X, label=y)

    params = {
        'max_depth': 2,
        'eta': 1,
        'silent': 1,
        'objective': 'multi:softprob',
        'num_class': 3,
    }

    seed = 2016
    nfolds = 5
    # Old (pre-0.18) sklearn cross_validation signature: labels passed directly.
    skf = StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed)

    import pandas as pd
    cv_plain = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
    cv_folds = xgb.cv(params, dm, num_boost_round=10, folds=skf, seed=seed)
    cv_strat = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed)
    assert cv_plain.shape[0] == cv_folds.shape[0] == cv_strat.shape[0]
    assert cv_folds.iloc[-1, 0] == cv_strat.iloc[-1, 0]
开发者ID:000Nelson000,项目名称:xgboost,代码行数:25,代码来源:test_with_sklearn.py

示例7: xgb_model

def xgb_model(all_file, num=200, debug=True):
    """Run 5-fold AUC cross-validation for the PPD training rows.

    Reads the combined CSV (gb18030-encoded), keeps rows with tag == 1,
    drops id/meta columns, fills missing values with -1, and runs xgb.cv.

    :param all_file: path to the combined data CSV
    :param num: historical feature-count parameter (currently unused; kept
        for backward compatibility with existing callers)
    :param debug: when True, read only the first 500 rows for a quick run
    """
    if debug:
        all_data = pd.read_csv(all_file, nrows=500, encoding='gb18030')
    else:
        all_data = pd.read_csv(all_file, encoding='gb18030')
    train_data = all_data[all_data['tag'] == 1]
    feature_data = train_data.drop(['Idx', 'ListingInfo', 'target', 'tag'], axis=1)
    # -1 doubles as the missing-value marker passed to DMatrix below.
    feature_data.fillna(-1, inplace=True)
    labels = train_data['target']
    final_train_data = feature_data
    # print() so this runs on both Python 2 and 3 (it was a py2 print statement).
    print(final_train_data.shape)
    dtrain = xgb.DMatrix(final_train_data, label=labels, missing=-1)
    # NOTE(review): 'num_round' and 'fit_const' are not booster parameters
    # xgboost recognizes; num_boost_round below is what controls rounds.
    xgb_params = {'num_round': 2400, 'colsample_bytree': 0.5, 'silent': 1, 'eval_metric': 'auc', 'nthread': 3,
                  'min_child_weight': 6, 'subsample': 0.8, 'eta': 0.016, 'fit_const': 0.4, 'objective': 'binary:logistic',
                  'max_depth': 10, 'gamma': 1}

    xgb.cv(xgb_params, dtrain, num_boost_round=2400, nfold=5, metrics={'auc'}, show_progress=True)
    print('finished')
开发者ID:burness,项目名称:ppd_code,代码行数:29,代码来源:xgb_model.py

示例8: test_fpreproc

	def test_fpreproc(self):
		"""The fpreproc hook can rewrite params per fold before training."""
		param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
		num_round = 2

		def fpreproc(dtrain, dtest, param):
			# Rebalance classes: negatives-to-positives ratio in this fold.
			label = dtrain.get_label()
			param['scale_pos_weight'] = float(np.sum(label == 0)) / np.sum(label == 1)
			return (dtrain, dtest, param)

		xgb.cv(param, dtrain, num_round, nfold=5,
		       metrics={'auc'}, seed=0, fpreproc=fpreproc)
开发者ID:GongliDuan,项目名称:xgboost,代码行数:10,代码来源:test_models.py

示例9: cross_validation

def cross_validation():
    """5-fold xgboost CV triggered by the first 'TripType_' label column."""
    for k in sorted(train_y.keys()):
        if k.startswith('TripType_'):
            # NOTE(review): the label is the whole train_y, not train_y[k];
            # confirm that is intended — the loop key k is otherwise unused.
            dtrain = xgboost.DMatrix(train_X, label=train_y)
            params = {
                'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'
            }
            # Fix: xgboost.cv has no 'num_round' keyword (TypeError); the
            # parameter is num_boost_round. Also print() for Python 3.
            print(xgboost.cv(params, dtrain, num_boost_round=2, nfold=5,
                             metrics={'error'}, seed=0))
            break
开发者ID:binhngoc17,项目名称:kaggle,代码行数:10,代码来源:train.py

示例10: cross_validation

def cross_validation():
    """3-fold CV on the pre-built offline DMatrix with a custom eval metric."""
    dtrain = xgb.DMatrix('dataset_dmatrix/offline_0516_sim.train.buffer')

    param = {
        'max_depth': 5,
        'eta': 0.08,
        'silent': 1,
        'objective': 'binary:logistic',
        'nthread': 8,
        'subsample': 0.5,
    }
    num_round = 1500

    print('running cross validation')
    # maximize=True because evalerror is a higher-is-better metric here.
    xgb.cv(param, dtrain, num_round, nfold=3, show_progress=True,
           feval=evalerror, seed=0, show_stdv=False, maximize=True)
开发者ID:SuixueWang,项目名称:Koubei-Recommendation,代码行数:12,代码来源:Model_cross_validation.py

示例11: cross_validate

def cross_validate(args):
    """
    Cross-validate a multiclass booster; every CLI flag doubles as a
    booster parameter.

    Usage: cv iq_training_data_svm.txt dummy --num_round=1000
    https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py
    https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py
    :param args: parsed argparse namespace (input, num_round, nfold, ...)
    :return: None
    """
    dtrain = xgb.DMatrix(args.input)
    booster_params = vars(args)
    xgb.cv(booster_params, dtrain, args.num_round, nfold=int(args.nfold),
           metrics={'mlogloss', 'merror'}, seed=0)
开发者ID:thepythia,项目名称:pythia,代码行数:13,代码来源:gbdt_classifier.py

示例12: xgbCV

def xgbCV(dmatrix, nfolds, eta_list, gamma_list, num_rounds=500):
    """Grid-search eta/gamma with early-stopped xgb.cv.

    :param dmatrix: training data as an xgb.DMatrix
    :param nfolds: number of CV folds
    :param eta_list: learning rates to try
    :param gamma_list: gamma values to try
    :param num_rounds: maximum boosting rounds per combination
    :return: DataFrame with one row per (eta, gamma) combination, holding the
        best iteration index and its mean/std CV error.
    """
    params = {'eta': '', 'gamma': '', 'objective': 'binary:logistic', 'verbose': 3,
              'max_depth': 20, 'subsample': .75, 'colsample_bytree': .75}

    vals = {'eta': [], 'gamma': [], 'num_iter': [], 'mean_cv_error': [], 'std_cv_error': []}

    for e in eta_list:
        for g in gamma_list:
            params['eta'] = e
            params['gamma'] = g

            vals['eta'].append(e)
            vals['gamma'].append(g)

            print('Training the booster with a learning rate of', e, "and gamma of ", g)
            bst = xgb.cv(params, dmatrix, num_rounds, nfolds, early_stopping_rounds=2)
            print('Stopped after', len(bst.index), "rounds.")

            best_iter = bst.nsmallest(1, 'test-error-mean')
            vals['num_iter'].append(best_iter.index[0])
            # Fix: best_iter['col'] is a one-element Series; appending it put
            # Series objects into the result cells. Store the scalars instead.
            vals['mean_cv_error'].append(best_iter['test-error-mean'].iloc[0])
            vals['std_cv_error'].append(best_iter['test-error-std'].iloc[0])

    cv_df = pd.DataFrame.from_dict(vals)

    return cv_df
开发者ID:whereofonecannotspeak,项目名称:twitter_sentiment_analysis,代码行数:28,代码来源:boostCV.py

示例13: modelfit

def modelfit(alg, dtrain, predictors, target, useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    """Optionally tune n_estimators via xgb.cv, fit alg, and report metrics.

    :param alg: sklearn-style XGBClassifier
    :param dtrain: training DataFrame
    :param predictors: list of feature column names
    :param target: name of the binary target column
    :param useTrainCV: when True, pick n_estimators by early-stopped CV
    :param cv_folds: number of CV folds
    :param early_stopping_rounds: CV early-stopping patience
    """
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain[target].values)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
            metrics='auc', early_stopping_rounds=early_stopping_rounds)
        # CV stopped at the best round count; adopt it for the final fit.
        alg.set_params(n_estimators=cvresult.shape[0])

    #Fit the algorithm on the data
    alg.fit(dtrain[predictors], dtrain[target], eval_metric='auc')

    #Predict training set:
    dtrain_predictions = alg.predict(dtrain[predictors])
    dtrain_predprob = alg.predict_proba(dtrain[predictors])[:, 1]

    #Print model report:
    print ("\nModel Report")
    print ("Accuracy : %.4g" % metrics.accuracy_score(dtrain[target].values, dtrain_predictions))
    print ("AUC Score (Train): %f" % metrics.roc_auc_score(dtrain[target], dtrain_predprob))

    # Fix: alg.booster() was removed in xgboost >= 0.7 ('booster' became a
    # string attribute; the method is get_booster()). Support both versions.
    booster = alg.get_booster() if hasattr(alg, 'get_booster') else alg.booster()
    feat_imp = pd.Series(booster.get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    plt.show()
开发者ID:Paliking,项目名称:ML_examples,代码行数:25,代码来源:LoanPrediction2_XGB.py

示例14: model_2

def model_2(train, labels, test):
    """Linear-objective xgboost: size rounds by CV, train, return exp(preds)."""
    dtrain = xgb.DMatrix(train, label=labels)
    dtest = xgb.DMatrix(test)

    xgb_params = {
        "objective": "reg:linear",
        "eta": 0.1,
        "subsample": 0.7,
        "silent": 1,
        "max_depth": 6,
        "eval_metric": "rmse",
        "min_child_weight": 5,
        "seed": 22424,
    }

    res = xgb.cv(xgb_params, dtrain, num_boost_round=500, nfold=5, seed=2017,
                 stratified=False, early_stopping_rounds=25, verbose_eval=10,
                 show_stdv=True)

    best_nrounds = res.shape[0] - 1
    cv_mean = res.iloc[-1, 0]
    cv_std = res.iloc[-1, 1]

    print('')
    print('Ensemble-CV: {0}+{1}'.format(cv_mean, cv_std))
    bst = xgb.train(xgb_params, dtrain, best_nrounds)

    # np.exp presumably inverts a log-transform of the target done by the
    # caller — confirm upstream.
    preds = np.exp(bst.predict(dtest))
    return preds
开发者ID:movb,项目名称:kaggle,代码行数:28,代码来源:script.py

示例15: do_compute

def do_compute(x):
    """Evaluate grid row x with 4-fold xgb.cv and persist results into grid.

    Reads the hyper-parameters from grid.iloc[x], runs early-stopped CV on
    the module-level dtrain, writes the CV stats back into grid, dumps grid
    to a per-process CSV, and returns the mean test RMSE.
    """
    row = grid.iloc[x, :]
    xgb_pars = {'min_child_weight': row['min_child_weight'],
                'eta': row['eta'],
                'colsample_bytree': row['colsample_bytree'],
                'max_depth': int(row['max_depth']),
                'subsample': row['subsample'],
                'lambda': row['lambda'],
                'nthread': -1,
                'booster': 'gbtree',
                'silent': 1,
                'eval_metric': 'rmse',
                'objective': 'reg:linear'}
    model = xgb.cv(xgb_pars, dtrain, 100000, nfold=4, early_stopping_rounds=50,
                   maximize=False, verbose_eval=10)
    nround = model.shape[0]
    last = nround - 1
    rmse_cv_mean = model['test-rmse-mean'][last]
    rmse_cv_std = model['test-rmse-std'][last]
    grid.loc[x, 'rmse_cv_mean'] = rmse_cv_mean
    grid.loc[x, 'rmse_cv_std'] = rmse_cv_std
    grid.loc[x, 'nround'] = nround
    # One CSV per worker pid so parallel runs don't clobber each other's file.
    grid.to_csv('base_grid_xgb_40perc__'+str(os.getpid())+'.csv', index=False)
    return rmse_cv_mean
开发者ID:gtesei,项目名称:fast-furious,代码行数:32,代码来源:base_xgb_tune_mthread.py


注:本文中的xgboost.cv函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。