当前位置: 首页>>代码示例>>Python>>正文


Python xgboost.train函数代码示例

本文整理汇总了Python中xgboost.train函数的典型用法代码示例。如果您正苦于以下问题:Python train函数的具体用法?Python train怎么用?Python train使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了train函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: xgbTuning

	def xgbTuning(self, pX, change = 3):
		"""Random-search xgboost hyper-parameters, then train a final model.

		Draws `change` random (eta, max_depth, alpha) combinations, keeps
		the set with the best 3-fold CV AUC, retrains on a 67/33 split with
		early stopping, and finally refits for the early-stopped round count.

		Args:
			pX: feature DataFrame (``.columns`` supplies DMatrix feature names).
			change: number of random parameter draws to evaluate.

		Returns:
			Tuple of (best held-out AUC as float, final retrained Booster).
		"""
		w = self.getWeight(self.y)
		dm = xgb.DMatrix(pX, self.y, weight=w)
		# Start below any reachable AUC so best_params is always assigned
		# (the old initial value of 0 could leave best_params as None).
		best_auc = -1.0
		best_params = None
		for i in range(change):
			randp = np.random.random_sample(3)
			param = {
				'bst:eta': randp[0],
				'max_depth': int(3 + 6 * randp[1]),
				'nthread': 4,
				'silent': 1,
				'alpha': randp[2],
				'eval_metric': 'auc',
				'objective': 'binary:logistic'
			}
			m = xgb.cv(param, dm, metrics='auc', nfold=3, num_boost_round=50, early_stopping_rounds=5)
			auc = m['test-auc-mean'].max()
			if auc > best_auc:
				# print() call form runs under both Python 2 and Python 3.
				print('xgb:' + str(auc))
				best_auc = auc
				best_params = param
		Xtrain, Xtest, ytrain, ytest = train_test_split(pX, self.y, test_size=.33)
		trainw = self.getWeight(ytrain)
		testw = self.getWeight(ytest)
		dtrain = xgb.DMatrix(Xtrain, label=ytrain, feature_names=Xtrain.columns, weight=trainw)
		dtest = xgb.DMatrix(Xtest, label=ytest, feature_names=Xtest.columns, weight=testw)
		evallist = [(dtrain, 'train'), (dtest, 'eval')]
		booster = xgb.train(best_params, dtrain, evals=evallist, num_boost_round=100, early_stopping_rounds=10)
		# Booster.attr() returns strings; convert before reuse.
		rounds = booster.attr("best_iteration")
		best_auc = booster.attr("best_score")
		return float(best_auc), xgb.train(best_params, dtrain, num_boost_round=int(rounds))
开发者ID:Gnostikoi,项目名称:orange,代码行数:33,代码来源:model_generator.py

示例2: evalModelOHE

def evalModelOHE(train_data, eval_data, train_labels, eval_labels):
    """Train an xgboost "reg:linear" model on one-hot-encoded features.

    Trains for up to 5000 rounds while reporting the custom ``evalerror``
    metric on both the eval and train sets.

    Args:
        train_data, train_labels: training features and targets.
        eval_data, eval_labels: held-out features and targets, used only
            for evaluation output during training.

    Returns:
        The trained xgboost Booster (previously the model was discarded).
    """
    # The old alternate parameter set that was kept as commented-out dead
    # code has been removed; these are the values actually in use.
    params = {
        "objective": "reg:linear",
        "eta": 0.013,
        "min_child_weight": 6,
        "subsample": 0.51,
        "colsample_bytree": 0.6,
        "scale_pos_weight": 1.0,
        "silent": 1,
        "max_depth": 10,
        "max_delta_step": 1,
    }
    plst = list(params.items())

    xgtrain = xgb.DMatrix(train_data, label=train_labels)
    xgeval = xgb.DMatrix(eval_data, label=eval_labels)
    evallist = [(xgeval, 'eval'), (xgtrain, 'train')]
    return xgb.train(plst, xgtrain, num_boost_round=5000, evals=evallist, feval=evalerror)
开发者ID:shashankchaudhry,项目名称:caterpillar-tube-pricing,代码行数:26,代码来源:new_model_08-31_n_fold_CV_framework_OHE_mean.py

示例3: test_fast_histmaker

    def test_fast_histmaker(self):
        """Exercise the 'hist' updater, then check it matches 'exact' AUC."""
        grid = {'tree_method': ['hist'],
                'max_depth': [2, 8],
                'max_bin': [2, 256],
                'grow_policy': ['depthwise', 'lossguide'],
                'max_leaves': [64, 0],
                'verbosity': [0]}
        for combo in parameter_combinations(grid):
            assert_results_non_increasing(run_suite(combo), 1e-2)

        # hist must be same as exact on all-categorial data
        data_dir = 'demo/data/'
        dtrain = xgb.DMatrix(data_dir + 'agaricus.txt.train')
        dtest = xgb.DMatrix(data_dir + 'agaricus.txt.test')
        params = {'max_depth': 2,
                  'tree_method': 'hist',
                  'eta': 1,
                  'verbosity': 0,
                  'objective': 'binary:logistic',
                  'eval_metric': 'auc'}

        watchlist = [(dtrain, 'train'), (dtest, 'test')]

        hist_res = {}
        xgb.train(params, dtrain, 10, watchlist, evals_result=hist_res)

        exact_res = {}
        params["tree_method"] = "exact"
        xgb.train(params, dtrain, 10, watchlist, evals_result=exact_res)

        assert hist_res['train']['auc'] == exact_res['train']['auc']
        assert hist_res['test']['auc'] == exact_res['test']['auc']

示例4: train_predict

	def train_predict(self,train_x,train_y,test_x):
		"""Train a binary-logistic xgboost model and score ``test_x``.

		When ``self.exist_prediction`` is set, a second training pass is
		boosted from the first model's output margins.

		Args:
			train_x, train_y: training features/labels (missing coded -9999).
			test_x: features to score.

		Returns:
			Predicted probabilities for ``test_x``.
		"""
		xgmat_train = xgb.DMatrix(train_x, label=train_y, missing=-9999)
		params = {
			'booster':'gbtree',
			'objective':'binary:logistic',
			'silent':self.silent,
			'eta':self.eta,
			'gamma':self.gamma,
			'max_depth':self.max_depth,
			# key was misspelled 'min_chile_weitght', so xgboost silently
			# ignored the configured min_child_weight
			'min_child_weight':self.min_chile_weight,
			'subsample':self.subsample,
			'lambda':self.lambda_,
			'scale_pos_weight':self.scale_pos_weight,
			"colsample_bytree": self.colsample_bytree,
			# key was misspelled 'eval_metirc', so AUC was never reported
			'eval_metric':'auc',
			'seed':2014,
			'nthread':self.threads
		}

		watchlist = [ (xgmat_train,'train') ]
		num_round = self.num_boost_round

		bst = xgb.train( params, xgmat_train, num_round, watchlist )
		xgmat_test = xgb.DMatrix(test_x,missing=-9999)

		if self.exist_prediction:
			# Continue boosting from the first model's margin predictions.
			tmp_train = bst.predict(xgmat_train, output_margin=True)
			tmp_test = bst.predict(xgmat_test, output_margin=True)
			xgmat_train.set_base_margin(tmp_train)
			xgmat_test.set_base_margin(tmp_test)
			bst = xgb.train(params, xgmat_train, self.exist_num_boost_round, watchlist )

		ypred = bst.predict(xgmat_test)
		return ypred
开发者ID:Sandy4321,项目名称:Xgboost_Datacastle_MoralQualityPrediction,代码行数:35,代码来源:xgb_class.py

示例5: hyperopt_obj

 def hyperopt_obj(self,param,train_X,train_y):
     """Hyperopt objective: mean 3-fold MSE of xgboost on (train_X, train_y).

     Also prints the r2 on the full training set and the mean CV r2 so
     progress can be watched during the search.
     """
     # hyperopt samples floats; these parameters must be integers
     for key in ('max_depth', 'num_round'):
         param[key] = int(param[key])
     fold_mse = []
     fold_r2 = []
     splitter = KFold(n_splits = 3)
     for fit_idx, val_idx in splitter.split(train_X):
         fit_x, fit_y = train_X[fit_idx], train_y[fit_idx]
         val_x, val_y = train_X[val_idx], train_y[val_idx]
         booster = xgb.train(param, xgb.DMatrix(fit_x, label = fit_y),
                             num_boost_round=int(param['num_round']))
         val_pred = booster.predict(xgb.DMatrix(val_x))
         fold_mse.append(mean_squared_error(val_y, val_pred))
         fold_r2.append(r2_score(val_y, val_pred))
     full_train = xgb.DMatrix(train_X, label = train_y)
     print('training score:')
     full_model = xgb.train(param, full_train, num_boost_round= int(param['num_round']))
     train_pred = full_model.predict(xgb.DMatrix(train_X))
     print(str(r2_score(train_y, train_pred)))
     print(np.mean(fold_r2))
     print('\n')
     return {'loss':np.mean(fold_mse),'status': STATUS_OK}
开发者ID:Matafight,项目名称:Kaggle,代码行数:27,代码来源:stacking.py

示例6: test_predict

    def test_predict(self):
        """GPU and CPU predictors must agree on train/val/test margins."""
        np.random.seed(1)
        n_iterations = 10
        for n_rows in [10, 1000, 5000]:
            for n_cols in [10, 50, 500]:
                # Same alternating 0/1 labels for all three matrices.
                labels = [0, 1] * int(n_rows / 2)
                dtrain = xgb.DMatrix(np.random.randn(n_rows, n_cols), label=labels)
                dval = xgb.DMatrix(np.random.randn(n_rows, n_cols), label=labels)
                dtest = xgb.DMatrix(np.random.randn(n_rows, n_cols), label=labels)
                watchlist = [(dtrain, 'train'), (dval, 'validation')]
                res = {}
                param = {
                    "objective": "binary:logistic",
                    "predictor": "gpu_predictor",
                    'eval_metric': 'auc',
                }
                bst = xgb.train(param, dtrain, n_iterations, evals=watchlist, evals_result=res)
                assert self.non_decreasing(res["train"]["auc"])
                gpu_train = bst.predict(dtrain, output_margin=True)
                gpu_test = bst.predict(dtest, output_margin=True)
                gpu_val = bst.predict(dval, output_margin=True)

                # Retrain with the CPU predictor and compare margins.
                param["predictor"] = "cpu_predictor"
                cpu_bst = xgb.train(param, dtrain, n_iterations, evals=watchlist)
                np.testing.assert_allclose(cpu_bst.predict(dtrain, output_margin=True), gpu_train, rtol=1e-5)
                np.testing.assert_allclose(cpu_bst.predict(dval, output_margin=True), gpu_val, rtol=1e-5)
                np.testing.assert_allclose(cpu_bst.predict(dtest, output_margin=True), gpu_test, rtol=1e-5)
开发者ID:Ihaveadreammoonlighter,项目名称:xgboost,代码行数:31,代码来源:test_gpu_prediction.py

示例7: XgbTrain

        def XgbTrain(self, submitfile):
              """Train two xgboost models (original and reversed row order),
              sum their test-set predictions, and optionally write a
              submission CSV.

              Args:
                  submitfile: path of the CSV to write; '' skips writing.
              """
              offset = 5000
              X_train, y_train = self.dataMat, self.labelMat
              X_test = self.testData
              xgtest = xgb.DMatrix(X_test)

              # First model: rows after `offset` train, first `offset` validate.
              xgtrain_train = xgb.DMatrix(X_train[offset:,:], label=y_train[offset:])
              xgtrain_val = xgb.DMatrix(X_train[:offset,:], label=y_train[:offset])

              watchlist = [(xgtrain_train, 'train'),(xgtrain_val, 'val')]
              model = xgb.train(self.params_best, xgtrain_train, self.num_rounds_best, watchlist,early_stopping_rounds=self.early_stopping_rounds_best)
              preds1 = model.predict(xgtest)

              # Second model: same split on the reversed rows.
              X_train = X_train[::-1,:]
              y_train = y_train[::-1]

              xgtrain_train = xgb.DMatrix(X_train[offset:,:], label=y_train[offset:])
              xgtrain_val = xgb.DMatrix(X_train[:offset,:], label=y_train[:offset])

              watchlist = [(xgtrain_train, 'train'),(xgtrain_val, 'val')]
              model = xgb.train(self.params_best, xgtrain_train, self.num_rounds_best, watchlist, early_stopping_rounds=self.early_stopping_rounds_best)
              preds2 = model.predict(xgtest)

              preds = preds1 + preds2
              if submitfile!='':
                  # `with` closes the file even on error; the bare open()
                  # previously leaked the handle.
                  # NOTE(review): 'wb' is the Python 2 csv convention; on
                  # Python 3 this needs 'w' with newline='' — confirm which
                  # interpreter this project targets.
                  with open(submitfile,'wb') as fout:
                      writer = csv.writer(fout)
                      writer.writerow(['ID','Hazard'])
                      for i in range(len(preds)):
                          writer.writerow([self.testid[i], preds[i]])

示例8: run_benchmark

def run_benchmark(args, gpu_algorithm, cpu_algorithm):
    """Time xgboost training on synthetic data with a GPU and a CPU tree method."""
    print("Generating dataset: {} rows * {} columns".format(args.rows,args.columns))
    start = time.time()
    X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
    print("Generate Time: %s seconds" % (str(time.time() - start)))

    start = time.time()
    print("DMatrix Start")
    # Build the DMatrix with all available threads (omp); the single-threaded
    # alternative would be xgb.DMatrix(X, y).
    dtrain = xgb.DMatrix(X, y, nthread=-1)
    print("DMatrix Time: %s seconds" % (str(time.time() - start)))

    param = {'objective': 'binary:logistic',
             'max_depth': 6,
             'silent': 0,
             'n_gpus': 1,
             'gpu_id': 0,
             'eval_metric': 'auc'}

    def _timed_train(method, banner):
        # Train once with the given tree_method and print the elapsed time.
        param['tree_method'] = method
        print("Training with '%s'" % param['tree_method'])
        t0 = time.time()
        xgb.train(param, dtrain, args.iterations)
        print(banner % (str(time.time() - t0)))

    _timed_train(gpu_algorithm, "Train Time: %s seconds")
    # Silence per-iteration output for the CPU run, as the original did.
    param['silent'] = 1
    _timed_train(cpu_algorithm, "Time: %s seconds")
开发者ID:ChangXiaodong,项目名称:xgboost-withcomments,代码行数:32,代码来源:benchmark.py

示例9: test_multi_predict

    def test_multi_predict(self):
        """gpu_predictor must be deterministic and match cpu_predictor."""
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split

        X, y = make_regression(1000, random_state=rng)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=123)
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test)

        params = {"tree_method": "gpu_hist", "predictor": "gpu_predictor"}
        gpu_booster = xgb.train(params, dtrain)

        params['predictor'] = "cpu_predictor"
        cpu_booster = xgb.train(params, dtrain)

        # Two predictions from the same GPU booster check determinism.
        first_gpu = gpu_booster.predict(dtest)
        second_gpu = gpu_booster.predict(dtest)
        cpu_pred = cpu_booster.predict(dtest)

        assert np.allclose(first_gpu, second_gpu)
        assert np.allclose(first_gpu, cpu_pred)
开发者ID:rfru,项目名称:xgboost,代码行数:26,代码来源:test_gpu_prediction.py

示例10: run

def run(train_matrix,test_matrix):
    """Train a 3-class softprob xgboost model.

    With a test matrix: trains with early stopping and returns
    (test predictions, model). Without one: trains the full round
    count and returns only the model.

    Args:
        train_matrix: xgb.DMatrix of training data.
        test_matrix: xgb.DMatrix used for eval/early stopping, or falsy.
    """
    params = {'booster': 'gbtree',
              'objective': 'multi:softprob',
              'eval_metric': 'mlogloss',
              'gamma': 1,
              'min_child_weight': 1.5,
              'max_depth': 5,
              'lambda': 10,
              'subsample': 0.7,
              'colsample_bytree': 0.7,
              'colsample_bylevel': 0.7,
              'eta': 0.03,
              'tree_method': 'exact',
              'seed': 2017,
              'nthread': 12,
              "num_class":3
              }
    num_round = 10000
    early_stopping_rounds = 50
    watchlist = [(train_matrix, 'train'),
                 (test_matrix, 'eval')
                 ]
    if test_matrix:
        model = xgb.train(params, train_matrix, num_boost_round=num_round, evals=watchlist,
                      early_stopping_rounds=early_stopping_rounds
                      )
        # BUG FIX: best_iteration is a 0-based index; using it as ntree_limit
        # dropped the best tree. best_ntree_limit is the tree count xgboost
        # documents for prediction after early stopping.
        pred_test_y = model.predict(test_matrix,ntree_limit=model.best_ntree_limit)
        return pred_test_y, model
    else:
        model = xgb.train(params, train_matrix, num_boost_round=num_round
                      )
        return model
开发者ID:bifeng,项目名称:Rental-Listing-Inquiries,代码行数:33,代码来源:xgb.py

示例11: train

    def train(self, X, Y, getApproxError=False):
        """Fit the boosted-tree model; optionally estimate error via 4-fold CV.

        Returns the approximate misclassification rate when
        ``getApproxError`` is True, otherwise None.
        """
        self.bst = xgb.train(self.param, xgb.DMatrix(X, label=Y), self.nRounds)

        if not getApproxError:
            return

        wrong = 0.0
        total = 0.0
        for fit_idx, holdout_idx in KFold(Y.shape[0], n_folds=4):
            fold_train = xgb.DMatrix(X[fit_idx, :], label=Y[fit_idx])
            fold_bst = xgb.train(self.param, fold_train, self.nRounds)

            # Class with the highest predicted probability wins.
            probs = fold_bst.predict(xgb.DMatrix(X[holdout_idx, :]))
            predicted = numpy.argmax(probs, axis=1)

            truth = Y[holdout_idx]
            wrong += float(numpy.sum(predicted != truth))
            total += float(len(truth))

        return wrong / total
开发者ID:DerThorsten,项目名称:nifty,代码行数:35,代码来源:tools.py

示例12: xgboost_model

def xgboost_model(train, test, num_round, params):
    """Train two xgboost regressors on transformed targets and blend them.

    Model 1 fits log1p(cost) and is inverted with expm1; model 2 fits
    cost**(1/47) and is inverted by raising to the 47th power. The two
    back-transformed test predictions are averaged 50/50.

    Takes in: training set, test set, number of estimators, params is a list

    Returns: predictions in correct format
    """
    # NOTE(review): DataFrame.as_matrix was removed in pandas 1.0; this code
    # assumes an older pandas — confirm before upgrading.
    X = train.as_matrix(train.columns[:-1]).astype(float)
    y = train.as_matrix(["cost"])[:, 0].astype(float)
    X_test = test.as_matrix(test.columns[:-1]).astype(float)
    xgb_test = xgb.DMatrix(X_test)

    # Model 1: log-transformed target.
    ylog1p = np.log1p(y)
    xgb_train = xgb.DMatrix(X, label=ylog1p)
    bst1 = xgb.train(params, xgb_train, num_round)
    y_pred1 = bst1.predict(xgb_test)

    # Model 2: power-transformed target. (A previously commented-out
    # "Round 2" model was dead code and has been removed.)
    ypower3 = np.power(y, 1 / 47.0)
    xgb_train3 = xgb.DMatrix(X, label=ypower3)
    xst3 = xgb.train(params, xgb_train3, num_round)
    y_predp3 = xst3.predict(xgb_test)

    # Equal-weight blend of the two back-transformed predictions.
    p = 0.5
    y_pred = p * np.expm1(y_pred1) + (1 - p) * np.power(y_predp3, 47.0)

    return y_pred
开发者ID:evanslt,项目名称:CDIPS15_TeamCat,代码行数:33,代码来源:cv_run.py

示例13: train_predict

    def train_predict(self,X_train,y_train,X_test,base_train_prediction,base_test_prediction):
        """Train a binary-logistic booster and score ``X_test``.

        When ``self.boost_from_exist_prediction`` is set, the supplied base
        margins are installed on both DMatrix objects before training.
        """
        train_mat = xgb.DMatrix(X_train, label=y_train,missing=-999)

        config = {
            'objective': 'binary:logistic',
            'bst:eta': self.eta,
            'colsample_bytree': 1,
            'min_child_weight': self.min_child_weight,
            'bst:max_depth': self.depth,
            'eval_metric': 'auc',
            'silent': 1,
            'nthread': self.threads,
        }

        watch = [(train_mat, 'train')]
        test_mat = xgb.DMatrix(X_test,missing=-999)

        if self.boost_from_exist_prediction:
            # train xgb with existing predictions
            # see more at https://github.com/tqchen/xgboost/blob/master/demo/guide-python/boost_from_prediction.py
            train_mat.set_base_margin(base_train_prediction)
            test_mat.set_base_margin(base_test_prediction)
            booster = xgb.train(config, train_mat, self.exist_num_round, watch)
        else:
            booster = xgb.train(list(config.items()), train_mat, self.num_round, watch)

        return booster.predict(test_mat)
开发者ID:thekannman,项目名称:kaggle,代码行数:31,代码来源:xgb_classifier.py

示例14: run_benchmark

def run_benchmark(args):
    """Benchmark one xgboost training run on a synthetic classification set.

    Generates data, optionally injects NaN sparsity, builds train/test
    DMatrix objects, and times xgb.train with ``args.tree_method``.

    Args:
        args: namespace with rows, columns, sparsity, test_size, params
            (a dict literal as a string, or ''), tree_method, iterations.
    """
    print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
    print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
    tmp = time.time()
    X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
    if args.sparsity < 1.0:
        # Randomly blank out entries to simulate a sparse feature matrix.
        X = np.array([[np.nan if rng.uniform(0, 1) < args.sparsity else x for x in x_row] for x_row in X])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size, random_state=7)
    print ("Generate Time: %s seconds" % (str(time.time() - tmp)))
    tmp = time.time()
    print ("DMatrix Start")
    dtrain = xgb.DMatrix(X_train, y_train, nthread=-1)
    dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
    print ("DMatrix Time: %s seconds" % (str(time.time() - tmp)))

    param = {'objective': 'binary:logistic'}
    # BUG FIX: `is not ''` tested identity, not equality (and is a
    # SyntaxWarning on modern CPython); string comparison needs !=.
    if args.params != '':
        param.update(ast.literal_eval(args.params))

    param['tree_method'] = args.tree_method
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
    print ("Train Time: %s seconds" % (str(time.time() - tmp)))
开发者ID:wyj2046,项目名称:xgboost,代码行数:25,代码来源:benchmark.py

示例15: fit

  def fit(self, X, y):
    """Train the wrapped xgboost booster on (X, y) and return self."""
    X = self.build_matrix(X, y)
    booster_params = dict(
      silent=1 if self.silent else 0,
      use_buffer=int(self.use_buffer),
      num_round=self.num_round,
      ntree_limit=self.ntree_limit,
      nthread=self.nthread,
      booster=self.booster,
      eta=self.eta,
      gamma=self.gamma,
      max_depth=self.max_depth,
      min_child_weight=self.min_child_weight,
      subsample=self.subsample,
      colsample_bytree=self.colsample_bytree,
      max_delta_step=self.max_delta_step,
      l=self.l,
      alpha=self.alpha,
      lambda_bias=self.lambda_bias,
      objective=self.objective,
      eval_metric=self.eval_metric,
      seed=self.seed,
    )
    # Multiclass objectives additionally require num_class.
    if self.num_class is not None:
      booster_params['num_class'] = self.num_class

    eval_list = [(X, 'train')]
    # Pass early stopping only when a positive round count was configured.
    extra_kwargs = {}
    if self.early_stopping_rounds > 0:
      extra_kwargs['early_stopping_rounds'] = self.early_stopping_rounds
    self.bst = xgb.train(booster_params, X, self.num_round, eval_list, **extra_kwargs)

    return self
开发者ID:eugeneyan,项目名称:py_ml_utils,代码行数:33,代码来源:XGBoostClassifier.py


注:本文中的xgboost.train函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。