This article collects typical usage examples of the xgboost.sklearn.XGBClassifier class in Python. If you are wondering what XGBClassifier does, how to use it, or want to see it in working code, the curated class examples below may help.
The following presents 15 code examples of the XGBClassifier class, sorted by popularity by default.
Example 1: job_function
def job_function(params):
    learning_rate = params[0]
    max_depth = params[1]
    ss_cs = params[2]
    gamma = params[3]
    min_child_weight = params[4]
    reg_lambda = params[5]
    reg_alpha = params[6]

    early_stopping_rounds = 25
    if learning_rate >= 0.3:
        early_stopping_rounds = 5
    if learning_rate <= 0.03:
        early_stopping_rounds = 50

    scores = []
    for i in range(iterations_per_job):
        X_train = Xy[i][0]
        X_test = Xy[i][1]
        y_train = Xy[i][2]
        y_test = Xy[i][3]

        y_train2 = le.transform(y_train)
        y_test2 = le.transform(y_test)

        clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=5000, objective='multi:softprob', subsample=ss_cs, colsample_bytree=ss_cs, gamma=gamma, min_child_weight=min_child_weight, seed=0, silent=True, reg_lambda=reg_lambda, reg_alpha=reg_alpha)
        clf.fit(X_train, y_train, eval_set=[(X_test, y_test2)], eval_metric=calculate_score_2, early_stopping_rounds=early_stopping_rounds, verbose=False)
        y_predicted = clf.predict_proba(X_test, ntree_limit=clf.booster().best_ntree_limit)
        score = calculate_score(y_predicted, y_test2)
        scores.append(score)

    avg_score = np.array(scores).mean()
    print(avg_score, params)
    return avg_score
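job_function scores one hyperparameter tuple, so a search driver only needs to feed it candidate tuples and keep the best one. Below is a minimal grid-style driver sketch, not part of the original example: it assumes the globals the function relies on (Xy, le, iterations_per_job, the calculate_score helpers) already exist, and the candidate values are illustrative.

import itertools

# Illustrative candidate grid, ordered as (learning_rate, max_depth, ss_cs,
# gamma, min_child_weight, reg_lambda, reg_alpha).
grid = itertools.product([0.03, 0.1, 0.3], [4, 6, 8], [0.7, 0.9],
                         [0.0], [1, 5], [1.0], [0.0])

results = [(job_function(list(p)), list(p)) for p in grid]
# Assuming a higher calculate_score is better; switch to min() otherwise.
best_score, best_params = max(results, key=lambda r: r[0])
print(best_score, best_params)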
Example 2: _distributor

def _distributor(self, label, cv, param, eval_metric, early_stopping_rounds=50):
    start = time()
    if self.is_classifier:
        label = 'XGBClassifier'
        rs = XGBClassifier(**param)  # unpack the parameter dict
    else:
        label = 'XGBRegressor'
        rs = XGBRegressor(**param)

    # Hold out 40% of the training data as a blind evaluation set.
    X_visible, X_blind, y_visible, y_blind = \
        train_test_split(
            self.X_train, self.y_train, random_state=1301, stratify=self.y_train, test_size=0.4)

    rs.fit(X_visible, y_visible, eval_metric=eval_metric,
           early_stopping_rounds=early_stopping_rounds,
           eval_set=[(X_visible, y_visible), (X_blind, y_blind)])

    self.result[label] = {}
    self.result[label]['clf'] = rs
    # self.result[label]['score'] = rs.best_score_
    self.result[label]['time'] = time() - start
    # self.result[label]['set'] = ('n_iter: %s cv: %s' % (n_iter, cv))
    pprint.pprint(self.result[label])
    # pprint.pprint(rs.grid_scores_)

    out_result = open(self.result_address, 'wb')
    pickle.dump(self.result, out_result)
    out_result.close()
Example 3: extract_leaf_feature

def extract_leaf_feature(features, targets, train_indexes, params):
    model = XGBClassifier(**params)
    model.fit(features[train_indexes], targets[train_indexes])
    booster = model.booster()  # older xgboost API; newer releases use model.get_booster()
    dmatrix = xgb.DMatrix(features)
    # pred_leaf=True returns, for every sample, the index of the leaf it
    # falls into in each tree.
    leaf = booster.predict(dmatrix, pred_leaf=True)
    encoder = sklearn.preprocessing.OneHotEncoder()
    leaf_feature = encoder.fit_transform(leaf)
    return leaf_feature
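The one-hot encoded leaf indices are typically stacked into a linear model, the classic GBDT-plus-LR trick. Here is a minimal usage sketch, not part of the original example: the synthetic data, the parameter dict, and the LogisticRegression stage are illustrative assumptions.

import numpy as np
from sklearn.linear_model import LogisticRegression

# Illustrative data: 1,000 samples, 20 features, binary target.
features = np.random.rand(1000, 20)
targets = np.random.randint(0, 2, size=1000)
train_indexes = np.arange(800)  # first 800 rows fit the trees

params = {'max_depth': 3, 'n_estimators': 50}  # assumed hyperparameters
leaf_feature = extract_leaf_feature(features, targets, train_indexes, params)

# Stack a linear model on the sparse one-hot leaf encoding.
lr = LogisticRegression()
lr.fit(leaf_feature[train_indexes], targets[train_indexes])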
Example 4: main

def main(training_data, test_data):
    # Merging data to ensure consistent cleaning. Putting marker variable to separate later.
    training_data['source'] = 'training'
    test_data['source'] = 'test'
    merged_data = pd.concat([training_data, test_data])

    # Cleaning data
    cleaned_data = data_cleaner(merged_data)

    # Separating data, removing marker
    pred_df = cleaned_data[cleaned_data['source'] == 'training'].copy()
    test_pred = cleaned_data[cleaned_data['source'] == 'test'].copy()
    pred_df.drop('source', axis=1, inplace=True)
    test_pred.drop('source', axis=1, inplace=True)

    # Transforming target into ints, saving the key for later transformation
    labels = LabelEncoder().fit(training_data['country_destination'])
    target_df = pd.Series(labels.transform(training_data['country_destination']), index=training_data.index)

    # Training model
    xgb_model = XGBClassifier(max_depth=6, learning_rate=0.3, n_estimators=25, objective='multi:softprob',
                              subsample=0.5, colsample_bytree=0.5, seed=0)
    # .as_matrix() was removed in pandas 1.0; use .values on modern pandas.
    xgb_model.fit(pred_df.as_matrix(), target_df.tolist())

    # Running the model
    preds = xgb_model.predict_proba(test_pred.as_matrix())

    # Selecting the top 5 most likely countries for each respondent and stacking.
    # This section is VERY slow and could use being optimized.
    model_probs = pd.DataFrame(preds, index=test_pred.index, columns=labels.classes_)
    stacked_probs = pd.DataFrame()
    for i in model_probs.index:
        temp = model_probs.loc[i, :]
        temp_sort = pd.DataFrame(temp.sort_values(ascending=False)[:5].index)
        temp_sort['id'] = i
        temp_sort.columns = ['country', 'id']
        stacked_probs = pd.concat([stacked_probs, temp_sort])

    # # Selecting classes with highest probabilities, compiling into list
    # ids = []
    # cts = []
    # test_ids = pd.Series(test_data.index)
    # for i in range(len(test_ids)):
    #     idx = test_data.index[i]
    #     ids += [idx] * 5
    #     cts += labels.inverse_transform(np.argsort(model_probs[i])[::-1])[:5].tolist()
    #
    # predictions = pd.DataFrame(np.column_stack((ids, cts)), columns=['id', 'country'])

    # Cleaning output and returning it
    output = stacked_probs[['id', 'country']]
    return output
Example 5: eval_fn

def eval_fn(params):
    model = XGBClassifier(n_estimators=n_estimators_max, learning_rate=learning_rate, seed=seed)
    score = 0
    n_estimators = 0
    for tr, va in skf:
        X_tr, y_tr = X_train[tr], y_train[tr]
        X_va, y_va = X_train[va], y_train[va]
        model.set_params(**params)
        model.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], eval_metric='logloss',
                  early_stopping_rounds=50, verbose=False)
        score += model.best_score
        n_estimators += model.best_iteration
    score /= n_folds
    n_estimators = int(n_estimators / n_folds)
    n_estimators_lst.append(n_estimators)
    result_str = "train:%.4f ntree:%5d " % (score, n_estimators)
    if X_valid is not None:
        # Refit on the full training set with the averaged tree count.
        model.n_estimators = n_estimators
        model.fit(X_train, y_train)
        pr = model.predict_proba(X_valid)[:, 1]
        sc_valid = log_loss(y_valid, pr)
        score_valid.append(sc_valid)
        result_str += "valid:%.4f" % sc_valid
    if verbose:
        print(result_str)
    return score
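eval_fn pulls almost everything from enclosing scope, which is easy to miss when reusing it. The sketch below, not part of the original example, shows the state it expects with illustrative values; the sklearn.cross_validation import matches the era of this code (its StratifiedKFold is directly iterable, as the loop above assumes), while modern installs would use sklearn.model_selection instead.

import numpy as np
from sklearn.cross_validation import StratifiedKFold  # sklearn < 0.18 API
from sklearn.metrics import log_loss
from xgboost.sklearn import XGBClassifier

# Illustrative data and settings.
X_train = np.random.rand(1000, 20)
y_train = np.random.randint(0, 2, size=1000)
X_valid = y_valid = None           # set to a holdout split to track validation loss

n_estimators_max = 5000            # upper bound; early stopping picks the real count
learning_rate = 0.05
seed = 0
n_folds = 5
skf = StratifiedKFold(y_train, n_folds=n_folds, shuffle=True, random_state=seed)

n_estimators_lst = []
score_valid = []
verbose = True

score = eval_fn({'max_depth': 6, 'subsample': 0.9})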
Example 6: objective
def objective(space):
    clf = XGBClassifier(n_estimators=int(space['n_estimators']),
                        objective='binary:logistic',
                        seed=37,
                        learning_rate=space['learning_rate'],
                        max_depth=space['max_depth'],
                        min_child_weight=space['min_child_weight'],
                        colsample_bytree=space['colsample_bytree'],
                        subsample=space['subsample'])
    clf.fit(xTrain, yTrain, eval_metric="logloss")
    pred = clf.predict_proba(xValid)[:, 1]
    loss = log_loss(yValid, pred)
    return {'loss': loss, 'status': STATUS_OK}
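The {'loss': ..., 'status': STATUS_OK} return value is the contract hyperopt expects from an objective, so this function plugs directly into fmin. A minimal driver sketch, not part of the original example: it assumes xTrain, yTrain, xValid, and yValid are already defined, and the search-space bounds are illustrative.

from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

space = {
    'n_estimators': hp.quniform('n_estimators', 100, 1000, 50),
    'learning_rate': hp.loguniform('learning_rate', -5, -1),
    'max_depth': hp.choice('max_depth', [3, 5, 7, 9]),
    'min_child_weight': hp.quniform('min_child_weight', 1, 10, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
}

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=100, trials=trials)
print(best)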
Example 7: GBDT

def GBDT(self, report=False):
    """Gradient Boosting Decision Tree.

    Args:
        report: whether to print out the model analysis report.

    Returns:
        Decision tree model generated from Gradient Boosting Decision Tree."""
    from xgboost.sklearn import XGBClassifier
    self.gbdt = XGBClassifier(objective='binary:logistic',
                              booster='gbtree',
                              learning_rate=0.01,
                              n_estimators=5000,
                              max_depth=3,
                              subsample=0.75,
                              colsample_bytree=0.75,
                              n_jobs=4,
                              random_state=2018)
    self.gbdt.fit(self.train_prep[self.features], self.train_prep[self.target])
    if report:
        from Report import Report
        rpt = Report(self.gbdt, self.train, self.valid, self.target, self.features)
        rpt.ALL()
    return self.gbdt
Example 8: fit

def fit(self, json_train, n_estimators=10, is_xgb=True):
    train = self.pre_process(json_train, istrain=True)
    bow_vectorizer = BagOfWordsVectorizer()
    word2vec_model = Word2VecModel()
    tag_counter_model = TagCounterModel()

    # word2vec_model.fit(train["author_pos_sentences"], 500)
    # author_features = word2vec_model.transform(train["author_pos_sentences"], "author")
    # self.author_model = word2vec_model.get_model()

    # bow_vectorizer.fit(train["title_pos_sentences"], 1000)
    # title_features = bow_vectorizer.transform(train["title_pos_sentences"], "title")
    # self.title_model = bow_vectorizer.get_vectorizer()

    bow_vectorizer.fit(train["text_pos_sentences"], 1000)
    text_features = bow_vectorizer.transform(train["text_pos_sentences"], "text")
    self.text_model = bow_vectorizer.get_vectorizer()

    # tag_features = tag_counter_model.fit_transform(train["text"])
    # self.tag_model = tag_counter_model.get_col()

    train = pd.concat([train, text_features], axis=1)
    # le = preprocessing.LabelEncoder()
    # train["forumid"] = le.fit_transform(train["forumid"])

    label = train['istroll']
    train = train.drop('istroll', axis=1)
    train = train.drop(['text', 'text_pos', 'text_pos_sentences'], axis=1)
    print(train.columns)
    train.columns = [str(x) for x in range(len(train.columns))]

    if not is_xgb:
        self.model = RandomForestClassifier(n_estimators, n_jobs=-1)
    else:
        self.model = XGBClassifier(n_estimators=n_estimators, max_depth=10)
    print(train.shape)
    self.model.fit(train, label)
Example 9: apply_xgb_ens

def apply_xgb_ens(y_valid, valid_folder='Valid', test_folder='Test'):
    """
    Ensembler based on xgboost gradient boosting.
    """
    # Loading data
    X, X_test, n_preds, n_class = get_X_X_Test(valid_folder, test_folder)
    y = y_valid

    # Defining classifier
    xgb = XGBClassifier(max_depth=4, learning_rate=0.05, n_estimators=200,
                        objective='multi:softprob', gamma=0.,
                        max_delta_step=0., subsample=0.9, colsample_bytree=0.9,
                        seed=0)
    xgb.fit(X, y)
    y_pred = xgb.predict_proba(X_test)
    return y_pred
Example 10: perform_prediction

def perform_prediction(training, labels, testing, xgb_votes, rf_votes):
    """ Perform prediction using a combination of XGB and RandomForests. """
    predictions = np.zeros((len(testing), len(set(labels))))

    # Predictions using xgboost.
    for i in range(xgb_votes):
        print('XGB vote %d' % i)
        xgb = XGBClassifier(
            max_depth=DEPTH_XGB, learning_rate=LEARNING_XGB,
            n_estimators=ESTIMATORS_XGB, objective='multi:softprob',
            subsample=SUBSAMPLE_XGB, colsample_bytree=COLSAMPLE_XGB)
        xgb.fit(training, labels)
        predictions += xgb.predict_proba(testing)

    # Predictions using RandomForestClassifier.
    for i in range(rf_votes):
        print('RandomForest vote %d' % i)
        rand_forest = RandomForestClassifier(
            n_estimators=ESTIMATORS_RF, criterion=CRITERION_RF, n_jobs=JOBS_RF,
            max_depth=DEPTH_RF, min_samples_leaf=MIN_LEAF_RF, bootstrap=True)
        rand_forest.fit(training, labels)
        predictions += rand_forest.predict_proba(testing)
    return predictions
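Note that perform_prediction returns summed probabilities across all voters, not averages. The argmax ranking is unaffected, but if calibrated probabilities are needed the sum should be divided by the total vote count. A short usage sketch, not part of the original example; training, labels, testing, and the vote counts are assumed to be defined:

xgb_votes, rf_votes = 5, 5
predictions = perform_prediction(training, labels, testing, xgb_votes, rf_votes)

# Convert summed votes into mean class probabilities.
avg_probs = predictions / (xgb_votes + rf_votes)
predicted_classes = avg_probs.argmax(axis=1)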
Example 11: xgboostinitial_predictor

def xgboostinitial_predictor(train_path, test_path, eval_path):
    # Loading the data
    print('Loading the data...')
    train = pd.read_csv(train_path, index_col=0)
    test = pd.read_csv(test_path, index_col=0)
    eval_df = pd.read_csv(eval_path, index_col=0)
    target = train['target'].copy()
    train.drop('target', axis=1, inplace=True)

    # Training model
    print('Model training begins...')
    # xgtrain = xgb.DMatrix(train.values, target.values, missing=np.nan)
    # xgboost_params = {'objective': 'binary:logistic', 'booster': 'gbtree', 'eval_metric': 'logloss', 'eta': 0.01,
    #                   'subsample': 0.5, 'colsample_bytree': 0.5, 'max_depth': 10, 'silent': 0}
    #
    # xgb_model = xgb.train(xgboost_params, xgtrain, learning_rates=0.3)

    xgb_model = XGBClassifier(max_depth=6, learning_rate=0.3, n_estimators=25, objective='binary:logistic',
                              subsample=0.5, colsample_bytree=0.5, seed=0)
    # .as_matrix() was removed in pandas 1.0; use .values on modern pandas.
    xgb_model.fit(train.as_matrix(), target.tolist())

    # Running the model
    print('Making predictions....')
    # xgtest = xgb.DMatrix(test.values)
    # xgeval = xgb.DMatrix(eval_df)

    test_preds = xgb_model.predict_proba(test.as_matrix())
    eval_preds = xgb_model.predict_proba(eval_df.as_matrix())

    print('Cleaning predictions to match expected format....')
    # Keep only the probability of the positive class (column 1).
    test_output = pd.DataFrame(test_preds, index=test.index)
    print(test_output.columns)
    test_output = test_output[[1]]
    test_output.columns = ['PredictedProb']

    eval_output = pd.DataFrame(eval_preds, index=eval_df.index)
    eval_output = eval_output[[1]]
    eval_output.columns = ['PredictedProb']

    return test_output, eval_output
Example 12: train_classifier
def train_classifier(X, y, clf_name='xgb'):
    if clf_name == 'xgb':
        clf = XGBClassifier(
            n_estimators=ESTIMATORS_XG,
            objective=OBJECTIVE_XG,
            max_depth=DEPTH_XG,
            learning_rate=LEARNING_RATE_XG,
            subsample=SUBSAMPLE_XG,
            colsample_bytree=COLSAMPLE_BYTREE_XG,
            seed=0,
        )
    else:
        clf = RandomForestClassifier(
            n_estimators=ESTIMATORS_RF,
            criterion=CRITERION_RF,
            n_jobs=JOBS_RF,
            max_depth=DEPTH_RF,
            min_samples_leaf=MIN_LEAF_RF,
            min_samples_split=MIN_SPLIT_RF,
            max_features=MAX_FEATURES_RF,
            bootstrap=True,
        )
    clf.fit(X, y)
    return clf
Example 13: get_xgboost_classifier

def get_xgboost_classifier(X_train, y_train, X_val, y_val, params=None, tag=""):
    param_grid = {'max_depth': [3, 5, 7], 'min_child_weight': [1, 3, 5], 'n_estimators': [50]}

    if params is None:
        xgb = XGBClassifier(
            learning_rate=0.2,
            objective='binary:logistic',
            seed=27)

        t = start("training xgboost ")
        # sklearn < 0.18 API; newer releases moved these to sklearn.model_selection.
        cv = cross_validation.ShuffleSplit(X_train.shape[0], n_iter=10, test_size=0.2, random_state=123)
        clf = grid_search.GridSearchCV(xgb, param_grid, cv=cv, n_jobs=1, scoring='roc_auc')
        clf = clf.fit(X_train, y_train)
        report(t, nitems=10 * len(param_grid))

        print("Best score: {} with scorer {}".format(clf.best_score_, clf.scorer_))
        print("With parameters:")
        best_parameters = clf.best_estimator_.get_params()
        for param_name in sorted(param_grid.keys()):
            print('\t%s: %r' % (param_name, best_parameters[param_name]))
    else:
        clf = XGBClassifier(**params)
        clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], eval_metric='auc', verbose=False)

    if plot_cv_curves:
        train = clf.evals_result()['validation_0']['auc']
        val = clf.evals_result()['validation_1']['auc']
        plot_cv_curve(train, val, tag)

    if plot_feature_importance:
        plot_feature_importance(clf, tag)

    return clf
Example 14: main
def main():
    data_train = pd.read_csv(args.train_dataset)
    X_train = data_train.drop(['Id', 'Class'], axis=1)
    y_train = data_train.loc[:, 'Class']

    data_test = pd.read_csv(args.test_dataset)
    X_test = data_test.drop(['Id'], axis=1)
    Id = data_test.loc[:, 'Id']

    clf = XGBClassifier()
    clf.set_params(**best_dicts)
    clf.fit(X_train, y_train)
    prediction = clf.predict_proba(X_test)

    columns = ['Prediction' + str(i) for i in range(1, 10)]
    prediction = pd.DataFrame(prediction, columns=columns)
    results = pd.concat([Id, prediction], axis=1)
    return (clf, results)
Example 15: myThreadFunc
def myThreadFunc(ThreadID):
    X_train = Xy[ThreadID][0]
    X_test = Xy[ThreadID][1]
    y_train = Xy[ThreadID][2]
    y_test = Xy[ThreadID][3]

    y_train2 = le.transform(y_train)
    y_test2 = le.transform(y_test)

    clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=5000, objective='multi:softprob', subsample=ss_cs, colsample_bytree=ss_cs, gamma=gamma, min_child_weight=min_child_weight, seed=0, silent=True, reg_lambda=reg_lambda, reg_alpha=reg_alpha)
    clf.fit(X_train, y_train, eval_set=[(X_test, y_test2)], eval_metric=calculate_score_2, early_stopping_rounds=early_stopping_rounds, verbose=False)
    y_predicted = clf.predict_proba(X_test, ntree_limit=clf.booster().best_ntree_limit)
    score = calculate_score(y_predicted, y_test2)
    print(score, clf.booster().best_ntree_limit)

    train_and_test_scores[ThreadID] = score
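myThreadFunc reads its fold data and hyperparameters from globals and writes its result into train_and_test_scores, so it is meant to be driven by a thread pool. A minimal driver sketch under those assumptions, not part of the original example (Xy and the hyperparameter globals must already exist); since XGBoost's native training releases the GIL for most of the work, plain threads overlap reasonably well here.

import threading

n_threads = len(Xy)  # one (X_train, X_test, y_train, y_test) tuple per thread
train_and_test_scores = [None] * n_threads

threads = [threading.Thread(target=myThreadFunc, args=(i,)) for i in range(n_threads)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(train_and_test_scores)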