

Python XGBClassifier.booster Method Code Examples

This article collects typical code examples of the xgboost.sklearn.XGBClassifier.booster method in Python. If you are struggling with questions such as: what exactly does XGBClassifier.booster do? How do I use XGBClassifier.booster? Where can I find examples of it in use? Then the hand-picked code examples below may help. You can also explore further usage examples of the containing class, xgboost.sklearn.XGBClassifier.


The following presents 7 code examples of XGBClassifier.booster, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
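Before diving into the examples, here is a minimal sketch of the pattern they all share: train an XGBClassifier, then call booster() to reach the underlying Booster object for low-level operations such as get_fscore() or reading best_ntree_limit after early stopping. Note that booster() exists as a method only in older xgboost releases (newer ones expose the same object via get_booster()), and the toy data below is invented purely for illustration.

import numpy as np
from xgboost.sklearn import XGBClassifier

# Toy data, for illustration only.
X = np.random.rand(100, 5)
y = np.random.randint(0, 2, size=100)

clf = XGBClassifier(n_estimators=50, max_depth=3)
clf.fit(X, y)

booster = clf.booster()      # the low-level xgboost.Booster (get_booster() in newer releases)
print(booster.get_fscore())  # per-feature split counts, e.g. {'f0': 12, ...}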

Example 1: myThreadFunc

# Required module import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
def myThreadFunc(ThreadID):
	# Xy, le, the hyperparameters and the calculate_score* helpers are
	# globals defined elsewhere in this module.
	X_train = Xy[ThreadID][0]
	X_test = Xy[ThreadID][1]
	y_train = Xy[ThreadID][2]
	y_test = Xy[ThreadID][3]

	y_train2 = le.transform(y_train)
	y_test2 = le.transform(y_test)

	clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=5000, objective='multi:softprob', subsample=ss_cs, colsample_bytree=ss_cs, gamma=gamma, min_child_weight=min_child_weight, seed=0, silent=True, reg_lambda=reg_lambda, reg_alpha=reg_alpha)
	clf.fit(X_train, y_train, eval_set=[(X_test, y_test2)], eval_metric=calculate_score_2, early_stopping_rounds=early_stopping_rounds, verbose=False)
	# booster() exposes the underlying Booster; best_ntree_limit is the
	# tree count chosen by early stopping.
	y_predicted = clf.predict_proba(X_test, ntree_limit=clf.booster().best_ntree_limit)
	score = calculate_score(y_predicted, y_test2)
	print(score, clf.booster().best_ntree_limit)

	train_and_test_scores[ThreadID] = score
Developer ID: mircean, Project: ML, Lines of code: 18, Source: module3_python_v2.py

Example 2: job_function

# Required module import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
def job_function(params):
	learning_rate = params[0]
	max_depth = params[1]
	ss_cs = params[2]
	gamma = params[3]
	min_child_weight = params[4]
	reg_lambda = params[5]
	reg_alpha = params[6]

	# Larger learning rates converge faster, so stop earlier; smaller ones
	# need more patience.
	early_stopping_rounds = 25
	if learning_rate >= 0.3:
		early_stopping_rounds = 5
	if learning_rate <= 0.03:
		early_stopping_rounds = 50

	scores = []
	for i in range(iterations_per_job):
		X_train = Xy[i][0]
		X_test = Xy[i][1]
		y_train = Xy[i][2]
		y_test = Xy[i][3]
		
		y_train2 = le.transform(y_train)   
		y_test2 = le.transform(y_test)   

		clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=5000, objective='multi:softprob', subsample=ss_cs, colsample_bytree=ss_cs, gamma=gamma, min_child_weight=min_child_weight, seed=0, silent=True, reg_lambda=reg_lambda, reg_alpha=reg_alpha)      
		clf.fit(X_train, y_train, eval_set=[(X_test, y_test2)], eval_metric=calculate_score_2, early_stopping_rounds=early_stopping_rounds, verbose=False)
		y_predicted = clf.predict_proba(X_test, ntree_limit=clf.booster().best_ntree_limit)
		score = calculate_score(y_predicted, y_test2)
		scores.append(score)

	avg_score = np.array(scores).mean()
	print(avg_score, params)
	return avg_score
Developer ID: mircean, Project: ML, Lines of code: 36, Source: module6_boost_cv.py

Example 3: extract_leaf_feature

# Required module import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
def extract_leaf_feature(features, targets, train_indexes, params):
    # assumes `import xgboost as xgb` and `import sklearn.preprocessing`
    # at module level, alongside the XGBClassifier import noted above
    model = XGBClassifier(**params)
    model.fit(features[train_indexes], targets[train_indexes])
    booster = model.booster()
    dmatrix = xgb.DMatrix(features)
    # pred_leaf=True yields, for each sample, the index of the leaf it falls
    # into in every tree; one-hot encoding turns these into sparse features
    leaf = booster.predict(dmatrix, pred_leaf=True)
    encoder = sklearn.preprocessing.OneHotEncoder()
    leaf_feature = encoder.fit_transform(leaf)
    return leaf_feature
Developer ID: samcrosoft, Project: Amazon_Review_Helpfulness_Prediction, Lines of code: 11, Source: util.py
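For context, here is a hypothetical usage of extract_leaf_feature (the data, sizes and parameter values below are invented, not taken from the original project): the one-hot encoded leaf indices it returns are typically fed to a second-stage linear model, the classic GBDT feature-transform recipe.

import numpy as np

# Synthetic data for illustration; shapes and parameters are arbitrary.
features = np.random.rand(200, 10)
targets = np.random.randint(0, 2, size=200)
train_indexes = np.arange(150)                 # fit on the first 150 rows
params = {'n_estimators': 30, 'max_depth': 3}

leaf_feature = extract_leaf_feature(features, targets, train_indexes, params)
print(leaf_feature.shape)  # (200, total number of distinct leaves), sparse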

Example 4: do_cell

# Required module import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
def do_cell(task):
    df_train, df_test, x_start, y_start = task[0], task[1], task[2], task[3]
    #print('do_cell', df_train.shape, df_test.shape, x_start, y_start)

    #train
    n_places_th_local = n_places_th
    n_places_local = n_places

    if n_places != 0:
        tmp = df_train.shape[0]
        value_counts = df_train.place_id.value_counts()[0:n_places]
        df_train = pd.merge(df_train, pd.DataFrame(value_counts), left_on='place_id', right_index=True)[df_train.columns]
        n_places_th_local = value_counts.values[n_places - 1]
        percentage = df_train.shape[0]/tmp

    elif n_places_th != 0:
        value_counts = df_train.place_id.value_counts()
        n_places_local = value_counts[value_counts >= n_places_th_local].count()
        mask = value_counts[df_train.place_id.values] >= n_places_th_local
        percentage = mask.value_counts()[True]/df_train.shape[0]
        df_train = df_train.loc[mask.values]

    else:
        n_places_th_local = 2

        value_counts = df_train.place_id.value_counts()
        n_places_local = value_counts[value_counts >= n_places_th_local].count()
        mask = value_counts[df_train.place_id.values] >= n_places_th_local
        percentage = mask.value_counts()[True]/df_train.shape[0]

        while percentage > n_places_percentage:
            n_places_th_local += 1
            n_places_local = value_counts[value_counts >= n_places_th_local].count()
            mask = value_counts[df_train.place_id.values] >= n_places_th_local
            percentage = mask.value_counts()[True]/df_train.shape[0]

        n_places_th_local -= 1
        n_places_local = value_counts[value_counts >= n_places_th_local].count()
        mask = value_counts[df_train.place_id.values] >= n_places_th_local
        percentage = mask.value_counts()[True]/df_train.shape[0]

        df_train = df_train.loc[mask.values]


    #print(x_start, y_start, n_places_local, n_places_th_local, percentage)
        
    #test
    row_ids = df_test.index
    if 'place_id' in df_test.columns:
        df_test = df_test.drop(['place_id'], axis=1)

    le = LabelEncoder()
    y = le.fit_transform(df_train.place_id.values)
    
    X = df_train.drop(['place_id'], axis=1).values
    X_predict = df_test.values

    score = 0
    n_estimators = 0
    if xgb == 1:    #module-level flag: 1 = use XGBoost, otherwise random forest
        if xgb_calculate_n_estimators == True:
            clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=5000, objective='multi:softprob', subsample=ss, colsample_bytree=cs, gamma=gamma, min_child_weight=min_child_weight, reg_lambda=reg_lambda, reg_alpha=reg_alpha)

            if train_test == 1:
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
   
                clf.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric=calculate_score, early_stopping_rounds=early_stopping_rounds, verbose=10 if one_cell == 1 else False)
                score = round(1 - clf.booster().best_score, 6)
                n_estimators = clf.booster().best_ntree_limit
            else:
                #note: 'abc' is undefined, so this branch raises a NameError in the original code
                abc += 1
                xgb_options = clf.get_xgb_params()
                xgb_options['num_class'] = n_places + 1
                train_dmatrix = DMatrix(X, label=y)

                #some of the classes have less than n_folds, cannot use stratified KFold
                #folds = StratifiedKFold(y, n_folds=n_folds, shuffle=True)
                folds = KFold(len(y), n_folds=n_folds, shuffle=True)
                cv_results = cv(xgb_options, train_dmatrix, clf.n_estimators, early_stopping_rounds=early_stopping_rounds, verbose_eval=10 if one_cell == 1 else False, show_stdv=False, folds=folds, feval=calculate_score)

                n_estimators = cv_results.shape[0]
                score = round(1 - cv_results.values[-1][0], 6)
                std = round(cv_results.values[-1][1], 6)
        else:
            n_estimators = n_estimators_fixed

        clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=n_estimators, objective='multi:softprob', subsample=ss, colsample_bytree=cs, gamma=gamma, min_child_weight=min_child_weight, reg_lambda=reg_lambda, reg_alpha=reg_alpha)
    else:
        clf = RandomForestClassifier(n_estimators = 300, n_jobs = -1)
        if rf_calculate_score == True:
            if train_test == 1:
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
                y_train2 = le.transform(y_train)
                y_test2 = le.transform(y_test)
    
                clf.fit(X_train, y_train2)
                y_predict = clf.predict_proba(X_test)

                scores_local = []
                for i in range(X_test.shape[0]):
#.........the rest of this code is omitted here.........
Developer ID: mircean, Project: ML, Lines of code: 103, Source: test1.py

Example 5: range

# Required module import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
        for i in range(10):
            folds = StratifiedKFold(y_train, n_folds=5, shuffle=True)
            scores = []
            iterations = []
            for train_index, test_index in folds:
                X_train2, X_test2 = X_train.loc[train_index], X_train.loc[test_index]
                y_train2, y_test2 = y_train[train_index], y_train[test_index]

                X_train2, X_test2 = feature_engineering_extra(X_train2, X_test2, y_train2)

                X_train2 = csr_matrix(X_train2.values)
                X_test2 = csr_matrix(X_test2.values)

                clf.fit(X_train2, y_train2, eval_set=[(X_test2, y_test2)], eval_metric='mlogloss', early_stopping_rounds=early_stopping_rounds, verbose=False)
                #print(round(clf.booster().best_score, 6), int(clf.booster().best_ntree_limit))
                scores.append(round(clf.booster().best_score, 6))
                iterations.append(int(clf.booster().best_ntree_limit))

            scores = np.array(scores)
            iterations = np.array(iterations)
            score = scores.mean()
            scores2.append(score)
            print('score, std, iterations', score, scores.std(), iterations.mean())

        scores = np.array(scores2)
        #drop the best and worst of the 10 runs before averaging
        scores = np.delete(scores, [scores.argmax(), scores.argmin()])
        print('score, std', scores.mean(), scores.std())

    if is_tt_rf == 1:
        X_train, X_test = feature_engineering(df_train, df_test, y_train)
    
Developer ID: mircean, Project: ML, Lines of code: 32, Source: module1.py

Example 6: print

# Required module import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
                              reg_alpha=0.05,
                              reg_lambda=2,
                              subsample=1.0,
                              colsample_bytree=1.0,
                              max_delta_step=1,
                              scale_pos_weight=1,
                              objective='multi:softprob',
                              nthread=8,
                              seed=0  # ,
                              # silent = False
                              )
    print('training...')
    xgb_model.fit(training, label)
    print('predicting...')
    predicted = xgb_model.predict_proba(testing)
    predicted = pandas.DataFrame(predicted)
    predicted.columns = xgb_model.classes_
    # Name index column.
    predicted.index.name = 'Id'
    # Write csv.
    print('Saving prediction...')
    predicted.to_csv('Prediction.csv')
    # feature importance
    feat_imp = pandas.Series(xgb_model.booster().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    matplotlib.pyplot.show()
    plot_importance(xgb_model, title='Feature importance')
    matplotlib.pyplot.show()
    plot_tree(xgb_model, num_trees=0)
    matplotlib.pyplot.show()
Developer ID: MichaelPluemacher, Project: San-Francisco-crimes, Lines of code: 32, Source: XGBoost_model.py

Example 7: XGBClassifier

# Required module import: from xgboost.sklearn import XGBClassifier [as alias]
# Or: from xgboost.sklearn.XGBClassifier import booster [as alias]
X_val = Xfold1
y_val = fold1.loc[:, 'Category']


# Now comes the time-consuming step of training xgb.

# In[3]:

xgb = XGBClassifier(**HYPER_PARAMS)
xgb.fit(X_train, y_train, eval_set = [(X_val, y_val)], eval_metric = SCORING, verbose = 10)


# Now, we can gaze at the important features.

# In[4]:

gbdt = xgb.booster()
importance = gbdt.get_fscore()
importance = sorted(importance.items(), key = operator.itemgetter(1), reverse = True)
df = pd.DataFrame(importance, columns = ['feature', 'fscore'])
print(df)


# This provides us with a good idea as to which features are particularly relevant. 
# 
# - clearly, the timing in terms of minute, hour and year are critical
# - the collocated-crime feature scores surprisingly high
# - the spatial coordinates are useful
# - the total number of crimes in a street is an important indicator, as well as some of the log-ratios
# - the month is not particularly essential, presumably as seasonal information can be recovered from the week
Developer ID: sokolov-alex, Project: CSE-291-D-Latent-Variable-Models, Lines of code: 32, Source: script.py


Note: The xgboost.sklearn.XGBClassifier.booster method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers, and copyright remains with the original authors. For distribution and use, refer to the corresponding project's license; do not reproduce without permission.