

Python GradientBoostingClassifier.get_params Method Code Examples

This article collects and summarizes typical usage examples of the Python method sklearn.ensemble.GradientBoostingClassifier.get_params. If you have been wondering exactly how GradientBoostingClassifier.get_params is used in practice, the curated code examples below may help. You can also explore further usage examples of the class it belongs to, sklearn.ensemble.GradientBoostingClassifier.


Five code examples of GradientBoostingClassifier.get_params are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
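Before the full examples, a minimal self-contained sketch (not taken from any of the projects below) of what get_params does: it returns a dict mapping every constructor parameter of the estimator to its current value, defaults included.

from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier(n_estimators=200, max_depth=3)
params = clf.get_params()
print(params['n_estimators'])   # 200, as set above
print(params['learning_rate'])  # 0.1, the sklearn default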

Example 1: gbClf

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
class gbClf(BaseModel):

    """Model using random forest classifier."""

    def __init__(self, train_data_fname=None, nrows=None, **kwargs):
        """Initialize the data frame."""
        super(gbClf, self).__init__(train_data_fname, nrows, **kwargs)

    def set_model(self, **kwargs):
        """Set the classifier."""
        verbose = kwargs.get('verbose', 0)
        n_estimators = kwargs.get('n_estimators', 3000)
        max_depth = kwargs.get('max_depth', 3)
        min_samples_leaf = kwargs.get('min_samples_leaf', 1)
        min_samples_split = kwargs.get('min_samples_split', 2)
        max_features = kwargs.get('max_features', None)
        learning_rate = kwargs.get('learning_rate', 0.1)
        subsample = kwargs.get('subsample', 1.0)
        random_state = kwargs.get('random_state', 24)

        self.learner = GradientBoostingClassifier(n_estimators=n_estimators,
                                                  max_depth=max_depth,
                                                  learning_rate=learning_rate,
                                                  subsample=subsample,
                                                  min_samples_leaf=min_samples_leaf,
                                                  min_samples_split=min_samples_split,
                                                  max_features=max_features,
                                                  verbose=verbose,
                                                  random_state=random_state)
        print('\n\nGradient Boosting set with parameters:')
        par_dict = self.learner.get_params()
        for ipar, value in par_dict.items():
            print('{}: {}'.format(ipar, value))
        print('\n\n')

    def fitNscore(self, **kwargs):
        """Fit classifier and produce score and related plots."""
        col2fit = kwargs.get('features')
        # cleaning
        bids_path = kwargs.get('bids_path', 'data/bids.csv')
        if not self.iscleaned:
            print('Preparing the data...')
            self.prepare_data(bids_path, **kwargs)
        print('columns for fit=\n{}'.format(self.df_train.columns))

        test_size = 0.2  # fraction kept for testing
        rnd_seed = 24  # for reproducibility

        #features_train, features_test, target_train, target_test =\
        #    train_test_split(self.df_train[col2fit].values,
        #                     self.df_train['outcome'].values,
        #                     test_size=test_size,
        #                     random_state=rnd_seed)

        # old sklearn.cross_validation API: the splitter takes y directly
        # and is itself iterable
        sss = StratifiedShuffleSplit(self.df_train['outcome'].values,
                                     n_iter=1,
                                     test_size=test_size,
                                     random_state=rnd_seed)
        for train_index, test_index in sss:
            features_train = self.df_train[col2fit].values[train_index]
            features_test = self.df_train[col2fit].values[test_index]
            target_train = self.df_train['outcome'].values[train_index]
            target_test = self.df_train['outcome'].values[test_index]

        # Fit Classifier
        self.fitModel(features_train, target_train, **kwargs)

        # Predict on the rest of the sample
        print('\nPredicting...')
        predictions = self.learner.predict(features_test)
        probas = self.learner.predict_proba(features_test)

        # Feature index ordered by importance
        ord_idx = np.argsort(self.learner.feature_importances_)
        print("Feature ranking:")
        for ifeaturindex in ord_idx[::-1]:
            print('{0} \t: {1}'.format(col2fit[ifeaturindex],
                                       round(self.learner.feature_importances_[ifeaturindex], 2)))

        # Score
        print('(Self) Score={}'.format(self.learner.score(features_test, target_test)))

        # Plots

        # Feature importances
        maxfeat2show = 30 # number of features to show in plots
        importances = self.learner.feature_importances_
        #std = np.std([tree.feature_importances_ for tree in self.learner.estimators_],axis=0)
        indices = np.argsort(importances)[::-1]
        indices = indices[:min(maxfeat2show, len(indices))]  # truncate if > maxfeat2show
        ordered_names = [col2fit[i] for i in indices]

        fig_import = plt.figure(figsize=(10, 10))
        plt.title("Feature importances, GB")
        #plt.barh(range(len(indices)), importances[indices],
        #        color="b", xerr=std[indices], align="center",ecolor='r')
        plt.barh(range(len(indices)), importances[indices],
                 color="b", align="center")
        plt.yticks(range(len(indices)), ordered_names)
        plt.ylim([-1, len(indices)])
        plt.ylim(plt.ylim()[::-1])
#......... part of the code omitted here .........
Developer: jfraj | Project: khor | Lines: 103 | Source: gb.py
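Example 1 only prints the returned dictionary, but get_params pairs naturally with set_params: the dict it returns can configure a fresh estimator. A minimal sketch of that round trip (illustrative, not from the project above):

from sklearn.ensemble import GradientBoostingClassifier

src = GradientBoostingClassifier(n_estimators=3000, random_state=24)
dst = GradientBoostingClassifier()
dst.set_params(**src.get_params())  # dst now mirrors src's configuration
assert dst.get_params() == src.get_params()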

Example 2: KFold

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
dire_flying_courier_time		71132  ----'      the flying courier was acquired by "dire" in only part of the matches
dire_first_ward_time			95404  ---------> the "ward" item was not used by "dire" in every match
'''

# fill NAs with 0, in accordance with the task description (step 3)
X_train = features.fillna(value=0, axis='columns')
# define the target variable (step 4)
y_train = train_df['radiant_win']
print('\nradiant_win')

print('\nGradientBoostingClassifier:')
# use KFold, as the task description requires (step 5)
# (old sklearn.cross_validation API; totalLenght is defined in the elided code above)
kf = KFold(totalLenght, n_folds=5, shuffle=True, random_state=1013)
# learn with default GradientBoostingClassifier settings
clf = GradientBoostingClassifier(random_state=1013)
params = clf.get_params()
start_time = datetime.now()
score = cross_val_score(estimator=clf, X=X_train, y=y_train, scoring='roc_auc', cv=kf).mean()
print('\tn_estimators:', params['n_estimators'],
      '\tmax_depth:', params['max_depth'],
      '\tscore:', score,
      '\ttimeElapsed:', datetime.now() - start_time)

# a decent score, but training takes a very long time
'''
	n_estimators: 100	max_depth: 3	score: 0.70661221449	timeElapsed: 0:07:13.214000
'''

# try to find a compromise between score and execution time
for maxDepth in [1, 2, 3, 5]:
	for treeCount in [10, 20, 30, 40, 50, 80, 100, 200, 500]:  # body truncated in the listing; see the sketch below
Developer: bzz13 | Project: CourseraYandexMachineLerning | Lines: 33 | Source: final.py
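The loop body is cut off in the listing above. A hedged reconstruction (an assumption, not the author's code) would rebuild the classifier for each (max_depth, n_estimators) pair and score it exactly as before:

for maxDepth in [1, 2, 3, 5]:
    for treeCount in [10, 20, 30, 40, 50, 80, 100, 200, 500]:
        clf = GradientBoostingClassifier(n_estimators=treeCount,
                                         max_depth=maxDepth,
                                         random_state=1013)
        start_time = datetime.now()
        score = cross_val_score(estimator=clf, X=X_train, y=y_train,
                                scoring='roc_auc', cv=kf).mean()
        print('\tn_estimators:', treeCount,
              '\tmax_depth:', maxDepth,
              '\tscore:', score,
              '\ttimeElapsed:', datetime.now() - start_time)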

Example 3: print

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
# Print the feature ranking
print("Feature ranking:")
for f in range(x2.shape[1]):
    print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

# Plot the feature importances of the forest
plt.figure()
plt.title("Feature importances")
plt.bar(range(x2.shape[1]), importances[indices],
        color="r", align="center")
plt.xticks(range(x2.shape[1]), indices)
plt.xlim([-1, x2.shape[1]])
plt.show()

feature_imp(X)
clf.get_params()

param_grid = [
  {'learning_rate': [0.05, 0.1, 0.2, 0.25], 'max_depth': [3,4,5,6], 'min_samples_leaf': [1,2], 'n_estimators': [100,200,300]},
 ]
 
svr = GradientBoostingClassifier() 
from sklearn import grid_search
clf = grid_search.GridSearchCV(svr, param_grid)
clf.fit(x2,training_target)


    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
Developer: cedricoeldorf | Project: Binary_classification | Lines: 32 | Source: GBM.py
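Example 3 imports sklearn.grid_search, which was deprecated in scikit-learn 0.18 and removed in 0.20. A minimal sketch of the same search against the current API (x2 and training_target come from the elided part of the script):

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {'learning_rate': [0.05, 0.1, 0.2, 0.25],
              'max_depth': [3, 4, 5, 6],
              'min_samples_leaf': [1, 2],
              'n_estimators': [100, 200, 300]}
search = GridSearchCV(GradientBoostingClassifier(), param_grid, cv=5)
# search.fit(x2, training_target)
# print(search.best_params_)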

Example 4: load_obj

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
if __name__ == "__main__":
	if load_sample:
		header, x_array, label = load_obj(sample_save_file)
	else:
		header, x_array, label, _ = read_features(feature_file)
		save = [header, x_array, label]
		save_obj(save, sample_save_file)

	train_set, test_set, _ = split_train_test(seed, x_array, label, 0.9)

	train_x, train_y = train_set
	# print "size of train_x is", train_x.shape

	gbrt = GradientBoostingClassifier()
	parameter_grid = {'n_estimators': range(10, 110, 10),
	                  'learning_rate': uniform(loc=0.01, scale=0.99),
	                  'max_depth': range(1, 6, 1),
	                  'max_features': uniform(loc=0.1, scale=0.89),
	                  'subsample': uniform(loc=0.2, scale=0.79),
	                  'min_samples_leaf': [min_samples_leaf],
	                  'min_samples_split': [min_samples_split]}
	gbrt_gridsearch = grid_search.RandomizedSearchCV(gbrt, parameter_grid,
	                                                 scoring=auc, cv=4, n_jobs=2,
	                                                 n_iter=100, verbose=5, refit=False)
	gbrt_gridsearch.fit(train_x, train_y)

	gbrt_best = GradientBoostingClassifier(verbose=2, **gbrt_gridsearch.best_params_)
	gbrt_best.fit(train_x, train_y)
	print "parameters:", gbrt_best.get_params()
	save_obj(gbrt_best, model_save_file)
Developer: flybywind | Project: valsh | Lines: 32 | Source: cv_gbrt.py
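Example 4 also relies on the removed sklearn.grid_search module and on scipy.stats.uniform for continuous parameter ranges. A hedged sketch of the equivalent call with the modern API (the data loading is assumed, not shown):

from scipy.stats import uniform
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV

param_dist = {'n_estimators': range(10, 110, 10),
              'learning_rate': uniform(loc=0.01, scale=0.99),
              'max_depth': range(1, 6),
              'subsample': uniform(loc=0.2, scale=0.79)}
search = RandomizedSearchCV(GradientBoostingClassifier(), param_dist,
                            scoring='roc_auc', cv=4, n_iter=100, refit=False)
# search.fit(train_x, train_y); print(search.best_params_)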

Example 5: log_loss

# Module to import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import get_params [as alias]
y_pred_gbdt = gbdt_model.predict_proba(X_valid)[:, 1]
log_loss_gbdt = log_loss(y_valid, y_pred_gbdt)
print('log loss of GBDT on valid set: %.5f' % log_loss_gbdt)

## store the pre-trained gbdt_model
pickle.dump(gbdt_model, open(fp_gbdt_model, 'wb'))

del X_train_gbdt
del y_train_gbdt
gc.collect()

gbdt_model = pickle.load(open(fp_gbdt_model, 'rb'))
#----- data for LR (one-hot encoding of GBDT leaf output) -----#
id_cols = []
for i in range(1, gbdt_model.get_params()['n_estimators']+1):
    id_cols.append('tree'+str(i))
oh_enc = OneHotEncoder(id_cols)

def chunker(seq, size):
    return (seq[pos: pos + size] for pos in range(0, len(seq), size))

## fit oh_enc on the training set
df_train_id = pd.DataFrame(gbdt_model.apply(X_train_org)[:, :, 0], columns=id_cols, dtype=np.int8)

for chunk in chunker(df_train_id, 50000):
    oh_enc.fit(chunk)
    
del df_train_id

del X_train_org
Developer: xiaowanzi123 | Project: Practice-of-Machine-Learning | Lines: 32 | Source: gbdt-lr.py
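Example 5 implements the classic GBDT+LR scheme: gbdt_model.apply() maps each sample to the index of the leaf it reaches in every tree, and those indices are one-hot encoded as input features for a logistic regression. A self-contained sketch on synthetic data (oh_enc above is the author's custom encoder; sklearn's OneHotEncoder stands in for it here):

import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder

X = np.random.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)

gbdt = GradientBoostingClassifier(n_estimators=20).fit(X, y)
# apply() returns shape (n_samples, n_estimators, 1) for binary problems,
# hence the [:, :, 0] slice, matching the example above.
leaves = gbdt.apply(X)[:, :, 0]
enc = OneHotEncoder(handle_unknown='ignore').fit(leaves)
lr = LogisticRegression(max_iter=1000).fit(enc.transform(leaves), y)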


Note: the sklearn.ensemble.GradientBoostingClassifier.get_params examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other open-source code and documentation platforms. The snippets are selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use are subject to each project's license. Please do not reproduce without permission.