

Python ensemble.GradientBoostingRegressor Class Code Examples

This article collects typical usage examples of the Python class sklearn.ensemble.GradientBoostingRegressor. If you are wondering what the GradientBoostingRegressor class does, how to use it, or where to find real-world examples of it, the curated class code examples below should help.


Fifteen code examples of the GradientBoostingRegressor class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the site recommend better Python code examples.
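Before the project-sourced examples, here is a minimal self-contained sketch of the class's basic fit/predict workflow; the synthetic dataset and hyperparameter values are illustrative choices, not taken from any example below.

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

# Illustrative synthetic regression problem.
X, y = make_regression(n_samples=500, n_features=10, noise=10.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Arbitrary starting hyperparameters, not tuned values.
reg = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
                                max_depth=3, random_state=0)
reg.fit(X_train, y_train)
print("held-out R^2:", reg.score(X_test, y_test))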

Example 1: train_model

def train_model(features, label, params):
    # Preprocessing (scaling was disabled in the source project)
    # scaled_features = preprocessing.scale(features)
    scaled_features = features

    total_rmse = 0.0
    count = 0

    # KFold(len(X), n_folds=10) is the pre-0.18 scikit-learn API; with
    # sklearn.model_selection.KFold, create the splitter and call split().
    kf = KFold(n_splits=10)

    for train_index, validation_index in kf.split(scaled_features):

        X_train, X_validation = scaled_features[train_index], scaled_features[validation_index]
        Y_train, Y_validation = label[train_index], label[validation_index]

        # estimator = SVR(**params)
        # estimator = RandomForestRegressor(**params)
        estimator = GradientBoostingRegressor(**params)

        estimator.fit(X_train, Y_train)

        current_rmse = calculate_RMSE(estimator, X_validation, Y_validation)

        total_rmse += current_rmse
        count += 1

    # Average across all folds
    avg_current_rmse = total_rmse / float(count)
    print("Avg Current RMSE " + str(avg_current_rmse))

    return (params, avg_current_rmse)
Author: Amortized, Project: Restaurant-Revenue-Predictor, Lines: 31, Source: process.py
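calculate_RMSE is a project helper that the snippet does not include; a plausible stand-in is sketched below, offered as an assumption rather than the project's actual code.

import numpy as np

def calculate_RMSE(estimator, X, y):
    # Hypothetical reconstruction of the helper: root-mean-squared error of
    # the fitted estimator's predictions against the true labels.
    return float(np.sqrt(np.mean((estimator.predict(X) - y) ** 2)))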

Example 2: test_gradient_boosting_validation_fraction

def test_gradient_boosting_validation_fraction():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=100,
                                     n_iter_no_change=10,
                                     validation_fraction=0.1,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)
    gbc2 = clone(gbc).set_params(validation_fraction=0.3)
    gbc3 = clone(gbc).set_params(n_iter_no_change=20)

    gbr = GradientBoostingRegressor(n_estimators=100, n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    validation_fraction=0.1,
                                    random_state=42)
    gbr2 = clone(gbr).set_params(validation_fraction=0.3)
    gbr3 = clone(gbr).set_params(n_iter_no_change=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    # Check if validation_fraction has an effect
    gbc.fit(X_train, y_train)
    gbc2.fit(X_train, y_train)
    assert gbc.n_estimators_ != gbc2.n_estimators_

    gbr.fit(X_train, y_train)
    gbr2.fit(X_train, y_train)
    assert gbr.n_estimators_ != gbr2.n_estimators_

    # Check that n_estimators_ increases monotonically with n_iter_no_change
    # (validation settings held fixed)
    gbc3.fit(X_train, y_train)
    gbr3.fit(X_train, y_train)
    assert gbr.n_estimators_ < gbr3.n_estimators_
    assert gbc.n_estimators_ < gbc3.n_estimators_
Author: amueller, Project: scikit-learn, Lines: 34, Source: test_gradient_boosting.py
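The test above exercises early stopping; as a quick illustration of the same mechanism outside the test suite, the sketch below (with arbitrary synthetic data and settings) shows how n_iter_no_change and validation_fraction cap the number of fitted trees, readable afterwards from n_estimators_.

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor

X, y = make_regression(n_samples=1000, noise=5.0, random_state=0)

# Stop adding trees once 10 consecutive iterations fail to improve the loss
# on an internal 10% validation split.
gbr = GradientBoostingRegressor(n_estimators=1000, n_iter_no_change=10,
                                validation_fraction=0.1, random_state=0)
gbr.fit(X, y)
print(gbr.n_estimators_)  # usually far fewer than the 1000 requested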

Example 3: test_feature_importance_regression

def test_feature_importance_regression():
    """Test that Gini importance is calculated correctly.

    This test follows the example from [1]_ (pg. 373).

    .. [1] Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements
       of statistical learning. New York: Springer series in statistics.
    """
    california = fetch_california_housing()
    X, y = california.data, california.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    reg = GradientBoostingRegressor(loss='huber', learning_rate=0.1,
                                    max_leaf_nodes=6, n_estimators=100,
                                    random_state=0)
    reg.fit(X_train, y_train)
    sorted_idx = np.argsort(reg.feature_importances_)[::-1]
    sorted_features = [california.feature_names[s] for s in sorted_idx]

    # The most important feature is the median income by far.
    assert sorted_features[0] == 'MedInc'

    # The three subsequent features are the following. Their relative ordering
    # might change a bit depending on the randomness of the trees and the
    # train / test split.
    assert set(sorted_features[1:4]) == {'Longitude', 'AveOccup', 'Latitude'}
Author: amueller, Project: scikit-learn, Lines: 26, Source: test_gradient_boosting.py
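The impurity-based feature_importances_ checked above can overstate high-cardinality features; permutation importance is scikit-learn's model-agnostic alternative. The sketch below reproduces the same California-housing setup with illustrative settings of my own.

from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

california = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(
    california.data, california.target, random_state=0)

reg = GradientBoostingRegressor(random_state=0).fit(X_train, y_train)

# Shuffle each feature in turn and measure the drop in the test score.
result = permutation_importance(reg, X_test, y_test, n_repeats=5, random_state=0)
for idx in result.importances_mean.argsort()[::-1]:
    print(california.feature_names[idx], result.importances_mean[idx])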

Example 4: gbdt_model

def gbdt_model(trains):

    trains = np.array(trains)

    # All arguments below are scikit-learn defaults, spelled out explicitly.
    # Note: loss='ls' was renamed to loss='squared_error' in scikit-learn 1.0.
    gbdt = GradientBoostingRegressor(
        loss='ls',
        learning_rate=0.1,
        n_estimators=100,
        subsample=1,
        min_samples_split=2,
        min_samples_leaf=1,
        max_depth=3,
        init=None,
        random_state=None,
        max_features=None,
        alpha=0.9,
        verbose=0,
        max_leaf_nodes=None,
        warm_start=False
    )

    # Last column holds the label; the rest are features.
    train_set = trains[:, :-1]
    label_set = trains[:, -1]

    gbdt.fit(train_set, label_set)
    return gbdt
Author: fengkaicnic, Project: pyml, Lines: 27, Source: gbdt_model.py

Example 5: gbm_fit

def gbm_fit(params, cv_folds):
    gbm = GradientBoostingRegressor(**params)
    gbm.fit(x_train, y_train)

    # Check model accuracy via cross-validation (no separate validation set
    # needed): the training data is split into cv_folds folds, and each fold
    # takes a turn as the held-out set while the model trains on the rest.
    # Note: sklearn.cross_validation and scoring='mean_squared_error' are the
    # pre-0.18 API; newer releases use sklearn.model_selection.cross_val_score
    # with scoring='neg_mean_squared_error'.
    cv_scores_mse = cross_validation.cross_val_score(gbm, x_train, y_train, cv=cv_folds,
                                                     scoring='mean_squared_error')
    print('\nModel Report')
    print('MSE Score: Mean - %.7g | Std - %.7g | Min - %.7g | Max - %.7g' %
          (np.mean(cv_scores_mse), np.std(cv_scores_mse), np.min(cv_scores_mse), np.max(cv_scores_mse)))
    feat_imp = pd.Series(gbm.feature_importances_, features).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    plt.show()

    # Check actual performance on test data
    final_predictions = gbm.predict(x_test)
    test['health_score_in_week'] = final_predictions
    test.to_csv(output_file, columns=['user_id', 'date', 'steps', 'total_sleep', 'resting_hr',
                                      'step_week_slope', 'sleep_week_slope', 'hr_week_slope',
                                      'curr_health_score', 'health_score_in_week'])

    # Save the model to the file 'health_prediction.pkl'
    joblib.dump(gbm, 'health_prediction.pkl', compress=1)
Author: Fitomo, Project: Prediction-Service, Lines: 26, Source: predicted_health_algorithm.py

Example 6: GBRModel

def GBRModel(X_train, X_cv, y_train, y_cv):
    targets = get_target_array()

    n_estimators = [50, 100]  # larger values (1500, 5000) were commented out upstream
    max_depth = [3, 8]

    best_GBR = None
    best_mse = float('inf')
    best_score = -float('inf')

    print("################# Performing Gradient Boosting Regression #######################\n\n\n\n")
    for estm in n_estimators:
        for cur_depth in max_depth:
            regr_GBR = GradientBoostingRegressor(n_estimators=estm, max_depth=cur_depth)
            predictor = regr_GBR.fit(X_train, y_train)
            score = regr_GBR.score(X_cv, y_cv)
            mse = np.mean((regr_GBR.predict(X_cv) - y_cv) ** 2)
            print("Number of estimators used: ", estm)
            print("Tree depth used: ", cur_depth)
            print("Residual sum of squares: %.2f " % mse)
            print("Variance score: %.2f \n" % score)
            # Keep this model only if it improves both R^2 and MSE.
            if best_score <= score and best_mse > mse:
                best_mse = mse
                best_score = score
                best_GBR = predictor
    print("\nBest score: ", best_score)
    print("Best mse: ", best_mse)
    return best_GBR
Author: SaarthakKhanna2104, Project: Home-Depot-Product-Search-Relevance, Lines: 34, Source: GBR.py

Example 7: gradient_boosting

def gradient_boosting(features_values_temp, rows_temp, columns_temp, prediction_values_temp, kernel, threshold):
	# kernel: linear, poly, rbf, sigmoid, precomputed

	rows = rows_temp
	columns = columns_temp

	features_values = [x for x in features_values_temp]
	prediction_values = [y for y in prediction_values_temp]

	rotated = convert_list_to_matrix(features_values, rows, columns)
	scores = np.array(prediction_values)

	threshold = float(threshold)

	estimator = SVR(kernel=kernel)  # swap in the model under test (lasso, ridge, etc.)

	# Demo block adapted from the GradientBoostingRegressor docstring; it
	# ignores the arguments above and fits on synthetic Friedman #1 data.
	X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0)
	X_train, X_test = X[:200], X[200:]
	y_train, y_test = y[:200], y[200:]
	est = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=1,
	                                random_state=0, loss='ls').fit(X_train, y_train)
	return mean_squared_error(y_test, est.predict(X_test))
Author: adityasubramanian, Project: kaggle_titanic, Lines: 30, Source: feature_selection.py

Example 8: fit

def fit(filename, treename, inputsname, targetname, workingpoint=0.9, test=False):
    # Reading inputs and targets
    ninputs = len(inputsname)
    branches = copy.deepcopy(inputsname)
    branches.append(targetname)
    data = root2array(filename, treename=treename, branches=branches)
    data = data.view((np.float64, len(data.dtype.names)))
    # Extract and format inputs and targets from numpy array
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    # if test requested, use 60% of events for training and 40% for testing
    inputs_train = inputs
    targets_train = targets
    if test:
        # sklearn.cross_validation is the pre-0.18 module; newer releases
        # provide train_test_split in sklearn.model_selection.
        inputs_train, inputs_test, targets_train, targets_test = cross_validation.train_test_split(inputs, targets, test_size=0.4, random_state=0)
    # Define and fit quantile regression (quantile = workingpoint)
    # Default training parameters are used
    regressor = GradientBoostingRegressor(loss='quantile', alpha=workingpoint)
    regressor.fit(inputs_train, targets_train)
    if test:
        # Compare the regression prediction with the true value and count the
        # fraction of events that fall below it; this fraction should match
        # the working point.
        predict_test = regressor.predict(inputs_test)
        compare = np.less(targets_test, predict_test)
        print('Testing regression with inputs', inputsname, 'and working point', workingpoint)
        print('    Test efficiency =', float(list(compare).count(True)) / float(len(compare)))
        # TODO: add 1D efficiency graphs vs input variables
    return regressor
Author: jbsauvan, Project: L1T-Utilities, Lines: 28, Source: quantile_regression.py
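The quantile loss used above generalizes to prediction intervals: one regressor per quantile bounds the target from both sides. The sketch below is a minimal illustration on synthetic data, not part of the project above.

import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor

X, y = make_friedman1(n_samples=1000, noise=1.0, random_state=0)

# One model per quantile: the 5th and 95th percentiles give a ~90% interval.
lower = GradientBoostingRegressor(loss='quantile', alpha=0.05, random_state=0).fit(X, y)
upper = GradientBoostingRegressor(loss='quantile', alpha=0.95, random_state=0).fit(X, y)

inside = np.mean((lower.predict(X) <= y) & (y <= upper.predict(X)))
print('fraction of targets inside the interval:', inside)  # roughly 0.90 here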

Example 9: impute

def impute(df, imp_val, headers):
	if np.isnan(imp_val):
		imp_val = -500
	log("imputing...", 1)

	# Defaults spelled out explicitly; presort was removed in scikit-learn
	# 0.24 and is omitted here.
	model = GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=100,
	                                  subsample=1.0, min_samples_split=2, min_samples_leaf=1,
	                                  min_weight_fraction_leaf=0.0, max_depth=3, init=None,
	                                  random_state=None, max_features=None, alpha=0.9,
	                                  verbose=0, max_leaf_nodes=None, warm_start=False)
	# DataFrame.get_values() was removed in pandas 1.0; to_numpy() replaces it.
	data = df[headers].to_numpy(dtype=float, copy=True)
	data[np.isnan(data)] = -500

	for col in range(len(headers)):
		# For the current column, keep only rows whose value is not the
		# imputation sentinel, so the model trains on real target values.
		reduced_data = data[np.logical_not(data[:, col] == imp_val)]
		target_set = reduced_data[:, col]
		training_set = np.delete(reduced_data, col, 1)
		model.fit(training_set, target_set)
		for row_num, row in enumerate(data):
			remaining = np.delete(row, col, 0)
			if data[row_num, col] == imp_val:
				# predict() expects a 2-D array of shape (1, n_features)
				data[row_num, col] = model.predict(remaining.reshape(1, -1))[0]
	for cntr, h in enumerate(headers):
		df[h] = data[:, cntr]
	return df
Author: reventropy, Project: maxquant_differential_analysis, Lines: 27, Source: mq_diff_1.7.py
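For reference, newer scikit-learn releases package this train-per-column imputation pattern as sklearn.impute.IterativeImputer (still marked experimental); below is a minimal sketch of the equivalent usage, with a toy array of my own.

import numpy as np
# IterativeImputer is experimental and must be enabled explicitly.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
from sklearn.ensemble import GradientBoostingRegressor

X = np.array([[1.0, 2.0], [3.0, np.nan], [5.0, 6.0], [np.nan, 8.0]])

# Each column with missing values is modeled from the remaining columns.
imputer = IterativeImputer(estimator=GradientBoostingRegressor(), random_state=0)
print(imputer.fit_transform(X))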

Example 10: modelTheData

def modelTheData(data, target):

#    params = {'n_estimators': 400, 'max_depth': 4, 'min_samples_split': 2,
#          'subsample': 0.5, 'min_samples_leaf': 2,
#          'learning_rate': 0.01, 'loss': 'ls'}

    # Beijing model; learn_rate was a deprecated alias of learning_rate and
    # has been removed from scikit-learn, so only learning_rate is passed.
    myMachine = GradientBoostingRegressor(alpha=0.9, init=None,
             learning_rate=0.05, loss='ls', max_depth=1, max_features=None,
             min_samples_leaf=2, min_samples_split=2, n_estimators=300,
             random_state=None, subsample=0.5, verbose=0)

    # Shanghai model
#    myMachine = GradientBoostingRegressor(alpha=0.9, init=None,
#             learning_rate=0.05, loss='ls', max_depth=3, max_features=None,
#             min_samples_leaf=2, min_samples_split=2, n_estimators=500,
#             random_state=None, subsample=0.5, verbose=0)

#    myMachine = GradientBoostingRegressor(**params)
    myMachine.fit(data, target)

    return myMachine
Author: wybert, Project: PMpredict, Lines: 27, Source: modelSHData.py

Example 11: build_models

    def build_models(self):

        self.remove_columns(
            [
                "institute_latitude",
                "institute_longitude",
                "institute_state",
                "institute_country",
                "var10",
                "var11",
                "var12",
                "var13",
                "var14",
                "var15",
                "instructor_past_performance",
                "instructor_association_industry_expert",
                "secondary_area",
                "var24",
            ]
        )

        model1 = GradientBoostingRegressor(learning_rate=0.1, n_estimators=200, subsample=0.8)
        model2 = RandomForestRegressor(n_estimators=50)
        model3 = ExtraTreesRegressor(n_estimators=50)

        model1.fit(self.X, self.y)
        model2.fit(self.X, self.y)
        model3.fit(self.X, self.y)

        return [model1, model2, model3]
Author: numb3r33, Project: predict-grants, Lines: 30, Source: model.py
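How the three fitted regressors are combined is not shown in the snippet; a minimal sketch of uniform prediction averaging follows, with the equal weighting being an assumption rather than the project's actual blending scheme.

import numpy as np

def blend_predict(models, X):
    # Uniformly average the individual regressors' predictions.
    return np.mean([model.predict(X) for model in models], axis=0)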

Example 12: fit

    def fit(self, data_train, target):
        self.target_train = target
        self.catcol = data_train.filter(like='var').columns.tolist()

        # Fit a GBR on all features, keep those whose importance exceeds
        # 0.35 * the mean importance, then refit on the reduced set.
        # estimator.transform(threshold=...) was removed from scikit-learn;
        # sklearn.feature_selection.SelectFromModel is the current equivalent.
        self.gbr = GradientBoostingRegressor(n_estimators=self.nest, max_depth=7)
        self.gbr.fit(data_train, self.target_train)
        self.transformed_train_gbr = SelectFromModel(
            self.gbr, threshold="0.35*mean", prefit=True).transform(data_train)
        self.gbr_tr_fit = GradientBoostingRegressor(n_estimators=self.nest, max_depth=7)
        self.gbr_tr_fit.fit(self.transformed_train_gbr, self.target_train)

        # Same selection scheme with extra-trees.
        self.xfr = ExtraTreesRegressor(n_estimators=self.nest, max_depth=7)
        self.xfr.fit(data_train, self.target_train)
        self.transformed_train_xfr = SelectFromModel(
            self.xfr, threshold="0.35*mean", prefit=True).transform(data_train)
        self.xfr_tr_fit = ExtraTreesRegressor(n_estimators=self.nest, max_depth=7)
        self.xfr_tr_fit.fit(self.transformed_train_xfr, self.target_train)

        # Models restricted to the categorical-like 'var*' columns.
        self.gbr_cat_fit = GradientBoostingRegressor(n_estimators=self.nest, max_depth=7)
        self.gbr_cat_fit.fit(data_train[self.catcol], self.target_train)

        self.xfr_cat_fit = ExtraTreesRegressor(n_estimators=self.nest, max_depth=7)
        self.xfr_cat_fit.fit(data_train[self.catcol], self.target_train)
        return self
Author: kirilligum, Project: cdips-fire, Lines: 33, Source: cvbari.py

Example 13: gbdrtrain

def gbdrtrain(x, y, pre_x):
	x, pre_x = datscater(x, pre_x)
	# min_samples_leaf=0.8 is read as a fraction of n_samples, not a count
	clf = GradientBoostingRegressor(n_estimators=740, min_samples_leaf=0.8,
	                                min_samples_split=40, learning_rate=0.1,
	                                max_depth=7, random_state=400, loss='huber').fit(x, y)
	# alternative configuration tried upstream:
	# clf = GradientBoostingRegressor(n_estimators=200, max_leaf_nodes=20, learning_rate=0.1,
	#                                 max_depth=6, random_state=400, loss='ls').fit(x, y)
	pred = clf.predict(pre_x)
	return pred
Author: pthaike, Project: comp, Lines: 7, Source: predict.py

Example 14: train

def train(targets, features, model_file, params):
    model = GradientBoostingRegressor(**params)
    print("Training hard...")
    model.fit(features, targets)
    print("Saving model...")
    with open(model_file, 'wb') as f:
        pickle.dump(model, f)
    return model
Author: DenXX, Project: irlab, Lines: 7, Source: train.py

Example 15: add_new_weak_learner

    def add_new_weak_learner(self):
        '''
        Summary:
            Adds a new function, h, to self.weak_learners by solving for Eq. 1 using multiple additive regression trees:

            [Eq. 1] h = argmin_h sum_i (Q_A(s_i, a_i) + h(s_i, a_i) - (r_i + gamma * max_b Q_A(s'_i, b)))^2

        '''
        if len(self.most_recent_episode) == 0:
            # If this episode contains no data, don't do anything.
            return

        # Build up data sets of features and loss terms
        data = np.zeros((len(self.most_recent_episode), self.max_state_features + 1))
        total_loss = np.zeros(len(self.most_recent_episode))

        for i, experience in enumerate(self.most_recent_episode):
            # Grab the experience.
            s, a, r, s_prime = experience

            # Pad in case the state features are too short (as in Atari sometimes).
            features = self._pad_features_with_zeros(s, a)
            loss = (r + self.gamma * self.get_max_q_value(s_prime) - self.get_q_value(s, a))
            
            # Add to relevant lists.
            data[i] = features
            total_loss[i] = loss

        # Compute new regressor and add it to the weak learners.
        estimator = GradientBoostingRegressor(loss='ls', n_estimators=1, max_depth=self.max_depth)
        estimator.fit(data, total_loss)
        self.weak_learners.append(estimator)
Author: david-abel, Project: simple_rl, Lines: 32, Source: GradientBoostingAgentClass.py


Note: The sklearn.ensemble.GradientBoostingRegressor class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects by their respective contributors; copyright remains with the original authors, and any distribution or use must follow each project's License. Do not republish without permission.