

Python GradientBoostingRegressor.fit Method Code Examples

This article collects typical usage examples of the Python method sklearn.ensemble.GradientBoostingRegressor.fit. If you are unsure how GradientBoostingRegressor.fit works or when to use it, the curated examples below should help. You can also explore further usage examples of the enclosing class, sklearn.ensemble.GradientBoostingRegressor.


Fifteen code examples of the GradientBoostingRegressor.fit method are shown below, sorted by popularity by default.
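Before the project excerpts, here is a minimal, self-contained sketch of the fit/predict cycle. The synthetic data and parameter values are illustrative only, and it assumes scikit-learn >= 0.18, where train_test_split lives in sklearn.model_selection:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Synthetic regression data: a linear term plus a smooth nonlinearity and noise
rng = np.random.RandomState(0)
X = rng.rand(500, 4)
y = 2.0 * X[:, 0] + np.sin(6.0 * X[:, 1]) + rng.normal(scale=0.1, size=500)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
                                max_depth=3, random_state=0)
gbr.fit(X_train, y_train)  # the method this page documents
print("Test MSE: %.4f" % mean_squared_error(y_test, gbr.predict(X_test)))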

Example 1: cross_val_cols

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
    def cross_val_cols(self, n_folds=3):
        """
        Takes in: number of folds

        Prints median absolute error, median percent error, and RMSE,
        and stores the per-fold predictions in self.results
        """

        cv = KFold(n=self.X_train.shape[0], n_folds=n_folds)
        gbr = GradientBoostingRegressor(**self.params)
        self.med_error = []
        self.rmse_cv = []
        self.pct_error = []
        self.results = {'pred': [],
                        'real': []}
        dfFeatures = []  # fix: this accumulator was never initialized in the excerpt
        for train, test in cv:
            gbr.fit(self.X_train[train], self.y_train[train])
            # unencode and final_cols are defined elsewhere in the project
            dfFeatures += [unencode(pd.DataFrame(columns=final_cols[:-1],
                                                 data=self.X_train[test]))]
            pred = gbr.predict(self.X_train[test])
            # fix: predExp/testExp were undefined in the excerpt; assuming the
            # same back-transform the original used in its RMSE line below
            predExp = np.power(pred, 10)
            testExp = np.power(self.y_train[test], 10)
            medError = median_absolute_error(predExp, testExp)
            percentError = np.median([np.fabs(p - t) / t
                                      for p, t in zip(predExp, testExp)])
            error = mean_squared_error(predExp, testExp) ** 0.5
            self.inFeatures = self.X_train[test]
            self.results['pred'] += list(predExp)
            self.results['real'] += list(testExp)
            self.rmse_cv += [error]
            self.med_error += [medError]
            self.pct_error += [percentError]
        print 'Abs Median Error:', np.mean(self.med_error)
        print 'Abs Percent Error:', np.mean(self.pct_error)
        print 'Mean RMSE:', np.mean(self.rmse_cv)
        self.valDf = pd.concat(dfFeatures)  # fix: pd.DataFrame.concat is not a pandas API
        self.valDf = self.valDf.reset_index().drop('index', axis=1)
        self.valDf['pred'] = self.results['pred']
        self.valDf['real'] = self.results['real']
        return self.valDf
Developer: jbrosamer, Project: PonyPricer, Lines of code: 37, Source file: model.py
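The excerpt above uses the pre-0.18 KFold API, where KFold(n=..., n_folds=...) is itself iterable. A sketch of the same fold loop against the modern API, where KFold takes n_splits and yields indices via .split(); the function name and signature here are illustrative, not part of the project:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import KFold

def cross_val_rmse(X_train, y_train, params, n_folds=3):
    # Same loop shape as cross_val_cols, minus the project-specific unencoding
    gbr = GradientBoostingRegressor(**params)
    rmse_cv = []
    for train, test in KFold(n_splits=n_folds).split(X_train):
        gbr.fit(X_train[train], y_train[train])
        pred = gbr.predict(X_train[test])
        rmse_cv.append(np.sqrt(np.mean((pred - y_train[test]) ** 2)))
    return np.mean(rmse_cv)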

Example 2: check_boston

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def check_boston(presort, loss, subsample):
    # Check consistency on dataset boston house prices with least squares
    # and least absolute deviation.
    ones = np.ones(len(boston.target))
    last_y_pred = None
    for sample_weight in None, ones, 2 * ones:
        clf = GradientBoostingRegressor(n_estimators=100,
                                        loss=loss,
                                        max_depth=4,
                                        subsample=subsample,
                                        min_samples_split=2,
                                        random_state=1,
                                        presort=presort)

        assert_raises(ValueError, clf.predict, boston.data)
        clf.fit(boston.data, boston.target,
                sample_weight=sample_weight)
        leaves = clf.apply(boston.data)
        assert_equal(leaves.shape, (506, 100))

        y_pred = clf.predict(boston.data)
        mse = mean_squared_error(boston.target, y_pred)
        assert_less(mse, 6.0)

        if last_y_pred is not None:
            assert_array_almost_equal(last_y_pred, y_pred)

        last_y_pred = y_pred
Developer: amueller, Project: scikit-learn, Lines of code: 30, Source file: test_gradient_boosting.py

Example 3: train

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
    def train(self, x, y, param_names, **kwargs):
        start = time.time()
        scaled_x = self._set_and_preprocess(x=x, param_names=param_names)

        # Check that each input is between 0 and 1
        self._check_scaling(scaled_x=scaled_x)

        if self._debug:
            print "Shape of training data: ", scaled_x.shape
            print "Param names: ", self._used_param_names
            print "First training sample\n", scaled_x[0]
            print "Encode: ", self._encode

        # Do a random search
        max_features, learning_rate, max_depth, min_samples_leaf, n_estimators = self._random_search(random_iter=100,
                                                                                                     x=scaled_x, y=y)
        # Now train model
        gb = GradientBoostingRegressor(loss='ls',
                                       learning_rate=learning_rate,
                                       n_estimators=n_estimators,
                                       subsample=1.0,
                                       min_samples_split=2,
                                       min_samples_leaf=min_samples_leaf,
                                       max_depth=max_depth,
                                       init=None,
                                       random_state=self._rng,
                                       max_features=max_features,
                                       alpha=0.9,
                                       verbose=0)
        gb.fit(scaled_x, y)
        self._model = gb

        duration = time.time() - start
        self._training_finished = True
        return duration
Developer: KEggensperger, Project: SurrogateBenchmarks, Lines of code: 37, Source file: GradientBoosting.py

Example 4: test_plot_partial_dependence

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def test_plot_partial_dependence():
    # Test partial dependence plot function.
    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(boston.data, boston.target)

    grid_resolution = 25
    fig, axs = plot_partial_dependence(clf, boston.data, [0, 1, (0, 1)],
                                       grid_resolution=grid_resolution,
                                       feature_names=boston.feature_names)
    assert len(axs) == 3
    assert all(ax.has_data() for ax in axs)  # has_data is a method; call it

    # check with str features and array feature names
    fig, axs = plot_partial_dependence(clf, boston.data, ['CRIM', 'ZN',
                                                          ('CRIM', 'ZN')],
                                       grid_resolution=grid_resolution,
                                       feature_names=boston.feature_names)

    assert len(axs) == 3
    assert all(ax.has_data() for ax in axs)

    # check with list feature_names
    feature_names = boston.feature_names.tolist()
    fig, axs = plot_partial_dependence(clf, boston.data, ['CRIM', 'ZN',
                                                          ('CRIM', 'ZN')],
                                       grid_resolution=grid_resolution,
                                       feature_names=feature_names)
    assert len(axs) == 3
    assert all(ax.has_data() for ax in axs)
Developer: kevin-coder, Project: scikit-learn-fork, Lines of code: 31, Source file: test_partial_dependence.py

Example 5: test_gradient_boosting_early_stopping

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def test_gradient_boosting_early_stopping():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=1000,
                                     n_iter_no_change=10,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)

    gbr = GradientBoostingRegressor(n_estimators=1000, n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    random_state=42)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)
    # Check if early_stopping works as expected
    for est, tol, early_stop_n_estimators in ((gbc, 1e-1, 24), (gbr, 1e-1, 13),
                                              (gbc, 1e-3, 36),
                                              (gbr, 1e-3, 28)):
        est.set_params(tol=tol)
        est.fit(X_train, y_train)
        assert_equal(est.n_estimators_, early_stop_n_estimators)
        assert est.score(X_test, y_test) > 0.7

    # Without early stopping
    gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                     max_depth=3, random_state=42)
    gbc.fit(X, y)
    gbr = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1,
                                    max_depth=3, random_state=42)
    gbr.fit(X, y)

    assert gbc.n_estimators_ == 100
    assert gbr.n_estimators_ == 200
Developer: amueller, Project: scikit-learn, Lines of code: 35, Source file: test_gradient_boosting.py

Example 6: grid_search

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def grid_search():
    results_list_of_tuples = list()
    num_folds = 3
    best_result = tuple()
    for item1 in gd_grid['learning_rate']:
        for item2 in gd_grid['max_depth']:
            for item3 in gd_grid['min_samples_leaf']:
                for item4 in gd_grid['n_estimators']:
                    for item5 in gd_grid['random_state']:
                        instance = ('LR {}, max_depth {}, min_samp_leaf {}, '
                                    'n_est {}, rs {}').format(item1, item2,
                                                              item3, item4,
                                                              item5)
                        print instance
                        gbrt = GradientBoostingRegressor(random_state=item5,
                                                         n_estimators=item4,
                                                         min_samples_leaf=item3,
                                                         max_depth=item2,
                                                         learning_rate=item1)
                        kf = KFold(X.shape[0], n_folds=num_folds)
                        mse_list = []
                        for train_index, test_index in kf:
                            X_train, X_test = X[train_index], X[test_index]
                            y_train, y_test = y[train_index], y[test_index]
                            w_train, w_test = weights[train_index], weights[test_index]
                            gbrt.fit(X_train, y_train, w_train)                 
                            y_pred = gbrt.predict(X_test)
                            mse = mean_squared_error(y_test, y_pred, sample_weight=w_test)
                            mse_list.append(mse)

                        kf_mse = np.mean(np.array(mse_list))
                        results_list_of_tuples.append((instance, kf_mse))
                        
    return results_list_of_tuples
Developer: jerrystsai, Project: project-upwork, Lines of code: 29, Source file: gbrt.py
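The five nested loops above enumerate the grid by hand. A sketch of the same enumeration with itertools.product, assuming the same module-level gd_grid, X, y, and weights as the function above, and the modern KFold API (scikit-learn >= 0.18):

import numpy as np
from itertools import product
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

def grid_search_flat(num_folds=3):
    results = []
    keys = ('learning_rate', 'max_depth', 'min_samples_leaf',
            'n_estimators', 'random_state')
    for lr, depth, leaf, n_est, rs in product(*(gd_grid[k] for k in keys)):
        gbrt = GradientBoostingRegressor(learning_rate=lr, max_depth=depth,
                                         min_samples_leaf=leaf,
                                         n_estimators=n_est, random_state=rs)
        mse_list = []
        for train_index, test_index in KFold(n_splits=num_folds).split(X):
            gbrt.fit(X[train_index], y[train_index],
                     sample_weight=weights[train_index])
            y_pred = gbrt.predict(X[test_index])
            mse_list.append(mean_squared_error(y[test_index], y_pred,
                                               sample_weight=weights[test_index]))
        instance = 'LR {}, max_depth {}, min_samp_leaf {}, n_est {}, rs {}'.format(
            lr, depth, leaf, n_est, rs)
        results.append((instance, np.mean(mse_list)))
    return results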

Example 7: test_boston

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def test_boston():
    # Check consistency on dataset boston house prices with least squares
    # and least absolute deviation.
    for loss in ("ls", "lad", "huber"):
        for subsample in (1.0, 0.5):
            last_y_pred = None
            for i, sample_weight in enumerate(
                    (None, np.ones(len(boston.target)),
                     2 * np.ones(len(boston.target)))):
                clf = GradientBoostingRegressor(n_estimators=100, loss=loss,
                                                max_depth=4, subsample=subsample,
                                                min_samples_split=1,
                                                random_state=1)

                assert_raises(ValueError, clf.predict, boston.data)
                clf.fit(boston.data, boston.target,
                        sample_weight=sample_weight)
                y_pred = clf.predict(boston.data)
                mse = mean_squared_error(boston.target, y_pred)
                assert mse < 6.0, "Failed with loss %s and " \
                    "mse = %.4f" % (loss, mse)

                if last_y_pred is not None:
                    np.testing.assert_array_almost_equal(
                        last_y_pred, y_pred,
                        err_msg="pred_%d doesn't match last pred_%d for loss %r and subsample %r. "
                        % (i, i - 1, loss, subsample))

                last_y_pred = y_pred
Developer: BobChew, Project: scikit-learn, Lines of code: 31, Source file: test_gradient_boosting.py

Example 8: boost2

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def boost2():
    minimum_mse = 1000000000
    min_depth = 0
    X = pd.read_csv('../data/kaggle/kaggle.X1.train.txt', header=None)
    Y = pd.read_csv('../data/kaggle/kaggle.Y.train.txt', header=None)
    Xtest = pd.read_csv('../data/kaggle/kaggle.X1.test.txt', header=None)
    Xtr, Xte, Ytr, Yte = train_test_split(X, Y, test_size=0.25, random_state=42)

    for estimators in range(700, 2000, 100):
        print "For estimators: ", estimators
        for i in range(6, 8):
            print "For max_depth: ", i
            est = GradientBoostingRegressor(n_estimators=estimators, max_depth=i, min_samples_leaf=500, warm_start=True)
            est.fit(Xtr, Ytr)
            Yhat = est.predict(Xte)
            current_mse = mean_squared_error(Yte, Yhat)
            print "For MaxDepth:", i, ", MSE:", current_mse
            if minimum_mse > current_mse:
                minimum_mse = current_mse
                min_depth = i
                min_estimator = estimators
                est2 = GradientBoostingRegressor(n_estimators=min_estimator, max_depth=min_depth, min_samples_leaf=500,
                                                 warm_start=True, verbose=True)
                est2.fit(X, Y)

    print "** minimum_mse: ", minimum_mse
    print "** min_depth: ", min_depth
    print "** min_estimator: ", min_estimator

    pred = est2.predict(Xtest)
    s = pd.Series(pred)
    s.index = s.index + 1
    s.to_csv('pyprediction.csv', header=['Prediction'], index=True, index_label='ID')
Developer: Gabeesh, Project: CS273a-Introduction-to-Machine-Learning, Lines of code: 35, Source file: SKLearnTree.py
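A side note on warm_start: the excerpt constructs a fresh estimator for every (n_estimators, max_depth) pair, so warm_start=True never actually reuses trees. The idiomatic pattern grows a single model in place; a sketch, assuming the Xtr/Ytr/Xte/Yte split from the excerpt:

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

est = GradientBoostingRegressor(n_estimators=100, max_depth=6,
                                min_samples_leaf=500, warm_start=True)
for n in range(100, 1001, 100):
    est.set_params(n_estimators=n)
    est.fit(Xtr, Ytr)  # with warm_start=True, only the trees beyond the current ensemble are fitted
    print "n_estimators:", n, "MSE:", mean_squared_error(Yte, est.predict(Xte))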

Example 9: compute_photoz_forest

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def compute_photoz_forest(N_boosts):
    rms_test = np.zeros(len(N_boosts))
    rms_train = np.zeros(len(N_boosts))
    i_best = 0
    z_fit_best = None

    for i, Nb in enumerate(N_boosts):
        try:
            # older versions of scikit-learn
            clf = GradientBoostingRegressor(n_estimators=Nb, learn_rate=0.1,
                                            max_depth=3, random_state=0)
        except TypeError:
            clf = GradientBoostingRegressor(n_estimators=Nb, learning_rate=0.1,
                                            max_depth=3, random_state=0)
        clf.fit(mag_train, z_train)

        z_fit_train = clf.predict(mag_train)
        z_fit = clf.predict(mag_test)
        # true RMS: sqrt of the mean squared error (the original excerpt took
        # np.mean(np.sqrt(err ** 2)), which is the mean absolute error)
        rms_train[i] = np.sqrt(np.mean((z_fit_train - z_train) ** 2))
        rms_test[i] = np.sqrt(np.mean((z_fit - z_test) ** 2))

        if rms_test[i] <= rms_test[i_best]:
            i_best = i
            z_fit_best = z_fit

    return rms_test, rms_train, i_best, z_fit_best
Developer: CKrawczyk, Project: astroML, Lines of code: 28, Source file: fig_photoz_boosting.py

Example 10: build_models

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
    def build_models(self):

        self.remove_columns(
            [
                "institute_latitude",
                "institute_longitude",
                "institute_state",
                "institute_country",
                "var10",
                "var11",
                "var12",
                "var13",
                "var14",
                "var15",
                "instructor_past_performance",
                "instructor_association_industry_expert",
                "secondary_area",
                "var24",
            ]
        )

        model1 = GradientBoostingRegressor(learning_rate=0.1, n_estimators=200, subsample=0.8)
        model2 = RandomForestRegressor(n_estimators=50)
        model3 = ExtraTreesRegressor(n_estimators=50)

        model1.fit(self.X, self.y)
        model2.fit(self.X, self.y)
        model3.fit(self.X, self.y)

        return [model1, model2, model3]
Developer: numb3r33, Project: predict-grants, Lines of code: 32, Source file: model.py

Example 11: fit

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def fit(filename, treename, inputsname, targetname, workingpoint=0.9, test=False):
    # Reading inputs and targets
    ninputs = len(inputsname)
    branches = copy.deepcopy(inputsname)
    branches.append(targetname)
    data = root2array(filename, treename=treename, branches=branches)
    data = data.view((np.float64, len(data.dtype.names)))
    # Extract and format inputs and targets from numpy array
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    # if test requested, use 60% of events for training and 40% for testing
    inputs_train = inputs
    targets_train = targets
    if test:
        inputs_train, inputs_test, targets_train, targets_test = cross_validation.train_test_split(inputs, targets, test_size=0.4, random_state=0)
    # Define and fit quantile regression (quantile = workingpoint)
    # Default training parameters are used
    regressor = GradientBoostingRegressor(loss='quantile', alpha=workingpoint)
    regressor.fit(inputs_train, targets_train)
    if test:
        # Compare regression prediction with the true value and count the fraction of time it falls below
        # This should give the working point value
        predict_test = regressor.predict(inputs_test)
        compare = np.less(targets_test, predict_test)
        print 'Testing regression with inputs', inputsname, 'and working point', workingpoint
        print '    Test efficiency =', float(list(compare).count(True))/float(len(compare))
        # TODO: add 1D efficiency graphs vs input variables
    return regressor
Developer: jbsauvan, Project: L1T-Utilities, Lines of code: 30, Source file: quantile_regression.py
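A minimal synthetic check of the idea in the comments above: with loss='quantile' and alpha=0.9, roughly 90% of the true targets should fall below the regression prediction. The data and names here are illustrative, not from the project:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.RandomState(0)
X = rng.uniform(0., 10., size=(2000, 1))
y = X.ravel() + rng.normal(scale=1.0, size=2000)  # noisy linear relation

reg = GradientBoostingRegressor(loss='quantile', alpha=0.9, random_state=0)
reg.fit(X, y)
frac_below = np.mean(y < reg.predict(X))
print("Fraction below the 0.9-quantile prediction: %.3f" % frac_below)  # ~0.90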

Example 12: modelTheData

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def modelTheData(data,target):

#    params = {'n_estimators': 400, 'max_depth': 4, 'min_samples_split': 2,
#          'subsample': 0.5,'min_samples_leaf': 2,
#          'learning_rate': 0.01, 'loss': 'ls'}


#beijing
    myMachine = GradientBoostingRegressor(alpha=0.9, init=None, learn_rate=None,
             learning_rate=0.05, loss='ls', max_depth=1, max_features=None,
             min_samples_leaf=2, min_samples_split=2, n_estimators=300,
             random_state=None, subsample=0.5, verbose=0)

#shanghai
#    myMachine = GradientBoostingRegressor(alpha=0.9, init=None, learn_rate=None,
#             learning_rate=0.05, loss='ls', max_depth=3, max_features=None,
#             min_samples_leaf=2, min_samples_split=2, n_estimators=500,
#             random_state=None, subsample=0.5, verbose=0)

#    myMachine = GradientBoostingRegressor(**params)
    myMachine.fit(data,target)

    return myMachine
Developer: wybert, Project: PMpredict, Lines of code: 29, Source file: modelSHData.py

Example 13: train

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def train(targets, features, model_file, params):
    model = GradientBoostingRegressor(**params)
    print "Training hard..."
    model.fit(features, targets)
    print "Saving model..."
    pickle.dump(model, open(model_file, 'wb'))
    return model
Developer: DenXX, Project: irlab, Lines of code: 9, Source file: train.py

Example 14: train_model

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def train_model(features, label, params):
    # Preprocessing (scaling currently disabled)
    # scaled_features = preprocessing.scale(features)
    scaled_features = features

    total_rmse = 0.0
    count = 0

    kf = KFold(len(scaled_features), n_folds=10)

    for train_index, validation_index in kf:
        X_train, X_validation = scaled_features[train_index], scaled_features[validation_index]
        Y_train, Y_validation = label[train_index], label[validation_index]

        # estimator = SVR(**params)
        # estimator = RandomForestRegressor(**params)
        estimator = GradientBoostingRegressor(**params)

        estimator.fit(X_train, Y_train)

        current_rmse = calculate_RMSE(estimator, X_validation, Y_validation)

        total_rmse += current_rmse
        count += 1

    # Average across all folds
    avg_current_rmse = total_rmse / float(count)
    print("Avg Current RMSE " + str(avg_current_rmse))

    return (params, avg_current_rmse)
Developer: Amortized, Project: Restaurant-Revenue-Predictor, Lines of code: 33, Source file: process.py

Example 15: gradient_boosting_regressor

# Required import: from sklearn.ensemble import GradientBoostingRegressor [as alias]
# Or: from sklearn.ensemble.GradientBoostingRegressor import fit [as alias]
def gradient_boosting_regressor(train_x, train_y, pred_x, review_id, v_curve=False, l_curve=False, get_model=True):
    """
    :param train_x: train
    :param train_y: text
    :param pred_x: test set to predict
    :param review_id: takes in a review id
    :param v_curve: run the model for validation curve
    :param l_curve: run the model for learning curve
    :param get_model: run the model
    :return:the predicted values,learning curve, validation curve
    """
    gbr = GradientBoostingRegressor(n_estimators=200, max_depth=7, random_state=7)
    if get_model:
        print "Fitting GBR..."
        gbr.fit(train_x, np.log(train_y+1))
        gbr_pred = np.exp(gbr.predict(pred_x))- 1
        #dealing with
        for i in range(len(gbr_pred)):
            if gbr_pred[i] < 0:
                gbr_pred[i] = 0
        Votes = gbr_pred[:, np.newaxis]
        Id = np.array(review_id)[:, np.newaxis]
        submission_gbr = np.concatenate((Id,Votes),axis=1)
        np.savetxt("submission_gbr.csv", submission_gbr,header="Id,Votes", delimiter=',',fmt="%s, %0.2f", comments='')
    # plot validation and learning curves
    if v_curve:
        print "Working on Validation Curves"
        plot_validation_curve(GradientBoostingRegressor(), "Validation Curve: GBR", train_x, np.log(train_y+1.0),
                              param_name="n_estimators", param_range=[5, 20, 60, 100, 150, 200])
    if l_curve:
        print "Working on Learning Curves"
        plot_learning_curve(GradientBoostingRegressor(), "Learning Curve: GBR", train_x, np.log(train_y+1.0))
Developer: rachanbassi, Project: yelp_kaggle_project, Lines of code: 34, Source file: algorithms.py
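The log(y+1) / exp(pred)-1 round trip above is exactly what NumPy's log1p/expm1 compute, with better numerical behavior near zero. A small sketch of the equivalence; the rewritten calls in the comments are suggestions, not the project's code:

import numpy as np

y = np.array([0.0, 1.0, 10.0, 1000.0])
assert np.allclose(np.expm1(np.log1p(y)), y)  # round trip recovers y

# Equivalent calls in the function above would be:
#   gbr.fit(train_x, np.log1p(train_y))
#   gbr_pred = np.expm1(gbr.predict(pred_x))
# and np.clip(gbr_pred, 0, None) replaces the explicit negative-value loop.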


Note: The sklearn.ensemble.GradientBoostingRegressor.fit examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and distribution and use must follow the corresponding project's License. Please do not republish without permission.