本文整理汇总了Python中sklearn.ensemble.BaggingRegressor类的典型用法代码示例。如果您正苦于以下问题：Python BaggingRegressor类的具体用法？Python BaggingRegressor怎么用？Python BaggingRegressor使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了BaggingRegressor类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: model_fit_rf_bagging
def model_fit_rf_bagging():
    """Train a bagged random forest and predict clipped relevance scores.

    Reads the module-level ``full_data`` frame for training and
    ``real_full_test`` for the submission set. A 25% hold-out split of
    ``full_data`` is used to print an RMSE before predicting the submission
    rows.

    Returns
    -------
    numpy.ndarray
        Predictions for ``real_full_test``, clipped to the range [1, 3].
    """
    # Single definition of the feature set so the training and submission
    # matrices cannot drift apart.
    feature_columns = [
        'count_words', 'count_digits',
        'match_d_title', 'match_d_description',
        'match_w_title', 'match_w_description',
        'match_d_attribute', 'match_w_attribute',
    ]

    print("STARTING MODEL")
    X = full_data[feature_columns].values
    y = full_data['relevance'].values
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42)

    rf = RandomForestRegressor(n_estimators=15, max_depth=6, random_state=0)
    clf = BaggingRegressor(rf, n_estimators=45, max_samples=0.1,
                           random_state=25)
    clf.fit(X_train, y_train)

    # np.clip replaces the former np.vectorize(..., otypes=[np.float]) helper:
    # the np.float alias is gone from current NumPy, and clipping is a
    # built-in vectorized operation.
    y_pred = np.clip(clf.predict(X_test), 1, 3)
    RMSE = mean_squared_error(y_test, y_pred) ** 0.5
    print("RMSE: ", RMSE)

    # for the submission
    real_X_test = real_full_test[feature_columns].values
    return np.clip(clf.predict(real_X_test), 1, 3)
示例2: train_model
def train_model(train, test, labels):
    """Fit a bagged random forest and return predictions next to ``test['id']``.

    Parameters
    ----------
    train :
        Training features, passed unchanged to ``fit``.
    test : pandas.DataFrame
        Rows to predict. The whole frame is passed to ``predict`` and its
        ``'id'`` column is attached to the output.
    labels :
        Training targets, passed unchanged to ``fit``.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``id`` (from ``test``) and ``relevance`` (predicted).
    """
    rf = RandomForestRegressor(n_estimators=15, max_depth=6, random_state=10)
    clf = BaggingRegressor(rf, n_estimators=45, max_samples=0.2,
                           random_state=25)
    clf.fit(train, labels)
    print("Good!")

    predictions = clf.predict(test)
    print(predictions.shape)
    # Reuse the index of `test`: pd.concat(axis=1) aligns on index labels, so
    # a fresh RangeIndex here would pair ids with the wrong rows (or NaN)
    # whenever `test` does not carry the default 0..n-1 index.
    predictions = pd.DataFrame(predictions, columns=['relevance'],
                               index=test.index)
    print("Good again!")

    print("Predictions head -------")
    print(predictions.head())
    print(predictions.shape)
    print("TEST head -------")
    print(test.head())
    print(test.shape)

    predictions = pd.concat([test['id'], predictions], axis=1,
                            verify_integrity=False)
    print(predictions)
    return predictions
示例3: train_bagging_xgboost
def train_bagging_xgboost(X, Y):
    """Fit and return a bagging ensemble of XGBoost regressors.

    Thirty copies of the base XGBoost model are bagged, each seeing 70% of
    the features (``max_features=0.7``).

    Parameters
    ----------
    X :
        Training features, passed unchanged to ``fit``.
    Y :
        Training targets, passed unchanged to ``fit``.

    Returns
    -------
    BaggingRegressor
        The fitted ensemble.
    """
    # NOTE(review): `silent` and objective 'reg:linear' are deprecated
    # spellings in newer xgboost releases - confirm against the installed
    # version before upgrading.
    base_params = dict(
        max_depth=6,
        learning_rate=0.02,
        n_estimators=300,
        silent=True,
        objective='reg:linear',
        subsample=0.7,
        reg_alpha=0.8,
        reg_lambda=0.8,
        booster="gblinear",
    )
    base_model = xgb.XGBRegressor(**base_params)

    bagged = BaggingRegressor(base_model, max_features=0.7, n_estimators=30)
    bagged.fit(X, Y)
    return bagged
示例4: test_oob_score_regression
def test_oob_score_regression():
    """Check that the out-of-bag score approximates the held-out score."""
    rng = check_random_state(0)
    split = train_test_split(boston.data, boston.target, random_state=rng)
    X_train, X_test, y_train, y_test = split

    bagging = BaggingRegressor(
        base_estimator=DecisionTreeRegressor(),
        n_estimators=50,
        bootstrap=True,
        oob_score=True,
        random_state=rng,
    )
    bagging.fit(X_train, y_train)

    # The OOB estimate and the test score must agree to within 0.1.
    score_gap = abs(bagging.score(X_test, y_test) - bagging.oob_score_)
    assert_less(score_gap, 0.1)

    # With a single estimator, fitting is expected to emit a UserWarning.
    single = BaggingRegressor(
        base_estimator=DecisionTreeRegressor(),
        n_estimators=1,
        bootstrap=True,
        oob_score=True,
        random_state=rng,
    )
    assert_warns(UserWarning, single.fit, X_train, y_train)
示例5: test_bootstrap_samples
def test_bootstrap_samples():
    """Test that bootstrapping samples generates non-perfect base estimators."""
    rng = check_random_state(0)
    split = train_test_split(boston.data, boston.target, random_state=rng)
    X_train, X_test, y_train, y_test = split

    # Reference: one tree fitted on the full training set.
    reference_tree = DecisionTreeRegressor().fit(X_train, y_train)

    # Without bootstrap every bagged tree sees all samples, so the ensemble
    # scores exactly like the reference tree on the training set.
    no_bootstrap = BaggingRegressor(
        base_estimator=DecisionTreeRegressor(),
        max_samples=1.0,
        bootstrap=False,
        random_state=rng,
    )
    no_bootstrap.fit(X_train, y_train)
    reference_score = reference_tree.score(X_train, y_train)
    assert_equal(reference_score, no_bootstrap.score(X_train, y_train))

    # With bootstrap the trees are no longer perfect on the training set.
    with_bootstrap = BaggingRegressor(
        base_estimator=DecisionTreeRegressor(),
        max_samples=1.0,
        bootstrap=True,
        random_state=rng,
    )
    with_bootstrap.fit(X_train, y_train)
    assert_greater(reference_score, with_bootstrap.score(X_train, y_train))
示例6: avmPredict
def avmPredict(params):
    """Predict a value for ``params`` from nearby rows of the working CSV.

    Rows of ``../../../data/working22.csv`` are parsed as floats. Column 3 is
    the target and all other columns are features. A row is kept for training
    when ``weightF`` accepts its first three features against the query
    point's (x, y, z). A default ``BaggingRegressor`` is fitted on the kept
    rows and applied to ``createTest(params)``.

    Parameters
    ----------
    params : mapping
        Must provide ``'lat'`` and ``'long'``; also forwarded to
        ``createTest``.

    Returns
    -------
    The output of ``BaggingRegressor.predict`` for the generated test row(s).
    """
    # NOTE(review): `town` is never used below; the lookup is kept in case
    # getPlace validates the coordinates - confirm and drop if not.
    town = getPlace(params['lat'], params['long'])[0]
    x, y, z = getXYZ(params['lat'], params['long'])
    r = 1.0

    data = []
    target = []
    with open('../../../data/working22.csv') as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        for row in reader:
            # A real list, not map(): on Python 3 map() returns an iterator,
            # which cannot be sliced for the weightF check below.
            features = [float(v) for v in row[:3] + row[4:]]
            value = float(row[3])
            if weightF([x, y, z], features[0:3], r):
                data.append(features)
                target.append(value)

    ensemble = BaggingRegressor()
    ensemble.fit(data, target)
    test = createTest(params)
    return ensemble.predict(test)
示例7: fit
def fit(self):
    """Scale the data and train the model selected by ``self.algorithm``.

    Recognised values of ``self.algorithm`` are "KernelRidge", "SVM",
    "LinearRegression", "Lasso", "ElasticNet", "NeuralNet" and
    "BaggingNeuralNet". Remember to tune the hyperparameters.

    The scalers are always fitted and the scaled copies always stored, but
    the scaled data is used for training only when ``self.scaling`` is True.
    Predictions for the train and test sets are stored in
    ``self.prediction_train`` and ``self.prediction_test`` (inverse-scaled
    when scaling is on).
    """
    def grid_search(estimator, grid):
        # 5-fold cross-validated grid search around `estimator`.
        return sklearn.model_selection.GridSearchCV(
            estimator, cv=5, param_grid=grid)

    self.X_scaler.fit(self.X_train)
    self.Y_scaler.fit(self.y_train)

    # Scaled copies are produced in every case, even if training ends up
    # using the raw data.
    self.X_train_sc = self.X_scaler.transform(self.X_train)
    self.y_train_sc = self.Y_scaler.transform(self.y_train)
    self.X_test_sc = self.X_scaler.transform(self.X_test)
    self.y_test_sc = self.Y_scaler.transform(self.y_test)

    if self.algorithm == "KernelRidge":
        self.model = grid_search(
            KernelRidge(kernel=self.user_kernel), self.param_kr)
    elif self.algorithm == "SVM":
        self.model = grid_search(
            SVR(kernel=self.user_kernel), self.param_svm)
    elif self.algorithm == "Lasso":
        lasso = sklearn.linear_model.Lasso(
            alpha=0.1, random_state=self.rand_state)
        self.model = grid_search(lasso, dict(alpha=np.logspace(-5, 5, 30)))
    elif self.algorithm == "ElasticNet":
        elastic_net = sklearn.linear_model.ElasticNet(
            alpha=0.1, l1_ratio=0.5, random_state=self.rand_state)
        self.model = grid_search(
            elastic_net, dict(alpha=np.logspace(-5, 5, 30)))
    elif self.algorithm == "LinearRegression":
        self.model = sklearn.linear_model.LinearRegression()
    elif self.algorithm == "NeuralNet":
        self.model = MLPRegressor(**self.param_neurons)
    elif self.algorithm == "BaggingNeuralNet":
        network = MLPRegressor(**self.param_neurons)
        self.model = BaggingRegressor(base_estimator=network,
                                      **self.param_bag)

    if self.scaling == True:
        # Train in scaled space, then map predictions back to target units.
        self.model.fit(self.X_train_sc, self.y_train_sc.reshape(-1,))

        scaled_train_pred = self.model.predict(self.X_train_sc)
        self.prediction_train = self.Y_scaler.inverse_transform(
            scaled_train_pred.reshape(-1, 1))

        scaled_test_pred = self.model.predict(self.X_test_sc)
        self.prediction_test = self.Y_scaler.inverse_transform(
            scaled_test_pred.reshape(-1, 1))
    else:
        self.model.fit(self.X_train, self.y_train.reshape(-1,))
        self.prediction_train = self.model.predict(self.X_train)
        self.prediction_test = self.model.predict(self.X_test)
示例8: random_forest
def random_forest(X, Y, Xt):
    """Fit a bagged random forest on (X, Y) and return predictions for Xt.

    Progress markers ('learn', 'predict') are printed before each phase.
    The predictions are returned exactly as produced by the model.
    """
    print('learn')
    base_forest = RandomForestRegressor(
        n_estimators=15, max_depth=6, random_state=0)
    model = BaggingRegressor(
        base_forest, n_estimators=45, max_samples=0.1, random_state=25)
    model.fit(X, Y)

    print('predict')
    return model.predict(Xt)
示例9: test_sparse_regression
def test_sparse_regression():
    """Check regression for various parameter settings on sparse input.

    For each sparse format and parameter set, an ensemble trained on sparse
    data must predict the same values as one trained on the dense data, and
    every base estimator must have been handed the sparse matrix type.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)

    class CustomSVR(SVR):
        """SVR variant that records the nature of the training set"""

        def fit(self, X, y):
            super().fit(X, y)
            self.data_type_ = type(X)
            return self

    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        for params in parameter_sets:
            # Trained on sparse format
            sparse_regressor = BaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_regressor.predict(X_test_sparse)

            # Trained on dense format
            dense_results = BaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            sparse_type = type(X_train_sparse)
            types = [i.data_type_ for i in sparse_regressor.estimators_]

            # One comparison is enough; it was previously asserted twice.
            assert_array_almost_equal(sparse_results, dense_results)
            assert all(t == sparse_type for t in types)
示例10: procedureA
def procedureA(goldenFlag = False):
    """Train a bagged decision-tree regressor and evaluate it on a hold-out.

    Uses a hard heuristic for buy_or_not. Data comes from ``getDataXY``, is
    shuffled with a fixed seed, one-hot encoded (the first two columns are
    passed through untouched), optionally transformed by ``getGoldenX``, and
    split 90/10 by row order into train and test parts.

    Parameters
    ----------
    goldenFlag : bool, default False
        When true, print the encoded shape and replace the feature matrix
        with the output of ``getGoldenX``.

    Returns
    -------
    tuple
        ``(clf, encoder, min_max_scaler)``: the fitted ensemble, the one-hot
        encoder, and a ``MinMaxScaler`` that is created here but never fitted.
    """
    popFlag = True
    X, Y = getDataXY(currYearFlag = False, popFlag = popFlag)
    X, Y = shuffle(X, Y, random_state = 0)

    if popFlag:
        # Columns 0-1 bypass the encoder and are stacked back in front.
        encoder = oneHot(X[:, 2:])
        Xt = encoder.transform(X[:, 2:])
        Xt = np.hstack((X[:,:2], Xt))
    else:
        encoder = oneHot(X)
        Xt = encoder.transform(X)

    # Returned unfitted: scaling of Xt is currently disabled.
    min_max_scaler = preprocessing.MinMaxScaler()

    if goldenFlag:
        print(Xt.shape)
        Xt = getGoldenX(Xt, 2, 2 + encoder.feature_indices_[1], 2 + encoder.feature_indices_[0], 2 + min(9, encoder.feature_indices_[1]))

    split = 0.9
    # Cut points are computed once per array instead of on every slice.
    x_cut = int(Xt.shape[0] * split)
    y_cut = int(Y.shape[0] * split)
    X_train, X_test = Xt[:x_cut, :], Xt[x_cut:, :]
    Y_train = Y[:y_cut, :].ravel()
    Y_test = Y[y_cut:, :].ravel()

    print(X_train.shape)
    print(X_test.shape)

    clf = BaggingRegressor(DecisionTreeRegressor(), n_estimators = 125, n_jobs = 4, random_state = 0)
    clf.fit(X_train, Y_train)

    Y_pred = clf.predict(X_test)
    evaluatePred(Y_pred, Y_test)

    return clf, encoder, min_max_scaler
示例11: __init__
def __init__(self):
    """Set up the regressor and the projection helper.

    ``self.clf`` is a bagging ensemble of linear regressions and
    ``self.sp`` is a sparse random projection to 5 components.
    """
    # Alternatives tried previously for `clf`: GradientBoostingRegressor,
    # plain LinearRegression, GaussianProcess; for `sp`: RandomizedLasso.
    projection = SparseRandomProjection(n_components=5)
    base_model = LinearRegression()
    self.clf = BaggingRegressor(base_model)
    self.sp = projection
示例12: test_single_estimator
def test_single_estimator():
    """Check that a singleton ensemble matches its lone base estimator."""
    rng = check_random_state(0)
    split = train_test_split(boston.data, boston.target, random_state=rng)
    X_train, X_test, y_train, y_test = split

    # One estimator, no sample or feature bootstrapping.
    bagged_knn = BaggingRegressor(
        base_estimator=KNeighborsRegressor(),
        n_estimators=1,
        bootstrap=False,
        bootstrap_features=False,
        random_state=rng,
    )
    bagged_knn.fit(X_train, y_train)

    plain_knn = KNeighborsRegressor()
    plain_knn.fit(X_train, y_train)

    assert_array_almost_equal(bagged_knn.predict(X_test),
                              plain_knn.predict(X_test))
示例13: train_model
def train_model(training, testing, window=5, n=5):
    """Train four regressors, blend them and attach the blend to ``testing``.

    A random forest, gradient boosting, AdaBoost (decision trees) and a
    bagged k-NN (on standardized features) are fitted on
    ``prepare_data(training)``. Each model's MSE on ``prepare_data(testing)``
    is printed, and the predictions are blended with weights
    0.1 / 0.2 / 0.1 / 0.6. A bar chart of the normalized gradient-boosting
    feature importances is saved to ``output/feature_importance.png``.

    Parameters
    ----------
    training, testing :
        Inputs for ``prepare_data``. ``testing`` must also support
        ``.copy()`` and contain an ``'IBM'`` column.
    window, n : int
        Currently unused; the row offsets below are hard-coded to 5 and 10.

    Returns
    -------
    A copy of ``testing`` with ``trend``, ``pred`` and ``pred_date`` filled
    in for the rows covered by the predictions.
    """
    X_train, y_train = prepare_data(training)
    X_test, y_test = prepare_data(testing)

    rf = RandomForestRegressor()
    rf.fit(X_train, y_train)
    predrf = rf.predict(X_test)
    print("mse for random forest regressor: ", mean_squared_error(predrf, y_test))

    gb = GradientBoostingRegressor(n_estimators=100, learning_rate=0.025)
    gb.fit(X_train, y_train)
    predgb = gb.predict(X_test)
    print("mse for gradient boosting regressor: ", mean_squared_error(predgb, y_test))

    ## plot feature importance using GBR results
    fx_imp = pd.Series(gb.feature_importances_, index=['bb', 'momentum', 'sma', 'volatility'])
    fx_imp /= fx_imp.max()  # normalize
    # Series.sort() (in-place) no longer exists in pandas; sort_values()
    # returns the sorted series instead.
    fx_imp = fx_imp.sort_values()
    ax = fx_imp.plot(kind='barh')
    fig = ax.get_figure()
    fig.savefig("output/feature_importance.png")

    adb = AdaBoostRegressor(DecisionTreeRegressor())
    adb.fit(X_train, y_train)
    predadb = adb.predict(X_test)
    print("mse for adaboosting decision tree regressor: ", mean_squared_error(predadb, y_test))

    # k-NN is distance based, so it is trained on standardized features.
    scale = StandardScaler()
    scale.fit(X_train)
    X_trainscale = scale.transform(X_train)
    X_testscale = scale.transform(X_test)
    knn = BaggingRegressor(KNeighborsRegressor(n_neighbors=10), max_samples=0.5, max_features=0.5)
    knn.fit(X_trainscale, y_train)
    predknn = knn.predict(X_testscale)
    print("mse for bagging knn regressor: ", mean_squared_error(predknn, y_test))

    pred_test = 0.1*predrf+0.2*predgb+0.1*predadb+0.6*predknn
    print("mse for ensemble all the regressors: ", mean_squared_error(pred_test, y_test))

    result = testing.copy()
    # `.ix` is removed from pandas. The original slices were positional, so
    # row labels are taken by position and written through `.loc`, which
    # also creates the new columns.
    # NOTE(review): assumes the index labels of `testing` are unique
    # (e.g. dates) - confirm.
    rows = result.index
    result.loc[rows[5:-5], 'trend'] = pred_test
    result.loc[rows[10:], 'pred'] = pred_test * result['IBM'].iloc[5:-5].values
    result.loc[rows[:-5], 'pred_date'] = rows[5:]
    return result
示例14: procc_modelfusion
def procc_modelfusion(df_test, data_test):
    """Bag logistic regressions on the training frame and write predictions.

    Trains on the module-level ``df`` frame, predicts for ``df_test`` and
    writes a ``PassengerId``/``Survived`` table to
    ``logistic_regression_predictions3.csv``.

    Parameters
    ----------
    df_test : pandas.DataFrame
        Pre-processed test features; filtered with the same column pattern
        as the training frame (minus ``Survived``).
    data_test : pandas.DataFrame
        Frame providing the ``PassengerId`` column for the output file.

    Returns
    -------
    None
    """
    from sklearn.ensemble import BaggingRegressor
    from sklearn import linear_model

    train_df = df.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass.*|Mother|Child|Family|Title')
    # `.values` replaces DataFrame.as_matrix(), which is removed from pandas.
    train_np = train_df.values

    # y is the Survived outcome.
    # NOTE(review): filter() keeps the frame's own column order, so this
    # assumes 'Survived' is the first matching column of `df` - confirm.
    y = train_np[:, 0]

    # X holds the feature values
    X = train_np[:, 1:]

    # Fit inside a BaggingRegressor. The solver is set explicitly because an
    # L1 penalty is not supported by the default solver of newer
    # scikit-learn releases.
    clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6,
                                          solver='liblinear')
    bagging_clf = BaggingRegressor(clf, n_estimators=10, max_samples=0.8, max_features=1.0, bootstrap=True, bootstrap_features=False, n_jobs=-1)
    bagging_clf.fit(X, y)

    test = df_test.filter(regex='Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass.*|Mother|Child|Family|Title')
    predictions = bagging_clf.predict(test)
    # NOTE(review): astype(np.int32) truncates, so a regressor output below
    # 1 becomes 0 - confirm that truncation rather than rounding is intended.
    result = pd.DataFrame({'PassengerId' : data_test['PassengerId'].values, 'Survived':predictions.astype(np.int32)})
    result.to_csv("logistic_regression_predictions3.csv", index=False)
示例15: Regressor
class Regressor(BaseEstimator):
    """Thin estimator wrapper around a bagging ensemble of linear regressions.

    ``sp`` (a sparse random projection) is created but is not applied in
    ``fit`` or ``predict``.
    """

    def __init__(self):
        # Alternatives tried previously for `clf`: GradientBoostingRegressor,
        # plain LinearRegression, GaussianProcess, ExtraTreesRegressor.
        self.clf = BaggingRegressor(LinearRegression())
        # Alternatives tried previously for `sp`: RandomizedLasso,
        # TruncatedSVD, KernelPCA.
        self.sp = SparseRandomProjection(n_components=5)

    def fit(self, X, y):
        """Fit the ensemble on ``X`` and the flattened targets ``y``.

        Returns
        -------
        self
            Returned so calls can be chained, as scikit-learn estimators
            conventionally allow.
        """
        self.clf.fit(X, y.ravel())
        return self

    def predict(self, X):
        """Return the ensemble's predictions for ``X``."""
        return self.clf.predict(X)