This page collects typical usage examples of the Python class sklearn.ensemble.AdaBoostRegressor. If you have been struggling with questions like what the AdaBoostRegressor class is good for and how to use it, then congratulations: the curated class code examples here may help you.
Below are 15 code examples of the AdaBoostRegressor class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
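As a quick orientation before the collected examples, here is a minimal, self-contained sketch of the basic fit/predict workflow. It uses synthetic data and arbitrary hyperparameters and is not drawn from any example below:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Synthetic regression data, for illustration only
X, y = make_regression(n_samples=500, n_features=10, noise=10.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Default base estimator is a shallow decision tree
reg = AdaBoostRegressor(n_estimators=50, learning_rate=1.0,
                        loss='linear', random_state=0)
reg.fit(X_train, y_train)
print(mean_squared_error(y_test, reg.predict(X_test)))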
Example 1: fit
def fit(self, start_date, end_date):
    for ticker in self.tickers:
        self.stocks[ticker] = Stock(ticker)
    params_ada = [{
        'n_estimators': [25, 50, 100],
        'learning_rate': [0.01, 0.1, 1, 10],
        'loss': ['linear', 'square', 'exponential']
    }]
    params = ParameterGrid(params_ada)
    # Find the split for training and CV
    mid_date = train_test_split(start_date, end_date)
    for ticker, stock in self.stocks.items():
        X_train, y_train = stock.get_data(start_date, mid_date, fit=True)
        # X_train = self.pca.fit_transform(X_train.values)
        X_train = X_train.values
        # pdb.set_trace()
        X_cv, y_cv = stock.get_data(mid_date, end_date)
        # X_cv = self.pca.transform(X_cv.values)
        X_cv = X_cv.values
        lowest_mse = np.inf
        for i, param in enumerate(params):
            ada = AdaBoostRegressor(**param)
            ada.fit(X_train, y_train.values)
            mse = mean_squared_error(y_cv, ada.predict(X_cv))
            if mse <= lowest_mse:
                # keep the best model so far and tighten the threshold
                lowest_mse = mse
                self.models[ticker] = ada
    return self
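Example 1 walks a ParameterGrid by hand over a single chronological train/CV split. For reference, a sketch of the same grid expressed with scikit-learn's GridSearchCV; note that its k-fold CV is not a drop-in replacement for the time-ordered split above (TimeSeriesSplit would be the closer fit for stock data):

from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [25, 50, 100],
    'learning_rate': [0.01, 0.1, 1, 10],
    'loss': ['linear', 'square', 'exponential'],
}
# k-fold CV shuffles across time; for time series prefer TimeSeriesSplit
search = GridSearchCV(AdaBoostRegressor(), param_grid,
                      scoring='neg_mean_squared_error', cv=5)
# search.fit(X_train, y_train); best = search.best_estimator_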
Example 2: round2
def round2(X_df, featurelist):
    # Set parameters
    model = AdaBoostRegressor()
    y_df = X_df['target']
    n = len(y_df)
    # Perform 5-fold cross validation
    scores = []
    kf = KFold(n, n_folds=5, shuffle=True)  # pre-0.18 sklearn KFold API
    # Calculate RMSE on the test split of each fold
    for train_idx, test_idx in kf:
        X_train, X_test = X_df.iloc[train_idx, :], X_df.iloc[test_idx, :]
        # y_train, y_test = y_df[train_idx], y_df[test_idx]
        X_train, X_test = applyFeatures(X_train, X_test, featurelist)
        Xtrain_array, ytrain_array, Xtest_array, ytest_array = dfToArray(X_train, X_test)
        model.fit(Xtrain_array, ytrain_array)
        prediction = model.predict(Xtest_array)
        rmse = np.sqrt(mean_squared_error(ytest_array, prediction))
        scores.append(rmse)
        print(rmse)
        print("Finish fold")
    return scores
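KFold(n, n_folds=5, shuffle=True) is the pre-0.18 sklearn.cross_validation API, where the splitter is built from the number of samples and iterated directly. A minimal sketch of the equivalent loop under the current sklearn.model_selection API:

import numpy as np
from sklearn.model_selection import KFold

X = np.arange(20).reshape(10, 2)  # toy data standing in for X_df
kf = KFold(n_splits=5, shuffle=True, random_state=0)
for train_idx, test_idx in kf.split(X):  # split() takes the data, not its length
    print(train_idx, test_idx)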
Example 3: train_learning_model_decision_tree_ada_boost
def train_learning_model_decision_tree_ada_boost(df):
    # code taken from sklearn
    X_all, y_all = preprocess_data(df)
    X_train, X_test, y_train, y_test = split_data(X_all, y_all)
    tree_regressor = DecisionTreeRegressor(max_depth=6)
    ada_regressor = AdaBoostRegressor(DecisionTreeRegressor(max_depth=6),
                                      n_estimators=500, learning_rate=0.01,
                                      random_state=1)
    tree_regressor.fit(X_train, y_train)
    ada_regressor.fit(X_train, y_train)
    y_pred_tree = tree_regressor.predict(X_test)
    y_pred_ada = ada_regressor.predict(X_test)
    mse_tree = mean_squared_error(y_test, y_pred_tree)
    mse_ada = mean_squared_error(y_test, y_pred_ada)
    mse_tree_train = mean_squared_error(y_train, tree_regressor.predict(X_train))
    mse_ada_train = mean_squared_error(y_train, ada_regressor.predict(X_train))
    print("MSE tree: %.4f " % mse_tree)
    print("MSE ada: %.4f " % mse_ada)
    print("MSE tree train: %.4f " % mse_tree_train)
    print("MSE ada train: %.4f " % mse_ada_train)
Example 4: predict
def predict(tour_data):
    vec = DictVectorizer()
    tour_data = get_tour_data()
    transformed = vec.fit_transform(tour_data).toarray()
    categories = vec.get_feature_names()
    y = transformed[:, categories.index('rating')]
    X = transformed[:, np.arange(transformed.shape[1]) != categories.index('rating')]
    rng = np.random.RandomState(1)
    reg_tree = DecisionTreeRegressor()
    addboost_tree = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                                      n_estimators=300, random_state=rng)
    reg_tree.fit(X, y)
    addboost_tree.fit(X, y)
    # Predict with both the plain and the boosted tree
    y_1 = reg_tree.predict(X)
    y_2 = addboost_tree.predict(X)
    return y_1, y_2
Example 5: backTest
def backTest(trainEndDate, code, testDate, predictDate):
    conn = db.get_history_data_db('D')
    df = None
    # train on more data
    # model = pickle.load(open('%s/%s.pkl' % (config.model_dir, code), 'r'))
    rng = np.random.RandomState(1)
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                              n_estimators=1000, random_state=rng, loss='square')
    df = pd.read_sql_query(
        "select * from history_data where date([date])<='%s' and code='%s' order by code, date([date]) asc" % (
            trainEndDate, code), conn)
    # target: the close price two days ahead
    shift_1 = df['close'].shift(-2)
    df['target'] = shift_1
    data = df[df['target'] > -1000]
    X_train = data.ix[:, 'code':'turnover']  # pre-1.0 pandas .ix indexer
    y_train = data.ix[:, 'target']
    if len(X_train) < 500:
        return
    print(len(X_train))
    # print(data)
    # for i in range(0, 10):
    #     model.fit(X_train, y_train)
    model.fit(X_train, y_train)
    # predict tomorrow
    try:
        df = pd.read_sql_query(config.sql_history_data_by_code_date % (code, testDate), conn)
        # print(df)
    except Exception as e:
        print(e)
Example 6: main
def main():
    ab = AdaBoostRegressor(base_estimator=None, n_estimators=50,
                           learning_rate=1.0, loss='exponential',
                           random_state=None)
    ab.fit(X_train, y_train)
    # Evaluation on train set
    pred_proba_train = ab.predict(X_train)
    mse_train = mean_squared_error(y_train, pred_proba_train)
    rmse_train = np.sqrt(mse_train)
    logloss_train = log_loss(y_train, pred_proba_train)
    # Evaluation on validation set
    pred_proba_val = ab.predict(X_val)
    mse_val = mean_squared_error(y_val, pred_proba_val)
    rmse_val = np.sqrt(mse_val)
    logloss_val = log_loss(y_val, pred_proba_val)
    # report metrics
    print(rmse_train)
    print(rmse_val)
    print(logloss_train)
    print(logloss_val)
Example 7: ada_boost_regressor
def ada_boost_regressor(train_x, train_y, pred_x, review_id, v_curve=False, l_curve=False, get_model=True):
    """
    :param train_x: training features
    :param train_y: training target
    :param pred_x: test set to predict
    :param review_id: takes in a review id
    :param v_curve: run the model for the validation curve
    :param l_curve: run the model for the learning curve
    :param get_model: run the model
    :return: the predicted values, learning curve, validation curve
    """
    ada = AdaBoostRegressor(n_estimators=5)
    if get_model:
        print("Fitting Ada...")
        # fit on log-transformed votes, then invert the transform on predictions
        ada.fit(train_x, np.log(train_y + 1))
        ada_pred = np.exp(ada.predict(pred_x)) - 1
        Votes = ada_pred[:, np.newaxis]
        Id = np.array(review_id)[:, np.newaxis]
        # create submission csv for Kaggle
        submission_ada = np.concatenate((Id, Votes), axis=1)
        np.savetxt("submission_ada.csv", submission_ada, header="Id,Votes",
                   delimiter=',', fmt="%s, %0.2f", comments='')
    # plot validation and learning curves
    if l_curve:
        print("Working on Learning Curves")
        plot_learning_curve(AdaBoostRegressor(), "Learning curve: Adaboost",
                            train_x, np.log(train_y + 1.0))
    if v_curve:
        print("Working on Validation Curves")
        plot_validation_curve(AdaBoostRegressor(), "Validation Curve: Adaboost",
                              train_x, np.log(train_y + 1.0),
                              param_name="n_estimators",
                              param_range=[2, 5, 10, 15, 20, 25, 30])
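plot_learning_curve and plot_validation_curve above are the caller's own helpers. For the validation-curve half, a sketch of the raw scores that scikit-learn's built-in validation_curve computes over the same n_estimators range, on synthetic data for illustration:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import validation_curve

X, y = make_regression(n_samples=300, n_features=5, noise=5.0, random_state=0)
train_scores, test_scores = validation_curve(
    AdaBoostRegressor(), X, y,
    param_name='n_estimators',
    param_range=[2, 5, 10, 15, 20, 25, 30], cv=5)
# one row of CV scores per parameter value
print(train_scores.mean(axis=1), test_scores.mean(axis=1))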
Example 8: Round2
def Round2(X, y):
    # Set parameters
    min_score = {}
    for loss in ['linear', 'square', 'exponential']:
        model = AdaBoostRegressor(loss=loss)
        n = len(y)
        # Perform 5-fold cross validation
        scores = []
        kf = KFold(n, n_folds=5, shuffle=True)  # pre-0.18 sklearn KFold API
        # Calculate RMSE on the test split of each fold
        for train_idx, test_idx in kf:
            X_train, X_test = X[train_idx], X[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]
            model.fit(X_train, y_train)
            prediction = model.predict(X_test)
            rmse = np.sqrt(mean_squared_error(y_test, prediction))
            # score = model.score(X_test, y_test)
            scores.append(rmse)
        # keep the loss with the lowest mean RMSE
        if len(min_score) == 0:
            min_score['loss'] = loss
            min_score['scores'] = scores
        elif np.mean(scores) < np.mean(min_score['scores']):
            min_score['loss'] = loss
            min_score['scores'] = scores
        print("Loss:", loss)
        print(scores)
        print(np.mean(scores))
    return min_score
Example 9: predict_volatility_1year_ahead
def predict_volatility_1year_ahead(rows, day, num_days):
    """
    SUMMARY: Predict volatility 1 year into the future
    ALGORITHM:
        a) The predictor trains on all data up to exactly 1 year (252 trading days) before `day`
        b) The newest `num_days` days up to and including `day` (10 in the examples below)
           form the feature vector for the prediction,
           i.e. if day = 0, the feature vector consists of days (0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
           if day = 10, it consists of days (10, 11, 12, 13, 14, 15, 16, 17, 18, 19)
    INPUT: minimum of (1 year + `num_days` days) of data before `day` (newest data is day=0)
    """
    # enforce that `day` is in the required range
    assert len(rows) >= 252 + num_days + day, (
        'You need to have AT LEAST 252+%d rows AFTER the day index. '
        'See predict_volatility_1year_ahead() for details.' % num_days)
    assert day >= 0
    # Compile features for fitting
    feature_sets = []
    value_sets = []
    for ii in range(day + num_days + 252, len(rows) - num_days):
        features = []
        for jj in range(num_days):
            day_index = ii + jj
            # columns 7-13 hold the per-day numeric features
            features += [float(rows[day_index][col]) for col in range(7, 14)]
        feature_sets += [features]
        value_sets += [float(rows[ii - 252][9])]
    # Create regressor and fit
    rng = np.random.RandomState(1)
    regr = AdaBoostRegressor(CustomClassifier(), n_estimators=3, random_state=rng)
    regr.fit(feature_sets, value_sets)
    # Build the prediction features from the newest num_days days
    features = []
    for jj in range(num_days):
        day_index = day + jj
        features += [float(rows[day_index][col]) for col in range(7, 14)]
    return float(regr.predict([features]))
Example 10: ada_learning
def ada_learning(labels, train, test):
    label_log = np.log1p(labels)
    # try 50 / 1.0
    # boost GradientBoostingRegressor(n_estimators=200, max_depth=8, learning_rate=0.1)
    clf = AdaBoostRegressor(
        GradientBoostingRegressor(n_estimators=200, max_depth=8, learning_rate=0.1),
        n_estimators=50, learning_rate=1.0)
    model = clf.fit(train, label_log)
    preds1 = model.predict(test)
    preds = np.expm1(preds1)
    return preds
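The log1p/expm1 pair around fit and predict is a common variance-stabilizing trick for skewed targets. A sketch of the same wiring expressed declaratively with TransformedTargetRegressor, assuming scikit-learn >= 0.20:

import numpy as np
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor

model = TransformedTargetRegressor(
    regressor=AdaBoostRegressor(
        GradientBoostingRegressor(n_estimators=200, max_depth=8, learning_rate=0.1),
        n_estimators=50, learning_rate=1.0),
    func=np.log1p,          # applied to y before fitting
    inverse_func=np.expm1)  # applied to predictions
# model.fit(train, labels); preds = model.predict(test)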
Example 11: Regressor
class Regressor(BaseEstimator):
    def __init__(self):
        self.clf = AdaBoostRegressor(
            RandomForestRegressor(n_estimators=500, max_depth=78, max_features=10),
            n_estimators=40)

    def fit(self, X, y):
        self.clf.fit(X, y)
        return self  # scikit-learn convention: fit returns self

    def predict(self, X):
        return self.clf.predict(X)
Example 12: train_predict
def train_predict(train_id, test_id):
    # load libsvm files for training dataset
    Xs_train = []
    ys_train = []
    n_train = load_libsvm_files(train_id, Xs_train, ys_train)
    # load libsvm files for testing dataset
    Xs_test = []
    ys_test = []
    n_test = load_libsvm_files(test_id, Xs_test, ys_test)
    # staged predictions, one list per target column
    ans_train = []
    ans_test = []
    # prediction buffers: one row of 10 targets per sample
    ps_train = [[0.0 for j in range(10)] for i in range(n_train)]
    ps_test = [[0.0 for j in range(10)] for i in range(n_test)]
    # fit one boosted model per target column
    for i in range(10):
        l = np.array([ys_train[j][i] for j in range(n_train)])
        clf = AdaBoostRegressor(DecisionTreeRegressor(max_depth=params['max_depth']),
                                n_estimators=params['n_estimators'],
                                learning_rate=params['learning_rate'])
        clf.fit(Xs_train[i].toarray(), l)
        print("[%s] [INFO] %d model training done" % (t_now(), i))
        # staged_predict yields predictions after each boosting iteration
        preds_train = clf.staged_predict(Xs_train[i].toarray())
        ans_train.append([item for item in preds_train])
        # print("len(ans_train[%d]) = %d" % (i, len(ans_train[i])))
        print("[%s] [INFO] %d model predict for training data set done" % (t_now(), i))
        preds_test = clf.staged_predict(Xs_test[i].toarray())
        ans_test.append([item for item in preds_test])
        print("[%s] [INFO] %d model predict for testing data set done" % (t_now(), i))
    # print("len_ans_train=%d" % len(ans_train[0]))
    # score the staged predictions after each boosting iteration
    for i in range(params['n_estimators']):
        for j in range(10):
            tmp = min(i, len(ans_train[j]) - 1)
            for k in range(n_train):
                ps_train[k][j] = ans_train[j][tmp][k]
            tmp = min(i, len(ans_test[j]) - 1)
            for k in range(n_test):
                ps_test[k][j] = ans_test[j][tmp][k]
        print("%s,%d,%f,%f" % (t_now(), i + 1,
                               mean_cos_similarity(ys_train, ps_train, n_train),
                               mean_cos_similarity(ys_test, ps_test, n_test)))
    return 0
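staged_predict, used above, yields one prediction array per boosting stage, which is what lets Example 12 score every number of estimators from a single fit instead of refitting per setting. A minimal self-contained sketch on synthetic data:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error

X, y = make_regression(n_samples=200, n_features=5, noise=5.0, random_state=0)
model = AdaBoostRegressor(n_estimators=20, random_state=0).fit(X, y)
# one prediction array per boosting stage, without refitting
for stage, y_stage in enumerate(model.staged_predict(X), start=1):
    print(stage, mean_squared_error(y, y_stage))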
Example 13: Regressor
class Regressor(BaseEstimator):
    def __init__(self):
        cl = RandomForestRegressor(n_estimators=10, max_depth=10, max_features=10)
        self.clf = AdaBoostRegressor(base_estimator=cl, n_estimators=100)

    def fit(self, X, y):
        self.clf.fit(X, y)
        return self  # scikit-learn convention: fit returns self

    def predict(self, X):
        return self.clf.predict(X)
Example 14: ada_boost
def ada_boost(data, classifier, sample):
    import numpy as np
    from sklearn.ensemble import AdaBoostRegressor
    from sklearn.tree import DecisionTreeRegressor
    # Base estimators also tried here do not work for boosting a regressor:
    # GaussianNB is a classifier and KMeans predicts cluster labels, while
    # AdaBoostRegressor needs a regressor that supports sample_weight.
    # from sklearn.naive_bayes import GaussianNB; func = GaussianNB()
    # from sklearn.cluster import KMeans; func = KMeans(n_clusters=2)
    func = DecisionTreeRegressor()
    clf = AdaBoostRegressor(func, n_estimators=300,
                            random_state=np.random.RandomState(1))
    clf.fit(data, classifier)
    print_result(clf, [sample])
Example 15: AdaBoost
def AdaBoost(xTrain, yTrain, xTest, yTest, treeNum):
    rms = dict()
    for trees in treeNum:
        ab = AdaBoostRegressor(n_estimators=trees)
        ab.fit(xTrain, yTrain)
        yPred = ab.predict(xTest)
        rms[trees] = sqrt(mean_squared_error(yTest, yPred))
    # pick the estimator count with the lowest RMSE
    (bestRegressor, rmse) = sorted(rms.items(), key=operator.itemgetter(1))[0]
    return bestRegressor, rmse