本文整理汇总了Python中sklearn.ensemble.RandomForestRegressor.set_params方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestRegressor.set_params方法的具体用法?Python RandomForestRegressor.set_params怎么用?Python RandomForestRegressor.set_params使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomForestRegressor
的用法示例。
在下文中一共展示了RandomForestRegressor.set_params方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _create_random_forest
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
def _create_random_forest(self, current_param=None):
    """Fit a RandomForestRegressor on the stored training data.

    Merges ``self.params`` with ``current_param`` (the override wins on
    key collisions), applies the merged parameters via ``set_params``,
    and fits on ``self.Xtr`` / ``self.Ytr``.

    :param current_param: optional dict of hyper-parameter overrides.
    :return: the fitted RandomForestRegressor.
    """
    # BUG FIX: the original used a mutable default argument
    # (``current_param={}``); default to None and create a fresh dict
    # per call to avoid the shared-mutable-default pitfall.
    if current_param is None:
        current_param = {}
    combined_param = dict(self.params, **current_param)
    clf = RandomForestRegressor()
    clf.set_params(**combined_param)
    clf = clf.fit(self.Xtr, self.Ytr)
    return clf
示例2: model_rf_cv
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
def model_rf_cv(train , test):
    # Cross-validate a RandomForestRegressor on the training data.
    # ``train`` is a (features, labels) pair; ``test`` is unused here.
    # Prints each fold's index arrays, the per-fold scores, and their mean.
    train_x , train_y = train[0] , train[1]
    # 5-fold split over the row indices of the training set
    # (pre-0.18 scikit-learn ``cross_validation`` API).
    cv = cross_validation.KFold(len(train_x) , n_folds = 5)
    results = []
    rf = RandomForestRegressor()
    # Hyper-parameters come from the shared Params module.
    rf.set_params(**Params.rf_reg_params)
    for traincv , testcv in cv:
        print traincv , testcv
        # Fit on the 'gap' column of this fold's labels, then predict
        # the held-out fold.
        probas = rf.fit(train_x[traincv] , train_y['gap'][traincv].values).predict(train_x[testcv])
        # NOTE(review): scoring uses the ``y_fea_names`` columns (not
        # defined in this excerpt) while the model was fitted on 'gap'
        # only -- confirm this is intended.
        results.append(Util.score(train_y.loc[testcv , y_fea_names].values , probas))
    print results
    print np.mean(results)
示例3: model_rf
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
def model_rf(train , test , flag):
train_x , train_y = train[0] , train[1]
test_x , test_y = test[0] , test[1]
rf = RandomForestRegressor()
rf.set_params(**Params.rf_reg_params)
print "start training"
rf.fit(train_x , train_y['gap'].values)
if flag == 'online':
prd = rf.predict(test_x)
prd = postprocess(train , test_y.values , prd)
Util.submit(test_y.values , prd)
elif flag == 'offline':
prd = rf.predict(test_x)
prd = postprocess(train , test_y.values , prd)
print 'test : ' , Util.score(test_y.values , prd)
prd = rf.predict(train_x)
print 'train : ' , Util.score(train_y.values , prd)
示例4: model_rf
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
def model_rf(train , test , flag):
train_x , train_y = train[0] , train[1]
test_x , test_y = test[0] , test[1]
if os.path.exists(configs['rf_model']):
print "model exists"
rf = joblib.load(configs['rf_model'])
else:
rf = RandomForestRegressor()
rf.set_params(**Params.rf_reg_params)
print "start training"
rf.fit(train_x , train_y[3].values)
joblib.dump(rf , configs['rf_model'] , compress=3)
if flag == 'online':
prd = rf.predict(test_x)
#prd = postprocess(train , test_y.values , prd)
Util.submit(test_y.values , prd)
elif flag == 'offline':
prd = rf.predict(test_x)
#prd = postprocess(train , test_y.values , prd)
print 'test : ', Util.score2(test_y.values , prd)
示例5: __init__
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
class Model:
def __init__(self, obj, kfold=3):
    # Prepare one slot per (model, featureset) combination; every grid
    # starts out filled with the placeholder value 0.
    def zero_grid():
        return [[0] * featureset_n for _ in range(model_n)]
    self.kfold = kfold
    self.obj = obj
    self.best_params_ = zero_grid()
    self.predicted = zero_grid()
    self.clf = zero_grid()
    self.test_predicted = zero_grid()
    self.parameters = [0] * model_n
    # Human-readable names used when logging results.
    self.featureset_master = {0: "raw", 1: "tfidf", 2: "pca", 3: "infra_holiday"}
    self.clf_master = {0: "RF", 1: "KNN", 2: "Elastic", 3: "GBR", 4: "ETR"}
def set_feature(self):
    """Build the four candidate feature sets for the current target.

    Column groups are pulled from the global ``data_train`` via
    ``utility.cols_extractcols`` and combined differently depending on
    whether the target (``self.obj``) is "total", "inbound" or "japan".
    The result is stored in ``self.features`` (one list of column names
    per feature set).
    """
    infra_cols = utility.cols_extractcols(data_train, ["infra"],["PCA", "tfidf"])
    weather_cols = utility.cols_extractcols(data_train, ["wea"])
    tfidf_infra_cols = utility.cols_extractcols(data_train, ["tfidf", "infra"])
    tfidf_snslocation_cols = utility.cols_extractcols(data_train, ["tfidf", "snslocation"])
    pca_infra_cols = utility.cols_extractcols(data_train, ["PCA", "infra"])
    pca_clim_cols = utility.cols_extractcols(data_train, ["PCA", "clim"])
    pca_snsraw_cols = utility.cols_extractcols(data_train, ["PCA", "snsraw"])
    loc_cols = utility.cols_extractcols(data_train, ["loc_"])
    jpy_cols = utility.cols_extractcols(data_train, ["JPY"])
    season_cols = utility.cols_extractcols(data_train,["season"])
    snslocation_cols = utility.cols_extractcols(data_train, ["snslocation"], ["tfidf", "PCA"])
    clim_cols = utility.cols_extractcols(data_train, ["clim"], ["tfidf", "PCA"])
    snsraw_cols = utility.cols_extractcols(data_train, ["twitter"], ["PCA"])
    holiday_cols = ["is_holiday", "is_weekend", "is_dayoff", "is_dayoff_mean"]
    holiday_cols_abroad = ["is_holiday_abroad", "is_weekend", "is_dayoff_abroad", "is_dayoff_mean_abroad"]
    if self.obj == "total":
        self.features = [0] * featureset_n
        self.features[0] = infra_cols + loc_cols + clim_cols + holiday_cols + snslocation_cols + ["snsfeature"]
        # BUG FIX: the original referenced the undefined names
        # ``tfidf_infra_snslocation_cols`` and ``pca_infra_clim_snsraw_cols``
        # (a NameError at runtime).  Compose them from the column groups
        # actually built above, mirroring the "inbound"/"japan" branches.
        self.features[1] = tfidf_infra_cols + tfidf_snslocation_cols + holiday_cols
        self.features[2] = pca_infra_cols + pca_clim_cols + pca_snsraw_cols + holiday_cols
        self.features[3] = infra_cols + holiday_cols + ["snsfeature"]
    elif self.obj == "inbound":
        self.features = [0] * featureset_n
        self.features[0] = infra_cols + loc_cols + clim_cols + jpy_cols + holiday_cols_abroad + ["snsfeature"]
        self.features[1] = tfidf_infra_cols + holiday_cols_abroad
        self.features[2] = pca_infra_cols + pca_clim_cols + holiday_cols_abroad
        self.features[3] = infra_cols + holiday_cols_abroad + ["snsfeature"]
    elif self.obj == "japan":
        self.features = [0] * featureset_n
        self.features[0] = infra_cols + loc_cols + clim_cols + holiday_cols + ["snsfeature"]
        self.features[1] = tfidf_infra_cols + holiday_cols
        self.features[2] = pca_infra_cols + pca_clim_cols + pca_snsraw_cols + holiday_cols
        self.features[3] = infra_cols + holiday_cols+ ["snsfeature"]
def setmodel_stage1(self):
    # Register one estimator prototype plus its grid-search space per
    # model family.  Each row deliberately repeats the *same* estimator
    # instance across all feature sets ([est] * featureset_n), matching
    # the original aliasing behavior.
    specs = [
        (RandomForestRegressor(random_state=71),
         {'n_estimators': np.arange(50, 450, 100),
          'max_features': np.arange(3, 12, 3),
          'max_depth': np.arange(7, 13, 3)}),
        (KNeighborsRegressor(),
         {'n_neighbors': np.arange(4, 15, 2),
          'weights': ["uniform", "distance"]}),
        (linear_model.ElasticNet(max_iter=10000),
         {'alpha': np.linspace(0.01, 1500, num=10),
          'l1_ratio': np.linspace(0.01, 1, 5)}),
        (GradientBoostingRegressor(random_state=71),
         {'n_estimators': np.arange(200, 400, 100),
          'max_features': np.arange(6, 12, 3),
          'max_depth': np.arange(7, 13, 3)}),
        (ExtraTreesRegressor(random_state=71),
         {'n_estimators': np.arange(100, 400, 100),
          'max_features': np.arange(6, 12, 3),
          'max_depth': np.arange(4, 13, 3)}),
    ]
    for row, (estimator, grid) in enumerate(specs):
        self.clf[row] = [estimator] * featureset_n
        self.parameters[row] = grid
def parametersearch_stage1(self):
    # Grid-search every (model, featureset) pair with k-fold CV and
    # remember the best parameter dict for each pair in self.best_params_.
    # NOTE(review): uses the deprecated pre-0.18 scikit-learn
    # ``cross_validation`` / ``grid_search`` modules and the old
    # "mean_absolute_error" scoring string.
    cv = cross_validation.KFold(len(data_train), n_folds=self.kfold, shuffle=True, random_state=1)
    for i in range(model_n):
        for j in range(featureset_n):
            grid = grid_search.GridSearchCV(self.clf[i][j], self.parameters[i], cv=cv, n_jobs=1, scoring="mean_absolute_error")
            if self.clf_master[i] == "KNN":
                # KNN is distance-based, so standardise the features first.
                scaler = preprocessing.StandardScaler().fit(data_train[self.features[j]])
                tmp = pd.DataFrame(scaler.transform(data_train[self.features[j]]), columns=data_train[self.features[j]].columns)
                grid.fit(tmp, data_train[self.obj])
                print "{0}_{1} params: {2} score:{3}".format(self.clf_master[i], self.featureset_master[j], grid.best_params_, grid.best_score_)
            else:
                grid.fit(data_train[self.features[j]], data_train[self.obj])
                print "{0}_{1} params: {2} score:{3}".format(self.clf_master[i], self.featureset_master[j], grid.best_params_, grid.best_score_)
            self.best_params_[i][j] = grid.best_params_
def predict_stage1(self):
    # Apply the best grid-search parameters, then produce out-of-fold
    # predictions on the training data and (for KNN) predictions on the
    # test set, for every (model, featureset) pair.
    for i in range(model_n):
        for j in range(featureset_n):
            # NOTE(review): each row of self.clf shares one estimator
            # instance, so set_params re-configures that shared object
            # just before it is used -- statement order matters here.
            self.clf[i][j].set_params(**self.best_params_[i][j])
            cv = cross_validation.KFold(len(data_train), n_folds=self.kfold, shuffle=True, random_state=71)
            if self.clf_master[i] == "KNN":
                # Standardise for KNN, mirroring parametersearch_stage1.
                scaler = preprocessing.StandardScaler().fit(data_train[self.features[j]])
                tmp = pd.DataFrame(scaler.transform(data_train[self.features[j]]), columns=data_train[self.features[j]].columns)
                self.predicted[i][j] = cross_validation.cross_val_predict(self.clf[i][j], tmp, data_train[self.obj], cv=cv)
                self.clf[i][j].fit(tmp, data_train[self.obj])
                tmp_test = pd.DataFrame(scaler.transform(data_test[self.features[j]]), columns=data_test[self.features[j]].columns)
                self.test_predicted[i][j] = self.clf[i][j].predict(tmp_test)
            else:
                self.predicted[i][j] = cross_validation.cross_val_predict(self.clf[i][j], data_train[self.features[j]], data_train[self.obj], cv=cv)
                self.clf[i][j].fit(data_train[self.features[j]], data_train[self.obj])
                # (the remainder of this method is omitted in the source excerpt)
#.........这里部分代码省略.........
示例6: predict
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
# Fit the module-level regressor on the pre-built feature matrix X and targets y
# (both constructed earlier in this script, outside this excerpt).
clf.fit(X, y)
def predict(examples):
    # Vectorise each example with the module-level ``represent`` helper,
    # then delegate to the trained regressor ``clf``.
    feature_rows = [represent(example) for example in examples]
    return clf.predict(feature_rows)
import math
from sklearn.metrics import mean_squared_error
def rmse(y_true, y_pred):
    """Root-mean-squared error of the predictions."""
    return math.sqrt(mean_squared_error(y_true, y_pred))
from sklearn.cross_validation import cross_val_score
def validate(examples):
    # 2-fold cross-validated RMSE of the module-level regressor ``clf``.
    # NOTE: ``score_func`` is the long-removed pre-0.18 scikit-learn API;
    # kept as-is to match the environment this script targets.
    features = [represent(example) for example in examples]
    targets = [label(example) for example in examples]
    return cross_val_score(clf, features, targets, cv=2, score_func=rmse)
if __name__ == "__main__":
import music
train_examples = music.load_examples('data/train.pkl')
import sys
if len(sys.argv) > 1:
clf.set_params(n_estimators = int(sys.argv[1]))
scores = validate(train_examples)
print "RMSE: %0.6f (+/- %0.6f)" % (scores.mean(), scores.std()/2)
示例7: rfFit
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
def rfFit(X, y):
    # Train with 8 parallel workers, then drop back to a single job
    # before returning -- presumably so prediction avoids the
    # multiprocessing issues noted elsewhere in this project (TODO confirm).
    model = RandomForestRegressor(n_estimators=forestSize, n_jobs=8)
    model.fit(X, y)
    model.set_params(n_jobs=1)
    return model
示例8: str
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
,{ 'fit' : svrFit, 'predict': scaledPredict, 'scaled': True, 'idx0': 3, 'idx1': 4, 'ratio': svmRatio }
,{ 'fit' : svrFit, 'predict': scaledPredict, 'scaled': True, 'idx0': 4, 'idx1': 5, 'ratio': svmRatio }
,{ 'fit' : svrFit, 'predict': scaledPredict, 'scaled': True, 'idx0': 6, 'idx1': 7, 'ratio': svmRatio }
]
#models2 = combine.combineTrain(X_test, y_test, models)
print "Training random forest..."
forestSize = 30
print "\t# Examples: \t\t" + str(len(X_train))
print "\tForest Size: \t\t" + str(forestSize)
start = time.time()
clf = RandomForestRegressor(n_estimators=forestSize, n_jobs=8)
clf = clf.fit(X_train, y_train)
print "\tTraining Complete"
print "\tTime: \t\t" + str(round(time.time() - start, 1)) + "s"
#Reset n_jobs to 1 because multicore evaluation is apparently hard
params = clf.get_params()
clf.set_params(n_jobs = 1)
print "\tRMSE: \t\t" + str(rmse(X_test, y_test, clf.predict, True))
#results = combine.combineTest(X_test, y_test, clf, models)
#def subPredict(X):
# return combine.combinePredict(X, clf, models)
submission(clf.predict, filters, pca.transform)
示例9: dict
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
##############################################################
# Stacked ensemble: blend SVR, random-forest and gradient-boosting
# predictions with a grid of (alpha, beta) weights under 3-fold
# stratified cross-validation.
scores = dict()
skf = cross_validation.StratifiedKFold(Y, n_folds=3)
for train_index, test_index in skf:
    X1, X2 = X[train_index], X[test_index]
    Y1, Y2 = Y[train_index], Y[test_index]
    # predict with SVR
    svr = SVR()
    # SECURITY NOTE: unpickling executes arbitrary code -- only load
    # these pre-tuned parameter files from a trusted source.
    svr.set_params(**pickle.load(open("svr.p", "rb" )))
    svr.fit(X1, Y1)
    Y_svr = svr.predict(X2)
    # predict with RF
    rfr = RandomForestRegressor(n_estimators = 1000)
    rfr.set_params(**pickle.load(open("rfr.p", "rb" )))
    rfr.fit(X1, Y1)
    Y_rfr = rfr.predict(X2)
    # predict with GBT
    gbr = GradientBoostingRegressor(n_estimators=3000)
    gbr.set_params(**pickle.load(open("gbr.p", "rb" )))
    gbr.fit(X1, Y1)
    Y_gbr = gbr.predict(X2)
    # stacking: sweep blend weights over a log2-spaced grid.
    for alpha in np.logspace(-10, 10, 21, base=2):
        for beta in np.logspace(-10, 10, 21, base=2):
            y_pred = Y_svr + alpha * Y_rfr + beta * Y_gbr
            # Map the blended scores onto rank buckets; the loop body
            # continues past this excerpt.
            y_rank = convertScore(y_pred,
                [0.0, 0.0761961015948, 0.221500295334, 0.392498523331, 1.0])
示例10: GridSearchCV
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import set_params [as 别名]
# NOTE(review): this branch belongs to an ``if`` above the excerpt
# (grid-search path); the matching ``else`` below uses fixed parameters.
# Fit RF via grid search and report the winning parameters.
RF_model = GridSearchCV(RF_est, params)
RF_model.fit(X,y)
print('Best {}'.format(RF_model.best_params_))
print('Performing grid search on GBR')
# NOTE(review): ``n_features`` is unused in this excerpt.
n_features = X.shape[1]
params = {'max_features':['auto','sqrt','log2'],
          'max_depth':[2, 3]}
GBR_model = GridSearchCV(GBR_est, params)
GBR_model.fit(X,y)
print('Best {}'.format(GBR_model.best_params_))
else:
    # Skip the (slow) grid search and use fixed, hand-picked parameters.
    Lin_model = Lin_est.set_params(alpha=100.0)
    SVR_model = svr_est.set_params(C=1.0)
    RF_model = RF_est.set_params(max_features='auto')
    GBR_model = GBR_est.set_params(max_features='auto',
                                   max_depth=3)
#%% Specify set of models to test
model_set = [('Null',LCM.rand_pick_mod()),
             ('Lin', Lin_model),
             ('Lin_SVR',SVR_model),
             ('GBR',GBR_model),
             ('RF', RF_model)]
# model_set = [('Null',LCM.rand_pick_mod()),
#              ('Lin', Lin_model),
#              ('RF', RF_model)]
leg_titles = {'Null':'Random\nPicking',