本文整理汇总了 Python 中 sklearn.ensemble.RandomForestRegressor.fit 方法的典型用法代码示例。如果您正苦于以下问题：Python RandomForestRegressor.fit 方法的具体用法？Python RandomForestRegressor.fit 怎么用？Python RandomForestRegressor.fit 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.ensemble.RandomForestRegressor 的用法示例。
在下文中一共展示了RandomForestRegressor.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def main():
    """Train a random forest on a symbol list and write ranked predictions.

    Reads one symbol per line from ``25-75_microcap_list.txt``, obtains the
    train and test frames from ``get_data``, drops every row that contains an
    infinite or missing value, fits a 250-tree ``RandomForestRegressor`` and
    writes the leading test columns plus a ``Prediction`` column, sorted by
    descending prediction, to ``trade_result.csv``.
    """
    # A context manager guarantees the symbol file is closed; the original
    # opened it and never released the handle.
    with open('25-75_microcap_list.txt', 'r') as fi:
        symbols = [line.strip() for line in fi]

    train, test = get_data(symbols, n = 30, flag = 1, blag = 12)

    # Infinite values are mapped to NaN first so a single dropna removes both.
    train = train.replace([np.inf, -np.inf], np.nan).dropna(axis=0)
    test = test.replace([np.inf, -np.inf], np.nan).dropna(axis=0)

    print('Fitting\n')
    m = RandomForestRegressor(n_estimators=250, n_jobs=1)
    # NOTE(review): `.ix` and `DataFrame.sort` below only exist in old pandas
    # releases; they are kept so the column selection semantics are unchanged.
    # NOTE(review): training features start at column 6 but test features at
    # column 5 -- presumably the test frame lacks the target column; confirm.
    m.fit(train.ix[:, 6:], train.ix[:, 5])

    print('Predicting\n')
    preds = m.predict(test.ix[:, 5:])

    # Copy the slice so adding a column does not write into a view of `test`.
    result = test.ix[:, :4].copy()
    result['Prediction'] = preds
    result = result.sort('Prediction', ascending=False)
    print(result.head())
    result.to_csv('trade_result.csv', sep = ',', index = False)
示例2: train_sklearn_forest
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def train_sklearn_forest(XAlltr, XAllcv, yAlltr, yAllcv, trees=20):
    """Fit one random forest per target column and record its errors.

    For every column index of ``yAlltr`` a ``RandomForestRegressor`` with
    ``trees`` estimators is fitted on ``XAlltr`` and scored with
    ``pred_error`` on both the training data and the ``XAllcv``/``yAllcv``
    hold-out data.

    Returns:
        ``(models, errors)`` where ``models`` holds the fitted regressors in
        column order and ``errors`` holds one
        ``(trees, feature, train_error, cv_error)`` tuple per column.
    """
    errors = []
    models = []
    print("training sklearn forset")
    for feature in range(np.shape(yAlltr)[1]):
        y = yAlltr[:, feature]
        ycv = yAllcv[:, feature]

        # The original first built a forest with min_samples_leaf=30 and
        # max_depth=20 and immediately overwrote it; only this default-depth
        # forest was ever fitted, so the dead construction is removed.
        clf = RandomForestRegressor(n_estimators=trees)
        clf.fit(XAlltr, y)

        err = pred_error(y, clf.predict(XAlltr), feature)
        errcv = pred_error(ycv, clf.predict(XAllcv), feature)

        print([trees, feature, err, errcv])
        errors.append((trees, feature, err, errcv))
        models.append(clf)
    return models, errors
示例3: buildTreeRegressor
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def buildTreeRegressor(predictorColumns, structurestable = 'structures.csv', targetcolumn = 'c_a', md = None):
    """
    Build a random-forest regressor that predicts a structure feature from
    compositional data.

    The CSV at ``structurestable`` is loaded, rows with missing values are
    dropped and, when a ``fracNobleGas`` column exists, only rows where it is
    <= 0 are kept. The predictor columns are standardised before use.

    Returns a 3-tuple ``(model, table, score)``: the regressor refitted on all
    rows, a DataFrame of true vs. predicted values for a held-out split, and
    the cross-validated mean absolute error rounded to two decimals.
    """
    table = pd.read_csv(structurestable).dropna()
    if 'fracNobleGas' in table.columns:
        table = table[table['fracNobleGas'] <= 0]

    scaler = StandardScaler()
    features = scaler.fit_transform(table[predictorColumns].astype('float64'))
    target = table[targetcolumn].values

    forest = RandomForestRegressor(max_depth = md)

    # Cross-validated mean absolute error of the (still unfitted) estimator.
    mae_scorer = make_scorer(mean_absolute_error)
    cv_mae = mean(cross_val_score(forest, features, target, scoring=mae_scorer))

    # A separate random split supplies the true-vs-predicted table.
    X_train, X_test, y_train, y_test = train_test_split(features, target)
    forest.fit(X_train, y_train)
    comparison = pd.DataFrame({'True': y_test, 'Predicted': forest.predict(X_test)})

    # The returned model is trained on every available row.
    forest.fit(features, target)
    return forest, comparison, round(cv_mae, 2)
示例4: do_regression
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def do_regression(df, j, i, k):
    """Fit a random forest predicting ``count`` from humidity and temperature.

    Args:
        df: pandas DataFrame with the columns ``workingday``, ``Hour``,
            ``Year``, ``count``, ``humidity`` and ``temp``.
        j: value ``workingday`` must equal.
        i: value ``Hour`` must equal.
        k: offset added to 2011 to obtain the ``Year`` to select.

    Returns:
        A ``RandomForestRegressor`` (100 trees) fitted on the selected rows.
    """
    # Build the row selector once; the original recomputed the identical
    # boolean mask for each of the three columns.
    selected = (df['workingday'] == j) & (df['Hour'] == i) & (df['Year'] == 2011 + k)

    y = df.loc[selected, 'count'].astype(int).values
    x_1 = df.loc[selected, 'humidity'].astype(int).values
    x_2 = df.loc[selected, 'temp'].astype(int).values

    # Materialise the (humidity, temp) pairs: on Python 3 zip() is a lazy
    # iterator, which fit() cannot interpret as a 2-D feature matrix.
    x = list(zip(x_1, x_2))

    regr = RandomForestRegressor(n_estimators= 100)
    regr.fit(x, y)
    return regr
示例5: pipeline
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def pipeline():
    """Train a weighted random forest on ``data`` and dump validation output.

    Rows of the module-level ``data`` frame with ``watch == 1`` form the
    validation set; rows whose ``watch`` is neither 0 nor 1 form the training
    set. Each training row is weighted by ``min(a, b)``. Missing feature
    values are filled with the per-column median of the respective set. The
    validation identifiers, predictions and labels are written to
    ``val_<cost>.csv``, where ``<cost>`` is element 1 of ``cal_cost``'s result.
    """
    non_feature_cols = ['label','watch','item_id','store_code','a','b']

    val = data[data.watch==1]
    # Copy so the 'pred' and 'y' columns added below do not write into a
    # slice of `data`.
    val_a_b = val[['item_id','store_code','a','b']].copy()
    val_y = val.label
    val_x = val.drop(non_feature_cols, axis=1)

    train = data[(data.watch!=1)&(data.watch!=0)]
    train_y = train.label
    # Pairwise minimum of the two columns, computed without index juggling.
    train_weight = np.array([min(a, b) for a, b in zip(train.a, train.b)])
    train_x = train.drop(non_feature_cols, axis=1)

    train_x.fillna(train_x.median(), inplace=True)
    val_x.fillna(val_x.median(), inplace=True)

    model = RandomForestRegressor(n_estimators=500,max_depth=5,max_features=0.6,n_jobs=-1,random_state=1024)
    # train
    model.fit(train_x, train_y, sample_weight=train_weight)

    # predict val set
    val_a_b['pred'] = model.predict(val_x)
    val_a_b['y'] = val_y
    cost = cal_cost(val_y.values, val_a_b.pred.values, val_a_b.a.values, val_a_b.b.values)
    val_a_b.to_csv('val_{0}.csv'.format(cost[1]), index=None)
示例6: fit
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def fit(self, X, y, **kwargs):
    """Fit a fresh ``RandomForestRegressor`` and store it on ``self.model``.

    Keyword arguments whose names already exist in ``self.INITPARAMS``
    overwrite the stored value; unknown names are ignored. The regressor is
    then constructed from ``self.INITPARAMS`` and fitted on ``X`` and ``y``.
    """
    # items() works on both Python 2 and 3; iteritems() is Python 2 only.
    # NOTE(review): INITPARAMS is updated in place, so overrides persist for
    # later calls (and for other instances if it is a class-level dict) --
    # confirm that is intended.
    for key, value in kwargs.items():
        if key in self.INITPARAMS:
            self.INITPARAMS[key] = value
    model = RandomForestRegressor(**self.INITPARAMS)
    model.fit(X, y)
    self.model = model
示例7: RFscore_one
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def RFscore_one(x,y,id):
    """Return a 3-fold cross-validated forest score of ``y`` against ``x``.

    Both arrays are shuffled with one shared random permutation, standardised
    to zero mean and unit variance, and reshaped into single-column matrices.
    A 50-tree forest is fitted on each training fold and scored on the
    matching test fold; the mean fold score is divided by ``dist(y)``.

    Args:
        x: 1-D numpy array of predictor values.
        y: 1-D numpy array of target values, same length as ``x``.
        id: string label, used only in the progress message.
    """
    folds=3
    print("RFscore " + id)

    # list(...) is required on Python 3, where range() is immutable and
    # cannot be shuffled in place.
    r = list(range(len(x)))
    np.random.shuffle(r)
    x = x[r]
    y = y[r]

    x = (x - np.mean(x)) / np.std(x)
    y = (y - np.mean(y)) / np.std(y)
    # ndmin=2 gives shape (1, n); transposing yields the (n, 1) column layout.
    x = np.array(x, ndmin=2).T
    y = np.array(y, ndmin=2).T

    # NOTE(review): compute_importances and the cross_validation.KFold
    # signature used here belong to old scikit-learn releases; kept as-is.
    rf = RandomForestRegressor(n_estimators=50, verbose=0,n_jobs=1,min_samples_split=10,compute_importances=True,random_state=1)
    # The original also fitted and scored on the full data first, but never
    # used the result; with a fixed random_state the fold fits below do not
    # depend on it, so that wasted fit is dropped.
    cv = cross_validation.KFold(len(x), n_folds=folds, indices=False)
    score = 0
    median = dist(y)
    for traincv, testcv in cv:
        fit = rf.fit(x[traincv], y[traincv])
        score += fit.score(x[testcv], y[testcv])
    score /= folds
    score /= median
    return score
示例8: do_rf
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def do_rf(filename):
    """Fit a 100-tree forest on the merged dataset and predict 200 rows.

    ``create_merged_dataset(filename)`` supplies the feature frame and the
    targets. The ``driver`` and ``trip`` columns are excluded from the
    features. The model is fitted on all rows and then used to predict the
    first 200 of those same rows.

    Returns:
        DataFrame with the columns ``driver``, ``trip`` and ``probs`` for the
        first 200 rows.
    """
    df, Y = create_merged_dataset(filename)
    rf = RandomForestRegressor(n_estimators=100)
    # Pass axis by keyword: the positional form is rejected by newer pandas.
    X = df.drop(['driver', 'trip'], axis=1)
    rf.fit(X, Y)
    probs = rf.predict(X[:200])
    return pd.DataFrame({'driver': df['driver'][:200], 'trip': df['trip'][:200], 'probs': probs})
示例9: rf_regressor
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def rf_regressor(self):
    """Fit a random forest on an 80/20 split and return its test score.

    Returns:
        The score of a 100-tree ``RandomForestRegressor`` on the held-out 20%
        of the data, rounded to two decimals.
    """
    # NOTE(review): X and y are neither parameters nor attributes of self, so
    # they must come from an enclosing/module scope -- confirm.
    # The original rebound the name X inside the function, which made X a
    # local and raised UnboundLocalError on the very first line; a distinct
    # local name lets the outer X be read.
    dense_X = X.toarray()  # Convert X from sparse to array
    X_train, X_test, y_train, y_test = train_test_split(dense_X, y, test_size=.2)
    model = RandomForestRegressor(n_estimators=100, oob_score=True, random_state=42)
    model.fit(X_train, y_train)
    # Built-in round() works whether score() returns a numpy scalar or a
    # plain float; a plain float has no .round method.
    return round(model.score(X_test, y_test), 2)
示例10: refit_from_scratch
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def refit_from_scratch(self):
    """Rebuild the model, encoder and PCA directly from the database.

    Nothing saved from a previous run is reused. A ``ThreadedFit`` is started
    first (per the original comment, to fit a much larger forest in the
    background), then a smaller forest is fitted synchronously on records
    yielded by ``self.db.yield_some(250)``. Only records with both feedback
    and tags are used.

    Sets ``self.allX``, ``self.pca``, ``self.model`` and ``self.enc``.
    """
    # Kick off the background fit; its completion signal re-runs __init__.
    self.threaded_fit = ThreadedFit()
    self.threaded_fit.signal_finished.connect(self.__init__)
    self.threaded_fit.start()

    forest = RandomForest(max_features="sqrt", n_jobs=-1)
    vectorizer = CountVectorizer()

    tag_docs = []      # space-joined tags, one string per record
    numeric_rows = []  # additional numerical data per record
    targets = []       # feedback values to predict

    # The size is not used below; the call is kept because it may have side
    # effects on the database object.
    self.db.size()

    for data in self.db.yield_some(250):
        feedback = data["feedback"]
        tags = data["tags"]
        if not (feedback and tags):
            continue
        targets.append(feedback)
        tag_docs.append(" ".join(tags))
        numeric_rows.append(self.fmt_numerical(data))

    # Tag-presence counts and numerical columns side by side.
    tag_matrix = vectorizer.fit_transform(tag_docs)
    combined = hstack((tag_matrix, coo_matrix(numeric_rows)))
    self.allX = combined

    # Reduce to at most 200 components (fewer when there are fewer rows).
    pca = PCA(min(combined.shape[0], 200))
    reduced_X = pca.fit_transform(combined.todense())
    forest.fit(reduced_X, targets)

    self.pca = pca
    self.model = forest
    self.enc = vectorizer
示例11: train_with_features
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def train_with_features(self, features):
    """Fit a default ``RandomForestRegressor`` on a feature subset.

    The training matrix is ``self.data_folder.truncate(self.A, features)``
    and the target is ``self.target``.

    Returns:
        The fitted regressor.
    """
    selected = self.data_folder.truncate(self.A, features)
    regressor = RandomForestRegressor()
    regressor.fit(selected, self.target)
    return regressor
示例12: test_rrf_vs_sklearn_reg
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def test_rrf_vs_sklearn_reg(self):
    """Compare the R-backed forest with sklearn's on the Boston housing data.

    Both models use 100 trees, no bootstrap/replacement and the same number
    of candidate features per split; their test MSEs must agree to within the
    tolerance of ``assert_almost_equal(..., decimal=0)``.
    """
    from sklearn.datasets import load_boston
    from sklearn.cross_validation import train_test_split
    from sklearn.metrics import mean_squared_error
    from sklearn.ensemble import RandomForestRegressor

    dataset = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=0.2, random_state=13)

    n_samples, n_features = X_train.shape
    # 30% of the features are considered at each split.
    mtry = int(np.floor(0.3 * n_features))

    # R forest with 100 trees; passed as a dict because 'corr.bias' is not a
    # valid Python keyword name.
    r_params = {
        'ntree': 100,
        'nodesize': 1,
        'replace': 0,
        'mtry': mtry,
        'corr.bias': False,
        'sampsize': n_samples,
        'random_state': 1234,
    }
    r_rf = RRFEstimatorR(**r_params)
    r_rf.fit(X_train, y_train)
    r_mse = mean_squared_error(y_test, r_rf.predict(X_test))

    # Matching sklearn configuration.
    p_rf = RandomForestRegressor(n_estimators=100, min_samples_leaf=1,
                                 bootstrap=False, max_features=mtry,
                                 random_state=1)
    p_rf.fit(X_train, y_train)
    p_mse = mean_squared_error(y_test, p_rf.predict(X_test))

    print('%.4f vs %.4f' % (r_mse, p_mse))
    # should be roughly the same (7.6 vs. 7.2)
    np.testing.assert_almost_equal(r_mse, p_mse, decimal=0)
示例13: round2
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def round2(X, y):
    """Grid-search tree count and ``max_features`` with 5-fold CV.

    Every combination of 50/100/200/500 trees and ``'auto'``/``'log2'``
    features is evaluated by shuffled 5-fold cross-validation; the per-fold
    RMSE values are printed along with their mean.

    Returns:
        dict with the keys ``estimator``, ``max_feature`` and ``scores`` for
        the combination with the lowest mean RMSE.
    """
    min_score = {}
    for tree in [50, 100, 200, 500]:
        for feature in ['auto', 'log2']:
            model = RandomForestRegressor(n_estimators=tree, max_features=feature)

            # Root mean squared error on the held-out part of each fold.
            scores = []
            for train_idx, test_idx in KFold(len(y), n_folds=5, shuffle=True):
                model.fit(X[train_idx], y[train_idx])
                prediction = model.predict(X[test_idx])
                scores.append(np.sqrt(mean_squared_error(y[test_idx], prediction)))

            # The first combination always becomes the incumbent; afterwards
            # only a strictly lower mean RMSE replaces it.
            is_better = not min_score or np.mean(scores) < np.mean(min_score['scores'])
            if is_better:
                min_score['estimator'] = tree
                min_score['max_feature'] = feature
                min_score['scores'] = scores

            print("%s %s" % ("Estimator:", tree))
            print("%s %s" % ("Max Features:", feature))
            print(scores)
            print(np.mean(scores))
    return min_score
示例14: train_year
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def train_year(train_fea, trees):
    """Train one forest per sale year and measure cross-year error rates.

    A ``RandomForestRegressor`` is fitted for each distinct ``SaleYear`` on
    that year's rows, with ``SalePrice`` as the target and all remaining
    columns as features. For every year distance ``i`` and every year pair
    from ``get_pairs(years, i)``, the first year's model predicts the second
    year's prices and the mean relative error ``|pred - actual| / actual`` is
    recorded.

    Args:
        train_fea: DataFrame containing ``SaleYear`` and ``SalePrice``.
        trees: number of trees per forest.

    Returns:
        ``(rfs, errors_list)``: the fitted models in ascending year order and,
        for each distance ``1 .. len(years) - 1``, the average of the per-pair
        mean error rates (NaN when a distance has no pairs).
    """
    years = sorted(set(train_fea['SaleYear'].values))

    rfs = []
    for year in years:
        print('train model %d' % (year))
        # NOTE(review): compute_importances is only accepted by old
        # scikit-learn releases; kept so behaviour there is unchanged.
        rf = RandomForestRegressor(n_estimators=trees, n_jobs=1, compute_importances = True)
        year_rows = train_fea[train_fea['SaleYear'] == year]
        rf.fit(year_rows.drop('SalePrice', axis=1), year_rows['SalePrice'])
        rfs.append(rf)

    # The original accumulated mean/var/std rows in a DataFrame via repeated
    # append() and then read back only the per-distance average of 'mean';
    # that average is computed directly here.
    errors_list = []
    for i in range(1, len(years)):
        pair_means = []
        for p in get_pairs(years, i):
            print('compare %d, %d' % (p[0], p[1]))
            target_rows = train_fea[train_fea['SaleYear'] == p[1]]
            rf = rfs[years.index(p[0])]
            predicted = rf.predict(target_rows.drop('SalePrice', axis=1))
            actual = target_rows['SalePrice'].values
            # Vectorised relative error; equivalent to fabs(x - y) / y per row.
            error_rates = np.abs(predicted - actual) / actual
            pair_means.append(error_rates.mean())
        # Series.mean() skips NaN entries, matching the original DataFrame
        # column mean.
        errors_list.append(pd.Series(pair_means, dtype='float64').mean())
    return rfs, errors_list
示例15: random_forest
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def random_forest(X_train, y_train, y_test, X_test, num_trees=100):
    """Fit a random forest and evaluate it on the test split.

    Note the parameter order: ``y_test`` comes before ``X_test``.

    Args:
        X_train: training features.
        y_train: training targets.
        y_test: test targets.
        X_test: test features.
        num_trees: number of trees in the forest.

    Returns:
        ``(mse, r2)``: the value of ``mse(y_test, predictions)`` and the
        model's ``score`` on the test split.
    """
    model = RandomForestRegressor(n_estimators=num_trees, oob_score=True)
    model.fit(X_train, y_train)
    # Predict once and reuse the result; the original ran predict() a second
    # time and left this variable unused.
    prediction = model.predict(X_test)
    test_mse = mse(y_test, prediction)
    r2 = model.score(X_test, y_test)
    return (test_mse, r2)