本文整理汇总了Python中sklearn.ensemble.RandomForestRegressor.predict方法的典型用法代码示例。如果您正苦于以下问题：Python RandomForestRegressor.predict方法的具体用法？Python RandomForestRegressor.predict怎么用？Python RandomForestRegressor.predict使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomForestRegressor的用法示例。
在下文中一共展示了RandomForestRegressor.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: learn
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def learn(bike_data, label='registered', n_est = 1000, samp_split = 10):
    """Fit a random forest on log(y + 1) of one column and report train/test error.

    Args:
        bike_data: table exposing ``[label]`` and ``.drop(columns, axis=1)``
            (presumably a pandas DataFrame -- confirm against the caller).
        label: name of the column used as the regression target.
        n_est: number of trees in the forest.
        samp_split: ``min_samples_split`` passed to the forest.

    Returns:
        The fitted ``RandomForestRegressor``.

    Side effects:
        Prints ``n_est``, ``samp_split`` and the train/test values returned by
        the ``rmsle`` helper (defined elsewhere) on one line.
    """
    # Columns that must not be used as predictors.
    excluded_columns = ['count', 'registered', 'casual', 'dayMonth', 'month']
    features = bike_data.drop(excluded_columns, axis=1)
    target = bike_data[label]

    # 60/40 split with a fixed seed so successive runs are comparable.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.4, random_state=1)

    # The model is trained on log(y + 1); the training target is flattened to
    # 1-D first so scikit-learn treats it as a single output variable.
    log_y_train = np.log(np.ravel(y_train) + 1)
    log_y_test = np.log(y_test + 1)

    forest = RandomForestRegressor(
        n_estimators=n_est,
        min_samples_split=samp_split,
        n_jobs=-1,
        random_state=0,
    )
    forest.fit(X_train, log_y_train)

    train_prediction = forest.predict(X_train)
    test_prediction = forest.predict(X_test)

    # One line per run, meant to be collected into an optimisation log.
    train_error = rmsle(log_y_train, train_prediction)
    test_error = rmsle(log_y_test, test_prediction)
    print('%d %d %.3f %.3f' % (n_est, samp_split, train_error, test_error))
    return forest
示例2: stepwise_best_features_per_cluster
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def stepwise_best_features_per_cluster(X, Y, all_feature_metadata):
    """Greedy forward selection of feature families, done separately per cluster.

    For every distinct value of ``X['cluster']`` the rows of that cluster are
    taken from ``X`` and ``Y`` (target column ``ALSFRS_slope``; missing targets
    are replaced by the cluster mean).  Up to six rounds are run; in each round
    every not-yet-selected family is tried on top of the already selected
    derived features and the family giving the lowest *training* RMSE is added.

    Args:
        X: feature table with a ``cluster`` column.  Note that the baseline
            fit uses all columns of ``X``, including ``cluster``.
        Y: target table with ``cluster`` and ``ALSFRS_slope`` columns.
        all_feature_metadata: mapping ``family -> {"derived_features": [...]}``.

    Returns:
        dict mapping each cluster value to the list of selected family names.

    Notes:
        Written for Python 3 (``print`` function, ``dict.items``).  Selection
        stops early when no unselected family is left, instead of raising
        ``IndexError`` on an empty candidate list.
    """
    max_rounds = 6

    def _fit_and_score(features, target):
        # Same estimator settings for the baseline and for every candidate.
        model = RandomForestRegressor(min_samples_leaf=60, random_state=0, n_estimators=1000)
        model = model.fit(features, target)
        rmse = np.sqrt(np.mean((model.predict(features) - target) ** 2))
        return model, rmse

    best_features_per_cluster = {}
    for c in sorted(X['cluster'].unique()):
        seg_X, seg_Y = X[X['cluster'] == c], Y[Y['cluster'] == c].ALSFRS_slope
        print("cluster:", c, "with size:", seg_X.shape, "with mean target:", seg_Y.mean(), "std:", seg_Y.std())
        seg_Y = seg_Y.fillna(seg_Y.mean())

        model, baseline_rmse = _fit_and_score(seg_X, seg_Y)
        print("best we can do with all features:", baseline_rmse)
        print("using model:", model)

        selected_fams = set()
        selected_derived = set()
        for _ in range(max_rounds):
            t1 = time.time()
            score_per_family = {}
            for family, fm in all_feature_metadata.items():
                if family in selected_fams:
                    continue
                columns = list(selected_derived) + list(fm["derived_features"])
                _, score_per_family[family] = _fit_and_score(seg_X[columns], seg_Y)
            elapsed = time.time() - t1

            if not score_per_family:
                # Fewer families than rounds: nothing left to add.
                break

            # Lowest RMSE wins; ties resolve to the first family encountered.
            best_fam = min(score_per_family.items(), key=operator.itemgetter(1))
            print("adding best family:", best_fam, "time:", elapsed)
            selected_fams.add(best_fam[0])
            selected_derived.update(all_feature_metadata[best_fam[0]]["derived_features"])
        best_features_per_cluster[c] = list(selected_fams)
    return best_features_per_cluster
示例3: regressionBoston
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def regressionBoston():
    """Fit a random forest on the module-level Boston data and print test metrics.

    Reads the module-level ``bostonFeatures`` and ``bostonTargets`` (defined
    elsewhere), splits them 80/20 with a fixed seed, fits a 500-tree
    ``RandomForestRegressor`` on the *unscaled* training features and prints
    the out-of-bag R^2, the test R^2 and the Spearman/Pearson correlations
    between test targets and predictions.

    The scaled copies and the two PCA projections are computed but not used by
    the printed metrics; they are kept for parity with the original snippet.

    Returns:
        None.
    """
    from scipy.stats import spearmanr, pearsonr
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import r2_score

    global bostonFeatures, bostonTargets
    X_train, X_test, y_train, y_test = train_test_split(
        bostonFeatures, bostonTargets, train_size=0.8, random_state=42)

    # Standardise using statistics of the training split only.
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = pd.DataFrame(
        scaler.transform(X_train), index=X_train.index.values, columns=X_train.columns.values)
    X_test_scaled = pd.DataFrame(
        scaler.transform(X_test), index=X_test.index.values, columns=X_test.columns.values)

    # PCA on the raw features.
    pca = PCA()
    pca.fit(X_train)
    cpts = pd.DataFrame(pca.transform(X_train))
    x_axis = np.arange(1, pca.n_components_ + 1)

    # PCA on the standardised features.  The projection must use the PCA that
    # was fitted on the scaled data (the original used `pca` here by mistake).
    pca_scaled = PCA()
    pca_scaled.fit(X_train_scaled)
    cpts_scaled = pd.DataFrame(pca_scaled.transform(X_train_scaled))

    rf = RandomForestRegressor(n_estimators=500, oob_score=True, random_state=0)
    rf.fit(X_train, y_train)

    predicted_train = rf.predict(X_train)
    predicted_test = rf.predict(X_test)
    test_score = r2_score(y_test, predicted_test)
    spearman = spearmanr(y_test, predicted_test)
    pearson = pearsonr(y_test, predicted_test)

    print('Out-of-bag R-2 score estimate: {0}'.format(rf.oob_score_))
    print('Test data R-2 score: {0}'.format(test_score))
    print('Test data Spearman correlation: {0}'.format(spearman[0]))
    print('Test data Pearson correlation: {0}'.format(pearson[0]))
示例4: rf_model
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def rf_model(train, test, params):
    """
    Blend two random forests trained on differently transformed targets.

    Takes in: training set, test set (tables whose last column is excluded
              from the features; ``train`` must have a "cost" column),
              params is a dict of RandomForestRegressor keyword arguments
              (it is unpacked with ``**params``)
    Returns: array of predictions, the mean of
             expm1(forest fitted on log1p(cost)) and
             (forest fitted on cost ** (1/45)) ** 45

    Also prints a banner that reads the module-level ``rf_train`` and
    ``rf_test`` path strings (defined elsewhere).  Written for Python 3;
    ``DataFrame.as_matrix`` was replaced by plain column selection plus
    ``.values`` so the function also works on pandas versions without it.
    """
    feature_columns = train.columns[:-1]
    X = train[feature_columns].values.astype(float)
    y = train["cost"].values.astype(float)
    X_test = test[test.columns[:-1]].values.astype(float)

    print("#############################################")
    print("Building Random Forest Model from:")
    print("rf training set:", rf_train.split("/")[-1])
    print("rf testing set:", rf_test.split("/")[-1])
    print()
    print("Parameters:")
    print(params)
    print("#############################################")
    print("...")
    print()

    # Model 1: fit on log1p(cost); inverted with expm1 below.
    rf = RandomForestRegressor(**params)
    ylog1p = np.log1p(y)
    rf.fit(X, ylog1p)
    y_pred1 = rf.predict(X_test)

    # Model 2: fit on the 45th root of cost; inverted with the 45th power.
    rf2 = RandomForestRegressor(**params)
    ypower3 = np.power(y, 1 / 45.0)
    rf2.fit(X, ypower3)
    y_pred2 = rf2.predict(X_test)

    # Average both predictions after mapping them back to the cost scale.
    y_pred = (np.expm1(y_pred1) + np.power(y_pred2, 45.0)) / 2.0
    return y_pred
示例5: performance
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def performance(self):
    """Fit a 50-tree random forest and store its train and test scores.

    Steps, all delegated to methods/attributes of ``self`` defined elsewhere:
    drop a fixed list of columns, call ``split_dataset()``, fit on
    ``self.Xt``/``self.yt``, then score predictions for ``self.Xt`` and
    ``self.Xv`` with ``self.eval_score``.  Results are written to
    ``self.training_score`` and ``self.test_score``; nothing is returned.
    (Xt/yt and Xv/yv are presumably the train and validation splits
    produced by ``split_dataset`` -- confirm in the class.)
    """
    columns_to_drop = [
        "institute_latitude",
        "institute_longitude",
        "institute_state",
        "institute_country",
        "var10",
        "var11",
        "var12",
        "var13",
        "var14",
        "var15",
        "instructor_past_performance",
        "instructor_association_industry_expert",
        "secondary_area",
        "var24",
    ]
    self.remove_columns(columns_to_drop)
    self.split_dataset()

    # Alternatives tried earlier by the author (left disabled):
    # GradientBoostingRegressor(learning_rate=0.1, n_estimators=200, subsample=0.9)
    # and ExtraTreesRegressor(n_estimators=50, min_samples_leaf=5).
    forest = RandomForestRegressor(n_estimators=50)
    forest.fit(self.Xt, self.yt)

    train_prediction = forest.predict(self.Xt)
    self.training_score = self.eval_score(self.yt, train_prediction)

    validation_prediction = forest.predict(self.Xv)
    self.test_score = self.eval_score(self.yv, validation_prediction)
示例6: train_sklearn_forest
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def train_sklearn_forest():
    """Train one 20-tree random forest per target column of the global data.

    Reads the module-level arrays ``XAlltr``, ``XAllcv``, ``yAlltr`` and
    ``yAllcv`` and appends every fitted model to the module-level ``models``
    list (all defined elsewhere).  For each column of ``yAlltr`` a forest is
    fitted on the training data and scored with the ``pred_error`` helper on
    both the training and the cross-validation data.

    Returns:
        list of ``(trees, feature, err, errcv)`` tuples, one per fitted model.
        The original built this list but dropped it; returning it is
        backward-compatible for callers that ignored the ``None`` result.
    """
    errors = []
    X = XAlltr
    Xcv = XAllcv
    print("training sklearn forset")
    for feature in range(np.shape(yAlltr)[1]):
        y = yAlltr[:, feature]
        ycv = yAllcv[:, feature]
        # Only one forest size is tried at the moment; extend the list to
        # compare several numbers of trees.
        for trees in [20]:
            clf = RandomForestRegressor(n_estimators=trees)
            clf.fit(X, y)
            pred = clf.predict(X)
            err = pred_error(y, pred)
            predcv = clf.predict(Xcv)
            errcv = pred_error(ycv, predcv)
            print([trees, feature, err, errcv])
            errors.append((trees, feature, err, errcv))
            models.append(clf)
    return errors
示例7: backward_best_features_per_cluster
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def backward_best_features_per_cluster(X, Y, all_feature_metadata):
    """Greedy backward elimination of feature families, done per cluster.

    For every distinct value of ``X['cluster']`` the rows of that cluster are
    taken from ``X`` and ``Y`` (target column ``ALSFRS_slope``; missing targets
    are replaced by the cluster mean).  Starting from all families, one family
    is removed per round until six remain.

    In each round every remaining family is scored by the *training* RMSE of a
    forest fitted WITHOUT that family's derived features.  The family whose
    removal yields the lowest RMSE is the one that is dropped, because it is
    the one the model misses least.  (The original picked the highest RMSE,
    which discards the most informative family first.)

    Args:
        X: feature table with a ``cluster`` column.
        Y: target table with ``cluster`` and ``ALSFRS_slope`` columns.
        all_feature_metadata: mapping ``family -> {"derived_features": [...]}``.

    Returns:
        dict mapping each cluster value to the list of surviving family names.

    Notes:
        Written for Python 3 (``print`` function, ``dict.items``).
    """
    families_to_keep = 6

    def _fit_rmse(features, target):
        model = RandomForestRegressor(min_samples_leaf=60, random_state=0, n_estimators=1000).fit(
            features, target)
        return np.sqrt(np.mean((model.predict(features) - target) ** 2))

    def _derived_features(families):
        # Union of the derived feature names of the given families.
        derived = set()
        for fam in families:
            derived.update(all_feature_metadata[fam]['derived_features'])
        return derived

    best_features_per_cluster = {}
    for c in sorted(X['cluster'].unique()):
        seg_X, seg_Y = X[X['cluster'] == c], Y[Y['cluster'] == c].ALSFRS_slope
        print("cluster:", c, "with size:", seg_X.shape, "with mean target:", seg_Y.mean(), "std:", seg_Y.std())
        seg_Y = seg_Y.fillna(seg_Y.mean())
        print("best we can do with all features:", _fit_rmse(seg_X, seg_Y))

        selected_fams = set(all_feature_metadata.keys())
        selected_derived = _derived_features(selected_fams)

        while len(selected_fams) > families_to_keep:
            t1 = time.time()
            score_per_family = {}
            for family, fm in all_feature_metadata.items():
                if family not in selected_fams:
                    continue
                remaining = list(selected_derived - set(fm["derived_features"]))
                score_per_family[family] = _fit_rmse(seg_X[remaining], seg_Y)
            elapsed = time.time() - t1

            # Lowest RMSE without the family == least useful family.
            worst_fam = min(score_per_family.items(), key=operator.itemgetter(1))
            print("removing worst family:", worst_fam, "time:", elapsed)
            selected_fams.remove(worst_fam[0])
            selected_derived = _derived_features(selected_fams)
        best_features_per_cluster[c] = list(selected_fams)
    return best_features_per_cluster
示例8: randomforest_tuning
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def randomforest_tuning(data, target, network):
    """Grid-search a random forest with 10-fold CV and plot the best model's RMSE.

    The grid covers ``n_estimators`` in {29, 30}, ``max_features`` in
    {n_features - 1, n_features} and ``max_depth`` in {11, 12}.  The
    configuration with the lowest mean fold RMSE (if below the initial
    threshold of 10) is then re-evaluated on a fresh shuffled 10-fold split;
    the per-fold RMSEs are plotted, their mean is printed and the list is
    returned.

    Args:
        data: feature matrix supporting ``data[0]`` and index-array selection
            (presumably a 2-D numpy array -- confirm against the caller).
        target: target vector supporting index-array selection.
        network: unused; kept for interface compatibility.

    Returns:
        list of RMSE values, one per fold of the final cross-validation.

    Notes:
        ``KFold(len(target), 10, shuffle=True)`` and direct iteration over the
        object look like the legacy ``sklearn.cross_validation`` API -- confirm
        which KFold the file imports.
        Fixed: the original wrote ``len(data[0]+1)``, which equals
        ``len(data[0])``, so only ``n_features - 1`` was ever tried.
    """
    n_features = len(data[0])

    def _fold_rmses(model, folds):
        # Refit `model` on every training fold and collect the test RMSEs.
        rmses = []
        for train_index, test_index in folds:
            data_train, data_test = data[train_index], data[test_index]
            target_train, target_test = target[train_index], target[test_index]
            model.fit(data_train, target_train)
            rmses.append(sqrt(np.mean((model.predict(data_test) - target_test) ** 2)))
        return rmses

    kf = KFold(len(target), 10, shuffle = True)
    # NOTE(review): a candidate only replaces the default model when its mean
    # RMSE is below 10; this threshold is data-scale dependent.
    RMSE_BEST = 10
    rfr_best = RandomForestRegressor(n_estimators = 30, max_features = n_features, max_depth = 8)
    for nEstimators in range(29, 31):
        # max(1, ...) avoids the invalid max_features=0 for single-feature data.
        for maxFeatures in range(max(1, n_features - 1), n_features + 1):
            for maxDepth in range(11, 13):
                rfr = RandomForestRegressor(n_estimators = nEstimators, max_features = maxFeatures, max_depth = maxDepth)
                mean_rmse = np.mean(_fold_rmses(rfr, kf))
                if RMSE_BEST > mean_rmse:
                    rfr_best = rfr
                    RMSE_BEST = mean_rmse

    kf_final = KFold(len(target), 10, shuffle = True)
    RMSE_FINAL = _fold_rmses(rfr_best, kf_final)

    plt.figure()
    plt.plot(range(1, len(RMSE_FINAL) + 1), RMSE_FINAL)
    plt.title("The best RMSE with random forest")
    plt.xlabel("cross validation times")
    plt.ylabel("RMSE")
    plt.show()
    print(np.mean(RMSE_FINAL))
    return RMSE_FINAL
示例9: rf_model
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def rf_model(train, test, params):
    '''
    Blend two random forests trained on differently transformed targets.

    Takes in: training set, test set (tables whose last column is excluded
              from the features; ``train`` must have a 'cost' column),
              params is a dict of RandomForestRegressor keyword arguments
              (it is unpacked with ``**params``)
    Returns: array of predictions, the mean of
             expm1(forest fitted on log1p(cost)) and
             (forest fitted on cost ** (1/40)) ** 40

    Also prints a banner that reads the module-level ``rf_train`` and
    ``rf_test`` path strings (defined elsewhere).  Written for Python 3;
    ``DataFrame.as_matrix`` was replaced by plain column selection plus
    ``.values`` so the function also works on pandas versions without it.
    '''
    feature_columns = train.columns[:-1]
    X = train[feature_columns].values.astype(float)
    y = train['cost'].values.astype(float)
    X_test = test[test.columns[:-1]].values.astype(float)

    print('#############################################')
    print('Building Random Forest Model')
    print('rf training set:', rf_train.split('/')[-1])
    print('rf testing set:', rf_test.split('/')[-1])
    print()
    print('Parameters:')
    print(params)
    print('#############################################')
    print('...')
    print()

    # Model 1: fit on log1p(cost); inverted with expm1 below.
    rf = RandomForestRegressor(**params)
    ylog1p = np.log1p(y)
    rf.fit(X, ylog1p)
    y_pred1 = rf.predict(X_test)

    # Model 2: fit on the 40th root of cost; inverted with the 40th power.
    rf2 = RandomForestRegressor(**params)
    ypower3 = np.power(y, 1 / 40.0)
    rf2.fit(X, ypower3)
    y_pred2 = rf2.predict(X_test)

    # Average both predictions after mapping them back to the cost scale.
    y_pred = (np.expm1(y_pred1) + np.power(y_pred2, 40.0)) / 2.0
    return y_pred
示例10: train_sklearn_forest
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def train_sklearn_forest(XAlltr, XAllcv, yAlltr, yAllcv, trees=20):
    """Train one random forest per target column and score each of them.

    Args:
        XAlltr: training features.
        XAllcv: cross-validation features.
        yAlltr: 2-D training targets; one model is fitted per column.
        yAllcv: 2-D cross-validation targets, same column layout as yAlltr.
        trees: number of trees per forest.

    Returns:
        ``(models, errors)`` where ``models`` is the list of fitted forests
        and ``errors`` is a list of ``(trees, feature, err, errcv)`` tuples
        computed with the ``pred_error`` helper (defined elsewhere).
    """
    errors = []
    models = []
    X = XAlltr
    Xcv = XAllcv
    print("training sklearn forset")
    for feature in range(np.shape(yAlltr)[1]):
        y = yAlltr[:, feature]
        ycv = yAllcv[:, feature]
        # The original first built a constrained forest
        # (min_samples_leaf=30, max_depth=20) and immediately overwrote it;
        # only the unconstrained estimator was ever fitted, so only that one
        # is constructed here.
        clf = RandomForestRegressor(n_estimators=trees)
        clf.fit(X, y)
        pred = clf.predict(X)
        err = pred_error(y, pred, feature)
        predcv = clf.predict(Xcv)
        errcv = pred_error(ycv, predcv, feature)
        print([trees, feature, err, errcv])
        errors.append((trees, feature, err, errcv))
        models.append(clf)
    return models, errors
示例11: mapper_regression
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def mapper_regression(line):
    '''
    - mapper function for local regression
    - the mapper function works on an individual keyword
    - a random forest regression model is applied to predict the next time point
    - we have for each keyword
    -   [(time 1, value 1) ... (time n, value n)]
    - transformed into
    -   training feature set  [[value 2 ... value 8],
                               ...,
                               [value 85 ... value 91]]
    -   training label        [value 9, ..., value 92]
    -   test feature set      [value 86 ... value 92]
    - no parameter selection for current code

    Args:
        line: pair ``(keyword, [(time, value), ...])`` with at least 93 points
            (indices up to 92 are read after sorting by time).

    Returns:
        ``(keyword, (prediction, rmse))`` where ``prediction`` is the forecast
        for the point following the last window and ``rmse`` is the root mean
        squared error on the training windows.

    Fixes relative to the original snippet:
        * the label is now the value AFTER each 7-point window (index i + 7);
          before, it was the window's own last element (index i + 6), which
          leaked the answer into the features and contradicted the layout
          documented above;
        * the error is a real RMSE (mean, not sum, of squared residuals);
        * ``min_samples_split`` is 2, the smallest value scikit-learn accepts
          (splitting a 1-sample node is impossible, so trees are unchanged);
        * the single test window is passed to ``predict`` as a 2-D array.
    '''
    window = 7

    # data transformation: sort by time, keep the value column
    npdata = np.array(line[1])
    npdata = npdata[npdata[:, 0].argsort()]
    values = npdata[:, 1]

    f_tr = []
    l_tr = []
    for i in range(2, 86):
        f_tr.append(values[i:i + window])
        l_tr.append(values[i + window])
    f_ts = values[86:86 + window]

    # training
    clf = RandomForestRegressor(n_estimators=150, min_samples_split=2)
    clf.fit(f_tr, l_tr)

    # training-set prediction, reused for the error below
    y_tr = clf.predict(f_tr)

    # computing rmse
    rmse = math.sqrt(np.mean((y_tr - np.asarray(l_tr)) ** 2))

    # predict expects shape (n_samples, n_features)
    prediction = clf.predict(f_ts.reshape(1, -1)).tolist()[0]
    return (line[0], (prediction, rmse))
示例12: random_forest
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def random_forest(X_train, y_train, y_test, X_test, num_trees=100):
    """Fit a random forest and return its test error and R^2.

    Note the argument order: ``y_test`` comes before ``X_test``.

    Args:
        X_train: training features.
        y_train: training targets.
        y_test: test targets.
        X_test: test features.
        num_trees: number of trees in the forest.

    Returns:
        ``(error, r2)`` where ``error`` is ``mse(y_test, prediction)`` (``mse``
        is defined elsewhere) and ``r2`` is ``model.score(X_test, y_test)``.
    """
    model = RandomForestRegressor(n_estimators=num_trees, oob_score=True)
    model.fit(X_train, y_train)
    # Predict once and reuse the result; the original predicted twice.
    prediction = model.predict(X_test)
    test_error = mse(y_test, prediction)
    r2 = model.score(X_test, y_test)
    return (test_error, r2)
示例13: RFCV
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def RFCV(data,YColumn,k = 5,nTreeInitial = 50,maxDepth = 5,maxNumTrees = 200):
    """Tune max depth, then number of trees, of a random forest by k-fold CV.

    The data is truncated so its row count is divisible by ``k`` and split
    into ``k`` contiguous folds (no shuffling).  First every depth in
    ``1..maxDepth`` is evaluated with ``nTreeInitial`` trees; then, with the
    best depth fixed, every candidate tree count not exceeding
    ``maxNumTrees`` is evaluated.

    Args:
        data: table supporting ``.shape``, ``.iloc``, ``.drop(col, axis=1)``
            and ``[col]`` (presumably a pandas DataFrame).
        YColumn: one-element list holding the target column name.
        k: number of folds.
        nTreeInitial: number of trees used while tuning the depth.
        maxDepth: largest ``max_depth`` to try.
        maxNumTrees: upper bound on the number of trees to try.

    Returns:
        ``[bestDepth, bestNumTree]``.

    Raises:
        ValueError: if no candidate tree count is <= ``maxNumTrees``.

    Notes:
        The per-fold score is the L2 norm of the residual vector (summed over
        folds), not a mean squared error.
        Uses integer division so it runs on Python 3.  ``maxNumTrees`` no
        longer has to be one of the candidate values; for values that are in
        the list the behaviour is unchanged.
    """
    # YColumn arrives as a one-element list; unwrap to the column name.
    YColumn = YColumn[0]
    # Make the number of rows divisible by k.
    n = data.shape[0] // k * k
    data = data.iloc[:n]

    nCandidates = [2,5,20,50,100,200,300,400,500,1000]
    numTrees = [c for c in nCandidates if c <= maxNumTrees]
    if not numTrees:
        raise ValueError("maxNumTrees must be at least %d" % nCandidates[0])
    depths = range(1, maxDepth + 1)

    # First tune the depth with the initial number of trees.
    depthErrors = [
        _rfcv_cv_error(data, YColumn, k, n, nTreeInitial, d) for d in depths
    ]
    bestDepth = depths[depthErrors.index(min(depthErrors))]

    # Then tune the number of trees at the best depth.
    nErrors = [
        _rfcv_cv_error(data, YColumn, k, n, numTree, bestDepth) for numTree in numTrees
    ]
    bestNumTree = numTrees[nErrors.index(min(nErrors))]
    return([bestDepth,bestNumTree])


def _rfcv_cv_error(data, YColumn, k, n, n_estimators, max_depth):
    """Return the summed residual L2 norm over k contiguous folds."""
    fold_size = n // k
    total_error = 0
    for i in range(k):
        start, stop = i * fold_size, (i + 1) * fold_size
        testingData = data.iloc[start:stop]
        trainingData = data.iloc[list(range(0, start)) + list(range(stop, n))]

        training = trainingData.drop(YColumn, axis=1)
        target = trainingData[YColumn]
        testing = testingData.drop(YColumn, axis=1)

        model = RandomForestRegressor(
            n_estimators=n_estimators, max_depth=max_depth, max_features="sqrt")
        model = model.fit(training, target)

        pred = np.array(model.predict(testing))
        testY = np.array(testingData[YColumn])
        total_error = total_error + np.linalg.norm(pred - testY)
    return total_error
示例14: train_randomforest
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def train_randomforest(train, test, n_estimators=10, cpus=4):
    """Train a random forest predicting ``card.cost`` from text and card attributes.

    Each element of ``train``/``test`` must expose ``text``, ``name``,
    ``types``, ``power``, ``toughness``, ``loyalty``, ``colors`` and ``cost``.
    The text is turned into term-frequency vectors (vocabulary fitted on the
    training texts only) and concatenated with the other attributes.

    Args:
        train: iterable of training cards.
        test: iterable of test cards.
        n_estimators: number of trees.
        cpus: ``n_jobs`` for the forest.

    Returns:
        None.

    Side effects:
        Prints the shapes and the mean ``custom_loss`` on train and test data,
        calls ``print_predictions`` (both helpers are defined elsewhere) and
        pickles its result to ``output.naive.p`` in the working directory.

    Changes relative to the original snippet: the output file is closed via a
    ``with`` block; the unused imports (including ``sklearn.cross_validation``,
    which fails to import on current scikit-learn) and unused locals are gone;
    ``print`` is used as a function (Python 3).
    """
    import numpy as np
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.feature_extraction.text import TfidfTransformer

    def _build_matrix(cards, text_rows):
        # One dense feature row per card: tf vector + types + stats + colors.
        rows, costs, names = [], [], []
        for card, token_text in zip(cards, text_rows):
            names.append(card.name)
            rows.append(np.concatenate((
                token_text.toarray().flatten(),
                card.types,
                [card.power, card.toughness, card.loyalty],
                card.colors,
            )))
            costs.append(card.cost)
        return np.asarray(rows), np.asarray(costs), names

    # Prepare text training data; commas and colons are kept as-is.
    corpus = [t.text for t in train]
    test_corpus = [t.text for t in test]

    count_vect = CountVectorizer(max_features=None)
    X_train_counts = count_vect.fit_transform(corpus)
    X_test_counts = count_vect.transform(test_corpus)

    tf_transformer = TfidfTransformer(use_idf=False).fit(X_train_counts)
    X_train_tf = tf_transformer.transform(X_train_counts)
    X_test_tf = tf_transformer.transform(X_test_counts)

    X_train, y_train, _ = _build_matrix(train, X_train_tf)
    X_test, y_test, X_test_names = _build_matrix(test, X_test_tf)

    rf = RandomForestRegressor(n_estimators=n_estimators, n_jobs=cpus)
    rf.fit(X_train, y_train)

    y_pred = rf.predict(X_train)
    print(y_pred.shape, y_train.shape)
    print("naive train loss", np.mean(custom_loss(y_train, y_pred)))

    y_pred = rf.predict(X_test)
    print("naive test loss", np.mean(custom_loss(y_test, y_pred)))

    result = print_predictions(y_pred, y_test, X_test_names)
    print("saving to output.naive.txt and output.naive.p")
    with open('output.naive.p', 'wb') as out_file:
        pickle.dump(result, out_file)
示例15: experiment
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import predict [as 别名]
def experiment(state, channel):
    """Train a random forest on stacked-mDA features and record cross-entropies.

    Args:
        state: mutable hyper-parameter/result holder.  Reads ``n_layers``,
            ``corruption``, ``dimension_boost`` and the forest parameters;
            writes ``train_costs``, ``valid_costs`` and ``test_costs``
            (one-element lists).
        channel: object providing the ``COMPLETE`` status constant.

    Returns:
        ``channel.COMPLETE``.

    Notes:
        ``load_data``, ``mDA`` and ``binary_xce`` are defined elsewhere.
        Fixed: predictions are now clipped into [0.01, 0.99].  The original
        called ``numpy.clip(0.99, 0.01, pred)``, which clips the constant
        0.99 against ``pred`` and therefore never enforced the lower bound.
        Written for Python 3 (``print`` function).
    """
    # Defaults used when the module is run directly as a script.
    # NOTE(review): block extent reconstructed from a de-indented source;
    # confirm that only these assignments belong under the guard.
    if __name__ == "__main__":
        state.n_layers = 1
        state.corruption = 0.25
        state.dimension_boost = False
        state.n_estimators = 25
        state.max_depth = None
        state.min_samples_split = 1
        state.min_samples_leaf = 1
        state.min_density = 0.1
        state.max_features = "auto"
        state.bootstrap = True

    (train_X, train_Y), (valid_X, valid_Y), (test_X, test_Y) = load_data()

    if state.dimension_boost:
        # The random projection itself is not implemented; only the notice
        # is printed.
        print('Boosting dimension through random matrix')

    # Choose regressor, init hyper-parameters.
    # NOTE(review): `min_density`, `min_samples_split=1` and
    # `max_features="auto"` look like legacy scikit-learn settings -- confirm
    # they are accepted by the installed version.
    cls = RandomForestRegressor(n_estimators = state.n_estimators,
                                max_depth = state.max_depth,
                                min_samples_split = state.min_samples_split,
                                min_samples_leaf = state.min_samples_leaf,
                                min_density = state.min_density,
                                max_features = state.max_features,
                                bootstrap = state.bootstrap)

    # Stacked mDA: each layer transforms the training representation `h` and
    # the [valid, test] representations `Z`.
    print('Computing stacked mDA')
    n_layers = state.n_layers
    corruption = state.corruption
    W, h, Z = 0, train_X, [valid_X, test_X]
    for _ in range(n_layers):
        W, h, Z = mDA.mDA(h, corruption, Z)

    # Train regressor.
    print('training classifier')
    cls.fit(h, train_Y)

    # Keep predictions away from 0 and 1 before the cross-entropy is taken.
    train_pred = numpy.clip(cls.predict(h), 0.01, 0.99).flatten()
    valid_pred = numpy.clip(cls.predict(Z[0]), 0.01, 0.99).flatten()
    test_pred = numpy.clip(cls.predict(Z[1]), 0.01, 0.99).flatten()

    train_ce = binary_xce(train_pred, train_Y.flatten()).mean()
    valid_ce = binary_xce(valid_pred, valid_Y.flatten()).mean()
    test_ce = binary_xce(test_pred, test_Y.flatten()).mean()

    print('train ce : ', train_ce)
    print('valid ce : ', valid_ce)
    print('test ce : ', test_ce)

    state.train_costs = [train_ce]
    state.valid_costs = [valid_ce]
    state.test_costs = [test_ce]
    return channel.COMPLETE