本文整理汇总了Python中sklearn.ensemble.ExtraTreesRegressor类的典型用法代码示例。如果您正苦于以下问题:Python ExtraTreesRegressor类的具体用法?Python ExtraTreesRegressor怎么用?Python ExtraTreesRegressor使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ExtraTreesRegressor类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: fit
def fit(self, X, y, **kwargs):
    """Fit an ExtraTreesRegressor on (X, y).

    Keyword arguments whose names already exist in ``self.INITPARAMS``
    override the stored defaults before the model is built; unknown
    keys are silently ignored.  The fitted model is kept on ``self.model``.
    """
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only).
    for key, value in kwargs.items():
        # Membership test directly on the dict; no .keys() call needed.
        if key in self.INITPARAMS:
            self.INITPARAMS[key] = value
    model = ExtraTreesRegressor(**self.INITPARAMS)
    model.fit(X, y)
    self.model = model
示例2: do_etrees
def do_etrees(filename):
    """Fit extra trees on the merged dataset from *filename* and return a
    DataFrame with in-sample predictions for the first 200 rows.

    NOTE(review): the column is named 'probs' but ``predict`` of a
    regressor returns raw predictions, not probabilities — confirm intent.
    """
    df, Y = create_merged_dataset(filename)
    etree = ExtraTreesRegressor(n_estimators=200, n_jobs=-1, min_samples_leaf=5, random_state=SEED)
    # Explicit keyword: positional `axis` for DataFrame.drop is deprecated in pandas.
    X = df.drop(['driver', 'trip'], axis=1)
    etree.fit(X, Y)
    probs = etree.predict(X[:200])
    return pd.DataFrame({'driver': df['driver'][:200], 'trip': df['trip'][:200], 'probs': probs})
示例3: main
def main():
    """Run 10-fold cross-validation with an ExtraTreesRegressor on each of
    the 15 ACT competition training sets and print per-fold and mean scores."""
    for ind in range(1, 15+1):
        #for ind in [3,4,5,7,9,11,12,13,14,15]: # no 1,2,6,8,10
        print "TrainingSet/ACT%d_competition_training.csv" % ind
        #read in data, parse into training and target sets
        cols, train = read_data("../TrainingSet/ACT%d_competition_training.csv" % ind)
        # First column of each row is the regression target.
        target = np.array( [x[0] for x in train] )
        # Keep only the pre-selected feature columns for this dataset.
        train = filter_cols(train, cols, "../selected/selected_%d.txt" % ind)
        #print("Train: ", len(train), " cols:", len(train[0]))
        train = np.array( train )
        #In this case we'll use a random forest, but this could be any classifier
        cfr = ExtraTreesRegressor(n_estimators=1000, max_features=(len(train[0])//3), n_jobs=8, random_state=1279)
        #Simple K-Fold cross validation. 10 folds.
        # NOTE(review): cross_validation.KFold(n, k=..., indices=...) is the
        # pre-0.15 sklearn API; modern versions use model_selection.KFold(n_splits=...).
        cv = cross_validation.KFold(len(train), k=10, indices=False, shuffle=True)
        #iterate through the training and test cross validation segments and
        #run the classifier on each one, aggregating the results into a list
        results = []
        for traincv, testcv in cv:
            ft = cfr.fit(train[traincv], target[traincv])
            # score() is the coefficient of determination (R^2) on the held-out fold.
            score = ft.score(train[testcv], target[testcv])
            results.append(score)
            print "\tFold %d: %f" % (len(results), score)
        #print out the mean of the cross-validated results
        print "Results: " + str( np.array(results).mean() )
示例4: mul_dtree
def mul_dtree(X, Y2):
forest = ExtraTreesRegressor(n_estimators=5,
compute_importances=True,
random_state=0)
forest.fit(X[:200], Y2[:200])
forest.predict(X[200:])
print Y2[200:]
示例5: predict_with_one
def predict_with_one(X, out_file_name):
    """Score how well each single feature predicts every other feature.

    For every ordered pair (i, j) an ExtraTreesRegressor is trained to
    predict column j from column i alone; the mean absolute error,
    accumulated over `iter_num` shuffle splits and averaged at save time,
    is written as a CSV matrix (row = predictor feature, col = target
    feature) under CODE_PATH.
    """
    n_samples, n_features = X.shape
    iter_num = 3
    # NOTE(review): positional n_samples / n_iter is the pre-0.18 sklearn
    # ShuffleSplit API; newer versions take n_splits and iterate via split(X).
    div = ShuffleSplit(n_samples, n_iter=iter_num, test_size=0.2, random_state=0)
    model = ExtraTreesRegressor(n_estimators=5)
    score_matrix = np.zeros((n_features, n_features))
    t = time()
    round_num = 0
    for train, test in div:
        round_num += 1
        train_samples = X[np.array(train)]
        test_samples = X[np.array(test)]
        for i in range(n_features):
            for j in range(n_features):
                # Single predictor column, sliced i:i+1 to stay 2-D for sklearn.
                X_train = train_samples[:, i:i+1]
                X_test = test_samples[:, i:i+1]
                y_train = train_samples[:, j]
                y_test = test_samples[:, j]
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                mae = mean_absolute_error(y_test, y_pred)
                score_matrix[i, j] += mae
                print('Round', round_num, '|', i, j, mae, time()-t)
    np.savetxt(os.path.join(CODE_PATH, out_file_name),
               score_matrix/iter_num, fmt='%.3f', delimiter=',')
示例6: cal_important_features
def cal_important_features(batch=10, threshold=1e-4):
    """Repeatedly fit an ExtraTreesRegressor and count, per attribute group,
    how often each feature key exceeds the importance *threshold*.

    Runs *batch* independent fits, tallies the "good" keys returned by
    show_important_features() into tot_goot_atrs, prints an occurrence
    histogram per attribute, and returns the tally dict.
    """
    # NOTE(review): relies on module-level dat, ATRS, num and
    # show_important_features defined elsewhere — not visible here.
    X_samples, Y_samples, scaler = dat.data_prepare('ocpm', 'lifetime_ecpm', outlier=0.05)
    tot_goot_atrs = {}
    # One counting dict per attribute group (ATRS[5:] presumably the feature groups).
    for a in ATRS[5:]: tot_goot_atrs[a] = {}
    for i in np.arange(1,batch+1):
        Ts = timeit.default_timer()
        model = ExtraTreesRegressor(n_jobs=6)
        model.fit(X_samples, Y_samples)
        print "Totally %i features." % len(model.feature_importances_)
        print "[Labels] %i categories, %i interests, %i client_names, %i auto_tags" % (num.categories_len, num.interests_len, num.client_names_len, num.auto_tags_len)
        good_atrs = show_important_features(model.feature_importances_, threshold)
        for a in reversed(ATRS[5:]):
            for b in good_atrs[a]:
                # Count how many batches flagged key b of attribute a as important.
                if b in tot_goot_atrs[a]:
                    tot_goot_atrs[a][b] += 1
                else:
                    tot_goot_atrs[a][b] = 1
        print "%i batch finished in %.1f secs." % (i, (timeit.default_timer() - Ts))
        print "------------------------------------------------"
    # show performances
    for atr in reversed(ATRS[5:]):
        print "-------[%s]-----------------------" % atr
        for j in np.arange(1,batch+1):
            # Keys seen as important in at least j of the batch runs.
            good_keys = [k for k,v in tot_goot_atrs[atr].items() if (v >= j)]
            print "%i keys occurs > %i times." % (len(good_keys), j)
    return tot_goot_atrs
示例7: fit
def fit(self, X, y, weights = None, **kwargs):
    """Fit a constrained linear model whose coefficients are guided by
    extra-trees feature importances.

    The weighted R^2 of y on X is apportioned to each covariate in
    proportion to its ExtraTrees importance, then a constrained
    optimization over the weighted correlation matrix produces the final
    (standardized) coefficients.  Sets self.importances, self.coef_ and,
    when self.fit_intercept is set, self.intercept_.  Returns self.
    """
    # Default to uniform observation weights.
    if weights is None: weights = np.ones(y.shape[0])
    # data column 0 is y; columns 1.. are the covariates.
    data = np.hstack((y.reshape(y.shape[0],1),X))
    S = wcov(data, weights)       # weighted covariance (helper defined elsewhere)
    corr = wcorr(data, weights)   # weighted correlation (helper defined elsewhere)
    wsd = np.sqrt(S.diagonal())   # weighted standard deviations
    ExtraTrees = ExtraTreesRegressor(**kwargs)
    ExtraTrees.fit(X,y, sample_weight=weights)
    # Population R^2 from the weighted covariance: S_yx S_xx^-1 S_xy / S_yy.
    Rsquare = ( S[0,1:].dot(np.linalg.inv(S[1:,1:]).dot(S[1:,0])) )/S[0,0]
    # assign proportion of Rsquare to each covariate dep. on importance
    self.importances = ExtraTrees.feature_importances_ * Rsquare
    model = self.constrained_optimization( corr )
    if self.fit_intercept:
        # Weighted means via a normalized diagonal weight matrix.
        w = np.diagflat( weights/np.sum(weights),k=0)
        wmean = np.sum(w.dot(data), axis=0)
        self.intercept_ = wmean[0] - wsd[0]*np.sum(wmean[1:]*model.x/wsd[1:])
    # De-standardize the optimized coefficients back to the original scale.
    self.coef_ = wsd[0]*model.x/wsd[1:]
    return self
示例8: main
def main():
    """Train a 2000-tree ExtraTreesRegressor per ACT dataset and write
    stacked predictions for both the training and test sets."""
    for ind in range(1, 15+1):
        print "TrainingSet/ACT%d_competition_training.csv" % ind
        #read in data, parse into training and target sets
        cols, molecules1, train = read_data("../TrainingSet/ACT%d_competition_training.csv" % ind)
        # First column of each training row is the regression target.
        target = np.array( [x[0] for x in train] )
        #load train
        train = filter_cols(train, cols, "../selected/cor9/selected_%d.txt" % ind)
        train = np.array(train)
        #print("Train: ", len(train), " cols:", len(train[0]))
        # seeds used: orig=1279, cor8=1278, cor9=1277
        cfr = ExtraTreesRegressor(n_estimators=2000, max_features=(len(train[0])//3), n_jobs=8, random_state=1277)
        #min_samples_leaf=2, min_samples_split=2, random_state=1279)
        rf = cfr.fit(train, target)
        #predict train (in-sample predictions used as stacking features)
        pred = rf.predict(train)
        write_file("erStacking/cor9/er_stacking_%d.csv" % ind, molecules1, pred)
        #load test
        cols, molecules2, test = read_data("../TestSet/ACT%d_competition_test.csv" % ind)
        test = filter_cols(test, cols, "../selected/cor9/selected_%d.txt" % ind)
        test = np.array(test)
        #predict test
        pred = rf.predict(test)
        write_file("erStacking/test/cor9/er_submission_%d.csv" % ind, molecules2, pred)
示例9: fit
def fit(self,data_train,target):
    """Fit six sub-models used by this ensemble:

    - gbr / xfr: GBM and extra trees on the full feature set,
    - gbr_tr_fit / xfr_tr_fit: refits on importance-selected features
      (threshold 0.35 * mean importance),
    - gbr_cat_fit / xfr_cat_fit: fits on the 'var*' columns only.

    Returns self.
    """
    self.target_train = target
    # Columns whose name contains 'var' are treated as the categorical subset.
    self.catcol = data_train.filter(like='var').columns.tolist()
    #start_gbr_tr = time.clock()
    self.gbr = GradientBoostingRegressor(n_estimators =self.nest,max_depth=7)
    self.gbr.fit(data_train,self.target_train)
    # NOTE(review): estimator.transform(X, threshold=...) was removed in
    # sklearn 0.19; modern code uses feature_selection.SelectFromModel.
    self.transformed_train_gbr = self.gbr.transform(data_train,threshold="0.35*mean")
    self.gbr_tr_fit = GradientBoostingRegressor(n_estimators =self.nest,max_depth=7)
    self.gbr_tr_fit.fit(self.transformed_train_gbr,self.target_train)
    #end_gbr_tr = time.clock()
    #print >> log, "time_gbr_tr = ", end_gbr_tr-start_gbr_tr
    #start_xfr_tr = time.clock()
    self.xfr= ExtraTreesRegressor(n_estimators =self.nest,max_depth=7)
    self.xfr.fit(data_train,self.target_train)
    self.transformed_train_xfr = self.xfr.transform(data_train,threshold="0.35*mean")
    self.xfr_tr_fit = ExtraTreesRegressor(n_estimators =self.nest,max_depth=7)
    self.xfr_tr_fit.fit(self.transformed_train_xfr,self.target_train)
    #end_xfr_tr = time.clock()
    #print >> log, "time_xfr_tr = ", end_xfr_tr-start_xfr_tr
    #start_gbr_cat = time.clock()
    self.gbr_cat_fit = GradientBoostingRegressor(n_estimators =self.nest,max_depth=7)
    self.gbr_cat_fit.fit(data_train[self.catcol],self.target_train)
    #end_gbr_cat = time.clock()
    #print >> log, "time_gbr_cat = ", end_gbr_cat-start_gbr_cat
    #start_xfr_cat = time.clock()
    self.xfr_cat_fit = ExtraTreesRegressor(n_estimators =self.nest,max_depth=7)
    self.xfr_cat_fit.fit(data_train[self.catcol],self.target_train)
    #end_xfr_cat = time.clock()
    #print >> log, "time_xfr_cat = ", end_xfr_cat-start_xfr_cat
    return self
示例10: build_models
def build_models(self):
    """Drop unused columns, then fit a gradient-boosting, a random-forest
    and an extra-trees regressor on (self.X, self.y).

    Returns the three fitted models as a list, in that order.
    """
    unused_columns = [
        "institute_latitude",
        "institute_longitude",
        "institute_state",
        "institute_country",
        "var10",
        "var11",
        "var12",
        "var13",
        "var14",
        "var15",
        "instructor_past_performance",
        "instructor_association_industry_expert",
        "secondary_area",
        "var24",
    ]
    self.remove_columns(unused_columns)
    models = [
        GradientBoostingRegressor(learning_rate=0.1, n_estimators=200, subsample=0.8),
        RandomForestRegressor(n_estimators=50),
        ExtraTreesRegressor(n_estimators=50),
    ]
    for estimator in models:
        estimator.fit(self.X, self.y)
    return models
示例11: build_extra_tree_regressor
def build_extra_tree_regressor(X_test, X_train_full, y_train_full):
print "Building ExtraTrees regressor..."
etr = ExtraTreesRegressor(n_estimators=500)
etr.fit(X_train_full, y_train_full)
etr_predict = etr.predict(X_test)
return etr_predict
示例12: get_forest
def get_forest(X_names=Xs, y_names=ys, num_trees=256, data=data):
    """Fit a bootstrapped ExtraTreesRegressor mapping data[X_names] to
    data[y_names].

    Returns (fitted_forest, fit_time_seconds).
    """
    forest = ExtraTreesRegressor(
        n_estimators=num_trees, n_jobs=62, bootstrap=True)
    # list(...) instead of a pass-through comprehension; .loc just needs
    # a list of column labels.
    X = data.loc[:, list(X_names)]
    y = data.loc[:, list(y_names)]
    start = time()
    rfr = forest.fit(X, y)
    end = time()
    return (rfr, end - start)
示例13: reg_skl_etr
def reg_skl_etr(param, data):
    """Fit an ExtraTreesRegressor with the given hyper-parameters on the
    training split and score it on the CV split.

    Returns (RMSE on the CV split, CV predictions).
    """
    X_tr, X_cv, y_class_tr, y_class_cv, y_reg_tr, y_reg_cv = data
    model = ExtraTreesRegressor(
        n_estimators=param['n_estimators'],
        max_features=param['max_features'],
        n_jobs=param['n_jobs'],
        random_state=param['random_state'],
    )
    model.fit(X_tr, y_reg_tr)
    prediction = model.predict(X_cv)
    return getscoreRMSE(y_reg_cv, prediction), prediction
示例14: extra_trees_regressor
def extra_trees_regressor(x, y, n_estimators, max_depth):
    """Mean 3-fold cross-validated RMSE of an
    ExtraTreesRegressor(n_estimators, max_depth) on (x, y)."""
    fold_rmse = []
    for idx_train, idx_test in KFold(len(x), n_folds=3):
        model = ExtraTreesRegressor(n_estimators=n_estimators,
                                    max_depth=max_depth, random_state=0)
        model.fit(x[idx_train], y[idx_train])
        rmse = mean_squared_error(model.predict(x[idx_test]), y[idx_test]) ** 0.5
        fold_rmse.append(rmse)
    return np.mean(fold_rmse)
示例15: MyExtraTreeReg
class MyExtraTreeReg(MyRegressor):
def __init__(self, params=dict()):
self._params = params
self._extree = ExtraTreesRegressor(**(self._params))
def update_params(self, updates):
self._params.update(updates)
self._extree = ExtraTreesRegressor(**(self._params))
def fit(self, Xtrain, ytrain):
self._extree.fit(Xtrain, ytrain)
def predict(self, Xtest, option = None):
return self._extree.predict(Xtest)
def plt_feature_importance(self, fname_list, f_range = list()):
importances = self._extree.feature_importances_
std = np.std([tree.feature_importances_ for tree in self._extree.estimators_], axis=0)
indices = np.argsort(importances)[::-1]
fname_array = np.array(fname_list)
if not f_range:
f_range = range(indices.shape[0])
n_f = len(f_range)
plt.figure()
plt.title("Extra Tree Feature importances")
plt.barh(range(n_f), importances[indices[f_range]],
color="b", xerr=std[indices[f_range]], ecolor='k',align="center")
plt.yticks(range(n_f), fname_array[indices[f_range]])
plt.ylim([-1, n_f])
plt.show()
def list_feature_importance(self, fname_list, f_range = list(), return_list = False):
importances = self._extree.feature_importances_
indices = np.argsort(importances)[::-1]
print 'Extra tree feature ranking:'
if not f_range :
f_range = range(indices.shape[0])
n_f = len(f_range)
for i in range(n_f):
f = f_range[i]
print '{0:d}. feature[{1:d}] {2:s} ({3:f})'.format(f + 1, indices[f], fname_list[indices[f]], importances[indices[f]])
if return_list:
return [indices[f_range[i]] for i in range(n_f)]