本文整理汇总了Python中sklearn.ensemble.ExtraTreesRegressor.transform方法的典型用法代码示例。如果您正苦于以下问题：Python ExtraTreesRegressor.transform方法的具体用法？Python ExtraTreesRegressor.transform怎么用？Python ExtraTreesRegressor.transform使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.ExtraTreesRegressor的用法示例。
在下文中一共展示了ExtraTreesRegressor.transform方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ExtraTreesPreprocessorRegression
# Required import: from sklearn.ensemble import ExtraTreesRegressor [as alias]
# Or: from sklearn.ensemble.ExtraTreesRegressor import transform [as alias]
class ExtraTreesPreprocessorRegression(AutoSklearnPreprocessingAlgorithm):
def __init__(self, n_estimators, criterion, min_samples_leaf,
             min_samples_split, max_features,
             max_leaf_nodes_or_max_depth="max_depth",
             bootstrap=False, max_leaf_nodes=None, max_depth="None",
             min_weight_fraction_leaf=0.0,
             oob_score=False, n_jobs=1, random_state=None, verbose=0):
    """Store and normalise the hyperparameters of the extra-trees preprocessor.

    Several values may arrive as strings (``"None"``, ``"True"``,
    ``"False"``), so each one is coerced to the Python type used later when
    the estimator is built.

    Args:
        n_estimators: Number of trees; coerced with ``int``.
        criterion: Split criterion; only ``"mse"`` is accepted.
        min_samples_leaf: Coerced with ``int``.
        min_samples_split: Coerced with ``int``.
        max_features: Scaling factor for the per-split feature budget;
            coerced with ``float``.
        max_leaf_nodes_or_max_depth: ``"max_depth"`` selects depth-limited
            trees (``max_leaf_nodes`` is forced to ``None``); any other
            value selects leaf-limited trees (``max_depth`` is forced to
            ``None``).
        bootstrap: ``True``/``False`` or the strings ``"True"``/``"False"``.
        max_leaf_nodes: ``None``, ``"None"`` or an int-like value.
        max_depth: ``None``, ``"None"`` or an int-like value.
        min_weight_fraction_leaf: Stored as a float on the instance.
        oob_score: Stored unchanged.
        n_jobs: Coerced with ``int``.
        random_state: Stored unchanged.
        verbose: Coerced with ``int``.

    Raises:
        ValueError: If ``criterion`` is not ``"mse"`` or ``bootstrap`` is
            not one of the accepted values.
    """
    self.n_estimators = int(n_estimators)
    self.estimator_increment = 10
    if criterion not in ("mse", ):
        raise ValueError("'criterion' is not in ('mse', ): "
                         "%s" % criterion)
    self.criterion = criterion

    # Exactly one of the two tree-size limits is active; the other is None.
    # Both the None object and the string "None" mean "no limit", so the
    # signature defaults no longer end up in int(None).
    if max_leaf_nodes_or_max_depth == "max_depth":
        self.max_leaf_nodes = None
        if max_depth is None or max_depth == "None":
            self.max_depth = None
        else:
            self.max_depth = int(max_depth)
    else:
        if max_leaf_nodes is None or max_leaf_nodes == "None":
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(max_leaf_nodes)
        self.max_depth = None

    self.min_samples_leaf = int(min_samples_leaf)
    self.min_samples_split = int(min_samples_split)
    self.max_features = float(max_features)

    # Accept real booleans as well as their string spellings. Previously the
    # boolean default matched neither string and left the attribute unset.
    if isinstance(bootstrap, bool):
        self.bootstrap = bootstrap
    elif bootstrap == "True":
        self.bootstrap = True
    elif bootstrap == "False":
        self.bootstrap = False
    else:
        raise ValueError("'bootstrap' is not in (True, False, 'True', "
                         "'False'): %s" % (bootstrap,))

    # Keep the value on the instance instead of silently discarding it.
    self.min_weight_fraction_leaf = float(min_weight_fraction_leaf)
    self.oob_score = oob_score
    self.n_jobs = int(n_jobs)
    self.random_state = random_state
    self.verbose = int(verbose)
    self.preprocessor = None
def fit(self, X, Y):
    """Build and fit the extra-trees regressor used as the preprocessor.

    The per-split feature budget is ``self.max_features`` scaled by
    ``log(n_columns) + 1``, capped at half of the input columns and never
    below one. The fitted estimator is kept in ``self.preprocessor``.

    Args:
        X: Feature matrix; only ``X.shape[1]`` is read here before it is
            handed to the estimator.
        Y: Regression targets passed through to the estimator.

    Returns:
        ``self``.
    """
    from sklearn.ensemble import ExtraTreesRegressor

    n_columns = X.shape[1]
    scaled_budget = int(float(self.max_features) * (np.log(n_columns) + 1))
    # Use at most half of the features, but always at least one.
    half_of_columns = int(X.shape[1] / 2)
    split_features = max(1, min(half_of_columns, scaled_budget))

    forest_options = dict(
        n_estimators=self.n_estimators,
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        bootstrap=self.bootstrap,
        max_features=split_features,
        max_leaf_nodes=self.max_leaf_nodes,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        random_state=self.random_state,
    )
    self.preprocessor = ExtraTreesRegressor(**forest_options)
    self.preprocessor.fit(X, Y)
    return self
def transform(self, X):
    """Delegate to the fitted estimator's ``transform``.

    Raises:
        NotImplementedError: If ``self.preprocessor`` is still ``None``,
            i.e. nothing has been fitted yet.
    """
    fitted = self.preprocessor
    if fitted is not None:
        return fitted.transform(X)
    raise NotImplementedError
@staticmethod
def get_properties(dataset_properties=None):
    """Return the static description of this preprocessor.

    ``dataset_properties`` is accepted but not consulted; the same mapping
    is returned for every call.
    """
    properties = {'shortname': 'ETR',
                  'name': 'Extra Trees Regressor Preprocessing'}
    # Task capabilities: regression only.
    properties.update({
        'handles_regression': True,
        'handles_classification': False,
        'handles_multiclass': False,
        'handles_multilabel': False,
    })
    properties['is_deterministic'] = True
    properties['input'] = (DENSE, SPARSE, UNSIGNED_DATA)
    properties['output'] = (INPUT,)
    return properties
@staticmethod
def get_hyperparameter_search_space(dataset_properties=None):
cs = ConfigurationSpace()
n_estimators = cs.add_hyperparameter(Constant("n_estimators", 100))
criterion = cs.add_hyperparameter(Constant("criterion", "mse"))
max_features = cs.add_hyperparameter(UniformFloatHyperparameter(
"max_features", 0.5, 5, default=1))
max_depth = cs.add_hyperparameter(
UnParametrizedHyperparameter(name="max_depth", value="None"))
min_samples_split = cs.add_hyperparameter(UniformIntegerHyperparameter(
"min_samples_split", 2, 20, default=2))
min_samples_leaf = cs.add_hyperparameter(UniformIntegerHyperparameter(
#.........这里部分代码省略.........
示例2: open
# Required import: from sklearn.ensemble import ExtraTreesRegressor [as alias]
# Or: from sklearn.ensemble.ExtraTreesRegressor import transform [as alias]
# Excerpt of a Python 2 script (note the `print >> stream, ...` statements).
# It dumps diagnostics of `xfr`, reduces the train/test data with
# `xfr.transform`, and writes the reduced train set to CSV.
# NOTE(review): leading indentation was lost in this excerpt; the nesting
# under each `with`/`for`/`if` must be restored before the code can run.
# NOTE(review): `xfr`, `out_filename`, `data_train`, `data_test`,
# `data_train_in`, `data_test_in`, `start` and `log` are defined outside the
# visible excerpt; `xfr` is presumably a fitted ExtraTreesRegressor — confirm.

# Write the estimator list to <out_filename>_estimators_.txt.
with open(out_filename+'_estimators_.txt','wt') as f:
#f.write(xfr.estimators_)
print >> f, xfr.estimators_
# Save the raw feature importances, then print the column and importance shapes.
np.savetxt(out_filename+'_feature_importances_.txt',xfr.feature_importances_)
print data_train.columns.shape,xfr.feature_importances_.shape
# Write one "<column>,<importance>" line per training column.
with open(out_filename+'_fimp.txt','wt') as f:
for feat,imp in zip(data_train.columns,xfr.feature_importances_):
print >>f,"%s,%g"%(feat,imp)
#with open(out_filename+'_feature_importances_.txt','wt') as f:
#print >> f, xfr.feature_importances_
#with open('oob_score_.txt','wt') as f:
#print >> f, xfr.oob_score_
#with open('oob_prediction_.txt','wt') as f:
#print >> f, xfr.oob_prediction_
# Reduce both data sets with the same threshold string.
# NOTE(review): estimator-level transform(threshold=...) is absent from
# current scikit-learn releases (SelectFromModel is the documented
# replacement) — confirm the targeted version.
transformed_train = xfr.transform(data_train,threshold="0.4*mean")
transformed_test = xfr.transform(data_test,threshold="0.4*mean")
# Log the elapsed time relative to `start`.
# NOTE(review): time.clock() does not exist on Python 3.8+ — confirm runtime.
end = time.clock()
print >> log, "time = ", end-start
# Output name: basename of the first CLI argument, extension replaced by '_tr.csv'.
suffix = '_tr.csv'
train_filename = (os.path.splitext(os.path.basename(sys.argv[1]))[0]+suffix)
train = pd.DataFrame(transformed_train)
# Prepend 'target', then 'id', so the final column order is id, target, features.
# NOTE(review): `.ix` is not available in pandas 1.0+ — confirm pandas version.
train = pd.concat([data_train_in.ix[:,'target'],train],axis=1)
train = pd.concat([data_train_in.ix[:,'id'],train],axis=1)
train.to_csv(train_filename,index=0)
# Same naming scheme for the test set, based on the second CLI argument.
test_filename = (os.path.splitext(os.path.basename(sys.argv[2]))[0]+suffix)
test = pd.DataFrame(transformed_test)
# Only prepend 'target' when the test input actually carries that column.
if 'target' in data_test_in:
test = pd.concat([data_test_in.ix[:,'target'],test],axis=1)
示例3: __init__
# Required import: from sklearn.ensemble import ExtraTreesRegressor [as alias]
# Or: from sklearn.ensemble.ExtraTreesRegressor import transform [as alias]
class mixmodels:
    """Equal-weight blend of four tree-ensemble regressors.

    ``fit`` trains, for both gradient boosting and extra trees:
      * a selector model on all columns, used only to pick features by
        importance, plus a second model trained on the selected features;
      * a model trained on the columns whose name contains ``'var'``.

    ``predict``, ``score`` and ``gini`` evaluate the four final models in the
    fixed order (gbr selected, xfr selected, gbr var-columns, xfr
    var-columns) and return the plain average.
    """

    # Importance cut-off used when reducing the feature set.
    _THRESHOLD = "0.35*mean"
    # Tree depth shared by every model in the blend.
    _MAX_DEPTH = 7

    def __init__(self, nest=10):
        """Remember ``nest``, the number of estimators for every model."""
        self.nest = nest

    @staticmethod
    def _select_features(estimator, data, threshold):
        """Return the columns of ``data`` that ``estimator`` rates important.

        Uses the estimator's own ``transform`` when it has one (older
        scikit-learn releases) and otherwise falls back to
        ``SelectFromModel`` around the already fitted estimator.
        """
        if hasattr(estimator, "transform"):
            return estimator.transform(data, threshold=threshold)
        from sklearn.feature_selection import SelectFromModel
        selector = SelectFromModel(estimator, threshold=threshold, prefit=True)
        return selector.transform(data)

    def _new_gbr(self):
        """Create an unfitted gradient boosting regressor for the blend."""
        return GradientBoostingRegressor(n_estimators=self.nest,
                                         max_depth=self._MAX_DEPTH)

    def _new_xfr(self):
        """Create an unfitted extra-trees regressor for the blend."""
        return ExtraTreesRegressor(n_estimators=self.nest,
                                   max_depth=self._MAX_DEPTH)

    def _member_inputs(self, data):
        """Pair each final model with the feature view it was trained on."""
        selected_gbr = self._select_features(self.gbr, data, self._THRESHOLD)
        selected_xfr = self._select_features(self.xfr, data, self._THRESHOLD)
        var_columns = data[self.catcol]
        return [
            (self.gbr_tr_fit, selected_gbr),
            (self.xfr_tr_fit, selected_xfr),
            (self.gbr_cat_fit, var_columns),
            (self.xfr_cat_fit, var_columns),
        ]

    def fit(self, data_train, target):
        """Train the two selector models and the four final models.

        Args:
            data_train: DataFrame of training features; columns whose name
                contains ``'var'`` form the second feature view.
            target: Training targets.

        Returns:
            ``self``.
        """
        self.target_train = target
        self.catcol = data_train.filter(like='var').columns.tolist()

        # Gradient boosting: select features, then refit on the selection.
        self.gbr = self._new_gbr()
        self.gbr.fit(data_train, self.target_train)
        self.transformed_train_gbr = self._select_features(
            self.gbr, data_train, self._THRESHOLD)
        self.gbr_tr_fit = self._new_gbr()
        self.gbr_tr_fit.fit(self.transformed_train_gbr, self.target_train)

        # Extra trees: same two-stage scheme.
        self.xfr = self._new_xfr()
        self.xfr.fit(data_train, self.target_train)
        self.transformed_train_xfr = self._select_features(
            self.xfr, data_train, self._THRESHOLD)
        self.xfr_tr_fit = self._new_xfr()
        self.xfr_tr_fit.fit(self.transformed_train_xfr, self.target_train)

        # Models restricted to the 'var' columns.
        self.gbr_cat_fit = self._new_gbr()
        self.gbr_cat_fit.fit(data_train[self.catcol], self.target_train)
        self.xfr_cat_fit = self._new_xfr()
        self.xfr_cat_fit.fit(data_train[self.catcol], self.target_train)
        return self

    def predict(self, data_test):
        """Return the row-wise mean of the four predictions.

        The result is a ``pandas.Series`` named ``'target'``.
        """
        member_predictions = [
            pd.Series(model.predict(features))
            for model, features in self._member_inputs(data_test)
        ]
        # axis is passed by keyword: pandas 2.x rejects it positionally.
        mix_test = pd.concat(member_predictions, axis=1)
        mix_ave = mix_test.mean(axis=1)
        mix_ave.name = 'target'
        return mix_ave

    def score(self, data_test, target_test):
        """Return the mean of the four models' ``score`` values."""
        total_score = [
            model.score(features, target_test)
            for model, features in self._member_inputs(data_test)
        ]
        return sum(total_score) / float(len(total_score))

    def gini(self, data_test, target_test):
        """Return the mean ``normalized_weighted_gini`` of the four models.

        The ``var11`` column of ``data_test`` supplies the weights.
        """
        weight = data_test.var11
        gns = []
        for model, features in self._member_inputs(data_test):
            # Fresh lists per call, in case the metric mutates its inputs.
            gns.append(normalized_weighted_gini(
                target_test.tolist(),
                model.predict(features).tolist(),
                weight.tolist()))
        return sum(gns) / float(len(gns))