This article collects typical usage examples of the Python method sklearn.ensemble.RandomForestRegressor.get_params. If you are wondering what RandomForestRegressor.get_params does and how to use it, the curated code examples below may help. You can also explore further usage examples of the class it belongs to, sklearn.ensemble.RandomForestRegressor.
The following presents 9 code examples of the RandomForestRegressor.get_params method, sorted by popularity by default.
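As a quick orientation before the examples: get_params returns a dict of an estimator's constructor parameters (defaults included), and the companion set_params accepts the same names, so the pair round-trips a configuration. Below is a minimal sketch; the parameter values in it are illustrative assumptions, not taken from any of the examples that follow.
# Minimal sketch (parameter values are illustrative assumptions)
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_estimators=100, max_features=3)
params = rf.get_params()           # dict of constructor parameters, defaults included
print(params['n_estimators'])      # -> 100
rf.set_params(n_estimators=200)    # the same names round-trip through set_params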
Example 1: main
# Required module import: from sklearn.ensemble import RandomForestRegressor [as alias]
# Or: from sklearn.ensemble.RandomForestRegressor import get_params [as alias]
def main():
    ## Read in csv and correct formatting that was lost in transition
    mydf = read_data_csv()
    # eliminate rows that have 1 or more missing values
    mydf = mydf.dropna(axis=0)
    # convert region to something numerical
    numeric_regions = {
        'Africa': 1,
        'Asia': 2,
        'Central America/ Caribbean': 3,
    }
    mydf['region_num'] = mydf['region'].map(numeric_regions)
    predictor_names = ['week_day_num_posted', 'day_posted', 'maleness', 'region_num',
                       'treat_cost', 'patient_age', 'smile_scale']
    numfeat = len(predictor_names)
    Y = mydf.dollars_per_day  # variable to predict
    X = mydf[predictor_names]
    # Build regressor using the "best" random forest
    nfolds = 3  # number of folds to use for cross-validation
    # n_estimators is the number of trees in the forest
    # max_features is the number of features to consider when looking for the best split
    parameters = {'n_estimators': [10, 100, 1000], 'max_features': [3, 5, 7]}  # rf parameters to try
    njobs = 1  # number of jobs to run in parallel -- pickle problems may occur if njobs > 1
    rf_tune = grid_search.GridSearchCV(RandomForestRegressor(), parameters,
                                       n_jobs=njobs, cv=nfolds)
    rf_opt = rf_tune.fit(X, Y)
    # Results of the grid search for optimal random forest parameters
    print("Grid of scores:\n" + str(rf_opt.grid_scores_) + "\n")
    print("Best CV score: " + str(rf_opt.best_score_) + "\n")
    print("Optimal Model:\n" + str(rf_opt.best_estimator_) + "\n")
    #print "Parameters of random forest:\n ", rf_opt.get_params()
    # save optimal random forest regressor for future use
    #mypickledRF = open('RF_Regressor', 'wb')  # w is for write; b is for binary
    #pickle.dump(rf_opt.best_estimator_, mypickledRF)  # save regressor in file "RF_Regressor"
    #mypickledRF.close()
    # Now use the optimal model's parameters to run the random forest
    # (I couldn't get feature importances directly from the GridSearchCV fit)
    crf = RandomForestRegressor(n_jobs=njobs, max_features=3, n_estimators=1000).fit(X, Y)
    print "Parameters used in chosen RF model:\n ", crf.get_params()
    plotting_names = np.array(('Day', 'Date', 'Sex', 'Region', 'Cost', 'Age', 'Smile'))
    print crf.feature_importances_
    indices = np.argsort(crf.feature_importances_)[::-1][:numfeat]
    plt.bar(xrange(numfeat), crf.feature_importances_[indices], align='center', alpha=.5)
    plt.xticks(xrange(numfeat), plotting_names[indices], rotation='horizontal', fontsize=12)
    plt.xlim([-1, numfeat])
    plt.ylabel('Feature importances', fontsize=12)
    plt.title('Feature importances computed by Random Forest', fontsize=16)
    plt.savefig('03_feature_importance.png', dpi=150)
Example 2: run_random_forest
# Required module import: from sklearn.ensemble import RandomForestRegressor [as alias]
# Or: from sklearn.ensemble.RandomForestRegressor import get_params [as alias]
def run_random_forest(mydf):
    print "\n************ Random Forest Results ************\n"
    mydf = prepare_data_for_RF(mydf)
    predictor_names = ['week_day_num_posted', 'day_posted', 'maleness', 'region_num',
                       'treat_cost', 'patient_age', 'smile_scale']
    numfeat = len(predictor_names)
    Y = mydf.dollars_per_day  # variable to predict
    X = mydf[predictor_names]
    # Build regressor using the "best" random forest
    nfolds = 3  # number of folds to use for cross-validation
    # n_estimators is the number of trees in the forest
    # max_features is the number of features to consider when looking for the best split
    parameters = {'n_estimators': [10, 100, 1000], 'max_features': [3, 5, 7]}  # to try
    njobs = 1  # number of jobs to run in parallel
    rf_tune = grid_search.GridSearchCV(RandomForestRegressor(), parameters,
                                       n_jobs=njobs, cv=nfolds)
    rf_opt = rf_tune.fit(X, Y)
    # Results of the grid search for optimal random forest parameters
    print("Grid of scores:\n" + str(rf_opt.grid_scores_) + "\n")
    print("Best CV score: " + str(rf_opt.best_score_) + "\n")
    print("Optimal Model:\n" + str(rf_opt.best_estimator_) + "\n")
    #print "Parameters of random forest:\n ", rf_opt.get_params()
    # Now use the optimal model's parameters to run the random forest
    # (I couldn't get feature importances directly from the GridSearchCV fit)
    crf = RandomForestRegressor(
        n_jobs=njobs, max_features=3, n_estimators=1000).fit(X, Y)
    print "Parameters used in chosen RF model:\n ", crf.get_params()
    plotting_names = np.array(('Day', 'Date', 'Sex', 'Region', 'Cost', 'Age', 'Smile'))
    #print crf.feature_importances_
    indices = np.argsort(crf.feature_importances_)[::-1][:numfeat]
    plt.bar(xrange(numfeat), crf.feature_importances_[indices],
            align='center', alpha=.5)
    plt.xticks(xrange(numfeat), plotting_names[indices],
               rotation='horizontal', fontsize=20)
    plt.xlim([-1, numfeat])
    plt.ylabel('Feature importances', fontsize=24)
    plt.title('', fontsize=28)
    plt.savefig('03_feature_importance_v2.pdf')
Example 3: PostRankOptimization
# Required module import: from sklearn.ensemble import RandomForestRegressor [as alias]
# Or: from sklearn.ensemble.RandomForestRegressor import get_params [as alias]
class PostRankOptimization(object):
    """
    :param balanced:
    :param visual_expansion_use:
    :param re_score_alpha:
    :param re_score_method_proportional:
    :param regions: Defines which of the regions are considered upper body and which legs. If None, not used.
        Must be of length 2 if defined.
        Example: regions=[[0, 1], [2, 3, 4]]
    :return:
    """
    def __init__(self, balanced=False, visual_expansion_use=True, re_score_alpha=0.15,
                 re_score_proportional=True, regions=None, ve_estimators=20, ve_leafs=5):  # OK
        self.subject = -1  # The order of the person to be re-identified by the user (initially -1)
        self.probe_name = ""
        self.probe_selected = None  # Already feature extracted
        self.target_position = 0
        self.iteration = 0
        self.strong_negatives = []
        self.weak_negatives = []
        self.visual_expanded = []
        self.new_strong_negatives = []
        self.new_weak_negatives = []
        self.new_visual_expanded = []
        self.visual_expansion = RandomForestRegressor(n_estimators=ve_estimators, min_samples_leaf=ve_leafs,
                                                      n_jobs=-1)  # As in POP
        # regions = [[0], [1]]
        if regions is None:
            self.regions = [[0]]
            self.regions_parts = 1
        elif len(regions) == 2:
            self.regions = regions
            self.regions_parts = sum([len(e) for e in regions])
        else:
            raise ValueError("Regions size must be 2 (body region and legs region)")
        self.size_for_each_region_in_fe = 0  # Initialized at initial iteration
        self.execution = None
        self.ranking_matrix = None
        self.rank_list = None
        self.comp_list = None
        self.balanced = balanced
        if not balanced:
            self.use_visual_expansion = False
        else:
            self.use_visual_expansion = visual_expansion_use
        self.re_score_alpha = re_score_alpha
        self.re_score_proportional = re_score_proportional

    def set_ex(self, ex, rm):  # OK
        self.execution = ex
        self.ranking_matrix = rm
        self.initial_iteration()

    def new_samples(self, weak_negatives_index, strong_negatives_index, absolute_index=False):  # OK
        self.new_weak_negatives = [[e, idx] for [e, idx] in weak_negatives_index if
                                   [e, idx] not in self.weak_negatives]
        self.new_strong_negatives = [[e, idx] for [e, idx] in strong_negatives_index if
                                     [e, idx] not in self.strong_negatives]
        if not absolute_index:
            self.new_weak_negatives = [[self.rank_list[e], idx] for [e, idx] in self.new_weak_negatives]
            self.new_strong_negatives = [[self.rank_list[e], idx] for [e, idx] in self.new_strong_negatives]

    def _generate_visual_expansion(self):  # OK
        n_estimators = self.visual_expansion.get_params()['n_estimators']
        selected_len = int(round(float(n_estimators) * (2 / 3.)))
        selected = np.random.RandomState()
        selected = selected.permutation(n_estimators)
        selected = selected[:selected_len]
        expansion = np.zeros_like(self.probe_selected)
        for s in selected:  # average the predictions of a random 2/3 of the trees
            expansion += self.visual_expansion[s].predict(self.probe_selected).flatten()
        expansion /= float(selected_len)
        return expansion

    def new_subject(self):  # OK
        if self.subject < self.execution.dataset.test_size:
            self.subject += 1
            self.probe_name = self.execution.dataset.probe.files_test[self.subject]
            self.probe_name = "/".join(self.probe_name.split("/")[-2:])
            self.probe_selected = self.execution.dataset.probe.fe_test[self.subject]
            self.rank_list = self.ranking_matrix[self.subject].copy()
            self.comp_list = self.execution.matching_matrix[self.subject].copy()
            self._calc_target_position()
            self.iteration = 0
            self.strong_negatives = []
            self.weak_negatives = []
            self.visual_expanded = []
        else:
            return  # TODO Control situation

    def initial_iteration(self):  # OK
        self.new_subject()
        self.size_for_each_region_in_fe = self.execution.dataset.gallery.fe_test.shape[1] / self.regions_parts
        if self.use_visual_expansion:
            self.visual_expansion.fit(self.execution.dataset.probe.fe_train, self.execution.dataset.gallery.fe_train)
# ......... part of the code omitted here .........
Example 4: str
# Required module import: from sklearn.ensemble import RandomForestRegressor [as alias]
# Or: from sklearn.ensemble.RandomForestRegressor import get_params [as alias]
    ,{ 'fit': svrFit, 'predict': scaledPredict, 'scaled': True, 'idx0': 3, 'idx1': 4, 'ratio': svmRatio }
    ,{ 'fit': svrFit, 'predict': scaledPredict, 'scaled': True, 'idx0': 4, 'idx1': 5, 'ratio': svmRatio }
    ,{ 'fit': svrFit, 'predict': scaledPredict, 'scaled': True, 'idx0': 6, 'idx1': 7, 'ratio': svmRatio }
]
#models2 = combine.combineTrain(X_test, y_test, models)

print "Training random forest..."
forestSize = 30
print "\t# Examples: \t\t" + str(len(X_train))
print "\tForest Size: \t\t" + str(forestSize)
start = time.time()
clf = RandomForestRegressor(n_estimators=forestSize, n_jobs=8)
clf = clf.fit(X_train, y_train)
print "\tTraining Complete"
print "\tTime: \t\t" + str(round(time.time() - start, 1)) + "s"

# Reset n_jobs to 1 because multicore evaluation is apparently hard
params = clf.get_params()
clf.set_params(n_jobs=1)
print "\tRMSE: \t\t" + str(rmse(X_test, y_test, clf.predict, True))
#results = combine.combineTest(X_test, y_test, clf, models)
#def subPredict(X):
#    return combine.combinePredict(X, clf, models)

submission(clf.predict, filters, pca.transform)
Example 5: train_predict
# Required module import: from sklearn.ensemble import RandomForestRegressor [as alias]
# Or: from sklearn.ensemble.RandomForestRegressor import get_params [as alias]
for clf in [clf_A, clf_B, clf_C, clf_D, clf_E, clf_F]:
    train_predict(clf, X_train, y_train, X_valid, y_valid)'''

# RandomForestRegressor
parameters = {'n_estimators': (10, 15, 20),
              'min_samples_split': (2, 3, 4),
              'min_samples_leaf': (1, 2, 3)}
rfr = RandomForestRegressor(random_state=seed, warm_start=True)
score = make_scorer(mean_squared_error, greater_is_better=False)
grid_obj = GridSearchCV(rfr, param_grid=parameters, scoring=score, verbose=1, n_jobs=4, cv=5)
grid_obj = grid_obj.fit(X_train, y_train)
rfr = grid_obj.best_estimator_
print rfr.get_params(), '\n'
print "Tuned model has a training RMSE score of {:.4f}.".format(predict_labels(rfr, X_train, y_train))
print "Tuned model has a testing RMSE score of {:.4f}.".format(predict_labels(rfr, X_valid, y_valid))

# RidgeCV
ridge = RidgeCV(alphas=(1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1.0, 10.0), cv=5)
ridge = ridge.fit(X_train, y_train)
print ridge.get_params(), '\n'
print "Tuned model has a training RMSE score of {:.4f}.".format(predict_labels(ridge, X_train, y_train))
print "Tuned model has a testing RMSE score of {:.4f}.".format(predict_labels(ridge, X_valid, y_valid))

# Save regressors
pickle_file = 'regressor.pickle'
try:
    f = open(pickle_file, 'wb')
Example 6: RandomForestRegressor
# Required module import: from sklearn.ensemble import RandomForestRegressor [as alias]
# Or: from sklearn.ensemble.RandomForestRegressor import get_params [as alias]
valid_X = X_train[int(sz[0] * frac):, :]
valid_Y = Y_train[int(sz[0] * frac):]
####################################################################################
# regressor
RFmodel = RandomForestRegressor(
    n_estimators=1000,  # number of trees to generate
    n_jobs=1,           # number of parallel jobs (1 = single core)
    criterion="mse"
)
# train
RFmodel = RFmodel.fit(train_X, train_Y)
# get parameters
params = RFmodel.get_params()
# score on training set (R^2 for a regressor)
acc_rate = RFmodel.score(train_X, train_Y)
print acc_rate
# feature importances
feat_imp = RFmodel.feature_importances_
df_train = pd.io.parsers.read_table('X_train.csv', sep=',', header=None)
col_names = list(df_train.columns)
feat_imp_dict = {col_names[i]: feat_imp[i] for i in range(len(feat_imp))}
feat_imp_sort = sorted(feat_imp_dict.items(), key=operator.itemgetter(1))
y_out = RFmodel.predict(valid_X)
pred = np.array([np.max([0.0, x]) for x in y_out])  # clip negative predictions to zero
print ('prediction error=%f' % np.sqrt(sum((pred[i] - valid_Y[i]) ** 2 for i in range(len(valid_Y))) / float(len(valid_Y))))
Example 7: TrainROI
# Required module import: from sklearn.ensemble import RandomForestRegressor [as alias]
# Or: from sklearn.ensemble.RandomForestRegressor import get_params [as alias]
class TrainROI():
    """Train a regressor and test it on ROI loan data
    """
    def __init__(self):
        self.load_data()
        self.calculate_roi()
        self.convert_to_float()
        self.split_by_grade()
        self.create_targets_features()
        self.split_train_test(train_size=0.8)
        #self.balance()
        self.X_train = self.X_train.drop(['loan_status', 'total_pymnt', 'roi'], 1).values
        self.y_train = self.y_train.values
        self.X_test = self.X_test.drop(['loan_status', 'total_pymnt', 'roi'], 1).values
        self.y_test = self.y_test.values

    def load_data(self):
        fileName = 'data.pickle'
        print "Loading %s" % fileName
        f = open(fileName, 'rb')
        self.loanData = pickle.load(f)

    def calculate_roi(self):
        self.loanData['roi'] = (self.loanData['total_pymnt'] - self.loanData['funded_amnt']) / self.loanData['funded_amnt']

    def convert_to_float(self):
        self.loanData = self.loanData.astype(float)

    def split_by_grade(self, grade='A'):
        self.loans = self.loanData[self.loanData[grade] == 1]
        self.loans = self.loans.drop(['A', 'B', 'C', 'D', 'E', 'F', 'G'], 1)

    def split_train_test(self, train_size=0.8):
        mask = np.random.rand(len(self.targets)) < train_size
        self.X_train = self.features[mask]
        self.y_train = self.targets[mask]
        self.X_test = self.features[~mask]
        self.y_test = self.targets[~mask]
        print "Instances in training: ", len(self.X_train)
        print "Instances in testing: ", len(self.X_test)

    def scale(self):
        self.scalerX = StandardScaler().fit(self.X_train)
        self.X_train, self.X_test = self.scalerX.transform(self.X_train), \
                                    self.scalerX.transform(self.X_test)

    def standardize_samples(self):
        ## 0 mean, unit variance
        self.X_train = preprocessing.scale(self.X_train)
        self.X_test = preprocessing.scale(self.X_test)

    def scale_samples_to_range(self):
        ## Samples lie in range between 0 and 1
        minMaxScaler = preprocessing.MinMaxScaler()
        self.X_train = minMaxScaler.fit_transform(self.X_train)
        self.X_test = minMaxScaler.fit_transform(self.X_test)

    def balance(self):
        """Balances the training default and non-default instances"""
        print "Total loans before balancing: ", len(self.X_train)
        print "Defaults before balancing: ", np.sum(self.X_train['loan_status'] == 0)
        defaults_added = 0
        for i in range(1, len(self.X_train)):
            loan = self.X_train[i-1:i]
            loan_roi = self.y_train[i-1:i]
            if int(loan['loan_status']) == 0:
                for n in range(10):  # replicate the loan multiple times
                    defaults_added += 1
                    if defaults_added % 100 == 0:
                        print defaults_added
                    self.X_train = self.X_train.append(loan)
                    self.y_train = self.y_train.append(loan_roi)
        print "Total loans after balancing: ", len(self.y_train)
        print "Defaults after balancing: ", np.sum(self.X_train['loan_status'] == 0)

    def create_targets_features(self):
        self.targets = self.loans['roi']
        self.features = self.loans

    def define_linear_regressor(self):
        self.regr = LinearRegression()

    def define_SVR(self, C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True,
                   probability=False, tol=0.01, cache_size=200, class_weight='auto', verbose=True,
                   max_iter=-1, random_state=None):
        print "Using a Support Vector Machine Regressor ..."
        self.regr = SVR(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, shrinking=shrinking,
                        probability=probability, tol=tol, cache_size=cache_size, verbose=verbose,
                        max_iter=max_iter, random_state=random_state)
        print self.regr.get_params()
# ......... part of the code omitted here .........
Example 8: main_params
# Required module import: from sklearn.ensemble import RandomForestRegressor [as alias]
# Or: from sklearn.ensemble.RandomForestRegressor import get_params [as alias]
# ......... part of the code omitted here .........
            m = GradientBoostingRegressor(n_estimators=m.best_params_["n_estimators"],
                                          subsample=0.5, max_features=0.5, random_state=seed)
        elif model_type == "class":
            m = GradientBoostingClassifier(n_estimators=m.best_params_["n_estimators"],
                                           subsample=0.5, max_features=0.5, random_state=seed)
        models_lst.append(m)

    if current_model == "svm":
        # choosing optimal parameters
        if model_type == "reg":
            param_grid = {"C": [10 ** i for i in range(0, 5)],
                          "epsilon": [0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0.01]}
            m = ms.GridSearchCV(svm.SVR(kernel='rbf'), param_grid, n_jobs=ncores, cv=cv, refit=False,
                                verbose=verbose)
        elif model_type == "class":
            param_grid = {"C": [10 ** i for i in range(0, 5)],
                          "gamma": [10 ** i for i in range(-6, 0)]}
            m = ms.GridSearchCV(svm.SVC(kernel='rbf', random_state=seed), param_grid, n_jobs=ncores, cv=cv,
                                refit=False, verbose=verbose)
        m.fit(x[subset], y[subset])
        # final model
        if model_type == "reg":
            m = svm.SVR(kernel='rbf', C=m.best_params_["C"], epsilon=m.best_params_["epsilon"])
        elif model_type == "class":
            m = svm.SVC(kernel='rbf', C=m.best_params_["C"], gamma=m.best_params_["gamma"],
                        probability=True, random_state=seed)
        models_lst.append(m)

    if current_model == "pls" and model_type == "reg":
        # choosing optimal parameters
        param_grid = {"n_components": [i for i in range(1, 8)]}
        m = ms.GridSearchCV(PLSRegression(), param_grid, n_jobs=ncores, cv=cv, refit=False, verbose=verbose)
        m.fit(x[subset], y[subset])
        # final model
        m = PLSRegression(n_components=m.best_params_["n_components"])
        models_lst.append(m)

    if current_model == "knn":
        # choosing optimal parameters
        param_grid = {"n_neighbors": [i for i in range(3, 21)]}
        if model_type == "reg":
            m = ms.GridSearchCV(KNeighborsRegressor(), param_grid, n_jobs=ncores, cv=cv, refit=False, verbose=verbose)
        elif model_type == "class":
            m = ms.GridSearchCV(KNeighborsClassifier(), param_grid, n_jobs=ncores, cv=cv, refit=False, verbose=verbose)
        m.fit(x[subset], y[subset])
        # final model
        if model_type == "reg":
            m = KNeighborsRegressor(n_neighbors=m.best_params_["n_neighbors"])
        elif model_type == "class":
            m = KNeighborsClassifier(n_neighbors=m.best_params_["n_neighbors"])
        models_lst.append(m)

    # return cv predictions
    ncol = len(models_lst) + 1 if len(models_lst) > 1 else len(models_lst)  # +1 column if consensus
    cv_pred = np.column_stack((y, np.full((y.shape[0], ncol), np.nan)))
    for i, (m, subset) in enumerate(zip(models_lst, subsets)):
        pred = ms.cross_val_predict(estimator=m, X=x[subset], y=y[subset], cv=cv)
        if current_model == 'pls':  # reshape for pls because it returns a 2d array and we need 1d
            pred = pred.reshape(len(subset))
        cv_pred[subset, i + 1] = pred
        # build final model, save it and its stats
        m.fit(x[subset], y[subset])
        add_obj_to_file(os.path.join(models_dir, current_model + '.pkl'), m)
        save_model_stat_2(current_model + '_%i' % i, model_stat_fname, str(m.get_params())[1:-1],
                          y[subset],
                          cv_pred[subset, i + 1],
                          model_type,
                          verbose)

    # calc cv consensus and save stats
    if model_type == "class" and len(models_lst) > 1:
        cv_pred[:, -1] = np.apply_along_axis(get_major_vote, 1, cv_pred[:, 1:])
        # cv_pred[:, -1] = np.around(np.nanmean(cv_pred[:, 1:], axis=1))
        save_model_stat_2(current_model + "_consensus", model_stat_fname, "",
                          y,
                          cv_pred[:, -1],
                          model_type,
                          verbose)

    # save cv predictions
    if cv_predictions:
        np.savetxt(os.path.join(models_dir, current_model + "_cv_pred.txt"),
                   np.column_stack([mol_names, np.round(cv_pred, 3)]),
                   fmt="%s",
                   delimiter="\t",
                   comments="",
                   header="Mol\tObs\t" +
                          "\t".join("%s_%i" % (current_model, i) for i in range(len(models_lst))) +
                          "\t" + current_model + "_consensus")

    if verbose:
        print(current_model.upper() + ' model was built\n')
Example 9: main
# Required module import: from sklearn.ensemble import RandomForestRegressor [as alias]
# Or: from sklearn.ensemble.RandomForestRegressor import get_params [as alias]
def main(arg1):
    #print arg1
    fname = '../EssentiaTrainFeatures/' + arg1  # e.g. Liquids_To_UnvoicedPlosives.arff
    fname2 = './' + arg1  # e.g. Liquids_To_UnvoicedPlosives.arff
    start = time()
    try:
        f = open(fname, 'r')
    except:
        return('error')
    #lines = f.readlines()[:]
    #f.close()
    #floats = []
    #for line in lines:
    #    floats.append(shlex.split(line))
    #array = np.asarray(floats)
    #for (x, y), value in np.ndenumerate(array):
    #    if value == np.nan or value == 'NaN':
    #        array[x][y] = 0
    #    elif value == np.infty:
    #        array[x][y] = 1
    array = np.loadtxt(f)
    f.close()
    array = np.nan_to_num(array)
    #array = array.astype(np.float)
    print 'Data size'
    print np.shape(array)
    #scale = StandardScaler()
    #array = scale.fit_transform(array)
    trainY = array[:, 305]
    trainX = np.delete(array, [302, 303, 304, 305, 306, 307], 1)
    elapsed = time() - start
    print 'Training array loading time'
    print elapsed / 60
    f = open(fname2, 'r')
    #lines = f.readlines()[:]
    #f.close()
    #floats = []
    #for line in lines:
    #    floats.append(shlex.split(line))
    #array2 = np.asarray(floats)
    #for (x, y), value in np.ndenumerate(array2):
    #    if value == np.nan or value == 'NaN':
    #        array2[x][y] = 0
    #    elif value == np.infty:
    #        array2[x][y] = 2
    array2 = np.loadtxt(f)
    f.close()
    array2 = np.nan_to_num(array2)
    #array2 = array2.astype(np.float)
    print 'Test size'
    print np.shape(array2)
    #scale = StandardScaler()
    #array = scale.fit_transform(array)
    #traiY = array[:, 38]
    #Position = array2[:, 36]
    #Hmmboundary = array2[:, 37]
    #Manualboundary = array2[:, 38]
    hmm_true = array2[:, 305]
    hmmX = np.delete(array2, [302, 303, 304, 305, 306, 307], 1)
    #trainY, realY, trainX, testX = train_test_split(traiY, traiX, test_size=0.8, random_state=42)
    #Cost = np.power(2, np.arange(1, 12))
    #g = [0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625, 0.001953125, 0.0009765625, 0.00048828125, 0.00048828125]
    #print '\nCost values'
    #print Cost
    #print '\ngamma values'
    #print g
    #scorebest = 0
    #Cbest = 0
    #gammabest = 0
    #model_to_set = NuSVR(C=32, cache_size=2048, coef0=0.0, degree=3, gamma=0.03125, kernel='rbf',
    #                     max_iter=-1, nu=0.5, probability=False, shrinking=True, tol=0.001,
    #                     verbose=True)
    #parameters = {'C': Cost, 'gamma': g}  #,'nu':[0.5],'kernel':['rbf'],'verbose':[True]}
    #k = [0.5, 1]  #2,5,7,8]
    model_to_set = RandomForestRegressor(n_estimators=100, criterion='mse', max_depth=5000,
                                         min_samples_split=2000, min_samples_leaf=10, min_density=0.1,
                                         max_features='auto', bootstrap=True, compute_importances=False,
                                         oob_score=False, n_jobs=3, random_state=None, verbose=0)
    #parameters = {'n_estimators': [10, 100, 500], 'max_depth': [1, 5, 20, 100, None], 'min_samples_split': [1, 5, 20, 100]}
    #trainY, realY, trainX, testX = train_test_split(traiY, traiX, test_size=0, random_state=42)
    print '\nparams'
    print model_to_set.get_params()
    start = time()
    print '\ntraining start time'
    print strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
    model_to_set.fit(trainX, trainY)
    print '\ntraining end time'
    print strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
    elapsed = (time() - start)
    print elapsed / 60
    y_pred = model_to_set.predict(trainX)
    #return (y_pred, trainY)
    #score1 = model_to_set.score(trainX, trainY)
    #print 'score1'
    #print score1
    #print 'Myscore1'
    #print MyScore(trainY, y_pred)
    #y_pred = model_to_set.predict(testX)
    #score2 = model_to_set.score(testX, realY)
# ......... part of the code omitted here .........