当前位置: 首页>>代码示例>>Python>>正文


Python RandomForestRegressor.get_params方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestRegressor.get_params方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestRegressor.get_params方法的具体用法?Python RandomForestRegressor.get_params怎么用?Python RandomForestRegressor.get_params使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.RandomForestRegressor的用法示例。


在下文中一共展示了RandomForestRegressor.get_params方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import get_params [as 别名]
def main():

    ## Read in csv and correct formating that was lost in transition
    mydf = read_data_csv()

    #eliminate rows that have 1 or more missing values
    mydf = mydf.dropna(axis=0)
    #convert region to something numerical
    numeric_regions = {
        'Africa': 1,
        'Asia': 2,
        'Central America/ Caribbean': 3,
    }
    mydf['region_num'] = mydf['region'].map(numeric_regions)
    
    ###

    predictor_names = ['week_day_num_posted','day_posted','maleness','region_num', \
                      'treat_cost','patient_age','smile_scale']
    numfeat = len(predictor_names)
    Y = mydf.dollars_per_day #variable to predict
    X = mydf[predictor_names]
    
    #Build classifier using "best" random forest
    nfolds = 3 #number of folds to use for cross-validation
    #n_estimators is number of trees in forest
    #max_features is the number of features to consider when looking for best split
    parameters = {'n_estimators':[10,100,1000],  'max_features':[3,5,7]} # rf parameters to try
    njobs = 1 #number of jobs to run in parallel -- pickle problems may occur if njobs > 1
    rf_tune = grid_search.GridSearchCV(RandomForestRegressor(), parameters,
                             n_jobs = njobs, cv = nfolds)
    rf_opt = rf_tune.fit(X,Y)
    
    #Results of the grid search for optimal random forest parameters.
    print("Grid of scores:\n" + str(rf_opt.grid_scores_) + "\n")
    print("Best zero-one score: " + str(rf_opt.best_score_) + "\n")
    print("Optimal Model:\n" + str(rf_opt.best_estimator_) + "\n")
    #print "Parameters of random forest:\n " , rf_opt.get_params()

    #save optimal random forest regressor for future
    #mypickledRF = open('RF_Regressor' , 'wb') #w is for write; b is for binary
    #pickle.dump(rf_opt.best_estimator_ , mypickledRF) #Save classifier in file "RFclassifier"
    #mypickledRF.close()

    #Now use the optimal model's parameters to run random forest
        #(I couldn't get feature importances directly from the GridSearchCV fit)
    crf = RandomForestRegressor(n_jobs=njobs, max_features=3, n_estimators=1000).fit(X,Y) 
    print "Parameters used in chosen RF model:\n " , crf.get_params()

    plotting_names = np.array(('Day','Date','Sex','Region','Cost','Age','Smile'))
    print crf.feature_importances_
    indices = np.argsort(crf.feature_importances_)[::-1][:numfeat]
    plt.bar(xrange(numfeat), crf.feature_importances_[indices], align='center', alpha=.5)
    plt.xticks(xrange(numfeat), plotting_names[indices], rotation='horizontal', fontsize=12)
    plt.xlim([-1, numfeat])
    plt.ylabel('Feature importances', fontsize=12)
    plt.title('Feature importances computed by Random Forest', fontsize=16)
    plt.savefig('03_feature_importance.png', dpi=150);
开发者ID:cotterman,项目名称:Astro250_Python-for-Data-Scientists,代码行数:60,代码来源:experiment.py

示例2: run_random_forest

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import get_params [as 别名]
def run_random_forest(mydf):

    print "\n************ Random Forest Results ************\n"

    mydf = prepare_data_for_RF(mydf)   
    
    predictor_names = ['week_day_num_posted','day_posted','maleness','region_num', \
                      'treat_cost','patient_age','smile_scale']
    numfeat = len(predictor_names)
    Y = mydf.dollars_per_day #variable to predict
    X = mydf[predictor_names]
    
    #Build classifier using "best" random forest
    nfolds = 3 #number of folds to use for cross-validation
    #n_estimators is number of trees in forest
    #max_features is the number of features to consider when looking for best split
    parameters = {'n_estimators':[10,100,1000],  'max_features':[3,5,7]} # to try
    njobs = 1 #number of jobs to run in parallel
    rf_tune = grid_search.GridSearchCV(RandomForestRegressor(), parameters,
                             n_jobs = njobs, cv = nfolds)
    rf_opt = rf_tune.fit(X,Y)
    
    #Results of the grid search for optimal random forest parameters.
    print("Grid of scores:\n" + str(rf_opt.grid_scores_) + "\n")
    print("Best zero-one score: " + str(rf_opt.best_score_) + "\n")
    print("Optimal Model:\n" + str(rf_opt.best_estimator_) + "\n")
    #print "Parameters of random forest:\n " , rf_opt.get_params()

    #Now use the optimal model's parameters to run random forest
        #(I couldn't get feature importances directly from the GridSearchCV fit)
    crf = RandomForestRegressor(
        n_jobs=njobs, max_features=3, n_estimators=1000).fit(X,Y) 
    print "Parameters used in chosen RF model:\n " , crf.get_params()

    plotting_names = np.array(('Day','Date','Sex','Region','Cost','Age','Smile'))
    #print crf.feature_importances_
    indices = np.argsort(crf.feature_importances_)[::-1][:numfeat]
    plt.bar(xrange(numfeat), crf.feature_importances_[indices], \
        align='center', alpha=.5)
    plt.xticks(xrange(numfeat), plotting_names[indices], \
        rotation='horizontal', fontsize=20)
    plt.xlim([-1, numfeat])
    plt.ylabel('Feature importances', fontsize=24)
    plt.title('', fontsize=28)
    plt.savefig('03_feature_importance_v2.pdf');
开发者ID:cotterman,项目名称:Astro250_Python-for-Data-Scientists,代码行数:47,代码来源:Watsi_data_analytics.py

示例3: PostRankOptimization

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import get_params [as 别名]
class PostRankOptimization(object):
    """

    :param balanced:
    :param visual_expansion_use:
    :param re_score_alpha:
    :param re_score_method_proportional:
    :param regions: Define which of the regions to be considered upper body and which legs. If None, not used.
           Must be of length 2 if defined.
           Example: regions=[[0, 1], [2, 3, 4]]
    :return:
    """

    def __init__(self, balanced=False, visual_expansion_use=True, re_score_alpha=0.15,
                 re_score_proportional=True, regions=None, ve_estimators=20, ve_leafs=5):  # OK
        """Set up all post-rank-optimization state.

        :param balanced: if False, visual expansion is disabled regardless of
               visual_expansion_use.
        :param visual_expansion_use: enable the visual-expansion forest.
        :param re_score_alpha: weight used by the re-scoring step.
        :param re_score_proportional: select proportional re-scoring.
        :param regions: None for a single region, or a length-2 list of the
               feature parts forming the upper-body and legs regions.
        :param ve_estimators: n_estimators for the visual-expansion forest.
        :param ve_leafs: min_samples_leaf for the visual-expansion forest.
        :raises ValueError: if regions is not None and len(regions) != 2.
        """
        self.subject = -1  # The order of the person to be Re-identified by the user (Initially -1)
        self.probe_name = ""
        self.probe_selected = None  # Already feature extracted
        self.target_position = 0
        self.iteration = 0
        # Feedback accumulated over all iterations for the current subject.
        self.strong_negatives = []
        self.weak_negatives = []
        self.visual_expanded = []
        # Feedback gathered in the current iteration only.
        self.new_strong_negatives = []
        self.new_weak_negatives = []
        self.new_visual_expanded = []
        self.visual_expansion = RandomForestRegressor(n_estimators=ve_estimators, min_samples_leaf=ve_leafs,
                                                      n_jobs=-1)  # As in POP

        # regions = [[0], [1]]
        if regions is None:
            self.regions = [[0]]
            self.regions_parts = 1
        elif len(regions) == 2:
            self.regions = regions
            self.regions_parts = sum([len(e) for e in regions])
        else:
            raise ValueError("Regions size must be 2 (body region and legs region)")
        self.size_for_each_region_in_fe = 0  # Initialized at initial iteration

        # Bound later by set_ex().
        self.execution = None
        self.ranking_matrix = None
        self.rank_list = None
        self.comp_list = None
        self.balanced = balanced
        if not balanced:
            self.use_visual_expansion = False
        else:
            self.use_visual_expansion = visual_expansion_use
        self.re_score_alpha = re_score_alpha
        self.re_score_proportional = re_score_proportional

    def set_ex(self, ex, rm):  # OK
        """Bind an execution context and its ranking matrix, then run the
        initial iteration (which also advances to the first subject)."""
        self.execution = ex
        self.ranking_matrix = rm
        self.initial_iteration()

    def new_samples(self, weak_negatives_index, strong_negatives_index, absolute_index=False):  # OK
        self.new_weak_negatives = [[e, idx] for [e, idx] in weak_negatives_index if
                                   [e, idx] not in self.weak_negatives]
        self.new_strong_negatives = [[e, idx] for [e, idx] in strong_negatives_index if
                                     [e, idx] not in self.strong_negatives]
        if not absolute_index:
            self.new_weak_negatives = [[self.rank_list[e], idx] for [e, idx] in self.new_weak_negatives]
            self.new_strong_negatives = [[self.rank_list[e], idx] for [e, idx] in self.new_strong_negatives]

    def _generate_visual_expansion(self):  # OK
        """Synthesize one visual-expansion feature vector for the current probe.

        Averages the predictions of a random 2/3 subset of the fitted
        visual-expansion forest's trees (as in POP) on probe_selected.
        """
        n_estimators = self.visual_expansion.get_params()['n_estimators']
        # int() is required: round() yields a float, which is not a valid
        # slice bound for the permutation array (TypeError on Python 3 /
        # modern NumPy; the original relied on Python 2's lenient slicing).
        selected_len = int(round(float(n_estimators) * (2 / 3.)))
        rng = np.random.RandomState()
        selected = rng.permutation(n_estimators)[:selected_len]
        expansion = np.zeros_like(self.probe_selected)
        for tree_idx in selected:
            # Indexing the ensemble yields the individual fitted tree.
            expansion += self.visual_expansion[tree_idx].predict(self.probe_selected).flatten()
        expansion /= float(selected_len)
        return expansion

    def new_subject(self):  # OK
        """Advance to the next probe subject and reset all per-subject state."""
        if self.subject >= self.execution.dataset.test_size:
            return  # TODO Control situation
        self.subject += 1
        full_name = self.execution.dataset.probe.files_test[self.subject]
        # Keep only the last two path components as the display name.
        self.probe_name = "/".join(full_name.split("/")[-2:])
        self.probe_selected = self.execution.dataset.probe.fe_test[self.subject]
        self.rank_list = self.ranking_matrix[self.subject].copy()
        self.comp_list = self.execution.matching_matrix[self.subject].copy()
        self._calc_target_position()
        self.iteration = 0
        self.strong_negatives = []
        self.weak_negatives = []
        self.visual_expanded = []

    def initial_iteration(self):  # OK
        """Select the first subject, derive per-region feature sizes, and fit
        the visual-expansion forest when enabled."""
        self.new_subject()
        # NOTE(review): under Python 2 this is integer division; assumes the
        # gallery feature length divides evenly by regions_parts -- confirm.
        self.size_for_each_region_in_fe = self.execution.dataset.gallery.fe_test.shape[1] / self.regions_parts
        if self.use_visual_expansion:
            self.visual_expansion.fit(self.execution.dataset.probe.fe_train, self.execution.dataset.gallery.fe_train)

#.........这里部分代码省略.........
开发者ID:AShedko,项目名称:PyReID,代码行数:103,代码来源:post_ranker.py

示例4: str

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import get_params [as 别名]
    ,{ 'fit' : svrFit, 'predict': scaledPredict, 'scaled': True, 'idx0': 3, 'idx1': 4, 'ratio': svmRatio }
    ,{ 'fit' : svrFit, 'predict': scaledPredict, 'scaled': True, 'idx0': 4, 'idx1': 5, 'ratio': svmRatio }
    ,{ 'fit' : svrFit, 'predict': scaledPredict, 'scaled': True, 'idx0': 6, 'idx1': 7, 'ratio': svmRatio }
]

#models2 = combine.combineTrain(X_test, y_test, models)

# Train a 30-tree random forest on 8 parallel jobs and time the fit.
print "Training random forest..."
forestSize = 30
print "\t# Examples: \t\t" + str(len(X_train)) 
print "\tForest Size: \t\t" + str(forestSize)
start = time.time()
clf = RandomForestRegressor(n_estimators=forestSize, n_jobs=8)
clf = clf.fit(X_train, y_train)
print "\tTraining Complete" 
print "\tTime: \t\t" + str(round(time.time() - start, 1)) + "s"

#Reset n_jobs to 1 because multicore evaluation is apparently hard
# NOTE(review): params is captured here but never used later in this chunk.
params = clf.get_params()
clf.set_params(n_jobs = 1)

# Report held-out RMSE, then write the submission from the forest's
# predictions (rmse/submission/filters/pca come from earlier in the script).
print "\tRMSE: \t\t" + str(rmse(X_test, y_test, clf.predict, True))
#results = combine.combineTest(X_test, y_test, clf, models)



#def subPredict(X):
#    return combine.combinePredict(X, clf, models)
submission(clf.predict, filters, pca.transform)

开发者ID:mmcdermo,项目名称:142-galaxy,代码行数:31,代码来源:structureForest.py

示例5: train_predict

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import get_params [as 别名]
for clf in [clf_A, clf_B, clf_C, clf_D, clf_E, clf_F]:
    train_predict(clf, X_train, y_train, X_valid, y_valid)'''
    

# RandomForestRegressor: grid-search over forest size and the split/leaf
# minima, scored by (negated) mean squared error with 5-fold CV.
parameters = {'n_estimators':(10,15,20),
              'min_samples_split':(2,3,4),
              'min_samples_leaf':(1,2,3)}

rfr = RandomForestRegressor(random_state=seed, warm_start=True)
score = make_scorer(mean_squared_error, greater_is_better=False)
grid_obj = GridSearchCV(rfr, param_grid=parameters, scoring=score, verbose=1, n_jobs=4, cv=5)
grid_obj= grid_obj.fit(X_train, y_train)
# Keep the refit best estimator for reporting (and pickling below).
rfr = grid_obj.best_estimator_
print rfr.get_params(), '\n'
print "Tuned model has a training RMSE score of {:.4f}.".format(predict_labels(rfr, X_train, y_train))
print "Tuned model has a testing RMSE score of {:.4f}.".format(predict_labels(rfr, X_valid, y_valid))

# RidgeCV: ridge regression with the regularization strength chosen by
# 5-fold cross-validation over the alphas listed below.
ridge = RidgeCV(alphas=(1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1.0, 10.0), cv=5)
ridge = ridge.fit(X_train, y_train)
print ridge.get_params(), '\n'
print "Tuned model has a training RMSE score of {:.4f}.".format(predict_labels(ridge, X_train, y_train))
print "Tuned model has a testing RMSE score of {:.4f}.".format(predict_labels(ridge, X_valid, y_valid))

# Save regressors
pickle_file = 'regressor.pickle'

try:
  f = open(pickle_file, 'wb')
开发者ID:hangyao,项目名称:StateFarm,代码行数:32,代码来源:StateFarmCode.py

示例6: RandomForestRegressor

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import get_params [as 别名]
# Hold out the trailing (1 - frac) share of the training rows for validation.
valid_X = X_train[int(sz[0] * frac):, :]
valid_Y = Y_train[int(sz[0] * frac):]
####################################################################################
####################################################################################
####################################################################################
#regressor (despite the old "classifier" label: RandomForestRegressor)
RFmodel = RandomForestRegressor(
        n_estimators=1000,        #number of trees to generate
        n_jobs=1,               #single worker (-1 would use all cores)
        criterion="mse"
        )

#train
RFmodel = RFmodel.fit(train_X, train_Y)
#get parameters
params=RFmodel.get_params()
#score on training set (R^2 for a regressor)
acc_rate=RFmodel.score(train_X,train_Y)
print acc_rate
#feature importances, keyed by the column names of X_train.csv
feat_imp=RFmodel.feature_importances_
# NOTE(review): header=False is ambiguous for read_table; header=None (no
# header row) or header=0 is probably intended -- confirm against the file.
df_train=pd.io.parsers.read_table('X_train.csv',sep=',',header=False)
col_names=list(df_train.columns)
feat_imp_dict={col_names[i]:feat_imp[i] for i in range(len(feat_imp))}
feat_imp_sort = sorted(feat_imp_dict.items(), key=operator.itemgetter(1))


# Predict on the validation split, clip negatives to zero, and report RMSE.
y_out=RFmodel.predict(valid_X)
pred = np.array([np.max([0.0,x]) for x in y_out])
print ('prediction error=%f' % np.sqrt(sum( (pred[i]-valid_Y[i])**2 for i in range(len(valid_Y))) / float(len(valid_Y)) ))
开发者ID:golbeck,项目名称:Kaggle,代码行数:32,代码来源:RF_v1.py

示例7: TrainROI

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import get_params [as 别名]
class TrainROI():
	"""Train a regressor and test it on ROI loan data

	"""

	def __init__(self):
		"""Load loan data, derive ROI targets, and build train/test splits."""
		self.load_data()
		self.calculate_roi()
		self.convert_to_float()
		self.split_by_grade()

		self.create_targets_features()
		self.split_train_test(train_size=0.8)
		#self.balance()

		# Drop the outcome-revealing columns from the feature matrices only
		# now, so balance() could still inspect loan_status if enabled above.
		self.X_train = self.X_train.drop(['loan_status', 'total_pymnt', 'roi'], 1).values
		self.y_train = self.y_train.values
		self.X_test = self.X_test.drop(['loan_status', 'total_pymnt', 'roi'], 1).values
		self.y_test = self.y_test.values


	def load_data(self):
		fileName = 'data.pickle'
		print "Loading %s" %fileName
		f = open(fileName, 'rb')
		self.loanData = pickle.load(f)

	def calculate_roi(self):
		"""Append an 'roi' column: fractional return over the funded amount."""
		paid = self.loanData['total_pymnt']
		funded = self.loanData['funded_amnt']
		self.loanData['roi'] = (paid - funded) / funded

	def convert_to_float(self):
		"""Cast every column of the loan DataFrame to float."""
		self.loanData = self.loanData.astype(float)

	def split_by_grade(self, grade='A'):
		"""Keep only loans of the given grade and drop the grade dummy columns."""
		grade_cols = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
		selected = self.loanData[self.loanData[grade] == 1]
		self.loans = selected.drop(grade_cols, 1)


	def split_train_test(self, train_size=0.8):
		mask = np.random.rand(len(self.targets)) < train_size
		self.X_train = self.features[mask]
		self.y_train = self.targets[mask]
		self.X_test = self.features[~mask]
		self.y_test = self.targets[~mask]

		print "Instances in training: ", len(self.X_train)
		print "Instances in testing: ", len(self.X_test)


	def scale(self):
		"""Fit a StandardScaler on the training split and apply it to both."""
		self.scalerX = StandardScaler().fit(self.X_train)
		self.X_train = self.scalerX.transform(self.X_train)
		self.X_test = self.scalerX.transform(self.X_test)

	def standardize_samples(self):
		"""Standardize each split to zero mean and unit variance.

		NOTE(review): unlike scale(), each split is standardized with its
		own statistics -- confirm that is intended.
		"""
		##0 mean, unit variance
		self.X_train = preprocessing.scale(self.X_train)
		self.X_test = preprocessing.scale(self.X_test)

	def scale_samples_to_range(self):
		"""Scale features into [0, 1] using the training-set min/max.

		The scaler is fit on the training split only and then applied to
		the test split, matching scale(); the original re-fit on the test
		set, scaling the two splits with different statistics.
		"""
		##Samples lie in range between 0 and 1
		minMaxScaler = preprocessing.MinMaxScaler()
		self.X_train = minMaxScaler.fit_transform(self.X_train)
		self.X_test = minMaxScaler.transform(self.X_test)

	def balance(self):
		"""Balances the training default and non-default instances.

		Every defaulted loan (loan_status == 0) in the training split is
		appended 10 extra times to X_train/y_train to oversample defaults.
		"""
		print "Total loans before balancing: ", len(self.X_train)
		print "Defaults before balancing: ", np.sum(self.X_train['loan_status'] == 0)
		defaults_added = 0
		for i in range(1, len(self.X_train)):
			# One-row slice preserves the DataFrame/Series shape for append().
			loan = self.X_train[i-1:i]
			loan_roi = self.y_train[i-1:i]
			if int(loan['loan_status']) == 0:
				for n in range(10): 	#replicate the loan multiple times
					defaults_added += 1
					if defaults_added%100 == 0:
						print defaults_added  # progress indicator
					self.X_train = self.X_train.append(loan)
					self.y_train = self.y_train.append(loan_roi)
		print "Total loans after balancing: ", len(self.y_train)
		print "Defaults after balancing: ", np.sum(self.X_train['loan_status'] == 0)

	def create_targets_features(self):
		"""Set targets to the 'roi' column; features alias the full frame.

		NOTE(review): features still contains 'roi', 'loan_status' and
		'total_pymnt' at this point; __init__ drops them after splitting.
		"""
		self.targets = self.loans['roi']
		self.features = self.loans

	def define_linear_regressor(self):
		"""Use ordinary least-squares linear regression as the regressor."""
		self.regr = LinearRegression()

	def define_SVR(self, C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, 
				  probability=False, tol=0.01, cache_size=200, class_weight='auto', verbose=True, 
				  max_iter=-1, random_state=None):
		"""Use a support vector regressor with the given hyper-parameters.

		NOTE(review): class_weight is accepted here but never forwarded to
		SVR below, so it is silently ignored -- confirm that is intended.
		"""
		print "Using a Support Vector Machine Regressor ..."
		self.regr = SVR(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, shrinking=shrinking, 
				  probability=probability, tol=tol, cache_size=cache_size, verbose=verbose, 
				  max_iter=max_iter, random_state=random_state)

		print self.regr.get_params()

#.........这里部分代码省略.........
开发者ID:mhdella,项目名称:LendingLounge,代码行数:103,代码来源:train_roi.py

示例8: main_params

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import get_params [as 别名]

#.........这里部分代码省略.........
                    m = GradientBoostingRegressor(n_estimators=m.best_params_["n_estimators"],
                                                  subsample=0.5, max_features=0.5, random_state=seed)
                elif model_type == "class":
                    m = GradientBoostingClassifier(n_estimators=m.best_params_["n_estimators"],
                                                   subsample=0.5, max_features=0.5, random_state=seed)
                models_lst.append(m)

            if current_model == "svm":

                # choosing optimal parameters
                if model_type == "reg":
                    param_grid = {"C": [10 ** i for i in range(0, 5)],
                                  "epsilon": [0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0.01]}
                    m = ms.GridSearchCV(svm.SVR(kernel='rbf'), param_grid, n_jobs=ncores, cv=cv, refit=False,
                                        verbose=verbose)
                elif model_type == "class":
                    param_grid = {"C": [10 ** i for i in range(0, 5)],
                                  "gamma": [10 ** i for i in range(-6, 0)]}
                    m = ms.GridSearchCV(svm.SVC(kernel='rbf', random_state=seed), param_grid, n_jobs=ncores, cv=cv, refit=False,
                                        verbose=verbose)
                m.fit(x[subset], y[subset])

                # final model
                if model_type == "reg":
                    m = svm.SVR(kernel='rbf', C=m.best_params_["C"], epsilon=m.best_params_["epsilon"])
                elif model_type == "class":
                    m = svm.SVC(kernel='rbf', C=m.best_params_["C"], gamma=m.best_params_["gamma"],
                                probability=True, random_state=seed)
                models_lst.append(m)

            if current_model == "pls" and model_type == "reg":

                # choosing optimal parameters
                param_grid = {"n_components": [i for i in range(1, 8)]}
                m = ms.GridSearchCV(PLSRegression(), param_grid, n_jobs=ncores, cv=cv, refit=False, verbose=verbose)
                m.fit(x[subset], y[subset])

                # final model
                m = PLSRegression(n_components=m.best_params_["n_components"])
                models_lst.append(m)

            if current_model == "knn":

                # choosing optimal parameters
                param_grid = {"n_neighbors": [i for i in range(3, 21)]}
                if model_type == "reg":
                    m = ms.GridSearchCV(KNeighborsRegressor(), param_grid, n_jobs=ncores, cv=cv, refit=False, verbose=verbose)
                elif model_type == "class":
                    m = ms.GridSearchCV(KNeighborsClassifier(), param_grid, n_jobs=ncores, cv=cv, refit=False, verbose=verbose)
                m.fit(x[subset], y[subset])

                # final model
                if model_type == "reg":
                    m = KNeighborsRegressor(n_neighbors=m.best_params_["n_neighbors"])
                elif model_type == "class":
                    m = KNeighborsClassifier(n_neighbors=m.best_params_["n_neighbors"])
                models_lst.append(m)

        # return cv predictions
        ncol = len(models_lst) + 1 if len(models_lst) > 1 else len(models_lst)   # +1 column if consensus
        cv_pred = np.column_stack((y, np.full((y.shape[0], ncol), np.nan)))

        for i, (m, subset) in enumerate(zip(models_lst, subsets)):
            pred = ms.cross_val_predict(estimator=m, X=x[subset], y=y[subset], cv=cv)
            if current_model == 'pls':   # reshape for pls because it returns 2d array and we need 1d
                pred = pred.reshape(len(subset))
            cv_pred[subset, i + 1] = pred

            # build final model, save it and its stat
            m.fit(x[subset], y[subset])
            add_obj_to_file(os.path.join(models_dir, current_model + '.pkl'), m)
            save_model_stat_2(current_model + '_%i' % i, model_stat_fname, str(m.get_params())[1:-1],
                              y[subset],
                              cv_pred[subset, i + 1],
                              model_type,
                              verbose)

        # calc cv consensus and save stat
        if model_type == "class" and len(models_lst) > 1:
            cv_pred[:, -1] = np.apply_along_axis(get_major_vote, 1, cv_pred[:, 1:])
            # cv_pred[:, -1] = np.around(np.nanmean(cv_pred[:, 1:], axis=1))
            save_model_stat_2(current_model + "_consensus", model_stat_fname, "",
                              y,
                              cv_pred[:, -1],
                              model_type,
                              verbose)

        # save cv predictions
        if cv_predictions:
            np.savetxt(os.path.join(models_dir, current_model + "_cv_pred.txt"),
                       np.column_stack([mol_names, np.round(cv_pred, 3)]),
                       fmt="%s",
                       delimiter="\t",
                       comments="",
                       header="Mol\tObs\t" +
                              "\t".join("%s_%i" % (current_model, i) for i in range(len(models_lst))) +
                              "\t" + current_model + "_consensus")

        if verbose:
            print(current_model.upper() + ' model was built\n')
开发者ID:DrrDom,项目名称:spci,代码行数:104,代码来源:model.py

示例9: main

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import get_params [as 别名]
def main(arg1):
	#print arg1
	fname = '../EssentiaTrainFeatures/'+ arg1 #Liquids_To_UnvoicedPlosives.arff'
	fname2 = './'+ arg1	#Liquids_To_UnvoicedPlosives.arff'
	start = time()
	try:
		f = open(fname,'r')
	except:
		return('error')
	#lines = f.readlines()[:]
	#f.close()       
	#floats = []
	#for line in lines:   
	#	floats.append(shlex.split(line))
	
	#array = np.asarray(floats)
	#for (x,y), value in np.ndenumerate(array):
	#	if value == np.nan or value == 'NaN':
	#		array[x][y] = 0;
	#	elif value == np.infty:
	#		array[x][y] = 1;
	array = np.loadtxt(f)
	f.close()
	array = np.nan_to_num(array)
	#array = array.astype(np.float)
	print 'Data size'
	print np.shape(array)
	#scale = StandardScaler()
	#array = scale.fit_transform(array)
	trainY = array[:,305]
	trainX = np.delete(array, [302,303,304,305,306,307],1)
	elapsed = time() - start
	print 'Training array loading time'
	print elapsed/60
	f = open(fname2,'r')
	#lines = f.readlines()[:]
	#f.close()       
	#floats = []
	#for line in lines:     
	#	floats.append(shlex.split(line))
	#array2 = np.asarray(floats)
	#for (x,y), value in np.ndenumerate(array2):
	#	if value == np.nan or value == 'NaN':
	#		array2[x][y] = 0;
         #       elif value == np.infty:
          #              array2[x][y] = 2;
	array2 = np.loadtxt(f)
	f.close()
	array2 = np.nan_to_num(array2)
	#array2 = array2.astype(np.float)
	print 'Test size'
	print np.shape(array2)
	#scale = StandardScaler()
	#array = scale.fit_transform(array)
	#traiY = array[:,38]
	#Position = array2[:,36]
	#Hmmboundary = array2[:,37]
	#Manualboundary = array2[:,38]
	hmm_true = array2[:,305]
	hmmX = np.delete(array2, [302,303,304,305,306,307],1)
	
	#trainY, realY, trainX, testX = train_test_split(traiY,traiX,test_size=0.8,random_state=42)
	#Cost = np.power(2,np.arange(1,12));
	#g = [0.5,0.25,0.125,0.0625,0.03125,0.015625,0.0078125,0.00390625,0.001953125,0.0009765625,0.00048828125,0.00048828125]
	#print '\nCost values'
	#print Cost
	#print '\ngamma values'
	#print g
	#scorebest = 0
	#Cbest = 0
	#gammabest = 0
	#model_to_set = NuSVR(C=32, cache_size=2048, coef0=0.0, degree=3, gamma=0.03125, kernel='rbf',
 	#  max_iter=-1, nu=0.5, probability=False, shrinking=True, tol=0.001,
  	# verbose=True)
	#parameters = {'C':Cost,'gamma':g}#,'nu':[0.5],'kernel':['rbf'],'verbose':[True]}
	#k =[0.5,1]#2,5,7,8];
	model_to_set = RandomForestRegressor(n_estimators=100, criterion='mse', max_depth=5000, min_samples_split=2000, min_samples_leaf=10,min_density=0.1, max_features='auto', bootstrap=True, compute_importances=False, oob_score=False, n_jobs=3, random_state=None, verbose=0)
	#parameters = {'n_estimators':[10,100,500],'max_depth':[1,5,20,100,None],'min_samples_split':[1,5,20,100],}
	#trainY, realY, trainX, testX = train_test_split(traiY,traiX,test_size=0,random_state=42)
	print '\nparams'
	print model_to_set.get_params()
	start = time()
	print '\ntraining start time'
	print strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
	model_to_set.fit(trainX,trainY)
	print '\ntraining end time'
	print strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
	elapsed = (time() - start)
	print elapsed/60

	y_pred = model_to_set.predict(trainX)
	#return(y_pred,trainY)
	#score1 = model_to_set.score(trainX,trainY)
	#print 'score1'
	#print score1
	#print 'Myscore1'
	#print MyScore(trainY,y_pred)
	
	#y_pred = model_to_set.predict(testX)
	#score2 = model_to_set.score(testX,realY)
#.........这里部分代码省略.........
开发者ID:neo01124,项目名称:ModifiedSPM,代码行数:103,代码来源:trainAndTest.py


注:本文中的sklearn.ensemble.RandomForestRegressor.get_params方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。