Python ensemble.RandomForestRegressor类代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestRegressor类的典型用法代码示例。如果您正苦于以下问题：Python RandomForestRegressor类的具体用法？Python RandomForestRegressor怎么用？Python RandomForestRegressor使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了RandomForestRegressor类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train_year

def train_year(train_fea, trees):
    """Train one random forest per sale year and measure cross-year error.

    A separate ``RandomForestRegressor`` is fitted on the rows of each
    distinct ``SaleYear``; every column except ``SalePrice`` is used as a
    feature. Then, for every year distance ``i`` in ``1 .. len(years) - 1``
    and every pair ``p`` returned by ``get_pairs(years, i)``, the model
    trained on year ``p[0]`` predicts the prices of year ``p[1]`` and the
    mean relative error ``|predicted - actual| / actual`` is recorded.

    Args:
        train_fea: DataFrame with at least ``SaleYear`` and ``SalePrice``
            columns.
        trees: number of trees in each forest.

    Returns:
        ``(rfs, errors_list)`` where ``rfs[k]`` is the model fitted on the
        k-th smallest year and ``errors_list[i - 1]`` is the average, over
        all pairs at distance ``i``, of the per-pair mean relative error
        (``nan`` when ``get_pairs`` yields no pair for that distance).
    """
    years = sorted(set(train_fea['SaleYear'].values))

    rfs = []
    for year in years:
        print('train model %d' % year)
        # NOTE(review): compute_importances is only accepted by old
        # scikit-learn releases; it is kept because callers receive the
        # fitted models - confirm against the pinned version before removing.
        rf = RandomForestRegressor(n_estimators=trees, n_jobs=1,
                                   compute_importances=True)
        year_rows = train_fea[train_fea['SaleYear'] == year]
        rf.fit(year_rows.drop('SalePrice', axis=1), year_rows['SalePrice'])
        rfs.append(rf)

    errors_list = []
    for dist in range(1, len(years)):
        pair_means = []
        for p in get_pairs(years, dist):
            print('compare %d, %d' % (p[0], p[1]))
            target_rows = train_fea[train_fea['SaleYear'] == p[1]]
            model = rfs[years.index(p[0])]
            predicted = model.predict(target_rows.drop('SalePrice', axis=1))
            actual = target_rows['SalePrice'].values
            # Vectorised relative error; avoids the Python-2-only
            # np.array(map(...)) idiom.
            error_rates = np.abs(predicted - actual) / actual
            pair_means.append(error_rates.mean())
        errors_list.append(np.mean(pair_means) if pair_means else float('nan'))
    return rfs, errors_list

开发者ID:zhangda，项目名称:bulldozers，代码行数:34，代码来源:yearbase.py

示例2: pipeline

def pipeline():
    """Train a random forest on the module-level ``data`` and score it.

    Rows with ``watch == 1`` form the validation set; rows whose ``watch``
    is neither 0 nor 1 form the training set. Each training row is weighted
    by the smaller of its ``a`` and ``b`` values. Missing feature values are
    filled with the per-column median of the respective set. The validation
    predictions are passed to ``cal_cost`` and written to
    ``val_<cost[1]>.csv``.
    """
    id_cols = ['item_id', 'store_code', 'a', 'b']
    non_feature_cols = ['label', 'watch'] + id_cols

    val = data[data.watch == 1]
    # Copy so the prediction columns added below do not write into a view
    # of ``data``.
    val_a_b = val[id_cols].copy()
    val_y = val.label
    val_x = val.drop(non_feature_cols, axis=1)

    train = data[(data.watch != 1) & (data.watch != 0)]
    train_y = train.label
    train_weight = np.minimum(train.a.values, train.b.values)
    train_x = train.drop(non_feature_cols, axis=1)

    train_x = train_x.fillna(train_x.median())
    val_x = val_x.fillna(val_x.median())

    model = RandomForestRegressor(n_estimators=500, max_depth=5,
                                  max_features=0.6, n_jobs=-1,
                                  random_state=1024)

    # train
    model.fit(train_x, train_y, sample_weight=train_weight)

    # predict val set
    val_a_b['pred'] = model.predict(val_x)
    val_a_b['y'] = val_y
    cost = cal_cost(val_y.values, val_a_b.pred.values,
                    val_a_b.a.values, val_a_b.b.values)
    val_a_b.to_csv('val_{0}.csv'.format(cost[1]), index=None)

开发者ID:foxchopin，项目名称:CaiNiao-DemandForecast-StoragePlaning，代码行数:34，代码来源:rf.py

示例3: cross_validate

def cross_validate(features_target):
    """Run 10-fold cross-validation of a random forest and report the result.

    ``features_target[0]`` holds the features and ``features_target[1]`` the
    targets. For every fold the forest is refitted on the training part and
    the ``(actual, predicted)`` pairs of the held-out part are collected;
    ``importance`` is called on the forest after each fold and
    ``combined_auc`` receives all collected pairs at the end.
    """
    features = features_target[0]
    target = features_target[1]

    forest = RandomForestRegressor(
        n_estimators=100,
        verbose=2,
        n_jobs=1,
        min_samples_split=10,
        compute_importances=True,
        random_state=1
    )
    folds = cross_validation.KFold(len(features), n_folds=10, indices=False)

    results = []
    for fold_no, (traincv, testcv) in enumerate(folds, 1):
        print("Running fold " + str(fold_no))
        fitted = forest.fit(features[traincv], target[traincv])
        predictions = fitted.predict(features[testcv]).flatten()

        held_out = target[testcv]
        results.extend(
            (held_out[j], predictions[j]) for j in range(len(predictions))
        )

        importance(forest)

    combined_auc(results)

开发者ID:EoinLawless，项目名称:CauseEffect，代码行数:26，代码来源:TrainTestValidate.py

示例4: RFscore_one

def RFscore_one(x, y, id):
    """Score how well a random forest predicts ``y`` from ``x``.

    Both inputs are shuffled with the same random permutation, standardised
    to zero mean and unit standard deviation, and reshaped to column
    vectors. A forest is then evaluated with 3-fold cross-validation.

    Args:
        x: 1-D array of inputs.
        y: 1-D array of targets, same length as ``x``.
        id: label appended to the progress message.

    Returns:
        The mean of the per-fold ``score`` values divided by ``dist(y)``,
        where ``y`` is the standardised column vector.
    """
    folds = 3
    print("RFscore " + id)

    # A concrete list is required: shuffling a bare range() object fails on
    # Python 3.
    order = list(range(len(x)))
    np.random.shuffle(order)
    x = x[order]
    y = y[order]
    x = (x - np.mean(x)) / np.std(x)
    y = (y - np.mean(y)) / np.std(y)

    x = np.array(x, ndmin=2).T
    y = np.array(y, ndmin=2).T

    # Feature importances are never read here, so the old
    # compute_importances flag is not passed.
    rf = RandomForestRegressor(n_estimators=50, verbose=0, n_jobs=1,
                               min_samples_split=10, random_state=1)
    # NOTE(review): the result of this full-data fit is not used below; it is
    # kept in case repeated fits share RNG state in the targeted scikit-learn
    # release - confirm and remove.
    rf.fit(x, y)

    cv = cross_validation.KFold(len(x), n_folds=folds, indices=False)
    score = 0
    median = dist(y)
    for traincv, testcv in cv:
        fit = rf.fit(x[traincv], y[traincv])
        score += fit.score(x[testcv], y[testcv])

    score /= folds
    score /= median
    return score

开发者ID:EoinLawless，项目名称:CauseEffect，代码行数:32，代码来源:features.py

示例5: do_regression

def do_regression(df, j, i, k):
    """Fit a random forest predicting ``count`` from humidity and temperature.

    Only the rows of ``df`` with ``workingday == j``, ``Hour == i`` and
    ``Year == 2011 + k`` are used; ``count``, ``humidity`` and ``temp`` are
    cast to ``int`` before fitting.

    Args:
        df: DataFrame with ``workingday``, ``Hour``, ``Year``, ``count``,
            ``humidity`` and ``temp`` columns.
        j: value of ``workingday`` to select.
        i: value of ``Hour`` to select.
        k: year offset; the selected year is ``2011 + k``.

    Returns:
        The fitted ``RandomForestRegressor`` (100 trees).
    """
    # Build the row mask once instead of once per column.
    selected = (
        (df['workingday'] == j)
        & (df['Hour'] == i)
        & (df['Year'] == 2011 + k)
    )

    y = df.loc[selected, 'count'].astype(int).values
    x_1 = df.loc[selected, 'humidity'].astype(int).values
    x_2 = df.loc[selected, 'temp'].astype(int).values
    # Materialise the pairs: on Python 3 zip() is a one-shot iterator that
    # fit() cannot consume as a 2-D sample matrix.
    x = list(zip(x_1, x_2))

    regr = RandomForestRegressor(n_estimators=100)
    regr.fit(x, y)
    return regr

开发者ID:michaelbateman，项目名称:KagglePlayground，代码行数:25，代码来源:randomforest.py

示例6: regression

def regression(X_train, y_train, X_test, y_test):
    """Train a random forest regressor on the training set.

    Args:
        X_train: training features.
        y_train: training targets.
        X_test: unused; kept so existing callers keep working.
        y_test: unused; kept so existing callers keep working.

    Returns:
        The fitted ``RandomForestRegressor``.

    The plotting and debug-metric code that used to follow the first
    ``return`` could never run (and referenced an undefined
    ``feature_names``), so it has been removed.
    """
    # 'criterion' and 'min_density' are left at their defaults and
    # 'min_samples_split' is 2 rather than 1: a one-sample node cannot be
    # split anyway, and later scikit-learn releases reject the old
    # spellings - TODO confirm against the pinned scikit-learn version.
    params = {
        'n_estimators': 1000,
        'max_depth': 20,
        'min_samples_split': 2,
        'min_samples_leaf': 1,
        'max_features': 2,
        'bootstrap': True,
        'oob_score': False,
        'n_jobs': 32,
        'random_state': 0,
        'verbose': 0,
        'max_leaf_nodes': None,
    }
    if config.DEBUG:
        params['verbose'] = 1

    regr = RandomForestRegressor(**params)

    # Train the model using the training sets
    regr.fit(X_train, y_train)
    return regr

开发者ID:BinbinBian，项目名称:semeval-relatedness，代码行数:26，代码来源:semeval_task1.py

示例7: fit

 def fit(self, X, y, **kwargs):
     """Fit a new ``RandomForestRegressor`` and store it on ``self.model``.

     Keyword arguments whose names already exist in ``self.INITPARAMS``
     overwrite the stored values before the model is built; unknown
     keywords are ignored.

     NOTE(review): ``self.INITPARAMS`` is updated in place, so overrides
     persist for later calls (and for other instances if it is a
     class-level dict) - confirm this is intended.
     """
     # items() works on both Python 2 and 3, unlike iteritems().
     for key, value in kwargs.items():
         if key in self.INITPARAMS:
             self.INITPARAMS[key] = value
     model = RandomForestRegressor(**self.INITPARAMS)
     model.fit(X, y)
     self.model = model

开发者ID:DJRumble，项目名称:S2DS，代码行数:7，代码来源:estimator.py

示例8: random_learning

def random_learning(labels, train, test):
    """Predict ``test`` targets with a forest trained on log-scaled labels.

    The forest (50 trees, 3 jobs) is fitted on ``log1p(labels)`` and its
    predictions are mapped back with ``expm1`` before being returned.
    """
    forest = RandomForestRegressor(n_estimators=50, n_jobs=3)
    fitted = forest.fit(train, np.log1p(labels))
    log_predictions = fitted.predict(test)
    return np.expm1(log_predictions)

开发者ID:nickmcadden，项目名称:Kaggle，代码行数:7，代码来源:cv_example.py

示例9: get_kernel

def get_kernel(train_data, test_data, label, n_kernel_samples=100):
    """Estimate a kernel matrix by averaging partial kernels from a forest.

    A 1000-tree forest is fitted on ``train_data`` / ``label``. The training
    and test rows are then stacked into one dataset, and
    ``get_partial_kernel(forest, dataset)`` is evaluated
    ``n_kernel_samples`` times; the element-wise mean of those matrices is
    returned.

    Args:
        train_data: training samples, one row per sample.
        test_data: test samples, stacked below the training samples.
        label: training targets.
        n_kernel_samples: number of partial kernels to average (default 100).

    Returns:
        Array of shape ``(len(dataset), len(dataset))`` holding the averaged
        kernel.

    Raises:
        ValueError: if ``n_kernel_samples`` is smaller than 1.
    """
    if n_kernel_samples < 1:
        raise ValueError("n_kernel_samples must be at least 1")

    # Define forest (n_estimators = number of trees)
    forest = RandomForestRegressor(n_estimators=1000, warm_start=True)
    forest = forest.fit(train_data, label)

    dataset = np.concatenate((train_data, test_data), axis=0)
    sample_size = len(dataset)

    # Keep a running sum rather than storing every partial kernel: memory
    # stays at one N x N matrix instead of n_kernel_samples of them.
    kernel_sum = np.zeros((sample_size, sample_size))
    for m in range(n_kernel_samples):
        print("Building partial kernel: {}".format(m))
        kernel_sum += get_partial_kernel(forest, dataset)

    return kernel_sum / n_kernel_samples

开发者ID:marthall，项目名称:random_forest_kernel，代码行数:31，代码来源:RegressionKernel.py

示例10: main

def main():
    """Cross-validate a random forest on the ACT12 training set.

    The first column of every row read by ``read_data`` is the target; the
    feature columns are reduced by ``filter_cols`` using
    ``../selected/selected_12.txt``. The forest is evaluated with 5-fold
    cross-validation: each fold's model is fitted on the training part and
    scored on the held-out part, and the mean held-out score is printed.
    """
    # read in data, parse into training and target sets
    cols, train = read_data("../TrainingSet/ACT12_competition_training.csv", 1)
    target = np.array([x[0] for x in train])

    train = np.array(filter_cols(train, cols, "../selected/selected_12.txt"))

    # In this case we'll use a random forest, but this could be any estimator
    cfr = RandomForestRegressor(n_estimators=500,
                                max_features=(len(train[0]) // 3),
                                n_jobs=8)

    # Simple K-Fold cross validation. 5 folds.
    cv = cross_validation.KFold(len(train), k=5, indices=False)

    results = []
    for traincv, testcv in cv:
        ft = cfr.fit(train[traincv], target[traincv])
        # Evaluate on the held-out fold; scoring the rows the model was just
        # fitted on would not be a cross-validated estimate.
        pred = ft.predict(train[testcv])
        print(pred[:10])
        score = ft.score(train[testcv], target[testcv])
        results.append(score)
        print("\tFold %d: %f" % (len(results), score))

    # print out the mean of the cross-validated results
    print("Results: " + str(np.array(results).mean()))

开发者ID:ashispapu，项目名称:kaggle-1，代码行数:28，代码来源:randomForest.py

示例11: train_with_features

    def train_with_features(self, features):
        """Fit a default random forest on the selected feature columns.

        ``self.data_folder.truncate(self.A, features)`` supplies the design
        matrix and ``self.target`` the targets; the fitted regressor is
        returned.
        """
        design_matrix = self.data_folder.truncate(self.A, features)
        regressor = RandomForestRegressor()
        regressor.fit(design_matrix, self.target)
        return regressor

开发者ID:nicster，项目名称:bikesharingML，代码行数:7，代码来源:randomForest.py

示例12: test_rrf_vs_sklearn_reg

    def test_rrf_vs_sklearn_reg(self):
        """Check the R-backed forest against scikit-learn on Boston housing.

        Both models use 100 trees, sampling without replacement and the same
        number of candidate features per split; their test-set mean squared
        errors must agree to zero decimal places.
        """
        from sklearn.datasets import load_boston
        from sklearn.cross_validation import train_test_split
        from sklearn.metrics import mean_squared_error
        from sklearn.ensemble import RandomForestRegressor

        dataset = load_boston()
        X_train, X_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.2, random_state=13)

        n_samples, n_features = X_train.shape
        mtry = int(np.floor(0.3 * n_features))

        # R side: 100 trees
        r_params = {
            'ntree': 100,
            'nodesize': 1,
            'replace': 0,
            'mtry': mtry,
            'corr.bias': False,
            'sampsize': n_samples,
            'random_state': 1234,
        }
        r_model = RRFEstimatorR(**r_params)
        r_model.fit(X_train, y_train)
        r_mse = mean_squared_error(y_test, r_model.predict(X_test))

        # scikit-learn side, configured to mirror the R settings
        sk_model = RandomForestRegressor(
            n_estimators=100,
            min_samples_leaf=1,
            bootstrap=False,
            max_features=mtry,
            random_state=1,
        )
        sk_model.fit(X_train, y_train)
        p_mse = mean_squared_error(y_test, sk_model.predict(X_test))

        print('%.4f vs %.4f' % (r_mse, p_mse))
        # should be roughly the same (7.6 vs. 7.2)
        np.testing.assert_almost_equal(r_mse, p_mse, decimal=0)

开发者ID:tkincaid，项目名称:tkincaid.github.com，代码行数:29，代码来源:test_rrf_bm.py

示例13: round2

def round2(X, y):
    """Grid-search tree count and ``max_features`` with 5-fold CV.

    Every combination of 50/100/200/500 trees and ``'auto'``/``'log2'``
    features is evaluated by the root mean squared error on each of five
    shuffled folds; the per-combination results are printed as they are
    produced.

    Returns:
        Dict with keys ``'estimator'``, ``'max_feature'`` and ``'scores'``
        describing the combination with the lowest mean fold RMSE.
    """
    best = {}
    for n_trees in [50, 100, 200, 500]:
        for max_feat in ['auto', 'log2']:
            forest = RandomForestRegressor(n_estimators=n_trees,
                                           max_features=max_feat)

            # Perform 5-fold cross validation, one RMSE per fold
            folds = KFold(len(y), n_folds=5, shuffle=True)
            fold_rmse = []
            for train_idx, test_idx in folds:
                forest.fit(X[train_idx], y[train_idx])
                predicted = forest.predict(X[test_idx])
                fold_rmse.append(
                    np.sqrt(mean_squared_error(y[test_idx], predicted)))

            # The first combination always becomes the initial best.
            if not best or np.mean(fold_rmse) < np.mean(best['scores']):
                best['estimator'] = n_trees
                best['max_feature'] = max_feat
                best['scores'] = fold_rmse

            print("Estimator: %s" % n_trees)
            print("Max Features: %s" % max_feat)
            print(fold_rmse)
            print(np.mean(fold_rmse))
    return best

开发者ID:gokamoto，项目名称:AdvancedMLProject，代码行数:35，代码来源:model_RandomForest.py

示例14: RandomForestModel

class RandomForestModel(Model):
    """Random forest model backed by ``RandomForestRegressor``."""

    def __init__(self, *argv, **args):
        """Initialise the base ``Model`` and create the forest.

        Positional arguments are forwarded to ``Model.__init__``; keyword
        arguments are forwarded to ``RandomForestRegressor``.
        """
        super(RandomForestModel, self).__init__(*argv)
        self.rf = RandomForestRegressor(**args)

    def pretreat_feature(self):
        """Pre-process the feature data (currently does nothing)."""
        pass

    def train(self):
        """Fit the forest on ``self.x`` / ``self.y``."""
        self.rf.fit(self.x, self.y)

    def assess(self):
        """Print the sum of squared errors over ``self.test_x`` / ``self.test_y``.

        All test samples are predicted in one call, so the estimator gets a
        2-D sample matrix rather than individual 1-D rows.
        """
        # NOTE(review): the label says "Training" although the error is
        # computed on test_x/test_y; the text is left unchanged in case
        # something parses this output.
        error = 0.0
        if len(self.test_x) > 0:
            predictions = self.predict(self.test_x)
            for pre_val, actual in zip(predictions, self.test_y):
                error += (pre_val - actual) ** 2
        print('Training Error:  %s' % error)

    def predict(self, x):
        """Return the forest's predictions for the samples in ``x``."""
        return self.rf.predict(x)

    def validate(self):
        """Choose meta-parameters by cross-validation (not implemented)."""
        pass

开发者ID:kymo，项目名称:kaggle，代码行数:31，代码来源:model.py

示例15: do_rf

def do_rf(filename, n_rows=200):
    """Fit a random forest on the merged dataset and predict its first rows.

    ``create_merged_dataset(filename)`` supplies the frame and the targets;
    the ``driver`` and ``trip`` columns are excluded from the features.

    Args:
        filename: passed through to ``create_merged_dataset``.
        n_rows: how many leading rows to predict and return (default 200).

    Returns:
        DataFrame with ``driver``, ``trip`` and ``probs`` columns for the
        first ``n_rows`` rows, where ``probs`` holds the forest's
        predictions on those same rows.
    """
    df, Y = create_merged_dataset(filename)
    rf = RandomForestRegressor(n_estimators=100)
    # axis must be passed by keyword: recent pandas no longer accepts it
    # positionally.
    X = df.drop(['driver', 'trip'], axis=1)
    rf.fit(X, Y)
    probs = rf.predict(X[:n_rows])
    return pd.DataFrame({
        'driver': df['driver'][:n_rows],
        'trip': df['trip'][:n_rows],
        'probs': probs,
    })

开发者ID:fabiogm，项目名称:kaggle-driver-telematics，代码行数:7，代码来源:main.py

注：本文中的sklearn.ensemble.RandomForestRegressor类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。