当前位置: 首页>>代码示例>>Python>>正文


Python RandomForestRegressor.fit方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestRegressor.fit方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestRegressor.fit方法的具体用法?Python RandomForestRegressor.fit怎么用?Python RandomForestRegressor.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.RandomForestRegressor的用法示例。


在下文中一共展示了RandomForestRegressor.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def main():
    """Fit a random forest on the listed symbols and write ranked predictions.

    Reads one symbol per line from ``25-75_microcap_list.txt``, obtains the
    train/test frames from ``get_data``, drops every row that contains an
    infinity or a NaN, fits a 250-tree ``RandomForestRegressor`` and writes
    the leading columns of the test frame plus a ``Prediction`` column,
    sorted by descending prediction, to ``trade_result.csv``.
    """
    # A context manager guarantees the symbol file is closed; the handle was
    # previously left open for the rest of the run.
    with open('25-75_microcap_list.txt', 'r') as fi:
        symbols = [line.strip() for line in fi]

    train, test = get_data(symbols, n = 30, flag = 1, blag = 12)

    # Infinities become NaN so that a single dropna removes both.
    train = train.replace([np.inf, -np.inf], np.nan)
    test = test.replace([np.inf, -np.inf], np.nan)

    train = train.dropna(axis=0)
    test = test.dropna(axis=0)

    print('Fitting\n')
    m = RandomForestRegressor(n_estimators=250, n_jobs=1)
    # Column 5 of the training frame is the target, columns 6+ the features.
    m.fit(train.ix[:,6:], train.ix[:,5])
    print('Predicting\n')
    # NOTE(review): prediction uses columns 5+ of `test` while training used
    # columns 6+ of `train`; presumably `test` has no target column -- confirm
    # against get_data.
    preds = m.predict(test.ix[:,5:])

    result = test.ix[:,:4]
    result['Prediction'] = preds
    result = result.sort('Prediction', ascending=False)
    print(result.head())
    result.to_csv('trade_result.csv', sep = ',', index = False)
开发者ID:iswdp,项目名称:microcap,代码行数:28,代码来源:trade.py

示例2: train_sklearn_forest

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def train_sklearn_forest(XAlltr, XAllcv, yAlltr, yAllcv, trees=20):
    """Fit one random forest per target column and record its errors.

    For every column of ``yAlltr`` a ``RandomForestRegressor`` with ``trees``
    estimators is fitted on ``XAlltr``.  Its error on the training data and
    on the cross-validation data (``XAllcv`` / ``yAllcv``) is computed with
    ``pred_error``.

    Returns:
        A ``(models, errors)`` tuple: the fitted regressors in column order
        and a list of ``(trees, feature, err, errcv)`` tuples.
    """
    errors = []
    models = []

    print("training sklearn forset")

    for feature in range(np.shape(yAlltr)[1]):
        y = yAlltr[:, feature]
        ycv = yAllcv[:, feature]

        # Only this default-parameter forest was ever fitted: an earlier
        # construction with min_samples_leaf=30 / max_depth=20 was overwritten
        # before use, so that dead assignment has been dropped.
        clf = RandomForestRegressor(n_estimators=trees)
        clf.fit(XAlltr, y)

        err = pred_error(y, clf.predict(XAlltr), feature)
        errcv = pred_error(ycv, clf.predict(XAllcv), feature)

        print([trees, feature, err, errcv])

        errors.append((trees, feature, err, errcv))
        models.append(clf)

    return models, errors
开发者ID:Schmiddi,项目名称:Hand_Pose_Estimation,代码行数:33,代码来源:train_second_phase.py

示例3: buildTreeRegressor

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def buildTreeRegressor(predictorColumns, structurestable = 'structures.csv',  targetcolumn = 'c_a', md = None):
    """
    Train a random-forest regressor that predicts `targetcolumn` from `predictorColumns`.

    The table at `structurestable` is loaded and rows with missing values are
    dropped; when a 'fracNobleGas' column exists, rows where it is positive are
    dropped as well.  Predictors are standardized, a forest of maximum depth
    `md` is cross-validated with mean absolute error, and a true-vs-predicted
    table is built from one train/test split.

    Returns the forest refit on all rows, the true-vs-predicted table, and the
    mean cross-validation error rounded to two decimals.
    """
    table = pd.read_csv(structurestable).dropna()
    if 'fracNobleGas' in table.columns:
        table = table[table['fracNobleGas'] <= 0]

    features = StandardScaler().fit_transform(table[predictorColumns].astype('float64'))
    target = table[targetcolumn].values

    forest = RandomForestRegressor(max_depth = md)
    mae_scorer = make_scorer(mean_absolute_error)
    cv_error = mean(cross_val_score(forest, features, target, scoring=mae_scorer))

    # Single hold-out split, used only to build the comparison table.
    X_train, X_test, y_train, y_test = train_test_split(features, target)
    forest.fit(X_train, y_train)
    comparison = pd.DataFrame({'True': y_test, 'Predicted': forest.predict(X_test)})

    # The returned model is trained on every available row.
    forest.fit(features, target)

    return forest, comparison, round(cv_error, 2)
开发者ID:rhsimplex,项目名称:matprojgeom,代码行数:28,代码来源:modelbuilder.py

示例4: do_regression

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def do_regression(df, j, i, k):
    """Fit a random forest predicting 'count' from 'humidity' and 'temp'.

    Only the rows of the pandas DataFrame ``df`` with ``workingday == j``,
    ``Hour == i`` and ``Year == 2011 + k`` are used.  All three columns are
    cast to ``int`` before fitting.

    Returns:
        The fitted ``RandomForestRegressor`` (100 trees).
    """
    # Build the row filter once; it is identical for all three columns.
    selected = (df['workingday'] == j) & (df['Hour'] == i) & (df['Year'] == 2011 + k)

    y = df.loc[selected, 'count'].astype(int).values
    x_1 = df.loc[selected, 'humidity'].astype(int).values
    x_2 = df.loc[selected, 'temp'].astype(int).values
    # Materialise the pairs: on Python 3 a bare zip() is a lazy iterator,
    # which fit() cannot interpret as a 2-D feature matrix.
    x = list(zip(x_1, x_2))

    regr = RandomForestRegressor(n_estimators= 100)
    regr.fit(x, y)

    return regr
开发者ID:michaelbateman,项目名称:KagglePlayground,代码行数:27,代码来源:randomforest.py

示例5: pipeline

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def pipeline():
    """Train a weighted random forest on ``data`` and dump validation predictions.

    Rows of the module-level ``data`` frame with ``watch == 1`` form the
    validation set; rows whose ``watch`` is neither 0 nor 1 form the training
    set.  Each training row is weighted by ``min(a, b)``.  Missing feature
    values are filled with the per-column median of the respective set.  The
    validation predictions are scored with ``cal_cost`` and written to
    ``val_<cost[1]>.csv``.
    """
    non_feature_columns = ['label','watch','item_id','store_code','a','b']

    val = data[data.watch==1]
    # Explicit copy: columns are added to this frame below, which must not
    # be done through a view of `data`.
    val_a_b = val[['item_id','store_code','a','b']].copy()
    val_y = val.label
    val_x = val.drop(non_feature_columns,axis=1)

    train = data[(data.watch!=1)&(data.watch!=0)]
    train_y = train.label
    train_weight = np.array([min(a, b) for a, b in zip(train.a, train.b)])
    train_x = train.drop(non_feature_columns,axis=1)

    train_x.fillna(train_x.median(),inplace=True)
    val_x.fillna(val_x.median(),inplace=True)

    model = RandomForestRegressor(n_estimators=500,max_depth=5,max_features=0.6,n_jobs=-1,random_state=1024)

    # train
    model.fit(train_x,train_y, sample_weight=train_weight)

    # predict val set
    val_a_b['pred'] = model.predict(val_x)
    val_a_b['y'] = val_y
    cost = cal_cost(val_y.values,val_a_b.pred.values,val_a_b.a.values,val_a_b.b.values)
    val_a_b.to_csv('val_{0}.csv'.format(cost[1]),index=None)
开发者ID:foxchopin,项目名称:CaiNiao-DemandForecast-StoragePlaning,代码行数:36,代码来源:rf.py

示例6: fit

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
 def fit(self, X, y, **kwargs):
     """Build a RandomForestRegressor from ``self.INITPARAMS`` and fit it.

     Keyword arguments whose names already exist in ``self.INITPARAMS``
     override the stored values; unknown keywords are ignored.  The fitted
     model is stored on ``self.model``.
     """
     # NOTE(review): overrides are written into self.INITPARAMS itself, so
     # they persist across calls (and across instances if INITPARAMS is a
     # class-level dict) -- confirm this is intended.
     for key, value in kwargs.items():
         if key in self.INITPARAMS:
             self.INITPARAMS[key] = value
     model = RandomForestRegressor(**self.INITPARAMS)
     model.fit(X, y)
     self.model = model
开发者ID:DJRumble,项目名称:S2DS,代码行数:9,代码来源:estimator.py

示例7: RFscore_one

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def RFscore_one(x,y,id):
    """Return a normalised 3-fold cross-validated forest score of y given x.

    ``x`` and ``y`` are shuffled with a common random permutation,
    standardised to zero mean and unit standard deviation, and reshaped to
    single-column 2-D arrays.  A 50-tree forest is fitted on each training
    fold and scored on the held-out fold; the mean fold score is divided by
    ``dist(y)`` and returned.  ``id`` is only used in the progress message.
    """
    folds=3
    print("RFscore " + id)

    # np.arange instead of range(): numpy cannot shuffle a Python 3 range
    # object in place.
    r = np.arange(len(x))
    np.random.shuffle(r)
    x = x[r]
    y = y[r]
    x = (x - np.mean(x)) / np.std(x)
    y = (y - np.mean(y)) / np.std(y)

    # Turn the 1-D inputs into column vectors.
    x = np.array(x, ndmin=2).T
    y = np.array(y, ndmin=2).T

    # NOTE(review): `compute_importances` and KFold(n, n_folds=..., indices=...)
    # look like the legacy scikit-learn API -- confirm the installed version
    # before modernising.
    rf = RandomForestRegressor(n_estimators=50, verbose=0,n_jobs=1,min_samples_split=10,compute_importances=True,random_state=1)
    # The former full-data fit and its score were never used and every fold
    # refits the forest below, so they have been removed.

    cv = cross_validation.KFold(len(x), n_folds=folds, indices=False)
    score = 0
    median = dist(y)
    for traincv, testcv in cv:
        fit = rf.fit(x[traincv], y[traincv])
        score += fit.score(x[testcv], y[testcv])

    score /= folds
    score /= median
    return score
开发者ID:EoinLawless,项目名称:CauseEffect,代码行数:34,代码来源:features.py

示例8: do_rf

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def do_rf(filename):
    """Fit a 100-tree forest on the merged dataset and predict its first 200 rows.

    The frame and target come from ``create_merged_dataset(filename)``; the
    'driver' and 'trip' columns are excluded from the features.  Returns a
    DataFrame with the 'driver', 'trip' and predicted 'probs' of the first
    200 rows.
    """
    df, Y = create_merged_dataset(filename)
    features = df.drop(['driver', 'trip'], axis=1)

    forest = RandomForestRegressor(n_estimators=100)
    forest.fit(features, Y)

    head = slice(None, 200)
    columns = {
        'driver': df['driver'][head],
        'trip': df['trip'][head],
        'probs': forest.predict(features[head]),
    }
    return pd.DataFrame(columns)
开发者ID:fabiogm,项目名称:kaggle-driver-telematics,代码行数:9,代码来源:main.py

示例9: rf_regressor

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
	def rf_regressor(self):
		"""Fit a 100-tree forest on an 80/20 split and return the test score.

		The sparse matrix ``X`` is densified, split together with ``y`` into
		train and test parts (20% test), and a ``RandomForestRegressor`` with
		``random_state=42`` is fitted.  Returns ``model.score`` on the test
		part, rounded to two decimals.
		"""
		# Bind the dense matrix to a new name: assigning to `X` made it a
		# local variable, so `X.toarray()` raised UnboundLocalError before
		# anything else ran.
		# NOTE(review): `X` and `y` are resolved as module-level names here --
		# confirm they are not meant to be attributes of `self`.
		X_dense = X.toarray()
		X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=.2)

		model = RandomForestRegressor(n_estimators=100, oob_score=True, random_state=42)
		model.fit(X_train, y_train)
		return model.score(X_test, y_test).round(2)
开发者ID:edwood1,项目名称:yelp-boston,代码行数:9,代码来源:preprocess_predict.py

示例10: refit_from_scratch

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
    def refit_from_scratch(self):
        """ Create a new model directly from the database, rather
         than rely on the one saved from last time.

        Starts a ``ThreadedFit`` in the background, then synchronously fits a
        temporary forest on up to 250 records yielded by the database.  Tags
        are vectorized with a ``CountVectorizer``, joined with the numerical
        features, reduced with PCA and used to predict the feedback value.
        The fitted PCA, model and encoder replace the ones on ``self``.
        """
        # In the background fit a much larger random forest.
        self.threaded_fit = ThreadedFit()
        self.threaded_fit.signal_finished.connect(self.__init__)
        self.threaded_fit.start()

        temp_model = RandomForest(max_features="sqrt", n_jobs=-1)
        temp_enc   = CountVectorizer()
        X = []   # space-joined tag strings, vectorized below
        Z = []   # additional numerical data
        Y = []   # target (to predict) values
        for data in self.db.yield_some(250):
            feedback = data["feedback"]
            tags     = data[  "tags"  ]
            # Records without feedback or without tags are skipped.
            if feedback and tags:
                Y.append(   feedback   )
                X.append(" ".join(tags))
                Z.append(self.fmt_numerical(data))

        X = temp_enc.fit_transform(X)
        X = hstack((X, coo_matrix(Z)))
        self.allX = X
        # PCA cannot extract more components than min(n_samples, n_features);
        # capping on the feature count as well avoids a ValueError when there
        # are fewer than 200 features.
        n_components = min(X.shape[0], X.shape[1], 200)
        pca = PCA(n_components)
        reduced_X = pca.fit_transform(X.todense())
        temp_model.fit(reduced_X, Y)

        self.pca   = pca
        self.model = temp_model
        self.enc   = temp_enc
开发者ID:PornSieve,项目名称:porn_sieve,代码行数:34,代码来源:predict.py

示例11: train_with_features

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
    def train_with_features(self, features):
        """Fit a default RandomForestRegressor restricted to ``features``.

        The design matrix is ``self.data_folder.truncate(self.A, features)``
        and the target is ``self.target``.  Returns the fitted regressor.
        """
        regressor = RandomForestRegressor()
        regressor.fit(self.data_folder.truncate(self.A, features), self.target)
        return regressor
开发者ID:nicster,项目名称:bikesharingML,代码行数:9,代码来源:randomForest.py

示例12: test_rrf_vs_sklearn_reg

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
    def test_rrf_vs_sklearn_reg(self):
        """Compare the R-backed forest with sklearn's on the Boston housing data.

        Both estimators use 100 trees, no bootstrap/replacement and the same
        number of candidate features per split; their test-set mean squared
        errors must agree to within the tolerance of ``decimal=0``.
        """
        from sklearn.datasets import load_boston
        from sklearn.cross_validation import train_test_split
        from sklearn.metrics import mean_squared_error
        from sklearn.ensemble import RandomForestRegressor

        dataset = load_boston()
        split = train_test_split(dataset.data, dataset.target,
                                 test_size=0.2, random_state=13)
        X_train, X_test, y_train, y_test = split

        n_samples, n_features = X_train.shape
        mtry = int(np.floor(0.3 * n_features))

        # 100 trees; passed as a dict because 'corr.bias' is not a valid
        # Python keyword-argument name.
        r_params = {
            'ntree': 100,
            'nodesize': 1,
            'replace': 0,
            'mtry': mtry,
            'corr.bias': False,
            'sampsize': n_samples,
            'random_state': 1234,
        }
        r_rf = RRFEstimatorR(**r_params)
        r_rf.fit(X_train, y_train)
        r_mse = mean_squared_error(y_test, r_rf.predict(X_test))

        p_rf = RandomForestRegressor(n_estimators=100, min_samples_leaf=1, bootstrap=False,
                                     max_features=mtry, random_state=1)
        p_rf.fit(X_train, y_train)
        p_mse = mean_squared_error(y_test, p_rf.predict(X_test))

        print('%.4f vs %.4f' % (r_mse, p_mse))
        # should be roughly the same (7.6 vs. 7.2)
        np.testing.assert_almost_equal(r_mse, p_mse, decimal=0)
开发者ID:tkincaid,项目名称:tkincaid.github.com,代码行数:31,代码来源:test_rrf_bm.py

示例13: round2

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def round2(X, y):
    """Grid-search forest size and ``max_features`` by 5-fold cross-validated RMSE.

    Every combination of 50/100/200/500 trees and 'auto'/'log2' features is
    evaluated with a freshly shuffled 5-fold split; the fold RMSEs and their
    mean are printed per combination.

    Returns a dict with keys 'estimator', 'max_feature' and 'scores' for the
    combination with the lowest mean RMSE.
    """
    best = {}
    n = len(y)
    for tree in (50, 100, 200, 500):
        for feature in ('auto', 'log2'):
            model = RandomForestRegressor(n_estimators=tree, max_features=feature)

            # New shuffled 5-fold split for each parameter combination.
            kf = KFold(n, n_folds=5, shuffle=True)

            # Root mean squared error on the held-out part of every fold.
            scores = []
            for train_idx, test_idx in kf:
                model.fit(X[train_idx], y[train_idx])
                prediction = model.predict(X[test_idx])
                scores.append(np.sqrt(mean_squared_error(y[test_idx], prediction)))

            # The first combination always becomes the incumbent; later ones
            # replace it only with a strictly lower mean RMSE.
            if not best or np.mean(scores) < np.mean(best['scores']):
                best['estimator'] = tree
                best['max_feature'] = feature
                best['scores'] = scores

            print("Estimator: %s" % tree)
            print("Max Features: %s" % feature)
            print(scores)
            print(np.mean(scores))
    return best
开发者ID:gokamoto,项目名称:AdvancedMLProject,代码行数:37,代码来源:model_RandomForest.py

示例14: train_year

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def train_year(train_fea, trees):
    """Train one forest per sale year and measure cross-year prediction error.

    For each distinct value of ``train_fea['SaleYear']`` a forest with
    ``trees`` estimators is fitted on that year's rows, predicting
    'SalePrice' from all other columns.  Then, for each distance ``i`` from 1
    to ``len(years) - 1`` and each pair from ``get_pairs(years, i)``, the
    model of the pair's first year predicts the rows of its second year and
    the relative errors ``|predicted - actual| / actual`` are summarised.

    Returns:
        ``(rfs, errors_list)``: the fitted forests in ascending year order,
        and for each distance the mean over its pairs of the mean relative
        error.
    """
    years = sorted(set(train_fea['SaleYear'].values))

    rfs = []
    for year in years:
        print('train model %d' % (year))
        # NOTE(review): `compute_importances` looks like a legacy
        # scikit-learn argument -- confirm the installed version accepts it.
        rf = RandomForestRegressor(n_estimators=trees, n_jobs=1, compute_importances = True)
        year_rows = train_fea[train_fea['SaleYear']==year]
        rf.fit(year_rows.drop('SalePrice', axis=1), year_rows["SalePrice"])
        rfs.append(rf)

    # Collect one summary row per pair and build the frame once, instead of
    # growing a DataFrame by repeated append behind a None check.
    stats = []
    distances = []
    for i in range(1, len(years)):
        for p in get_pairs(years, i):
            print('compare %d, %d' % (p[0], p[1]))
            # Only the second year's rows are needed; the first year only
            # selects which model to use.
            target_rows = train_fea[train_fea['SaleYear']==p[1]]
            rf = rfs[years.index(p[0])]
            predicted = rf.predict(target_rows.drop('SalePrice', axis=1))
            actual = np.asarray(target_rows['SalePrice'])
            # Vectorised relative error, computed element-wise.
            error_rates = np.abs(predicted - actual) / actual
            stats.append({'dist':i, 'mean':error_rates.mean(), 'var':error_rates.var(), 'std':error_rates.std()})
            distances.append(i)
    errors = pd.DataFrame(stats, index=distances)

    # Rows are indexed by distance, so errors.ix[i] selects all pairs at
    # distance i.
    errors_list = [errors.ix[i]['mean'].mean() for i in range(1, len(years))]
    return rfs, errors_list
开发者ID:zhangda,项目名称:bulldozers,代码行数:36,代码来源:yearbase.py

示例15: random_forest

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import fit [as 别名]
def random_forest(X_train, y_train, y_test, X_test, num_trees=100):
    """Fit a random forest and evaluate it on the test set.

    Note the parameter order: ``y_test`` comes before ``X_test``.

    Args:
        X_train: training features.
        y_train: training targets.
        y_test: test targets.
        X_test: test features.
        num_trees: number of trees in the forest.

    Returns:
        A ``(mean_squared_error, r2)`` tuple, where the first item is
        ``mse(y_test, predictions)`` and the second is ``model.score`` on the
        test set.
    """
    model = RandomForestRegressor(n_estimators=num_trees, oob_score=True)
    model.fit(X_train, y_train)
    # Predict once and reuse the result; the test set was previously
    # predicted twice and the first result discarded.
    prediction = model.predict(X_test)
    test_mse = mse(y_test, prediction)
    r2 = model.score(X_test, y_test)
    return (test_mse, r2)
开发者ID:khanzlik,项目名称:RatioClothing-Project,代码行数:9,代码来源:new_models.py


注:本文中的sklearn.ensemble.RandomForestRegressor.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。