当前位置: 首页>>代码示例>>Python>>正文


Python RandomForestRegressor.apply方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestRegressor.apply方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestRegressor.apply方法的具体用法?Python RandomForestRegressor.apply怎么用?Python RandomForestRegressor.apply使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.RandomForestRegressor的用法示例。


在下文中一共展示了RandomForestRegressor.apply方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_drf_regressor_backupsklearn

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 注意: apply 是 RandomForestRegressor 实例的方法, 不能单独导入; 请先创建并训练模型, 再调用 model.apply(X)
def test_drf_regressor_backupsklearn(backend='auto'):
    """Compare h2o4gpu's RandomForestRegressor with scikit-learn's.

    Both regressors are fitted on ``./open_data/simple.txt``, read as a
    whitespace-separated table: every column except the last is a feature and
    the last column is the target.  Predictions and fitted attributes are only
    compared when ``backend == "sklearn"``; for any other backend the test
    just checks that both fits run.

    Parameters
    ----------
    backend : str
        Forwarded to ``h2o4gpu.RandomForestRegressor``.
    """
    # sep=r"\s+" is the documented equivalent of the deprecated
    # delim_whitespace=True keyword.
    df = pd.read_csv("./open_data/simple.txt", sep=r"\s+")
    X = np.array(df.iloc[:, :-1], dtype='float32', order='C')
    y = np.array(df.iloc[:, -1], dtype='float32', order='C')
    import h2o4gpu
    Solver = h2o4gpu.RandomForestRegressor

    # Run h2o4gpu version of RandomForest Regression
    drf = Solver(backend=backend, random_state=1234, oob_score=True)
    print("h2o4gpu fit()")
    drf.fit(X, y)

    # Run Sklearn version of RandomForest Regression
    # NOTE(review): only the scikit-learn model sets max_depth=3; presumably
    # the h2o4gpu wrapper defaults to the same depth -- confirm.
    from sklearn.ensemble import RandomForestRegressor
    drf_sk = RandomForestRegressor(random_state=1234, oob_score=True, max_depth=3)
    print("Scikit fit()")
    drf_sk.fit(X, y)

    if backend != "sklearn":
        return

    # With the sklearn backend both objects are expected to behave the same,
    # so results are compared for exact equality.
    assert (drf.predict(X) == drf_sk.predict(X)).all()
    assert (drf.score(X, y) == drf_sk.score(X, y)).all()
    assert (drf.decision_path(X)[1] == drf_sk.decision_path(X)[1]).all()
    assert (drf.apply(X) == drf_sk.apply(X)).all()

    print("Estimators")
    print(drf.estimators_)
    print(drf_sk.estimators_)

    print("n_features")
    print(drf.n_features_)
    print(drf_sk.n_features_)
    assert drf.n_features_ == drf_sk.n_features_

    print("n_outputs")
    print(drf.n_outputs_)
    print(drf_sk.n_outputs_)
    assert drf.n_outputs_ == drf_sk.n_outputs_

    print("Feature importance")
    print(drf.feature_importances_)
    print(drf_sk.feature_importances_)
    assert (drf.feature_importances_ == drf_sk.feature_importances_).all()

    print("oob_score")
    print(drf.oob_score_)
    print(drf_sk.oob_score_)
    assert drf.oob_score_ == drf_sk.oob_score_

    print("oob_prediction")
    print(drf.oob_prediction_)
    print(drf_sk.oob_prediction_)
    assert (drf.oob_prediction_ == drf_sk.oob_prediction_).all()
开发者ID:wamsiv,项目名称:h2o4gpu,代码行数:54,代码来源:test_xgb_sklearn_wrapper.py

示例2: range

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 注意: apply 是 RandomForestRegressor 实例的方法, 不能单独导入; 请先创建并训练模型, 再调用 model.apply(X)
        test.append(test_data[i])
        test_y.append(float(y[i]))
    else:
        data.append(test_data[i])
    i += 1
test1_y  = np.asarray(test_y, dtype=np.float32)
#test  = np.asarray(test, dtype=np.float32)
#test1_y = test1_y.transpose
#print(test_y)   
#print data

for i1 in range(0,10):
    forest = RandomForestRegressor(n_estimators = 100, max_depth = 3)
#print("--- %s seconds ---" % (time.clock() - start_time))
    forest = forest.fit(test,test1_y) 
    out1 = forest.apply(test) 
    out = forest.score(test,test1_y) 
    print out
    print out1
#print("--- %s seconds ---" % (time.clock() - start_time))
    output = forest.predict(data)
    i = 0
    error = 0
    error1 = 0
    while i < len(output):
        if abs(output[i] - y[test_len+i]) > 0.01:
            #print(i)
            #print(y[test_len+i])
            #print(output[i])
            error += abs(output[i] - y[test_len+i])
            error1 += 1
开发者ID:serleo,项目名称:RF,代码行数:33,代码来源:rf_v7.py

示例3:

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 注意: apply 是 RandomForestRegressor 实例的方法, 不能单独导入; 请先创建并训练模型, 再调用 model.apply(X)
# In[42]:

# Parenthesised single-argument print behaves the same on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(prediction)
print(bias + np.sum(contributions, axis=1))


# In[43]:

# The basic feature importances provided by sklearn
fit1.feature_importances_


# In[44]:

# treeinterpreter uses the apply function to retrieve the leaf indices, with
# the help of which the tree path is retrieved

rf.apply


# In[47]:

rf.apply(instances)


# In[ ]:



开发者ID:pramitchoudhary,项目名称:Experiments,代码行数:28,代码来源:random_forest_intepretation_treeinterpreter.py

示例4: PDFRandomForestRegressor

# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 注意: apply 是 RandomForestRegressor 实例的方法, 不能单独导入; 请先创建并训练模型, 再调用 model.apply(X)
class PDFRandomForestRegressor(BaseEstimator, RegressorMixin):
    """A normal random forest, except that it stores the final leaf positions and delay times for each row of the training set. It will also have a specialized scoring method."""
    def __init__(self,delaymin,delaymax,**kwargs):
        """Create the wrapped forest and the delay-bin bookkeeping arrays.

        Parameters
        ----------
        delaymin, delaymax :
            Lower and upper delay bounds; they define ``delay_bin_values``
            as ``np.arange(delaymin, delaymax + 1)``.
        **kwargs :
            Forwarded unchanged to ``RandomForestRegressor``.
        """
        self.rforest = RandomForestRegressor(**kwargs)
        self.delay_min = delaymin
        self.delay_max = delaymax
        self.delay_bin_indices = np.arange(self.delay_max - self.delay_min + 1)
        self.delay_bin_values = np.arange(self.delay_min, self.delay_max + 1)
        # One slot per tree; set_node_pdfs later stores a dict with a node-id
        # lookup and a delay histogram in each slot.  Build independent dicts:
        # ``[{}] * n`` would put the *same* dict object in every slot, so an
        # in-place update of one slot would show up in all of them.
        self.node_delay_pdfs = [{} for _ in range(self.rforest.n_estimators)]

    def fit(self, X,y,compute_pdf = False):
        """Fit the wrapped forest on range-restricted targets.

        Parameters
        ----------
        X : training inputs, passed straight to ``self.rforest.fit``.
        y : targets; clipped with ``restrict_range`` before fitting.
        compute_pdf : when truthy, also call ``set_node_pdfs`` with the
            training data after the fit.

        Returns
        -------
        self
        """
        y_fit = self.restrict_range(y)
        self.rforest.fit(X, y_fit)

        # Plain truthiness instead of ``== True``: every value that used to
        # enable the histogram step still does.
        if compute_pdf:
            self.set_node_pdfs(X, y_fit)

        return self

    def set_node_pdfs(self,X,y):
        """Build, for every tree, a histogram of target values per leaf node.

        ``self.apply(X)`` supplies one column of node ids per tree.  For tree
        ``t`` the result is stored in ``self.node_delay_pdfs[t]`` as a dict:

        * ``'node_dict'`` maps each distinct node id seen in that column to
          its position among the sorted distinct ids;
        * ``'pdf_arr'`` is the 2-D histogram returned by ``np.histogram2d``
          with the delay bins on the first axis and the node positions on
          the second, so one column is the target distribution of one node.

        Parameters
        ----------
        X : inputs handed to ``self.apply``.
        y : targets; clipped with ``restrict_range`` before binning.
        """
        y_fit = self.restrict_range(y)
        nodes = self.apply(X)

        for tree_idx in range(nodes.shape[1]):
            unique_nodes, inverse = np.unique(nodes[:, tree_idx], return_inverse=True)
            # One more edge than there are distinct nodes, so every node
            # position gets its own histogram bin.
            unique_node_indices = np.arange(len(unique_nodes) + 1)
            # zip stops at the shorter sequence, i.e. after the last node.
            node_dict = dict(zip(unique_nodes, unique_node_indices))
            node_indices = unique_node_indices[inverse]
            pdf_arr, _, _ = np.histogram2d(
                y_fit,
                node_indices,
                bins=[self.delay_bin_values, unique_node_indices],
            )
            self.node_delay_pdfs[tree_idx] = {'node_dict': node_dict, 'pdf_arr': pdf_arr}

    def restrict_range(self,y):
        """Return a copy of ``y`` clamped to ``[delay_min, delay_max - 1]``.

        Values below ``self.delay_min`` become ``self.delay_min`` and values
        above ``self.delay_max - 1`` become ``self.delay_max - 1``.  The
        input itself is left untouched.
        """
        upper = self.delay_max - 1
        # Both masks are taken from the untouched input, not from the copy.
        too_small = y < self.delay_min
        too_large = y > upper

        clamped = y.copy()
        clamped[too_small] = self.delay_min
        clamped[too_large] = upper
        return clamped
    
    def map_y_vals(self,y):
        """Turn target values into zero-based offsets from ``delay_min``.

        The values are first clamped with ``restrict_range`` and then shifted
        down by ``self.delay_min``.
        """
        return self.restrict_range(y) - self.delay_min

    def predict(self,X):
        """Return the wrapped forest's prediction for ``X`` unchanged."""
        forest = self.rforest
        return forest.predict(X)

    #Instead of just the normal prediction, which I believe just gives the average value of everything in the leaf node, predict a set of quantiles:
    def predict_percentiles(self,X,percentiles):
        """Predict target values at the requested percentiles for each row.

        Each row's histogram (from ``self.get_node_pdfs`` applied to the node
        ids of ``self.apply(X)``) is turned into a cumulative fraction along
        axis 1.  For every percentile the reported value is the entry of
        ``self.delay_bin_values`` at the first position where that cumulative
        fraction reaches ``percentile / 100``.

        Parameters
        ----------
        X : inputs handed to ``self.apply``.
        percentiles : sequence of percentiles on a 0-100 scale.

        Returns
        -------
        Integer array of shape ``(number of rows, len(percentiles))``.
        """
        p_nodes = self.apply(X)
        pdf_arr = self.get_node_pdfs(p_nodes)
        cdf_arr = np.cumsum(pdf_arr, axis=1)
        # Builtin float/int replace the np.float/np.int aliases, which were
        # removed from NumPy and raise AttributeError there.
        cdf_arr_frac = (cdf_arr.T / cdf_arr[:, -1].astype(float)).T

        percentile_yvals = np.zeros((cdf_arr_frac.shape[0], len(percentiles)), dtype=int)
        for col, ptile in enumerate(percentiles):
            # argmax over a boolean row gives the first True, i.e. the first
            # bin whose cumulative fraction is at or above the percentile
            # (and 0 when no bin qualifies, as the masked argmin did).
            indices = np.argmax(cdf_arr_frac >= ptile / 100., axis=1)
            percentile_yvals[:, col] = self.delay_bin_values[indices]

        return percentile_yvals

    def compute_percentiles(self,X,y):
        """Return, for each row, the cumulative fraction reached at its target.

        Each row's histogram (from ``self.get_node_pdfs`` applied to the node
        ids of ``self.apply(X)``) is turned into a cumulative fraction along
        axis 1.  The value returned for row ``r`` is that fraction at the bin
        index obtained from ``y[r]`` via ``restrict_range`` and
        ``map_y_vals``.

        Parameters
        ----------
        X : inputs handed to ``self.apply``.
        y : target values, one per row of ``X``.

        Returns
        -------
        1-D array with one cumulative fraction per row.
        """
        y_fit = self.restrict_range(y)
        # Builtin int/float replace the np.int/np.float aliases, which were
        # removed from NumPy and raise AttributeError there.
        y_indices = self.map_y_vals(y_fit).astype(int)
        p_nodes = self.apply(X)
        pdf_arr = self.get_node_pdfs(p_nodes)
        cdf_arr = np.cumsum(pdf_arr, axis=1)
        cdf_arr_frac = (cdf_arr.T / cdf_arr[:, -1].astype(float)).T
        # Pick one entry per row: row r, column y_indices[r].
        cdf_at_y = cdf_arr_frac[np.arange(len(y_indices)), y_indices]
        return cdf_at_y

    def get_node_pdfs(self,nodes):
        pdf_arr = np.zeros((nodes.shape[0],len(self.delay_bin_values)-1),dtype=np.int)
        #print nodes.shape,pdf_arr.shape
        for i,node_info in enumerate(self.node_delay_pdfs):
#.........这里部分代码省略.........
开发者ID:AkiraKane,项目名称:dataprojects,代码行数:103,代码来源:train_streaming_rforest.py


注:本文中的sklearn.ensemble.RandomForestRegressor.apply方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。