本文整理汇总了Python中sklearn.ensemble.RandomForestRegressor.apply方法的典型用法代码示例。如果您正苦于以下问题：Python RandomForestRegressor.apply方法的具体用法？Python RandomForestRegressor.apply怎么用？Python RandomForestRegressor.apply使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomForestRegressor的用法示例。
在下文中一共展示了RandomForestRegressor.apply方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_drf_regressor_backupsklearn
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import apply [as 别名]
def test_drf_regressor_backupsklearn(backend='auto'):
    """Compare h2o4gpu's RandomForestRegressor against scikit-learn's.

    Both models are fitted on ``./open_data/simple.txt`` (whitespace
    separated; the last column is the target, all others are features).
    When ``backend == "sklearn"`` the two fitted models are required to
    agree exactly on predictions, score, decision paths, leaf indices and
    the fitted attributes printed below.

    Args:
        backend: Backend name forwarded to the h2o4gpu solver. The equality
            checks only run for the value ``"sklearn"``.
    """
    # sep=r"\s+" is the documented equivalent of the deprecated
    # delim_whitespace=True.
    df = pd.read_csv("./open_data/simple.txt", sep=r"\s+")
    n_cols = df.shape[1]
    X = np.array(df.iloc[:, :n_cols - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, n_cols - 1], dtype='float32', order='C')

    import h2o4gpu
    Solver = h2o4gpu.RandomForestRegressor

    # Run h2o4gpu version of RandomForest Regression
    drf = Solver(backend=backend, random_state=1234, oob_score=True)
    print("h2o4gpu fit()")
    drf.fit(X, y)

    # Run Sklearn version of RandomForest Regression
    # NOTE(review): max_depth=3 is set only here; presumably it mirrors the
    # h2o4gpu default -- confirm.
    from sklearn.ensemble import RandomForestRegressor
    drf_sk = RandomForestRegressor(random_state=1234, oob_score=True, max_depth=3)
    print("Scikit fit()")
    drf_sk.fit(X, y)

    # NOTE(review): the excerpt lost its indentation; every comparison below
    # is assumed to sit under this guard, since exact equality is only
    # meaningful when h2o4gpu delegates to scikit-learn.
    if backend == "sklearn":
        assert (drf.predict(X) == drf_sk.predict(X)).all()
        # score() may return a plain Python float, which has no .all().
        assert drf.score(X, y) == drf_sk.score(X, y)
        assert (drf.decision_path(X)[1] == drf_sk.decision_path(X)[1]).all()
        assert (drf.apply(X) == drf_sk.apply(X)).all()

        print("Estimators")
        print(drf.estimators_)
        print(drf_sk.estimators_)

        print("n_features")
        print(drf.n_features_)
        print(drf_sk.n_features_)
        assert drf.n_features_ == drf_sk.n_features_

        print("n_outputs")
        print(drf.n_outputs_)
        print(drf_sk.n_outputs_)
        assert drf.n_outputs_ == drf_sk.n_outputs_

        print("Feature importance")
        print(drf.feature_importances_)
        print(drf_sk.feature_importances_)
        assert (drf.feature_importances_ == drf_sk.feature_importances_).all()

        print("oob_score")
        print(drf.oob_score_)
        print(drf_sk.oob_score_)
        assert drf.oob_score_ == drf_sk.oob_score_

        print("oob_prediction")
        print(drf.oob_prediction_)
        print(drf_sk.oob_prediction_)
        assert (drf.oob_prediction_ == drf_sk.oob_prediction_).all()
示例2: range
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import apply [as 别名]
# NOTE(review): this excerpt starts mid-loop. The loop header and the `if`
# that pairs with the `else` below are not shown, and the original
# indentation was lost in extraction.
# Rows are routed either to `test`/`test_y` (later used to fit the forest)
# or to `data` (later used for prediction).
test.append(test_data[i])
test_y.append(float(y[i]))
else:
data.append(test_data[i])
i += 1
# Targets for fitting, as a float32 array.
test1_y = np.asarray(test_y, dtype=np.float32)
#test = np.asarray(test, dtype=np.float32)
#test1_y = test1_y.transpose
#print(test_y)
#print data
# Ten rounds, each building a fresh forest.
# NOTE(review): how far the loop body extends cannot be determined from here.
for i1 in range(0,10):
forest = RandomForestRegressor(n_estimators = 100, max_depth = 3)
#print("--- %s seconds ---" % (time.clock() - start_time))
forest = forest.fit(test,test1_y)
# apply() returns the leaf index each sample lands in, per tree.
out1 = forest.apply(test)
# Scored on the same rows the forest was fitted on.
out = forest.score(test,test1_y)
# Python 2 print statements.
print out
print out1
#print("--- %s seconds ---" % (time.clock() - start_time))
output = forest.predict(data)
# `error` sums the absolute deviations larger than 0.01; `error1` counts them.
i = 0
error = 0
error1 = 0
# Predictions are compared against y offset by test_len.
# NOTE(review): the excerpt ends before `i` is advanced in this loop.
while i < len(output):
if abs(output[i] - y[test_len+i]) > 0.01:
#print(i)
#print(y[test_len+i])
#print(output[i])
error += abs(output[i] - y[test_len+i])
error1 += 1
示例3:
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import apply [as 别名]
# In[42]:
# Print the prediction next to bias + per-feature contributions summed over
# axis 1, so the two can be compared by eye (Python 2 print statements).
# NOTE(review): `prediction`, `bias` and `contributions` are defined outside
# this excerpt -- presumably the output of treeinterpreter; confirm.
print prediction
print bias + np.sum(contributions, axis=1)
# In[43]:
# The basic feature importances provided by sklearn.
fit1.feature_importances_
# In[44]:
# treeinterpreter uses the apply function to retrieve the leaf indices, with
# the help of which the tree path is retrieved.
# Bare attribute reference: in a notebook cell this only displays the method.
rf.apply
# In[47]:
# Leaf indices for `instances`.
rf.apply(instances)
# In[ ]:
示例4: PDFRandomForestRegressor
# 需要导入模块: from sklearn.ensemble import RandomForestRegressor [as 别名]
# 或者: from sklearn.ensemble.RandomForestRegressor import apply [as 别名]
class PDFRandomForestRegressor(BaseEstimator, RegressorMixin):
"""A normal random forest, except that it stores the final leaf positions and delay times for each row of the training set. It will also have a specialized scoring method."""
def __init__(self, delaymin, delaymax, **kwargs):
    """Wrap a RandomForestRegressor and set up the delay-time bins.

    Args:
        delaymin: Lowest delay value; first entry of ``delay_bin_values``.
        delaymax: Highest delay value; last entry of ``delay_bin_values``.
        **kwargs: Forwarded unchanged to ``RandomForestRegressor``.
    """
    self.rforest = RandomForestRegressor(**kwargs)
    self.delay_min = delaymin
    self.delay_max = delaymax
    self.delay_bin_indices = np.arange(self.delay_max - self.delay_min + 1)
    self.delay_bin_values = np.arange(self.delay_min, self.delay_max + 1)
    # One entry per tree. Each entry is later replaced by a dict holding a
    # node-id lookup and a histogram of training delay times per node.
    # Built with a comprehension so every tree gets its own dict; `[{}] * n`
    # would repeat one shared dict object n times.
    self.node_delay_pdfs = [{} for _ in range(self.rforest.n_estimators)]
def fit(self, X, y, compute_pdf=False):
    """Fit the wrapped forest on targets clamped to the delay range.

    Args:
        X: Training features, passed straight to the wrapped forest.
        y: Training targets; clamped with ``restrict_range`` before fitting.
        compute_pdf: When truthy, also build the per-node delay histograms
            for the training set via ``set_node_pdfs``.

    Returns:
        This estimator, to allow call chaining.
    """
    y_fit = self.restrict_range(y)
    self.rforest.fit(X, y_fit)
    if compute_pdf:
        # Record which leaf every training row landed in, per tree.
        self.set_node_pdfs(X, y_fit)
    return self
def set_node_pdfs(self, X, y):
    """Build, for every tree, a histogram of delay times per leaf node.

    For each tree the result holds ``'node_dict'`` (leaf id -> column index)
    and ``'pdf_arr'`` (2-D counts: delay bins along the first axis, the
    tree's distinct leaves along the second). Taking a column of ``pdf_arr``
    therefore gives the delay distribution of one leaf.

    Args:
        X: Samples whose leaf assignments are looked up with ``self.apply``.
        y: Delay values for those samples; clamped with ``restrict_range``.
    """
    y_fit = self.restrict_range(y)
    # One column of leaf ids per tree.
    nodes = self.apply(X)

    # Rebuilt from the actual tree count so a list sized at construction
    # time can never be indexed out of range.
    node_delay_pdfs = []
    for tree_idx in range(nodes.shape[1]):
        unique_nodes, inverse = np.unique(nodes[:, tree_idx], return_inverse=True)
        # n + 1 edges delimit n unit-width bins, one per distinct leaf.
        node_bin_edges = np.arange(len(unique_nodes) + 1)
        # zip stops at the shorter input, so the final edge is not mapped.
        node_dict = dict(zip(unique_nodes, node_bin_edges))
        node_indices = node_bin_edges[inverse]
        pdf_arr, _, _ = np.histogram2d(
            y_fit, node_indices, bins=[self.delay_bin_values, node_bin_edges])
        node_delay_pdfs.append({'node_dict': node_dict, 'pdf_arr': pdf_arr})
    self.node_delay_pdfs = node_delay_pdfs
def restrict_range(self, y):
    """Return a copy of ``y`` clamped to ``[delay_min, delay_max - 1]``.

    The input is left untouched. Both masks are computed from the original
    ``y``, not from the partially clamped copy.

    Args:
        y: Array of delay values; must support ``.copy()`` and boolean-mask
            assignment (e.g. a numpy array).

    Returns:
        The clamped copy.
    """
    lower = self.delay_min
    # The top value is delay_max - 1, one below the last entry of
    # delay_bin_values.
    upper = self.delay_max - 1
    y_restrict = y.copy()
    y_restrict[y < lower] = lower
    y_restrict[y > upper] = upper
    return y_restrict
def map_y_vals(self, y):
    """Map delay values to zero-based offsets from ``delay_min``.

    Args:
        y: Array of delay values; clamped with ``restrict_range`` first.

    Returns:
        The clamped values with ``delay_min`` subtracted.
    """
    return self.restrict_range(y) - self.delay_min
def predict(self, X):
    """Return the wrapped forest's prediction for ``X``, unchanged."""
    return self.rforest.predict(X)
#Instead of just the normal prediction, which I believe just gives the average value of everything in the leaf node, predict a set of quantiles:
def predict_percentiles(self, X, percentiles):
    """Predict delay values at the requested percentiles for each sample.

    Each sample's delay histogram (from ``get_node_pdfs``) is turned into a
    normalised cumulative distribution. For every percentile the reported
    value is the entry of ``delay_bin_values`` at the first bin where that
    distribution reaches the percentile.

    Args:
        X: Samples to predict for; passed to ``self.apply``.
        percentiles: Iterable of percentiles on a 0-100 scale.

    Returns:
        Integer array of shape ``(n_samples, len(percentiles))``.
    """
    p_nodes = self.apply(X)
    # NOTE(review): get_node_pdfs is not fully visible here; assumed to
    # return one row of per-bin counts per sample -- confirm.
    pdf_arr = self.get_node_pdfs(p_nodes)
    cdf_arr = np.cumsum(pdf_arr, axis=1)
    # Normalise every row by its total (the last cumulative value). Builtin
    # float replaces the np.float alias, which newer NumPy no longer ships.
    cdf_arr_frac = cdf_arr / cdf_arr[:, -1:].astype(float)

    percentile_yvals = np.zeros(
        (cdf_arr_frac.shape[0], len(percentiles)), dtype=int)
    for col, ptile in enumerate(percentiles):
        # Each row is non-decreasing, so argmax over the boolean mask gives
        # the first bin whose cumulative fraction reaches the percentile
        # (and 0 when no bin does).
        first_reached = np.argmax(cdf_arr_frac >= ptile / 100., axis=1)
        percentile_yvals[:, col] = self.delay_bin_values[first_reached]
    return percentile_yvals
def compute_percentiles(self, X, y):
    """Return, per sample, the cumulative fraction at its observed delay.

    Each sample's delay histogram (from ``get_node_pdfs``) is turned into a
    normalised cumulative distribution, which is then read off at the bin
    corresponding to that sample's own ``y`` value.

    Args:
        X: Samples; passed to ``self.apply``.
        y: Observed delay values, one per sample.

    Returns:
        1-D float array with one cumulative fraction per sample.
    """
    # map_y_vals clamps y itself, so no separate restrict_range call is
    # needed. Builtin int replaces the np.int alias, which newer NumPy no
    # longer ships.
    y_indices = self.map_y_vals(y).astype(int)

    p_nodes = self.apply(X)
    # NOTE(review): get_node_pdfs is not fully visible here; assumed to
    # return one row of per-bin counts per sample -- confirm.
    pdf_arr = self.get_node_pdfs(p_nodes)
    cdf_arr = np.cumsum(pdf_arr, axis=1)
    # Normalise every row by its total (the last cumulative value).
    cdf_arr_frac = cdf_arr / cdf_arr[:, -1:].astype(float)

    # Pick, for row k, the column given by y_indices[k].
    sample_rows = np.arange(len(y_indices))
    cdf_at_y = cdf_arr_frac[sample_rows, y_indices]
    return cdf_at_y
def get_node_pdfs(self,nodes):
pdf_arr = np.zeros((nodes.shape[0],len(self.delay_bin_values)-1),dtype=np.int)
#print nodes.shape,pdf_arr.shape
for i,node_info in enumerate(self.node_delay_pdfs):
#.........这里部分代码省略.........