本文整理匯總了Python中statsmodels.api.RLM屬性的典型用法代碼示例。如果您正苦於以下問題:Python api.RLM屬性的具體用法?Python api.RLM怎麼用?Python api.RLM使用的例子?那麼,這裏精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在類statsmodels.api的用法示例。
在下文中一共展示了api.RLM屬性的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: rlm
# 需要導入模塊: from statsmodels import api [as 別名]
# 或者: from statsmodels.api import RLM [as 別名]
def rlm(data, xseq, **params):
    """
    Fit a robust linear model (RLM) of ``y`` on ``x`` and predict at ``xseq``.

    Parameters
    ----------
    data : dataframe-like
        Must provide ``data['x']`` and ``data['y']``.
    xseq : array-like
        x values at which the fitted model is evaluated.
    **params
        Reads the keys ``formula``, ``method_args`` and ``se``.
        When ``formula`` is truthy the work is delegated to
        ``rlm_formula``.

    Returns
    -------
    pandas.DataFrame
        Columns ``x`` (the values of ``xseq``) and ``y`` (predictions).
    """
    if params['formula']:
        return rlm_formula(data, xseq, **params)

    # Add the constant column to both the training and the prediction design
    # matrices so that they have the same layout.
    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)

    # presumably splits method_args into kwargs for the RLM constructor and
    # kwargs for RLM.fit -- helper is defined elsewhere, verify there
    init_kwargs, fit_kwargs = separate_method_kwargs(
        params['method_args'], sm.RLM, sm.RLM.fit)
    model = sm.RLM(data['y'], X, **init_kwargs)
    results = model.fit(**fit_kwargs)

    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        # No interval columns are produced; the caller is only warned.
        warnings.warn("Confidence intervals are not yet implemented "
                      "for RLM smoothing.", PlotnineWarning)

    return data
示例2: rlm_formula
# 需要導入模塊: from statsmodels import api [as 別名]
# 或者: from statsmodels.api import RLM [as 別名]
def rlm_formula(data, xseq, **params):
    """
    Fit a robust linear model (RLM) described by a formula and predict
    at ``xseq``.

    Parameters
    ----------
    data : dataframe-like
        Data passed to ``smf.rlm`` together with the formula.
    xseq : array-like
        x values at which the fitted model is evaluated.
    **params
        Reads the keys ``formula``, ``enviroment``, ``method_args``
        and ``se``.

    Returns
    -------
    pandas.DataFrame
        Columns ``x`` (the values of ``xseq``) and ``y`` (predictions).
    """
    # NOTE: the misspelled key 'enviroment' is what callers supply; it is
    # part of the calling contract and must not be "corrected" here.
    eval_env = params['enviroment']
    formula = params['formula']

    # presumably splits method_args into kwargs for the RLM constructor and
    # kwargs for RLM.fit -- helper is defined elsewhere, verify there
    init_kwargs, fit_kwargs = separate_method_kwargs(
        params['method_args'], sm.RLM, sm.RLM.fit)
    model = smf.rlm(
        formula,
        data,
        eval_env=eval_env,
        **init_kwargs
    )
    results = model.fit(**fit_kwargs)

    # Predict from a frame holding only 'x'; the formula machinery builds
    # the design matrix from it.
    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(data)

    if params['se']:
        # No interval columns are produced; the caller is only warned.
        warnings.warn("Confidence intervals are not yet implemented "
                      "for RLM smoothing.", PlotnineWarning)

    return data
示例3: __init__
# 需要導入模塊: from statsmodels import api [as 別名]
# 或者: from statsmodels.api import RLM [as 別名]
def __init__(self, y, x, z, data, alpha):
    """
    Fit a robust linear model of ``data[y]`` on ``data[x + z]`` and store
    the coefficient and confidence bounds of the first entry of ``x``.

    assumes ``x`` and ``z`` are lists of column names and ``data`` is a
    pandas DataFrame (``x + z`` is used as a column selector and ``x`` as
    a label selector on the fitted parameters) -- TODO confirm with callers

    Sets ``self.regression``, ``self.result``, ``self.coefficient``,
    ``self.upper`` and ``self.lower``.
    """
    self.regression = sm.RLM(data[y], data[x + z])
    self.result = self.regression.fit()

    # Use .iloc for the positional pick: integer keys on a label-indexed
    # Series via [] are deprecated in recent pandas.
    self.coefficient = self.result.params[x].iloc[0]

    # NOTE(review): alpha is halved before being handed to conf_int; if
    # conf_int already treats its argument as the two-sided significance
    # level this yields a wider interval than `alpha` suggests -- confirm.
    confidence_interval = self.result.conf_int(alpha=alpha / 2.0)
    # Column 1 is read as the upper bound and column 0 as the lower bound.
    self.upper = confidence_interval[1][x].iloc[0]
    self.lower = confidence_interval[0][x].iloc[0]
示例4: setup
# 需要導入模塊: from statsmodels import api [as 別名]
# 或者: from statsmodels.api import RLM [as 別名]
def setup(self):
    """Refit the RLM results on freshly simulated data before each test."""
    # A new fit is produced for every test because tests modify the results.
    design = self.exog
    np.random.seed(987689)
    noise = np.random.randn(design.shape[0])
    # Response is the row-wise sum of the regressors plus the seeded noise.
    response = design.sum(1) + noise
    fitted = sm.RLM(response, self.exog).fit()
    self.results = fitted
示例5: plot_reanalysis_gross_energy_data
# 需要導入模塊: from statsmodels import api [as 別名]
# 或者: from statsmodels.api import RLM [as 別名]
def plot_reanalysis_gross_energy_data(self, outlier_thres):
    """
    Plot normalized 30-day gross energy against wind speed for each
    reanalysis product, one subplot per product, with outliers highlighted.

    :param outlier_thres (float): outlier threshold (typical range of 1 to 4)
        which adjusts outlier detection sensitivity; passed as the ``t``
        parameter of the HuberT norm
    :return: matplotlib.pyplot object
    """
    import matplotlib.pyplot as plt

    valid_monthly = self._monthly.df
    project = self._plant

    # Reanalysis column names in the monthly data frame, listed once.
    product_names = list(project._reanalysis._product.keys())

    # Two columns; keep the historical 2x2 grid for up to four products and
    # add rows beyond that so plt.subplot does not reject the index.
    n_rows = max(2, (len(product_names) + 1) // 2)

    # Normalize energy data to 30 days (independent of the product).
    y = valid_monthly['gross_energy_gwh'] * 30 / valid_monthly['num_days_expected']

    plt.figure(figsize=(9, 9))

    # Scatter monthly wind speed vs plant energy for each reanalysis product
    for p, col_name in enumerate(product_names):
        # Define 'x'-values (constant needed for regression function)
        x = sm.add_constant(valid_monthly[col_name])

        # Robust linear regression with the HuberT norm at the requested
        # threshold
        rlm = sm.RLM(y, x, M=sm.robust.norms.HuberT(t=outlier_thres))
        rlm_results = rlm.fit()

        # Points whose weight is exactly 1 are treated as valid; everything
        # else is drawn as an outlier.
        is_valid = rlm_results.weights == 1
        is_outlier = rlm_results.weights != 1

        # NOTE(review): this is the correlation coefficient from
        # np.corrcoef, not its square, although it is labelled "R2" in the
        # title -- confirm which is intended.
        r2 = np.corrcoef(x.loc[is_valid, col_name], y[is_valid])[0, 1]

        # Plot results
        plt.subplot(n_rows, 2, p + 1)
        plt.plot(x.loc[is_outlier, col_name], y[is_outlier], 'rx', label='Outlier')
        plt.plot(x.loc[is_valid, col_name], y[is_valid], '.', label='Valid data')
        plt.title(col_name + ', R2=' + str(np.round(r2, 3)))
        plt.xlabel('Wind speed (m/s)')
        plt.ylabel('30-day normalized gross energy (GWh)')

    plt.tight_layout()
    return plt
示例6: filter_outliers
# 需要導入模塊: from statsmodels import api [as 別名]
# 或者: from statsmodels.api import RLM [as 別名]
def filter_outliers(self, reanal, outlier_thresh, comb_loss_thresh):
    """
    Return the monthly data that survives outlier filtering, memoized per
    parameter combination.

    Filtering depends on:
        1. The reanalysis product
        2. The Huber parameter which controls sensitivity of outlier
           detection in robust linear regression
        3. The combined availability and curtailment loss criteria

    There are only 300 combinations of outlier removals
    (3 reanalysis products x 10 outlier threshold values x 10 combined loss
    thresholds), so results are stored in ``self.outlier_filtering`` keyed by
    the argument triple. This avoids rerunning the robust regression on
    every Monte Carlo iteration.

    Args:
        reanal(:obj:`string`): The name of the reanalysis product
        outlier_thresh(:obj:`float`): The Huber parameter controlling
            sensitivity of outlier detection
        comb_loss_thresh(:obj:`float`): The combined availability and
            curtailment monthly loss threshold

    Returns:
        :obj:`pandas.DataFrame`: Filtered monthly data ready for linear
        regression
    """
    cache_key = (reanal, outlier_thresh, comb_loss_thresh)

    # Cache hit: hand back the stored frame without refitting.
    if cache_key in self.outlier_filtering:
        return self.outlier_filtering[cache_key]

    monthly = self._monthly.df

    # First filter: combined losses below the threshold and NaN flag off.
    combined_loss = monthly['availability_pct'] + monthly['curtailment_pct']
    flag_off = monthly['nan_flag'] == False  # elementwise comparison
    candidates = monthly.loc[(combined_loss < comb_loss_thresh) & flag_off]

    # Second filter: robust regression of energy on the reanalysis data
    # (with constant column) using the Huber norm to flag outliers.
    exog = sm.add_constant(candidates[reanal])
    endog = candidates['gross_energy_gwh']
    huber_norm = sm.robust.norms.HuberT(outlier_thresh)
    fit = sm.RLM(endog, exog, M=huber_norm).fit()

    # Valid rows are those whose weight from the fit equals 1.
    kept_columns = [reanal, 'energy_gwh', 'availability_gwh',
                    'curtailment_gwh', 'num_days_expected']
    filtered = candidates.loc[fit.weights == 1, kept_columns]

    # Remember the result for subsequent calls with the same arguments.
    self.outlier_filtering[cache_key] = filtered
    return filtered