This article collects typical usage examples of the OLS class from statsmodels.regression.linear_model in Python. If you are unsure how the OLS class is used in practice, the curated examples below should help.
The following shows 15 code examples of the OLS class, ordered by popularity.
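Before the individual examples, here is a minimal, self-contained sketch of the basic fit-and-summarize pattern that most of the snippets below build on. The simulated data and variable names are illustrative additions, not taken from any of the examples.

import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

# Simulated data: y depends linearly on x plus noise.
rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = 1.0 + 2.0 * x + rng.normal(size=100)

exog = add_constant(x)      # prepend an intercept column
res = OLS(y, exog).fit()    # ordinary least squares fit
print(res.params)           # approximately [1.0, 2.0]
print(res.summary())        # full regression table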
Example 1: test_regularized_weights
def test_regularized_weights(self):
    np.random.seed(1432)
    exog1 = np.random.normal(size=(100, 3))
    endog1 = exog1[:, 0] + exog1[:, 1] + np.random.normal(size=100)
    exog2 = np.random.normal(size=(100, 3))
    endog2 = exog2[:, 0] + exog2[:, 1] + np.random.normal(size=100)

    exog_a = np.vstack((exog1, exog1, exog2))
    endog_a = np.concatenate((endog1, endog1, endog2))

    # Should be equivalent to exog_a, endog_a.
    exog_b = np.vstack((exog1, exog2))
    endog_b = np.concatenate((endog1, endog2))
    wgts = np.ones(200)
    wgts[0:100] = 2
    sigma = np.diag(1 / wgts)

    for L1_wt in 0, 0.5, 1:
        for alpha in 0, 1:
            mod1 = OLS(endog_a, exog_a)
            rslt1 = mod1.fit_regularized(L1_wt=L1_wt, alpha=alpha)

            mod2 = WLS(endog_b, exog_b, weights=wgts)
            rslt2 = mod2.fit_regularized(L1_wt=L1_wt, alpha=alpha)

            mod3 = GLS(endog_b, exog_b, sigma=sigma)
            rslt3 = mod3.fit_regularized(L1_wt=L1_wt, alpha=alpha)

            assert_almost_equal(rslt1.params, rslt2.params, decimal=3)
            assert_almost_equal(rslt1.params, rslt3.params, decimal=3)
Example 2: test_regression_with_tuples
def test_regression_with_tuples(self):
    i = pandas.Series([1, 2, 3, 4] * 10, name="i")
    y = pandas.Series([1, 2, 3, 4, 5] * 8, name="y")
    x = pandas.Series([1, 2, 3, 4, 5, 6, 7, 8] * 5, name="x")

    df = pandas.DataFrame(index=i.index)
    df = df.join(i)
    endo = df.join(y)
    exo = df.join(x)
    endo_groups = endo.groupby("i")
    exo_groups = exo.groupby("i")
    exo_df = exo_groups.agg([np.sum, np.max])
    endo_df = endo_groups.agg([np.sum, np.max])
    reg = OLS(exo_df[[("x", "sum")]], endo_df).fit()

    interesting_lines = []
    import warnings
    with warnings.catch_warnings():
        # Ignore the omnibus normality warning; it is not relevant here.
        warnings.simplefilter("ignore")
        for line in str(reg.summary()).splitlines():
            if "_" in line:
                interesting_lines.append(line[:38])

    desired = ["Dep. Variable: x_sum ",
               "y_sum 1.4595 0.209 ",
               "y_amax 0.2432 0.035 "]
    assert_equal(sorted(desired), sorted(interesting_lines))
Example 3: reset_ramsey
def reset_ramsey(res, degree=5):
    '''Ramsey's RESET specification test for linear models

    This is a general specification test for additional non-linear effects
    in a model.

    Notes
    -----
    The test fits an auxiliary OLS regression where the design matrix, exog,
    is augmented by powers 2 to degree of the fitted values. Then it performs
    an F-test of whether these additional terms are significant.

    If the p-value of the F-test is below a threshold, e.g. 0.1, this
    indicates that there might be additional non-linear effects in the model
    and that the linear model is mis-specified.

    References
    ----------
    http://en.wikipedia.org/wiki/Ramsey_RESET_test
    '''
    order = degree + 1
    k_vars = res.model.exog.shape[1]
    # Vandermonde matrix of the fitted values, without the constant and
    # linear columns:
    y_fitted_vander = np.vander(res.fittedvalues, order)[:, :-2]
    exog = np.column_stack((res.model.exog, y_fitted_vander))
    res_aux = OLS(res.model.endog, exog).fit()
    # Jointly test the degree - 1 added powers of the fitted values.
    r_matrix = np.eye(degree - 1, exog.shape[1], k_vars)
    return res_aux.f_test(r_matrix)
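A short usage sketch for reset_ramsey follows; the simulated data and the deliberately misspecified fit are illustrative assumptions, not part of the example above.

import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

# Simulate data with a quadratic term, then fit a purely linear model.
rng = np.random.default_rng(0)
x = rng.normal(size=200)
y = 1.0 + x + 0.5 * x**2 + rng.normal(size=200)
res = OLS(y, add_constant(x)).fit()    # misspecified: omits x**2

ftest = reset_ramsey(res, degree=3)    # augments with fitted**2, fitted**3
print(ftest)                           # a small p-value flags misspecification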
Example 4: setupClass
def setupClass(cls):
    R = np.zeros(7)
    R[4:6] = [1, -1]
    data = longley.load()
    data.exog = add_constant(data.exog, prepend=False)
    res1 = OLS(data.endog, data.exog).fit()
    cls.Ttest1 = res1.t_test(R)
Example 5: fit_dlogM_mw
def fit_dlogM_mw(tab, sfrsd_tab, mltype='ring', mlb='i'):
    merge_tab = t.join(tab, sfrsd_tab, 'plateifu')
    is_agn = m.mask_from_maskbits(merge_tab['mngtarg3'], [1, 2, 3, 4])

    mlb_ix = totalmass.StellarMass.bands_ixs[mlb]
    absmag_sun_mlb = totalmass.StellarMass.absmag_sun[mlb_ix]

    logmass_in_ifu = merge_tab['mass_in_ifu'].to(u.dex(u.Msun))
    logmass_in_ifu_lw = merge_tab['ml_fluxwt'] + merge_tab['ifu_absmag'][:, mlb_ix].to(
        u.dex(m.bandpass_sol_l_unit), totalmass.bandpass_flux_to_solarunits(absmag_sun_mlb))
    merge_tab['dlogmass_lw'] = logmass_in_ifu - logmass_in_ifu_lw

    ha_corr = np.exp(merge_tab['mean_atten_mwtd'] * (6563 / 5500)**-1.3)
    sfrsd = merge_tab['sigma_sfr'] * ha_corr * u.Msun / u.yr / u.pc**2
    mass_pca = merge_tab['mass_in_ifu'] + merge_tab['outer_mass_{}'.format(mltype)]
    ssfrsd = sfrsd / mass_pca
    merge_tab['log_ssfrsd'] = ssfrsd.to(u.dex(ssfrsd.unit))
    merge_tab['log_ssfrsd'][~np.isfinite(merge_tab['log_ssfrsd'])] = np.nan * merge_tab['log_ssfrsd'].unit

    ols = OLS(
        endog=np.array(merge_tab['dlogmass_lw'][~is_agn]),
        exog=sm_add_constant(
            t.Table(merge_tab['mean_atten_mwtd', 'std_atten_mwtd', 'log_ssfrsd'])[~is_agn].to_pandas(),
            prepend=False),
        hasconst=True, missing='drop')
    olsfit = ols.fit()

    return olsfit
Example 6: test_regularized
def test_regularized(self):
    import os
    from . import glmnet_r_results

    cur_dir = os.path.dirname(os.path.abspath(__file__))
    data = np.loadtxt(os.path.join(cur_dir, "results", "lasso_data.csv"),
                      delimiter=",")

    tests = [x for x in dir(glmnet_r_results) if x.startswith("rslt_")]

    for test in tests:
        vec = getattr(glmnet_r_results, test)

        n = vec[0]
        p = vec[1]
        L1_wt = float(vec[2])
        lam = float(vec[3])
        params = vec[4:].astype(np.float64)

        endog = data[0:int(n), 0]
        exog = data[0:int(n), 1:(int(p) + 1)]

        endog = endog - endog.mean()
        endog /= endog.std(ddof=1)
        exog = exog - exog.mean(0)
        exog /= exog.std(0, ddof=1)

        mod = OLS(endog, exog)
        rslt = mod.fit_regularized(L1_wt=L1_wt, alpha=lam)
        assert_almost_equal(rslt.params, params, decimal=3)

        # Smoke test for summary
        smry = rslt.summary()
Example 7: test_f_score_with_covars_and_normalized_design_withcovar
def test_f_score_with_covars_and_normalized_design_withcovar(random_state=0):
    """
    This test has a statsmodels dependence. There seems to be no simple
    alternative way to perform an F-test on a linear model including
    covariates.
    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest

    rng = check_random_state(random_state)

    # Normalized data
    n_samples = 50

    # generate data
    var1 = np.ones((n_samples, 1)) / np.sqrt(n_samples)  # normalized
    var2 = rng.randn(n_samples, 1)
    var2 = var2 / np.sqrt(np.sum(var2 ** 2, 0))  # normalize
    covars = np.eye(n_samples, 3)  # covars is orthogonal
    covars[3] = -1  # covars is orthogonal to var1
    covars = orthonormalize_matrix(covars)

    # own f_score
    f_val_own = _f_score_with_covars_and_normalized_design(var1, var2, covars)[0]

    # statsmodels f_score
    test_matrix = np.array([[1.0, 0.0, 0.0, 0.0]])
    statsmodels_ols = OLS(var2, np.hstack((var1, covars))).fit()
    f_val_statsmodels = statsmodels_ols.f_test(test_matrix).fvalue[0]
    assert_array_almost_equal(f_val_own, f_val_statsmodels)
Example 8: test_permuted_ols_intercept_statsmodels_withcovar
def test_permuted_ols_intercept_statsmodels_withcovar(random_state=0):
    """
    This test has a statsmodels dependence. There seems to be no simple
    alternative way to perform an F-test on a linear model including
    covariates.
    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest

    rng = check_random_state(random_state)

    # design parameters
    n_samples = 50

    # create design
    target_var = rng.randn(n_samples, 1)
    tested_var = np.ones((n_samples, 1))
    confounding_vars = rng.randn(n_samples, 2)

    # statsmodels OLS
    ols = OLS(target_var, np.hstack((tested_var, confounding_vars))).fit()
    fvals = ols.f_test([[1.0, 0.0, 0.0]]).fvalue

    # permuted OLS
    _, orig_scores, _ = permuted_ols(tested_var, target_var, confounding_vars,
                                     n_perm=0, random_state=random_state)
    # same thing but with model_intercept=True to check it has no effect
    _, orig_scores_addintercept, _ = permuted_ols(
        tested_var, target_var, confounding_vars, model_intercept=True,
        n_perm=0, random_state=random_state)

    assert_array_almost_equal(fvals, orig_scores, decimal=6)
    assert_array_almost_equal(orig_scores, orig_scores_addintercept, decimal=6)
Example 9: test_repeat_partition
def test_repeat_partition():
    # tests that if we use identical partitions the average is the same
    # as the estimate for the full data

    np.random.seed(435265)
    N = 200
    p = 10
    m = 1

    beta = np.random.normal(size=p)
    beta = beta * np.random.randint(0, 2, p)
    X = np.random.normal(size=(N, p))
    y = X.dot(beta) + np.random.normal(size=N)

    def _rep_data_gen(endog, exog, partitions):
        """partitions data"""
        n_exog = exog.shape[0]
        n_part = np.ceil(n_exog / partitions)
        ii = 0
        while ii < n_exog:
            yield endog, exog
            ii += int(n_part)

    nv_mod = DistributedModel(m, estimation_method=_est_regularized_naive,
                              join_method=_join_naive)
    fitOLSnv = nv_mod.fit(_rep_data_gen(y, X, m), fit_kwds={"alpha": 0.1})

    ols_mod = OLS(y, X)
    fitOLS = ols_mod.fit_regularized(alpha=0.1)

    assert_allclose(fitOLSnv.params, fitOLS.params)
Example 10: setup_class
def setup_class(cls):
    cls.cov_type = 'HC0'

    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='HC0')

    mod2 = OLS(endog, exog)
    cls.res2 = mod2.fit(cov_type='HC0')
Example 11: test_filter
def test_filter():
    # Basic test for filtering
    mod = RecursiveLS(endog, exog)
    res = mod.filter()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
Example 12: test_conf_int_single_regressor
def test_conf_int_single_regressor():
    # GH#706 single-regressor model (i.e. no intercept) with 1D exog
    # should get passed to DataFrame for conf_int
    y = pandas.Series(np.random.randn(10))
    x = pandas.Series(np.ones(10))
    res = OLS(y, x).fit()
    conf_int = res.conf_int()

    np.testing.assert_equal(conf_int.shape, (1, 2))
    np.testing.assert_(isinstance(conf_int, pandas.DataFrame))
Example 13: qqline
def qqline(ax, line, x=None, y=None, dist=None, fmt='r-'):
    """
    Plot a reference line for a qqplot.

    Parameters
    ----------
    ax : matplotlib axes instance
        The axes on which to plot the line
    line : str {'45', 'r', 's', 'q'}
        Options for the reference line to which the data is compared:

        - '45' - 45-degree line
        - 's' - standardized line; the expected order statistics are
          scaled by the standard deviation of the given sample and have
          the mean added to them
        - 'r' - A regression line is fit
        - 'q' - A line is fit through the quartiles
        - None - By default no reference line is added to the plot
    x : array
        X data for plot. Not needed if line is '45'.
    y : array
        Y data for plot. Not needed if line is '45'.
    dist : scipy.stats.distribution
        A scipy.stats distribution; needed if line is 'q'.

    Notes
    -----
    There is no return value. The line is plotted on the given `ax`.
    """
    if line == '45':
        # list() so the result is subscriptable under Python 3
        end_pts = list(zip(ax.get_xlim(), ax.get_ylim()))
        end_pts[0] = min(end_pts[0])
        end_pts[1] = max(end_pts[1])
        ax.plot(end_pts, end_pts, fmt)
        ax.set_xlim(end_pts)
        ax.set_ylim(end_pts)
        return  # does this have any side effects?
    if x is None and y is None:
        raise ValueError("If line is not 45, x and y cannot be None.")
    elif line == 'r':
        # could use ax.lines[0].get_xdata(), get_ydata(),
        # but don't know axes are 'clean'
        y = OLS(y, add_constant(x)).fit().fittedvalues
        ax.plot(x, y, fmt)
    elif line == 's':
        m, b = y.std(), y.mean()
        ref_line = x * m + b
        ax.plot(x, ref_line, fmt)
    elif line == 'q':
        _check_for_ppf(dist)
        q25 = stats.scoreatpercentile(y, 25)
        q75 = stats.scoreatpercentile(y, 75)
        theoretical_quartiles = dist.ppf([0.25, 0.75])
        m = (q75 - q25) / np.diff(theoretical_quartiles)
        b = q25 - m * theoretical_quartiles[0]
        ax.plot(x, m * x + b, fmt)
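A hedged sketch of how qqline might be called; the matplotlib figure and the simulated sample are assumptions added for illustration, using the 's' (standardized line) option from the function above.

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Simulated sample plotted against theoretical normal quantiles.
rng = np.random.default_rng(0)
sample = np.sort(rng.normal(size=100))
theoretical = stats.norm.ppf((np.arange(100) + 0.5) / 100)

fig, ax = plt.subplots()
ax.plot(theoretical, sample, 'bo')
qqline(ax, 's', x=theoretical, y=sample)  # standardized reference line
plt.show()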
Example 14: test_706
def test_706():
    # make sure one-regressor pandas Series gets passed to DataFrame
    # for conf_int.
    y = pandas.Series(np.random.randn(10))
    x = pandas.Series(np.ones(10))
    res = OLS(y, x).fit()
    conf_int = res.conf_int()

    np.testing.assert_equal(conf_int.shape, (1, 2))
    np.testing.assert_(isinstance(conf_int, pandas.DataFrame))
Example 15: test_summary_as_latex
def test_summary_as_latex():
    # GH#734
    import re
    dta = longley.load_pandas()
    X = dta.exog
    X["constant"] = 1
    y = dta.endog
    res = OLS(y, X).fit()
    with pytest.warns(UserWarning):
        table = res.summary().as_latex()

    # replace the (run-dependent) date and time with fixed values
    table = re.sub("(?<=\n\\\\textbf\\{Date:\\} &).+?&",
                   " Sun, 07 Apr 2013 &", table)
    table = re.sub("(?<=\n\\\\textbf\\{Time:\\} &).+?&",
                   " 13:46:07 &", table)

    expected = """\\begin{center}
\\begin{tabular}{lclc}
\\toprule
\\textbf{Dep. Variable:} & TOTEMP & \\textbf{ R-squared: } & 0.995 \\\\
\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.992 \\\\
\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 330.3 \\\\
\\textbf{Date:} & Sun, 07 Apr 2013 & \\textbf{ Prob (F-statistic):} & 4.98e-10 \\\\
\\textbf{Time:} & 13:46:07 & \\textbf{ Log-Likelihood: } & -109.62 \\\\
\\textbf{No. Observations:} & 16 & \\textbf{ AIC: } & 233.2 \\\\
\\textbf{Df Residuals:} & 9 & \\textbf{ BIC: } & 238.6 \\\\
\\textbf{Df Model:} & 6 & \\textbf{ } & \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lcccccc}
 & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\
\\midrule
\\textbf{GNPDEFL} & 15.0619 & 84.915 & 0.177 & 0.863 & -177.029 & 207.153 \\\\
\\textbf{GNP} & -0.0358 & 0.033 & -1.070 & 0.313 & -0.112 & 0.040 \\\\
\\textbf{UNEMP} & -2.0202 & 0.488 & -4.136 & 0.003 & -3.125 & -0.915 \\\\
\\textbf{ARMED} & -1.0332 & 0.214 & -4.822 & 0.001 & -1.518 & -0.549 \\\\
\\textbf{POP} & -0.0511 & 0.226 & -0.226 & 0.826 & -0.563 & 0.460 \\\\
\\textbf{YEAR} & 1829.1515 & 455.478 & 4.016 & 0.003 & 798.788 & 2859.515 \\\\
\\textbf{constant} & -3.482e+06 & 8.9e+05 & -3.911 & 0.004 & -5.5e+06 & -1.47e+06 \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lclc}
\\textbf{Omnibus:} & 0.749 & \\textbf{ Durbin-Watson: } & 2.559 \\\\
\\textbf{Prob(Omnibus):} & 0.688 & \\textbf{ Jarque-Bera (JB): } & 0.684 \\\\
\\textbf{Skew:} & 0.420 & \\textbf{ Prob(JB): } & 0.710 \\\\
\\textbf{Kurtosis:} & 2.434 & \\textbf{ Cond. No. } & 4.86e+09 \\\\
\\bottomrule
\\end{tabular}
%\\caption{OLS Regression Results}
\\end{center}
Warnings: \\newline
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified. \\newline
[2] The condition number is large, 4.86e+09. This might indicate that there are \\newline
strong multicollinearity or other numerical problems."""

    assert_equal(table, expected)