This article collects typical usage examples of the Python function statsmodels.tools.tools.add_constant. If you are wondering what add_constant does, how to call it, or what real-world uses look like, the curated code examples below should help.
The sections that follow show 15 code examples of the add_constant function, ordered by popularity by default.
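Before diving into the examples, here is a minimal, self-contained sketch of what add_constant does (the array values are illustrative; note that the default position of the constant column has changed across statsmodels releases, with recent versions prepending it):

import numpy as np
from statsmodels.tools.tools import add_constant

x = np.arange(5.0)                      # a single regressor with no intercept column
X = add_constant(x)                     # adds a column of ones; prepended by default in recent releases
print(X.shape)                          # (5, 2)
print(add_constant(x, prepend=False))   # ones appended as the last column instead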
Example 1: get_griliches76_data
def get_griliches76_data():
    import os
    curdir = os.path.split(__file__)[0]
    path = os.path.join(curdir, 'griliches76.dta')
    griliches76_data = iolib.genfromdta(path, missing_flt=np.NaN, pandas=True)

    # create year dummies
    years = griliches76_data['year'].unique()
    N = griliches76_data.shape[0]

    for yr in years:
        griliches76_data['D_%i' % yr] = np.zeros(N)
        for i in range(N):
            if griliches76_data.loc[i, 'year'] == yr:
                griliches76_data.loc[i, 'D_%i' % yr] = 1
            else:
                pass

    griliches76_data['const'] = 1

    X = add_constant(griliches76_data[['s', 'iq', 'expr', 'tenure', 'rns',
                                       'smsa', 'D_67', 'D_68', 'D_69', 'D_70',
                                       'D_71', 'D_73']],
                     prepend=True)   # for R comparison
                     #prepend=False) # for Stata comparison

    Z = add_constant(griliches76_data[['expr', 'tenure', 'rns', 'smsa',
                                       'D_67', 'D_68', 'D_69', 'D_70', 'D_71',
                                       'D_73', 'med', 'kww', 'age', 'mrt']])
    Y = griliches76_data['lw']

    return Y, X, Z
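Note that in this example add_constant receives a pandas DataFrame; in that case it returns a DataFrame with a named 'const' column rather than a plain ndarray. A small hedged sketch with made-up column names:

import pandas as pd
from statsmodels.tools.tools import add_constant

df = pd.DataFrame({'s': [12, 16, 14], 'iq': [95, 110, 100]})   # illustrative data only
X = add_constant(df, prepend=True)
print(list(X.columns))   # ['const', 's', 'iq'] -- the constant becomes a named column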
Example 2: __init__
def __init__(self):
    d = macrodata.load_pandas().data
    # growth rates
    d["gs_l_realinv"] = 400 * np.log(d["realinv"]).diff()
    d["gs_l_realgdp"] = 400 * np.log(d["realgdp"]).diff()
    d["lint"] = d["realint"].shift(1)
    d["tbilrate"] = d["tbilrate"].shift(1)

    d = d.dropna()
    self.d = d
    endogg = d["gs_l_realinv"]
    exogg = add_constant(d[["gs_l_realgdp", "lint"]])
    exogg2 = add_constant(d[["gs_l_realgdp", "tbilrate"]])
    exogg3 = add_constant(d[["gs_l_realgdp"]])

    res_ols = OLS(endogg, exogg).fit()
    res_ols2 = OLS(endogg, exogg2).fit()
    res_ols3 = OLS(endogg, exogg3).fit()

    self.res = res_ols
    self.res2 = res_ols2
    self.res3 = res_ols3

    self.endog = self.res.model.endog
    self.exog = self.res.model.exog
Example 3: setup_class
def setup_class(cls):
    d = macrodata.load_pandas().data
    # growth rates
    d['gs_l_realinv'] = 400 * np.log(d['realinv']).diff()
    d['gs_l_realgdp'] = 400 * np.log(d['realgdp']).diff()
    d['lint'] = d['realint'].shift(1)
    d['tbilrate'] = d['tbilrate'].shift(1)

    d = d.dropna()
    cls.d = d
    endogg = d['gs_l_realinv']
    exogg = add_constant(d[['gs_l_realgdp', 'lint']])
    exogg2 = add_constant(d[['gs_l_realgdp', 'tbilrate']])
    exogg3 = add_constant(d[['gs_l_realgdp']])

    res_ols = OLS(endogg, exogg).fit()
    res_ols2 = OLS(endogg, exogg2).fit()
    res_ols3 = OLS(endogg, exogg3).fit()

    cls.res = res_ols
    cls.res2 = res_ols2
    cls.res3 = res_ols3

    cls.endog = cls.res.model.endog
    cls.exog = cls.res.model.exog
Example 4: test_add_constant_has_constant2d
def test_add_constant_has_constant2d(self):
    x = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
    y = tools.add_constant(x, has_constant="skip")
    assert_equal(x, y)
    assert_raises(ValueError, tools.add_constant, x, has_constant="raise")
    assert_equal(tools.add_constant(x, has_constant="add"),
                 np.column_stack((np.ones(4), x)))
Example 5: test_add_constant_has_constant1d
def test_add_constant_has_constant1d(self):
    x = np.ones(5)
    x = tools.add_constant(x, has_constant="skip")
    assert_equal(x, np.ones(5))
    assert_raises(ValueError, tools.add_constant, x, has_constant="raise")
    assert_equal(tools.add_constant(x, has_constant="add"), np.ones((5, 2)))
Example 6: coint
def coint(y1, y2, regression="c"):
    """
    This is a simple cointegration test. It uses a unit-root test on the
    residuals to test for a cointegrated relationship.

    See Hamilton (1994) 19.2

    Parameters
    ----------
    y1 : array_like, 1d
        first element in cointegrating vector
    y2 : array_like
        remaining elements in cointegrating vector
    regression : str {'c'}
        Included in regression
        * 'c' : Constant

    Returns
    -------
    coint_t : float
        t-statistic of unit-root test on residuals
    pvalue : float
        MacKinnon's approximate p-value based on MacKinnon (1994)
    crit_value : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels.

    Notes
    -----
    The null hypothesis is that there is no cointegration; the alternative
    hypothesis is that there is a cointegrating relationship. If the p-value
    is small, below a critical size, then we can reject the hypothesis that
    there is no cointegrating relationship.

    P-values are obtained through regression surface approximation from
    MacKinnon 1994.

    References
    ----------
    MacKinnon, J.G. 1994. "Approximate asymptotic distribution functions for
    unit-root and cointegration tests." `Journal of Business and Economic
    Statistics` 12, 167-76.
    """
    regression = regression.lower()
    if regression not in ['c', 'nc', 'ct', 'ctt']:
        raise ValueError("regression option %s not understood" % regression)
    y1 = np.asarray(y1)
    y2 = np.asarray(y2)
    if regression == 'c':
        y2 = add_constant(y2, prepend=False)
    st1_resid = OLS(y1, y2).fit().resid  # stage one residuals
    lgresid_cons = add_constant(st1_resid[0:-1], prepend=False)
    uroot_reg = OLS(st1_resid[1:], lgresid_cons).fit()
    coint_t = (uroot_reg.params[0] - 1) / uroot_reg.bse[0]
    pvalue = mackinnonp(coint_t, regression="c", N=2, lags=None)
    crit_value = mackinnoncrit(N=1, regression="c", nobs=len(y1))
    return coint_t, pvalue, crit_value
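For context, here is a hedged usage sketch of this kind of cointegration test on synthetic data; it assumes the coint function shipped in statsmodels.tsa.stattools, which returns the same (t-statistic, p-value, critical values) triple as the example above:

import numpy as np
from statsmodels.tsa.stattools import coint

np.random.seed(0)
trend = np.cumsum(np.random.randn(500))        # shared random-walk trend
y1 = trend + 0.5 * np.random.randn(500)
y2 = 0.8 * trend + 0.5 * np.random.randn(500)
t_stat, pvalue, crit = coint(y1, y2)
print(pvalue)   # expected to be small, since y1 and y2 share a common trend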
Example 7: notyet_atst
def notyet_atst():
    d = macrodata.load().data

    realinv = d['realinv']
    realgdp = d['realgdp']
    realint = d['realint']
    endog = realinv
    exog = add_constant(np.c_[realgdp, realint], prepend=True)
    res_ols1 = OLS(endog, exog).fit()

    # growth rates
    gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
    gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))
    lint = d['realint'][:-1]
    tbilrate = d['tbilrate'][:-1]

    endogg = gs_l_realinv
    exogg = add_constant(np.c_[gs_l_realgdp, lint], prepend=True)
    exogg2 = add_constant(np.c_[gs_l_realgdp, tbilrate], prepend=True)

    res_ols = OLS(endogg, exogg).fit()
    res_ols2 = OLS(endogg, exogg2).fit()

    # the following were done accidentally with res_ols1 in R,
    # with original Greene data
    params = np.array([-272.3986041341653, 0.1779455206941112,
                       0.2149432424658157])
    cov_hac_4 = np.array([1321.569466333051, -0.2318836566017612,
                          37.01280466875694, -0.2318836566017614,
                          4.602339488102263e-05, -0.0104687835998635,
                          37.012804668757, -0.0104687835998635,
                          21.16037144168061]).reshape(3, 3, order='F')
    cov_hac_10 = np.array([2027.356101193361, -0.3507514463299015,
                           54.81079621448568, -0.350751446329901,
                           6.953380432635583e-05, -0.01268990195095196,
                           54.81079621448564, -0.01268990195095195,
                           22.92512402151113]).reshape(3, 3, order='F')

    # goldfeld-quandt
    het_gq_greater = dict(statistic=13.20512768685082, df1=99, df2=98,
                          pvalue=1.246141976112324e-30, distr='f')
    het_gq_less = dict(statistic=13.20512768685082, df1=99, df2=98, pvalue=1.)
    het_gq_2sided = dict(statistic=13.20512768685082, df1=99, df2=98,
                         pvalue=1.246141976112324e-30, distr='f')

    # goldfeld-quandt, fraction = 0.5
    het_gq_greater_2 = dict(statistic=87.1328934692124, df1=48, df2=47,
                            pvalue=2.154956842194898e-33, distr='f')

    gq = smsdia.het_goldfeldquandt(endog, exog, split=0.5)
    compare_t_est(gq, het_gq_greater, decimal=(13, 14))
    assert_equal(gq[-1], 'increasing')

    harvey_collier = dict(stat=2.28042114041313, df=199,
                          pvalue=0.02364236161988260, distr='t')
    # hc = harvtest(fm, order.by=ggdp, data = list())
    harvey_collier_2 = dict(stat=0.7516918462158783, df=199,
                            pvalue=0.4531244858006127, distr='t')
Example 8: test_add_constant_has_constant1d
def test_add_constant_has_constant1d(self):
    x = np.ones(5)
    x = tools.add_constant(x, has_constant='skip')
    assert_equal(x, np.ones((5, 1)))
    assert_raises(ValueError, tools.add_constant, x, has_constant='raise')
    assert_equal(tools.add_constant(x, has_constant='add'),
                 np.ones((5, 2)))
Example 9: test_add_constant_has_constant2d
def test_add_constant_has_constant2d(self):
    x = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.]]).T
    y = tools.add_constant(x, has_constant='skip')
    assert_equal(x, y)
    with pytest.raises(ValueError):
        tools.add_constant(x, has_constant='raise')
    assert_equal(tools.add_constant(x, has_constant='add'),
                 np.column_stack((np.ones(4), x)))
Example 10: test_wls_tss
def test_wls_tss():
    y = np.array([22, 22, 22, 23, 23, 23])
    X = [[1, 0], [1, 0], [1, 1], [0, 1], [0, 1], [0, 1]]

    ols_mod = OLS(y, add_constant(X, prepend=False)).fit()

    yw = np.array([22, 22, 23.])
    Xw = [[1, 0], [1, 1], [0, 1]]
    w = np.array([2, 1, 3.])

    wls_mod = WLS(yw, add_constant(Xw, prepend=False), weights=w).fit()
    assert_equal(ols_mod.centered_tss, wls_mod.centered_tss)
Example 11: test_poisson_residuals
def test_poisson_residuals():
    nobs, k_exog = 100, 5
    np.random.seed(987125)
    x = np.random.randn(nobs, k_exog - 1)
    x = add_constant(x)
    y_true = x.sum(1) / 2
    y = y_true + 2 * np.random.randn(nobs)
    exposure = 1 + np.arange(nobs) // 4

    yp = np.random.poisson(np.exp(y_true) * exposure)
    yp[10:15] += 10

    fam = sm.families.Poisson()
    mod_poi_e = GLM(yp, x, family=fam, exposure=exposure)
    res_poi_e = mod_poi_e.fit()
    mod_poi_w = GLM(yp / exposure, x, family=fam, var_weights=exposure)
    res_poi_w = mod_poi_w.fit()

    assert_allclose(res_poi_e.resid_response / exposure,
                    res_poi_w.resid_response)
    assert_allclose(res_poi_e.resid_pearson, res_poi_w.resid_pearson)
    assert_allclose(res_poi_e.resid_deviance, res_poi_w.resid_deviance)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=FutureWarning)
        assert_allclose(res_poi_e.resid_anscombe, res_poi_w.resid_anscombe)
    assert_allclose(res_poi_e.resid_anscombe_unscaled,
                    res_poi_w.resid_anscombe)
Example 12: calculateStat
def calculateStat(y, x):
    cointegration = coint(y, x)
    signal = int(cointegration[1] < 0.05)
    x = add_constant(x)
    reg = OLS(y, x).fit()
    # returns (signal, b0, b1, rmse)
    return (signal, float(reg.params[0]), float(reg.params[1]),
            float(math.sqrt(reg.mse_resid)))
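A hedged usage sketch for calculateStat on two synthetic, related series; it assumes calculateStat and its dependencies (coint, OLS, add_constant, math) are already in scope:

import numpy as np

np.random.seed(1)
common = np.cumsum(np.random.randn(300))       # shared stochastic trend
y = common + 0.3 * np.random.randn(300)
x = 0.5 * common + 0.3 * np.random.randn(300)
signal, b0, b1, rmse = calculateStat(y, x)     # signal is 1 when the cointegration p-value < 0.05
print(signal, b0, b1, rmse)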
Example 13: plot_ccpr
def plot_ccpr(results, exog_idx, ax=None):
    """
    Plot CCPR against one regressor.

    Generates a CCPR (component and component-plus-residual) plot.

    Parameters
    ----------
    results : result instance
        A regression results instance.
    exog_idx : int or string
        Exogenous, explanatory variable. If a string is given, it should
        be the variable name that you want to use, and you can use arbitrary
        translations as with a formula.
    ax : Matplotlib AxesSubplot instance, optional
        If given, this axis is used for plotting instead of creating a new
        figure.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure. Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    plot_ccpr_grid : Creates CCPR plots for multiple regressors in a plot grid.

    Notes
    -----
    The CCPR plot provides a way to judge the effect of one regressor on the
    response variable by taking into account the effects of the other
    independent variables. The partial residuals plot is defined as
    Residuals + B_i*X_i versus X_i. The component adds B_i*X_i versus
    X_i to show where the fitted line would lie. Care should be taken if X_i
    is highly correlated with any of the other independent variables. If this
    is the case, the variance evident in the plot will be an underestimate of
    the true variance.

    References
    ----------
    http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig, ax = utils.create_mpl_ax(ax)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    x1 = results.model.exog[:, exog_idx]
    # namestr = ' for %s' % self.name if self.name else ''
    x1beta = x1 * results._results.params[exog_idx]
    ax.plot(x1, x1beta + results.resid, 'o')
    from statsmodels.tools.tools import add_constant
    mod = OLS(x1beta, add_constant(x1)).fit()
    params = mod.params
    fig = abline_plot(*params, **dict(ax=ax))
    # ax.plot(x1, x1beta, '-')
    ax.set_title('Component and component plus residual plot')
    ax.set_ylabel("Residual + %s*beta_%d" % (exog_name, exog_idx))
    ax.set_xlabel("%s" % exog_name)

    return fig
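A hedged usage sketch of the public entry point, assuming the function is exposed as sm.graphics.plot_ccpr (data and coefficients below are illustrative):

import numpy as np
import statsmodels.api as sm

np.random.seed(2)
X = sm.add_constant(np.random.randn(100, 2))         # constant plus two regressors
y = X @ np.array([1.0, 2.0, -1.0]) + np.random.randn(100)
res = sm.OLS(y, X).fit()
fig = sm.graphics.plot_ccpr(res, exog_idx=1)         # CCPR plot for the first non-constant regressor
fig.savefig("ccpr_example.png")                      # or display interactively with matplotlib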
Example 14: setupClass
def setupClass(cls):
    data = longley.load()
    data.exog = add_constant(data.exog, prepend=False)
    ols_res = OLS(data.endog, data.exog).fit()
    gls_res = GLS(data.endog, data.exog).fit()
    cls.res1 = gls_res
    cls.res2 = ols_res
Example 15: test_hac_simple
def test_hac_simple():
    from statsmodels.datasets import macrodata
    d2 = macrodata.load().data
    g_gdp = 400 * np.diff(np.log(d2['realgdp']))
    g_inv = 400 * np.diff(np.log(d2['realinv']))
    exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1]], prepend=True)
    res_olsg = OLS(g_inv, exogg).fit()

    # > NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE,
    #             verbose=TRUE, adjust=TRUE)
    # Lag truncation parameter chosen: 4
    #              (Intercept)          ggdp          lint
    cov1_r = [[1.40643899878678802, -0.3180328707083329709, -0.060621111216488610],
              [-0.31803287070833292, 0.1097308348999818661, 0.000395311760301478],
              [-0.06062111121648865, 0.0003953117603014895, 0.087511528912470993]]

    # > NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE,
    #             verbose=TRUE, adjust=FALSE)
    # Lag truncation parameter chosen: 4
    #              (Intercept)          ggdp          lint
    cov2_r = [[1.3855512908840137, -0.313309610252268500, -0.059720797683570477],
              [-0.3133096102522685, 0.108101169035130618, 0.000389440793564339],
              [-0.0597207976835705, 0.000389440793564336, 0.086211852740503622]]

    cov1, se1 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=True)
    cov2, se2 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False)
    assert_almost_equal(cov1, cov1_r, decimal=14)
    assert_almost_equal(cov2, cov2_r, decimal=14)
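As a usage note, the same HAC (Newey-West) covariance can also be requested directly when fitting, which is the more common route in user code. A hedged sketch whose data construction mirrors the test above:

import numpy as np
from statsmodels.datasets import macrodata
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

d2 = macrodata.load_pandas().data
g_gdp = 400 * np.diff(np.log(d2['realgdp']))
g_inv = 400 * np.diff(np.log(d2['realinv']))
exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1]], prepend=True)

# cov_type='HAC' asks fit() for Newey-West standard errors directly
res_hac = OLS(g_inv, exogg).fit(cov_type='HAC', cov_kwds={'maxlags': 4})
print(res_hac.bse)   # HAC standard errors for the constant, g_gdp and realint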