This article collects typical usage examples of the Python function scikits.statsmodels.tools.tools.add_constant. If you have been wondering what exactly add_constant does, how to call it, or what real code that uses it looks like, the curated examples below should help.
The 15 code examples of add_constant shown below are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
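Before the project examples, here is a minimal self-contained sketch of what add_constant itself does: it appends a column of ones to a design matrix (or prepends it when prepend=True) so that a fitted regression includes an intercept. The outputs in the comments follow the behaviour asserted by Examples 13 and 14 below.

    import numpy as np
    from scikits.statsmodels.tools.tools import add_constant

    x = np.arange(1, 5)                 # 1d data: [1, 2, 3, 4]
    print(add_constant(x, prepend=True))
    # column of ones first, then the data:
    # [[ 1.  1.]
    #  [ 1.  2.]
    #  [ 1.  3.]
    #  [ 1.  4.]]
    print(add_constant(np.ones(5)))     # already constant: returned unchanged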
Example 1: notyet_atst
def notyet_atst():
    d = macrodata.load().data

    realinv = d['realinv']
    realgdp = d['realgdp']
    realint = d['realint']
    endog = realinv
    exog = add_constant(np.c_[realgdp, realint], prepend=True)
    res_ols1 = OLS(endog, exog).fit()

    # growth rates
    gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
    gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))
    lint = d['realint'][:-1]
    tbilrate = d['tbilrate'][:-1]

    endogg = gs_l_realinv
    exogg = add_constant(np.c_[gs_l_realgdp, lint], prepend=True)
    exogg2 = add_constant(np.c_[gs_l_realgdp, tbilrate], prepend=True)

    res_ols = OLS(endogg, exogg).fit()
    res_ols2 = OLS(endogg, exogg2).fit()

    # the following were done accidentally with res_ols1 in R,
    # with original Greene data
    params = np.array([-272.3986041341653, 0.1779455206941112,
                       0.2149432424658157])
    cov_hac_4 = np.array([1321.569466333051, -0.2318836566017612,
        37.01280466875694, -0.2318836566017614, 4.602339488102263e-05,
        -0.0104687835998635, 37.012804668757, -0.0104687835998635,
        21.16037144168061]).reshape(3, 3, order='F')
    cov_hac_10 = np.array([2027.356101193361, -0.3507514463299015,
        54.81079621448568, -0.350751446329901, 6.953380432635583e-05,
        -0.01268990195095196, 54.81079621448564, -0.01268990195095195,
        22.92512402151113]).reshape(3, 3, order='F')

    # Goldfeld-Quandt
    het_gq_greater = dict(statistic=13.20512768685082, df1=99, df2=98,
                          pvalue=1.246141976112324e-30, distr='f')
    het_gq_less = dict(statistic=13.20512768685082, df1=99, df2=98, pvalue=1.)
    het_gq_2sided = dict(statistic=13.20512768685082, df1=99, df2=98,
                         pvalue=1.246141976112324e-30, distr='f')

    # Goldfeld-Quandt, fraction = 0.5
    het_gq_greater_2 = dict(statistic=87.1328934692124, df1=48, df2=47,
                            pvalue=2.154956842194898e-33, distr='f')

    gq = smsdia.het_goldfeldquandt(endog, exog, split=0.5)
    compare_t_est(gq, het_gq_greater, decimal=(13, 14))
    assert_equal(gq[-1], 'increasing')

    harvey_collier = dict(stat=2.28042114041313, df=199,
                          pvalue=0.02364236161988260, distr='t')
    # hc = harvtest(fm, order.by=ggdp, data = list())
    harvey_collier_2 = dict(stat=0.7516918462158783, df=199,
                            pvalue=0.4531244858006127, distr='t')
Example 2: coint
def coint(y1, y2, regression="c"):
    """
    This is a simple cointegration test. Uses a unit-root test on the
    residuals to test for a cointegrated relationship.

    See Hamilton (1994) 19.2

    Parameters
    ----------
    y1 : array_like, 1d
        first element in cointegrating vector
    y2 : array_like
        remaining elements in cointegrating vector
    regression : str {'c'}
        Included in regression
        * 'c' : Constant

    Returns
    -------
    coint_t : float
        t-statistic of unit-root test on residuals
    pvalue : float
        MacKinnon's approximate p-value based on MacKinnon (1994)
    crit_value : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels.

    Notes
    -----
    The null hypothesis is that there is no cointegration; the alternative
    hypothesis is that there is a cointegrating relationship. If the pvalue
    is small, below a critical size, then we can reject the hypothesis that
    there is no cointegrating relationship.

    P-values are obtained through regression surface approximation from
    MacKinnon 1994.

    References
    ----------
    MacKinnon, J.G. 1994. "Approximate asymptotic distribution functions for
    unit-root and cointegration tests." `Journal of Business and Economic
    Statistics` 12, 167-76.
    """
    regression = regression.lower()
    if regression not in ['c', 'nc', 'ct', 'ctt']:
        raise ValueError("regression option %s not understood" % regression)
    y1 = np.asarray(y1)
    y2 = np.asarray(y2)
    if regression == 'c':
        y2 = add_constant(y2)
    st1_resid = OLS(y1, y2).fit().resid  # stage one residuals
    lgresid_cons = add_constant(st1_resid[0:-1])
    uroot_reg = OLS(st1_resid[1:], lgresid_cons).fit()
    coint_t = (uroot_reg.params[0] - 1) / uroot_reg.bse[0]
    pvalue = mackinnonp(coint_t, regression="c", N=2, lags=None)
    crit_value = mackinnoncrit(N=1, regression="c", nobs=len(y1))
    return coint_t, pvalue, crit_value
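As a usage sketch (illustrative, not part of the original source): two series driven by a shared random-walk trend should reject the null of no cointegration. This assumes coint and its dependencies (OLS, add_constant, mackinnonp, mackinnoncrit) are in scope as in the snippet above; the simulated data and seed are hypothetical.

    import numpy as np

    np.random.seed(12345)
    nobs = 500
    common_trend = np.cumsum(np.random.randn(nobs))   # shared stochastic trend
    y1 = common_trend + np.random.randn(nobs)
    y2 = 0.5 * common_trend + np.random.randn(nobs)

    coint_t, pvalue, crit_value = coint(y1, y2, regression="c")
    print(pvalue)   # a small p-value rejects "no cointegration"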
Example 3: test_hac_simple
def test_hac_simple():
    from scikits.statsmodels.datasets import macrodata
    d2 = macrodata.load().data
    g_gdp = 400 * np.diff(np.log(d2['realgdp']))
    g_inv = 400 * np.diff(np.log(d2['realinv']))
    exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1]], prepend=True)
    res_olsg = OLS(g_inv, exogg).fit()

    #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=TRUE)
    #Lag truncation parameter chosen: 4
    #              (Intercept)                  ggdp                  lint
    cov1_r = [[ 1.40643899878678802, -0.3180328707083329709, -0.060621111216488610],
              [-0.31803287070833292,  0.1097308348999818661,  0.000395311760301478],
              [-0.06062111121648865,  0.0003953117603014895,  0.087511528912470993]]

    #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=FALSE)
    #Lag truncation parameter chosen: 4
    #              (Intercept)                  ggdp                  lint
    cov2_r = [[ 1.3855512908840137, -0.313309610252268500, -0.059720797683570477],
              [-0.3133096102522685,  0.108101169035130618,  0.000389440793564339],
              [-0.0597207976835705,  0.000389440793564336,  0.086211852740503622]]

    cov1, se1 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=True)
    cov2, se2 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False)
    assert_almost_equal(cov1, cov1_r, decimal=14)
    assert_almost_equal(cov2, cov2_r, decimal=14)
Example 4: setupClass
def setupClass(cls):
    data = longley.load()
    data.exog = add_constant(data.exog)
    ols_res = OLS(data.endog, data.exog).fit()
    gls_res = GLS(data.endog, data.exog).fit()
    cls.res1 = gls_res
    cls.res2 = ols_res
Example 5: pacf_ols
def pacf_ols(x, nlags=40):
    '''Calculate partial autocorrelations

    Parameters
    ----------
    x : 1d array
        observations of time series for which pacf is calculated
    nlags : int
        number of lags for which the pacf is computed (lag 0 is not
        counted among these)

    Returns
    -------
    pacf : 1d array
        partial autocorrelations, nlags + 1 elements; the value at lag 0
        is always 1.

    Notes
    -----
    This solves a separate OLS estimation for each desired lag.
    '''
    #TODO: add warnings for Yule-Walker
    #NOTE: demeaning and not using a constant gave incorrect answers?
    #JP: demeaning should give a better estimate of the constant
    #maybe we can compare small sample properties with a Monte Carlo
    xlags, x0 = lagmat(x, nlags, original='sep')
    #xlags = sm.add_constant(lagmat(x, nlags), prepend=True)
    xlags = add_constant(xlags, prepend=True)
    pacf = [1.]
    for k in range(1, nlags + 1):
        res = OLS(x0[k:], xlags[k:, :k + 1]).fit()
        #np.take(xlags[k:], range(1,k+1)+[-1],
        pacf.append(res.params[-1])
    return np.array(pacf)
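A quick usage sketch (illustrative, not from the original source): for a simulated AR(1) process the partial autocorrelation should be large at lag 1 and near zero afterwards. It assumes pacf_ols and its dependencies (lagmat, OLS, add_constant) are in scope as above.

    import numpy as np

    np.random.seed(0)
    nobs = 500
    x = np.zeros(nobs)
    for t in range(1, nobs):
        x[t] = 0.6 * x[t - 1] + np.random.randn()  # AR(1) with phi = 0.6

    print(pacf_ols(x, nlags=5))
    # 6 values: 1.0 at lag 0, roughly 0.6 at lag 1, then near zero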
Example 6: test_cov_cluster_2groups
def test_cov_cluster_2groups():
    # comparing cluster-robust standard errors to Petersen
    # requires Petersen's test_data
    # http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt
    import os
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    fpath = os.path.join(cur_dir, "test_data.txt")
    pet = np.genfromtxt(fpath)
    endog = pet[:, -1]
    group = pet[:, 0].astype(int)
    time = pet[:, 1].astype(int)
    exog = add_constant(pet[:, 2], prepend=True)
    res = OLS(endog, exog).fit()

    cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time)

    # reference numbers from Petersen
    # http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm
    bse_petw = [0.0284, 0.0284]
    bse_pet0 = [0.0670, 0.0506]
    bse_pet1 = [0.0234, 0.0334]   # year
    bse_pet01 = [0.0651, 0.0536]  # firm and year

    bse_0 = sw.se_cov(covg)
    bse_1 = sw.se_cov(covt)
    bse_01 = sw.se_cov(cov01)
    #print res.HC0_se, bse_petw - res.HC0_se
    #print bse_0, bse_0 - bse_pet0
    #print bse_1, bse_1 - bse_pet1
    #print bse_01, bse_01 - bse_pet01

    assert_almost_equal(bse_petw, res.HC0_se, decimal=4)
    assert_almost_equal(bse_0, bse_pet0, decimal=4)
    assert_almost_equal(bse_1, bse_pet1, decimal=4)
    assert_almost_equal(bse_01, bse_pet01, decimal=4)
Example 7: test_prefect_pred
def test_prefect_pred():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    iris = np.genfromtxt(os.path.join(cur_dir, 'results', 'iris.csv'),
                         delimiter=",", skip_header=1)
    y = iris[:, -1]
    X = iris[:, :-1]
    X = X[y != 2]
    y = y[y != 2]
    X = add_constant(X, prepend=True)
    glm = GLM(y, X, family=sm.families.Binomial())
    assert_raises(PerfectSeparationError, glm.fit)
Example 8: setupClass
def setupClass(cls):
    from results.results_regression import LongleyGls

    data = longley.load()
    exog = add_constant(np.column_stack((data.exog[:, 1],
                                         data.exog[:, 4])))
    tmp_results = OLS(data.endog, exog).fit()
    rho = np.corrcoef(tmp_results.resid[1:],
                      tmp_results.resid[:-1])[0][1]  # by assumption
    order = toeplitz(np.arange(16))
    sigma = rho ** order
    GLS_results = GLS(data.endog, exog, sigma=sigma).fit()
    cls.res1 = GLS_results
    cls.res2 = LongleyGls()
Example 9: setupClass
def setupClass(cls):
    from results.results_glm import Cpunish
    from scikits.statsmodels.datasets.cpunish import load

    data = load()
    data.exog[:, 3] = np.log(data.exog[:, 3])
    data.exog = add_constant(data.exog)
    exposure = [100] * len(data.endog)
    cls.res1 = GLM(data.endog, data.exog, family=sm.families.Poisson(),
                   exposure=exposure).fit()
    cls.res1.params[-1] += np.log(100)  # add exposure back in to params
                                        # to make the results the same
    cls.res2 = Cpunish()
Example 10: qqline
def qqline(ax, line, x=None, y=None, dist=None, fmt='r-'):
    """
    Plot a reference line for a qqplot.

    Parameters
    ----------
    ax : matplotlib axes instance
        The axes on which to plot the line
    line : str {'45','r','s','q'}
        Options for the reference line to which the data is compared:
        '45' - 45-degree line
        's'  - standardized line; the expected order statistics are
               scaled by the standard deviation of the given sample
               and have the mean added to them
        'r'  - a regression line is fit
        'q'  - a line is fit through the quartiles
        None - by default no reference line is added to the plot
    x : array
        X data for plot. Not needed if line is '45'.
    y : array
        Y data for plot. Not needed if line is '45'.
    dist : scipy.stats.distribution
        A scipy.stats distribution, needed if line is 'q'.

    Notes
    -----
    There is no return value. The line is plotted on the given `ax`.
    """
    if line == '45':
        end_pts = list(zip(ax.get_xlim(), ax.get_ylim()))
        end_pts[0] = max(end_pts[0])
        end_pts[1] = min(end_pts[1])
        ax.plot(end_pts, end_pts, fmt)
        return  # does this have any side effects?
    if x is None and y is None:
        raise ValueError("If line is not 45, x and y cannot be None.")
    elif line == 'r':
        # could use ax.lines[0].get_xdata(), get_ydata(),
        # but we don't know whether the axes are 'clean'
        y = OLS(y, add_constant(x)).fit().fittedvalues
        ax.plot(x, y, fmt)
    elif line == 's':
        m, b = y.std(), y.mean()
        ref_line = x * m + b
        ax.plot(x, ref_line, fmt)
    elif line == 'q':
        q25 = stats.scoreatpercentile(y, 25)
        q75 = stats.scoreatpercentile(y, 75)
        theoretical_quartiles = dist.ppf([.25, .75])
        m = (q75 - q25) / np.diff(theoretical_quartiles)
        b = q25 - m * theoretical_quartiles[0]
        ax.plot(x, m * x + b, fmt)
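A hedged usage sketch for qqline: plot an ordered sample against theoretical normal quantiles, then overlay the quartile line. The plotting-position formula below is one common convention, not taken from the original source.

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy import stats

    np.random.seed(1)
    nobs = 200
    sample = np.sort(np.random.randn(nobs))          # ordered sample (y axis)
    probs = (np.arange(1, nobs + 1) - 0.5) / nobs    # plotting positions
    theoretical = stats.norm.ppf(probs)              # theoretical quantiles

    fig, ax = plt.subplots()
    ax.plot(theoretical, sample, 'bo')
    qqline(ax, 'q', x=theoretical, y=sample, dist=stats.norm)
    plt.show()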
Example 11: __init__
def __init__(self):
    d = macrodata.load().data

    # growth rates
    gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
    gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))
    lint = d['realint'][:-1]
    tbilrate = d['tbilrate'][:-1]

    endogg = gs_l_realinv
    exogg = add_constant(np.c_[gs_l_realgdp, lint], prepend=True)
    exogg2 = add_constant(np.c_[gs_l_realgdp, tbilrate], prepend=True)
    exogg3 = add_constant(np.c_[gs_l_realgdp], prepend=True)

    res_ols = OLS(endogg, exogg).fit()
    res_ols2 = OLS(endogg, exogg2).fit()
    res_ols3 = OLS(endogg, exogg3).fit()

    self.res = res_ols
    self.res2 = res_ols2
    self.res3 = res_ols3
    self.endog = self.res.model.endog
    self.exog = self.res.model.exog
Example 12: __init__
def __init__(self):
    '''
    Tests Poisson family with canonical log link.

    Test results were obtained by R.
    '''
    from results.results_glm import Cpunish
    from scikits.statsmodels.datasets.cpunish import load

    self.data = load()
    self.data.exog[:, 3] = np.log(self.data.exog[:, 3])
    self.data.exog = add_constant(self.data.exog)
    self.res1 = GLM(self.data.endog, self.data.exog,
                    family=sm.families.Poisson()).fit()
    self.res2 = Cpunish()
Example 13: test_add_constant_1d
def test_add_constant_1d(self):
    x = np.arange(1, 5)
    x = tools.add_constant(x, prepend=True)
    y = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.]]).T
    assert_equal(x, y)
Example 14: test_add_constant_has_constant1d
def test_add_constant_has_constant1d(self):
    x = np.ones(5)
    x = tools.add_constant(x)
    assert_equal(x, np.ones(5))
Example 15: add_trend
def add_trend(X, trend="c", prepend=False):
    """
    Adds a trend and/or constant to an array.

    Parameters
    ----------
    X : array-like
        Original array of data.
    trend : str {"c","t","ct","ctt"}
        "c"   add constant only
        "t"   add trend only
        "ct"  add constant and linear trend
        "ctt" add constant and linear and quadratic trend
    prepend : bool
        If True, prepends the new data to the columns of X.

    Notes
    -----
    Returns columns as ["ctt","ct","c"] whenever applicable. There is
    currently no checking for an existing constant or trend.

    See also
    --------
    scikits.statsmodels.add_constant
    """
    #TODO: could be generalized for a trend of arbitrary order
    trend = trend.lower()
    if trend == "c":  # handles structured arrays
        return add_constant(X, prepend=prepend)
    elif trend == "ct" or trend == "t":
        trendorder = 1
    elif trend == "ctt":
        trendorder = 2
    else:
        raise ValueError("trend %s not understood" % trend)
    X = np.asanyarray(X)
    nobs = len(X)
    trendarr = np.vander(np.arange(1, nobs + 1, dtype=float), trendorder + 1)
    # put in order ctt
    trendarr = np.fliplr(trendarr)
    if trend == "t":
        trendarr = trendarr[:, 1]
    if not X.dtype.names:  # plain ndarray
        if not prepend:
            X = np.column_stack((X, trendarr))
        else:
            X = np.column_stack((trendarr, X))
    else:  # structured or record array
        return_rec = X.__class__ is np.recarray
        if trendorder == 1:
            if trend == "ct":
                dt = [('const', float), ('trend', float)]
            else:
                dt = [('trend', float)]
        elif trendorder == 2:
            dt = [('const', float), ('trend', float),
                  ('trend_squared', float)]
        trendarr = trendarr.view(dt)
        if prepend:
            X = nprf.append_fields(trendarr, X.dtype.names,
                                   [X[i] for i in X.dtype.names],
                                   usemask=False, asrecarray=return_rec)
        else:
            X = nprf.append_fields(X, trendarr.dtype.names,
                                   [trendarr[i] for i in trendarr.dtype.names],
                                   usemask=False, asrecarray=return_rec)
    return X
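A small usage sketch (illustrative): adding a constant and a linear trend to a plain ndarray. Given the np.fliplr in the code above, the prepended columns come out as constant first, then trend; the random data is hypothetical.

    import numpy as np

    X = np.random.randn(5, 2)
    Xt = add_trend(X, trend="ct", prepend=True)
    print(Xt[:, 0])  # column of ones (the constant)
    print(Xt[:, 1])  # linear trend: [1. 2. 3. 4. 5.]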