本文整理汇总了Python中statsmodels.stats.sandwich_covariance.cov_cluster函数的典型用法代码示例。如果您正苦于以下问题:Python cov_cluster函数的具体用法?Python cov_cluster怎么用?Python cov_cluster使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cov_cluster函数的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_panel_robust_cov
def test_panel_robust_cov():
import pandas as pa
import statsmodels.datasets.grunfeld as gr
from results.results_panelrobust import results as res_stata
dtapa = gr.data.load_pandas()
#Stata example/data seems to miss last firm
dtapa_endog = dtapa.endog[:200]
dtapa_exog = dtapa.exog[:200]
res = OLS(dtapa_endog, add_constant(dtapa_exog[['value', 'capital']],
prepend=False)).fit()
#time indicator in range(max Ti)
time = np.asarray(dtapa_exog[['year']])
time -= time.min()
time = np.squeeze(time).astype(int)
#sw.cov_nw_panel requires bounds instead of index
tidx = [(i*20, 20*(i+1)) for i in range(10)]
#firm index in range(n_firms)
firm_names, firm_id = np.unique(np.asarray(dtapa_exog[['firm']], 'S20'),
return_inverse=True)
#panel newey west standard errors
cov = sw.cov_nw_panel(res, 0, tidx, use_correction='hac')
#dropping numpy 1.4 soon
#np.testing.assert_allclose(cov, res_stata.cov_pnw0_stata, rtol=1e-6)
assert_almost_equal(cov, res_stata.cov_pnw0_stata, decimal=4)
cov = sw.cov_nw_panel(res, 1, tidx, use_correction='hac')
#np.testing.assert_allclose(cov, res_stata.cov_pnw1_stata, rtol=1e-6)
assert_almost_equal(cov, res_stata.cov_pnw1_stata, decimal=4)
cov = sw.cov_nw_panel(res, 4, tidx) #check default
#np.testing.assert_allclose(cov, res_stata.cov_pnw4_stata, rtol=1e-6)
assert_almost_equal(cov, res_stata.cov_pnw4_stata, decimal=4)
#cluster robust standard errors
cov_clu = sw.cov_cluster(res, firm_id)
assert_almost_equal(cov_clu, res_stata.cov_clu_stata, decimal=4)
#cluster robust standard errors, non-int groups
cov_clu = sw.cov_cluster(res, map(str, firm_id))
assert_almost_equal(cov_clu, res_stata.cov_clu_stata, decimal=4)
#Driscoll and Kraay panel robust standard errors
rcov = sw.cov_nw_groupsum(res, 0, time, use_correction=0)
assert_almost_equal(rcov, res_stata.cov_dk0_stata, decimal=4)
rcov = sw.cov_nw_groupsum(res, 1, time, use_correction=0)
assert_almost_equal(rcov, res_stata.cov_dk1_stata, decimal=4)
rcov = sw.cov_nw_groupsum(res, 4, time) #check default
assert_almost_equal(rcov, res_stata.cov_dk4_stata, decimal=4)
示例2: cluster_se
def cluster_se(fit, gp_name):
"""
Compute robust "clustered" standard errors.
Parameters
==========
fit : statsmodels.regression.linear_model.RegressionResultsWrapper
The statsmodels fit object obtained from the original regression
gp_name : str
The name of the group on which the clustering should happen.
This needs to be the name of a column in the original DataFrame
used to create and fit the model.
Returns
=======
ser : pd.Series
A pandas Series with the variable names and robust standard
errors.
"""
from statsmodels.stats.sandwich_covariance import cov_cluster
grp = fit.model.data.frame[gp_name]
se = np.diag(cov_cluster(fit, grp)) ** (1/2.)
return pd.Series(se, index=fit.params.index)
示例3: get_robust_clu
def get_robust_clu(cls):
res1 = cls.res1
cov_clu = sw.cov_cluster(res1, group)
cls.bse_rob = sw.se_cov(cov_clu)
nobs, k_vars = res1.model.exog.shape
k_params = len(res1.params)
#n_groups = len(np.unique(group))
corr_fact = (nobs-1.) / float(nobs - k_params)
# for bse we need sqrt of correction factor
cls.corr_fact = np.sqrt(corr_fact)
示例4: se_cluster
def se_cluster(fe_results, df, group):
"""
degrees of freedom adjustment for nested-within-cluster std. err.
see http://www.stata.com/statalist/archive/2013-01/msg00596.html
"""
from numpy.linalg import matrix_rank
from statsmodels.stats.sandwich_covariance import cov_cluster
cl_se = np.sqrt(cov_cluster(fe_results, df[group]).diagonal())
cols = [v for v in X.columns if not v.startswith("panid")]
V = fe_results.cov_params().ix[cols, cols]
N = fe_results.nobs
N_grp = len(np.unique(df[group].squeeze()))
rank = matrix_rank(V)
df_cl = N - (rank - 1) - (N_grp - 1) # degrees of freedom for cluster-robust but no nested adjustmnet
df_xt = N - (rank - 1) # degrees of freedom for cluster-nested-robust
cl_se_adj = cl_se * np.sqrt(df_cl/df_xt)
return cl_se_adj
示例5: setup
def setup(self):
res_ols = self.res1.get_robustcov_results('cluster',
groups=self.groups,
use_correction=False,
use_t=False,
df_correction=True)
self.res3 = self.res1
self.res1 = res_ols
self.bse_robust = res_ols.bse
self.cov_robust = res_ols.cov_params()
cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False)
se1 = sw.se_cov(cov1)
self.bse_robust2 = se1
self.cov_robust2 = cov1
self.small = False
self.res2 = res2.results_cluster_large
self.skip_f = True
self.rtol = 1e-6
self.rtolh = 1e-10
示例6: assert_almost_equal
#test White
assert_almost_equal(bse_w, self.HC0_se, 15)
bse_wc = sw.se_cov(sw.cov_white_simple(self, use_correction=True))
print bse_wc
#test White
assert_almost_equal(bse_wc, self.HC1_se, 15)
groups = np.repeat(np.arange(5), 20)
idx = np.nonzero(np.diff(groups))[0].tolist()
groupidx = zip([0]+idx, idx+[len(groups)])
ngroups = len(groupidx)
print sw.se_cov(sw.cov_cluster(self, groups))
#two strange looking corner cases BUG?
print sw.se_cov(sw.cov_cluster(self, np.ones(len(endog), int), use_correction=False))
print sw.se_cov(sw.cov_crosssection_0(self, np.arange(len(endog))))
#these results are close to simple (no group) white, 50 groups 2 obs each
groups = np.repeat(np.arange(50), 100//50)
print sw.se_cov(sw.cov_cluster(self, groups))
#2 groups with 50 obs each, what was the interpretation again?
groups = np.repeat(np.arange(2), 100//2)
print sw.se_cov(sw.cov_cluster(self, groups))
"http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt"
'''
test <- read.table(
url(paste("http://www.kellogg.northwestern.edu/",
"faculty/petersen/htm/papers/se/",
示例7: get_robustcov_results
#.........这里部分代码省略.........
res.cov_params_default = sw.cov_white_simple(self,
use_correction=False)
elif cov_type.lower() == 'hac':
maxlags = kwds['maxlags'] # required?, default in cov_hac_simple
res.cov_kwds['maxlags'] = maxlags
weights_func = kwds.get('weights_func', sw.weights_bartlett)
res.cov_kwds['weights_func'] = weights_func
use_correction = kwds.get('use_correction', False)
res.cov_kwds['use_correction'] = use_correction
res.cov_kwds['description'] = ('Standard Errors are heteroscedasticity ' +
'and autocorrelation robust (HAC) using %d lags and %s small ' +
'sample correction') % (maxlags, ['without', 'with'][use_correction])
res.cov_params_default = sw.cov_hac_simple(self, nlags=maxlags,
weights_func=weights_func,
use_correction=use_correction)
elif cov_type.lower() == 'cluster':
#cluster robust standard errors, one- or two-way
groups = kwds['groups']
if not hasattr(groups, 'shape'):
groups = np.asarray(groups).T
if groups.ndim >= 2:
groups = groups.squeeze()
res.cov_kwds['groups'] = groups
use_correction = kwds.get('use_correction', True)
res.cov_kwds['use_correction'] = use_correction
if groups.ndim == 1:
if adjust_df:
# need to find number of groups
# duplicate work
self.n_groups = n_groups = len(np.unique(groups))
res.cov_params_default = sw.cov_cluster(self, groups,
use_correction=use_correction)
elif groups.ndim == 2:
if hasattr(groups, 'values'):
groups = groups.values
if adjust_df:
# need to find number of groups
# duplicate work
n_groups0 = len(np.unique(groups[:,0]))
n_groups1 = len(np.unique(groups[:, 1]))
self.n_groups = (n_groups0, n_groups1)
n_groups = min(n_groups0, n_groups1) # use for adjust_df
# Note: sw.cov_cluster_2groups has 3 returns
res.cov_params_default = sw.cov_cluster_2groups(self, groups,
use_correction=use_correction)[0]
else:
raise ValueError('only two groups are supported')
res.cov_kwds['description'] = ('Standard Errors are robust to' +
'cluster correlation ' + '(' + cov_type + ')')
elif cov_type.lower() == 'hac-panel':
#cluster robust standard errors
res.cov_kwds['time'] = time = kwds.get('time', None)
res.cov_kwds['groups'] = groups = kwds.get('groups', None)
#TODO: nlags is currently required
#nlags = kwds.get('nlags', True)
#res.cov_kwds['nlags'] = nlags
#TODO: `nlags` or `maxlags`
res.cov_kwds['maxlags'] = maxlags = kwds['maxlags']
use_correction = kwds.get('use_correction', 'hac')
示例8:
#remove nan observation
mask = (xx!=-999.0).all(1) #nan code in dta file
mask.shape
y = y[mask]
xx = xx[mask]
group = group[mask]
#run OLS
res_srs = sm.OLS(y, xx).fit()
print 'params ', res_srs.params
print 'bse_OLS ', res_srs.bse
#get cluster robust standard errors and compare with STATA
cov_cr = sw.cov_cluster(res_srs, group.astype(int))
bse_cr = sw.se_cov(cov_cr)
print 'bse_rob ', bse_cr
res_stata = np.rec.array(
[ ('growth', '|', -0.1027121, 0.22917029999999999, -0.45000000000000001, 0.65500000000000003, -0.55483519999999997, 0.34941109999999997),
('emer', '|', -5.4449319999999997, 0.72939690000000001, -7.46, 0.0, -6.8839379999999997, -4.0059269999999998),
('yr_rnd', '|', -51.075690000000002, 22.83615, -2.2400000000000002, 0.027, -96.128439999999998, -6.0229350000000004),
('_cons', '|', 740.3981, 13.460760000000001, 55.0, 0.0, 713.84180000000003, 766.95439999999996)],
dtype=[('exogname', '|S6'), ('del', '|S1'), ('params', '<f8'),
('bse', '<f8'), ('tvalues', '<f8'), ('pvalues', '<f8'),
('cilow', '<f8'), ('ciupp', '<f8')])
print 'diff Stata', bse_cr - res_stata.bse
assert_almost_equal(bse_cr, res_stata.bse, decimal=6)
示例9: get_robust_clu
def get_robust_clu(cls):
res1 = cls.res1
cov_clu = sw.cov_cluster(res1, group)
cls.bse_rob = sw.se_cov(cov_clu)
cls.corr_fact = cls.get_correction_factor(res1)
示例10: ShortPanelGLS
# res.resid is of transformed model
# np.corrcoef(res.resid.reshape(-1,n_groups, order='F'))
y_pred = np.dot(mod.exog, res.params)
resid = y - y_pred
print np.corrcoef(resid.reshape(-1, n_groups, order="F"))
print resid.std()
err = y_pred - dgp.y_true
print err.std()
# OLS standard errors are too small
mod.res_pooled.params
mod.res_pooled.bse
# heteroscedasticity robust doesn't help
mod.res_pooled.HC1_se
# compare with cluster robust se
print sw.se_cov(sw.cov_cluster(mod.res_pooled, dgp.groups.astype(int)))
# not bad, pretty close to panel estimator
# and with Newey-West Hac
print sw.se_cov(sw.cov_nw_panel(mod.res_pooled, 4, mod.group.groupidx))
# too small, assuming no bugs,
# see Peterson assuming it refers to same kind of model
print dgp.cov
mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
res2 = mod2.fit_iterative(2)
print res2.params
print res2.bse
# both implementations produce the same results:
from numpy.testing import assert_almost_equal
assert_almost_equal(res.params, res2.params, decimal=12)
示例11: test_short_panel
def test_short_panel():
#this checks that some basic statistical properties are satisfied by the
#results, not verified results against other packages
#Note: the ranking of robust bse is different if within=True
#I added within keyword to PanelSample to be able to use old example
#if within is False, then there is no within group variation in exog.
nobs = 100
nobs_i = 5
n_groups = nobs // nobs_i
k_vars = 3
dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_arma,
corr_args=([1], [1., -0.9],), seed=377769, within=False)
#print 'seed', dgp.seed
y = dgp.generate_panel()
noise = y - dgp.y_true
#test dgp
dgp_cov_e = np.array(
[[ 1. , 0.9 , 0.81 , 0.729 , 0.6561],
[ 0.9 , 1. , 0.9 , 0.81 , 0.729 ],
[ 0.81 , 0.9 , 1. , 0.9 , 0.81 ],
[ 0.729 , 0.81 , 0.9 , 1. , 0.9 ],
[ 0.6561, 0.729 , 0.81 , 0.9 , 1. ]])
npt.assert_almost_equal(dgp.cov, dgp_cov_e, 13)
cov_noise = np.cov(noise.reshape(-1,n_groups, order='F'))
corr_noise = cov2corr(cov_noise)
npt.assert_almost_equal(corr_noise, dgp.cov, 1)
#estimate panel model
mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
res2 = mod2.fit_iterative(2)
#whitened residual should be uncorrelated
corr_wresid = np.corrcoef(res2.wresid.reshape(-1,n_groups, order='F'))
assert_maxabs(corr_wresid, np.eye(5), 0.1)
#residual should have same correlation as dgp
corr_resid = np.corrcoef(res2.resid.reshape(-1,n_groups, order='F'))
assert_maxabs(corr_resid, dgp.cov, 0.1)
assert_almost_equal(res2.resid.std(),1, decimal=0)
y_pred = np.dot(mod2.exog, res2.params)
assert_almost_equal(res2.fittedvalues, y_pred, 13)
#compare with OLS
res2_ols = mod2._fit_ols()
npt.assert_(mod2.res_pooled is res2_ols)
res2_ols = mod2.res_pooled #TODO: BUG: requires call to _fit_ols
#fitting once is the same as OLS
#note: I need to create new instance, otherwise it continuous fitting
mod1 = ShortPanelGLS(y, dgp.exog, dgp.groups)
res1 = mod1.fit_iterative(1)
assert_almost_equal(res1.params, res2_ols.params, decimal=13)
assert_almost_equal(res1.bse, res2_ols.bse, decimal=13)
res_ols = OLS(y, dgp.exog).fit()
assert_almost_equal(res1.params, res_ols.params, decimal=13)
assert_almost_equal(res1.bse, res_ols.bse, decimal=13)
#compare with old version
mod_old = ShortPanelGLS2(y, dgp.exog, dgp.groups)
res_old = mod_old.fit()
assert_almost_equal(res2.params, res_old.params, decimal=13)
assert_almost_equal(res2.bse, res_old.bse, decimal=13)
mod5 = ShortPanelGLS(y, dgp.exog, dgp.groups)
res5 = mod5.fit_iterative(5)
#make sure it's different
#npt.assert_array_less(0.009, em.maxabs(res5.bse, res2.bse))
cov_clu = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int))
clubse = se_cov(cov_clu)
pnwbse = se_cov(sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx))
bser = np.vstack((res2.bse, res5.bse, clubse, pnwbse))
bser_mean = np.mean(bser, axis=0)
#cov_cluster close to robust and PanelGLS
#is up to 24% larger than mean of bser
#npt.assert_array_less(0, clubse / bser_mean - 1)
npt.assert_array_less(clubse / bser_mean - 1, 0.25)
#cov_nw_panel close to robust and PanelGLS
npt.assert_array_less(pnwbse / bser_mean - 1, 0.1)
#OLS underestimates bse, robust at least 60% larger
npt.assert_array_less(0.6, bser_mean / res_ols.bse - 1)
#.........这里部分代码省略.........