Python sandwich_covariance.cov_cluster函数代码示例

本文整理汇总了Python中statsmodels.stats.sandwich_covariance.cov_cluster函数的典型用法代码示例。如果您正苦于以下问题：Python cov_cluster函数的具体用法？Python cov_cluster怎么用？Python cov_cluster使用的例子？那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了cov_cluster函数的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_panel_robust_cov

def test_panel_robust_cov():
    import pandas as pa
    import statsmodels.datasets.grunfeld as gr
    from results.results_panelrobust import results as res_stata

    dtapa = gr.data.load_pandas()
    #Stata example/data seems to miss last firm
    dtapa_endog = dtapa.endog[:200]
    dtapa_exog = dtapa.exog[:200]
    res = OLS(dtapa_endog, add_constant(dtapa_exog[['value', 'capital']],
              prepend=False)).fit()

    #time indicator in range(max Ti)
    time = np.asarray(dtapa_exog[['year']])
    time -= time.min()
    time = np.squeeze(time).astype(int)

    #sw.cov_nw_panel requires bounds instead of index
    tidx = [(i*20, 20*(i+1)) for i in range(10)]

    #firm index in range(n_firms)
    firm_names, firm_id = np.unique(np.asarray(dtapa_exog[['firm']], 'S20'),
                                    return_inverse=True)

    #panel newey west standard errors
    cov = sw.cov_nw_panel(res, 0, tidx, use_correction='hac')
    #dropping numpy 1.4 soon
    #np.testing.assert_allclose(cov, res_stata.cov_pnw0_stata, rtol=1e-6)
    assert_almost_equal(cov, res_stata.cov_pnw0_stata, decimal=4)

    cov = sw.cov_nw_panel(res, 1, tidx, use_correction='hac')
    #np.testing.assert_allclose(cov, res_stata.cov_pnw1_stata, rtol=1e-6)
    assert_almost_equal(cov, res_stata.cov_pnw1_stata, decimal=4)

    cov = sw.cov_nw_panel(res, 4, tidx)  #check default
    #np.testing.assert_allclose(cov, res_stata.cov_pnw4_stata, rtol=1e-6)
    assert_almost_equal(cov, res_stata.cov_pnw4_stata, decimal=4)

    #cluster robust standard errors
    cov_clu = sw.cov_cluster(res, firm_id)
    assert_almost_equal(cov_clu, res_stata.cov_clu_stata, decimal=4)

    #cluster robust standard errors, non-int groups
    cov_clu = sw.cov_cluster(res, map(str, firm_id))
    assert_almost_equal(cov_clu, res_stata.cov_clu_stata, decimal=4)

    #Driscoll and Kraay panel robust standard errors
    rcov = sw.cov_nw_groupsum(res, 0, time, use_correction=0)
    assert_almost_equal(rcov, res_stata.cov_dk0_stata, decimal=4)

    rcov = sw.cov_nw_groupsum(res, 1, time, use_correction=0)
    assert_almost_equal(rcov, res_stata.cov_dk1_stata, decimal=4)

    rcov = sw.cov_nw_groupsum(res, 4, time) #check default
    assert_almost_equal(rcov, res_stata.cov_dk4_stata, decimal=4)

开发者ID:SuperXrooT，项目名称:statsmodels，代码行数:55，代码来源:test_panel_robustcov.py

示例2: cluster_se

def cluster_se(fit, gp_name):
    """
    Compute robust "clustered" standard errors.

    Parameters
    ==========
    fit : statsmodels.regression.linear_model.RegressionResultsWrapper
        The statsmodels fit object obtained from the original regression

    gp_name : str
        The name of the group on which the clustering should happen.
        This needs to be the name of a column in the original DataFrame
        used to create and fit the model.

    Returns
    =======
    ser : pd.Series
        A pandas Series with the variable names and robust standard
        errors.

    """
    from statsmodels.stats.sandwich_covariance import cov_cluster
    grp = fit.model.data.frame[gp_name]
    se = np.diag(cov_cluster(fit, grp)) ** (1/2.)
    return pd.Series(se, index=fit.params.index)

开发者ID:spencerlyon2，项目名称:econtools，代码行数:25，代码来源:metrics.py

示例3: get_robust_clu

    def get_robust_clu(cls):
        res1 = cls.res1
        cov_clu = sw.cov_cluster(res1, group)
        cls.bse_rob = sw.se_cov(cov_clu)

        nobs, k_vars = res1.model.exog.shape
        k_params = len(res1.params)
        #n_groups = len(np.unique(group))
        corr_fact = (nobs-1.) / float(nobs - k_params)
        # for bse we need sqrt of correction factor
        cls.corr_fact = np.sqrt(corr_fact)

开发者ID:tadeze，项目名称:statsmodels，代码行数:11，代码来源:test_sandwich_cov.py

示例4: se_cluster

def se_cluster(fe_results, df, group):

    """
    degrees of freedom adjustment for nested-within-cluster std. err.
        see http://www.stata.com/statalist/archive/2013-01/msg00596.html
    """
    from numpy.linalg import matrix_rank
    from statsmodels.stats.sandwich_covariance import cov_cluster

    cl_se = np.sqrt(cov_cluster(fe_results, df[group]).diagonal())
    cols = [v for v in X.columns if not v.startswith("panid")]
    V = fe_results.cov_params().ix[cols, cols]
    N = fe_results.nobs
    N_grp = len(np.unique(df[group].squeeze()))
    rank = matrix_rank(V)
    df_cl = N - (rank - 1) - (N_grp - 1) # degrees of freedom for cluster-robust but no nested adjustmnet
    df_xt = N - (rank - 1) # degrees of freedom for cluster-nested-robust
    cl_se_adj = cl_se * np.sqrt(df_cl/df_xt)
    return cl_se_adj

开发者ID:bprest，项目名称:Duke_PUBPOL590，代码行数:19，代码来源:fe_functions.py

示例5: setup

    def setup(self):
        res_ols = self.res1.get_robustcov_results('cluster',
                                                  groups=self.groups,
                                                  use_correction=False,
                                                  use_t=False,
                                                  df_correction=True)
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()
        cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False)
        se1 =  sw.se_cov(cov1)
        self.bse_robust2 = se1
        self.cov_robust2 = cov1
        self.small = False
        self.res2 = res2.results_cluster_large

        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10

开发者ID:dieterv77，项目名称:statsmodels，代码行数:20，代码来源:test_robustcov.py

示例6: assert_almost_equal

#test White
assert_almost_equal(bse_w, self.HC0_se, 15)

bse_wc = sw.se_cov(sw.cov_white_simple(self, use_correction=True))
print bse_wc
#test White
assert_almost_equal(bse_wc, self.HC1_se, 15)


groups = np.repeat(np.arange(5), 20)

idx = np.nonzero(np.diff(groups))[0].tolist()
groupidx = zip([0]+idx, idx+[len(groups)])
ngroups = len(groupidx)

print sw.se_cov(sw.cov_cluster(self, groups))
#two strange looking corner cases BUG?
print sw.se_cov(sw.cov_cluster(self, np.ones(len(endog), int), use_correction=False))
print sw.se_cov(sw.cov_crosssection_0(self, np.arange(len(endog))))
#these results are close to simple (no group) white, 50 groups 2 obs each
groups = np.repeat(np.arange(50), 100//50)
print sw.se_cov(sw.cov_cluster(self, groups))
#2 groups with 50 obs each, what was the interpretation again?
groups = np.repeat(np.arange(2), 100//2)
print sw.se_cov(sw.cov_cluster(self, groups))

"http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt"
'''
test <- read.table(
      url(paste("http://www.kellogg.northwestern.edu/",
            "faculty/petersen/htm/papers/se/",

开发者ID:EdTenerife，项目名称:statsmodels，代码行数:31，代码来源:ex_sandwich.py

示例7: get_robustcov_results


#.........这里部分代码省略.........
            res.cov_params_default = sw.cov_white_simple(self,
                                                         use_correction=False)
    elif cov_type.lower() == 'hac':
        maxlags = kwds['maxlags']   # required?, default in cov_hac_simple
        res.cov_kwds['maxlags'] = maxlags
        weights_func = kwds.get('weights_func', sw.weights_bartlett)
        res.cov_kwds['weights_func'] = weights_func
        use_correction = kwds.get('use_correction', False)
        res.cov_kwds['use_correction'] = use_correction
        res.cov_kwds['description'] = ('Standard Errors are heteroscedasticity ' +
             'and autocorrelation robust (HAC) using %d lags and %s small ' +
             'sample correction') % (maxlags, ['without', 'with'][use_correction])

        res.cov_params_default = sw.cov_hac_simple(self, nlags=maxlags,
                                             weights_func=weights_func,
                                             use_correction=use_correction)
    elif cov_type.lower() == 'cluster':
        #cluster robust standard errors, one- or two-way
        groups = kwds['groups']
        if not hasattr(groups, 'shape'):
            groups = np.asarray(groups).T

        if groups.ndim >= 2:
            groups = groups.squeeze()

        res.cov_kwds['groups'] = groups
        use_correction = kwds.get('use_correction', True)
        res.cov_kwds['use_correction'] = use_correction
        if groups.ndim == 1:
            if adjust_df:
                # need to find number of groups
                # duplicate work
                self.n_groups = n_groups = len(np.unique(groups))
            res.cov_params_default = sw.cov_cluster(self, groups,
                                             use_correction=use_correction)

        elif groups.ndim == 2:
            if hasattr(groups, 'values'):
                groups = groups.values

            if adjust_df:
                # need to find number of groups
                # duplicate work
                n_groups0 = len(np.unique(groups[:,0]))
                n_groups1 = len(np.unique(groups[:, 1]))
                self.n_groups = (n_groups0, n_groups1)
                n_groups = min(n_groups0, n_groups1) # use for adjust_df

            # Note: sw.cov_cluster_2groups has 3 returns
            res.cov_params_default = sw.cov_cluster_2groups(self, groups,
                                         use_correction=use_correction)[0]
        else:
            raise ValueError('only two groups are supported')
        res.cov_kwds['description'] = ('Standard Errors are robust to' +
                            'cluster correlation ' + '(' + cov_type + ')')

    elif cov_type.lower() == 'hac-panel':
        #cluster robust standard errors
        res.cov_kwds['time'] = time = kwds.get('time', None)
        res.cov_kwds['groups'] = groups = kwds.get('groups', None)
        #TODO: nlags is currently required
        #nlags = kwds.get('nlags', True)
        #res.cov_kwds['nlags'] = nlags
        #TODO: `nlags` or `maxlags`
        res.cov_kwds['maxlags'] = maxlags = kwds['maxlags']
        use_correction = kwds.get('use_correction', 'hac')

开发者ID:BranYang，项目名称:statsmodels，代码行数:67，代码来源:covtype.py

示例8:

#remove nan observation
mask = (xx!=-999.0).all(1)   #nan code in dta file
mask.shape
y = y[mask]
xx = xx[mask]
group = group[mask]

#run OLS

res_srs = sm.OLS(y, xx).fit()
print 'params    ', res_srs.params
print 'bse_OLS   ', res_srs.bse

#get cluster robust standard errors and compare with STATA

cov_cr = sw.cov_cluster(res_srs, group.astype(int))
bse_cr = sw.se_cov(cov_cr)
print 'bse_rob   ', bse_cr

res_stata = np.rec.array(
     [ ('growth', '|', -0.1027121, 0.22917029999999999, -0.45000000000000001, 0.65500000000000003, -0.55483519999999997, 0.34941109999999997),
       ('emer', '|', -5.4449319999999997, 0.72939690000000001, -7.46, 0.0, -6.8839379999999997, -4.0059269999999998),
       ('yr_rnd', '|', -51.075690000000002, 22.83615, -2.2400000000000002, 0.027, -96.128439999999998, -6.0229350000000004),
       ('_cons', '|', 740.3981, 13.460760000000001, 55.0, 0.0, 713.84180000000003, 766.95439999999996)],
      dtype=[('exogname', '|S6'), ('del', '|S1'), ('params', '<f8'),
             ('bse', '<f8'), ('tvalues', '<f8'), ('pvalues', '<f8'),
             ('cilow', '<f8'), ('ciupp', '<f8')])

print 'diff Stata', bse_cr - res_stata.bse
assert_almost_equal(bse_cr, res_stata.bse, decimal=6)

开发者ID:changhiskhan，项目名称:statsmodels，代码行数:30，代码来源:ex_sandwich2.py

示例9: get_robust_clu

    def get_robust_clu(cls):
        res1 = cls.res1
        cov_clu = sw.cov_cluster(res1, group)
        cls.bse_rob = sw.se_cov(cov_clu)

        cls.corr_fact = cls.get_correction_factor(res1)

开发者ID:bashtage，项目名称:statsmodels，代码行数:6，代码来源:test_sandwich_cov.py

示例10: ShortPanelGLS

    # res.resid is of transformed model
    # np.corrcoef(res.resid.reshape(-1,n_groups, order='F'))
    y_pred = np.dot(mod.exog, res.params)
    resid = y - y_pred
    print np.corrcoef(resid.reshape(-1, n_groups, order="F"))
    print resid.std()
    err = y_pred - dgp.y_true
    print err.std()
    # OLS standard errors are too small
    mod.res_pooled.params
    mod.res_pooled.bse
    # heteroscedasticity robust doesn't help
    mod.res_pooled.HC1_se
    # compare with cluster robust se

    print sw.se_cov(sw.cov_cluster(mod.res_pooled, dgp.groups.astype(int)))
    # not bad, pretty close to panel estimator
    # and with Newey-West Hac
    print sw.se_cov(sw.cov_nw_panel(mod.res_pooled, 4, mod.group.groupidx))
    # too small, assuming no bugs,
    # see Peterson assuming it refers to same kind of model
    print dgp.cov

    mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res2 = mod2.fit_iterative(2)
    print res2.params
    print res2.bse
    # both implementations produce the same results:
    from numpy.testing import assert_almost_equal

    assert_almost_equal(res.params, res2.params, decimal=12)

开发者ID:r0k3，项目名称:statsmodels，代码行数:31，代码来源:ex_random_panel.py

示例11: test_short_panel

def test_short_panel():
    #this checks that some basic statistical properties are satisfied by the
    #results, not verified results against other packages
    #Note: the ranking of robust bse is different if within=True
    #I added within keyword to PanelSample to be able to use old example
    #if within is False, then there is no within group variation in exog.
    nobs = 100
    nobs_i = 5
    n_groups = nobs // nobs_i
    k_vars = 3

    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_arma,
                      corr_args=([1], [1., -0.9],), seed=377769, within=False)
    #print 'seed', dgp.seed
    y = dgp.generate_panel()
    noise = y - dgp.y_true

    #test dgp

    dgp_cov_e = np.array(
              [[ 1.    ,  0.9   ,  0.81  ,  0.729 ,  0.6561],
               [ 0.9   ,  1.    ,  0.9   ,  0.81  ,  0.729 ],
               [ 0.81  ,  0.9   ,  1.    ,  0.9   ,  0.81  ],
               [ 0.729 ,  0.81  ,  0.9   ,  1.    ,  0.9   ],
               [ 0.6561,  0.729 ,  0.81  ,  0.9   ,  1.    ]])

    npt.assert_almost_equal(dgp.cov, dgp_cov_e, 13)

    cov_noise = np.cov(noise.reshape(-1,n_groups, order='F'))
    corr_noise = cov2corr(cov_noise)
    npt.assert_almost_equal(corr_noise, dgp.cov, 1)

    #estimate panel model
    mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res2 = mod2.fit_iterative(2)


    #whitened residual should be uncorrelated
    corr_wresid = np.corrcoef(res2.wresid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_wresid, np.eye(5), 0.1)

    #residual should have same correlation as dgp
    corr_resid = np.corrcoef(res2.resid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_resid, dgp.cov, 0.1)

    assert_almost_equal(res2.resid.std(),1, decimal=0)

    y_pred = np.dot(mod2.exog, res2.params)
    assert_almost_equal(res2.fittedvalues, y_pred, 13)


    #compare with OLS

    res2_ols = mod2._fit_ols()
    npt.assert_(mod2.res_pooled is res2_ols)

    res2_ols = mod2.res_pooled  #TODO: BUG: requires call to _fit_ols

    #fitting once is the same as OLS
    #note: I need to create new instance, otherwise it continuous fitting
    mod1 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res1 = mod1.fit_iterative(1)

    assert_almost_equal(res1.params, res2_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res2_ols.bse, decimal=13)

    res_ols = OLS(y, dgp.exog).fit()
    assert_almost_equal(res1.params, res_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res_ols.bse, decimal=13)


    #compare with old version
    mod_old = ShortPanelGLS2(y, dgp.exog, dgp.groups)
    res_old = mod_old.fit()

    assert_almost_equal(res2.params, res_old.params, decimal=13)
    assert_almost_equal(res2.bse, res_old.bse, decimal=13)


    mod5 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res5 = mod5.fit_iterative(5)

    #make sure it's different
    #npt.assert_array_less(0.009, em.maxabs(res5.bse, res2.bse))

    cov_clu = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int))
    clubse = se_cov(cov_clu)
    pnwbse = se_cov(sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx))
    bser = np.vstack((res2.bse, res5.bse, clubse, pnwbse))
    bser_mean = np.mean(bser, axis=0)

    #cov_cluster close to robust and PanelGLS
    #is up to 24% larger than mean of bser
    #npt.assert_array_less(0, clubse / bser_mean - 1)
    npt.assert_array_less(clubse / bser_mean - 1, 0.25)
    #cov_nw_panel close to robust and PanelGLS
    npt.assert_array_less(pnwbse / bser_mean - 1, 0.1)
    #OLS underestimates bse, robust at least 60% larger
    npt.assert_array_less(0.6, bser_mean / res_ols.bse  - 1)

#.........这里部分代码省略.........

开发者ID:0ceangypsy，项目名称:statsmodels，代码行数:101，代码来源:test_random_panel.py

注：本文中的statsmodels.stats.sandwich_covariance.cov_cluster函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。