This article collects typical usage examples of the lmap function from statsmodels.compat.python. If you have been wondering what exactly lmap does, how to call it, or what real uses of it look like, the curated code examples below may help.
The following presents 15 code examples of the lmap function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
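Before the examples, note that lmap came from the Python 2/3 compatibility layer and newer, Python 3-only statsmodels releases have since dropped it; it simply materializes map() into a list. A minimal, equivalent shim (a sketch, not the statsmodels source):

def lmap(func, *iterables):
    # list-returning map(); under Python 2 this was just the built-in map
    return list(map(func, *iterables))

print(lmap(int, ['1', '2', '3']))  # [1, 2, 3]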
Example 1: handle_missing

def handle_missing(cls, endog, exog, missing, **kwargs):
    """
    This returns a dictionary with keys endog, exog and the keys of
    kwargs. It preserves Nones.
    """
    none_array_names = []

    if exog is not None:
        combined = (endog, exog)
        combined_names = ['endog', 'exog']
    else:
        combined = (endog,)
        combined_names = ['endog']
        none_array_names += ['exog']

    # deal with other arrays
    combined_2d = ()
    combined_2d_names = []
    if len(kwargs):
        for key, value_array in iteritems(kwargs):
            if value_array is None or value_array.ndim == 0:
                none_array_names += [key]
                continue
            # grab 1d arrays
            if value_array.ndim == 1:
                combined += (value_array,)
                combined_names += [key]
            elif value_array.squeeze().ndim == 1:
                combined += (value_array,)
                combined_names += [key]
            # grab 2d arrays that are _assumed_ to be symmetric
            elif value_array.ndim == 2:
                combined_2d += (value_array,)
                combined_2d_names += [key]
            else:
                raise ValueError("Arrays with more than 2 dimensions "
                                 "aren't yet handled")

    nan_mask = _nan_rows(*combined)
    if combined_2d:
        nan_mask = _nan_rows(*(nan_mask[:, None],) + combined_2d)

    if missing == 'raise' and np.any(nan_mask):
        raise MissingDataError("NaNs were encountered in the data")
    elif missing == 'drop':
        nan_mask = ~nan_mask
        drop_nans = lambda x: cls._drop_nans(x, nan_mask)
        drop_nans_2d = lambda x: cls._drop_nans_2d(x, nan_mask)
        combined = dict(zip(combined_names, lmap(drop_nans, combined)))
        if combined_2d:
            combined.update(dict(zip(combined_2d_names,
                                     lmap(drop_nans_2d, combined_2d))))
        if none_array_names:
            combined.update(dict(zip(none_array_names,
                                     [None] * len(none_array_names))))
        return combined, np.where(~nan_mask)[0].tolist()
    else:
        raise ValueError("missing option %s not understood" % missing)
Example 2: prob_mv_grid

def prob_mv_grid(bins, cdf, axis=-1):
    '''helper function for probability of a rectangle grid in a
    multivariate distribution

    how does this generalize to more than 2 variates ?

    bins : tuple
        tuple of bin edges, currently it is assumed that they broadcast
        correctly
    '''
    if not isinstance(bins, np.ndarray):
        bins = lmap(np.asarray, bins)
        n_dim = len(bins)
        bins_ = []
        #broadcast if binedges are 1d
        if all(lmap(np.ndim, bins) == np.ones(n_dim)):
            for d in range(n_dim):
                sl = [None]*n_dim
                sl[d] = slice(None)
                # index with a tuple; modern numpy rejects a list of slices
                bins_.append(bins[d][tuple(sl)])
    else: #assume it is already correctly broadcasted
        n_dim = bins.shape[0]
        bins_ = bins

    print(len(bins))  # leftover debug output
    cdf_values = cdf(bins_)
    probs = cdf_values.copy()
    for d in range(n_dim):
        probs = np.diff(probs, axis=d)

    return probs
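A quick check of the CDF-differencing idea, with a hypothetical joint CDF for two independent Uniform(0, 1) variates. It assumes prob_mv_grid above (including the tuple(sl) indexing fix) is in scope; the stray debug print in the function will also emit the number of dimensions:

import numpy as np

def cdf_indep_uniform(bins_):
    # joint CDF F(x, y) = x * y on [0, 1]^2, evaluated on broadcastable axes
    x, y = bins_
    return np.clip(x, 0, 1) * np.clip(y, 0, 1)

bins = (np.array([0.0, 0.5, 1.0]), np.array([0.0, 0.25, 1.0]))
probs = prob_mv_grid(bins, cdf_indep_uniform)
print(probs)        # [[0.125 0.375]
                    #  [0.125 0.375]]
print(probs.sum())  # 1.0 -- cell probabilities over the full grid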
Example 3: date_range_str

def date_range_str(start, end=None, length=None):
    """
    Returns a list of abbreviated date strings.

    Parameters
    ----------
    start : str
        The first abbreviated date, for instance, '1965q1' or '1965m1'
    end : str, optional
        The last abbreviated date if length is None.
    length : int, optional
        The length of the returned array if end is None.

    Returns
    -------
    date_range : list
        List of strings
    """
    flags = re.IGNORECASE | re.VERBOSE
    #_check_range_inputs(end, length, freq)
    start = start.lower()
    if re.search(_m_pattern, start, flags):
        annual_freq = 12
        split = 'm'
    elif re.search(_q_pattern, start, flags):
        annual_freq = 4
        split = 'q'
    elif re.search(_y_pattern, start, flags):
        annual_freq = 1
        start += 'a1' # hack
        if end:
            end += 'a1'
        split = 'a'
    else:
        raise ValueError("Date %s not understood" % start)
    yr1, offset1 = lmap(int, start.replace(":", "").split(split))
    if end is not None:
        end = end.lower()
        yr2, offset2 = lmap(int, end.replace(":", "").split(split))
        length = (yr2 - yr1) * annual_freq + offset2
    elif length:
        yr2 = yr1 + length // annual_freq
        offset2 = length % annual_freq + (offset1 - 1)
    years = np.repeat(lrange(yr1+1, yr2), annual_freq).tolist()
    years = np.r_[[str(yr1)]*(annual_freq+1-offset1), years] # tack on first year
    years = np.r_[years, [str(yr2)]*offset2] # tack on last year
    if split != 'a':
        offset = np.tile(np.arange(1, annual_freq+1), yr2-yr1-1)
        offset = np.r_[np.arange(offset1, annual_freq+1).astype('a2'), offset]
        offset = np.r_[offset, np.arange(1, offset2+1).astype('a2')]
        date_arr_range = [''.join([i, split, asstr(j)]) for i, j in
                          zip(years, offset)]
    else:
        date_arr_range = years.tolist()
    return date_arr_range
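Assuming the module-level pieces not shown here (_m_pattern, _q_pattern, _y_pattern, lrange, asstr) are in scope, a hypothetical call expands an abbreviated range:

print(date_range_str('1965q1', '1966q2'))
# expected: ['1965q1', '1965q2', '1965q3', '1965q4', '1966q1', '1966q2']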
Example 4: setup_class

def setup_class(cls):
    XLISTEXOG2 = 'aget aget2 educyr actlim totchr'.split()

    endog_name = 'docvis'
    exog_names = 'private medicaid'.split() + XLISTEXOG2 + ['const']
    instrument_names = 'income ssiratio'.split() + XLISTEXOG2 + ['const']

    endog = DATA[endog_name]
    exog = DATA[exog_names]
    instrument = DATA[instrument_names]

    asarray = lambda x: np.asarray(x, float)
    endog, exog, instrument = lmap(asarray, [endog, exog, instrument])

    cls.bse_tol = [5e-6, 5e-7]
    q_tol = [0.04, 0]
    # compare to Stata default options, iterative GMM
    # with const at end
    start = OLS(np.log(endog+1), exog).fit().params

    nobs, k_instr = instrument.shape
    w0inv = np.dot(instrument.T, instrument) / nobs

    mod = gmm.NonlinearIVGMM(endog, exog, instrument, moment_exponential_add)
    res0 = mod.fit(start, maxiter=0, inv_weights=w0inv,
                   optim_method='bfgs',
                   optim_args={'gtol': 1e-8, 'disp': 0},
                   wargs={'centered': False})
    cls.res1 = res0

    from .results_gmm_poisson import results_addonestep as results
    cls.res2 = results
Example 5: anova_oneway

def anova_oneway(y, x, seq=0):
    # new version to match NIST
    # no generalization or checking of arguments, tested only for 1d
    yrvs = y[:, np.newaxis] #- min(y)
    #subtracting mean increases numerical accuracy for NIST test data sets
    xrvs = x[:, np.newaxis] - x.mean() #for 1d #- 1e12 trick for 'SmLs09.dat'

    meang, varg, xdevmeangr, countg = groupsstats_dummy(yrvs[:, :1], xrvs[:, :1])#, seq=0)
    #the following does not work as replacement
    #gcount, gmean, meanarr, withinvar, withinvararr = groupstatsbin(y, x)#, seq=0)
    sswn = np.dot(xdevmeangr.T, xdevmeangr)
    ssbn = np.dot((meang - xrvs.mean())**2, countg.T)
    nobs = yrvs.shape[0]
    ncat = meang.shape[1]
    dfbn = ncat - 1
    dfwn = nobs - ncat
    msb = ssbn/float(dfbn)
    msw = sswn/float(dfwn)
    f = msb/msw
    prob = stats.f.sf(f, dfbn, dfwn)
    R2 = (ssbn/(sswn+ssbn))  #R-squared
    resstd = np.sqrt(msw)    #residual standard deviation
    #print(f, prob)

    def _fix2scalar(z):  # return number
        if np.shape(z) == (1, 1):
            return z[0, 0]
        else:
            return z
    f, prob, R2, resstd = lmap(_fix2scalar, (f, prob, R2, resstd))
    return f, prob, R2, resstd
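Since groupsstats_dummy is not shown here, a convenient cross-check for the same one-way F statistic is scipy.stats.f_oneway (a different implementation, not the helper above); a sketch on made-up data:

import numpy as np
from scipy import stats

y = np.array([6.0, 7.0, 8.0, 9.0, 11.0, 12.0])
x = np.array([0, 0, 0, 1, 1, 1])  # group labels

# split y by group and run the standard one-way ANOVA F test
f, prob = stats.f_oneway(*(y[x == g] for g in np.unique(x)))
print(f, prob)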
Example 6: dataset

def dataset(self, as_dict=False):
    """
    Returns a Python generator object for iterating over the dataset.

    Parameters
    ----------
    as_dict : bool, optional
        If as_dict is True, yield each row of observations as a dict.
        If False, yields each row of observations as a list.

    Returns
    -------
    Generator object for iterating over the dataset. Yields each row of
    observations as a list by default.

    Notes
    -----
    If missing_values is True during instantiation of StataReader then
    observations with _StataMissingValue(s) are not filtered and should
    be handled by your application.
    """
    try:
        self._file.seek(self._data_location)
    except Exception:
        pass

    if as_dict:
        vars = lmap(str, self.variables())
        for i in range(len(self)):
            yield dict(zip(vars, self._next()))
    else:
        for i in range(self._header['nobs']):
            yield self._next()
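Hypothetical usage, assuming an instantiated reader of this legacy StataReader class and a local .dta file:

# reader = StataReader(open('example.dta', 'rb'))  # hypothetical file
# for row in reader.dataset(as_dict=True):
#     print(row)  # {'var1': value1, 'var2': value2, ...}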
Example 7: bootstrap

def bootstrap(distr, args=(), nobs=200, nrep=100, value=None, batch_size=None):
    '''Monte Carlo (or parametric bootstrap) p-values for gof

    currently hardcoded for A^2 only

    assumes vectorized fit_vec method,
    builds and analyses (nobs, nrep) sample in one step

    rename function to less generic

    this works also with nrep=1
    '''
    #signature similar to kstest ?
    #delegate to fn ?

    #rvs_kwds = {'size':(nobs, nrep)}
    #rvs_kwds.update(kwds)

    #it will be better to build a separate batch function that calls bootstrap
    #keep batch if value is true, but batch iterate from outside if stat is returned
    if batch_size is not None:
        if value is None:
            raise ValueError('using batching requires a value')
        n_batch = int(np.ceil(nrep/float(batch_size)))
        count = 0
        for irep in range(n_batch):
            rvs = distr.rvs(args, **{'size': (batch_size, nobs)})
            params = distr.fit_vec(rvs, axis=1)
            params = lmap(lambda x: np.expand_dims(x, 1), params)
            cdfvals = np.sort(distr.cdf(rvs, params), axis=1)
            stat = asquare(cdfvals, axis=1)
            count += (stat >= value).sum()
        return count / float(n_batch * batch_size)
    else:
        #rvs = distr.rvs(args, **kwds) #extension to distribution kwds ?
        rvs = distr.rvs(args, **{'size': (nrep, nobs)})
        params = distr.fit_vec(rvs, axis=1)
        params = lmap(lambda x: np.expand_dims(x, 1), params)
        cdfvals = np.sort(distr.cdf(rvs, params), axis=1)
        stat = asquare(cdfvals, axis=1)
        if value is None:  #return all bootstrap results
            stat_sorted = np.sort(stat)
            return stat_sorted
        else:  #calculate and return specific p-value
            return (stat >= value).mean()
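Stripped of the distribution-specific pieces (fit_vec, asquare), the Monte Carlo p-value idea is just "simulate under the null and count exceedances". A self-contained toy sketch, with the absolute sample mean standing in for the A^2 statistic:

import numpy as np

def mc_pvalue(value, nobs=200, nrep=1000, seed=0):
    rng = np.random.default_rng(seed)
    rvs = rng.normal(size=(nrep, nobs))  # samples under a N(0, 1) null
    stat = np.abs(rvs.mean(axis=1))      # toy statistic per replication
    return (stat >= value).mean()        # Monte Carlo p-value

print(mc_pvalue(0.1))  # about 0.16: P(|mean of 200 N(0,1) draws| >= 0.1)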
Example 8: _col_size

def _col_size(self, k=None):
    """Calculate size of a data record."""
    if len(self._col_sizes) == 0:
        self._col_sizes = lmap(lambda x: self._calcsize(x),
                               self._header["typlist"])
    if k is None:
        return self._col_sizes
    else:
        return self._col_sizes[k]
Example 9: variables

def variables(self):
    """
    Returns a list of the dataset's StataVariables objects.
    """
    return lmap(_StataVariable, zip(lrange(self._header['nvar']),
                self._header['typlist'], self._header['varlist'],
                self._header['srtlist'],
                self._header['fmtlist'], self._header['lbllist'],
                self._header['vlblist']))
Example 10: test_panel_robust_cov

def test_panel_robust_cov():
    import pandas as pa
    import statsmodels.datasets.grunfeld as gr
    from .results.results_panelrobust import results as res_stata

    dtapa = gr.data.load_pandas()
    # Stata example/data seems to miss last firm
    dtapa_endog = dtapa.endog[:200]
    dtapa_exog = dtapa.exog[:200]
    res = OLS(dtapa_endog,
              add_constant(dtapa_exog[["value", "capital"]],
                           prepend=False)).fit()

    # time indicator in range(max Ti)
    time = np.asarray(dtapa_exog[["year"]])
    time -= time.min()
    time = np.squeeze(time).astype(int)

    # sw.cov_nw_panel requires bounds instead of index
    tidx = [(i * 20, 20 * (i + 1)) for i in range(10)]

    # firm index in range(n_firms)
    firm_names, firm_id = np.unique(np.asarray(dtapa_exog[["firm"]], "S20"),
                                    return_inverse=True)

    # panel newey west standard errors
    cov = sw.cov_nw_panel(res, 0, tidx, use_correction="hac")
    # dropping numpy 1.4 soon
    # np.testing.assert_allclose(cov, res_stata.cov_pnw0_stata, rtol=1e-6)
    assert_almost_equal(cov, res_stata.cov_pnw0_stata, decimal=4)

    cov = sw.cov_nw_panel(res, 1, tidx, use_correction="hac")
    # np.testing.assert_allclose(cov, res_stata.cov_pnw1_stata, rtol=1e-6)
    assert_almost_equal(cov, res_stata.cov_pnw1_stata, decimal=4)

    cov = sw.cov_nw_panel(res, 4, tidx)  # check default
    # np.testing.assert_allclose(cov, res_stata.cov_pnw4_stata, rtol=1e-6)
    assert_almost_equal(cov, res_stata.cov_pnw4_stata, decimal=4)

    # cluster robust standard errors
    cov_clu = sw.cov_cluster(res, firm_id)
    assert_almost_equal(cov_clu, res_stata.cov_clu_stata, decimal=4)

    # cluster robust standard errors, non-int groups
    cov_clu = sw.cov_cluster(res, lmap(str, firm_id))
    assert_almost_equal(cov_clu, res_stata.cov_clu_stata, decimal=4)

    # Driscoll and Kraay panel robust standard errors
    rcov = sw.cov_nw_groupsum(res, 0, time, use_correction=0)
    assert_almost_equal(rcov, res_stata.cov_dk0_stata, decimal=4)

    rcov = sw.cov_nw_groupsum(res, 1, time, use_correction=0)
    assert_almost_equal(rcov, res_stata.cov_dk1_stata, decimal=4)

    rcov = sw.cov_nw_groupsum(res, 4, time)  # check default
    assert_almost_equal(rcov, res_stata.cov_dk4_stata, decimal=4)
Example 11: data2proddummy

def data2proddummy(x):
    '''creates product dummy variables from 2 columns of 2d array

    drops last dummy variable, but not from each category
    singular with simple dummy variable but not with constant
    quickly written, no safeguards
    '''
    #brute force, assumes x is 2d
    #replace with encoding if possible
    # note: np.unique flattens on modern numpy; use np.unique(x, axis=0)
    # there to get unique rows
    groups = np.unique(lmap(tuple, x.tolist()))
    #includes singularity with additive factors
    return (x == groups[:, None, :]).all(-1).T.astype(int)[:, :-1]
Example 12: _next

def _next(self):
    typlist = self._header["typlist"]
    if self._has_string_data:
        data = [None] * self._header["nvar"]
        for i in range(len(data)):
            if isinstance(typlist[i], int):
                data[i] = self._null_terminate(self._file.read(typlist[i]),
                                               self._encoding)
            else:
                data[i] = self._unpack(typlist[i],
                                       self._file.read(self._col_size(i)))
        return data
    else:
        return lmap(
            lambda i: self._unpack(typlist[i],
                                   self._file.read(self._col_size(i))),
            lrange(self._header["nvar"]))
Example 13: variables

def variables(self):
    """
    Returns a list of the dataset's StataVariables objects.
    """
    return lmap(
        _StataVariable,
        zip(
            lrange(self._header["nvar"]),
            self._header["typlist"],
            self._header["varlist"],
            self._header["srtlist"],
            self._header["fmtlist"],
            self._header["lbllist"],
            self._header["vlblist"],
        ),
    )
Example 14: dates_from_str

def dates_from_str(dates):
    """
    Turns a sequence of date strings into a list of datetime objects.

    Parameters
    ----------
    dates : array-like
        A sequence of abbreviated dates as strings. For instance,
        '1996m1' or '1996Q1'. The datetime dates are at the end of the
        period.

    Returns
    -------
    date_list : list
        A list of datetime types.
    """
    return lmap(date_parser, dates)
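In current statsmodels this helper lives in statsmodels.tsa.base.datetools; a short usage sketch:

from statsmodels.tsa.base.datetools import dates_from_str

quarterly = dates_from_str(['1996Q1', '1996Q2', '1996Q3'])
print(quarterly)  # end-of-period datetimes: 1996-03-31, 1996-06-30, 1996-09-30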
Example 15: test_plot_quarter

def test_plot_quarter(close_figures):
    dta = sm.datasets.macrodata.load_pandas().data
    dates = lmap('Q'.join, zip(dta.year.astype(int).apply(str),
                               dta.quarter.astype(int).apply(str)))

    # test dates argument
    quarter_plot(dta.unemp.values, dates)

    # test with a DatetimeIndex with no freq
    dta.set_index(pd.to_datetime(dates), inplace=True)
    quarter_plot(dta.unemp)

    # w freq
    # see pandas #6631
    dta.index = pd.DatetimeIndex(pd.to_datetime(dates), freq='QS-Oct')
    quarter_plot(dta.unemp)

    # w PeriodIndex
    dta.index = pd.PeriodIndex(pd.to_datetime(dates), freq='Q')
    quarter_plot(dta.unemp)
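The label-building trick at the top of the test works on its own too; a small sketch with a local lmap shim:

lmap = lambda func, *iterables: list(map(func, *iterables))  # local shim

years = ['1959', '1959', '1959', '1960']
quarters = ['1', '2', '3', '1']
print(lmap('Q'.join, zip(years, quarters)))
# ['1959Q1', '1959Q2', '1959Q3', '1960Q1']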