This article collects typical usage examples of the Python function statsmodels.compat.python.lzip. If you have been wondering what exactly lzip does and how to use it, the curated examples below should help.
The following shows 15 code examples of the lzip function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
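A note before the examples: in statsmodels, lzip is a thin compatibility wrapper, essentially lzip(*args, **kwargs) == list(zip(*args, **kwargs)), i.e. a zip that always materializes a list (identical on Python 2 and 3). Here is a minimal sketch of the two idioms used throughout the examples below, which additionally assume module-level imports such as import numpy as np:

from statsmodels.compat.python import lzip

# pair up two sequences element-wise
pairs = lzip([1, 2, 3], ['a', 'b', 'c'])
# [(1, 'a'), (2, 'b'), (3, 'c')]

# "unzip": transpose a list of tuples back into per-field tuples
nums, chars = lzip(*pairs)
# nums == (1, 2, 3), chars == ('a', 'b', 'c')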
Example 1: _plot_index
def _plot_index(self, y, ylabel, threshold=None, title=None, ax=None,
                **kwds):
    from statsmodels.graphics import utils
    fig, ax = utils.create_mpl_ax(ax)
    if title is None:
        title = "Index Plot"
    nobs = len(self.endog)
    index = np.arange(nobs)
    ax.scatter(index, y, **kwds)

    if threshold == 'all':
        large_points = np.ones(nobs, np.bool_)
    else:
        large_points = np.abs(y) > threshold
    psize = 3 * np.ones(nobs)
    # add point labels
    labels = self.results.model.data.row_labels
    if labels is None:
        labels = np.arange(nobs)
    ax = utils.annotate_axes(np.where(large_points)[0], labels,
                             lzip(index, y),
                             lzip(-psize, psize), "large",
                             ax)

    font = {"fontsize": 16, "color": "black"}
    ax.set_ylabel(ylabel, **font)
    ax.set_xlabel("Observation", **font)
    ax.set_title(title, **font)
    return fig
Example 2: summary_params_2d
def summary_params_2d(result, extras=None, endog_names=None, exog_names=None,
                      title=None):
    '''create summary table of regression parameters with several equations

    This allows interleaving of parameters with bse and/or tvalues.

    Parameters
    ----------
    result : result instance
        the result instance with params and attributes in extras
    extras : list of strings
        additional attributes to add below a parameter row, e.g. bse or tvalues
    endog_names : None or list of strings
        names for rows of the parameter array (multivariate endog)
    exog_names : None or list of strings
        names for columns of the parameter array (exog)
    title : None or string
        title for the table

    Returns
    -------
    table : SimpleTable
        the merged table, with the rows for the attributes in `extras`
        interleaved below each row of the parameter array
    '''
    if endog_names is None:
        # TODO: note the [1:] is specific to current MNLogit
        endog_names = ['endog_%d' % i for i in
                       np.unique(result.model.endog)[1:]]
    if exog_names is None:
        exog_names = ['var%d' % i for i in range(len(result.params))]

    # TODO: check formatting options with different values
    res_params = [[forg(item, prec=4) for item in row]
                  for row in result.params]
    if extras:
        extras_list = [[['%10s' % ('(' + forg(v, prec=3).strip() + ')')
                         for v in col]
                        for col in getattr(result, what)]
                       for what in extras]
        data = lzip(res_params, *extras_list)
        data = [i for j in data for i in j]  # flatten
        stubs = lzip(endog_names, *[[''] * len(endog_names)] * len(extras))
        stubs = [i for j in stubs for i in j]  # flatten
    else:
        data = res_params
        stubs = endog_names

    txt_fmt = copy.deepcopy(fmt_params)
    txt_fmt["data_fmts"] = ["%s"] * result.params.shape[1]

    return SimpleTable(data, headers=exog_names,
                       stubs=stubs,
                       title=title,
                       txt_fmt=txt_fmt)
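The interleaving idiom in Example 2 — lzip followed by a list-comprehension flatten — is worth isolating. Here is a minimal standalone sketch with made-up values (params and bses are hypothetical):

params = [['0.50', '1.20'], ['0.30', '0.80']]     # one row per equation
bses = [['(0.10)', '(0.20)'], ['(0.05)', '(0.15)']]

rows = lzip(params, bses)                    # [(p_row, b_row), (p_row, b_row)]
rows = [r for group in rows for r in group]  # flatten to alternating rows
# [['0.50', '1.20'], ['(0.10)', '(0.20)'], ['0.30', '0.80'], ['(0.05)', '(0.15)']]

Each parameter row is now followed directly by its standard-error row, which is exactly the layout the summary table prints.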
Example 3: _influence_plot
def _influence_plot(results, influence, external=True, alpha=.05,
                    criterion="cooks", size=48, plot_alpha=.75, ax=None,
                    **kwargs):
    infl = influence
    fig, ax = utils.create_mpl_ax(ax)

    if criterion.lower().startswith('coo'):
        psize = infl.cooks_distance[0]
    elif criterion.lower().startswith('dff'):
        psize = np.abs(infl.dffits[0])
    else:
        raise ValueError("Criterion %s not understood" % criterion)

    # scale the variables
    # TODO: what is the correct scaling and the assumption here?
    # we want plots to be comparable across different plots
    # so we would need to use the expected distribution of criterion probably
    old_range = np.ptp(psize)
    new_range = size**2 - 8**2
    psize = (psize - psize.min()) * new_range / old_range + 8**2

    leverage = infl.hat_matrix_diag
    if external:
        resids = infl.resid_studentized_external
    else:
        resids = infl.resid_studentized

    from scipy import stats
    cutoff = stats.t.ppf(1. - alpha / 2, results.df_resid)
    large_resid = np.abs(resids) > cutoff
    large_leverage = leverage > _high_leverage(results)
    large_points = np.logical_or(large_resid, large_leverage)

    ax.scatter(leverage, resids, s=psize, alpha=plot_alpha)

    # add point labels
    labels = results.model.data.row_labels
    if labels is None:
        labels = lrange(len(resids))
    ax = utils.annotate_axes(np.where(large_points)[0], labels,
                             lzip(leverage, resids),
                             lzip(-(psize / 2)**.5, (psize / 2)**.5),
                             "x-large", ax)

    # TODO: make configurable or let people do it ex-post?
    font = {"fontsize": 16, "color": "black"}
    ax.set_ylabel("Studentized Residuals", **font)
    ax.set_xlabel("H Leverage", **font)
    ax.set_title("Influence Plot", **font)
    return fig
Example 4: test_mcnemar_vectorized
def test_mcnemar_vectorized():
    ttk = np.random.randint(5, 15, size=(2, 2, 3))
    mcnemar(ttk)

    res = mcnemar(ttk, exact=False)
    res1 = lzip(*[mcnemar(ttk[:, :, i], exact=False) for i in range(3)])
    assert_allclose(res, res1, rtol=1e-13)

    res = mcnemar(ttk, exact=False, correction=False)
    res1 = lzip(*[mcnemar(ttk[:, :, i], exact=False, correction=False)
                  for i in range(3)])
    assert_allclose(res, res1, rtol=1e-13)

    res = mcnemar(ttk, exact=True)
    res1 = lzip(*[mcnemar(ttk[:, :, i], exact=True) for i in range(3)])
    assert_allclose(res, res1, rtol=1e-13)
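The pattern tested here relies on lzip as a transpose: each per-table mcnemar call returns a (statistic, pvalue) tuple, and lzip(*...) turns the list of tuples into (all statistics, all pvalues), matching the shape of the vectorized result. A sketch with dummy numbers:

per_table = [(4.0, 0.04), (2.5, 0.11), (6.1, 0.01)]  # (stat, pvalue) per 2x2 table
stats, pvals = lzip(*per_table)
# stats == (4.0, 2.5, 6.1), pvals == (0.04, 0.11, 0.01)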
Example 5: add_dict
def add_dict(self, d, ncols=2, align='l', float_format="%.4f"):
    '''Add the contents of a dict to summary table

    Parameters
    ----------
    d : dict
        Keys and values are automatically coerced to strings with str().
        Users are encouraged to format them before using add_dict.
    ncols : int
        Number of columns of the output table
    align : string
        Data alignment (l/c/r)
    '''
    keys = [_formatter(x, float_format) for x in iterkeys(d)]
    vals = [_formatter(x, float_format) for x in itervalues(d)]
    data = np.array(lzip(keys, vals))

    if data.shape[0] % ncols != 0:
        pad = ncols - (data.shape[0] % ncols)
        data = np.vstack([data, np.array(pad * [['', '']])])

    data = np.split(data, ncols)
    data = reduce(lambda x, y: np.hstack([x, y]), data)
    self.add_array(data, align=align)
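The reshaping in add_dict is compact; here is a standalone sketch of the same pad/split/hstack steps for ncols=2, using plain str() in place of the _formatter helper:

import numpy as np
from statsmodels.compat.python import lzip

d = {'N': 100, 'R2': 0.85, 'AIC': 310.2}
data = np.array(lzip([str(k) for k in d], [str(v) for v in d.values()]))
if data.shape[0] % 2 != 0:              # pad so rows divide evenly into ncols
    data = np.vstack([data, np.array([['', '']])])
data = np.hstack(np.split(data, 2))     # key/value column pairs side by side
# [['N'  '100'  'AIC' '310.2']
#  ['R2' '0.85' ''    ''     ]]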
Example 6: __init__
def __init__(self, group, name=''):
    super(self.__class__, self).__init__(group, name=name)

    # indices where the (sorted) group labels change
    idx = (np.nonzero(np.diff(group))[0] + 1).tolist()
    # (start, stop) pairs delimiting each consecutive group
    self.groupidx = groupidx = lzip([0] + idx, idx + [len(group)])
    ngroups = len(groupidx)
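The lzip([0] + idx, idx + [len(group)]) idiom converts change-points in a sorted label array into (start, stop) slice bounds, one pair per run of equal labels:

import numpy as np
from statsmodels.compat.python import lzip

group = np.array([0, 0, 1, 1, 1, 2])
idx = (np.nonzero(np.diff(group))[0] + 1).tolist()   # [2, 5]
bounds = lzip([0] + idx, idx + [len(group)])
# [(0, 2), (2, 5), (5, 6)] -- group[s:e] is one homogeneous block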
Example 7: _plot_leverage_resid2
def _plot_leverage_resid2(results, influence, alpha=.05, ax=None,
                          **kwargs):
    from scipy.stats import zscore, norm
    fig, ax = utils.create_mpl_ax(ax)

    infl = influence
    leverage = infl.hat_matrix_diag
    resid = zscore(infl.resid)
    ax.plot(resid**2, leverage, 'o', **kwargs)
    ax.set_xlabel("Normalized residuals**2")
    ax.set_ylabel("Leverage")
    ax.set_title("Leverage vs. Normalized residuals squared")

    large_leverage = leverage > _high_leverage(results)
    # norm or t here if standardized?
    cutoff = norm.ppf(1. - alpha / 2)
    large_resid = np.abs(resid) > cutoff
    labels = results.model.data.row_labels
    if labels is None:
        labels = lrange(int(results.nobs))
    index = np.where(np.logical_or(large_leverage, large_resid))[0]
    ax = utils.annotate_axes(index, labels, lzip(resid**2, leverage),
                             [(0, 5)] * int(results.nobs), "large",
                             ax=ax, ha="center", va="bottom")
    ax.margins(.075, .075)
    return fig
Example 8: _coef_table
def _coef_table(self):
    model = self.model
    k = model.neqs

    Xnames = self.model.exog_names

    data = lzip(model.params.T.ravel(),
                model.stderr.T.ravel(),
                model.tvalues.T.ravel(),
                model.pvalues.T.ravel())

    header = ('coefficient', 'std. error', 't-stat', 'prob')

    buf = StringIO()
    dim = k * model.k_ar + model.k_trend
    for i in range(k):
        section = "Results for equation %s" % model.names[i]
        buf.write(section + '\n')

        table = SimpleTable(data[dim * i: dim * (i + 1)], header,
                            Xnames, title=None, txt_fmt=self.default_fmt)
        buf.write(str(table) + '\n')

        if i < k - 1:
            buf.write('\n')

    return buf.getvalue()
Example 9: _create_default_properties
def _create_default_properties(data):
    """Create the default properties of the mosaic given the data.

    First it varies the color hue (first category), then the color
    saturation (second category) and then the color value
    (third category).  If a fourth category is found, it will put
    decoration on the rectangle.  Does not manage more than four
    levels of categories.
    """
    categories_levels = _categories_level(list(iterkeys(data)))
    Nlevels = len(categories_levels)
    # first level, the hue
    L = len(categories_levels[0])
    # hue = np.linspace(1.0, 0.0, L+1)[:-1]
    hue = np.linspace(0.0, 1.0, L + 2)[:-2]
    # second level, the saturation
    L = len(categories_levels[1]) if Nlevels > 1 else 1
    saturation = np.linspace(0.5, 1.0, L + 1)[:-1]
    # third level, the value
    L = len(categories_levels[2]) if Nlevels > 2 else 1
    value = np.linspace(0.5, 1.0, L + 1)[:-1]
    # fourth level, the hatch
    L = len(categories_levels[3]) if Nlevels > 3 else 1
    hatch = ['', '/', '-', '|', '+'][:L + 1]
    # convert to lists and pair each value with its category level
    hue = lzip(list(hue), categories_levels[0])
    saturation = lzip(list(saturation),
                      categories_levels[1] if Nlevels > 1 else [''])
    value = lzip(list(value),
                 categories_levels[2] if Nlevels > 2 else [''])
    hatch = lzip(list(hatch),
                 categories_levels[3] if Nlevels > 3 else [''])
    # create the properties dictionary
    properties = {}
    for h, s, v, t in product(hue, saturation, value, hatch):
        hv, hn = h
        sv, sn = s
        vv, vn = v
        tv, tn = t
        level = (hn,) + ((sn,) if sn else tuple())
        level = level + ((vn,) if vn else tuple())
        level = level + ((tn,) if tn else tuple())
        hsv = array([hv, sv, vv])
        prop = {'color': _single_hsv_to_rgb(hsv), 'hatch': tv, 'lw': 0}
        properties[level] = prop
    return properties
Example 10: qqline
def qqline(ax, line, x=None, y=None, dist=None, fmt='r-'):
    """
    Plot a reference line for a qqplot.

    Parameters
    ----------
    ax : matplotlib axes instance
        The axes on which to plot the line
    line : str {'45', 'r', 's', 'q'}
        Options for the reference line to which the data is compared:

        - '45' - 45-degree line
        - 's' - standardized line, the expected order statistics are
          scaled by the standard deviation of the given sample and have
          the mean added to them
        - 'r' - A regression line is fit
        - 'q' - A line is fit through the quartiles.
        - None - By default no reference line is added to the plot.

    x : array
        X data for plot. Not needed if line is '45'.
    y : array
        Y data for plot. Not needed if line is '45'.
    dist : scipy.stats.distribution
        A scipy.stats distribution, needed if line is 'q'.

    Notes
    -----
    There is no return value. The line is plotted on the given `ax`.
    """
    if line == '45':
        end_pts = lzip(ax.get_xlim(), ax.get_ylim())
        end_pts[0] = min(end_pts[0])
        end_pts[1] = max(end_pts[1])
        ax.plot(end_pts, end_pts, fmt)
        ax.set_xlim(end_pts)
        ax.set_ylim(end_pts)
        return  # does this have any side effects?
    if x is None and y is None:
        raise ValueError("If line is not 45, x and y cannot be None.")
    elif line == 'r':
        # could use ax.lines[0].get_xdata(), get_ydata(),
        # but don't know axes are 'clean'
        y = OLS(y, add_constant(x)).fit().fittedvalues
        ax.plot(x, y, fmt)
    elif line == 's':
        m, b = y.std(), y.mean()
        ref_line = x * m + b
        ax.plot(x, ref_line, fmt)
    elif line == 'q':
        _check_for_ppf(dist)
        q25 = stats.scoreatpercentile(y, 25)
        q75 = stats.scoreatpercentile(y, 75)
        theoretical_quartiles = dist.ppf([0.25, 0.75])
        m = (q75 - q25) / np.diff(theoretical_quartiles)
        b = q25 - m * theoretical_quartiles[0]
        ax.plot(x, m * x + b, fmt)
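For line='45', the lzip call pairs up the x- and y-limits: lzip((x0, x1), (y0, y1)) gives [(x0, y0), (x1, y1)], and taking the min of the first pair and the max of the second yields one square range shared by both axes. A sketch with hypothetical limits:

from statsmodels.compat.python import lzip

xlim, ylim = (0.0, 4.0), (1.0, 3.0)
end_pts = lzip(xlim, ylim)     # [(0.0, 1.0), (4.0, 3.0)]
end_pts[0] = min(end_pts[0])   # 0.0 -> common lower bound
end_pts[1] = max(end_pts[1])   # 4.0 -> common upper bound
# plotting end_pts against end_pts draws the 45-degree line over [0.0, 4.0]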
Example 11: pval_table
def pval_table(self):
    '''create a (n_levels, n_levels) array with corrected p_values

    this needs to improve, similar to R pairwise output
    '''
    k = self.n_levels
    pvals_mat = np.zeros((k, k))
    # if we don't assume we have all pairs
    pvals_mat[lzip(*self.all_pairs)] = self.pval_corrected()
    return pvals_mat
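Here lzip(*self.all_pairs) turns a list of (i, j) pairs into a row-index sequence and a column-index sequence, which NumPy fancy indexing uses to fill all pairwise cells at once. A sketch with made-up pairs and p-values; note the explicit tuple(...), since recent NumPy versions interpret a bare list of tuples differently than the older code above assumes:

import numpy as np
from statsmodels.compat.python import lzip

all_pairs = [(0, 1), (0, 2), (1, 2)]
pvals = np.array([0.01, 0.20, 0.05])
mat = np.zeros((3, 3))
mat[tuple(lzip(*all_pairs))] = pvals   # rows (0, 0, 1), cols (1, 2, 2)
# the upper triangle now holds the corrected p-values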
Example 12: test_mcnemar_vectorized
def test_mcnemar_vectorized(reset_randomstate):
    ttk = np.random.randint(5, 15, size=(2, 2, 3))
    with pytest.deprecated_call():
        res = sbmcnemar(ttk, exact=False)
    with pytest.deprecated_call():
        res1 = lzip(*[sbmcnemar(ttk[:, :, i], exact=False) for i in range(3)])
    assert_allclose(res, res1, rtol=1e-13)

    with pytest.deprecated_call():
        res = sbmcnemar(ttk, exact=False, correction=False)
    with pytest.deprecated_call():
        res1 = lzip(*[sbmcnemar(ttk[:, :, i], exact=False, correction=False)
                      for i in range(3)])
    assert_allclose(res, res1, rtol=1e-13)

    with pytest.deprecated_call():
        res = sbmcnemar(ttk, exact=True)
    with pytest.deprecated_call():
        res1 = lzip(*[sbmcnemar(ttk[:, :, i], exact=True) for i in range(3)])
    assert_allclose(res, res1, rtol=1e-13)
Example 13: summary_table
def summary_table(self, float_fmt="%6.3f"):
    '''create a summary table with all influence and outlier measures

    This currently does not distinguish between statistics that can be
    calculated from the original regression results and those for which a
    leave-one-observation-out loop is needed.

    Returns
    -------
    res : SimpleTable instance
        SimpleTable instance with the results, can be printed

    Notes
    -----
    This also attaches table_data to the instance.
    '''
    # (name, column) pairs for all influence and outlier measures
    table_raw = [('obs', np.arange(self.nobs)),
                 ('endog', self.endog),
                 ('fitted\nvalue', self.results.fittedvalues),
                 ("Cook's\nd", self.cooks_distance[0]),
                 ("student.\nresidual", self.resid_studentized_internal),
                 ('hat diag', self.hat_matrix_diag),
                 ('dffits \ninternal', self.dffits_internal[0]),
                 ("ext.stud.\nresidual", self.resid_studentized_external),
                 ('dffits', self.dffits[0])
                 ]
    colnames, data = lzip(*table_raw)  # unzip
    data = np.column_stack(data)
    self.table_data = data
    from statsmodels.iolib.table import SimpleTable, default_html_fmt
    from statsmodels.iolib.tableformatting import fmt_base
    from copy import deepcopy
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    fmt['data_fmts'] = ["%4d"] + [float_fmt] * (data.shape[1] - 1)
    # fmt_html['data_fmts'] = fmt['data_fmts']
    return SimpleTable(data, headers=colnames, txt_fmt=fmt,
                       html_fmt=fmt_html)
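colnames, data = lzip(*table_raw) is the unzip idiom applied to (name, column) pairs: one call produces both the header tuple and the tuple of columns. Sketch:

import numpy as np
from statsmodels.compat.python import lzip

table_raw = [('obs', np.arange(3)),
             ('endog', np.array([1.0, 2.0, 1.5]))]
colnames, data = lzip(*table_raw)   # ('obs', 'endog'), (array, array)
data = np.column_stack(data)        # shape (3, 2), one column per measure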
Example 14: in_domain
def in_domain(self, xs, ys, x):
    """
    Returns the filtered (xs, ys) based on the Kernel domain centred on x
    """
    # Disable black-list functions: filter used for speed instead of
    # list-comprehension
    # pylint: disable-msg=W0141
    def isInDomain(xy):
        """Used for filter to check if point is in the domain"""
        u = (xy[0] - x) / self.h
        return u >= self.domain[0] and u <= self.domain[1]

    if self.domain is None:
        return (xs, ys)
    else:
        # lfilter, like lzip, is a list-returning compat wrapper
        # (list(filter(...))) from statsmodels.compat.python
        filtered = lfilter(isInDomain, lzip(xs, ys))
        if len(filtered) > 0:
            xs, ys = lzip(*filtered)
            return (xs, ys)
        else:
            return ([], [])
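Note why the len(filtered) > 0 guard is needed: with an empty list, lzip(*filtered) is lzip(), which returns [], and unpacking that into xs, ys raises a ValueError. Sketch of the edge case:

from statsmodels.compat.python import lzip

filtered = []                  # no points fell inside the kernel domain
try:
    xs, ys = lzip(*filtered)   # lzip() == [], unpacking fails
except ValueError:
    xs, ys = [], []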
Example 15: getquotes
def getquotes(symbol, start, end):
    # fin is presumably matplotlib.finance (removed from modern matplotlib)
    # and pa is presumably pandas
    quotes = fin.quotes_historical_yahoo(symbol, start, end)
    dates, open, close, high, low, volume = lzip(*quotes)

    data = {
        'open': open,
        'close': close,
        'high': high,
        'low': low,
        'volume': volume
    }

    dates = pa.Index([dt.datetime.fromordinal(int(d)) for d in dates])
    return pa.DataFrame(data, index=dates)