This article collects typical usage examples of the Python class statsmodels.nonparametric.kde.KDEUnivariate. If you are unsure what exactly KDEUnivariate does, how to use it, or what working code looks like, the curated class examples below may help.
Twelve code examples of the KDEUnivariate class are shown, sorted by popularity by default.
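Before the examples, a minimal, self-contained sketch of the basic KDEUnivariate workflow may help: construct the estimator from a sample, call fit(), then read the smoothed curve from the support/density attributes or query arbitrary points with evaluate(). The data below are invented for illustration.

import numpy as np
from statsmodels.nonparametric.kde import KDEUnivariate

rng = np.random.default_rng(0)
sample = rng.normal(size=500)        # toy data, illustrative only

kde = KDEUnivariate(sample)
kde.fit()                            # Gaussian kernel, automatic bandwidth by default

curve = kde.density                  # density values on the grid kde.support
points = kde.evaluate(np.array([0.0, 1.0]))  # density at chosen points
print(points)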
Example 1: pdf

def pdf(self, token, years, bw=5, *args, **kwargs):
    """
    Estimate a density function from a token's ratio series.

    Args:
        token (str)
        years (iter)
        bw (int)

    Returns: OrderedDict {year: density}
    """
    series = self.clean_series(token, *args, **kwargs)

    # Use the ratio values as weights.
    weights = np.array(list(series.values()))

    # Fit the density estimate.
    density = KDEUnivariate(list(series.keys()))
    density.fit(fft=False, weights=weights, bw=bw)

    samples = OrderedDict()
    for year in years:
        samples[year] = density.evaluate(year)[0]

    return samples
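A side note on Example 1: statsmodels raises NotImplementedError if weights are combined with the default FFT code path, which is why the fit above passes fft=False. A standalone sketch of the same weighted fit, using made-up years and ratios:

import numpy as np
from statsmodels.nonparametric.kde import KDEUnivariate

years = np.array([1900.0, 1910.0, 1920.0, 1930.0])  # hypothetical series
ratios = np.array([0.1, 0.4, 0.3, 0.2])

kde = KDEUnivariate(years)
kde.fit(weights=ratios, fft=False, bw=5)            # weights require fft=False
print(kde.evaluate(np.array([1915.0])))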
Example 2: find_outiers_kde

import numpy as np
from sklearn.preprocessing import scale  # assumed source of `scale`
from statsmodels.nonparametric.kde import KDEUnivariate

def find_outiers_kde(x):
    # Standardize the input, then estimate the density at each point.
    x_scaled = scale(list(map(float, x)))
    kde = KDEUnivariate(x_scaled)
    kde.fit(bw="scott", fft=True)
    pred = kde.evaluate(x_scaled)
    # Flag the n lowest-density points, where n is the number of points
    # whose estimated density falls below 0.5.
    n = sum(pred < 0.5)
    outlierindices = np.asarray(pred).argsort()[:n]
    outliervalue = np.asarray(x)[outlierindices]
    return outlierindices, outliervalue
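A hypothetical call to Example 2, assuming the imports above are in scope; the 0.5 cut-off on the densities of the standardized values determines how many of the lowest-density points get flagged:

x = np.array([1.0, 1.1, 0.9, 1.05, 0.95, 8.0])  # one far-out value
indices, values = find_outiers_kde(x)
print(indices, values)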
Example 3: empiricalPDF

def empiricalPDF(data):
    """
    Evaluate a probability density function using kernel density
    estimation for input data.

    :param data: :class:`numpy.ndarray` of data values.

    :returns: PDF values at the data points.
    """
    LOG.debug("Calculating empirical PDF")
    sortedmax = np.sort(data)
    kde = KDEUnivariate(sortedmax)
    kde.fit()
    try:
        res = kde.evaluate(sortedmax)
    except MemoryError:
        res = np.zeros(len(sortedmax))
    return res
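A minimal invocation of Example 3, under the assumption that LOG is an ordinary module-level logger and that the function's np and KDEUnivariate imports are present (the data are synthetic):

import logging
import numpy as np

LOG = logging.getLogger(__name__)

data = np.random.default_rng(2).gumbel(size=1000)  # synthetic extreme values
pdf_vals = empiricalPDF(data)
print(pdf_vals[:5])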
Example 4: kde_statsmodels_u

def kde_statsmodels_u(data, grid, **kwargs):
    """
    Univariate Kernel Density Estimation with Statsmodels

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x 1` dimensions, representing n points.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x 1` dimensions, representing m points.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions.
    """
    kde = KDEUnivariate(data)
    kde.fit(**kwargs)
    return kde.evaluate(grid)
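A quick usage sketch for Example 4 with an arbitrary grid; the bw keyword accepts either a scalar bandwidth or a rule name such as "scott" or "silverman":

import numpy as np

data = np.random.default_rng(1).normal(size=200)
grid = np.linspace(-4, 4, 100)
dens = kde_statsmodels_u(data, grid, bw="silverman")
print(dens[:5])                      # density at the first few grid points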
Example 5: lfdr

# Excerpt; relies on: numpy as np, scipy (as both scipy and sp, with
# scipy.stats and scipy.interpolate), click, and a module-local
# bw_nrd0 helper (see the sketch after this example).
def lfdr(p_values, pi0, trunc=True, monotone=True, transf="probit", adj=1.5, eps=np.power(10.0, -8)):
    """Estimate local FDR / posterior error probability from p-values according to bioconductor/qvalue."""
    p = np.array(p_values)

    # Compare to the bioconductor/qvalue reference implementation:
    # import rpy2
    # import rpy2.robjects as robjects
    # from rpy2.robjects import pandas2ri
    # pandas2ri.activate()
    # density = robjects.r('density')
    # smoothspline = robjects.r('smooth.spline')
    # predict = robjects.r('predict')

    # Check inputs
    lfdr_out = p
    rm_na = np.isfinite(p)
    p = p[rm_na]

    if min(p) < 0 or max(p) > 1:
        raise click.ClickException("p-values not in valid range [0,1].")
    elif pi0 < 0 or pi0 > 1:
        raise click.ClickException("pi0 not in valid range [0,1].")

    # Local FDR method for both probit and logit transformations
    if transf == "probit":
        p = np.maximum(p, eps)
        p = np.minimum(p, 1 - eps)
        x = scipy.stats.norm.ppf(p, loc=0, scale=1)

        # R-like implementation
        bw = bw_nrd0(x)
        myd = KDEUnivariate(x)
        myd.fit(bw=adj * bw, gridsize=512)
        splinefit = sp.interpolate.splrep(myd.support, myd.density)
        y = sp.interpolate.splev(x, splinefit)
        # myd = density(x, adjust = 1.5) # R reference function
        # mys = smoothspline(x = myd.rx2('x'), y = myd.rx2('y')) # R reference function
        # y = predict(mys, x).rx2('y') # R reference function

        lfdr = pi0 * scipy.stats.norm.pdf(x) / y
    elif transf == "logit":
        x = np.log((p + eps) / (1 - p + eps))

        # R-like implementation
        bw = bw_nrd0(x)
        myd = KDEUnivariate(x)
        myd.fit(bw=adj * bw, gridsize=512)
        splinefit = sp.interpolate.splrep(myd.support, myd.density)
        y = sp.interpolate.splev(x, splinefit)
        # myd = density(x, adjust = 1.5) # R reference function
        # mys = smoothspline(x = myd.rx2('x'), y = myd.rx2('y')) # R reference function
        # y = predict(mys, x).rx2('y') # R reference function

        dx = np.exp(x) / np.power((1 + np.exp(x)), 2)
        lfdr = (pi0 * dx) / y
    else:
        raise click.ClickException("Invalid local FDR method.")

    if trunc:
        lfdr[lfdr > 1] = 1
    if monotone:
        # Enforce monotonicity: sort by p-value, take the running maximum,
        # then map back to the original order.
        lfdr = lfdr[p.ravel().argsort()]
        for i in range(1, len(x)):
            if lfdr[i] < lfdr[i - 1]:
                lfdr[i] = lfdr[i - 1]
        lfdr = lfdr[scipy.stats.rankdata(p, "min") - 1]

    lfdr_out[rm_na] = lfdr
    return lfdr_out
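The bw_nrd0 helper used in Example 5 is not part of the excerpt; it presumably ports R's bw.nrd0 rule of thumb, 0.9 * min(sd, IQR/1.34) * n^(-1/5), with R's fallbacks for degenerate spread. A sketch under that assumption:

import numpy as np

def bw_nrd0(x):
    # Assumed port of R's bw.nrd0 (Silverman's rule of thumb).
    x = np.asarray(x, dtype=float)
    hi = np.std(x, ddof=1)
    q75, q25 = np.percentile(x, [75, 25])
    lo = min(hi, (q75 - q25) / 1.34)
    if lo == 0:                      # mirror R's guards when the spread collapses
        lo = hi or abs(x[0]) or 1.0
    return 0.9 * lo * len(x) ** -0.2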
Example 6: draw_logit_regression

def draw_logit_regression(df, kind):
    # Excerpt; relies on patsy.dmatrices, statsmodels.api as sm,
    # matplotlib.pyplot as plt, and KDEUnivariate being in scope.
    # In the formula, '~' plays the role of '=', followed by the features of our dataset.
    formula = 'Survived ~ C(Pclass) + C(Sex) + Age + SibSp + C(Embarked)'
    # Create a results dictionary to hold our regression results for easy analysis later.
    results = {}
    y, x = dmatrices(formula, data=df, return_type='dataframe')
    model = sm.Logit(y, x)
    res = model.fit()
    results['Logit'] = [res, formula]
    with open("logit_result.txt", "w") as w:
        print(res.summary(), file=w)
    if kind == 1:
        return results

    # Plot predictions vs. actual values.
    plt.figure(figsize=(18, 4))
    plt.subplot(121, axisbg="#DBDBDB")
    # Generate predictions from our fitted model.
    ypred = res.predict(x)
    plt.plot(x.index, ypred, 'bo', x.index, y, 'mo', alpha=.25)
    plt.grid(color='white', linestyle='dashed')
    plt.title('Logit predictions, Blue: \nFitted/predicted values: Red')
    plt.savefig("1.eps")

    # Residuals
    plt.subplot(122, axisbg="#DBDBDB")
    plt.plot(res.resid, 'r-')
    plt.grid(color='white', linestyle='dashed')
    plt.title('Logit Residuals')
    plt.savefig("2.eps")

    fig = plt.figure(figsize=(18, 9), dpi=1600)
    a = .2
    # Below are examples of more advanced plotting.
    # If it looks strange, check out the tutorial above.
    fig.add_subplot(221, axisbg="#DBDBDB")
    kde_res = KDEUnivariate(res.predict())
    kde_res.fit()
    plt.plot(kde_res.support, kde_res.density)
    plt.fill_between(kde_res.support, kde_res.density, alpha=a)
    plt.title("Distribution of our Predictions")

    fig.add_subplot(222, axisbg="#DBDBDB")
    plt.scatter(res.predict(), x['C(Sex)[T.male]'], alpha=a)
    plt.grid(b=True, which='major', axis='x')
    plt.xlabel("Predicted chance of survival")
    plt.ylabel("Gender Bool")
    plt.title("The Change of Survival Probability by Gender (1 = Male)")

    fig.add_subplot(223, axisbg="#DBDBDB")
    plt.scatter(res.predict(), x['C(Pclass)[T.3]'], alpha=a)
    plt.xlabel("Predicted chance of survival")
    plt.ylabel("Class Bool")
    plt.grid(b=True, which='major', axis='x')
    plt.title("The Change of Survival Probability by Lower Class (1 = 3rd Class)")

    fig.add_subplot(224, axisbg="#DBDBDB")
    plt.scatter(res.predict(), x.Age, alpha=a)
    plt.grid(True, linewidth=0.15)
    plt.title("The Change of Survival Probability by Age")
    plt.xlabel("Predicted chance of survival")
    plt.ylabel("Age")
    plt.savefig("prediction.eps")
Example 7: kde_statsmodels_u

def kde_statsmodels_u(x, x_grid, bandwidth=0.2, **kwargs):
    """Univariate Kernel Density Estimation with Statsmodels"""
    kde = KDEUnivariate(x)
    kde.fit(bw=bandwidth, **kwargs)
    return kde.evaluate(x_grid)
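Since the extra keyword arguments in Example 7 are forwarded straight to fit(), settings such as the bandwidth adjustment factor can be controlled through the wrapper; a sketch reusing the x and x_grid names from the signature:

dens = kde_statsmodels_u(x, x_grid, bandwidth=0.2, adjust=2)  # effective bw = 0.4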
Example 8: KDEUnivariate

# Module-level excerpt; `res` (a fitted Logit result) and `x` (the design
# matrix) come from earlier in the script.
plt.title('Logit Residuals')
# Hey I've got an idea, let's just make more plots...
fig = plt.figure(figsize=(18, 9), dpi=1600)
a = .2
fig.add_subplot(221, axisbg="#DBDBDB")
# This is the "kernel density estimator", just like the one used above,
# creating a nice smoothed density plot of the predictions.
# The y-values look incorrect, but I'm guessing the shape is right.
kde_res = KDEUnivariate(res.predict())
kde_res.fit()
# The "support" is the grid of points at which the density is evaluated.
plt.plot(kde_res.support, kde_res.density)
plt.fill_between(kde_res.support, kde_res.density, alpha=a)
plt.title("Distribution of our Predictions")
# Show that predicted survival probabilities are much lower
# for males than for females.
fig.add_subplot(222, axisbg="#DBDBDB")
plt.scatter(res.predict(), x['C(Sex)[T.male]'], alpha=a)
plt.grid(b=True, which='major', axis='x')
plt.xlabel("Predicted chance of survival")
plt.ylabel("Gender Bool")
Example 9: bootstrap_fit

# Module-level excerpt; bootstrap_fit, resid, n_bs and q are defined earlier
# in the script; stats is scipy.stats, and ECDF presumably comes from
# statsmodels.distributions.empirical_distribution.
ln_par, ln_lo, ln_up = bootstrap_fit(
    stats.lognorm, resid, n_iter=n_bs, quant=q
)
hc_par, hc_lo, hc_up = bootstrap_fit(
    stats.halfcauchy, resid, n_iter=n_bs, quant=q
)
gam_par, gam_lo, gam_up = bootstrap_fit(
    stats.gamma, resid, n_iter=n_bs, quant=q
)
##################################################################
hc = stats.halfcauchy(*stats.halfcauchy.fit(resid))
lg = stats.lognorm(*stats.lognorm.fit(resid))
dens = KDEUnivariate(resid)
dens.fit()
ecdf = ECDF(resid)
##################################################################
# Prepare x axes for plotting.
ex = ecdf.x
x = np.linspace(min(resid), max(resid), 2000)
##################################################################
# Fit a Landau distribution with ROOT.
if HAS_ROOT:
    root_hist = rootpy.plotting.Hist(100, 0, np.pi)
    root_hist.fill_array(resid)
Example 10: kde_statsmodels_u

def kde_statsmodels_u(self, x_grid, bandwidth=0.2, **kwargs):
    """Univariate Kernel Density Estimation with Statsmodels"""
    from statsmodels.nonparametric.kde import KDEUnivariate
    kde = KDEUnivariate(self.data)
    kde.fit(bw=bandwidth, **kwargs)
    return kde.evaluate(x_grid)
Example 11: setup_class

def setup_class(cls):
    # KDE is presumably KDEUnivariate imported under a short alias in the test module.
    cls.decimal_density = 2  # low accuracy because binning is different
    res1 = KDE(Xi)
    res1.fit(kernel="gau", fft=True, bw="silverman")
    cls.res1 = res1
    rfname2 = os.path.join(curdir, 'results', 'results_kde_fft.csv')
    cls.res_density = np.genfromtxt(open(rfname2, 'rb'))
Example 12: setupClass

def setupClass(cls):
    cls.x = x = KDEWResults['x']
    weights = KDEWResults['weights']
    res1 = KDE(x)
    res1.fit(kernel=cls.kernel_name, weights=weights, fft=False)
    cls.res1 = res1
    cls.res_density = KDEWResults[cls.res_kernel_name]