本文整理汇总了Python中scipy.stats模块的典型用法代码示例。如果您正苦于以下问题:Python scipy.stats的具体用法?Python scipy.stats怎么用?Python scipy.stats使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该模块所在的包scipy的用法示例。
在下文中一共展示了scipy.stats方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_influence
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def get_influence(self):
    """
    Build an influence-diagnostics object for this instance.

    Returns
    -------
    infl : OLSInfluence instance
        ``OLSInfluence`` constructed from ``self``; it has methods to
        calculate the main influence and outlier measures for the OLS
        regression.

    See Also
    --------
    statsmodels.stats.outliers_influence.OLSInfluence
    """
    # Imported inside the method (not at module level), as in the original.
    from statsmodels.stats import outliers_influence

    return outliers_influence.OLSInfluence(self)
示例2: test_kurt
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def test_kurt(self):
    """Check ``kurt`` against ``scipy.stats.kurtosis`` and on tiny inputs."""
    from scipy.stats import kurtosis

    def unbiased_kurtosis(values):
        # Reference implementation handed to the shared stat-op checker.
        return kurtosis(values, bias=False)

    self._check_stat_op('kurt', unbiased_kurtosis)

    multi = MultiIndex(
        levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
        labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
                [0, 1, 0, 1, 0, 1]])
    series = Series(np.random.randn(6), index=multi)
    # Level 0 has the single key 'bar', so the per-level result for 'bar'
    # must match the kurtosis of the whole series.
    tm.assert_almost_equal(series.kurt(), series.kurt(level=0)['bar'])

    # Corner cases: kurt() returns NaN unless there are at least 4 values.
    min_count = 4
    for n in range(1, min_count + 1):
        ones_series = Series(np.ones(n))
        ones_frame = DataFrame(np.ones((n, n)))
        if n >= min_count:
            assert 0 == ones_series.kurt()
            assert (ones_frame.kurt() == 0).all()
        else:
            assert np.isnan(ones_series.kurt())
            assert np.isnan(ones_frame.kurt()).all()
示例3: test_mwu
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def test_mwu(self):
    """Test function mwu"""
    scipy_two_sided = scipy.stats.mannwhitneyu(
        x, y, use_continuity=True, alternative='two-sided')
    pg_two_sided = mwu(x, y, tail='two-sided')
    # Similar to R: wilcox.test(df$x, df$y, paired = FALSE, exact = FALSE)
    # Note that the RBC value are compared to JASP in test_pairwise.py
    u_scipy, p_scipy = scipy_two_sided[0], scipy_two_sided[1]
    assert u_scipy == pg_two_sided.at['MWU', 'U-val']
    assert p_scipy == pg_two_sided.at['MWU', 'p-val']

    # One-sided
    assert np.median(x) > np.median(y)  # Tail = greater, x > y
    p_one_sided = mwu(x, y, tail='one-sided').at['MWU', 'p-val']
    p_greater = mwu(x, y, tail='greater').at['MWU', 'p-val']
    assert p_one_sided == p_greater

    p_less = mwu(x, y, tail='less').at['MWU', 'p-val']
    scipy_less = scipy.stats.mannwhitneyu(
        x, y, use_continuity=True, alternative='less')
    assert p_less == scipy_less[1]
示例4: test_wilcoxon
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def test_wilcoxon(self):
    """Test function wilcoxon"""
    # R: wilcox.test(df$x, df$y, paired = TRUE, exact = FALSE)
    # The V value is slightly different between SciPy and R
    # The p-value, however, is almost identical
    scipy_result = scipy.stats.wilcoxon(x2, y2, correction=True)
    pg_two_sided = wilcoxon(x2, y2, tail='two-sided')
    w_pg = pg_two_sided.at['Wilcoxon', 'W-val']
    assert scipy_result[0] == w_pg
    assert w_pg == 20.5  # JASP
    assert scipy_result[1] == pg_two_sided.at['Wilcoxon', 'p-val']

    pg_less = wilcoxon(x2, y2, tail='less')
    pg_greater = wilcoxon(x2, y2, tail='greater')
    pg_one_sided = wilcoxon(x2, y2, tail='one-sided')
    pd.testing.assert_frame_equal(pg_one_sided, pg_less)

    # Note that the RBC value are compared to JASP in test_pairwise.py
    # The RBC values in JASP does not change according to the tail.
    for frame in (pg_two_sided, pg_less, pg_greater):
        assert round(frame.at['Wilcoxon', 'RBC'], 3) == -0.379

    # CLES is compared to:
    # https://janhove.github.io/reporting/2016/11/16/common-language-effect-sizes
    expected_cles = (
        (pg_two_sided, 0.396),
        (pg_less, 0.604),
        (pg_greater, 0.396),
    )
    for frame, cles in expected_cles:
        assert round(frame.at['Wilcoxon', 'CLES'], 3) == cles
示例5: _clean_nans
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def _clean_nans(scores):
    """
    Return a float copy of ``scores`` with every NaN replaced.

    Fixes Issue #1240: NaNs can't be properly compared, so they are changed
    to the smallest finite value of the copy's dtype. -inf seems to be
    unreliable.
    """
    # XXX where should this function be called? fit? scoring functions
    # themselves?
    cleaned = as_float_array(scores, copy=True)
    nan_mask = np.isnan(cleaned)
    cleaned[nan_mask] = np.finfo(cleaned.dtype).min
    return cleaned
######################################################################
# Scoring functions
# The following function is a rewriting of scipy.stats.f_oneway
# Contrary to the scipy.stats.f_oneway implementation it does not
# copy the data while keeping the inputs unchanged.
示例6: _chisquare
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def _chisquare(f_obs, f_exp):
    """Fast replacement for scipy.stats.chisquare.

    Version from https://github.com/scipy/scipy/pull/2525 with additional
    optimizations.

    Returns the pair ``(statistic, p_value)``, where the statistic is
    ``sum((f_obs - f_exp) ** 2 / f_exp)`` along axis 0 and the p-value uses
    ``len(f_obs) - 1`` degrees of freedom.
    """
    stat = np.asarray(f_obs, dtype=np.float64)
    n_categories = len(stat)

    # Every step writes back into ``stat``: no temporaries are allocated,
    # and a float64 array passed as ``f_obs`` is overwritten.
    np.subtract(stat, f_exp, out=stat)
    np.square(stat, out=stat)
    with np.errstate(invalid="ignore"):
        np.divide(stat, f_exp, out=stat)

    total = stat.sum(axis=0)
    return total, special.chdtrc(n_categories - 1, total)
示例7: apply
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def apply(self, solution):
    """Smooth a solution's ``corr`` and ``err`` with a Gaussian kernel.

    The kernel is the normal pdf with ``loc=self.nondectime`` and
    ``scale=self.ndsigma``, sampled every ``solution.model.dt`` and
    normalised to sum to 1 when its sum is positive. Returns a new
    ``Solution`` carrying the convolved arrays and the original model,
    conditions and undec.
    """
    # Make sure params are within range
    assert self.ndsigma > 0, "Invalid st parameter"

    # Extract components of the solution object for convenience
    corr, err = solution.corr, solution.err
    dt = solution.model.dt
    n = len(corr)

    # Create the weights for different timepoints
    times = np.arange(-n, n) * dt
    kernel = scipy.stats.norm(scale=self.ndsigma, loc=self.nondectime)
    weights = kernel.pdf(times)
    total = np.sum(weights)
    if total > 0:
        weights /= total  # Ensure it integrates to 1

    # Keep n samples of the full convolution, starting at index n.
    window = slice(n, 2 * n)
    newcorr = np.convolve(weights, corr, mode="full")[window]
    newerr = np.convolve(weights, err, mode="full")[window]
    return Solution(newcorr, newerr, solution.model,
                    solution.conditions, solution.undec)
# End OverlayNonDecisionGaussian
# Start OverlayNonDecisionLR
示例8: plot
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def plot(fcts, data):
    """Plot a normalised histogram of ``data`` with fitted PDFs overlaid.

    Parameters
    ----------
    fcts : sequence of str
        Names of ``scipy.stats`` distributions (e.g. ``"norm"``). Each one
        is fitted to ``data`` and its PDF is drawn between its 0.1% and
        99.9% quantiles.
    data : sequence of numbers
        Sample to histogram and to fit.

    Raises
    ------
    AttributeError
        If a name in ``fcts`` is not an attribute of ``scipy.stats``.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    # plot data
    # ``//`` keeps the bin count an integer under Python 3, and ``density``
    # replaces the ``normed`` keyword that matplotlib has removed.
    plt.hist(data, density=True, bins=max(10, len(data) // 10))

    # plot fitted probability
    for fct in fcts:
        # Plain attribute lookup instead of eval() on a string-built
        # expression: arbitrary code in ``fct`` is never executed, and the
        # fitted parameters are not round-tripped through str().
        dist = getattr(scipy.stats, fct)
        params = dist.fit(data)
        frozen = dist.freeze(*params)
        x = np.linspace(frozen.ppf(0.001), frozen.ppf(0.999), 500)
        plt.plot(x, frozen.pdf(x), lw=3, label=fct)

    plt.legend(loc='best', frameon=False)
    plt.title("Top "+str(len(fcts))+" Results")
    plt.show()
示例9: wald_test
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def wald_test(model, X):
    '''
    Per-coefficient Wald test for a fitted binary probabilistic classifier.

    The covariance of the parameters is estimated as ``inv(X' V X)``, where
    the design matrix has a leading column of ones and ``V`` is diagonal
    with entries ``p0 * p1`` taken from ``model.predict_proba(X)``. Each
    statistic is ``(param / standard_error) ** 2``. Its p-value is the
    upper tail of a chi-square distribution with 1 degree of freedom.

    :param model: a fitted model exposing predict_proba(), intercept_ and
        coef_; the first intercept and the first row of coef_ are used
    :param X: dataset features (DataFrame or 2-D array)
    :return: (wald_stats, p_value), two 1-D arrays ordered as
        [intercept, coefficients...]
    '''
    pred_probs = np.asarray(model.predict_proba(X))
    features = np.asarray(X)
    X_design = np.hstack((np.ones((features.shape[0], 1)), features))

    # Scaling the rows of the design matrix is the same as multiplying by
    # the diagonal matrix V, without building it.
    weights = pred_probs[:, 0] * pred_probs[:, 1]
    information = X_design.T.dot(X_design * weights[:, np.newaxis])
    cov_mat = np.linalg.inv(information)

    model_params = np.hstack((model.intercept_[0], model.coef_[0]))
    wald_stats = (model_params / np.sqrt(np.diag(cov_mat))) ** 2

    # scipy.stats.wald is the Wald (inverse Gaussian) distribution and has
    # nothing to do with the Wald test. The squared z statistic is referred
    # to chi-square(1), and the p-value is its survival function.
    p_value = scipy.stats.chi2.sf(wald_stats, df=1)
    return wald_stats, p_value
示例10: gaussian2d_fit
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def gaussian2d_fit(sx, sy, guess=[0.5,1]):
    """2D-Gaussian fit of samples S using a fit to the empirical CDF.

    Each axis is fitted independently: the analytical Gaussian CDF is
    least-squares fitted to the empirical CDF of ``sx`` and of ``sy``, both
    starting from ``guess`` = [mu, sigma].

    Returns the tuple ``(mux, sigmax, muy, sigmay)``.
    """
    assert sx.size == sy.size

    def empirical_cdf(samples):
        # Sorted samples paired with mid-point plotting positions (i+0.5)/n.
        n = samples.size
        return np.sort(samples), np.arange(0.5, n + 0.5) * 1. / n

    def gauss_cdf(x, mu, sigma):
        # Analytical gaussian CDF
        return 0.5 * (1 + erf((x - mu) / (np.sqrt(2) * sigma)))

    def residuals(p, x, y):
        return gauss_cdf(x, p[0], p[1]) - y

    # Fitting the empirical CDF
    px, _ = leastsq(residuals, x0=guess, args=empirical_cdf(sx))
    py, _ = leastsq(residuals, x0=guess, args=empirical_cdf(sy))
    print("2D Gaussian CDF fit", px, py)
    return px[0], px[1], py[0], py[1]
示例11: test_gaussian_fit
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def test_gaussian_fit():
    """Fit one normal sample three ways, print the fits and plot them.

    Draws 500 values from a normal with mean 0.1 and standard deviation 0.4,
    estimates (mu, sigma) with ``gaussian_fit``, ``S.norm.fit`` and
    ``gaussian_fit_ml``, then overlays the three fitted PDFs and the true
    PDF on a density histogram of the sample.
    """
    true_mu, true_sigma = 0.1, 0.4
    n_samples = 500
    samples = R.normal(size=n_samples, loc=true_mu, scale=true_sigma)

    mu_ecdf, sig_ecdf = gaussian_fit(samples)
    mu_scipy, sig_scipy = S.norm.fit(samples)
    mu_manual, sig_manual = gaussian_fit_ml(samples)
    print("ECDF ", mu_ecdf, sig_ecdf)
    print("ML ", mu_scipy, sig_scipy)
    print("ML (manual)", mu_manual, sig_manual)

    counts, edges = np.histogram(samples, bins=20, density=True)
    bin_width = edges[1] - edges[0]
    bar(edges[:-1], counts, bin_width, alpha=0.3)

    grid = np.r_[samples.min()-1:samples.max()+1:200j]
    # True curve first (opaque grey), then the ECDF and scipy-ML fits.
    curves = (
        (true_mu, true_sigma, dict(color='grey')),
        (mu_ecdf, sig_ecdf, dict(color='r', alpha=0.5)),
        (mu_scipy, sig_scipy, dict(color='b', alpha=0.5)),
    )
    for mean, std, style in curves:
        plot(grid, normpdf(grid, mean, std), lw=2, **style)
示例12: _get_method_by_alias
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def _get_method_by_alias(alias, module, tf_distributions=None):
    """ Resolve the full name of a randomizer from ``scipy.stats``,
    ``tensorflow`` or ``numpy`` given its alias or full name.

    ``module`` is one of ``'np'``, ``'tf'`` or ``'ss'``. An alias missing
    from ``ALIASES`` is treated as being the full name already. Raises
    ``ValueError`` when no name is known for ``module`` or when the chosen
    backend has no attribute with that name.
    """
    backends = {'np': np.random,
                'tf': tf_distributions,
                'ss': ss}

    # fetch fullname; unknown aliases map to themselves for every backend
    identity_names = dict.fromkeys(['np', 'tf', 'ss'], alias)
    fullname = ALIASES.get(alias, identity_names).get(module, None)
    if fullname is None:
        raise ValueError("Distribution %s has no implementaion in module %s" % (alias, module))

    # check that the randomizer is implemented in corresponding module
    if hasattr(backends[module], fullname):
        return fullname
    raise ValueError("Distribution %s has no implementaion in module %s" % (fullname, module))
示例13: run_anova
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def run_anova(df, independent_variables_names, dependent_variables_names, NUM_GROUPS_CUTOFF=15):
    '''
    Run an ANOVA when exactly one dependent variable is given.

    Returns whatever ``anova`` returns (described by the original docstring
    as a dictionary with the anova stats), or an empty list when the number
    of dependent variables is not exactly one.

    df : dataframe
    independent_variables_names : list describing the independent variables;
        passed unchanged to ``add_binned_columns_to_df`` and ``anova``.
        NOTE(review): the original docstring described each entry as
        [type, name, num_bins (0 means continuous)] -- confirm with callers.
    dependent_variables_names : list describing the dependent variables; only
        the first entry is used.
        NOTE(review): the original docstring described each entry as
        [type, name] -- confirm with callers.
    NUM_GROUPS_CUTOFF : not used in this function; kept so existing callers
        keep working.
    '''
    # The binned frame is built before the validity check, as in the
    # original, so any side effect of the helper still happens.
    transformed_data = add_binned_columns_to_df(df, independent_variables_names, dependent_variables_names)
    if len(dependent_variables_names) != 1:
        return []
    return anova(transformed_data, independent_variables_names, dependent_variables_names[0])
示例14: _draw_samples
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def _draw_samples(self, size, random_state):
    """Sample ``size`` float32 values from a truncated normal distribution.

    ``loc``, ``scale``, ``low`` and ``high`` are each drawn once from the
    corresponding attribute of ``self``. The bounds are swapped if
    ``low > high``. A scale of zero gives an array filled with ``loc``.
    """
    # pylint: disable=invalid-name
    # Draw order (loc, scale, low, high, then the seed) is kept as-is, since
    # every draw may advance the random state.
    loc, scale, low, high = [
        getattr(self, attr).draw_sample(random_state=random_state)
        for attr in ("loc", "scale", "low", "high")
    ]
    seed = random_state.generate_seed_()

    lower, upper = (high, low) if low > high else (low, high)

    assert scale >= 0, "Expected scale to be >=0, got %.4f." % (scale,)
    if scale == 0:
        return np.full(size, fill_value=loc, dtype=np.float32)

    # truncnorm takes its bounds in units of standard deviations from loc.
    dist = scipy.stats.truncnorm(
        a=(lower - loc) / scale,
        b=(upper - loc) / scale,
        loc=loc,
        scale=scale,
    )

    # Using a seed here works with both np.random interfaces.
    # Last time tried, scipy crashed when providing just
    # random_state.generator on the new np.random interface.
    samples = dist.rvs(size=size, random_state=seed)
    return samples.astype(np.float32)
示例15: optim
# 需要导入模块: import scipy [as 别名]
# 或者: from scipy import stats [as 别名]
def optim(WORK,handle, minsamp, CUT1, CUT2, datatype, haplos):
    """Optimise the likelihood for one cluster file and record the result.

    Builds the inputs with ``consensus``, ``makeP`` and ``table_c``, then
    minimises ``LL_haploid`` (one parameter, when ``haplos == 1``) or ``LL``
    (two parameters) with ``scipy.optimize.fmin``. One tab-separated line,
    ``name, H, E``, is written to ``WORK + "stats/." + name + ".temp"`` and
    a progress dot is written to stderr.

    ``H`` is fixed to 0 in the haploid case.
    NOTE(review): H and E are presumably the heterozygosity and error-rate
    estimates -- confirm against LL / LL_haploid.

    Returns None.
    """
    name = handle.split("/")[-1].replace(".clustS.gz","")
    D = consensus(handle, minsamp, CUT1, CUT2, datatype)
    P = makeP(D)
    Tab = table_c(D)
    del D  # release the consensus data before optimising

    if haplos == 1:
        H = 0.
        # fmin returns a length-1 array for a single parameter. Take the
        # scalar so round()/str() below format a number, not an array.
        E = scipy.optimize.fmin(LL_haploid, [0.001], (P,Tab), disp=False, full_output=False)[0]
    else:
        H,E = scipy.optimize.fmin(LL, [0.01,0.001], (P,Tab), disp=False, full_output=False)
    del Tab

    # Drop only a literal ".gz" suffix. str.strip(".gz") removes any leading
    # or trailing '.', 'g' or 'z' characters and mangles names like "zebra".
    label = name[:-len(".gz")] if name.endswith(".gz") else name

    # ``with`` closes the file even if formatting or writing raises.
    with open(WORK+"stats/."+name+".temp",'w') as outfile:
        outfile.write("\t".join([label,str(round(H,8))[0:10],str(round(E,8))[0:10],"\n"]))
    sys.stderr.write(".")