本文整理汇总了Python中stats.rankdata函数的典型用法代码示例。如果您正苦于以下问题：Python rankdata函数的具体用法？Python rankdata怎么用？Python rankdata使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了rankdata函数的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: wilcoxon
def wilcoxon(x,y=None):
    """
    Calculate the Wilcoxon signed-rank test.

    Tests the null hypothesis that two related paired samples come from the
    same distribution, using the normal approximation to the distribution
    of the rank sum (a non-parametric counterpart of the paired T-test).

    Parameters
    ----------
    x : array_like
        The first set of measurements, or the paired differences themselves
        when `y` is None.
    y : array_like, optional
        The second set of measurements; must have the same length as `x`.

    Returns
    -------
    T : float
        The smaller of the rank sum of the positive differences and the
        rank sum of the negative differences.
    prob : float
        The two-sided p-value from the normal approximation.

    Raises
    ------
    ValueError
        If `x` and `y` have different lengths.

    Notes
    -----
    Zero differences are discarded before ranking.  A warning line is
    printed when fewer than 10 non-zero differences remain, because the
    normal approximation is unreliable for such small samples.
    """
    # Local import: the normal survival function stands in for
    # ``stats.zprob`` (not available in current SciPy) and avoids the
    # cancellation of ``1 - cdf`` when |z| is large.
    from scipy.stats import norm

    if y is None:
        d = x
    else:
        x, y = asarray(x), asarray(y)
        if len(x) != len(y):
            raise ValueError('Unequal N in wilcoxon. Aborting.')
        d = x-y
    d = compress(not_equal(d,0),d,axis=-1)  # Keep all non-zero differences
    count = len(d)
    if count < 10:
        print("Warning: sample size too small for normal approximation.")
    r = stats.rankdata(abs(d))
    r_plus = sum((d > 0)*r,axis=0)
    r_minus = sum((d < 0)*r,axis=0)
    T = min(r_plus, r_minus)
    mn = count*(count+1.0)*0.25
    # Variance of T under the null hypothesis.  Every group of t tied ranks
    # lowers it by t*(t*t - 1)/48; untied ranks (t == 1) contribute nothing,
    # so no separate "are there ties" check is needed.
    _, tie_sizes = unique(r, return_counts=True)
    tie_term = 0.5*(tie_sizes*(tie_sizes*tie_sizes - 1.0)).sum()
    se = sqrt((count*(count+1)*(2*count+1.0) - tie_term)/24)
    z = (T - mn)/se
    prob = 2*norm.sf(abs(z))
    return T, prob
示例2: mood
def mood(x,y):
    """
    Perform Mood's test for equal scale parameters.

    Determines whether two distributions with equal medians have the same
    scale parameter.  One can reject the null hypothesis that the ratio of
    scale parameters is 1 if the returned p-value is small (say < 0.05).

    Parameters
    ----------
    x, y : array_like
        Arrays of sample data.

    Returns
    -------
    z : float
        The z statistic of the normal approximation.
    pval : float
        The two-sided p-value.

    Raises
    ------
    ValueError
        If the two samples together hold fewer than 3 observations.
    """
    n = len(x)
    m = len(y)
    xy = r_[x,y]
    N = m+n
    if N < 3:
        raise ValueError("Not enough observations.")
    ranks = stats.rankdata(xy)
    Ri = ranks[:n]  # ranks of the x observations within the pooled sample
    M = sum((Ri - (N+1.0)/2)**2,axis=0)
    # Approx stat.
    mnM = n*(N*N-1.0)/12
    varM = m*n*(N+1.0)*(N+2)*(N-2)/180
    z = (M-mnM)/sqrt(varM)
    # 2*min(cdf(z), 1 - cdf(z)) equals 2*sf(|z|), but the survival function
    # does not round to zero when cdf(z) is within rounding error of 1.
    pval = 2*distributions.norm.sf(abs(z))
    return z, pval
示例3: mood
def mood(x,y):
    """
    Perform Mood's two-sample test for equal scale parameters.

    A non-parametric test of the null hypothesis that both samples are
    drawn from the same distribution with the same scale parameter.

    Parameters
    ----------
    x, y : array_like
        Arrays of sample data.

    Returns
    -------
    z : float
        The z-score for the hypothesis test.
    p-value : float
        The two-sided p-value for the hypothesis test.

    Raises
    ------
    ValueError
        If the two samples together hold fewer than 3 observations.

    See Also
    --------
    fligner : A non-parametric test for the equality of k variances
    ansari : A non-parametric test for the equality of 2 variances
    bartlett : A parametric test for equality of k variances in normal samples
    levene : A parametric test for equality of k variances

    Notes
    -----
    The samples are assumed to come from densities f(x) and f(x/s)/s for
    some probability density f; the null hypothesis is s = 1.
    """
    size_x = len(x)
    size_y = len(y)
    pooled = r_[x, y]
    total = size_y + size_x
    if total < 3:
        raise ValueError("Not enough observations.")

    # Ranks of the x observations inside the pooled sample.
    x_ranks = stats.rankdata(pooled)[:size_x]
    midrank = (total + 1.0)/2
    M = sum((x_ranks - midrank)**2, axis=0)

    # Mean and variance of M under the null hypothesis.
    expected = size_x*(total*total - 1.0)/12
    variance = size_y*size_x*(total + 1.0)*(total + 2)*(total - 2)/180
    z = (M - expected)/sqrt(variance)

    # Pick whichever tail is the small one directly, rather than forming
    # 1 - cdf, then double it for the two-sided test.
    tail = distributions.norm.sf if z > 0 else distributions.norm.cdf
    return z, tail(z)*2.
示例4: ansari
def ansari(x,y):
    """
    Perform the Ansari-Bradley test for equal scale parameters.

    A non-parametric test for the equality of the scale parameter of the
    distributions from which two samples were drawn.

    Parameters
    ----------
    x, y : array_like
        Arrays of sample data.

    Returns
    -------
    AB : float
        The Ansari-Bradley test statistic.
    p-value : float
        The p-value of the hypothesis test.

    Raises
    ------
    ValueError
        If either sample is empty.

    See Also
    --------
    fligner : A non-parametric test for the equality of k variances
    mood : A non-parametric test for the equality of two scale parameters

    Notes
    -----
    The p-value is exact when both samples hold fewer than 55 observations
    and there are no ties; otherwise a normal approximation is used.  A
    warning is issued when ties are present and at least one sample holds
    fewer than 55 observations.

    References
    ----------
    .. [1] Sprent, Peter and N.C. Smeeton.  Applied nonparametric statistical
           methods.  3rd ed.  Chapman and Hall/CRC.  2001.  Section 5.8.2.
    """
    x, y = asarray(x), asarray(y)
    n = len(x)
    m = len(y)
    if m < 1:
        raise ValueError("Not enough other observations.")
    if n < 1:
        raise ValueError("Not enough test observations.")

    N = m + n
    pooled = r_[x, y]
    rank = stats.rankdata(pooled)
    # Rank counted from whichever end of the ordered pooled sample is nearer.
    symrank = amin(array((rank, N - rank + 1)), 0)
    AB = sum(symrank[:n], axis=0)

    has_ties = len(unique(pooled)) != len(pooled)
    small_m = m < 55
    small_n = n < 55
    if has_ties and (small_m or small_n):
        warnings.warn("Ties preclude use of exact statistic.")

    if small_m and small_n and not has_ties:
        # Exact null distribution of AB, as tabulated by statlib.gscale.
        astart, a1, ifault = statlib.gscale(n, m)
        ind = AB - astart
        total = sum(a1, axis=0)
        if ind < len(a1)/2.0:
            # Lower tail: include the entry at ``ind`` when it is integral.
            cind = int(ceil(ind))
            stop = cind + 1 if ind == cind else cind
            tail = a1[:stop]
        else:
            # Upper tail: include the entry at ``ind`` when it is integral.
            find = int(floor(ind))
            start = find if ind == floor(ind) else find + 1
            tail = a1[start:]
        pval = 2.0*sum(tail, axis=0)/total
        return AB, min(1.0, pval)

    # Otherwise compute the normal approximation.
    odd = N % 2
    if odd:
        mnAB = n*(N+1.0)**2 / 4.0 / N
    else:
        mnAB = n*(N+2.0)/4.0

    if has_ties:
        # Tie-adjusted variance; fac is the sum of squared symmetric ranks.
        fac = sum(symrank**2, axis=0)
        if odd:
            varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1))
        else:
            varAB = m*n*(16*fac-N*(N+2)**2)/(16.0 * N * (N-1))
    elif odd:
        varAB = n*m*(N+1.0)*(3+N**2)/(48.0*N**2)
    else:
        varAB = m*n*(N+2)*(N-2.0)/48/(N-1.0)

    z = (AB - mnAB)/sqrt(varAB)
    pval = distributions.norm.sf(abs(z)) * 2.0
    return AB, pval
示例5: wilcoxon
def wilcoxon(x,y=None):
    """
    Calculate the Wilcoxon signed-rank test.

    The Wilcoxon signed-rank test tests the null hypothesis that two
    related paired samples come from the same distribution.  In particular,
    it tests whether the distribution of the differences x - y is symmetric
    about zero.  It is a non-parametric version of the paired T-test.

    Parameters
    ----------
    x : array_like
        The first set of measurements.
    y : array_like, optional
        The second set of measurements.  If y is not given, then the x array
        is considered to be the differences between the two sets of
        measurements.

    Returns
    -------
    T : float
        The sum of the ranks of the differences above or below zero,
        whichever is smaller.
    p-value : float
        The two-sided p-value for the test.

    Raises
    ------
    ValueError
        If `x` and `y` have different lengths.

    Notes
    -----
    Because the normal approximation is used for the calculations, the
    samples used should be large.  A typical rule is to require that
    n > 20.  Zero differences are discarded, and a warning is issued when
    fewer than 10 non-zero differences remain.

    References
    ----------
    .. [1] http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
    """
    if y is None:
        d = x
    else:
        x, y = asarray(x), asarray(y)
        if len(x) != len(y):
            raise ValueError('Unequal N in wilcoxon. Aborting.')
        d = x-y
    d = compress(not_equal(d,0),d,axis=-1)  # Keep all non-zero differences
    count = len(d)
    if count < 10:
        warnings.warn("Warning: sample size too small for normal approximation.")
    r = stats.rankdata(abs(d))
    r_plus = sum((d > 0)*r,axis=0)
    r_minus = sum((d < 0)*r,axis=0)
    T = min(r_plus, r_minus)
    mn = count*(count+1.0)*0.25
    # Variance of T under the null hypothesis.  Every group of t tied ranks
    # lowers it by t*(t*t - 1)/48; untied ranks (t == 1) contribute nothing,
    # so no separate "are there ties" check is needed.
    _, tie_sizes = unique(r, return_counts=True)
    tie_term = 0.5*(tie_sizes*(tie_sizes*tie_sizes - 1.0)).sum()
    se = sqrt((count*(count+1)*(2*count+1.0) - tie_term)/24)
    z = (T - mn)/se
    prob = 2 * distributions.norm.sf(abs(z))
    return T, prob
示例6: fligner
def fligner(*args,**kwds):
    """
    Perform Fligner's test for equal variances.

    Fligner's test tests the null hypothesis that all input samples
    are from populations with equal variances.  Fligner's test is
    non-parametric in contrast to Bartlett's test `bartlett` and
    Levene's test `levene`.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        Arrays of sample data.  Need not be the same length.
    center : {'mean', 'median', 'trimmed'}, optional
        Keyword argument controlling which function of the data
        is used in computing the test statistic.  The default
        is 'median'.
    proportiontocut : float, optional
        When `center` is 'trimmed', this gives the proportion of data points
        to cut from each end.  (See `scipy.stats.trim_mean`.)
        Default is 0.05.

    Returns
    -------
    Xsq : float
        The test statistic.
    p-value : float
        The p-value for the hypothesis test.

    Raises
    ------
    TypeError
        If a keyword other than `center` or `proportiontocut` is passed.
    ValueError
        If fewer than two samples are given or `center` is not one of the
        accepted values.

    Notes
    -----
    As with Levene's test there are three variants
    of Fligner's test that differ by the measure of central
    tendency used in the test.  See `levene` for more information.

    References
    ----------
    .. [1] http://www.stat.psu.edu/~bgl/center/tr/TR993.ps
    .. [2] Fligner, M.A. and Killeen, T.J. (1976). Distribution-free two-sample
           tests for scale. 'Journal of the American Statistical Association.'
           71(353), 210-213.
    """
    # Handle keyword arguments.
    for kw in kwds:
        if kw not in ('center', 'proportiontocut'):
            raise TypeError("fligner() got an unexpected keyword argument '%s'" % kw)
    center = kwds.get('center', 'median')
    proportiontocut = kwds.get('proportiontocut', 0.05)

    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")
    if center not in ('mean', 'median', 'trimmed'):
        # The trailing space keeps "'median'" and "or" apart in the message.
        raise ValueError("Keyword argument <center> must be 'mean', 'median' "
                         "or 'trimmed'.")

    if center == 'trimmed':
        # Trim both ends of every sample first; the centre is then the plain
        # mean of what is left.
        args = tuple(stats.trimboth(arg, proportiontocut) for arg in args)
    if center == 'median':
        func = lambda x: np.median(x, axis=0)
    else:  # 'mean', or 'trimmed' after trimming
        func = lambda x: np.mean(x, axis=0)

    Ni = asarray([len(sample) for sample in args])
    Yci = asarray([func(sample) for sample in args])
    Ntot = sum(Ni,axis=0)
    # compute Zij's: absolute deviations of each sample from its own centre
    Zij = [abs(asarray(args[i])-Yci[i]) for i in range(k)]
    # Pool the deviations; g records where each sample starts and ends
    # inside the pooled list.
    allZij = []
    g = [0]
    for i in range(k):
        allZij.extend(list(Zij[i]))
        g.append(len(allZij))
    ranks = stats.rankdata(allZij)
    a = distributions.norm.ppf(ranks/(2*(Ntot+1.0)) + 0.5)
    # compute Aibar
    Aibar = _apply_func(a,g,sum) / Ni
    anbar = np.mean(a, axis=0)
    varsq = np.var(a,axis=0, ddof=1)
    Xsq = sum(Ni*(asarray(Aibar)-anbar)**2.0,axis=0)/varsq
    pval = distributions.chi2.sf(Xsq,k-1)  # 1 - cdf
    return Xsq, pval
示例7: range
# Excerpt of a test script for a ``stats`` module.  The names l, lf, a, af,
# N and stats are presumably defined earlier in the script (not visible in
# this excerpt).

# Second sample: the integers 4..23 with one value overwritten.  range() is
# materialised as a list first because a Python 3 range object does not
# support item assignment.
m = list(range(4,24))
m[10] = 34
b = N.array(m)
print('\n\nF_oneway:')
print(stats.F_oneway(l,m))
print(stats.F_oneway(a,b))
# print 'F_value:',stats.F_value(l),stats.F_value(a)
print('\nSUPPORT')
print('sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af))
print('cumsum:')
print(stats.cumsum(l))
print(stats.cumsum(lf))
print(stats.cumsum(a))
print(stats.cumsum(af))
print('ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af))
print('summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b))
print('sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af))
print('sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b))
print('shellsort:')
print(stats.shellsort(m))
print(stats.shellsort(b))
print('rankdata:')
print(stats.rankdata(m))
print(stats.rankdata(b))
print('\nANOVAs')
# execfile() exists only in Python 2; compiling and exec-ing the file runs
# the same local script in the current namespace on Python 2 and 3.
with open('testanova.py') as anova_script:
    exec(compile(anova_script.read(), 'testanova.py', 'exec'))
示例8: fligner
def fligner(*args,**kwds):
    """
    Perform Fligner's test for equal variances.

    Fligner's test tests the null hypothesis that all input samples
    are from populations with equal variances.  Fligner's test is
    non-parametric in contrast to Bartlett's test bartlett_ and
    Levene's test levene_.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        Arrays of sample data.  Need not be the same length.
    center : {'mean', 'median', 'trimmed'}, optional
        Keyword argument controlling which function of the data
        is used in computing the test statistic.  The default
        is 'median'.
    proportiontocut : float, optional
        Only used when `center` is 'trimmed': the proportion of data points
        cut from each end of every sample before its mean is taken.
        Default is 0.05.

    Returns
    -------
    Xsq : float
        The test statistic.
    p-value : float
        The p-value for the hypothesis test.

    Raises
    ------
    ValueError
        If fewer than two samples are given or `center` is not one of the
        accepted values.

    Notes
    -----
    As with Levene's test there are three variants
    of Fligner's test that differ by the measure of central
    tendency used in the test.  See levene_ for more information.

    References
    ----------
    .. [1] http://www.stat.psu.edu/~bgl/center/tr/TR993.ps
    .. [2] Fligner, M.A. and Killeen, T.J. (1976). Distribution-free two-sample
           tests for scale. 'Journal of the American Statistical Association.'
           71(353), 210-213.
    """
    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")
    center = kwds.get('center', 'median')
    proportiontocut = kwds.get('proportiontocut', 0.05)
    if center not in ('mean', 'median', 'trimmed'):
        # The trailing space keeps "'median'" and "or" apart in the message.
        raise ValueError("Keyword argument <center> must be 'mean', 'median' "
                         "or 'trimmed'.")

    if center == 'trimmed':
        # stats.trim_mean cannot be called with the sample alone (it needs
        # the proportion to cut), so trim both ends of every sample here and
        # use the plain mean of what is left as the centre.
        args = tuple(stats.trimboth(arg, proportiontocut) for arg in args)
    if center == 'median':
        func = lambda x: np.median(x, axis=0)
    else:  # 'mean', or 'trimmed' after trimming
        func = lambda x: np.mean(x, axis=0)

    Ni = asarray([len(sample) for sample in args])
    Yci = asarray([func(sample) for sample in args])
    Ntot = sum(Ni,axis=0)
    # compute Zij's: absolute deviations of each sample from its own centre
    Zij = [abs(asarray(args[i])-Yci[i]) for i in range(k)]
    # Pool the deviations; g records where each sample starts and ends
    # inside the pooled list.
    allZij = []
    g = [0]
    for i in range(k):
        allZij.extend(list(Zij[i]))
        g.append(len(allZij))
    a = distributions.norm.ppf(stats.rankdata(allZij)/(2*(Ntot+1.0)) + 0.5)
    # compute Aibar
    Aibar = _apply_func(a,g,sum) / Ni
    anbar = np.mean(a, axis=0)
    varsq = np.var(a,axis=0, ddof=1)
    Xsq = sum(Ni*(asarray(Aibar)-anbar)**2.0,axis=0)/varsq
    pval = distributions.chi2.sf(Xsq,k-1)  # 1 - cdf
    return Xsq, pval
示例9: fligner
def fligner(*args,**kwds):
    """Perform Fligner's test with the null hypothesis that all input
    samples have equal variances.

    Inputs are sample vectors:  fligner(x,y,z,...)

    Keyword inputs:
         center -- which measure of central tendency is used:
                   'mean', 'median' (default) or 'trimmed'
         proportiontocut -- only used with center='trimmed': the proportion
                   of points cut from each end of every sample before its
                   mean is taken (default 0.05)

    Outputs:  (Xsq, pval)

         Xsq  -- the Test statistic
         pval -- significance level if null is rejected with this value of X
                 (prob. that null is true but rejected with this p-value.)

    Raises ValueError if fewer than two samples are given or if center is
    not one of the accepted values.

    References:

      http://www.stat.psu.edu/~bgl/center/tr/TR993.ps

      Fligner, M.A. and Killeen, T.J. (1976). Distribution-free two-sample
      tests for scale. 'Journal of the American Statistical Association.'
      71(353), 210-213.
    """
    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")
    center = kwds.get('center', 'median')
    proportiontocut = kwds.get('proportiontocut', 0.05)
    if center not in ('mean', 'median', 'trimmed'):
        # The trailing space keeps "'median'" and "or" apart in the message.
        raise ValueError("Keyword argument <center> must be 'mean', 'median' "
                         "or 'trimmed'.")

    if center == 'trimmed':
        # stats.trim_mean cannot be called with the sample alone (it needs
        # the proportion to cut), so trim both ends of every sample here and
        # use the plain mean of what is left as the centre.
        args = tuple(stats.trimboth(arg, proportiontocut) for arg in args)
    if center == 'median':
        func = stats.median
    else:  # 'mean', or 'trimmed' after trimming
        func = stats.mean

    Ni = asarray([len(sample) for sample in args])
    Yci = asarray([func(sample) for sample in args])
    Ntot = sum(Ni,axis=0)
    # compute Zij's: absolute deviations of each sample from its own centre
    Zij = [abs(asarray(args[i])-Yci[i]) for i in range(k)]
    # Pool the deviations; g records where each sample starts and ends
    # inside the pooled list.
    allZij = []
    g = [0]
    for i in range(k):
        allZij.extend(list(Zij[i]))
        g.append(len(allZij))
    a = distributions.norm.ppf(stats.rankdata(allZij)/(2*(Ntot+1.0)) + 0.5)
    # compute Aibar
    Aibar = _apply_func(a,g,sum) / Ni
    anbar = stats.mean(a)
    varsq = stats.var(a)
    Xsq = sum(Ni*(asarray(Aibar)-anbar)**2.0,axis=0)/varsq
    pval = distributions.chi2.sf(Xsq,k-1)  # 1 - cdf
    return Xsq, pval
示例10: ansari
def ansari(x,y):
    """
    Perform the Ansari-Bradley test for equal scale parameters.

    Determines whether the scale parameter of two distributions with equal
    medians is the same.  One can reject the null hypothesis that the ratio
    of variances is 1 if the returned p-value is small (say < 0.05).

    Parameters
    ----------
    x, y : array_like
        Arrays of sample data.

    Returns
    -------
    AB : float
        The Ansari-Bradley test statistic.
    pval : float
        The p-value of the test: exact when both samples hold fewer than 55
        observations and there are no ties, otherwise from a normal
        approximation.

    Raises
    ------
    ValueError
        If either sample is empty.

    Notes
    -----
    A message is printed when ties are present and at least one sample
    holds fewer than 55 observations.
    """
    x,y = asarray(x),asarray(y)
    n = len(x)
    m = len(y)
    if m < 1:
        raise ValueError("Not enough other observations.")
    if n < 1:
        raise ValueError("Not enough test observations.")
    N = m+n
    xy = r_[x,y]  # combine
    rank = stats.rankdata(xy)
    # Rank counted from whichever end of the ordered pooled sample is nearer.
    symrank = amin(array((rank,N-rank+1)),0)
    AB = sum(symrank[:n],axis=0)
    uxy = unique(xy)
    repeats = (len(uxy) != len(xy))
    exact = ((m<55) and (n<55) and not repeats)
    if repeats and ((m < 55) or (n < 55)):
        print("Ties preclude use of exact statistic.")
    if exact:
        # Exact null distribution of AB, as tabulated by statlib.gscale.
        astart, a1, ifault = statlib.gscale(n,m)
        ind = AB-astart
        total = sum(a1,axis=0)
        if ind < len(a1)/2.0:
            # Lower tail: include the entry at ``ind`` when it is integral.
            cind = int(ceil(ind))
            if ind == cind:
                pval = 2.0*sum(a1[:cind+1],axis=0)/total
            else:
                pval = 2.0*sum(a1[:cind],axis=0)/total
        else:
            # Upper tail: include the entry at ``ind`` when it is integral.
            find = int(floor(ind))
            if ind == floor(ind):
                pval = 2.0*sum(a1[find:],axis=0)/total
            else:
                pval = 2.0*sum(a1[find+1:],axis=0)/total
        return AB, min(1.0,pval)

    # otherwise compute normal approximation
    if N % 2:  # N odd
        mnAB = n*(N+1.0)**2 / 4.0 / N
        varAB = n*m*(N+1.0)*(3+N**2)/(48.0*N**2)
    else:
        mnAB = n*(N+2.0)/4.0
        varAB = m*n*(N+2)*(N-2.0)/48/(N-1.0)
    if repeats:   # adjust variance estimates
        # compute sum(tj * rj**2,axis=0)
        fac = sum(symrank**2,axis=0)
        if N % 2:  # N odd
            varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1))
        else:  # N even
            varAB = m*n*(16*fac-N*(N+2)**2)/(16.0 * N * (N-1))
    z = (AB - mnAB)/sqrt(varAB)
    # The survival function keeps precision where 1 - cdf(|z|) would round
    # to exactly zero for large |z|.
    pval = distributions.norm.sf(abs(z))*2.0
    return AB, pval
示例11: range
# Excerpt of a test script for a ``stats`` module.  The names l, lf, a, af,
# N and stats are presumably defined earlier in the script (not visible in
# this excerpt).

def _show(*items):
    """Print the items separated by single spaces.

    Produces the same line as a Python 2 ``print a, b, c`` statement, and
    runs unchanged on Python 2 and Python 3.
    """
    print(' '.join(str(item) for item in items))


# Second sample: the integers 4..23 with one value overwritten.  range() is
# materialised as a list first because a Python 3 range object does not
# support item assignment.
m = list(range(4,24))
m[10] = 34
b = N.array(m)
print('\n\nF_oneway:')
print(stats.F_oneway(l,m))
print(stats.F_oneway(a,b))
#print 'F_value:',stats.F_value(l),stats.F_value(a)
print('\nSUPPORT')
_show('sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af))
print('cumsum:')
print(stats.cumsum(l))
print(stats.cumsum(lf))
print(stats.cumsum(a))
print(stats.cumsum(af))
_show('ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af))
_show('summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b))
_show('sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af))
_show('sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b))
print('shellsort:')
print(stats.shellsort(m))
print(stats.shellsort(b))
print('rankdata:')
print(stats.rankdata(m))
print(stats.rankdata(b))
print('\nANOVAs')
# execfile() exists only in Python 2; compiling and exec-ing the file runs
# the same local script in the current namespace on Python 2 and 3.
with open('testanova.py') as anova_script:
    exec(compile(anova_script.read(), 'testanova.py', 'exec'))