Python stats.rankdata函数代码示例

本文整理汇总了Python中stats.rankdata函数的典型用法代码示例。如果您正苦于以下问题：Python rankdata函数的具体用法？Python rankdata怎么用？Python rankdata使用的例子？那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了rankdata函数的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: wilcoxon

def wilcoxon(x,y=None):
    """
Calculates the Wilcoxon signed-rank test for the null hypothesis that two samples come from the same distribution. A non-parametric T-test. (need N > 20)

Returns: t-statistic, two-tailed p-value
"""
    if y is None:
        d = x
    else:
        x, y = map(asarray, (x, y))
        if len(x) <> len(y):
            raise ValueError, 'Unequal N in wilcoxon.  Aborting.'
        d = x-y
    d = compress(not_equal(d,0),d,axis=-1) # Keep all non-zero differences
    count = len(d)
    if (count < 10):
        print "Warning: sample size too small for normal approximation."
    r = stats.rankdata(abs(d))
    r_plus = sum((d > 0)*r,axis=0)
    r_minus = sum((d < 0)*r,axis=0)
    T = min(r_plus, r_minus)
    mn = count*(count+1.0)*0.25
    se = math.sqrt(count*(count+1)*(2*count+1.0)/24)
    if (len(r) != len(unique(r))):  # handle ties in data
        replist, repnum = find_repeats(r)
        corr = 0.0
        for i in range(len(replist)):
            si = repnum[i]
            corr += 0.5*si*(si*si-1.0)
        V = se*se - corr
        se = sqrt((count*V - T*T)/(count-1.0))
    z = (T - mn)/se
    prob = 2*(1.0 -stats.zprob(abs(z)))
    return T, prob

开发者ID:mbentz80，项目名称:jzigbeercp，代码行数:34，代码来源:morestats.py

示例2: mood

def mood(x,y):
    """Determine if the scale parameter for two distributions with equal
    medians is the same using a Mood test.

    Specifically, compute the z statistic and the probability of error
    that the null hypothesis is true but rejected with the computed
    statistic as the critical value.

    One can reject the null hypothesis that the ratio of scale parameters is
    1 if the returned probability of error is small (say < 0.05)
    """
    n = len(x)
    m = len(y)
    xy = r_[x,y]
    N = m+n
    if (N < 3):
        raise ValueError, "Not enough observations."
    ranks = stats.rankdata(xy)
    Ri = ranks[:n]
    M = sum((Ri - (N+1.0)/2)**2,axis=0)
    # Approx stat.
    mnM = n*(N*N-1.0)/12
    varM = m*n*(N+1.0)*(N+2)*(N-2)/180
    z = (M-mnM)/sqrt(varM)
    p = distributions.norm.cdf(z)
    pval = 2*min(p,1-p)
    return z, pval

开发者ID:mbentz80，项目名称:jzigbeercp，代码行数:27，代码来源:morestats.py

示例3: mood

def mood(x,y):
    """
    Perform Mood's test for equal scale parameters.

    Mood's two-sample test for scale parameters is a non-parametric
    test for the null hypothesis that two samples are drawn from the
    same distribution with the same scale parameter.

    Parameters
    ----------
    x, y : array_like
        Arrays of sample data.

    Returns
    -------
    z : float
        The z-score for the hypothesis test.
    p-value : float
        The p-value for the hypothesis test.

    See Also
    --------
    fligner : A non-parametric test for the equality of k variances
    ansari : A non-parametric test for the equality of 2 variances
    bartlett : A parametric test for equality of k variances in normal samples
    levene : A parametric test for equality of k variances

    Notes
    -----
    The data are assumed to be drawn from probability distributions f(x) and
    f(x/s)/s respectively, for some probability density function f.  The
    null hypothesis is that s = 1.

    """
    n = len(x)
    m = len(y)
    xy = r_[x,y]
    N = m+n
    if N < 3:
        raise ValueError("Not enough observations.")
    ranks = stats.rankdata(xy)
    Ri = ranks[:n]
    M = sum((Ri - (N+1.0)/2)**2,axis=0)
    # Approx stat.
    mnM = n*(N*N-1.0)/12
    varM = m*n*(N+1.0)*(N+2)*(N-2)/180
    z = (M-mnM)/sqrt(varM)

    # Numerically better than p = norm.cdf(x); p = min(p, 1 - p)
    if z > 0:
        pval = distributions.norm.sf(z)
    else:
        pval = distributions.norm.cdf(z)

    # Account for two-sidedness
    pval *= 2.
    return z, pval

开发者ID:87，项目名称:scipy，代码行数:57，代码来源:morestats.py

示例4: ansari

def ansari(x,y):
    """
    Perform the Ansari-Bradley test for equal scale parameters

    The Ansari-Bradley test is a non-parametric test for the equality
    of the scale parameter of the distributions from which two
    samples were drawn.

    Parameters
    ----------
    x, y : array_like
        arrays of sample data

    Returns
    -------
    AB : float
        The Ansari-Bradley test statistic
    p-value : float
        The p-value of the hypothesis test

    See Also
    --------
    fligner : A non-parametric test for the equality of k variances
    mood : A non-parametric test for the equality of two scale parameters

    Notes
    -----
    The p-value given is exact when the sample sizes are both less than
    55 and there are no ties, otherwise a normal approximation for the
    p-value is used.

    References
    ----------
    .. [1] Sprent, Peter and N.C. Smeeton.  Applied nonparametric statistical
           methods.  3rd ed. Chapman and Hall/CRC. 2001.  Section 5.8.2.

    """
    x,y = asarray(x),asarray(y)
    n = len(x)
    m = len(y)
    if m < 1:
        raise ValueError("Not enough other observations.")
    if n < 1:
        raise ValueError("Not enough test observations.")
    N = m+n
    xy = r_[x,y]  # combine
    rank = stats.rankdata(xy)
    symrank = amin(array((rank,N-rank+1)),0)
    AB = sum(symrank[:n],axis=0)
    uxy = unique(xy)
    repeats = (len(uxy) != len(xy))
    exact = ((m<55) and (n<55) and not repeats)
    if repeats and ((m < 55)  or (n < 55)):
        warnings.warn("Ties preclude use of exact statistic.")
    if exact:
        astart, a1, ifault = statlib.gscale(n,m)
        ind = AB-astart
        total = sum(a1,axis=0)
        if ind < len(a1)/2.0:
            cind = int(ceil(ind))
            if (ind == cind):
                pval = 2.0*sum(a1[:cind+1],axis=0)/total
            else:
                pval = 2.0*sum(a1[:cind],axis=0)/total
        else:
            find = int(floor(ind))
            if (ind == floor(ind)):
                pval = 2.0*sum(a1[find:],axis=0)/total
            else:
                pval = 2.0*sum(a1[find+1:],axis=0)/total
        return AB, min(1.0,pval)

    # otherwise compute normal approximation
    if N % 2:  # N odd
        mnAB = n*(N+1.0)**2 / 4.0 / N
        varAB = n*m*(N+1.0)*(3+N**2)/(48.0*N**2)
    else:
        mnAB = n*(N+2.0)/4.0
        varAB = m*n*(N+2)*(N-2.0)/48/(N-1.0)
    if repeats:   # adjust variance estimates
        # compute sum(tj * rj**2,axis=0)
        fac = sum(symrank**2,axis=0)
        if N % 2: # N odd
            varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1))
        else:  # N even
            varAB = m*n*(16*fac-N*(N+2)**2)/(16.0 * N * (N-1))
    z = (AB - mnAB)/sqrt(varAB)
    pval = distributions.norm.sf(abs(z)) * 2.0
    return AB, pval

开发者ID:87，项目名称:scipy，代码行数:89，代码来源:morestats.py

示例5: wilcoxon

def wilcoxon(x,y=None):
    """
    Calculate the Wilcoxon signed-rank test.

    The Wilcoxon signed-rank test tests the null hypothesis that two
    related paired samples come from the same distribution. In particular,
    it tests whether the distribution of the differences x - y is symmetric
    about zero. It is a non-parametric version of the paired T-test.

    Parameters
    ----------
    x : array_like
        The first set of measurements.
    y : array_like, optional
        The second set of measurements.  If y is not given, then the x array
        is considered to be the differences between the two sets of
        measurements.

    Returns
    -------
    T : float
        The sum of the ranks of the differences above or below zero, whichever
        is smaller.
    p-value : float
        The two-sided p-value for the test.

    Notes
    -----
    Because the normal approximation is used for the calculations, the
    samples used should be large.  A typical rule is to require that
    n > 20.

    References
    ----------
    .. [1] http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test

    """
    if y is None:
        d = x
    else:
        x, y = map(asarray, (x, y))
        if len(x) <> len(y):
            raise ValueError('Unequal N in wilcoxon.  Aborting.')
        d = x-y
    d = compress(not_equal(d,0),d,axis=-1) # Keep all non-zero differences
    count = len(d)
    if (count < 10):
        warnings.warn("Warning: sample size too small for normal approximation.")
    r = stats.rankdata(abs(d))
    r_plus = sum((d > 0)*r,axis=0)
    r_minus = sum((d < 0)*r,axis=0)
    T = min(r_plus, r_minus)
    mn = count*(count+1.0)*0.25
    se = math.sqrt(count*(count+1)*(2*count+1.0)/24)
    if (len(r) != len(unique(r))):  # handle ties in data
        replist, repnum = find_repeats(r)
        corr = 0.0
        for i in range(len(replist)):
            si = repnum[i]
            corr += 0.5*si*(si*si-1.0)
        V = se*se - corr
        se = sqrt((count*V - T*T)/(count-1.0))
    z = (T - mn)/se
    prob = 2 * distributions.norm.sf(abs(z))
    return T, prob

开发者ID:87，项目名称:scipy，代码行数:65，代码来源:morestats.py

示例6: fligner

def fligner(*args,**kwds):
    """
    Perform Fligner's test for equal variances.

    Fligner's test tests the null hypothesis that all input samples
    are from populations with equal variances.  Fligner's test is
    non-parametric in contrast to Bartlett's test `bartlett` and
    Levene's test `levene`.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        arrays of sample data.  Need not be the same length
    center : {'mean', 'median', 'trimmed'}, optional
        keyword argument controlling which function of the data
        is used in computing the test statistic.  The default
        is 'median'.
    proportiontocut : float, optional
        When `center` is 'trimmed', this gives the proportion of data points
        to cut from each end. (See `scipy.stats.trim_mean`.)
        Default is 0.05.

    Returns
    -------
    Xsq : float
        the test statistic
    p-value : float
        the p-value for the hypothesis test

    Notes
    -----
    As with Levene's test there are three variants
    of Fligner's test that differ by the measure of central
    tendency used in the test.  See `levene` for more information.

    References
    ----------
    .. [1] http://www.stat.psu.edu/~bgl/center/tr/TR993.ps

    .. [2] Fligner, M.A. and Killeen, T.J. (1976). Distribution-free two-sample
           tests for scale. 'Journal of the American Statistical Association.'
           71(353), 210-213.

    """
    # Handle keyword arguments.
    center = 'median'
    proportiontocut = 0.05
    for kw, value in kwds.items():
        if kw not in ['center', 'proportiontocut']:
            raise TypeError("fligner() got an unexpected keyword argument '%s'" % kw)
        if kw == 'center':
            center = value
        else:
            proportiontocut = value

    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")

    if not center in ['mean','median','trimmed']:
        raise ValueError("Keyword argument <center> must be 'mean', 'median'"
              + "or 'trimmed'.")

    if center == 'median':
        func = lambda x: np.median(x, axis=0)
    elif center == 'mean':
        func = lambda x: np.mean(x, axis=0)
    else: # center == 'trimmed'
        args = tuple(stats.trimboth(arg, proportiontocut) for arg in args)
        func = lambda x: np.mean(x, axis=0)

    Ni = asarray([len(args[j]) for j in range(k)])
    Yci = asarray([func(args[j]) for j in range(k)])
    Ntot = sum(Ni,axis=0)
    # compute Zij's
    Zij = [abs(asarray(args[i])-Yci[i]) for i in range(k)]
    allZij = []
    g = [0]
    for i in range(k):
        allZij.extend(list(Zij[i]))
        g.append(len(allZij))

    ranks = stats.rankdata(allZij)
    a = distributions.norm.ppf(ranks/(2*(Ntot+1.0)) + 0.5)

    # compute Aibar
    Aibar = _apply_func(a,g,sum) / Ni
    anbar = np.mean(a, axis=0)
    varsq = np.var(a,axis=0, ddof=1)
    Xsq = sum(Ni*(asarray(Aibar)-anbar)**2.0,axis=0)/varsq
    pval = distributions.chi2.sf(Xsq,k-1) # 1 - cdf
    return Xsq, pval

开发者ID:87，项目名称:scipy，代码行数:92，代码来源:morestats.py

示例7: range

m = range(4,24)
m[10] = 34
b = N.array(m)

print('\n\nF_oneway:')
print(stats.F_oneway(l,m))
print(stats.F_oneway(a,b))
# print 'F_value:',stats.F_value(l),stats.F_value(a)

print('\nSUPPORT')
print('sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af))
print('cumsum:')
print(stats.cumsum(l))
print(stats.cumsum(lf))
print(stats.cumsum(a))
print(stats.cumsum(af))
print('ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af))
print('summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b))
print('sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af))
print('sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b))
print('shellsort:')
print(stats.shellsort(m))
print(stats.shellsort(b))
print('rankdata:')
print(stats.rankdata(m))
print(stats.rankdata(b))
print('\nANOVAs')

execfile('testanova.py')

开发者ID:klahnakoski，项目名称:pyLibrary，代码行数:29，代码来源:statstest.py

示例8: fligner

def fligner(*args,**kwds):
    """
    Perform Fligner's test for equal variances

    Fligner's test tests the null hypothesis that all input samples
    are from populations with equal variances.  Fligner's test is
    non-parametric in contrast to Bartlett's test bartlett_ and
    Levene's test levene_.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        arrays of sample data.  Need not be the same length
    center : {'mean', 'median', 'trimmed'}, optional
        keyword argument controlling which function of the data
        is used in computing the test statistic.  The default
        is 'median'.

    Returns
    -------
    Xsq : float
        the test statistic
    p-value : float
        the p-value for the hypothesis test

    Notes
    -----
    As with Levene's test there are three variants
    of Fligner's test that differ by the measure of central
    tendency used in the test.  See levene_ for more information.

    References
    ----------

    .. [1] http://www.stat.psu.edu/~bgl/center/tr/TR993.ps

    .. [2] Fligner, M.A. and Killeen, T.J. (1976). Distribution-free two-sample
           tests for scale. 'Journal of the American Statistical Association.'
           71(353), 210-213.

    """
    k = len(args)
    if k < 2:
        raise ValueError, "Must enter at least two input sample vectors."
    if 'center' in kwds.keys():
        center = kwds['center']
    else:
        center = 'median'
    if not center in ['mean','median','trimmed']:
        raise ValueError, "Keyword argument <center> must be 'mean', 'median'"\
              + "or 'trimmed'."
    if center == 'median':
        func = lambda x: np.median(x, axis=0)
    elif center == 'mean':
        func = lambda x: np.mean(x, axis=0)
    else:
        func = stats.trim_mean

    Ni = asarray([len(args[j]) for j in range(k)])
    Yci = asarray([func(args[j]) for j in range(k)])
    Ntot = sum(Ni,axis=0)
    # compute Zij's
    Zij = [abs(asarray(args[i])-Yci[i]) for i in range(k)]
    allZij = []
    g = [0]
    for i in range(k):
        allZij.extend(list(Zij[i]))
        g.append(len(allZij))

    a = distributions.norm.ppf(stats.rankdata(allZij)/(2*(Ntot+1.0)) + 0.5)

    # compute Aibar
    Aibar = _apply_func(a,g,sum) / Ni
    anbar = np.mean(a, axis=0)
    varsq = np.var(a,axis=0, ddof=1)

    Xsq = sum(Ni*(asarray(Aibar)-anbar)**2.0,axis=0)/varsq

    pval = distributions.chi2.sf(Xsq,k-1) # 1 - cdf
    return Xsq, pval

开发者ID:donaldson-lab，项目名称:Gene-Designer，代码行数:80，代码来源:morestats.py

示例9: fligner

def fligner(*args,**kwds):
    """Perform Levene test with the null hypothesis that all input samples
    have equal variances.

    Inputs are sample vectors:  bartlett(x,y,z,...)

    One keyword input, center, can be used with values
        center = 'mean', center='median' (default), center='trimmed'

    Outputs: (Xsq, pval)

         Xsq  -- the Test statistic
         pval -- significance level if null is rejected with this value of X
                 (prob. that null is true but rejected with this p-value.)

    References:

       http://www.stat.psu.edu/~bgl/center/tr/TR993.ps

       Fligner, M.A. and Killeen, T.J. (1976). Distribution-free two-sample
       tests for scale. 'Journal of the American Statistical Association.'
       71(353), 210-213.
    """
    k = len(args)
    if k < 2:
        raise ValueError, "Must enter at least two input sample vectors."
    if 'center' in kwds.keys():
        center = kwds['center']
    else:
        center = 'median'
    if not center in ['mean','median','trimmed']:
        raise ValueError, "Keyword argument <center> must be 'mean', 'median'"\
              + "or 'trimmed'."
    if center == 'median':
        func = stats.median
    elif center == 'mean':
        func = stats.mean
    else:
        func = stats.trim_mean

    Ni = asarray([len(args[j]) for j in range(k)])
    Yci = asarray([func(args[j]) for j in range(k)])
    Ntot = sum(Ni,axis=0)
    # compute Zij's
    Zij = [abs(asarray(args[i])-Yci[i]) for i in range(k)]
    allZij = []
    g = [0]
    for i in range(k):
        allZij.extend(list(Zij[i]))
        g.append(len(allZij))

    a = distributions.norm.ppf(stats.rankdata(allZij)/(2*(Ntot+1.0)) + 0.5)

    # compute Aibar
    Aibar = _apply_func(a,g,sum) / Ni
    anbar = stats.mean(a)
    varsq = stats.var(a)

    Xsq = sum(Ni*(asarray(Aibar)-anbar)**2.0,axis=0)/varsq

    pval = distributions.chi2.sf(Xsq,k-1) # 1 - cdf
    return Xsq, pval

开发者ID:mbentz80，项目名称:jzigbeercp，代码行数:62，代码来源:morestats.py

示例10: ansari

def ansari(x,y):
    """Determine if the scale parameter for two distributions with equal
    medians is the same using the Ansari-Bradley statistic.

    Specifically, compute the AB statistic and the probability of error
    that the null hypothesis is true but rejected with the computed
    statistic as the critical value.

    One can reject the null hypothesis that the ratio of variances is 1 if
    returned probability of error is small (say < 0.05)
    """
    x,y = asarray(x),asarray(y)
    n = len(x)
    m = len(y)
    if (m < 1):
        raise ValueError, "Not enough other observations."
    if (n < 1):
        raise ValueError, "Not enough test observations."
    N = m+n
    xy = r_[x,y]  # combine
    rank = stats.rankdata(xy)
    symrank = amin(array((rank,N-rank+1)),0)
    AB = sum(symrank[:n],axis=0)
    uxy = unique(xy)
    repeats = (len(uxy) != len(xy))
    exact = ((m<55) and (n<55) and not repeats)
    if repeats and ((m < 55)  or (n < 55)):
        print "Ties preclude use of exact statistic."
    if exact:
        astart, a1, ifault = statlib.gscale(n,m)
        ind = AB-astart
        total = sum(a1,axis=0)
        if ind < len(a1)/2.0:
            cind = int(ceil(ind))
            if (ind == cind):
                pval = 2.0*sum(a1[:cind+1],axis=0)/total
            else:
                pval = 2.0*sum(a1[:cind],axis=0)/total
        else:
            find = int(floor(ind))
            if (ind == floor(ind)):
                pval = 2.0*sum(a1[find:],axis=0)/total
            else:
                pval = 2.0*sum(a1[find+1:],axis=0)/total
        return AB, min(1.0,pval)

    # otherwise compute normal approximation
    if N % 2:  # N odd
        mnAB = n*(N+1.0)**2 / 4.0 / N
        varAB = n*m*(N+1.0)*(3+N**2)/(48.0*N**2)
    else:
        mnAB = n*(N+2.0)/4.0
        varAB = m*n*(N+2)*(N-2.0)/48/(N-1.0)
    if repeats:   # adjust variance estimates
        # compute sum(tj * rj**2,axis=0)
        fac = sum(symrank**2,axis=0)
        if N % 2: # N odd
            varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1))
        else:  # N even
            varAB = m*n*(16*fac-N*(N+2)**2)/(16.0 * N * (N-1))
    z = (AB - mnAB)/sqrt(varAB)
    pval = (1-distributions.norm.cdf(abs(z)))*2.0
    return AB, pval

开发者ID:mbentz80，项目名称:jzigbeercp，代码行数:63，代码来源:morestats.py

示例11: range

m = range(4,24)
m[10] = 34 
b = N.array(m)

print '\n\nF_oneway:'
print stats.F_oneway(l,m) 
print stats.F_oneway(a,b)
#print 'F_value:',stats.F_value(l),stats.F_value(a)

print '\nSUPPORT'
print 'sum:',stats.sum(l),stats.sum(lf),stats.sum(a),stats.sum(af)
print 'cumsum:'
print stats.cumsum(l)
print stats.cumsum(lf)
print stats.cumsum(a)
print stats.cumsum(af)
print 'ss:',stats.ss(l),stats.ss(lf),stats.ss(a),stats.ss(af)
print 'summult:',stats.summult(l,m),stats.summult(lf,m),stats.summult(a,b),stats.summult(af,b)
print 'sumsquared:',stats.square_of_sums(l),stats.square_of_sums(lf),stats.square_of_sums(a),stats.square_of_sums(af)
print 'sumdiffsquared:',stats.sumdiffsquared(l,m),stats.sumdiffsquared(lf,m),stats.sumdiffsquared(a,b),stats.sumdiffsquared(af,b)
print 'shellsort:'
print stats.shellsort(m)
print stats.shellsort(b)
print 'rankdata:'
print stats.rankdata(m)
print stats.rankdata(b)

print '\nANOVAs'
execfile('testanova.py')

开发者ID:Delostik，项目名称:common，代码行数:29，代码来源:statstest.py

注：本文中的stats.rankdata函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。