当前位置: 首页>>代码示例>>Python>>正文


Python hypergeom.sf方法代码示例

本文整理汇总了Python中scipy.stats.hypergeom.sf方法的典型用法代码示例。如果您正苦于以下问题:Python hypergeom.sf方法的具体用法?Python hypergeom.sf怎么用?Python hypergeom.sf使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scipy.stats.hypergeom的用法示例。


在下文中一共展示了hypergeom.sf方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: hypergeometric_test

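# Required import: from scipy.stats import hypergeom
# Note: hypergeom is a distribution object, not a module, so "from scipy.stats.hypergeom import sf" is not a valid import; call hypergeom.sf(...) / hypergeom.cdf(...) instead.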
def hypergeometric_test(x, M, n, N):
    """
    Return both one-sided hypergeometric tail probabilities for a count x.

    The hypergeometric distribution models drawing objects from a bin.
    - M is total number of objects
    - n is total number of Type I objects.
    - N is the number of objects drawn without replacement
    - x (random variate) represents the number of Type I objects among
      the N drawn from the total population

    Returns
    -------
    (pv_le, pv_gt) : tuple
        pv_le is the lower tail P(X <= x) and pv_gt is the upper tail
        P(X >= x). Both tails include x itself, so they sum to
        1 + P(X == x).

    Raises
    ------
    AssertionError
        If n > M, x > n or N > M.

    - http://en.wikipedia.org/wiki/Hypergeometric_distribution
    - https://www.biostars.org/p/66729/
    - http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.hypergeom.html
    - http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.hypergeometric.html
    - http://stackoverflow.com/questions/6594840/what-are-equivalents-to-rs-phyper-function-in-python
    """

    assert n <= M, "number of Type I objects cannot exceed the population"
    assert x <= n, "observed count cannot exceed the number of Type I objects"
    assert N <= M, "number of draws cannot exceed the population"
    # cdf(x) is P(X <= x); using x + 1 here would silently include one
    # extra outcome in the lower tail.
    pv_le = hypergeom.cdf(x, M, n, N)
    # sf(k) is P(X > k), so sf(x - 1) is P(X >= x). sf is preferred over
    # 1 - cdf because it can be more accurate in the upper tail.
    pv_gt = hypergeom.sf(x - 1, M, n, N)
    return pv_le, pv_gt
开发者ID:gis-rpd,项目名称:pipelines,代码行数:22,代码来源:essential_genes_from_tables.py

示例2: binomial_p

# Required imports: import numpy as np; from scipy.stats import binom
# Note: this example calls binom.cdf / binom.sf (the binomial analogue); it does not call hypergeom.sf.
def binomial_p(x, n, p, alternative='greater'):
    """
    Compute a binomial test p-value for an observed number of successes.

    Parameters
    ----------
    x : int
       observed number of successes (compared against `n` and passed to
       the binomial cdf/sf as the count)
    n : int
       number of trials
    p : float
       hypothesized success probability, passed to the binomial
       distribution as its probability parameter
    alternative : {'greater', 'less', 'two-sided'}
       alternative hypothesis to test (default: 'greater')

    Returns
    -------
    float
       P(X >= x) for 'greater', P(X <= x) for 'less', and twice the
       smaller of those two tails (capped at 1) for 'two-sided'

    Raises
    ------
    AssertionError
       if `alternative` is not one of the three supported values
    ValueError
       if `x` exceeds `n`
    """

    assert alternative in ("two-sided", "less", "greater")
    if x > n:
        raise ValueError("Cannot observe more successes than the population size")

    # Both tails include x itself: sf(x - 1) is P(X > x - 1) = P(X >= x).
    upper_tail = binom.sf(x - 1, n, p)
    lower_tail = binom.cdf(x, n, p)

    if alternative == 'greater':
        pvalue = upper_tail
    elif alternative == 'less':
        pvalue = lower_tail
    elif alternative == 'two-sided':
        # Including 0.5 in the minimum caps the doubled value at 1.
        pvalue = 2 * np.min((lower_tail, upper_tail, 0.5))
    return pvalue
开发者ID:statlab,项目名称:permute,代码行数:34,代码来源:utils.py

示例3: compute_pvalues

# Required import: import numpy as np
# Note: this method does not call hypergeom.sf directly; it dispatches to self.compute_pvalues_by_hypergeom or self.compute_pvalues_by_randomization.
def compute_pvalues(self, **kwargs):
    """
    Compute enrichment scores and derive the binary enrichment matrix.

    Recognized keyword arguments override instance settings before the
    computation runs:

    - 'how' -> self.enrichment_type
    - 'neighborhood_score_type' -> self.neighborhood_score_type
    - 'multiple_testing' -> self.multiple_testing

    All keyword arguments are also forwarded unchanged to the selected
    computation method (compute_pvalues_by_hypergeom or
    compute_pvalues_by_randomization), which is expected to set self.nes.

    Side effects: when self.background == 'network', NaN entries of
    self.node2attribute are replaced by 0 in place. Afterwards
    self.nes_binary and self.attributes['num_neighborhoods_enriched']
    are (re)assigned.
    """

    if 'how' in kwargs:
        self.enrichment_type = kwargs['how']

    if 'neighborhood_score_type' in kwargs:
        self.neighborhood_score_type = kwargs['neighborhood_score_type']

    if 'multiple_testing' in kwargs:
        self.multiple_testing = kwargs['multiple_testing']

    # Make sure that the settings are still valid
    self.validate_config()

    if self.background == 'network':
        print('Setting all null attribute values to 0. Using the network as background for enrichment.')
        self.node2attribute[np.isnan(self.node2attribute)] = 0

    # Warn users if more than 50% of values are NaN (checked per column)
    num_vals = self.node2attribute.shape[0]
    num_nans = np.sum(np.isnan(self.node2attribute), axis=0)

    if np.any(num_nans / num_vals > 0.5):
        print('WARNING: more than 50% of nodes in the network as set to NaN and will be ignored for calculating enrichment.')
        # Doubled single quotes are MATLAB escaping; in Python they are
        # adjacent literals and the quotes around 'network' would vanish.
        print("Consider setting sf.background = 'network'.")

    # Count values that are neither NaN, 0 nor 1. Zero such values means the
    # attributes are binary, which lets 'auto' select the hypergeometric test.
    num_other_values = np.sum(~np.isnan(self.node2attribute) & ~np.isin(self.node2attribute, [0, 1]))

    if (self.enrichment_type == 'hypergeometric') or ((self.enrichment_type == 'auto') and (num_other_values == 0)):
        self.compute_pvalues_by_hypergeom(**kwargs)
    else:
        self.compute_pvalues_by_randomization(**kwargs)

    # Threshold the scores on the -log10 scale; NaN scores stay 0.
    idx = ~np.isnan(self.nes)
    self.nes_binary = np.zeros(self.nes.shape)
    self.nes_binary[idx] = np.abs(self.nes[idx]) > -np.log10(self.enrichment_threshold)

    self.attributes['num_neighborhoods_enriched'] = np.sum(self.nes_binary, axis=0)
开发者ID:baryshnikova-lab,项目名称:safepy,代码行数:40,代码来源:safe.py

示例4: run_safe_batch

# Required name: the SAFE class (this snippet comes from safepy's safe.py)
# Note: `sf` below is a local SAFE instance, not scipy.stats.hypergeom.sf; this function does not call hypergeom.sf directly.
def run_safe_batch(attribute_file):
    """
    Run a SAFE analysis for one attribute file and return its scores.

    A new SAFE object is created, its network is loaded and its
    neighborhoods defined, the attributes are loaded from
    `attribute_file`, and compute_pvalues is called with
    num_permutations=1000.

    Returns the `nes` attribute of the SAFE object.
    """

    safe = SAFE()

    # Network setup
    safe.load_network()
    safe.define_neighborhoods()

    # Attribute loading and enrichment
    safe.load_attributes(attribute_file=attribute_file)
    safe.compute_pvalues(num_permutations=1000)

    return safe.nes
开发者ID:baryshnikova-lab,项目名称:safepy,代码行数:12,代码来源:safe.py

示例5: score_hypergeometric_test

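# Required imports: import numpy as np; from scipy.stats import hypergeom
# Note: also needs the module-level constants ALT_TWO, ALT_LESS and ALTERNATIVES from the originating file; "from scipy.stats.hypergeom import sf" is not a valid import.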
def score_hypergeometric_test(a, b, threshold=1, **kwargs):
    """
    Run a per-column hypergeometric test on two thresholded matrices.

    `a` and `b` are binarized with `>= threshold`. For every column, the
    count of ones in `a` is tested against a hypergeometric distribution
    whose population is the rows of `a` and `b` together, whose successes
    are the ones in both matrices, and whose draws are the rows of `a`.

    The optional keyword `alternative` (default ALT_TWO) must be a member
    of ALTERNATIVES. For the two-sided case the p-value is approximated by
    doubling the smaller tail and capping the result at 1.

    Returns a tuple (scores, pvalues) of arrays, where
    scores = -log(pvalues) * sign(lower tail - upper tail).
    """
    # Binarize both matrices at the threshold
    inside = (a >= threshold).astype(int)
    outside = (b >= threshold).astype(int)
    alt = kwargs.get("alternative", ALT_TWO)
    assert alt in ALTERNATIVES

    # Distribution parameters
    population = len(inside) + len(outside)
    draws = len(inside)
    hits_inside = inside.sum(axis=0)  # per-column ones within `a`
    hits_total = inside.sum(axis=0) + outside.sum(axis=0)  # per-column ones overall

    # Both tails include the observed count, so together they exceed 1
    lower_tail = np.fromiter(
        (hypergeom.cdf(k=seen, n=total, M=population, N=draws)
         for total, seen in zip(hits_total, hits_inside)),
        dtype=float,
    )
    upper_tail = np.fromiter(
        (hypergeom.sf(k=seen - 1, n=total, M=population, N=draws)
         for total, seen in zip(hits_total, hits_inside)),
        dtype=float,
    )
    direction = np.sign(lower_tail - upper_tail)

    if alt == ALT_TWO:
        smaller_tail = np.minimum(lower_tail, upper_tail)
        pvalues = np.minimum(1.0, 2.0 * smaller_tail)
    elif alt == ALT_LESS:
        pvalues = lower_tail
    else:
        pvalues = upper_tail

    scores = -np.log(pvalues) * direction
    return scores, pvalues
开发者ID:biolab,项目名称:orange3-bioinformatics,代码行数:36,代码来源:statistics.py

示例6: hypergeometric

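# Required imports: import numpy as np; from scipy.stats import hypergeom
# Note: hypergeom is a distribution object, not a module, so "from scipy.stats.hypergeom import sf" is not a valid import; call hypergeom.sf(...) instead.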
def hypergeometric(x, N, n, G, alternative='greater'):
    """
    Compute a hypergeometric test p-value for an observed count.

    Parameters
    ----------
    x : int
        number of `good` elements observed in the sample
    N : int
        population size
    n : int
       sample size
    G : int
       hypothesized number of good elements in population
    alternative : {'greater', 'less', 'two-sided'}
       alternative hypothesis to test (default: 'greater')

    Returns
    -------
    float
       P(X >= x) for 'greater', P(X <= x) for 'less', and twice the
       smaller of those two tails (capped at 1) for 'two-sided'

    Raises
    ------
    ValueError
       if x > n, n > N, G > N or x > G
    AssertionError
       if `alternative` is not one of the three supported values
    """
    if n < x:
        raise ValueError("Cannot observe more good elements than the sample size")
    if N < n:
        raise ValueError("Population size cannot be smaller than sample")
    if N < G:
        raise ValueError("Number of good elements can't exceed the population size")
    if G < x:
        raise ValueError("Number of observed good elements can't exceed the number in the population")

    assert alternative in ("two-sided", "less", "greater")

    # Both tails include x itself: sf(x - 1) is P(X > x - 1) = P(X >= x).
    plower = hypergeom.cdf(x, N, G, n)
    pupper = hypergeom.sf(x - 1, N, G, n)
    if alternative == 'two-sided':
        # Including 0.5 in the minimum caps the doubled value at 1.
        pvalue = 2 * np.min([plower, pupper, 0.5])
    elif alternative == 'greater':
        pvalue = pupper
    elif alternative == 'less':
        pvalue = plower
    return pvalue
开发者ID:statlab,项目名称:permute,代码行数:44,代码来源:utils.py

示例7: compute_pvalues_by_hypergeom

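# Required imports: import numpy as np; from scipy.stats import hypergeom
# Note: also needs an `fdrcorrection` function in scope (presumably statsmodels.stats.multitest.fdrcorrection; confirm against the originating file); "from scipy.stats.hypergeom import sf" is not a valid import.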
def compute_pvalues_by_hypergeom(self, **kwargs):
    """
    Compute neighborhood enrichment with a hypergeometric test.

    If keyword arguments are given, 'verbose' (when present) overrides
    self.verbose, and in verbose mode every keyword is echoed.

    For each (neighborhood, attribute) pair the upper tail
    P(X >= observed) is stored in self.pvalues_pos, where

    - the population is the number of nodes with a non-NaN value in at
      least one attribute,
    - the successes are the NaN-ignoring column sums of
      self.node2attribute,
    - the draws are the number of such nodes in the neighborhood,
    - observed is the neighborhood's sum of the attribute values, with
      NaN counted as 0.

    When self.multiple_testing is truthy, fdrcorrection is applied along
    each row and element 1 of its output replaces self.pvalues_pos
    (presumably the adjusted p-values; confirm against fdrcorrection).
    Finally self.nes is set to -log10(self.pvalues_pos).
    """

    if kwargs:
        if 'verbose' in kwargs:
            self.verbose = kwargs['verbose']

        if self.verbose:
            print('Overwriting global settings:')
            for key, value in kwargs.items():
                print('\t%s=%s' % (key, str(value)))

    # Make sure that the settings are still valid
    self.validate_config()

    if self.verbose:
        print('Using the hypergeometric test to calculate enrichment...')

    values = self.node2attribute
    is_valid = ~np.isnan(values)
    num_nodes = self.graph.number_of_nodes()
    num_attributes = len(self.attributes)

    # Nodes with a non-NaN value in at least one attribute
    has_any_value = np.any(is_valid, axis=1)

    # Population size: the same count in every (node, attribute) cell
    population = np.full((num_nodes, num_attributes), np.sum(has_any_value), dtype=float)

    # Successes in the population: per-attribute totals, one row per node
    successes = np.tile(np.nansum(values, axis=0), (num_nodes, 1))

    # Draws: non-NaN nodes per neighborhood, one column per attribute
    per_neighborhood = np.dot(self.neighborhoods, has_any_value.astype(int))[:, np.newaxis]
    draws = np.tile(per_neighborhood, (1, num_attributes))

    # Observed successes: attribute totals within each neighborhood
    observed = np.dot(self.neighborhoods, np.where(is_valid, values, 0))

    # sf(k - 1) is P(X >= k), so the observed count is included in the tail
    self.pvalues_pos = hypergeom.sf(observed - 1, population, successes, draws)

    # Correct for multiple testing
    if self.multiple_testing:

        if self.verbose:
            print('Running FDR-adjustment of p-values...')

        corrected = np.apply_along_axis(fdrcorrection, 1, self.pvalues_pos)
        self.pvalues_pos = corrected[:, 1, :]

    # Log-transform into neighborhood enrichment scores (NES)
    self.nes = -np.log10(self.pvalues_pos)
开发者ID:baryshnikova-lab,项目名称:safepy,代码行数:55,代码来源:safe.py

示例8: max_pairwise

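# Required imports: import numpy as np; from collections import namedtuple; from scipy.stats import hypergeom
# Note: hypergeom is a distribution object, not a module, so "from scipy.stats.hypergeom import sf" is not a valid import; call hypergeom.sf(...) / hypergeom.pmf(...) instead.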
def max_pairwise(gene_scores, ntop=200, second_greatest=False):
    """ Find the largest overlap between the top genes of any two factors

    Parameters
    ----------
    gene_scores : ndarray
        (ngenes, nfactors) array of gene scores
    ntop : int (optional, default 200)
        Number of highest-scoring genes taken from each factor
    second_greatest : bool, optional
        Report the runner-up overlap instead of the largest one

    Returns
    -------
    overlap : int
        The largest (or, with `second_greatest`, the runner-up) number of
        genes shared between the `ntop` highest-scoring genes of two
        distinct factors; 0 when there are fewer than two factors
    p : float
        Hypergeometric probability of an overlap at least this large,
        computed as pmf(overlap) + sf(overlap), with the number of genes
        as the population size and `ntop` as both the number of potential
        successes and the number of draws

    The two values are returned as a namedtuple `Overlap(overlap, p)`.
    """
    # Row indices of the ntop largest scores in every column
    top_genes = np.argsort(gene_scores, axis=0)[-ntop:]
    nfactors = top_genes.shape[1]

    best, runner_up = 0, 0
    for first in range(nfactors):
        # Only visit each unordered pair of factors once
        for second in range(first + 1, nfactors):
            shared = len(np.intersect1d(top_genes[:, first], top_genes[:, second]))
            if shared > best:
                best, runner_up = shared, best
            elif shared > runner_up:
                runner_up = shared

    overlap = runner_up if second_greatest else best

    # pmf(k) + sf(k) = P(X == k) + P(X > k) = P(X >= k)
    dist_args = dict(k=overlap, M=gene_scores.shape[0], N=ntop, n=ntop)
    p = hypergeom.pmf(**dist_args) + hypergeom.sf(**dist_args)

    Overlap = namedtuple('Overlap', ['overlap', 'p'])
    return Overlap(overlap, p)
开发者ID:simslab,项目名称:scHPF,代码行数:44,代码来源:util.py


注:本文中的scipy.stats.hypergeom.sf方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。