本文整理汇总了Python中scipy.stats.hypergeom.sf方法的典型用法代码示例。如果您正苦于以下问题：Python hypergeom.sf方法的具体用法？Python hypergeom.sf怎么用？Python hypergeom.sf使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scipy.stats.hypergeom的用法示例。
在下文中一共展示了hypergeom.sf方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: hypergeometric_test
# 需要导入模块: from scipy.stats import hypergeom [as 别名]
# 或者: from scipy.stats.hypergeom import sf [as 别名]
def hypergeometric_test(x, M, n, N):
    """
    Return the lower- and upper-tail hypergeometric p-values for `x`.

    The hypergeometric distribution models drawing objects from a bin
    without replacement.

    Parameters
    ----------
    x : int
        Observed number of Type I objects among the N objects drawn
        (the random variate).
    M : int
        Total number of objects.
    n : int
        Total number of Type I objects.
    N : int
        Number of objects drawn without replacement from the total
        population.

    Returns
    -------
    pv_le : float
        P(X <= x), the lower-tail probability.
    pv_gt : float
        P(X >= x), the upper-tail probability. Note that this tail
        includes `x` itself, so the two tails overlap at `x`.

    Raises
    ------
    AssertionError
        If `n > M`, `x > n` or `N > M`.

    References
    ----------
    - http://en.wikipedia.org/wiki/Hypergeometric_distribution
    - https://www.biostars.org/p/66729/
    - http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.hypergeom.html
    - http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.hypergeometric.html
    - http://stackoverflow.com/questions/6594840/what-are-equivalents-to-rs-phyper-function-in-python
    """
    assert n <= M
    assert x <= n
    assert N <= M
    # cdf(x) is P(X <= x); cdf(x + 1) would count one outcome too many.
    pv_le = hypergeom.cdf(x, M, n, N)
    # sf(x - 1) is P(X > x - 1) = P(X >= x); sf is sometimes more accurate
    # than computing 1 - cdf.
    pv_gt = hypergeom.sf(x - 1, M, n, N)
    return pv_le, pv_gt
示例2: binomial_p
# 需要导入模块: from scipy.stats import hypergeom [as 别名]
# 或者: from scipy.stats.hypergeom import sf [as 别名]
def binomial_p(x, n, p, alternative='greater'):
    """
    Compute a binomial-test p-value for an observed success count.

    Parameters
    ----------
    x : int
        observed number of successes; must not exceed `n`
    n : int
        number of trials
    p : float
        success probability, passed straight to `binom.cdf` / `binom.sf`
    alternative : {'greater', 'less', 'two-sided'}
        alternative hypothesis to test (default: 'greater')

    Returns
    -------
    float
        `binom.sf(x-1, n, p)` for 'greater', `binom.cdf(x, n, p)` for
        'less', and twice the smaller tail (capped at 1) for 'two-sided'

    Raises
    ------
    AssertionError
        if `alternative` is not one of the three accepted strings
    ValueError
        if `x` is larger than `n`
    """
    assert alternative in ("two-sided", "less", "greater")
    if x > n:
        raise ValueError("Cannot observe more successes than the population size")

    # Both tails include the observed count x itself.
    lower_tail = binom.cdf(x, n, p)
    upper_tail = binom.sf(x - 1, n, p)

    if alternative == 'greater':
        result = upper_tail
    elif alternative == 'less':
        result = lower_tail
    elif alternative == 'two-sided':
        # Including 0.5 in the minimum caps the doubled value at 1.
        result = 2 * np.min([lower_tail, upper_tail, 0.5])
    return result
示例3: compute_pvalues
# 需要导入模块: from scipy.stats import hypergeom [as 别名]
# 或者: from scipy.stats.hypergeom import sf [as 别名]
def compute_pvalues(self, **kwargs):
    """
    Compute enrichment p-values and summarise the enriched neighborhoods.

    Parameters
    ----------
    **kwargs
        Optional overrides applied to the instance before computing:
        'how' sets `self.enrichment_type`, 'neighborhood_score_type' sets
        `self.neighborhood_score_type` and 'multiple_testing' sets
        `self.multiple_testing`. All keyword arguments are also forwarded
        unchanged to the enrichment routine that is selected.

    Notes
    -----
    - When `self.background == 'network'`, NaN entries of
      `self.node2attribute` are overwritten with 0 in place.
    - The hypergeometric routine is used when `self.enrichment_type` is
      'hypergeometric', or when it is 'auto' and every non-NaN attribute
      value is 0 or 1; otherwise the randomization routine is used.
    - Afterwards `self.nes_binary` marks entries whose absolute `self.nes`
      exceeds `-log10(self.enrichment_threshold)`, and its column sums are
      stored under `self.attributes['num_neighborhoods_enriched']`.
      `self.nes` is presumably set by the selected enrichment routine.
    """
    # Per-call overrides of the instance settings.
    if 'how' in kwargs:
        self.enrichment_type = kwargs['how']
    if 'neighborhood_score_type' in kwargs:
        self.neighborhood_score_type = kwargs['neighborhood_score_type']
    if 'multiple_testing' in kwargs:
        self.multiple_testing = kwargs['multiple_testing']

    # Make sure that the settings are still valid
    self.validate_config()

    if self.background == 'network':
        print('Setting all null attribute values to 0. Using the network as background for enrichment.')
        self.node2attribute[np.isnan(self.node2attribute)] = 0

    # Warn users if more than 50% of values are NaN
    num_vals = self.node2attribute.shape[0]
    num_nans = np.sum(np.isnan(self.node2attribute), axis=0)
    # np.any (rather than the builtin any) also copes with a 0-d result.
    if np.any(num_nans / num_vals > 0.5):
        print('WARNING: more than 50% of nodes in the network as set to NaN and will be ignored for calculating enrichment.')
        # Double-quoted literal so the single quotes around network are
        # actually printed.
        print("Consider setting sf.background = 'network'.")

    # Number of non-NaN attribute values that are neither 0 nor 1
    num_other_values = np.sum(~np.isnan(self.node2attribute) & ~np.isin(self.node2attribute, [0, 1]))

    if (self.enrichment_type == 'hypergeometric') or ((self.enrichment_type == 'auto') and (num_other_values == 0)):
        self.compute_pvalues_by_hypergeom(**kwargs)
    else:
        self.compute_pvalues_by_randomization(**kwargs)

    # Binarize the scores; NaN scores stay at 0.
    idx = ~np.isnan(self.nes)
    self.nes_binary = np.zeros(self.nes.shape)
    self.nes_binary[idx] = np.abs(self.nes[idx]) > -np.log10(self.enrichment_threshold)

    self.attributes['num_neighborhoods_enriched'] = np.sum(self.nes_binary, axis=0)
示例4: run_safe_batch
# 需要导入模块: from scipy.stats import hypergeom [as 别名]
# 或者: from scipy.stats.hypergeom import sf [as 别名]
def run_safe_batch(attribute_file):
    """
    Run one SAFE analysis for `attribute_file` and return its `nes`.

    A fresh `SAFE` object is created, its network is loaded, neighborhoods
    are defined, attributes are loaded from `attribute_file`, and p-values
    are computed with `num_permutations=1000`.

    Parameters
    ----------
    attribute_file
        Forwarded to `SAFE.load_attributes` as `attribute_file`.

    Returns
    -------
    The `nes` attribute of the `SAFE` object after `compute_pvalues`.
    """
    safe = SAFE()
    safe.load_network()
    safe.define_neighborhoods()
    safe.load_attributes(attribute_file=attribute_file)
    safe.compute_pvalues(num_permutations=1000)
    return safe.nes
示例5: score_hypergeometric_test
# 需要导入模块: from scipy.stats import hypergeom [as 别名]
# 或者: from scipy.stats.hypergeom import sf [as 别名]
def score_hypergeometric_test(a, b, threshold=1, **kwargs):
    """
    Run a per-gene hypergeometric test on thresholded expression values.

    The probability in a two-sided test is approximated with the symmetric
    distribution with the more extreme of the tails.

    Parameters
    ----------
    a, b : np.ndarray
        Expression matrices; each is binarized with `>= threshold`.
        Assumes rows are cells and columns are genes, with `a` being the
        cluster of interest -- TODO confirm against callers.
    threshold : float
        Values at or above this count as expressed (default 1).
    **kwargs
        `alternative` selects the tail; it must be a member of
        `ALTERNATIVES` and defaults to `ALT_TWO`.

    Returns
    -------
    scores : np.ndarray
        `-log(pvalues)` multiplied by the sign of `under - over`.
    pvalues : np.ndarray
        The lower tail for `ALT_LESS`, twice the smaller tail (capped at
        1) for `ALT_TWO`, and the upper tail otherwise.
    """
    # Binary expression matrices
    in_cluster = (a >= threshold).astype(int)
    rest = (b >= threshold).astype(int)

    alternative = kwargs.get("alternative", ALT_TWO)
    assert alternative in ALTERNATIVES

    # Test parameters
    population = len(in_cluster) + len(rest)
    draws = len(in_cluster)
    expr_in_cluster = in_cluster.sum(axis=0)  # Number of cells expressing genes (in cluster)
    expr_total = in_cluster.sum(axis=0) + rest.sum(axis=0)  # Number of cells expressing genes (overall)

    # Test results --- both tails
    # Note: cumulatives do sum to >1 due to overlap at 1 point
    under = np.fromiter(
        (
            hypergeom.cdf(k=hits, n=total, M=population, N=draws)
            for total, hits in zip(expr_total, expr_in_cluster)
        ),
        dtype=float,
    )
    over = np.fromiter(
        (
            hypergeom.sf(k=hits - 1, n=total, M=population, N=draws)
            for total, hits in zip(expr_total, expr_in_cluster)
        ),
        dtype=float,
    )
    signs = np.sign(under - over)

    if alternative == ALT_TWO:
        pvalues = np.minimum(1.0, 2.0 * np.minimum(under, over))
    elif alternative == ALT_LESS:
        pvalues = under
    else:
        pvalues = over

    return -np.log(pvalues) * signs, pvalues
示例6: hypergeometric
# 需要导入模块: from scipy.stats import hypergeom [as 别名]
# 或者: from scipy.stats.hypergeom import sf [as 别名]
def hypergeometric(x, N, n, G, alternative='greater'):
    """
    Compute a hypergeometric-test p-value for an observed count.

    Parameters
    ----------
    x : int
        number of `good` elements observed in the sample
    N : int
        population size
    n : int
        sample size
    G : int
        hypothesized number of good elements in population
    alternative : {'greater', 'less', 'two-sided'}
        alternative hypothesis to test (default: 'greater')

    Returns
    -------
    float
        estimated p-value: P(X >= x) for 'greater', P(X <= x) for 'less',
        and twice the smaller tail (capped at 1) for 'two-sided'

    Raises
    ------
    ValueError
        if `x > n`, `n > N`, `G > N` or `x > G`
    AssertionError
        if `alternative` is not one of the three accepted strings
    """
    if n < x:
        raise ValueError("Cannot observe more good elements than the sample size")
    if N < n:
        raise ValueError("Population size cannot be smaller than sample")
    if N < G:
        raise ValueError("Number of good elements can't exceed the population size")
    if G < x:
        raise ValueError("Number of observed good elements can't exceed the number in the population")
    assert alternative in ("two-sided", "less", "greater")

    # scipy's argument order is (k, M, n, N) =
    # (observed, population size, good elements in population, sample size).
    plower = hypergeom.cdf(x, N, G, n)
    # sf(x - 1) = P(X >= x), so the upper tail includes x itself.
    pupper = hypergeom.sf(x-1, N, G, n)
    if alternative == 'two-sided':
        # Including 0.5 in the minimum caps the doubled value at 1.
        pvalue = 2*np.min([plower, pupper, 0.5])
    elif alternative == 'greater':
        pvalue = pupper
    elif alternative == 'less':
        pvalue = plower
    return pvalue
示例7: compute_pvalues_by_hypergeom
# 需要导入模块: from scipy.stats import hypergeom [as 别名]
# 或者: from scipy.stats.hypergeom import sf [as 别名]
def compute_pvalues_by_hypergeom(self, **kwargs):
    """
    Compute neighborhood enrichment with an upper-tail hypergeometric test.

    Parameters
    ----------
    **kwargs
        'verbose', if given, overwrites `self.verbose`. When kwargs are
        given and verbose is on, all of them are echoed to stdout.

    Notes
    -----
    Sets `self.pvalues_pos` to `hypergeom.sf(k - 1, M, n, N)` evaluated
    per (neighborhood, attribute), where
    - M is the number of nodes with a non-NaN value in at least one
      attribute,
    - n is the NaN-ignoring column sum of `self.node2attribute`,
    - N is the count of such nodes in the neighborhood, and
    - k is the neighborhood's sum of attribute values (NaN taken as 0).
    If `self.multiple_testing` is truthy, `fdrcorrection` is applied along
    axis 1 and its second output replaces the p-values. Finally
    `self.nes` is set to `-log10(self.pvalues_pos)`.
    """
    if 'verbose' in kwargs:
        self.verbose = kwargs['verbose']
    if kwargs and self.verbose:
        print('Overwriting global settings:')
        for key, value in kwargs.items():
            print('\t%s=%s' % (key, str(value)))

    # Make sure that the settings are still valid
    self.validate_config()

    if self.verbose:
        print('Using the hypergeometric test to calculate enrichment...')

    num_nodes = self.graph.number_of_nodes()
    num_attributes = len(self.attributes)
    has_value = ~np.isnan(self.node2attribute)

    # Nodes with not-NaN values in >= 1 attribute
    annotated_nodes = np.any(has_value, axis=1)

    # -- Number of nodes (with not-NaN values in >= 1 attribute)
    population = np.zeros([num_nodes, num_attributes]) + np.sum(annotated_nodes)

    # -- Number of nodes annotated to each attribute
    in_group = np.tile(np.nansum(self.node2attribute, axis=0), (num_nodes, 1))

    # -- Number of nodes in each neighborhood (with not-NaN values in >= 1 attribute)
    sizes = np.dot(self.neighborhoods, annotated_nodes.astype(int))[:, np.newaxis]
    in_neighborhood = np.tile(sizes, (1, num_attributes))

    # -- Number of nodes in each neighborhood and annotated to each attribute
    in_both = np.dot(self.neighborhoods, np.where(has_value, self.node2attribute, 0))

    # sf(k - 1) = P(X >= k): probability of at least the observed overlap.
    self.pvalues_pos = hypergeom.sf(in_both - 1, population, in_group, in_neighborhood)

    # Correct for multiple testing
    if self.multiple_testing:
        if self.verbose:
            print('Running FDR-adjustment of p-values...')
        corrected = np.apply_along_axis(fdrcorrection, 1, self.pvalues_pos)
        # Index 1 on the middle axis picks the second value fdrcorrection returns.
        self.pvalues_pos = corrected[:, 1, :]

    # Log-transform into neighborhood enrichment scores (NES)
    self.nes = -np.log10(self.pvalues_pos)
示例8: max_pairwise
# 需要导入模块: from scipy.stats import hypergeom [as 别名]
# 或者: from scipy.stats.hypergeom import sf [as 别名]
def max_pairwise(gene_scores, ntop=200, second_greatest=False):
    """ Find the largest overlap of top genes between any two factors

    Parameters
    ----------
    gene_scores : ndarray
        (ngenes, nfactors) array of gene scores
    ntop : int (optional, default 200)
        Number of top genes to consider in each factor
    second_greatest : bool, optional
        Report the second greatest pairwise overlap instead of the
        greatest one

    Returns
    -------
    overlap : int
        The maximum (or second greatest) pairwise overlap of the `ntop`
        highest scoring genes in each factor; 0 when there are not
        enough factor pairs
    p : float
        Hypergeometric probability of seeing at least `overlap`
        successes, where the number of genes is the population size and
        `ntop` is both the number of potential successes and the number
        of draws.

    Both values are returned together as an `Overlap` namedtuple.
    """
    top_genes = np.argsort(gene_scores, axis=0)[-ntop:]
    nfactors = top_genes.shape[1]

    # Overlap size for every unordered pair of factors (i < j).
    pair_overlaps = [
        len(np.intersect1d(top_genes[:, i], top_genes[:, j]))
        for i in range(nfactors)
        for j in range(i + 1, nfactors)
    ]

    # Padding with two zeros gives the same 0 fallback as a running
    # best/second-best search that starts from 0, 0.
    best, runner_up = sorted(pair_overlaps + [0, 0], reverse=True)[:2]
    overlap = runner_up if second_greatest else best

    ngenes = gene_scores.shape[0]
    # pmf(k) + sf(k) = P(X = k) + P(X > k) = P(X >= k)
    exactly = hypergeom.pmf(k=overlap, M=ngenes, N=ntop, n=ntop)
    beyond = hypergeom.sf(k=overlap, M=ngenes, N=ntop, n=ntop)
    p = exactly + beyond

    Overlap = namedtuple('Overlap', ['overlap', 'p'])
    return Overlap(overlap, p)