本文整理汇总了Python中statsmodels.sandbox.stats.multicomp.multipletests方法的典型用法代码示例。如果您正苦于以下问题：Python multicomp.multipletests方法的具体用法？Python multicomp.multipletests怎么用？Python multicomp.multipletests使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块statsmodels.sandbox.stats.multicomp的用法示例。
在下文中一共展示了multicomp.multipletests方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: pval_multiple_test_corrector
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def pval_multiple_test_corrector(pval_dict, alpha):
    """Apply Benjamini/Hochberg FDR correction to a mapping of p-values.

    :param pval_dict: mapping of event identifier -> raw p-value.
    :param alpha: error rate forwarded to ``multipletests``.
    :return: dict mapping every event identifier to its corrected p-value,
        in the iteration order of ``pval_dict``.
    """
    # Nothing to correct: return early rather than handing an empty
    # sequence to multipletests.
    if not pval_dict:
        return {}

    events = list(pval_dict)
    raw_pvals = [pval_dict[event] for event in events]
    _, pvals_corrected, _, _ = multipletests(raw_pvals, method='fdr_bh', alpha=alpha)

    # Mapping keys are unique, so each event gets exactly one corrected
    # value; no per-key accumulation is required.
    return dict(zip(events, pvals_corrected))
示例2: pval_multiple_test_corrector_by_gene
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def pval_multiple_test_corrector_by_gene(pvals_dict, alpha):
    """Apply Benjamini/Hochberg FDR correction separately within each gene.

    The gene of an event is the part of its identifier before the first
    ``;``. All events sharing a gene are corrected together in one
    ``multipletests`` call.

    :param pvals_dict: mapping of event identifier -> raw p-value.
    :param alpha: error rate forwarded to ``multipletests``.
    :return: ``defaultdict(list)`` mapping event identifier -> corrected
        p-value.
    """
    grouped_by_gene = defaultdict(list)
    for ev_id, raw_pval in pvals_dict.items():
        gene_name = ev_id.partition(";")[0]
        grouped_by_gene[gene_name].append((ev_id, raw_pval))

    corrected = defaultdict(list)
    for gene_events in grouped_by_gene.values():
        event_ids, gene_pvals = zip(*gene_events)
        adjusted = multipletests(gene_pvals, method='fdr_bh', alpha=alpha)[1]
        corrected.update(zip(event_ids, adjusted))
    return corrected
示例3: anova
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def anova(self, min_mean_expr=None):
    """
    Run a non-parametric ANOVA (Kruskal-Wallis) on every column of
    ``self.data`` and cache the FDR-corrected p-values.

    Each column is split at ``self.split_indices`` into groups, the groups
    are passed to ``kruskalwallis`` and element ``[1]`` of its result is
    used as the raw p-value. The raw p-values are corrected with
    ``multipletests(..., self.alpha, method='fdr_tsbh')``.

    :param min_mean_expr: minimum average gene expression value that must be
      reached in at least one cluster for the gene to be considered.
      NOTE(review): this argument is not read anywhere in this body.
    :return: the corrected p-values; a ``pd.Series`` indexed by ``self.index``
      when that is set, otherwise the array returned by ``multipletests``.
      The value is stored in ``self._anova`` and returned unchanged on later
      calls.
    """
    if self._anova is None:
        def column_pvalue(column):
            # todo could shunt to a multiprocessing pool
            groups = np.split(column, self.split_indices)
            return kruskalwallis(*groups)[1]

        raw_pvals = np.apply_along_axis(column_pvalue, 0, self.data)
        corrected = multipletests(raw_pvals, self.alpha, method='fdr_tsbh')[1]

        if self.index is None:
            self._anova = corrected
        else:
            self._anova = pd.Series(corrected, index=self.index)
    return self._anova
示例4: multiple_test_correction
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def multiple_test_correction(event_lst, uncorrected_pvals, alpha):
    """Pair each event with its Benjamini/Hochberg-corrected p-value.

    :param event_lst: event identifiers, in the same order as
        ``uncorrected_pvals``.
    :param uncorrected_pvals: raw p-values.
    :param alpha: error rate forwarded to ``multipletests``.
    :return: dict mapping event identifier -> corrected p-value. Pairing is
        done with ``zip``, so it stops at the shorter of the two sequences.
    """
    correction = multipletests(
        uncorrected_pvals, alpha=alpha, method='fdr_bh', returnsorted=False)
    return dict(zip(event_lst, correction[1]))
示例5: p_adj_bh
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def p_adj_bh(x):
    """Return the Benjamini/Hochberg-adjusted p-values for ``x``.

    Only the second element of the ``multipletests`` result (the corrected
    p-values) is returned; ``returnsorted=False`` is passed explicitly.
    """
    _, adjusted, _, _ = multipletests(x, method='fdr_bh', returnsorted=False)
    return adjusted
示例6: compute_poisson
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def compute_poisson(df, args):
    """Find bins whose counts are improbably high under a Poisson model.

    For every column of ``df`` the expected count per bin is estimated as
    the column total divided by the number of bins in the effective genome.
    Each bin's count is turned into a Poisson survival-function score, the
    scores are Bonferroni-corrected, and bins whose corrected value is below
    ``args.bonferroni`` are collected.

    :param df: DataFrame holding one column of per-bin counts per file.
        Assumes the index carries ``Chromosome`` and ``Bin`` levels, since
        those columns are read after ``reset_index`` -- TODO confirm.
    :param args: object providing ``effective_genome_fraction``,
        ``window_size`` and ``bonferroni``.
    :return: DataFrame with the columns ``Chromosome``, ``Bin`` and ``End``
        for the collected bins, where ``End = Bin + window_size - 1``.
    """
    nb_bins = int(args.effective_genome_fraction/int(args.window_size))

    bad_bins = []
    for fname in df:
        s = df[fname]
        unique_alignments = s.sum()
        average = int(unique_alignments)/nb_bins

        # Score every distinct count once, then map the scores back onto
        # all bins through a value -> score lookup.
        value_counts = s.drop_duplicates().values
        poisson_scores = pd.Series(
            poisson.sf(value_counts, mu=average), index=value_counts)
        poisson_p_vals = s.replace(poisson_scores.to_dict())

        bonferroni = multipletests(poisson_p_vals, method="bonferroni")[1]
        bonferroni = pd.Series(bonferroni, index=s.index, name="bonferroni")
        bonferroni_df = pd.concat([s, bonferroni], axis=1)

        # NOTE(review): writes a 10-row preview to stdout on every
        # iteration; looks like leftover debugging output -- confirm.
        print(bonferroni_df.head(10).to_csv(sep=" "))

        r = bonferroni_df[bonferroni_df.bonferroni < args.bonferroni]

        # Report a real percentage (the ratio used to be printed unscaled
        # next to a "%" sign) and avoid dividing by zero for empty input.
        total_bins = len(bonferroni_df)
        percent = 100.0 * len(r) / total_bins if total_bins else 0.0
        logging.info(
            "%d blacklist-bins found in file %s out of a total of %d bins (%.2f%%)",
            len(r), fname, total_bins, percent)

        bad_bins.append(r)

    outdf = pd.concat(bad_bins, axis=1).reset_index()
    outdf.insert(1, "End", outdf.Bin + args.window_size - 1)
    return outdf[["Chromosome", "Bin", "End"]]
示例7: bhCorrection
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def bhCorrection(s):
    """
    Benjamini-Hochberg correction for a Series of p-values.

    Missing values are replaced by 1.0 before the correction. The result is
    a new Series named ``p_adj`` that shares the index of ``s``.
    """
    filled = s.fillna(1.)
    adjusted = multicomp.multipletests(filled, method='fdr_bh')[1]
    return pd.Series(adjusted[:len(filled)], filled.index, name='p_adj')
示例8: correct_fdr_bh
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def correct_fdr_bh(self, result, alpha=0.05):
    """
    Perform FDR correction using the Benjamini-Hochberg method.

    Only call this method from within a Corrector.

    For both the consistency and the specificity analysis, the p-value map
    is corrected with ``multipletests(method='fdr_bh')``, converted back to
    z-values with ``p_to_z(..., tail='two')`` and given the sign of the
    matching uncorrected z-value map.

    Parameters
    ----------
    result : :obj:`nimare.results.MetaResult`
        Result object from a KDA meta-analysis. It must provide the maps
        'p_desc-consistency', 'p_desc-specificity', 'z_desc-consistency'
        and 'z_desc-specificity'.
    alpha : :obj:`float`, optional
        Alpha. Default is 0.05.

    Returns
    -------
    images : :obj:`dict`
        Dictionary of arrays generated by the correction procedure, stored
        under the keys 'z_desc-consistency_level-voxel' and
        'z_desc-specificity_level-voxel'.

    See Also
    --------
    nimare.correct.FDRCorrector : The Corrector from which to call this method.

    Examples
    --------
    >>> meta = MKDAChi2()
    >>> result = meta.fit(dset)
    >>> corrector = FDRCorrector(method='bh', alpha=0.05)
    >>> cresult = corrector.transform(result)
    """
    # (p-value map, z-value map, output key) for each analysis.
    map_names = (
        ('p_desc-consistency', 'z_desc-consistency',
         'z_desc-consistency_level-voxel'),
        ('p_desc-specificity', 'z_desc-specificity',
         'z_desc-specificity_level-voxel'),
    )

    images = {}
    for p_name, z_name, out_name in map_names:
        p_vals = result.get_map(p_name, return_type='array')
        z_vals = result.get_map(z_name, return_type='array')
        p_fdr = multipletests(p_vals, alpha=alpha,
                              method='fdr_bh',
                              is_sorted=False,
                              returnsorted=False)[1]
        # p_to_z is called with tail='two'; the direction is restored from
        # the sign of the uncorrected z-values.
        images[out_name] = p_to_z(p_fdr, tail='two') * np.sign(z_vals)
    return images
示例9: post_hoc_tests
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def post_hoc_tests(self):
    """
    Carry out pairwise post-hoc tests between groups, for the genes retained
    after ANOVA, using Welch's t-test on column-wise ranked data.

    ``self.anova()`` is run first if no ANOVA result is cached. The raw
    arrays are stored on ``self.post_hoc`` as a ``PostHocResults``
    namedtuple ``(p_adj, statistic, mu)``.

    :return: ``(p_adj, statistic, mu)``. ``p_adj`` and ``statistic`` have
      shape (genes, groups, groups); ``mu`` holds the mean rank per gene and
      group. When ``self.index`` is set they are wrapped in labelled pandas
      containers.
    """
    if self._anova is None:
        self.anova()

    # NOTE(review): a threshold of 1 keeps every gene whose corrected
    # p-value is below 1; self.alpha may have been intended -- confirm.
    anova_significant = np.array(self._anova) < 1  # call array in case it is a Series

    # limit to the selected genes, convert to column-wise ranks.
    data = self.data[:, anova_significant]
    rank_data = np.apply_along_axis(rankdata, 0, data)

    # split the rows wherever the group assignment changes
    split_indices = np.where(np.diff(self.group_assignments))[0] + 1
    array_views = np.array_split(rank_data, split_indices, axis=0)

    # per-group mean, size and variance; transposed so genes are rows
    mu = np.vstack([np.mean(view, axis=0) for view in array_views]).T
    n = np.array([view.shape[0] for view in array_views])
    s = np.vstack([np.var(view, axis=0) for view in array_views]).T
    s_norm = s / n

    # Welch's t for every ordered pair of groups (broadcast over genes)
    numerator = mu[:, np.newaxis, :] - mu[:, :, np.newaxis]
    denominator = np.sqrt(s_norm[:, np.newaxis, :] + s_norm[:, :, np.newaxis])
    statistic = numerator / denominator

    # Welch-Satterthwaite degrees of freedom. Each term of the denominator
    # is var**2 / (n**2 * (n - 1)); the parentheses around (n - 1) matter,
    # without them the expression evaluates to n**3 - 1.
    s_norm2 = s**2 / (n**2 * (n - 1))
    numerator = (s_norm[:, np.newaxis, :] + s_norm[:, :, np.newaxis]) ** 2
    denominator = (s_norm2[:, np.newaxis, :] + s_norm2[:, :, np.newaxis])
    df = np.floor(numerator / denominator)

    # Two-tailed p-value: twice the upper-tail probability of |t|.
    # (cdf(|t|) is the complement of the one-sided p-value, not a p-value.)
    p = 2 * t.sf(np.abs(statistic), df)

    # FDR correction over the whole flattened matrix. The matrix is
    # symmetric, so each pair of groups is included twice.
    p_adj = multipletests(np.ravel(p), alpha=self.alpha, method='fdr_tsbh')[1]
    p_adj = p_adj.reshape(*p.shape)

    phr = namedtuple('PostHocResults', ['p_adj', 'statistic', 'mu'])
    self.post_hoc = phr(p_adj, statistic, mu)

    if self.index is not None:
        # NOTE(review): pd.Panel no longer exists in current pandas
        # releases, so this branch needs an older pandas -- confirm the
        # supported version before relying on it.
        p_adj = pd.Panel(
            p_adj, items=self.index[anova_significant], major_axis=self.groups,
            minor_axis=self.groups)
        statistic = pd.Panel(
            statistic, items=self.index[anova_significant], major_axis=self.groups,
            minor_axis=self.groups)
        mu = pd.DataFrame(mu, self.index[anova_significant], columns=self.groups)

    return p_adj, statistic, mu
示例10: bootstrap_t
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def bootstrap_t(a, b, n_samples=100, n_cells=None, alpha=0.05,
                downsample_value_function=np.median, labels=None):
    """
    Compare two count matrices feature by feature with a bootstrapped
    Welch's t-test and FDR-correct the resulting p-values.

    :param np.ndarray a: first matrix; rows are summed to get per-row sizes
    :param np.ndarray b: second matrix, handled the same way as ``a``
    :param int n_samples: passed to ``sample_moments``
    :param int n_cells: passed to ``sample_moments``; when None, the smaller
      row count of ``a`` and ``b`` is used (e.g. comparing 130 cells against
      1902 cells gives n_cells = 130)
    :param float alpha: acceptable type-I error (default = 0.05)
    :param Callable downsample_value_function: function that identifies the
      number of molecules n to sample from a and b. the sampling number will
      be the minimum of the result across a and b. default = np.median.
      Other values include np.mean and np.max.
    :param labels: feature labels for columns of a & b
    :return pd.DataFrame: one row per feature with the columns 't',
      't_ci95_low', 't_ci95_high', 'p' and 'q'
    """
    assert_input_non_negative(a, b)
    mult_a = estimate_multinomial(a)
    mult_b = estimate_multinomial(b)

    # number of molecules to sample: the smaller downsample value of a and b
    row_totals = (a.sum(axis=1), b.sum(axis=1))
    n_molecules = min(
        downsample_value_function(totals).astype(int) for totals in row_totals)

    if n_cells is None:
        n_cells = min(a.shape[0], b.shape[0])

    a_mu, a_var = sample_moments(mult_a, n_samples, n_cells, n_molecules)
    b_mu, b_var = sample_moments(mult_b, n_samples, n_cells, n_molecules)

    statistic, p, ci_95 = whelchs_t(a_mu, a_var, b_mu, b_var, a.shape[0], b.shape[0])
    q = multipletests(p, alpha=alpha, method='fdr_tsbh')[1]

    # stack as rows, then transpose so each feature becomes one row
    stacked = np.vstack([statistic, ci_95.T, p, q]).T
    return pd.DataFrame(
        data=stacked,
        index=labels,
        columns=['t', 't_ci95_low', 't_ci95_high', 'p', 'q'])
示例11: parse_hwe
# 需要导入模块: from statsmodels.sandbox.stats import multicomp [as 别名]
# 或者: from statsmodels.sandbox.stats.multicomp import multipletests [as 别名]
def parse_hwe(f, alpha, vcf_file):
    """
    Parse a Hardy-Weinberg output file, correct its p-values with
    Benjamini/Hochberg FDR and write a filtered copy of a VCF file.

    Header lines of the VCF (starting with ``#``) are copied unchanged. A
    record line is copied only when the corrected p-value of its locus is
    <= 0.05. Output goes to ``<vcf_file up to the first '.'>_filtered.vcf``.

    NOTE(review): no plots are generated in this body, and ``het_deficit``,
    ``het_excess``, ``fdr_bool_list``, ``alpha_S`` and ``alpha_B`` are not
    used here -- confirm whether further code relies on them.

    :param f: path to the Hardy-Weinberg output file. The first line is
        skipped as a header; whitespace-separated fields 0 and 1 identify
        the locus, field 5 is the p-value, fields 6 and 7 are parsed as the
        heterozygote deficit/excess values.
    :param alpha: error rate, converted with ``float`` and forwarded to
        ``multipletests``.
    :param vcf_file: path to the VCF file to filter.
    :raises KeyError: if a VCF record's locus is absent from ``f``.
    """
    # NOTE(review): splitting on the first '.' truncates paths that contain
    # a dot before the extension (e.g. './x.vcf') -- confirm intended.
    vcf_outfile = vcf_file.split(".")[0] + "_filtered.vcf"

    snp_pos = []
    pvals = []
    het_deficit = []
    het_excess = []

    with open(f) as fh:
        # Skip header
        next(fh)
        for line in fh:
            fields = line.strip().split()
            snp_pos.append((fields[0], fields[1]))
            pvals.append(float(fields[5]))
            het_deficit.append(float(fields[6]))
            het_excess.append(float(fields[7]))

    fdr_bool_list, fdr_pvalue_list, alpha_S, alpha_B = \
        multi_correction.multipletests(pvals, alpha=float(alpha),
                                       method="fdr_bh")

    # Corrected p-value per locus, keyed as "<field0>-<field1>".
    snp_pvals = OrderedDict()
    for pos, pval in zip(snp_pos, fdr_pvalue_list):
        snp_pvals["-".join(pos)] = pval

    with open(vcf_file) as vcf_fh, open(vcf_outfile, "w") as ofh:
        # Iterate the open handle, not the path string (iterating the
        # string yields single characters rather than lines).
        for line in vcf_fh:
            if line.startswith("#"):
                ofh.write(line)
            elif line.strip() != "":
                fields = line.split()
                # Check pval for locus; str.join takes one iterable, so
                # the two fields are passed as a tuple.
                pos = "-".join((fields[0], fields[1]))
                # NOTE(review): the threshold is a literal 0.05 rather
                # than ``alpha`` -- confirm which one is intended.
                if snp_pvals[pos] <= 0.05:
                    ofh.write(line)