This article collects typical usage examples of the Python function statsmodels.stats.multitest.multipletests. If you are wondering what multipletests does, how to call it, or want to see it used in real code, the curated examples below may help.
A total of 15 code examples of the multipletests function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
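Before the examples, a minimal sketch of a typical call. The p-values below are made up purely for illustration; 'fdr_bh' is the Benjamini/Hochberg FDR procedure.

import numpy as np
from statsmodels.stats.multitest import multipletests

# hypothetical raw p-values from six independent tests
pvals = np.array([0.001, 0.008, 0.039, 0.041, 0.27, 0.60])

# multipletests returns (reject, pvals_corrected, alphacSidak, alphacBonf)
reject, pvals_corrected, alphac_sidak, alphac_bonf = multipletests(
    pvals, alpha=0.05, method='fdr_bh')

print(reject)           # boolean mask: True where the null is rejected at FDR 0.05
print(pvals_corrected)  # adjusted p-values, in the same order as the input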
Example 1: significance_assessment
def significance_assessment(self, cscPairA, cscPairD, leftregion, rightregion, meta_chrome, arm, AmpPat, DelPat, chrm_genebkt):
    if len(cscPairA.keys()) != 0 or len(cscPairD.keys()) != 0:
        scorelistA, scorelistD = [], []
        for i in range(0, self.num_permutation):
            permute_regionA, permute_regionD = cna_utils.cycle_shift_permutation(self.dlcall.regionA[meta_chrome][arm], self.dlcall.regionD[meta_chrome][arm], leftregion, rightregion)
            pedgesetA, pedgesetD, pedgetoPatient, pedgewA, pedgewD, pposA, pposD = cna_utils.formatEdgeId(AmpPat.union(DelPat), permute_regionA, permute_regionD)#, abbA, abbD)
            pcscPairA, pcscPairD = self.RAIG_algo(pedgesetA, pedgesetD, pedgetoPatient, pedgewA, pedgewD, pposA, pposD, chrm_genebkt, len(AmpPat), len(DelPat))
            if len(pcscPairA.keys()) != 0:
                scorelistA.append(max([2*min(pcscPairA[cid]['lcount'], pcscPairA[cid]['rcount']) for cid in pcscPairA.keys()]))
            else:
                scorelistA.append(0)
            if len(pcscPairD.keys()) != 0:
                scorelistD.append(max([2*min(pcscPairD[cid]['lcount'], pcscPairD[cid]['rcount']) for cid in pcscPairD.keys()]))
            else:
                scorelistD.append(0)

        if len(cscPairA.keys()) != 0:
            pvals = list()
            cidlist = list()
            for cid in cscPairA.keys():
                csc_score = 2*min(cscPairA[cid]['lcount'], cscPairA[cid]['rcount'])
                count = 0
                for s in scorelistA:
                    if s > csc_score:
                        count += 1
                cscPairA[cid]['p-val'] = float(count)/self.num_permutation
                pvals.append(float(count)/self.num_permutation)
                cidlist.append(cid)
            corrected_pval = smm.multipletests(pvals, alpha=0.05, method='fdr_bh')[1]
            for i in range(len(cidlist)):
                cscPairA[cidlist[i]]['corrected-p-val'] = corrected_pval[i]

        if len(cscPairD.keys()) != 0:
            pvals = list()
            cidlist = list()
            for cid in cscPairD.keys():
                csc_score = 2*min(cscPairD[cid]['lcount'], cscPairD[cid]['rcount'])
                count = 0
                for s in scorelistD:
                    if s > csc_score:
                        count += 1
                cscPairD[cid]['p-val'] = float(count)/self.num_permutation
                pvals.append(float(count)/self.num_permutation)
                cidlist.append(cid)
            corrected_pval = smm.multipletests(pvals, alpha=0.05, method='fdr_bh')[1]
            for i in range(len(cidlist)):
                cscPairD[cidlist[i]]['corrected-p-val'] = corrected_pval[i]
Example 2: test_issorted
def test_issorted(method):
    # test that is_sorted keyword works correctly
    # the fdrcorrection functions are tested indirectly
    # data generated as random numbers np.random.beta(0.2, 0.5, size=10)
    pvals = np.array([31, 9958111, 7430818, 8653643, 9892855, 876, 2651691,
                      145836, 9931, 6174747]) * 1e-7
    sortind = np.argsort(pvals)
    sortrevind = sortind.argsort()
    pvals_sorted = pvals[sortind]

    res1 = multipletests(pvals, method=method, is_sorted=False)
    res2 = multipletests(pvals_sorted, method=method, is_sorted=True)
    assert_equal(res2[0][sortrevind], res1[0])
    assert_allclose(res2[1][sortrevind], res1[1], rtol=1e-10)
Example 3: multi_correct
def multi_correct(data, meth='fdr_bh'):
    """
    Run fdr correction on nodes of interest contained in an array of p values.

    Parameters:
    -----------
    data : numpy array
        nnodes x nnodes array containing p values of correlation between each node
    noi_idx : numpy
        indices (applicable to both row and column) of nodes of interest. This
        reduces the number of nodes corrected for
    meth : str
        Method of correction. Options are:
            `bonferroni` : one-step correction
            `sidak` : one-step correction
            `holm-sidak` :
            `holm` :
            `simes-hochberg` :
            `hommel` :
            `fdr_bh` : Benjamini/Hochberg (default)
            `fdr_by` : Benjamini/Yekutieli

    Returns:
    ----------
    fdr_corrected : numpy array
        array containing p values corrected with fdr
    """
    rej, corrp, alpha_sidak, alpha_bonnf = smm.multipletests(data,
                                                             alpha=0.05,
                                                             method=meth)
    return corrp
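A hedged usage sketch for the helper above. The p-values are invented, and they are passed as a flat 1-D array, which is the shape multipletests itself expects; the smm alias matches what the function body assumes.

import numpy as np
import statsmodels.stats.multitest as smm  # alias assumed inside multi_correct

raw_p = np.array([0.0004, 0.012, 0.03, 0.18, 0.74])  # hypothetical p-values
corrected = multi_correct(raw_p, meth='fdr_bh')
print(corrected)  # BH-adjusted p-values, same length and order as the input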
Example 4: get_p_values
def get_p_values(dat):
    #%%
    feat_x = dat[dat['region']=='Before']
    feat_y = dat[dat['region']=='After']

    p_values = []
    for feat in feat_avg_names:
        x = feat_x[feat]
        x = x.dropna()
        y = feat_y[feat].dropna()
        if x.size > 0 and y.size > 0:
            _, p = ttest_ind(x, y)
        else:
            p = np.nan
        p_values.append((feat, p))

    feats, p_val = zip(*p_values)
    p_values = pd.Series(p_val, index=feats).dropna()
    p_values = p_values.sort_values(ascending=True)

    if p_values.size > 0:
        reject, pvals_corrected, alphacSidak, alphacBonf = \
            smm.multipletests(p_values.values, method = 'fdr_tsbky')
        pvals_corrected = pd.Series(pvals_corrected, index=p_values.index)
    else:
        pvals_corrected = pd.Series()
    #%%
    return p_values, pvals_corrected
Example 5: test_hommel
def test_hommel():
    # tested against R stats p_adjust(pval0, method='hommel')
    pval0 = np.array(
        [0.00116, 0.00924, 0.01075, 0.01437, 0.01784, 0.01918,
         0.02751, 0.02871, 0.03054, 0.03246, 0.04259, 0.06879,
         0.0691, 0.08081, 0.08593, 0.08993, 0.09386, 0.09412,
         0.09718, 0.09758, 0.09781, 0.09788, 0.13282, 0.20191,
         0.21757, 0.24031, 0.26061, 0.26762, 0.29474, 0.32901,
         0.41386, 0.51479, 0.52461, 0.53389, 0.56276, 0.62967,
         0.72178, 0.73403, 0.87182, 0.95384])
    result_ho = np.array(
        [0.0464, 0.25872, 0.29025,
         0.3495714285714286, 0.41032, 0.44114,
         0.57771, 0.60291, 0.618954,
         0.6492, 0.7402725000000001, 0.86749,
         0.86749, 0.8889100000000001, 0.8971477777777778,
         0.8993, 0.9175374999999999, 0.9175374999999999,
         0.9175374999999999, 0.9175374999999999, 0.9175374999999999,
         0.9175374999999999, 0.95384, 0.9538400000000001,
         0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
         0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
         0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
         0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
         0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
         0.9538400000000001])
    rej, pvalscorr, _, _ = multipletests(pval0, alpha=0.1, method='ho')
    assert_almost_equal(pvalscorr, result_ho, 15)
    assert_equal(rej, result_ho < 0.1)  # booleans
Example 6: DEGI
def DEGI(gctfile, clsfile, number):
    # open and save input files
    with open(gctfile) as gct:
        gct = numpy.genfromtxt(gct, dtype=None, delimiter="\t", missing_values="NA", invalid_raise=False, skip_header=2)
        gct_exp = gct[1:, 2:].astype(float)  # matrix of expression values
        gct_genes = gct[1:, 1]  # list of gene names
    with open(clsfile) as label:
        label = label.read().splitlines()
        label = label[2].split()  # list of class labels
    # initialize empty list for p-values
    pvals = []
    # first, calculate difference in means with original labels
    for i in range(0, len(gct_genes)):
        class0 = []
        class1 = []
        for j in range(0, len(label)):
            if label[j] == "0":
                class0.append(gct_exp[i, j])
            if label[j] == "1":
                class1.append(gct_exp[i, j])
        mean0 = sum(class0)/len(class0)
        mean1 = sum(class1)/len(class1)
        null_diff = abs(mean0-mean1)
        # then, calculate difference in means with permuted labels
        # p-value is determined by the proportion of permuted differences that are less than the original difference
        greater = 0.
        for k in range(0, number):
            label_shuffle = numpy.random.permutation(label)
            class0_shuffle = []
            class1_shuffle = []
            for j in range(0, len(label_shuffle)):
                if label_shuffle[j] == "0":
                    class0_shuffle.append(gct_exp[i, j])
                if label_shuffle[j] == "1":
                    class1_shuffle.append(gct_exp[i, j])
            mean0_shuffle = sum(class0_shuffle)/len(class0_shuffle)
            mean1_shuffle = sum(class1_shuffle)/len(class1_shuffle)
            alt_diff = abs(mean0_shuffle-mean1_shuffle)
            if null_diff >= alt_diff:
                greater += 1.
        pvals.append(greater/number)
    # correct for multiple hypothesis tests using benjamini-hochberg
    bh = smm.multipletests(pvals, alpha=0.05, method='fdr_bh')
    bh_sig = bh[0]
    bh_pvals = bh[1].astype(str)
    sig = 0
    for i in range(0, len(bh_sig)):
        if bh_sig[i] == True:
            print(gct_genes[i] + " is differentially expressed.\nThe adjusted p-value is " + bh_pvals[i] + "\n")
            sig += 1
    if sig == 0:
        print("There are no differentially expressed genes.")
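A hedged usage note for the function above. The file names are placeholders: the .gct file is a tab-delimited expression matrix, the .cls file holds the 0/1 class labels on its third line, and the last argument is the number of label permutations per gene. The imports are the aliases the function body assumes.

import numpy
import statsmodels.stats.multitest as smm  # aliases assumed by DEGI above

# hypothetical input files, 1000 permutations per gene
DEGI("expression.gct", "phenotype.cls", 1000)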
Example 7: pval_corrected
def pval_corrected(self, method=None):
    '''p-values corrected for multiple testing problem

    This uses the default p-value correction of the instance stored in
    ``self.multitest_method`` if method is None.
    '''
    import statsmodels.stats.multitest as smt
    if method is None:
        method = self.multitest_method
    #TODO: breaks with method=None
    return smt.multipletests(self.pvals_raw, method=method)[1]
Example 8: test_pvalcorrection_reject
def test_pvalcorrection_reject(alpha, method, ii):
    # consistency test for reject boolean and pvalscorr
    pval1 = np.hstack((np.linspace(0.0001, 0.0100, ii),
                       np.linspace(0.05001, 0.11, 10 - ii)))
    # using .05001 instead of 0.05 to avoid edge case issue #768
    reject, pvalscorr = multipletests(pval1, alpha=alpha,
                                      method=method)[:2]

    msg = 'case %s %3.2f rejected:%d\npval_raw=%r\npvalscorr=%r' % (
        method, alpha, reject.sum(), pval1, pvalscorr)
    assert_equal(reject, pvalscorr <= alpha, err_msg=msg)
Example 9: test_multi_pvalcorrection_rmethods
def test_multi_pvalcorrection_rmethods(self, key, val):
    # test against R package multtest mt.rawp2adjp
    res_multtest = self.res2
    pval0 = res_multtest[:, 0]

    if val[1] in self.methods:
        reject, pvalscorr = multipletests(pval0,
                                          alpha=self.alpha,
                                          method=val[1])[:2]
        assert_almost_equal(pvalscorr, res_multtest[:, val[0]], 15)
        assert_equal(reject, pvalscorr <= self.alpha)
Example 10: get_score_df
def get_score_df(self, correction_method=None):
    '''
    :param correction_method: str or None, correction method from statsmodels.stats.multitest.multipletests
        'fdr_bh' is recommended.
    :return: pd.DataFrame
    '''
    # From https://people.kth.se/~lang/Effect_size.pdf
    # Shinichi Nakagawa and Innes C. Cuthill. 2007. In Biological Reviews 82.
    X = self._get_X().astype(np.float64)
    X = X / X.sum(axis=1)
    cat_X, ncat_X = self._get_cat_and_ncat(X)
    n1, n2 = float(cat_X.shape[1]), float(ncat_X.shape[1])
    n = n1 + n2
    m1 = cat_X.mean(axis=0).A1
    m2 = ncat_X.mean(axis=0).A1
    v1 = cat_X.var(axis=0).A1
    v2 = ncat_X.var(axis=0).A1
    s_pooled = np.sqrt(((n2 - 1) * v2 + (n1 - 1) * v1) / (n - 2.))
    cohens_d = (m1 - m2) / s_pooled
    cohens_d_se = np.sqrt(((n - 1.) / (n - 3)) * (4. / n) * (1 + np.square(cohens_d)))
    cohens_d_z = cohens_d / cohens_d_se
    cohens_d_p = norm.sf(cohens_d_z)
    hedges_r = cohens_d * (1 - 3. / ((4. * (n - 2)) - 1))
    hedges_r_se = np.sqrt(n / (n1 * n2) + np.square(hedges_r) / (n - 2.))
    hedges_r_z = hedges_r / hedges_r_se
    hedges_r_p = norm.sf(hedges_r_z)
    score_df = pd.DataFrame({
        'cohens_d': cohens_d,
        'cohens_d_se': cohens_d_se,
        'cohens_d_z': cohens_d_z,
        'cohens_d_p': cohens_d_p,
        'hedges_r': hedges_r,
        'hedges_r_se': hedges_r_se,
        'hedges_r_z': hedges_r_z,
        'hedges_r_p': hedges_r_p,
        'm1': m1,
        'm2': m2,
    }, index=self.corpus_.get_terms()).fillna(0)
    if correction_method is not None:
        from statsmodels.stats.multitest import multipletests
        score_df['hedges_r_p_corr'] = 0.5
        for method in ['cohens_d', 'hedges_r']:
            score_df[method + '_p_corr'] = 0.5
            score_df.loc[(score_df['m1'] != 0) | (score_df['m2'] != 0), method + '_p_corr'] = (
                multipletests(score_df.loc[(score_df['m1'] != 0) | (score_df['m2'] != 0), method + '_p'],
                              method=correction_method)[1]
            )
    return score_df
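The correction step above only adjusts rows where at least one class mean is non-zero and leaves a placeholder of 0.5 elsewhere. A minimal, self-contained sketch of that masking idiom, with invented values in place of the scorer's real DataFrame:

import pandas as pd
from statsmodels.stats.multitest import multipletests

# made-up per-term means and raw p-values
score_df = pd.DataFrame({
    'm1':         [0.0, 0.2, 0.0, 0.4],
    'm2':         [0.0, 0.1, 0.3, 0.0],
    'hedges_r_p': [0.9, 0.01, 0.04, 0.20],
})

mask = (score_df['m1'] != 0) | (score_df['m2'] != 0)
score_df['hedges_r_p_corr'] = 0.5  # placeholder for terms absent from both classes
score_df.loc[mask, 'hedges_r_p_corr'] = multipletests(
    score_df.loc[mask, 'hedges_r_p'], method='fdr_bh')[1]
print(score_df)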
Example 11: is_from_null
def is_from_null(self, alpha, samples, chane_prob):
    dims = samples.shape[1]
    boots = 10*int(dims/alpha)
    pvals = np.zeros(dims)
    for dim in range(dims):
        U, _ = self.tester.get_statistic_multiple_dim(samples, dim)
        p = self.tester.compute_pvalues_for_processes(U, chane_prob, boots)
        pvals[dim] = p
    print(pvals)
    alt_is_true, pvals_corrected, _, _ = multipletests(pvals, alpha, method='holm')
    return any(alt_is_true), pvals_corrected
Example 12: test_pvalcorrection_reject
def test_pvalcorrection_reject():
    # consistency test for reject boolean and pvalscorr
    for alpha in [0.01, 0.05, 0.1]:
        for method in ['b', 's', 'sh', 'hs', 'h', 'hommel', 'fdr_i', 'fdr_n',
                       'fdr_tsbky', 'fdr_tsbh', 'fdr_gbs']:
            for ii in range(11):
                pval1 = np.hstack((np.linspace(0.0001, 0.0100, ii),
                                   np.linspace(0.05001, 0.11, 10 - ii)))
                # using .05001 instead of 0.05 to avoid edge case issue #768
                reject, pvalscorr = multipletests(pval1, alpha=alpha,
                                                  method=method)[:2]
                #print 'reject.sum', v[1], reject.sum()
                msg = 'case %s %3.2f rejected:%d\npval_raw=%r\npvalscorr=%r' % (
                    method, alpha, reject.sum(), pval1, pvalscorr)
                assert_equal(reject, pvalscorr <= alpha, err_msg=msg)
Example 13: correct_enrichment_pvalues
def correct_enrichment_pvalues(enrichments, method, sig_cutoff):
    corrected_enrichments = []
    for enrichment in enrichments:
        pvalues = np.array(list(enrichment.values()))
        gene_set_names = list(enrichment.keys())
        if method == 'none' or method is None:
            corrected_pvalues = pvalues
            # no correction: keep gene sets whose raw p-value falls below the cutoff
            reject = pvalues < sig_cutoff
        else:
            reject, corrected_pvalues, _, _ = smm.multipletests(pvalues,
                                                                alpha=sig_cutoff,
                                                                method=method)
        accepted_indices = np.where(reject)[0]
        accepted_pvalues = dict([(gene_set_names[i], corrected_pvalues[i])
                                 for i in accepted_indices])
        corrected_enrichments.append(accepted_pvalues)
    return corrected_enrichments
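A hedged usage sketch for the helper above, with a made-up list of enrichment dictionaries (gene-set name mapped to raw p-value). Only gene sets that survive the chosen correction at sig_cutoff are kept; the imports match the aliases the function body expects.

import numpy as np
import statsmodels.stats.multitest as smm  # aliases assumed by the function above

enrichments = [{"pathway_A": 0.0005, "pathway_B": 0.04, "pathway_C": 0.7}]
kept = correct_enrichment_pvalues(enrichments, method='fdr_bh', sig_cutoff=0.05)
print(kept)  # -> [{'pathway_A': 0.0015}] with these made-up numbers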
Example 14: __call__
def __call__(self, track):
    print("Reading %s" % track)
    data = pandas.read_csv(self.openFile(track),
                           header=0,
                           names=["contig", "start", "p"],
                           sep="\t")
    print("Done")

    data["qvalues"] = multipletests(data["p"], method="fdr_bh")[1]

    output = dict()
    output["Bases"] = data.shape[0]
    output["Significant"] = (data["qvalues"] < 0.01).sum()
    output["Fraction_Significant"] = \
        float(output["Significant"])/output["Bases"]

    return output
Example 15: test_multi_pvalcorrection
def test_multi_pvalcorrection():
    # test against R package multtest mt.rawp2adjp
    # because of sort this doesn't check correct sequence - TODO: rewrite DONE
    rmethods = {'rawp': (0, 'pval'), 'Bonferroni': (1, 'b'), 'Holm': (2, 'h'),
                'Hochberg': (3, 'sh'), 'SidakSS': (4, 's'), 'SidakSD': (5, 'hs'),
                'BH': (6, 'fdr_i'), 'BY': (7, 'fdr_n')}

    for k, v in rmethods.items():
        if v[1] in ['b', 's', 'sh', 'hs', 'h', 'fdr_i', 'fdr_n']:
            #pvalscorr = np.sort(multipletests(pval0, alpha=0.1, method=v[1])[1])
            r_sortindex = [6, 8, 9, 7, 5, 1, 2, 4, 0, 3]
            pvalscorr = multipletests(pval0, alpha=0.1, method=v[1])[1][r_sortindex]
            assert_almost_equal(pvalscorr, res_multtest[:, v[0]], 15)

    pvalscorr = np.sort(fdrcorrection(pval0, method='n')[1])
    assert_almost_equal(pvalscorr, res_multtest[:, 7], 15)
    pvalscorr = np.sort(fdrcorrection(pval0, method='i')[1])
    assert_almost_equal(pvalscorr, res_multtest[:, 6], 15)