This article collects typical usage examples of the Python class CGAT.Stats. If you are looking for concrete examples of how the Stats class is used in practice, the hand-picked code examples below may help.
A total of 15 code examples of the Stats class are shown below, sorted by popularity by default.
Example 1: testLRT
def testLRT(self):
"""test that the false positive rate is in the same order as mSignificance.
Sample from a normal distribution and compare two models:
1. mean estimated = complex model (1 df)
2. mean given = simple model (0 df)
Likelihood = P(data | model)
"""
simple_np = 0
complex_np = 1
npassed = 0
for replicate in range(0, self.mNumReplicates):
sample = scipy.stats.norm.rvs(
size=self.mNumSamples, loc=0.0, scale=1.0)
mean = scipy.mean(sample)
complex_ll = numpy.sum(
numpy.log(scipy.stats.norm.pdf(sample, loc=mean, scale=1.0)))
simple_ll = numpy.sum(
numpy.log(scipy.stats.norm.pdf(sample, loc=0.0, scale=1.0)))
a = Stats.doLogLikelihoodTest(complex_ll, complex_np,
simple_ll, simple_np,
significance_threshold=self.mSignificance)
if a.mPassed:
npassed += 1
r = float(npassed) / self.mNumReplicates
self.assertAlmostEqual(self.mSignificance, r, places=self.nplaces)
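For reference, the call that this test exercises can be isolated into a minimal sketch. The log-likelihood values below are invented for illustration; the signature and the mPassed field follow the usage shown above.
# Sketch: likelihood-ratio test between a full model and a nested null model.
from CGAT import Stats
complex_ll, complex_np = -120.3, 2   # log-likelihood and parameter count of the full model
simple_ll, simple_np = -125.8, 1     # log-likelihood and parameter count of the nested model
lrt = Stats.doLogLikelihoodTest(complex_ll, complex_np,
                                simple_ll, simple_np,
                                significance_threshold=0.05)
if lrt.mPassed:
    print("the full model fits significantly better")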
Example 2: checkFDR
def checkFDR(self, pi0_method):
result = Stats.doFDR(
self.mPvalues, fdr_level=0.05, pi0_method=pi0_method)
R("""require ('qvalue')""")
qvalues = R.qvalue(ro.FloatVector(self.mPvalues),
fdr_level=0.05,
pi0_method=pi0_method)
assert qvalues.names[1] == "pi0"
assert qvalues.names[2] == "qvalues"
assert qvalues.names[5] == "significant"
assert qvalues.names[6] == "lambda"
r_qvalues = qvalues[2]
r_pi0 = qvalues[1][0]
self.assertEqual(len(result.mQValues), len(qvalues[2]))
self.assertEqual(len(result.mLambda), len(qvalues[6]))
self.assertEqual(result.mPi0, r_pi0)
for a, b in zip(result.mQValues, r_qvalues):
self.assertAlmostEqual(a, b, 2, "unequal: %f != %f" % (a, b))
for a, b in zip(result.mPassed, qvalues[5]):
self.assertEqual(
a, b, "threshold-passed flag not equal: %s != %s" % (a, b))
Example 3: doOldFDR
def doOldFDR(options, args):
"""apply fdr to output of annotator."""
# read input
annotators = []
for filename in args:
infile = open(filename, "r")
annotators.append(readAnnotator(infile))
infile.close()
# apply filters and create diagnostic plots
for filename, data in zip(args, annotators):
ninput = len(data)
pvalues = [x.mPValue for x in data]
vlambda = numpy.arange(0, max(pvalues), 0.05)
try:
qvalues = Stats.doFDR(
pvalues, vlambda=vlambda, fdr_level=options.fdr)
except ValueError as msg:
E.warn("%s: fdr could not be computed - no filtering: %s" %
(filename, msg))
continue
qvalues.plot(filename + "_diagnostics.png")
data = [x[0] for x in zip(data, qvalues.mPassed) if x[1]]
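The estimate-plot-filter pattern used above can also be written in isolation as a sketch; the p-values are invented stand-ins, and the .plot() call mirrors the diagnostic plot written in the example.
# Sketch: estimate q-values, write a diagnostic plot, keep only the entries that passed.
from CGAT import Stats
pvalues = [0.001, 0.004, 0.03, 0.2, 0.6]
qvalues = Stats.doFDR(pvalues, fdr_level=0.05)
qvalues.plot("diagnostics.png")
kept = [p for p, passed in zip(pvalues, qvalues.mPassed) if passed]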
Example 4: __call__
def __call__(self, track, slice=None):
result = odict()
merged = None
rocs = []
for field in self.mFields:
data = []
for replicate in EXPERIMENTS.getTracks(track):
statement = "SELECT contig, start, end,%(field)s FROM %(replicate)s_intervals" % locals()
data.append(self.get(statement))
idx = []
for x in range(len(data)):
i = IndexedGenome.IndexedGenome()
for contig, start, end, peakval in data[x]:
i.add(contig, start, end, peakval)
idx.append(i)
def _iter(all):
all.sort()
last_contig, first_start, last_end, last_value = all[0]
for contig, start, end, value in all[1:]:
if contig != last_contig or last_end < start:
yield (last_contig, first_start, last_end)
last_contig, first_start, last_end = contig, start, end
else:
last_end = max(last_end, end)
yield (last_contig, first_start, last_end)
if not merged:
all = [x for x in itertools.chain(*data)]
merged = list(_iter(all))
roc_data = []
for contig, start, end in merged:
intervals = []
for i in idx:
try:
intervals.append(list(i.get(contig, start, end)))
except KeyError:
continue
if len(intervals) == 0:
continue
is_repro = len([x for x in intervals if x != []]) == len(data)
value = max([x[2] for x in itertools.chain(*intervals)])
# fpr, tpr
roc_data.append((value, is_repro))
roc_data.sort()
roc_data.reverse()
roc = zip(*Stats.computeROC(roc_data))
result[field] = odict((("FPR", roc[0]), (field, roc[1])))
return result
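The ROC computation at the end of this example can be sketched on its own. The scores below are invented, and the (FPR, TPR) return format is inferred from how the example unpacks the result.
# Sketch: Stats.computeROC over (score, is_true_positive) pairs, highest score first.
from CGAT import Stats
roc_data = [(0.9, True), (0.7, True), (0.4, False), (0.2, True), (0.1, False)]
points = Stats.computeROC(roc_data)
fpr, tpr = zip(*points)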
Example 5: computeFDR
def computeFDR(all_results,
qvalue_method="storey"):
'''compute FDR.
update GOResult structure with field .fdr
'''
# flatten all_results
results = []
for key, data in all_results.iteritems():
results.extend(data.mResults.values())
observed_min_pvalues = [min(
x.mProbabilityOverRepresentation,
x.mProbabilityUnderRepresentation) for x in results]
if qvalue_method == "storey":
# compute fdr via Storey's method
fdr_data = Stats.doFDR(observed_min_pvalues, vlambda=0.1)
E.info("estimated proportion of true null hypotheses = %6.4f" %
fdr_data.mPi0)
if fdr_data.mPi0 < 0.1:
E.warn(
"estimated proportion of true null hypotheses is "
"less than 10%% (%6.4f)" % fdr_data.mPi0)
for result, qvalue in zip(results, fdr_data.mQValues):
result.fdr = qvalue
elif options.qvalue_method == "empirical":
assert options.sample > 0, "requiring a sample size of > 0"
raise NotImplementedError("empirical needs some work")
Example 6: check
def check(self, method):
'''check for length equality and elementwise equality.'''
a = R['p.adjust'](self.pvalues, method=method)
b = Stats.adjustPValues(self.pvalues, method=method)
self.assertEqual(len(a), len(b))
for x, y in zip(a, b):
self.assertAlmostEqual(x, y)
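In isolation, the call being compared against R looks like the sketch below; the method names are assumed to follow R's p.adjust (e.g. "BH", "bonferroni", "holm"), since that is what the test passes through unchanged.
# Sketch: p-value adjustment with Stats.adjustPValues.
from CGAT import Stats
adjusted = Stats.adjustPValues([0.001, 0.02, 0.04, 0.3], method="BH")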
Example 7: testAgainstQValue
def testAgainstQValue(self):
R.assign("pvalues", self.pvalues)
qvalue = R('''qvalue( pvalues )''')
r_qvalues = qvalue[2]
r_pi0 = qvalue[1][0]
new = Stats.doFDRPython(self.pvalues)
self.assertTrue(getRelativeError(r_pi0, new.mPi0) < self.max_error)
for a, b in zip(r_qvalues, new.mQValues):
self.assertAlmostEqual(a, b, places=self.nplaces)
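Judging from this test, Stats.doFDRPython appears to be a pure-Python counterpart of the R qvalue estimate; the sketch below uses only the two fields the test reads (mPi0 and mQValues), with invented p-values.
# Sketch: q-value estimation without going through R.
from CGAT import Stats
pvalues = [0.001, 0.02, 0.03, 0.2, 0.5, 0.8]
result = Stats.doFDRPython(pvalues)
print(result.mPi0)
print(result.mQValues[:5])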
Example 8: main
#......... some code omitted here .........
h.append("df%s" % (model))
h.append("chi%s" % (model))
h.append("lrt%s" % (model))
options.stdout.write("\t".join(h))
options.stdout.write("\tcons\tpassed\tfilename\n")
nmethod = 0
consistent_cols = [None for x in range(len(options.analysis))]
passed_tests = {}
for m in options.models:
passed_tests[m] = 0
for result in results:
row_consistent = None
if options.prefix:
options.stdout.write("%s" % (options.prefix))
options.stdout.write("%i" % nmethod)
options.stdout.write("\t%i" % (result.mNumSequences))
npassed = 0
for model in options.models:
sites = result.mSites[model]
# do significance test
full_model, null_model = model, map_nested_models[model]
lrt = Stats.doLogLikelihoodTest(
result.mSites[full_model].mLogLikelihood,
result.mSites[full_model].mNumParameters,
result.mSites[null_model].mLogLikelihood,
result.mSites[null_model].mNumParameters,
options.significance_threshold)
x = 0
for analysis in options.analysis:
if analysis == "neb":
s = set(
map(extract_f, filter(filter_f, sites.mNEB.mPositiveSites)))
elif analysis == "beb":
s = set(
map(extract_f, filter(filter_f, sites.mBEB.mPositiveSites)))
options.stdout.write("\t%i" % (len(s)))
if not lrt.mPassed:
s = set()
if row_consistent is None:
row_consistent = s
else:
row_consistent = row_consistent.intersection(s)
if consistent_cols[x] is None:
consistent_cols[x] = s
else:
consistent_cols[x] = consistent_cols[
x].intersection(s)
Example 9: doFDR
def doFDR(options, args):
# read input
annotators = []
for filename in args:
infile = open(filename, "r")
annotators.append(readAnnotator(infile))
infile.close()
do_filter = options.fdr_qvalue is not None
extra_headers = set()
for data, fdr, synonyms, input_files in annotators:
for key, value in input_files.iteritems():
extra_headers.add(key)
extra_headers = sorted(list(extra_headers))
# note: id used to be file
options.stdout.write("id\tover\tcategory\tpvalue\tfold\tobserved\texpected\tci95low\tci95high\tstddev\tfdr\tqvalue\t%s\n" %
"\t".join(extra_headers))
# apply filters and create diagnostic plots
for filename, vv in zip(args, annotators):
data, fdr, synonyms, input_files = vv
ninput = len(data)
E.info("processing %s with %i data points" % (filename, ninput))
no_fdr = False
if options.fdr_method in ("annotator", "annotator-estimate"):
pvalues = fdr.keys()
pvalues.sort()
pvalues.reverse()
for pvalue in pvalues:
try:
d = fdr[pvalue]["Significant"]
except KeyError:
continue
if d.mObserved == 0:
E.info("no data after fdr")
break
elif d.mAverage / d.mObserved < options.fdr_qvalue:
E.info("filtering with P-value of %f" % pvalue)
if do_filter:
data = [x for x in data if x.mPValue < pvalue]
for d in data:
if d.mPValue < pvalue:
d.mFDR = 1
d.mQValue = options.fdr_qvalue
break
else:
E.warn("fdr could not be computed - compute more samples (at P = %f, actual fdr=%f)" %
(pvalue, d.mAverage / d.mObserved))
no_fdr = True
if options.fdr_method == "estimate" or (options.fdr_method == "annotator-estimate" and no_fdr):
E.info("estimating FDR from observed P-Values")
pvalues = [x.mPValue for x in data]
vlambda = numpy.arange(0, max(pvalues), 0.05)
try:
qvalues = Stats.doFDR(
pvalues, vlambda=vlambda, fdr_level=options.fdr_qvalue)
except ValueError as msg:
E.warn("fdr could not be computed - no output: %s" % msg)
no_fdr = True
else:
for d, p, q in zip(data, qvalues.mPassed, qvalues.mQValues):
if p:
d.mFDR = 1
d.mQValue = q
if do_filter:
data = [x[0] for x in zip(data, qvalues.mPassed) if x[1]]
if do_filter and no_fdr:
data = []
nremoved = ninput - len(data)
E.info("%s: %i data points left, %i removed" %
(filename, len(data), nremoved))
extra_values = []
for key in extra_headers:
if key in input_files:
extra_values.append(input_files[key])
else:
extra_values.append("")
extra_values = "\t".join(map(str, extra_values))
for d in data:
if d.mFoldChange < 1:
code = "-"
else:
code = "+"
#......... some code omitted here .........
Example 10: Collect
#......... some code omitted here .........
annotator_fdr[annotator_level][dd[0]] = d
continue
else:
if line[0] == "Z":
continue # skip header
if len(line[:-1].split('\t')) != 9:
continue # HACK: accounts for a bug in Annotator output
try:
(z, percentchange, pvalue, observed, expected, low95,
up95, stddev, description) = line[:-1].split('\t')[:9]
except ValueError:
raise ValueError("# parsing error in line: %s" % line[:-1])
d = DataPoint()
d.mAnnotation = description
d.mPValue = float(pvalue)
d.mFoldChange = 1.0 + float(percentchange) / 100.0
data.append(d)
else:
for line in lines:
try:
(code, goid, scount, stotal, spercent, bcount, btotal, bpercent, ratio,
pover, punder, goid, category, description) = line[:-1].split("\t")[:14]
except ValueError:
raise ValueError("# parsing error in line: %s" % line[:-1])
if code == "+":
p = pover
else:
p = punder
d = DataPoint()
d.mAnnotation = description
d.mPValue = float(p)
d.mFoldChange = float(spercent) / float(bpercent)
data.append(d)
# apply filters
for c in delims:
for d in data:
d.mAnnotation = d.mAnnotation.split(c)[0]
for c in ignore:
for d in data:
d.mAnnotation = d.mAnnotation.replace(c, '')
ninput = len(data)
no_fdr = False
# apply filters
if ninput > 0:
if max_qvalue is not None:
if use_annotator_fdr:
pvalues = list(annotator_fdr.keys())
pvalues.sort()
pvalues.reverse()
for pvalue in pvalues:
try:
d = annotator_fdr[pvalue]["Significant"]
except KeyError:
continue
if d.mObserved == 0:
E.info("no data remaining after fdr filtering")
data = []
break
elif d.mAverage / d.mObserved < max_qvalue:
E.info("filtering with P-value of %f" % pvalue)
data = [x for x in data if x.mPValue < pvalue]
break
else:
E.warn("fdr could not be computed - compute more "
"samples (at P = %f, actual fdr=%f)" %
(pvalue, d.mAverage / d.mObserved))
no_fdr = True
if no_fdr:
if use_annotator_fdr:
E.info("estimating FDR from observed P-Values")
pvalues = [x.mPValue for x in data]
vlambda = numpy.arange(0, max(pvalues), 0.05)
try:
qvalues = Stats.doFDR(
pvalues, vlambda=vlambda, fdr_level=max_qvalue)
except ValueError as msg:
E.warn(
"fdr could not be computed - no filtering: %s" % msg)
no_fdr = True
else:
data = [x[0] for x in zip(data, qvalues.mPassed) if x[1]]
elif max_pvalue is not None:
data = [x for x in data if x.mPValue < max_pvalue]
if no_fdr:
data = []
nremoved = ninput - len(data)
return data, nremoved, no_fdr
Example 11: set
options.stdout.write("%i" % nmethod)
options.stdout.write("\t%i" % (result.mNumSequences ))
npassed = 0
for model in options.models:
sites = result.mSites[model]
## do significance test
full_model, null_model = model, map_nested_models[model]
lrt = Stats.doLogLikelihoodTest(
result.mSites[full_model].mLogLikelihood,
result.mSites[full_model].mNumParameters,
result.mSites[null_model].mLogLikelihood,
result.mSites[null_model].mNumParameters,
options.significance_threshold )
x = 0
for analysis in options.analysis:
if analysis == "neb":
s = set(map( extract_f, filter( filter_f, sites.mNEB.mPositiveSites)))
elif analysis == "beb":
s = set(map( extract_f, filter( filter_f, sites.mBEB.mPositiveSites)))
options.stdout.write("\t%i" % ( len(s) ) )
if not lrt.mPassed:
Example 12: main
#......... some code omitted here .........
if len(options.parameters) == 0:
raise "out of parameters - please supply probability or filename with probabilities."
param = options.parameters[0]
del options.parameters[0]
if options.write_separators:
probabilities = IOTools.ReadMap(
IOTools.openFile(param, "r"), map_functions=(str, float))
else:
probability = float(param)
for x in range(len(chunks) - 1):
ninput += 1
matrix, row_headers, col_headers = MatlabTools.readMatrix(
StringIO("".join(lines[chunks[x] + 1:chunks[x + 1]])),
format=options.input_format,
headers=options.headers)
nrows, ncols = matrix.shape
if options.loglevel >= 2:
options.stdlog.write("# read matrix: %i x %i, %i row titles, %i colum titles.\n" %
(nrows, ncols, len(row_headers), len(col_headers)))
if options.write_separators:
options.stdout.write(lines[chunks[x]][1:-1] + "\t")
pairs = []
if options.iteration == "pairwise":
pairs = []
for row1 in range(0, len(row_headers)):
for row2 in range(row1 + 1, len(row_headers)):
pairs.append((row1, row2))
elif options.iteration == "all-vs-all":
pairs = []
for row1 in range(0, len(row_headers)):
for row2 in range(0, len(row_headers)):
if row1 == row2:
continue
pairs.append((row1, row2))
if options.method == "chi-squared":
for row1, row2 in pairs:
row_header1 = row_headers[row1]
row_header2 = row_headers[row2]
try:
result = Stats.doChiSquaredTest(
numpy.vstack((matrix[row1], matrix[row2])))
except ValueError:
nskipped += 1
continue
noutput += 1
options.stdout.write("\t".join((
"%s" % row_header1,
"%s" % row_header2,
"%i" % result.mSampleSize,
"%i" % min(matrix.flat),
"%i" % max(matrix.flat),
options.value_format % result.mChiSquaredValue,
"%i" % result.mDegreesFreedom,
options.pvalue_format % result.mProbability,
"%s" % result.mSignificance,
options.value_format % result.mPhi)) + "\n")
elif options.method == "pearson-chi-squared":
if nrows != 2:
raise ValueError("only implemented for 2xn table")
if options.write_separators:
id = re.match("(\S+)", lines[chunks[x]][1:-1]).groups()[0]
probability = probabilities[id]
for col in range(ncols):
options.stdout.write("%s\t" % col_headers[col])
result = Stats.doPearsonChiSquaredTest(
probability, sum(matrix[:, col]), matrix[0, col])
options.stdout.write("\t".join((
"%i" % result.mSampleSize,
"%f" % probability,
"%i" % result.mObserved,
"%f" % result.mExpected,
options.value_format % result.mChiSquaredValue,
"%i" % result.mDegreesFreedom,
options.pvalue_format % result.mProbability,
"%s" % result.mSignificance,
options.value_format % result.mPhi)))
if col < ncols - 1:
options.stdout.write("\n")
if options.write_separators:
options.stdout.write(lines[chunks[x]][1:-1] + "\t")
options.stdout.write("\n")
E.info("# ninput=%i, noutput=%i, nskipped=%i\n" %
(ninput, noutput, nskipped))
E.Stop()
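The two test calls used in this example can be sketched in isolation with invented values; the result fields printed below are the ones the example writes out.
# Sketch: chi-squared test on a contingency table, and a Pearson chi-squared test
# of an observed count against an expected probability.
import numpy
from CGAT import Stats
table = numpy.array([[12, 5, 3],
                     [8, 9, 7]])
res = Stats.doChiSquaredTest(table)
print(res.mChiSquaredValue, res.mDegreesFreedom, res.mProbability)
res2 = Stats.doPearsonChiSquaredTest(0.5, 20, 14)   # probability, sample size, observed count
print(res2.mExpected, res2.mProbability, res2.mSignificance)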
Example 13: range
else:
for c in options.columns:
for r in range(nrows):
if type(table[c][r]) == types.FloatType and \
table[c][r] < boundary:
table[c][r] = new_value
elif method == "fdr":
pvalues = []
for c in options.columns: pvalues.extend( table[c] )
assert max(pvalues) <= 1.0, "pvalues > 1 in table"
assert min(pvalues) >= 0, "pvalue < 0 in table"
# convert to str to avoid test for float downstream
qvalues = map(str, Stats.adjustPValues( pvalues, method = options.fdr_method ))
x = 0
for c in options.columns:
table[c] = qvalues[x:x+nrows]
x += nrows
elif method == "normalize-by-table":
other_table_name = options.parameters[0]
del options.parameters[0]
other_fields, other_table = CSV.ReadTable( open(other_table_name, "r"), with_header = options.has_headers, as_rows = False )
# convert all values to float
for c in options.columns:
for r in range(nrows):
Example 14: pairwiseGOEnrichment
#......... some code omitted here .........
shared = x_go_categories.intersection(y_go_categories)
c = E.Counter()
for category in shared:
c.shared += 1
xx = genelist1[category]
yy = genelist2[category]
# discard all tests with few observations in the observed
# counts
if xx.mSampleCountsCategory < min_observed_counts and yy.mSampleCountsCategory < min_observed_counts:
c.skipped += 1
continue
observed = (xx.mSampleCountsCategory, yy.mSampleCountsCategory)
aa, bb, cc, dd = \
(xx.mSampleCountsCategory,
yy.mSampleCountsCategory,
xx.mSampleCountsTotal - xx.mSampleCountsCategory,
yy.mSampleCountsTotal - yy.mSampleCountsCategory)
if cc == dd == 0:
c.skipped += 1
continue
c.tested += 1
fisher, pvalue = scipy.stats.fisher_exact(numpy.array(
((aa, bb),
(cc, dd))))
if pvalue < 0.05:
c.significant_pvalue += 1
else:
c.insignificant_pvalue += 1
results.append(PairResult._make((category,
labels[x],
labels[y],
xx.mSampleCountsCategory,
xx.mSampleCountsTotal,
xx.mPValue,
xx.mQValue,
yy.mSampleCountsCategory,
yy.mSampleCountsTotal,
yy.mPValue,
yy.mQValue,
pvalue,
1.0,
go2info[category].mDescription)))
outfile.write("\t".join(map(str,
(labels[x], labels[y],
len(x_go_categories),
len(y_go_categories),
c.shared,
c.skipped,
c.tested,
c.significant_pvalue,
c.insignificant_pvalue))) + "\n")
if options.output_filename_pattern:
outfile.close()
if options.fdr:
pvalues = [x.pvalue for x in results]
if options.qvalue_method == "storey":
# compute fdr via Storey's method
try:
fdr_data = Stats.doFDR(pvalues)
except ValueError as msg:
E.warn("failure in q-value computation: %s" % msg)
E.warn("reverting to Bonferroni correction")
method = "bonf"
fdr_data = Stats.FDRResult()
l = float(len(pvalues))
fdr_data.mQValues = [min(1.0, x * l) for x in pvalues]
qvalues = fdr_data.mQValues
else:
qvalues = R['p.adjust'](pvalues, method=options.qvalue_method)
# update qvalues
results = [x._replace(qvalue=y) for x, y in zip(results, qvalues)]
outfile = getFileName(options,
go=test_ontology,
section='pairs',
set="pairs")
outfile.write("\t".join(PairResult._fields) + "\n")
for result in results:
outfile.write("\t".join(map(str, result)) + "\n")
if options.output_filename_pattern:
outfile.close()
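The Storey-with-fallback pattern used above (and again in Example 15 below) can be written on its own as a sketch. The p-values are invented stand-ins, and E is assumed to be the same logging helper imported by the surrounding scripts; the Bonferroni-style fallback builds an empty Stats.FDRResult exactly as the example does.
# Sketch: try Storey q-values, fall back to a Bonferroni-style correction on failure.
from CGAT import Stats
import CGAT.Experiment as E
pvalues = [0.001, 0.02, 0.03, 0.2, 0.5, 0.8]
try:
    fdr_data = Stats.doFDR(pvalues)
except ValueError as msg:
    E.warn("failure in q-value computation: %s" % msg)
    fdr_data = Stats.FDRResult()
    n = float(len(pvalues))
    fdr_data.mQValues = [min(1.0, p * n) for p in pvalues]
qvalues = fdr_data.mQValues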
Example 15: computeFDRs
def computeFDRs(go_results,
foreground,
background,
options,
test_ontology,
gene2go,
go2info):
pairs = sorted(go_results.mResults.items())
E.info("calculating the FDRs using method `%s`" % options.qvalue_method)
samples = None
observed_min_pvalues = [min(x[1].mProbabilityOverRepresentation,
x[1].mProbabilityUnderRepresentation) for x in pairs]
fdrs = {}
method = options.qvalue_method
if options.qvalue_method == "storey":
# compute fdr via Storey's method
try:
fdr_data = Stats.doFDR(observed_min_pvalues)
except ValueError as msg:
E.warn("failure in q-value computation: %s" % msg)
E.warn("reverting to Bonferroni correction")
method = "bonf"
fdr_data = Stats.FDRResult()
l = float(len(observed_min_pvalues))
fdr_data.mQValues = [min(1.0, x * l) for x in observed_min_pvalues]
for pair, qvalue in zip(pairs, fdr_data.mQValues):
fdrs[pair[0]] = (qvalue, 1.0, 1.0)
elif options.qvalue_method == "empirical":
assert options.sample > 0, "requiring a sample size of > 0"
#######################################################################
# sampling
# for each GO-category:
# get maximum and minimum counts in x samples -> calculate minimum/maximum significance
# get average and stdev counts in x samples -> calculate z-scores for
# test set
samples, simulation_min_pvalues = getSamples(gene2go,
foreground,
background,
options,
test_ontology,
go2info)
# compute P-values from sampling
observed_min_pvalues.sort()
observed_min_pvalues = numpy.array(observed_min_pvalues)
sample_size = options.sample
for k, v in pairs:
if k in samples:
s = samples[k]
else:
raise KeyError("category %s not in samples" % k)
# calculate values for z-score
if s.mStddev > 0:
zscore = abs(
float(v.mSampleCountsCategory) - s.mMean) / s.mStddev
else:
zscore = 0.0
#############################################################
# FDR:
# For each p-Value p at node n:
# a = average number of nodes in each simulation run with P-Value < p
# this can be obtained from the array of all p-values and all nodes
# simply divided by the number of samples.
# aka: expfpos=experimental false positive rate
# b = number of nodes in observed data, that have a P-Value of less than p.
# aka: pos=positives in observed data
# fdr = a/b
pvalue = v.mPValue
# calculate values for FDR:
# nfdr = number of entries with P-Value better than node.
a = 0
while a < len(simulation_min_pvalues) and \
simulation_min_pvalues[a] < pvalue:
a += 1
a = float(a) / float(sample_size)
b = 0
while b < len(observed_min_pvalues) and \
observed_min_pvalues[b] < pvalue:
b += 1
if b > 0:
fdr = min(1.0, float(a) / float(b))
#......... some code omitted here .........