本文整理匯總了Python中CGAT.Stats.doFDR方法的典型用法代碼示例。如果您正苦於以下問題:Python Stats.doFDR方法的具體用法?Python Stats.doFDR怎麽用?Python Stats.doFDR使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類CGAT.Stats
的用法示例。
在下文中一共展示了Stats.doFDR方法的11個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: checkFDR
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
def checkFDR(self, pi0_method):
result = Stats.doFDR(
self.mPvalues, fdr_level=0.05, pi0_method=pi0_method)
R("""require ('qvalue')""")
qvalues = R.qvalue(ro.FloatVector(self.mPvalues),
fdr_level=0.05,
pi0_method=pi0_method)
assert qvalues.names[1] == "pi0"
assert qvalues.names[2] == "qvalues"
assert qvalues.names[5] == "significant"
assert qvalues.names[6] == "lambda"
r_qvalues = qvalues[2]
r_pi0 = qvalues[1][0]
self.assertEqual(len(result.mQValues), len(qvalues[2]))
self.assertEqual(len(result.mLambda), len(qvalues[6]))
self.assertEqual(result.mPi0, r_pi0)
for a, b in zip(result.mQValues, r_qvalues):
self.assertAlmostEqual(a, b, 2, "unequal: %f != %f" % (a, b))
for a, b in zip(result.mPassed, qvalues[5]):
self.assertEqual(
a, b, "threshold-passed flag not equal: %s != %s" % (a, b))
示例2: computeFDR
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
def computeFDR(all_results,
qvalue_method="storey"):
'''compute FDR.
update GOResult structure with field .fdr
'''
# flatten all_results
results = []
for key, data in all_results.iteritems():
results.extend(data.mResults.values())
observed_min_pvalues = [min(
x.mProbabilityOverRepresentation,
x.mProbabilityUnderRepresentation) for x in results]
if qvalue_method == "storey":
# compute fdr via Storey's method
fdr_data = Stats.doFDR(observed_min_pvalues, vlambda=0.1)
E.info("estimated proportion of true null hypotheses = %6.4f" %
fdr_data.mPi0)
if fdr_data.mPi0 < 0.1:
E.warn(
"estimated proportion of true null hypotheses is "
"less than 10%% (%6.4f)" % fdr_data.mPi0)
for result, qvalue in zip(results, fdr_data.mQValues):
result.fdr = qvalue
elif options.qvalue_method == "empirical":
assert options.sample > 0, "requiring a sample size of > 0"
raise NotImplementedError("empirical needs some work")
示例3: doOldFDR
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
def doOldFDR(options, args):
"""apply fdr to output of annotator."""
# read input
annotators = []
for filename in args:
infile = open(filename, "r")
annotators.append(readAnnotator(infile))
infile.close()
# apply filters and create diagnostic plots
for filename, data in zip(args, annotators):
ninput = len(data)
pvalues = [x.mPValue for x in data]
vlambda = numpy.arange(0, max(pvalues), 0.05)
try:
qvalues = Stats.doFDR(
pvalues, vlambda=vlambda, fdr_level=options.fdr)
except ValueError, msg:
E.warn("%s: fdr could not be computed - no filtering: %s" %
(filename, msg))
continue
qvalues.plot(filename + "_diagnostics.png")
data = [x[0] for x in zip(data, qvalues.mPassed) if x[1]]
示例4: analysePolyphen
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
def analysePolyphen(infile, outfile):
'''compute enrichment of SNPs within genes
and deleterious SNPs within SNPs within genes.
del: enrichment of deleterious snps within snps per gene
len: enrichment of snps within genes
com: enrichment of deleterious snps within gene
'''
table = P.toTable(infile)
tablename_map = "polyphen_map"
dbhandle = connect()
cc = dbhandle.cursor()
statement = '''
SELECT i.gene_id,
COUNT(DISTINCT map.locus_id) as nsnps,
COUNT(DISTINCT case t.prediction when 'possiblydamaging' then map.locus_id when 'probablydamaging' then map.locus_id else NULL end) AS ndeleterious,
MAX(s.length)
FROM %(table)s as t,
%(tablename_map)s as map,
annotations.protein_stats as s,
annotations.transcript_info as i
WHERE map.snp_id = t.snp_id AND
i.transcript_id = map.transcript_id AND
s.protein_id = map.protein_id
GROUP BY i.gene_id
''' % locals()
data = cc.execute(statement).fetchall()
statement = '''SELECT DISTINCT i.gene_id, MAX(s.length)
FROM annotations.transcript_info AS i, annotations.protein_stats AS s
WHERE s.protein_id = i.protein_id
GROUP BY i.gene_id'''
gene_ids = cc.execute(statement).fetchall()
total_nsnps = sum([x[1] for x in data])
total_ndel = sum([x[2] for x in data])
total_length = sum([x[1] for x in gene_ids])
del_p = float(total_ndel) / total_nsnps
len_p = float(total_nsnps) / total_length
com_p = float(total_ndel) / total_length
E.info("del: background probability: %i/%i = %f" %
(total_ndel, total_nsnps, del_p))
E.info("len: background probability: %i/%i = %f" %
(total_nsnps, total_length, len_p))
E.info("com: background probability: %i/%i = %f" %
(total_ndel, total_length, com_p))
outf = open(outfile, "w")
outf.write("\t".join(("gene_id", "code",
"length", "nsnps", "ndel",
"del_p", "del_pvalue", "del_qvalue",
"len_p", "len_pvalue", "len_qvalue",
"com_p", "com_pvalue", "com_qvalue", )) + "\n")
del_pvalues, len_pvalues, com_pvalues = [], [], []
for gene_id, nsnps, ndel, length in data:
# use -1, because I need P( x >= X)
# sf = 1 - cdf and cdf = P( x <= X ), thus sf = 1 - P( x <= X ) = P (x
# > X ).
del_pvalues.append(scipy.stats.binom.sf(ndel - 1, nsnps, del_p))
len_pvalues.append(
scipy.stats.binom.sf(nsnps - 1, int(round(length)), len_p))
com_pvalues.append(
scipy.stats.binom.sf(ndel - 1, int(round(length)), com_p))
if len(del_pvalues) > 10:
del_qvalues = Stats.doFDR(del_pvalues).mQValues
else:
E.warn("no FDR computed for del")
del_qvalues = del_pvalues
if len(len_pvalues) > 10:
len_qvalues = Stats.doFDR(len_pvalues).mQValues
else:
E.warn("no FDR computed for del")
len_qvalues = len_pvalues
if len(com_pvalues) > 10:
com_q = Stats.doFDR(com_pvalues).mQValues
else:
E.warn("no FDR computed for com")
com_qvalues = com_pvalues
fdr = PARAMS["polyphen_fdr"]
found = set()
for a, del_pvalue, del_qvalue, len_pvalue, len_qvalue, com_pvalue, com_qvalue in \
zip(data,
del_pvalues, del_qvalues,
len_pvalues, len_qvalues,
com_pvalues, com_qvalues,
):
gene_id, nsnps, ndel, length = a
#.........這裏部分代碼省略.........
示例5: loadGOs
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
def loadGOs(infiles, outfile, tablename):
'''import GO results into a single table.
This method also computes a global QValue over all
tracks, genesets and annotation sets.
Arguments
---------
infiles : string
Output files of several runGO analyses
outfile : string
Output filename, contains log information
tablename : string
Table name for storing results.
'''
header = False
tempf1 = P.getTempFile()
pvalues = []
for infile in infiles:
indir = infile + ".dir"
if not os.path.exists(indir):
continue
track, geneset, annotationset = re.search(
"^(\S+)_vs_(\S+)\.(\S+)", infile).groups()
for filename in glob.glob(os.path.join(indir, "*.overall")):
for line in open(filename, "r"):
if line.startswith("#"):
continue
data = line[:-1].split("\t")
if line.startswith("code"):
if header:
continue
tempf1.write("track\tgeneset\tannotationset\t%s" % line)
header = True
assert data[10] == "pover" and data[
11] == "punder", "format error, expected pover-punder, got %s-%s" % (data[10], data[11])
continue
tempf1.write("%s\t%s\t%s\t%s" %
(track, geneset, annotationset, line))
pvalues.append(min(float(data[10]), float(data[11])))
tempf1.close()
E.info("analysing %i pvalues" % len(pvalues))
fdr = Stats.doFDR(pvalues)
E.info("got %i qvalues" % len(fdr.mQValues))
qvalues = ["global_qvalue"] + fdr.mQValues
tempf2 = P.getTempFile()
for line, qvalue in zip(open(tempf1.name, "r"), qvalues):
tempf2.write("%s\t%s\n" % (line[:-1], str(qvalue)))
tempf2.close()
P.load(tempf2.name, outfile,
tablename=tablename,
options="--allow-empty-file "
"--add-index=category "
"--add-index=track,geneset,annotationset "
"--add-index=geneset "
"--add-index=annotationset "
"--add-index=goid ")
os.unlink(tempf1.name)
os.unlink(tempf2.name)
示例6: main
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
#.........這裏部分代碼省略.........
"%6.4f" % r.score,
"%6.4f" % r.zscore,
"%6.4e" % r.pvalue,
"%6.4e" % r.qvalue,
alternatives) ) )
if options.add_sequence:
s = masked_seqs[int(r.id)][r.start:r.end]
if r.strand == "-": s = Genomics.complement( s )
s = s[:6].upper() + s[6:-6].lower() + s[-6:].upper()
options.stdout.write( "\t%s" % s )
options.stdout.write("\n")
noutput += 1
# output stats
if options.output_stats:
outfile = E.openOutputFile( "fdr" )
outfile.write("bin\thist\tnobserved\n" )
for bin, hist, nobs in zip(matcher.bin_edges, matcher.hist, matcher.nobservations):
outfile.write( "%f\t%f\t%f\n" % (bin, hist, nobs))
outfile.close()
elif options.fdr_control == "xall":
matcher = Nubiscan.MatcherRandomisationSequence( sense_matrix,
samples = options.iterations )
# collect all results
matches = []
for seq in FastaIterator.iterate(options.stdin):
ninput += 1
mm = matcher.run( seq.sequence,
options.arrangements,
qvalue_threshold = None )
for m in mm:
matches.append( m._replace( sequence = seq.title ) )
# estimate qvalues for all matches across all sequences
pvalues = [ x.pvalue for x in matches ]
fdr = Stats.doFDR( pvalues )
qvalues = fdr.mQValues
results = []
for m, qvalue in zip(matches, qvalues):
if qvalue > options.qvalue_threshold: continue
results.append( m._replace( qvalue = qvalue ) )
if options.combine:
results = Nubiscan.combineMotifs( results )
# output
for r in results:
options.stdout.write( "\t".join( (
r.id,
"%i" % r.start,
"%i" % r.end,
r.strand,
r.arrangement,
"%6.4f" % r.score,
"%6.4f" % r.zscore,
"%6.4e" % r.pvalue,
"%6.4e" % r.qvalue ) ) + "\n" )
noutput += 1
elif options.fdr_control == "per-sequence":
matcher = Nubiscan.MatcherRandomisationSequence( sense_matrix,
samples = options.iterations )
for seq in FastaIterator.iterate(options.stdin):
ninput += 1
result = matcher.run( seq.sequence,
options.arrangements,
qvalue_threshold = options.qvalue_threshold )
if options.combine:
result = Nubiscan.combineMotifs( result )
t = re.sub(" .*","", seq.title)
for r in result:
options.stdout.write( "\t".join( (
t,
"%i" % r.start,
"%i" % r.end,
r.strand,
r.arrangement,
"%6.4f" % r.score,
"%6.4f" % r.zscore,
"%f" % r.pvalue,
"%f" % r.qvalue ) ) + "\n" )
noutput += 1
E.info( "ninput=%i, noutput=%i, nskipped=%i" % (ninput, noutput,nskipped) )
## write footer and output benchmark information.
E.Stop()
示例7: doFDR
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
def doFDR(options, args):
# read input
annotators = []
for filename in args:
infile = open(filename, "r")
annotators.append(readAnnotator(infile))
infile.close()
do_filter = options.fdr_qvalue is not None
extra_headers = set()
for data, fdr, synonyms, input_files in annotators:
for key, value in input_files.iteritems():
extra_headers.add(key)
extra_headers = sorted(list(extra_headers))
# note: id used to be file
options.stdout.write("id\tover\tcategory\tpvalue\tfold\tobserved\texpected\tci95low\tci95high\tstddev\tfdr\tqvalue\t%s\n" %
"\t".join(extra_headers))
# apply filters and create diagnostic plots
for filename, vv in zip(args, annotators):
data, fdr, synonyms, input_files = vv
ninput = len(data)
E.info("processing %s with %i data points" % (filename, ninput))
no_fdr = False
if options.fdr_method in ("annotator", "annotator-estimate"):
pvalues = fdr.keys()
pvalues.sort()
pvalues.reverse()
for pvalue in pvalues:
try:
d = fdr[pvalue]["Significant"]
except KeyError:
continue
if d.mObserved == 0:
E.info("no data after fdr")
break
elif d.mAverage / d.mObserved < options.fdr_qvalue:
E.info("filtering with P-value of %f" % pvalue)
if do_filter:
data = [x for x in data if x.mPValue < pvalue]
for d in data:
if d.mPValue < pvalue:
d.mFDR = 1
d.mQValue = options.fdr_qvalue
break
else:
E.warn("fdr could not be computed - compute more samples (at P = %f, actual fdr=%f)" %
(pvalue, d.mAverage / d.mObserved))
no_fdr = True
if options.fdr_method == "estimate" or (options.fdr_method == "annotator-estimate" and no_fdr):
E.info("estimating FDR from observed P-Values")
pvalues = [x.mPValue for x in data]
vlambda = numpy.arange(0, max(pvalues), 0.05)
try:
qvalues = Stats.doFDR(
pvalues, vlambda=vlambda, fdr_level=options.fdr_qvalue)
except ValueError, msg:
E.warn("fdr could not be computed - no output: %s" % msg)
no_fdr = True
else:
for d, p, q in zip(data, qvalues.mPassed, qvalues.mQValues):
if p:
d.mFDR = 1
d.mQValue = q
if do_filter:
data = [x[0] for x in zip(data, qvalues.mPassed) if x[1]]
if do_filter and no_fdr:
data = []
nremoved = ninput - len(data)
E.info("%s: %i data points left, %i removed" %
(filename, len(data), nremoved))
extra_values = []
for key in extra_headers:
if key in input_files:
extra_values.append(input_files[key])
else:
extra_values.append("")
extra_values = "\t".join(map(str, extra_values))
for d in data:
if d.mFoldChange < 1:
code = "-"
else:
code = "+"
#.........這裏部分代碼省略.........
示例8: Collect
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
#.........這裏部分代碼省略.........
annotator_fdr[annotator_level][dd[0]] = d
continue
else:
if line[0] == "Z":
continue # skip header
if len(line[:-1].split('\t')) != 9:
continue # HACK: accounts for a bug in Annotator output
try:
(z, percentchange, pvalue, observed, expected, low95,
up95, stddev, description) = line[:-1].split('\t')[:9]
except ValueError:
raise ValueError("# parsing error in line: %s" % line[:-1])
d = DataPoint()
d.mAnnotation = description
d.mPValue = float(pvalue)
d.mFoldChange = 1.0 + float(percentchange) / 100.0
data.append(d)
else:
for line in lines:
try:
(code, goid, scount, stotal, spercent, bcount, btotal, bpercent, ratio,
pover, punder, goid, category, description) = line[:-1].split("\t")[:14]
except ValueError:
raise ValueError("# parsing error in line: %s" % line[:-1])
if code == "+":
p = pover
else:
p = punder
d = DataPoint()
d.mAnnotation = description
d.mPValue = float(p)
d.mFoldChange = float(spercent) / float(bpercent)
data.append(d)
# apply filters
for c in delims:
for d in data:
d.mAnnotation = d.mAnnotation.split(c)[0]
for c in ignore:
for d in data:
d.mAnnotation = d.mAnnotation.replace(c, '')
ninput = len(data)
no_fdr = False
# apply filters
if ninput > 0:
if max_qvalue is not None:
if use_annotator_fdr:
pvalues = list(annotator_fdr.keys())
pvalues.sort()
pvalues.reverse()
for pvalue in pvalues:
try:
d = annotator_fdr[pvalue]["Significant"]
except KeyError:
continue
if d.mObserved == 0:
E.info("no data remaining after fdr filtering")
data = []
break
elif d.mAverage / d.mObserved < max_qvalue:
E.info("filtering with P-value of %f" % pvalue)
data = [x for x in data if x.mPValue < pvalue]
break
else:
E.warn("fdr could not be computed - compute more "
"samples (at P = %f, actual fdr=%f)" %
(pvalue, d.mAverage / d.mObserved))
no_fdr = True
if no_fdr:
if use_annotator_fdr:
E.info("estimating FDR from observed P-Values")
pvalues = [x.mPValue for x in data]
vlambda = numpy.arange(0, max(pvalues), 0.05)
try:
qvalues = Stats.doFDR(
pvalues, vlambda=vlambda, fdr_level=max_qvalue)
except ValueError as msg:
E.warn(
"fdr could not be computed - no filtering: %s" % msg)
no_fdr = True
else:
data = [x[0] for x in zip(data, qvalues.mPassed) if x[1]]
elif max_pvalue is not None:
data = [x for x in data if x.mPValue < max_pvalue]
if no_fdr:
data = []
nremoved = ninput - len(data)
return data, nremoved, no_fdr
示例9: pairwiseGOEnrichment
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
#.........這裏部分代碼省略.........
shared = x_go_categories.intersection(y_go_categories)
c = E.Counter()
for category in shared:
c.shared += 1
xx = genelist1[category]
yy = genelist2[category]
# discard all tests with few observations in the observed
# counts
if xx.mSampleCountsCategory < min_observed_counts and yy.mSampleCountsCategory < min_observed_counts:
c.skipped += 1
continue
observed = (xx.mSampleCountsCategory, yy.mSampleCountsCategory)
aa, bb, cc, dd = \
(xx.mSampleCountsCategory,
yy.mSampleCountsCategory,
xx.mSampleCountsTotal - xx.mSampleCountsCategory,
yy.mSampleCountsTotal - yy.mSampleCountsCategory)
if cc == dd == 0:
c.skipped += 1
continue
c.tested += 1
fisher, pvalue = scipy.stats.fisher_exact(numpy.array(
((aa, bb),
(cc, dd))))
if pvalue < 0.05:
c.significant_pvalue += 1
else:
c.insignificant_pvalue += 1
results.append(PairResult._make((category,
labels[x],
labels[y],
xx.mSampleCountsCategory,
xx.mSampleCountsTotal,
xx.mPValue,
xx.mQValue,
yy.mSampleCountsCategory,
yy.mSampleCountsTotal,
yy.mPValue,
yy.mQValue,
pvalue,
1.0,
go2info[category].mDescription)))
outfile.write("\t".join(map(str,
(labels[x], labels[y],
len(x_go_categories),
len(y_go_categories),
c.shared,
c.skipped,
c.tested,
c.significant_pvalue,
c.insignicant_pvalue))) + "\n")
if options.output_filename_pattern:
outfile.close()
if options.fdr:
pvalues = [x.pvalue for x in results]
if options.qvalue_method == "storey":
# compute fdr via Storey's method
try:
fdr_data = Stats.doFDR(pvalues)
except ValueError as msg:
E.warn("failure in q-value computation: %s" % msg)
E.warn("reverting to Bonferroni correction")
method = "bonf"
fdr_data = Stats.FDRResult()
l = float(len(pvalues))
fdr_data.mQValues = [min(1.0, x * l) for x in pvalues]
qvalues = fdr_data.mQValues
else:
qvalues = R['p.adjust'](pvalues, method=options.qvalue_method)
# update qvalues
results = [x._replace(qvalue=y) for x, y in zip(results, qvalues)]
outfile = getFileName(options,
go=test_ontology,
section='pairs',
set="pairs")
outfile.write("\t".join(PairResult._fields) + "\n")
for result in results:
outfile.write("\t".join(map(str, result)) + "\n")
if options.output_filename_pattern:
outfile.close()
示例10: computeFDRs
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
def computeFDRs(go_results,
foreground,
background,
options,
test_ontology,
gene2go,
go2info):
pairs = sorted(go_results.mResults.items())
E.info("calculating the FDRs using method `%s`" % options.qvalue_method)
samples = None
observed_min_pvalues = [min(x[1].mProbabilityOverRepresentation,
x[1].mProbabilityUnderRepresentation) for x in pairs]
fdrs = {}
method = options.qvalue_method
if options.qvalue_method == "storey":
# compute fdr via Storey's method
try:
fdr_data = Stats.doFDR(observed_min_pvalues)
except ValueError as msg:
E.warn("failure in q-value computation: %s" % msg)
E.warn("reverting to Bonferroni correction")
method = "bonf"
fdr_data = Stats.FDRResult()
l = float(len(observed_min_pvalues))
fdr_data.mQValues = [min(1.0, x * l) for x in observed_min_pvalues]
for pair, qvalue in zip(pairs, fdr_data.mQValues):
fdrs[pair[0]] = (qvalue, 1.0, 1.0)
elif options.qvalue_method == "empirical":
assert options.sample > 0, "requiring a sample size of > 0"
#######################################################################
# sampling
# for each GO-category:
# get maximum and minimum counts in x samples -> calculate minimum/maximum significance
# get average and stdev counts in x samples -> calculate z-scores for
# test set
samples, simulation_min_pvalues = getSamples(gene2go,
foreground,
background,
options,
test_ontology,
go2info)
# compute P-values from sampling
observed_min_pvalues.sort()
observed_min_pvalues = numpy.array(observed_min_pvalues)
sample_size = options.sample
for k, v in pairs:
if k in samples:
s = samples[k]
else:
raise KeyError("category %s not in samples" % k)
# calculate values for z-score
if s.mStddev > 0:
zscore = abs(
float(v.mSampleCountsCategory) - s.mMean) / s.mStddev
else:
zscore = 0.0
#############################################################
# FDR:
# For each p-Value p at node n:
# a = average number of nodes in each simulation run with P-Value < p
# this can be obtained from the array of all p-values and all nodes
# simply divided by the number of samples.
# aka: expfpos=experimental false positive rate
# b = number of nodes in observed data, that have a P-Value of less than p.
# aka: pos=positives in observed data
# fdr = a/b
pvalue = v.mPValue
# calculate values for FDR:
# nfdr = number of entries with P-Value better than node.
a = 0
while a < len(simulation_min_pvalues) and \
simulation_min_pvalues[a] < pvalue:
a += 1
a = float(a) / float(sample_size)
b = 0
while b < len(observed_min_pvalues) and \
observed_min_pvalues[b] < pvalue:
b += 1
if b > 0:
fdr = min(1.0, float(a) / float(b))
#.........這裏部分代碼省略.........
示例11: loadGOs
# 需要導入模塊: from CGAT import Stats [as 別名]
# 或者: from CGAT.Stats import doFDR [as 別名]
def loadGOs( infiles, outfile, tablename ):
'''import GO results into a single table.
This method also computes a global QValue over all
tracks, genesets and annotation sets.
'''
header = False
tempf1 = P.getTempFile()
pvalues = []
for infile in infiles:
indir = infile + ".dir"
if not os.path.exists( indir ):
continue
track, geneset, annotationset = re.search("^(\S+)_vs_(\S+)\.(\S+)", infile ).groups()
for filename in glob.glob( os.path.join(indir, "*.overall") ):
for line in open(filename, "r" ):
if line.startswith("#"): continue
data = line[:-1].split("\t")
if line.startswith("code"):
if header: continue
tempf1.write( "track\tgeneset\tannotationset\t%s" % line )
header = True
assert data[10] == "pover" and data[11] == "punder", "format error, expected pover-punder, got %s-%s" % (data[10], data[11])
continue
tempf1.write( "%s\t%s\t%s\t%s" % (track, geneset, annotationset, line) )
pvalues.append( min( float(data[10]), float(data[11]) ) )
tempf1.close()
E.info( "analysing %i pvalues" % len(pvalues ))
fdr = Stats.doFDR( pvalues )
E.info( "got %i qvalues" % len(fdr.mQValues ))
qvalues = ["global_qvalue" ] + fdr.mQValues
tempf2 = P.getTempFile()
for line, qvalue in zip( open(tempf1.name,"r"), qvalues ):
tempf2.write( "%s\t%s\n" % (line[:-1], str(qvalue)) )
tempf2.close()
tempfilename = tempf2.name
print tempf1.name
print tempf2.name
statement = '''
python %(scriptsdir)s/csv2db.py %(csv2db_options)s
--allow-empty
--index=category
--index=track,geneset,annotationset
--index=geneset
--index=annotationset
--index=goid
--table=%(tablename)s
< %(tempfilename)s
> %(outfile)s
'''
P.run()