本文整理汇总了Python中IOTools.openFile方法的典型用法代码示例。如果您正苦于以下问题:Python IOTools.openFile方法的具体用法?Python IOTools.openFile怎么用?Python IOTools.openFile使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类IOTools的用法示例。
在下文中一共展示了IOTools.openFile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: buildPFAMDomains
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildPFAMDomains( infiles, outfile ):
    '''Map PFAM domains onto the current sequence collection.

    The mapping is done by ID lookup: the ``repid`` column of
    ``nrdb50.fasta.tsv`` (opened relative to the working directory) is
    used to translate the identifier found in each fasta title into the
    value of the ``nid`` column.

    Arguments
    ---------
    infiles : sequence
        Only ``infiles[0]`` is read; it is parsed with
        ``FastaIterator.iterate``.
    outfile : string
        Receives one tab-separated line per mapped entry:
        nid, start - 1, end, pfam_id.

    Entries whose identifier is not in the lookup table are counted as
    ``missed`` and skipped.  A title that does not match the expected
    pattern still raises ``AttributeError`` (``match`` returns None).
    '''
    infile = infiles[0]

    with IOTools.openFile( "nrdb50.fasta.tsv" ) as inf:
        reader = csv.DictReader( inf, dialect='excel-tab' )
        # later rows win on duplicate repids, as with item assignment
        map_id2nid = dict( (row['repid'], row['nid']) for row in reader )

    # Raw string: the compiled pattern is unchanged, but the backslashes
    # are no longer parsed as (invalid) string escape sequences.
    rx = re.compile( r"(\S+)\/(\d+)-(\d+)\s+(\S+);(.*);" )
    c = E.Counter()

    # ``with`` guarantees that the output is closed even if parsing fails.
    with IOTools.openFile( outfile, "w" ) as outf:
        with IOTools.openFile( infile ) as inf:
            for entry in FastaIterator.iterate( inf ):
                c.input += 1
                pid, start, end, pfam_id, description = rx.match( entry.title ).groups()
                # only the lookup can raise KeyError, so only it is guarded
                try:
                    nid = map_id2nid[pid]
                except KeyError:
                    c.missed += 1
                    continue
                outf.write( "%s\t%i\t%i\t%s\n" % (nid, int(start)-1, int(end), pfam_id ) )
                c.output += 1

    E.info( c )
示例2: __call__
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def __call__(self, track, slice = None):
    '''Return the mean size of transcripts and of genes for *track*.

    The file named by ``self.getFilename(track)`` is read twice: once
    grouped with ``GTF.transcript_iterator`` and once with
    ``GTF.flat_gene_iterator``.  The size of a group is ``len(group)``.
    *slice* is accepted but not used.

    Returns an ordered dictionary with the keys "transcript" and "gene".
    '''
    filename = self.getFilename(track)

    # The iterators are fully consumed inside the ``with`` blocks so that
    # each handle is closed as soon as its counts have been collected.
    with IOTools.openFile(filename) as inf:
        c_transcript = [len(transcript)
                        for transcript in GTF.transcript_iterator(GTF.iterator(inf))]

    with IOTools.openFile(filename) as inf:
        c_gene = [len(gene)
                  for gene in GTF.flat_gene_iterator(GTF.iterator(inf))]

    return odict( ( ("transcript", np.mean(c_transcript)), ("gene", np.mean(c_gene)) ) )
示例3: __call__
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def __call__(self, track, slice = None):
    '''Return the mean summed interval length per transcript or per gene.

    *slice* selects the grouping: "transcript" uses
    ``GTF.transcript_iterator``, "gene" uses ``GTF.flat_gene_iterator``.
    The length of a group is the sum of ``end - start`` over its entries.

    Any other value of *slice* returns None without opening the file.
    '''
    if slice == "transcript":
        grouper = GTF.transcript_iterator
    elif slice == "gene":
        grouper = GTF.flat_gene_iterator
    else:
        # unchanged behaviour: unknown slices produce no value
        return None

    # consume the iterator inside ``with`` so the handle is closed
    with IOTools.openFile(self.getFilename(track)) as inf:
        lengths = [sum(gtf.end - gtf.start for gtf in group)
                   for group in grouper(GTF.iterator(inf))]

    return np.mean(lengths)
示例4: buildSummaryCpGCoverage
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildSummaryCpGCoverage(infiles, outfile):
    '''Collect the coverage tables of all tracks into one summary file.

    For every track in ``TRACKS``, ``medip_<track>.sqlite_master`` is
    queried for tables whose name contains ``coveredpos``.  The columns
    ``coverage``, ``ncovered`` and ``pcovered`` of each such table are
    written to *outfile*, prefixed by the track and the table name.

    *infiles* is not used.
    '''
    # Schema and table names are identifiers and cannot be bound as SQL
    # parameters, hence the string interpolation below.
    find_tables = """SELECT name FROM medip_%s.sqlite_master
WHERE type='table' and name LIKE '%%coveredpos%%' """

    statement = """SELECT '%(track)s' as metatrack,
'%(table)s' as track,
coverage, ncovered, pcovered FROM medip_%(track)s.%(table)s"""

    dbh = connect()
    cc = dbh.cursor()

    # ``with`` closes the output even when a query fails half-way.
    with IOTools.openFile(outfile, "w") as outf:
        outf.write("metatrack\ttrack\tcoverage\tncovered\tpcovered\n")

        for track in TRACKS:
            tables = [x[0] for x in cc.execute(find_tables % track).fetchall()]

            for table in tables:
                # explicit mapping instead of locals(): only these two
                # names are interpolated
                query = statement % {"track": track, "table": table}
                for x in cc.execute(query):
                    outf.write("\t".join(map(str, x)) + "\n")
示例5: buildSummaryCalledDMRs
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildSummaryCalledDMRs(infiles, outfile):
    '''Build a summary of differentially methylated regions.

    For every track in ``TRACKS``, ``medip_<track>.sqlite_master`` is
    queried for tables whose definition mentions both ``control_mean``
    and ``treatment_mean``.  For each such table one line is written to
    *outfile* with the number of rows, the rows with status 'OK', the
    significant rows, and the significant rows with ``l2fold`` outside
    [-1, 1].

    *infiles* is not used.
    '''
    # Schema and table names are identifiers and cannot be bound as SQL
    # parameters, hence the string interpolation below.
    find_tables = """SELECT name FROM medip_%s.sqlite_master
WHERE type='table' and sql LIKE '%%control_mean%%' and sql LIKE '%%treatment_mean%%'"""

    statement = """SELECT
COUNT(*) as ntested,
SUM(CASE WHEN status='OK' THEN 1 ELSE 0 END) AS nok,
SUM(CASE WHEN significant THEN 1 ELSE 0 END) AS nsignificant,
SUM(CASE WHEN significant AND (l2fold < -1 OR l2fold > 1) THEN 1 ELSE 0 END) as n2fold
FROM medip_%(track)s.%(table)s"""

    dbh = connect()
    cc = dbh.cursor()

    # ``with`` closes the output even when a query fails half-way.
    with IOTools.openFile(outfile, "w") as outf:
        outf.write("metatrack\ttest\tntested\tnok\tnsignificant\tn2fold\n")

        for track in TRACKS:
            tables = [x[0] for x in cc.execute(find_tables % track).fetchall()]

            for table in tables:
                # explicit mapping instead of locals()
                query = statement % {"track": track, "table": table}
                ntested, nok, nsignificant, n2fold = cc.execute(query).fetchone()
                outf.write(
                    "\t".join(map(str, (track, table, ntested, nok, nsignificant, n2fold))) + "\n")
示例6: buildSummaryMapping
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildSummaryMapping(infiles, outfile):
    '''Concatenate the ``bam_stats`` table of every track into *outfile*.

    Each row of ``medip_<track>.bam_stats`` is written as a tab-separated
    line prefixed with the track.  The header is taken from the column
    names of the first track; every later track must expose the same
    columns in the same order, otherwise an AssertionError is raised.

    *infiles* is not used.
    '''
    # Schema and table names are identifiers and cannot be bound as SQL
    # parameters, hence the string interpolation below.
    statement = """SELECT *
FROM medip_%(track)s.%(table)s"""

    dbh = connect()
    cc = dbh.cursor()
    table = "bam_stats"
    colnames = None

    # ``with`` closes the output even when a query or the check fails.
    with IOTools.openFile(outfile, "w") as outf:
        for track in TRACKS:
            # explicit mapping instead of locals()
            query = statement % {"track": track, "table": table}
            data = cc.execute(query).fetchall()
            _colnames = [x[0] for x in cc.description]

            if not colnames:
                # first track: its columns define the header
                colnames = _colnames
                outf.write("\t".join(["metatrack"] + colnames) + "\n")

            assert colnames == _colnames, \
                "columns of track %s differ from the header: %s != %s" % (
                    track, _colnames, colnames)

            for row in data:
                outf.write("\t".join(map(str, (track,) + row)) + "\n")
示例7: __call__
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def __call__(self, track, slice=None):
    '''Return the contents of ``ortholog_pairs_with_feature.matrix2``.

    The file is looked up relative to the working directory and parsed
    with ``IOTools.readMatrix``.  *track* and *slice* are not used.

    Returns an ordered dictionary with the keys "matrix", "rows" and
    "columns", or None if the file does not exist.
    '''
    fn = "ortholog_pairs_with_feature.matrix2"
    if not os.path.exists(fn):
        return None

    # close the handle once the matrix has been read
    with IOTools.openFile(fn) as inf:
        matrix, rownames, colnames = IOTools.readMatrix(inf)

    return odict((("matrix", matrix), ("rows", rownames), ("columns", colnames)))
示例8: getReferenceLincRNA
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def getReferenceLincRNA(self, reference_gtf):
    '''Return the number of distinct gene_ids with source "lincRNA".

    *reference_gtf* is opened with ``IOTools.openFile`` and parsed with
    ``GTF.iterator``.
    '''
    # A set gives constant-time duplicate detection; the previous list
    # membership test made the scan quadratic in the number of genes.
    # NOTE(review): assumes gene_id is hashable (presumably a string).
    with IOTools.openFile(reference_gtf) as inf:
        lincs = set(entry.gene_id
                    for entry in GTF.iterator(inf)
                    if entry.source == "lincRNA")
    return len(lincs)
示例9: checkBlastRuns
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def checkBlastRuns( infiles, outfile ):
    '''Check if blast output files are complete.

    For every file in *infiles* (named ``<chunk>.blast.gz``) one line is
    written to *outfile* that combines

    * the first and last query title of ``<chunk>.fasta``,
    * the smallest and largest id, the number of distinct ids and the
      number of result lines found in the blast output, and
    * the last record of ``<infile>.log`` as parsed by ``Logfile.parse``
      together with the number of records in that log.

    A file without any result line reports ``na`` for the smallest and
    largest id instead of failing.
    '''
    with IOTools.openFile( outfile, "w" ) as outf:
        outf.write( "chunkid\tquery_first\tquery_last\tfound_first\tfound_last\tfound_total\tfound_results\thas_finished\tattempts\t%s\n" %
                    "\t".join(Logfile.RuntimeInformation._fields))

        for infile in infiles:
            E.debug( "processing %s" % infile)
            chunkid = P.snip( os.path.basename( infile ), ".blast.gz" )
            logfile = infile + ".log"
            chunkfile = P.snip( infile, ".blast.gz" ) + ".fasta"

            ids = set()
            total_results = 0
            with IOTools.openFile( infile ) as inf:
                # the first line is skipped unconditionally
                inf.readline()
                for line in inf:
                    if line.startswith("#//"):
                        continue
                    ids.add( int(line.split("\t")[0] ) )
                    total_results += 1

            if ids:
                found_first, found_last = min(ids), max(ids)
            else:
                # min()/max() raise ValueError on an empty set.
                # NOTE(review): "na" is a placeholder for "no results";
                # confirm that downstream consumers accept it.
                found_first = found_last = "na"
            found_total = len(ids)

            # first line of the chunk without its first and last character
            query_first = IOTools.getFirstLine( chunkfile )[1:-1]
            # first of the last two lines, without its first character
            last_lines = IOTools.getLastLine( chunkfile, nlines = 2).split("\n")
            query_last = last_lines[0][1:]

            logresults = Logfile.parse( logfile )

            outf.write( "\t".join( map(str, (
                chunkid, query_first, query_last,
                found_first, found_last,
                found_total, total_results,
                logresults[-1].has_finished,
                len(logresults),
                "\t".join( map(str, logresults[-1]) ) ) ) ) + "\n" )
示例10: buildNrdb50
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildNrdb50( infile, outfile ):
    '''Build nrdb50 by renumbering sequences.

    Every entry of the fasta file *infile* receives a running number
    (``nid``, starting at 1).  Two files are written:

    * *outfile*: fasta file with the nid as title.
    * *outfile* + ".tsv": table with nid, cluster name, hid (from
      ``computeHID``), description, cluster size, taxon and repid as
      parsed from the original title.

    A title that does not match the expected pattern raises
    ``AttributeError`` (``match`` returns None).
    '''
    # Raw string: same pattern, without relying on invalid string escapes.
    rx = re.compile( r"(\S+) (.*) n=(\d+) Tax=(.*) RepID=(\S+)" )

    # All three handles are closed on exit, including on error; the input
    # handle was previously never closed.
    with IOTools.openFile( outfile, "w" ) as outf_fasta, \
            IOTools.openFile( outfile + ".tsv", "w" ) as outf_table, \
            IOTools.openFile( infile ) as inf:

        outf_table.write("nid\tpid\thid\tdescription\tcluster_size\ttaxon\trepid\n" )

        for nid, entry in enumerate( FastaIterator.iterate( inf ), 1 ):
            outf_fasta.write(">%i\n%s\n" % (nid, entry.sequence ) )
            cluster_name, description, cluster_size, taxon, repid = rx.match( entry.title ).groups()
            hid = computeHID( entry.sequence )
            outf_table.write( "\t".join( (str(nid), cluster_name, hid, description, cluster_size, taxon, repid)) + "\n" )
示例11: getNumColumns
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def getNumColumns( filename ):
    '''Count the tab-separated fields of the first entry in a bed-file.

    Lines starting with "#" or "track" are skipped.  The count is taken
    from the first remaining line after dropping its final character.

    Returns 0 if no such line exists, for example if the file is empty.
    '''
    skipped_prefixes = ("#", "track")

    with IOTools.openFile( filename ) as handle:
        for record in handle:
            if not record.startswith( skipped_prefixes ):
                # number of fields = number of separators + 1
                return record[:-1].count( "\t" ) + 1

    return 0
示例12: checkBlastRun
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def checkBlastRun( infiles, outfile ):
    '''Build summary statistics on a blast result file.

    Arguments
    ---------
    infiles : tuple
        ``(pairsdbfile, seqfile)``.  *seqfile* is a fasta file whose
        titles are integer ids; *pairsdbfile* is tab-separated with the
        query id and the sbjct id in the first two columns.
    outfile : string
        Receives a two-column table with the number of sequence ids,
        links, self-links, distinct queries and distinct sbjcts.  The ids
        without any query/sbjct hit are written to
        ``<outfile>.missing_queries.gz`` and
        ``<outfile>.missing_sbjcts.gz``.

    Lines starting with ``#//`` and blank lines are ignored.  A first
    line whose ids are not integers is treated as a column header.
    '''
    pairsdbfile, seqfile = infiles

    with IOTools.openFile( seqfile ) as inf:
        nids = set( int(r.title) for r in FastaIterator.iterate( inf ) )

    query_ids, sbjct_ids = set(), set()
    total_results, self_links = 0, 0

    with IOTools.openFile( pairsdbfile ) as inf:
        # The loop previously called inf.readline() on every iteration,
        # which discarded every other line and failed on the empty string
        # returned at the end of the file.  Each line is now used once.
        for lineno, line in enumerate( inf ):
            if line.startswith("#//") or not line.strip():
                continue
            query_id, sbjct_id = line.split("\t")[:2]
            try:
                query_nid, sbjct_nid = int(query_id), int(sbjct_id)
            except ValueError:
                if lineno == 0:
                    # non-numeric first line: column header
                    continue
                raise
            query_ids.add( query_nid )
            sbjct_ids.add( sbjct_nid )
            # compare the parsed ids: the raw sbjct field still carries
            # the newline when the line has only two columns
            if query_nid == sbjct_nid:
                self_links += 1
            total_results += 1

    summary = ( ('nids', len(nids)),
                ('links', total_results),
                ('self', self_links),
                ('queries', len(query_ids)),
                ('sbjcts', len(sbjct_ids)) )

    with IOTools.openFile( outfile, "w" ) as outf:
        outf.write( "category\tcounts\n")
        for category, count in summary:
            outf.write( "\t".join( map(str, (category, count))) + "\n" )

    for suffix, found in ( ('.missing_queries.gz', query_ids),
                           ('.missing_sbjcts.gz', sbjct_ids) ):
        with IOTools.openFile( outfile + suffix, 'w' ) as outf:
            outf.write( 'nid\n' )
            outf.write( "\n".join( map(str, sorted( nids.difference( found ) ) ) ) + "\n" )
示例13: collectGenomeSizes
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def collectGenomeSizes(infile, outfile):
    '''Output the genome size for each entry of a fasta file.

    For every entry of *infile* one line is written to *outfile* with
    the basename of *infile* (without the ``.fna`` suffix) and the length
    of the sequence.  The file is assumed to hold a single entry; several
    entries produce several lines with the same genome name.
    '''
    # Both handles are closed on exit, including on error; the input
    # handle was previously never closed.
    with open(outfile, "w") as outf:
        outf.write("genome\tlength\n")
        with IOTools.openFile(infile) as inf:
            # assume single fasta entry
            for fasta in FastaIterator.iterate(inf):
                name = P.snip(os.path.basename(infile), ".fna")
                # NOTE(review): if fasta.sequence is a plain string,
                # len(fasta.sequence) avoids building this list - confirm.
                length = len(list(fasta.sequence))
                outf.write("%s\t%s\n" % (name, length))
示例14: buildPFAMFamilies
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildPFAMFamilies( infiles, outfile ):
    '''Extract accession, description and short name of PFAM families.

    ``infiles[1]`` is scanned for lines starting with ``#=GF AC``,
    ``#=GF DE`` and ``#=GF ID``.  Each ``#=GF AC`` line starts a new
    family; the previous family is written to *outfile* with the
    description and short name that were last seen.

    The header now lists the columns in the order in which the rows are
    written (family, description, short).
    '''
    infile = infiles[1]
    family, description, short = None, None, None
    c = E.Counter()

    # Raw strings: same patterns, compiled once outside the loop.
    rx_family = re.compile( r"#=GF AC\s+(\S+)" )
    rx_description = re.compile( r"#=GF DE\s+(.+)" )
    rx_short = re.compile( r"#=GF ID\s+(.+)" )

    # ``with`` closes the output even if parsing fails.
    with IOTools.openFile( outfile, "w" ) as outf:
        # The header used to read "family\tshort\tdescription", which
        # did not match the column order of the rows below.
        outf.write( "family\tdescription\tshort\n" )

        with IOTools.openFile( infile ) as inf:
            for line in inf:
                if line.startswith( "#=GF AC"):
                    # NOTE(review): if the input lists "#=GF ID" before
                    # "#=GF AC" within a family, ``short`` already holds
                    # the name of the new family here - confirm against
                    # the input format.
                    if family:
                        outf.write( "%s\n" % "\t".join( (family,description,short)))
                        c.output += 1
                    family = rx_family.match( line[:-1] ).groups()[0]
                elif line.startswith( "#=GF DE"):
                    description = rx_description.match( line[:-1] ).groups()[0]
                elif line.startswith( "#=GF ID"):
                    short = rx_short.match( line[:-1] ).groups()[0]

        # Flush the last family.  Without the guard an input without any
        # family failed when joining None values.
        if family:
            outf.write( "%s\n" % "\t".join( (family,description,short)))
            # was ``c.outptut``, so the last family was counted under a
            # misspelt key
            c.output += 1

    E.info(c)
示例15: buildAlignmentSizes
# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildAlignmentSizes(infiles, outfile):
    '''Sum the number of bases covered by the intervals of each bed file.

    For every file in *infiles* (named ``<genome>.bed.gz``) one line is
    written to *outfile* with the genome name and the sum of
    ``end - start`` over all of its intervals.  Overlapping intervals are
    counted once per interval.
    '''
    with open(outfile, "w") as outf:
        outf.write("genome\tsize\n")
        for infile in infiles:
            genome = P.snip(os.path.basename(infile), ".bed.gz")
            # close each input as soon as it has been summed; the handles
            # were previously left open
            with IOTools.openFile(infile) as inf:
                c = sum(bed.end - bed.start for bed in Bed.iterator(inf))
            outf.write("%s\t%s\n" % (genome, c))