This article collects typical usage examples of IOTools in Python. If you are unsure what IOTools does or how to use it, the selected examples below may help. 15 code examples of IOTools are shown, ordered by popularity by default.
Example 1: buildPFAMDomains
def buildPFAMDomains( infiles, outfile ):
    '''map PFAM domains onto current sequence collection.
    The mapping is done by ID lookup.'''
    infile = infiles[0]
    with IOTools.openFile( "nrdb50.fasta.tsv") as inf:
        reader = csv.DictReader( inf, dialect='excel-tab' )
        map_id2nid = {}
        for row in reader:
            map_id2nid[row['repid']] = row['nid']

    rx = re.compile( "(\S+)\/(\d+)-(\d+)\s+(\S+);(.*);" )

    c = E.Counter()
    outf = IOTools.openFile( outfile, "w" )
    with IOTools.openFile( infile ) as inf:
        for entry in FastaIterator.iterate( inf ):
            c.input += 1
            pid, start, end, pfam_id, description = rx.match( entry.title ).groups()
            try:
                outf.write( "%s\t%i\t%i\t%s\n" % (map_id2nid[pid], int(start)-1, int(end), pfam_id ) )
            except KeyError:
                c.missed += 1
                continue
            c.output += 1

    outf.close()
    E.info( c )
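The regular expression above assumes Pfam-style FASTA titles of the form '<repid>/<start>-<end> <accession>;<description>;'. A self-contained sketch of that parsing step, with a made-up title for illustration:

import re

# Same pattern as in buildPFAMDomains: "<repid>/<start>-<end> <pfam_acc>;<description>;"
rx = re.compile(r"(\S+)\/(\d+)-(\d+)\s+(\S+);(.*);")

# Hypothetical Pfam-style title, for illustration only
title = "Q9XYZ1/23-119 PF00069;Protein kinase domain;"

pid, start, end, pfam_id, description = rx.match(title).groups()
print(pid, int(start) - 1, int(end), pfam_id)   # Q9XYZ1 22 119 PF00069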
Example 2: __call__
def __call__(self, track, slice=None):
    fn = "ortholog_pairs_with_feature.matrix2"
    if not os.path.exists(fn):
        return

    x = IOTools.openFile(fn)
    matrix, rownames, colnames = IOTools.readMatrix(x)

    return odict((("matrix", matrix), ("rows", rownames), ("columns", colnames)))
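IOTools.readMatrix is used here to turn a labelled, tab-separated matrix file into a numpy array plus row and column labels, which the tracker then returns under the keys 'matrix', 'rows' and 'columns'. The sketch below only illustrates that assumed data layout (a header row of column names and a first column of row names) with plain numpy; it is not the IOTools implementation itself.

import numpy

def read_labelled_matrix(lines):
    """Parse a tab-separated matrix with a header row and row labels
    into (matrix, rownames, colnames) - the structure the trackers
    above return as 'matrix', 'rows' and 'columns'."""
    header = lines[0].rstrip("\n").split("\t")
    colnames = header[1:]
    rownames, rows = [], []
    for line in lines[1:]:
        fields = line.rstrip("\n").split("\t")
        rownames.append(fields[0])
        rows.append([float(x) for x in fields[1:]])
    return numpy.array(rows), rownames, colnames

# Tiny made-up example
lines = ["gene\tliver\ttestes\n",
         "geneA\t1.0\t2.5\n",
         "geneB\t0.2\t3.1\n"]
matrix, rownames, colnames = read_labelled_matrix(lines)
print(matrix.shape, rownames, colnames)   # (2, 2) ['geneA', 'geneB'] ['liver', 'testes']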
Example 3: __call__
def __call__(self, track, slice = None):
    c_transcript = []
    c_gene = []
    for transcript in GTF.transcript_iterator(GTF.iterator(IOTools.openFile(self.getFilename(track)))):
        c_transcript.append(len(transcript))
    for gene in GTF.flat_gene_iterator(GTF.iterator(IOTools.openFile(self.getFilename(track)))):
        c_gene.append(len(gene))
    return odict( ( ("transcript", np.mean(c_transcript)), ("gene", np.mean(c_gene)) ) )
Example 4: configToDictionary
def configToDictionary( config ):
    p = {}
    for section in config.sections():
        for key, value in config.items( section ):
            v = IOTools.convertValue( value )
            p["%s_%s" % (section, key)] = v
            if section in ( "general", "DEFAULT" ):
                p["%s" % (key)] = v

    for key, value in config.defaults().iteritems():
        p["%s" % (key)] = IOTools.convertValue( value )

    return p
Example 5: __call__
def __call__(self, track, slice = None):
    if slice == "transcript":
        lengths_transcripts = []
        for transcript in GTF.transcript_iterator(GTF.iterator(IOTools.openFile(self.getFilename(track)))):
            length = sum([gtf.end - gtf.start for gtf in transcript])
            lengths_transcripts.append(length)
        return np.mean(lengths_transcripts)
    elif slice == "gene":
        lengths_genes = []
        for gene in GTF.flat_gene_iterator(GTF.iterator(IOTools.openFile(self.getFilename(track)))):
            length = sum([gtf.end - gtf.start for gtf in gene])
            lengths_genes.append(length)
        return np.mean(lengths_genes)
Example 6: buildSummaryMapping
def buildSummaryMapping(infiles, outfile):
    dbh = connect()
    cc = dbh.cursor()

    outf = IOTools.openFile(outfile, "w")

    table = "bam_stats"
    colnames = None
    for track in TRACKS:
        statement = """SELECT *
        FROM medip_%(track)s.%(table)s"""

        data = cc.execute(statement % locals()).fetchall()
        _colnames = [x[0] for x in cc.description]
        if not colnames:
            colnames = _colnames
            outf.write("\t".join(["metatrack"] + colnames) + "\n")
        assert colnames == _colnames

        for row in data:
            outf.write("\t".join(map(str, (track,) + row)) + "\n")

    outf.close()
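The column names in this example come from cc.description, which DB-API cursors populate after a SELECT: each entry is a tuple whose first element is the column name. A self-contained sqlite3 sketch of that pattern (the table, columns and values are invented for illustration):

import sqlite3

dbh = sqlite3.connect(":memory:")
cc = dbh.cursor()
cc.execute("CREATE TABLE bam_stats (track TEXT, reads_total INT, reads_mapped INT)")
cc.execute("INSERT INTO bam_stats VALUES ('sample1', 1000, 950)")

data = cc.execute("SELECT * FROM bam_stats").fetchall()
colnames = [x[0] for x in cc.description]     # column names of the last SELECT

print("\t".join(["metatrack"] + colnames))
for row in data:
    print("\t".join(map(str, ("run1",) + row)))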
Example 7: getParameters
def getParameters( filename = "pipeline.ini" ):
'''read a config file and return as a dictionary.
Sections and keys are combined with an underscore. If
a key without section does not exist, it will be added
plain.
For example::
[general]
input=input1.file
[special]
input=input2.file
will be entered as { 'general_input' : "input1.file",
'input: "input1.file", 'special_input' : "input2.file" }
This function also updates the module-wide parameter map.
'''
p = {}
config = ConfigParser.ConfigParser()
config.readfp(open(filename),"r")
for section in config.sections():
for key,value in config.items( section ):
v = IOTools.convertValue( value )
if key not in p: p[key] = v
p["%s_%s" % (section,key)] = v
PARAMS.update( p )
return p
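A minimal sketch of the section/key flattening described in the docstring, using the standard library config parser directly; the raw string value stands in for IOTools.convertValue, which (judging from its use in these examples) coerces config strings to numbers or booleans where possible:

try:
    import configparser                  # Python 3
except ImportError:
    import ConfigParser as configparser  # Python 2, as used in the pipeline code
import io

ini_text = u"""
[general]
input=input1.file

[special]
input=input2.file
"""

config = configparser.ConfigParser()
if hasattr(config, "read_string"):
    config.read_string(ini_text)
else:
    config.readfp(io.StringIO(ini_text))

p = {}
for section in config.sections():
    for key, value in config.items(section):
        if key not in p:
            p[key] = value                     # stand-in for IOTools.convertValue(value)
        p["%s_%s" % (section, key)] = value

print(p)
# {'input': 'input1.file', 'general_input': 'input1.file', 'special_input': 'input2.file'}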
Example 8: buildSummaryCalledDMRs
def buildSummaryCalledDMRs(infiles, outfile):
    '''build summary of differentially methylated regions.'''

    dbh = connect()
    cc = dbh.cursor()

    outf = IOTools.openFile(outfile, "w")
    outf.write("metatrack\ttest\tntested\tnok\tnsignificant\tn2fold\n")

    for track in TRACKS:
        tables = [x[0] for x in cc.execute(
            """SELECT name FROM medip_%s.sqlite_master
            WHERE type='table' and sql LIKE '%%control_mean%%' and sql LIKE '%%treatment_mean%%'""" % track
        ).fetchall()]

        for table in tables:
            statement = """SELECT
            COUNT(*) as ntested,
            SUM(CASE WHEN status='OK' THEN 1 ELSE 0 END) AS nok,
            SUM(CASE WHEN significant THEN 1 ELSE 0 END) AS nsignificant,
            SUM(CASE WHEN significant AND (l2fold < -1 OR l2fold > 1) THEN 1 ELSE 0 END) as n2fold
            FROM medip_%(track)s.%(table)s"""

            ntested, nok, nsignificant, n2fold = cc.execute(
                statement % locals()).fetchone()

            outf.write(
                "\t".join(map(str, (track, table, ntested, nok, nsignificant, n2fold))) + "\n")

    outf.close()
Example 9: buildSummaryCpGCoverage
def buildSummaryCpGCoverage(infiles, outfile):
    '''build summary of CpG coverage.'''

    dbh = connect()
    cc = dbh.cursor()

    outf = IOTools.openFile(outfile, "w")
    outf.write("metatrack\ttrack\tcoverage\tncovered\tpcovered\n")

    for track in TRACKS:
        tables = [x[0] for x in cc.execute(
            """SELECT name FROM medip_%s.sqlite_master
            WHERE type='table' and name LIKE '%%coveredpos%%' """ % track
        ).fetchall()]

        for table in tables:
            statement = """SELECT '%(track)s' as metatrack,
            '%(table)s' as track,
            coverage, ncovered, pcovered FROM medip_%(track)s.%(table)s"""

            for x in cc.execute(statement % locals()):
                outf.write("\t".join(map(str, x)) + "\n")

    outf.close()
Example 10: __call__
def __call__(self, track, slice = None):
    fn = os.path.join( DATADIR, "%(track)s.peakshape.tsv.gz.matrix_%(slice)s.gz" % locals() )
    if not os.path.exists( fn ):
        return

    matrix, rownames, colnames = IOTools.readMatrix( IOTools.openFile( fn ))

    nrows = len(rownames)
    if nrows == 0:
        return

    if nrows > 1000:
        take = numpy.array( numpy.floor( numpy.arange( 0, nrows, nrows / 1000 ) ), dtype = int )
        rownames = [ rownames[x] for x in take ]
        matrix = matrix[ take ]

    return odict( (('matrix', matrix),
                   ('rows', rownames),
                   ('columns', colnames)) )
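The take computation above thins the matrix to at most 1000 evenly spaced rows before it is returned for plotting. A standalone numpy sketch of the same downsampling, with a smaller target so the effect is visible:

import numpy

matrix = numpy.arange(50).reshape(10, 5)      # 10 rows, 5 columns of dummy data
rownames = ["row%i" % i for i in range(10)]
max_rows = 4                                  # the tracker above uses 1000

nrows = len(rownames)
# evenly spaced row indices, same idiom as in the tracker
take = numpy.array(numpy.floor(numpy.arange(0, nrows, nrows / float(max_rows))), dtype=int)

print(take)                           # [0 2 5 7]
print([rownames[x] for x in take])    # ['row0', 'row2', 'row5', 'row7']
print(matrix[take].shape)             # (4, 5)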
Example 11: getReferenceLincRNA
def getReferenceLincRNA(self, reference_gtf):
    lincs = []
    for entry in GTF.iterator(IOTools.openFile(reference_gtf)):
        if entry.source == "lincRNA":
            if entry.gene_id not in lincs:
                lincs.append(entry.gene_id)
    return len(lincs)
Example 12: checkBlastRuns
def checkBlastRuns( infiles, outfile ):
    '''check if output files are complete.
    '''

    outf = IOTools.openFile( outfile, "w" )

    outf.write( "chunkid\tquery_first\tquery_last\tfound_first\tfound_last\tfound_total\tfound_results\thas_finished\tattempts\t%s\n" %\
                    "\t".join(Logfile.RuntimeInformation._fields))

    for infile in infiles:
        E.debug( "processing %s" % infile)
        chunkid = P.snip( os.path.basename( infile ), ".blast.gz" )
        logfile = infile + ".log"
        chunkfile = P.snip( infile, ".blast.gz" ) + ".fasta"

        with IOTools.openFile( infile ) as inf:
            l = inf.readline()
            ids = set()
            total_results = 0
            for l in inf:
                if l.startswith("#//"): continue
                ids.add( int(l.split("\t")[0] ) )
                total_results += 1
            found_first = min(ids)
            found_last = max(ids)
            found_total = len(ids)

        l = IOTools.getFirstLine( chunkfile )
        query_first = l[1:-1]
        l2 = IOTools.getLastLine( chunkfile, nlines = 2).split("\n")
        query_last = l2[0][1:]

        logresults = Logfile.parse( logfile )

        outf.write( "\t".join( map(str, (\
                        chunkid, query_first, query_last,
                        found_first, found_last,
                        found_total, total_results,
                        logresults[-1].has_finished,
                        len(logresults),
                        "\t".join( map(str, logresults[-1]) ) ) ) ) + "\n" )

    outf.close()
Example 13: __call__
def __call__(self, track, slice = None):
    pattern = self.pattern
    fn = os.path.join( DATADIR, "liver_vs_testes/%(track)s%(pattern)s.matrix_%(slice)s.gz" % locals() )
    if not os.path.exists( fn ):
        return

    x = IOTools.openFile( fn )
    matrix, rownames, colnames = IOTools.readMatrix( x )

    nrows = len(rownames)
    if nrows == 0:
        return

    if nrows > self.scale:
        take = numpy.array( numpy.floor( numpy.arange( 0, nrows, float(nrows + 1) / self.scale ) ), dtype = int )
        rownames = [ rownames[x] for x in take ]
        matrix = matrix[ take ]

    return odict( (('matrix', matrix),
                   ('rows', rownames),
                   ('columns', colnames)) )
Example 14: getNumColumns
def getNumColumns( filename ):
    '''return number of fields in bed-file by looking at the first
    entry.

    Returns 0 if file is empty.
    '''
    with IOTools.openFile( filename ) as inf:
        for line in inf:
            if line.startswith("#"): continue
            if line.startswith("track"): continue
            return len(line[:-1].split("\t"))
    return 0
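A small usage sketch of the same column counting, replicated with the built-in open() so it runs without CGAT installed; IOTools.openFile is assumed to behave like open() for plain files and to additionally decompress .gz files transparently:

import tempfile

bed_text = ("track name=example\n"
            "chr1\t100\t200\tpeak1\t60\t+\n"
            "chr1\t300\t400\tpeak2\t45\t-\n")

def get_num_columns(filename):
    # same logic as getNumColumns above, but with the built-in open()
    with open(filename) as inf:
        for line in inf:
            if line.startswith("#"):
                continue
            if line.startswith("track"):
                continue
            return len(line[:-1].split("\t"))
    return 0

with tempfile.NamedTemporaryFile("w", suffix=".bed", delete=False) as tmp:
    tmp.write(bed_text)

print(get_num_columns(tmp.name))   # 6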
Example 15: buildNrdb50
def buildNrdb50( infile, outfile ):
    '''build nrdb50.

    Renumber sequences.'''

    outf_fasta = IOTools.openFile( outfile, "w" )
    outf_table = IOTools.openFile( outfile + ".tsv", "w" )
    outf_table.write("nid\tpid\thid\tdescription\tcluster_size\ttaxon\trepid\n" )

    rx = re.compile( "(\S+) (.*) n=(\d+) Tax=(.*) RepID=(\S+)" )

    nid = 1
    for entry in FastaIterator.iterate( IOTools.openFile( infile )):
        outf_fasta.write(">%i\n%s\n" % (nid, entry.sequence ) )
        cluster_name, description, cluster_size, taxon, repid = rx.match( entry.title ).groups()
        hid = computeHID( entry.sequence )
        outf_table.write( "\t".join( (str(nid), cluster_name, hid, description, cluster_size, taxon, repid)) + "\n" )
        nid += 1

    outf_fasta.close()
    outf_table.close()
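The regular expression in buildNrdb50 assumes UniRef50-style FASTA titles of the form '<cluster> <description> n=<size> Tax=<taxon> RepID=<id>'. A self-contained sketch of that parsing step on a made-up title:

import re

rx = re.compile(r"(\S+) (.*) n=(\d+) Tax=(.*) RepID=(\S+)")

# Invented UniRef50-style title, for illustration only
title = "UniRef50_P99999 Cytochrome c n=211 Tax=Homo sapiens RepID=CYC_HUMAN"

cluster_name, description, cluster_size, taxon, repid = rx.match(title).groups()
print(cluster_name)   # UniRef50_P99999
print(description)    # Cytochrome c
print(cluster_size)   # 211
print(taxon)          # Homo sapiens
print(repid)          # CYC_HUMAN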