Python IOTools.openFile方法代码示例

本文整理汇总了Python中IOTools.openFile方法的典型用法代码示例。如果您正苦于以下问题:Python IOTools.openFile方法的具体用法?Python IOTools.openFile怎么用?Python IOTools.openFile使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在IOTools的用法示例。


示例1: buildPFAMDomains

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildPFAMDomains( infiles, outfile ):
    '''Map PFAM domains onto the current sequence collection.

    The mapping is done by ID lookup: ``nrdb50.fasta.tsv`` is read as a
    tab-separated table and its ``repid`` column is mapped to its ``nid``
    column. Each fasta title in the input is then split into
    ``<id>/<start>-<end> <pfam_id>;<description>;`` and ``<id>`` is
    translated through that map.

    :param infiles: sequence of filenames; only ``infiles[0]`` is read.
    :param outfile: name of the tab-separated output file. Each line holds
        the mapped nid, ``start - 1``, ``end`` and the pfam id.
    :raises AttributeError: if a fasta title does not match the expected
        pattern (``rx.match`` returns ``None``).
    '''
    infile = infiles[0]

    with IOTools.openFile( "nrdb50.fasta.tsv" ) as inf:
        reader = csv.DictReader( inf, dialect='excel-tab' )
        map_id2nid = dict( (row['repid'], row['nid']) for row in reader )

    rx = re.compile( r"(\S+)\/(\d+)-(\d+)\s+(\S+);(.*);" )

    c = E.Counter()
    with IOTools.openFile( outfile, "w" ) as outf, \
            IOTools.openFile( infile ) as inf:
        for entry in FastaIterator.iterate( inf ):
            c.input += 1
            pid, start, end, pfam_id, description = rx.match( entry.title ).groups()
            # Keep the try body down to the lookup that can actually fail.
            try:
                nid = map_id2nid[pid]
            except KeyError:
                # Unknown ids are counted and skipped; they must not be
                # reported as output.
                c.missed += 1
                continue
            outf.write( "%s\t%i\t%i\t%s\n" % (nid, int(start)-1, int(end), pfam_id ) )
            c.output += 1

    E.info( c )

示例2: __call__

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
    def __call__(self, track, slice = None):
        '''Return mean per-transcript and per-gene values for *track*.

        The file returned by ``self.getFilename(track)`` is read twice, once
        grouped by ``GTF.transcript_iterator`` and once grouped by
        ``GTF.flat_gene_iterator``.

        :param track: track name, passed on to ``self.getFilename``.
        :param slice: not used by this method.
        :returns: an ``odict`` with the keys ``"transcript"`` and ``"gene"``
            holding the ``np.mean`` of the collected values.
        '''
        filename = self.getFilename(track)

        # NOTE(review): both loop bodies were missing from the source. It is
        # assumed here that the collected value is the number of entries in
        # each transcript / gene -- confirm against the original module.
        with IOTools.openFile(filename) as inf:
            c_transcript = [len(transcript)
                            for transcript in GTF.transcript_iterator(GTF.iterator(inf))]

        with IOTools.openFile(filename) as inf:
            c_gene = [len(gene)
                      for gene in GTF.flat_gene_iterator(GTF.iterator(inf))]

        return odict( ( ("transcript", np.mean(c_transcript)), ("gene",np.mean(c_gene) )) )

示例3: __call__

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
 def __call__(self, track, slice = None):
     '''Return the mean summed interval length per transcript or per gene.

     For every group yielded by the GTF grouping iterator the lengths
     ``end - start`` of its entries are summed; the mean of these sums is
     returned.

     :param track: track name, passed on to ``self.getFilename``.
     :param slice: ``"transcript"`` groups with ``GTF.transcript_iterator``,
         ``"gene"`` groups with ``GTF.flat_gene_iterator``.
     :returns: ``np.mean`` of the per-group lengths, or ``None`` for any
         other value of *slice*.
     '''
     if slice == "transcript":
         grouper = GTF.transcript_iterator
     elif slice == "gene":
         grouper = GTF.flat_gene_iterator
     else:
         return None

     # The computed length was previously discarded, so the mean was always
     # taken over an empty list. Collect every length instead.
     with IOTools.openFile(self.getFilename(track)) as inf:
         lengths = [sum(gtf.end - gtf.start for gtf in group)
                    for group in grouper(GTF.iterator(inf))]
     return np.mean(lengths)

示例4: buildSummaryCpGCoverage

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildSummaryCpGCoverage(infiles, outfile):
    '''Collect the rows of all ``*coveredpos*`` tables into one file.

    For each track in ``TRACKS`` the tables of schema ``medip_<track>``
    whose name contains ``coveredpos`` are looked up in ``sqlite_master``.
    From each table the columns coverage, ncovered and pcovered are
    written, prefixed by the track and the table name.

    :param infiles: not used by this function.
    :param outfile: name of the tab-separated output file (no header line
        is written).
    '''
    dbh = connect()
    cc = dbh.cursor()

    # Both query templates are loop-invariant.
    tables_query = """SELECT name FROM medip_%s.sqlite_master 
            WHERE type='table' and name LIKE '%%coveredpos%%' """
    statement = """SELECT '%(track)s' as metatrack,
                         '%(table)s' as track,
                         coverage, ncovered, pcovered FROM medip_%(track)s.%(table)s"""

    with IOTools.openFile(outfile, "w") as outf:
        for track in TRACKS:
            # Fetch the table names up front: the same cursor is reused for
            # the per-table queries below.
            tables = [x[0] for x in cc.execute(tables_query % track).fetchall()]

            for table in tables:
                query = statement % {"track": track, "table": table}
                for x in cc.execute(query):
                    outf.write("\t".join(map(str, x)) + "\n")


示例5: buildSummaryCalledDMRs

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildSummaryCalledDMRs(infiles, outfile):
    '''Build a summary of differentially methylated regions.

    For each track in ``TRACKS`` every table of schema ``medip_<track>``
    whose definition mentions both ``control_mean`` and ``treatment_mean``
    is summarised in one output line: track, table, number of rows, number
    of rows with status ``OK``, number of significant rows and number of
    significant rows with ``l2fold`` below -1 or above 1.

    :param infiles: not used by this function.
    :param outfile: name of the tab-separated output file (no header line
        is written).
    '''
    dbh = connect()
    cc = dbh.cursor()

    # Both query templates are loop-invariant.
    tables_query = """SELECT name FROM medip_%s.sqlite_master 
            WHERE type='table' and sql LIKE '%%control_mean%%' and sql LIKE '%%treatment_mean%%'"""
    statement = """SELECT 
                         COUNT(*) as ntested, 
                         SUM(CASE WHEN status='OK' THEN 1 ELSE 0 END) AS nok, 
                         SUM(CASE WHEN significant THEN 1 ELSE 0 END) AS nsignificant, 
                         SUM(CASE WHEN significant AND (l2fold < -1 OR l2fold > 1) THEN 1 ELSE 0 END) as n2fold 
                         FROM medip_%(track)s.%(table)s"""

    with IOTools.openFile(outfile, "w") as outf:
        for track in TRACKS:
            # Fetch the table names up front: the same cursor is reused for
            # the per-table queries below.
            tables = [x[0] for x in cc.execute(tables_query % track).fetchall()]

            for table in tables:
                ntested, nok, nsignificant, n2fold = cc.execute(
                    statement % {"track": track, "table": table}).fetchone()

                outf.write(
                    "\t".join(map(str, (track, table, ntested, nok, nsignificant, n2fold))) + "\n")


示例6: buildSummaryMapping

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildSummaryMapping(infiles, outfile):
    '''Concatenate the ``bam_stats`` table of every track into one file.

    For each track in ``TRACKS`` all rows of ``medip_<track>.bam_stats``
    are written, prefixed by the track name. The header line
    (``metatrack`` followed by the column names) is taken from the first
    track; every later track must have the same column names.

    :param infiles: not used by this function.
    :param outfile: name of the tab-separated output file.
    :raises AssertionError: if the column names differ between tracks.
    '''
    dbh = connect()
    cc = dbh.cursor()

    table = "bam_stats"
    statement = """SELECT * 
                         FROM medip_%(track)s.%(table)s"""

    colnames = None
    with IOTools.openFile(outfile, "w") as outf:
        for track in TRACKS:
            data = cc.execute(
                statement % {"track": track, "table": table}).fetchall()
            _colnames = [x[0] for x in cc.description]

            if colnames is None:
                # The first track defines the header.
                colnames = _colnames
                outf.write("\t".join(["metatrack"] + colnames) + "\n")

            assert colnames == _colnames, \
                "columns of %s differ for track %s" % (table, track)

            for row in data:
                outf.write("\t".join(map(str, (track,) + row)) + "\n")


示例7: __call__

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
    def __call__(self, track, slice=None):
        '''Load the matrix stored in ``ortholog_pairs_with_feature.matrix2``.

        :param track: not used by this method.
        :param slice: not used by this method.
        :returns: an ``odict`` with the keys ``"matrix"``, ``"rows"`` and
            ``"columns"`` as returned by ``IOTools.readMatrix``, or ``None``
            if the file does not exist.
        '''
        fn = "ortholog_pairs_with_feature.matrix2"
        if not os.path.exists(fn):
            # NOTE(review): the body of this guard was missing from the
            # source. Returning no data is assumed -- confirm whether the
            # original raised an error instead.
            return None

        with IOTools.openFile(fn) as x:
            matrix, rownames, colnames = IOTools.readMatrix(x)
        return odict((("matrix", matrix), ("rows", rownames), ("columns", colnames)))

示例8: getReferenceLincRNA

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
    def getReferenceLincRNA(self, reference_gtf):
        '''Count the distinct gene ids of all lincRNA entries in a GTF file.

        :param reference_gtf: name of the GTF file to read.
        :returns: number of different ``gene_id`` values among the entries
            whose ``source`` is ``"lincRNA"``.
        '''
        # A set deduplicates the ids and avoids the linear list lookup per
        # entry.
        with IOTools.openFile(reference_gtf) as inf:
            lincs = set(entry.gene_id
                        for entry in GTF.iterator(inf)
                        if entry.source == "lincRNA")
        return len(lincs)

示例9: checkBlastRuns

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def checkBlastRuns( infiles, outfile ):
    '''Check if output files are complete.

    For every ``<chunk>.blast.gz`` file in *infiles* one line is written
    with the first and last query title of ``<chunk>.fasta``, the smallest
    and largest query id found in the blast output, the number of distinct
    query ids, the number of result lines and the last record parsed from
    ``<chunk>.blast.gz.log``.

    :param infiles: names of the blast output files.
    :param outfile: name of the tab-separated summary file.
    :raises ValueError: if a blast output file contains no result lines
        (``min`` of an empty set).
    '''
    with IOTools.openFile( outfile, "w" ) as outf:

        # NOTE(review): the argument of this format was missing from the
        # source. The field names of the records returned by Logfile.parse
        # are assumed to be available as Logfile.RuntimeInformation._fields
        # -- confirm.
        outf.write( "chunkid\tquery_first\tquery_last\tfound_first\tfound_last\tfound_total\tfound_results\thas_finished\tattempts\t%s\n" %
                    "\t".join( Logfile.RuntimeInformation._fields ) )

        for infile in infiles:
            E.debug( "processing %s" % infile)
            chunkid = P.snip( os.path.basename( infile ), ".blast.gz" )
            logfile = infile + ".log"
            chunkfile = P.snip( infile, ".blast.gz" ) + ".fasta"

            with IOTools.openFile( infile ) as inf:
                # The first line is read and discarded before the result
                # lines are parsed.
                inf.readline()
                ids = set()
                total_results = 0
                for l in inf:
                    if l.startswith("#//"): continue
                    ids.add( int(l.split("\t")[0] ) )
                    total_results += 1
                found_first = min(ids)
                found_last = max(ids)
                found_total = len(ids)

            # Strip the leading character and the trailing newline of the
            # first title line.
            l = IOTools.getFirstLine( chunkfile )
            query_first = l[1:-1]
            l2 = IOTools.getLastLine( chunkfile, nlines = 2).split("\n")
            query_last = l2[0][1:]

            logresults = Logfile.parse( logfile )
            # NOTE(review): the header lists has_finished and attempts, but
            # the source row did not emit them. It is assumed that they are
            # the has_finished attribute of the last log record and the
            # number of log records -- confirm.
            outf.write( "\t".join( map(str, (
                            chunkid, query_first, query_last,
                            found_first, found_last,
                            found_total, total_results,
                            logresults[-1].has_finished,
                            len(logresults),
                            "\t".join( map(str, logresults[-1]) ) ) ) ) + "\n" )

示例10: buildNrdb50

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildNrdb50( infile, outfile ):
    '''Build nrdb50.

    Renumber sequences: every fasta entry of *infile* is written to
    *outfile* with a running number (starting at 1) as its title. The
    information parsed from the original title is written to
    ``<outfile>.tsv`` together with the value of ``computeHID`` for the
    sequence.

    :param infile: name of the input fasta file; titles must look like
        ``<name> <description> n=<size> Tax=<taxon> RepID=<repid>``.
    :param outfile: name of the output fasta file.
    :raises AttributeError: if a title does not match the expected pattern
        (``rx.match`` returns ``None``).
    '''
    rx = re.compile( r"(\S+) (.*) n=(\d+) Tax=(.*) RepID=(\S+)" )

    with IOTools.openFile( outfile, "w" ) as outf_fasta, \
            IOTools.openFile( outfile + ".tsv", "w" ) as outf_table, \
            IOTools.openFile( infile ) as inf:

        outf_table.write("nid\tpid\thid\tdescription\tcluster_size\ttaxon\trepid\n" )

        for nid, entry in enumerate( FastaIterator.iterate( inf ), 1 ):
            outf_fasta.write(">%i\n%s\n" % (nid, entry.sequence ) )
            cluster_name, description, cluster_size, taxon, repid = rx.match( entry.title ).groups()
            hid = computeHID( entry.sequence )
            outf_table.write( "\t".join( (str(nid), cluster_name, hid, description, cluster_size, taxon, repid)) + "\n" )


示例11: getNumColumns

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def getNumColumns( filename ):
    '''Return the number of fields in a bed-file by looking at the first
    line that starts with neither ``#`` nor ``track``.

    Returns 0 if the file is empty or contains only such lines.

    :param filename: name of the bed-file.
    '''
    with IOTools.openFile( filename ) as inf:
        for line in inf:
            if line.startswith( ("#", "track") ): continue
            # Strip only the newline: slicing off the last character would
            # drop real data when the final line is not newline-terminated.
            return len( line.rstrip("\n").split("\t") )
    return 0

示例12: checkBlastRun

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def checkBlastRun( infiles, outfile ):
    '''Build summary stats on file.

    The titles of the fasta file are read as integer ids and compared with
    the first two columns (query, sbjct) of the tab-separated pairsdb file.

    Three files are written:

    * *outfile*: counts of ids, links, self links, distinct queries and
      distinct sbjcts.
    * ``<outfile>.missing_queries.gz``: ids that never appear as query.
    * ``<outfile>.missing_sbjcts.gz``: ids that never appear as sbjct.

    :param infiles: pair ``(pairsdbfile, seqfile)``.
    :param outfile: name of the summary file.
    '''

    pairsdbfile, seqfile = infiles
    nids = set()
    with IOTools.openFile( seqfile ) as inf:
        for r in FastaIterator.iterate( inf ):
            nids.add( int(r.title) )

    query_ids, sbjct_ids = set(), set()
    total_results, self_links = 0, 0
    with IOTools.openFile( pairsdbfile ) as inf:
        # Calling readline() inside the loop threw away every other line
        # and failed at the end of the file.
        # NOTE(review): the first line is skipped once here, as
        # checkBlastRuns does for its blast files -- presumably a header
        # line; confirm.
        inf.readline()
        for l in inf:
            if l.startswith("#//"): continue
            query_id, sbjct_id = l.split("\t")[:2]
            query_ids.add( int(query_id) )
            sbjct_ids.add( int(sbjct_id) )
            if query_id == sbjct_id: self_links += 1
            total_results += 1

    summary = ( ('nids', len(nids)),
                ('links', total_results),
                ('self', self_links),
                ('queries', len(query_ids)),
                ('sbjcts', len(sbjct_ids)) )

    with IOTools.openFile( outfile, "w" ) as outf:
        outf.write( "category\tcounts\n")
        for row in summary:
            outf.write( "\t".join( map(str, row) ) + "\n" )

    with IOTools.openFile( outfile + '.missing_queries.gz', 'w' ) as outf:
        outf.write( 'nid\n' )
        outf.write( "\n".join( map(str, sorted( nids.difference( query_ids ) ) )) + "\n" )

    with IOTools.openFile( outfile + '.missing_sbjcts.gz', 'w' ) as outf:
        outf.write( 'nid\n' )
        outf.write( "\n".join( map(str, sorted( nids.difference( sbjct_ids ) ) )) + "\n" )

示例13: collectGenomeSizes

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def collectGenomeSizes(infile, outfile):
    '''Output the genome sizes for each genome.

    One line ``<name>\\t<length>`` is written per fasta entry of *infile*,
    where ``<name>`` is ``P.snip`` applied to the base name of *infile*
    with the suffix ``.fna`` and ``<length>`` is the length of the entry's
    sequence.

    :param infile: name of the input fasta file.
    :param outfile: name of the tab-separated output file.
    '''
    # Not read inside this function -- presumably a pipeline convention;
    # kept unchanged.
    to_cluster = True

    with open(outfile, "w") as outf, IOTools.openFile(infile) as inf:
        # assume single fasta entry
        for fasta in FastaIterator.iterate(inf):
            name = P.snip(os.path.basename(infile), ".fna")
            length = len(fasta.sequence)
            outf.write("%s\t%s\n" % (name, str(length)))

示例14: buildPFAMFamilies

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildPFAMFamilies( infiles, outfile ):
    '''Extract accession, short name and description of each family.

    ``infiles[1]`` is scanned for ``#=GF AC``, ``#=GF ID`` and ``#=GF DE``
    lines. A record is written whenever the next ``#=GF AC`` line is seen
    and once more at the end of the file.

    :param infiles: sequence of filenames; only ``infiles[1]`` is read.
    :param outfile: name of the tab-separated output file with the columns
        family, short and description.
    '''
    infile = infiles[1]

    rx_family = re.compile( r"#=GF AC\s+(\S+)" )
    rx_description = re.compile( r"#=GF DE\s+(.+)" )
    rx_short = re.compile( r"#=GF ID\s+(.+)" )

    family, description, short = None, None, None
    c = E.Counter()

    with IOTools.openFile( outfile, "w" ) as outf, \
            IOTools.openFile( infile ) as inf:
        outf.write( "family\tshort\tdescription\n" )
        for line in inf:
            if line.startswith( "#=GF AC"):
                if family:
                    # Columns follow the order given in the header line;
                    # description and short used to be swapped.
                    outf.write( "%s\n" % "\t".join( (family,short,description)))
                    c.output += 1
                family = rx_family.match( line[:-1] ).groups()[0]
            elif line.startswith( "#=GF DE"):
                description = rx_description.match( line[:-1] ).groups()[0]
            elif line.startswith( "#=GF ID"):
                short = rx_short.match( line[:-1] ).groups()[0]

        # Flush the last record; skipped when no accession line was seen at
        # all, which would otherwise fail on joining None values.
        if family:
            outf.write( "%s\n" % "\t".join( (family,short,description)))
            c.output += 1

    E.info( c )

示例15: buildAlignmentSizes

# 需要导入模块: import IOTools [as 别名]
# 或者: from IOTools import openFile [as 别名]
def buildAlignmentSizes(infiles, outfile):
    '''Use bed files to sum the total number of bases
    that are aligned to the genomes.

    For every file in *infiles* one line ``<genome>\\t<total>`` is written,
    where ``<genome>`` is ``P.snip`` applied to the base name of the file
    with the suffix ``.bed.gz`` and ``<total>`` is the sum of
    ``end - start`` over all intervals in the file.

    :param infiles: names of the bed files.
    :param outfile: name of the tab-separated output file.
    '''
    with open(outfile, "w") as outf:
        for infile in infiles:
            genome = P.snip(os.path.basename(infile), ".bed.gz")
            with IOTools.openFile(infile) as inf:
                c = sum(bed.end - bed.start for bed in Bed.iterator(inf))
            outf.write("%s\t%s\n" % (genome, str(c)))
