当前位置: 首页>>代码示例>>Python>>正文


Python IOTools类代码示例

本文整理汇总了Python中IOTools的典型用法代码示例。如果您正苦于以下问题:Python IOTools类的具体用法?Python IOTools怎么用?Python IOTools使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了IOTools类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: buildPFAMDomains

def buildPFAMDomains( infiles, outfile ):
    '''map PFAM domains onto current sequence collection.

    The mapping is done by ID lookup: the table ``nrdb50.fasta.tsv``
    (opened relative to the current working directory) supplies a
    ``repid`` -> ``nid`` map, and every fasta entry of the input whose
    identifier is found in that map is written to ``outfile``.

    :param infiles: sequence of filenames; only ``infiles[0]`` is read.
        Its fasta titles must look like
        ``<id>/<start>-<end> <pfam_id>;<description>;``.
    :param outfile: tab-separated output with the columns nid,
        start minus one, end and pfam_id.

    A title that does not match the pattern raises ``AttributeError``
    because the match result is used unchecked.  Entries without a
    ``repid`` in the lookup table are counted as ``missed`` and skipped.
    '''

    infile = infiles[0]

    # NOTE(review): the lookup table name is hard-coded rather than
    # taken from ``infiles`` -- confirm this is intended.
    with IOTools.openFile( "nrdb50.fasta.tsv" ) as inf:
        reader = csv.DictReader( inf, dialect='excel-tab' )
        map_id2nid = dict( (row['repid'], row['nid']) for row in reader )

    # raw string so that the backslashes reach the regex engine untouched
    rx = re.compile( r"(\S+)\/(\d+)-(\d+)\s+(\S+);(.*);" )

    c = E.Counter()
    # the context managers close both files even if parsing fails half-way
    with IOTools.openFile( outfile, "w" ) as outf:
        with IOTools.openFile( infile ) as inf:
            for entry in FastaIterator.iterate( inf ):
                c.input += 1
                pid, start, end, pfam_id, _ = rx.match( entry.title ).groups()
                try:
                    nid = map_id2nid[pid]
                except KeyError:
                    c.missed += 1
                    continue
                outf.write( "%s\t%i\t%i\t%s\n" % (nid, int(start)-1, int(end), pfam_id ) )
                c.output += 1

    E.info( c )
开发者ID:AndreasHeger,项目名称:adda,代码行数:29,代码来源:pairsdb.py

示例2: __call__

    def __call__(self, track, slice=None):
        '''return the matrix stored in ``ortholog_pairs_with_feature.matrix2``.

        ``track`` and ``slice`` are accepted but not used.

        Returns ``None`` if the file does not exist in the current
        working directory, otherwise an ordered dictionary with the
        keys ``matrix``, ``rows`` and ``columns`` as returned by
        ``IOTools.readMatrix``.
        '''
        fn = "ortholog_pairs_with_feature.matrix2"
        if not os.path.exists(fn):
            return None

        # close the handle as soon as the matrix has been read
        with IOTools.openFile(fn) as inf:
            matrix, rownames, colnames = IOTools.readMatrix(inf)

        return odict((("matrix", matrix), ("rows", rownames), ("columns", colnames)))
开发者ID:nishantthakur,项目名称:cgat,代码行数:8,代码来源:orthology.py

示例3: __call__

    def __call__(self, track, slice = None):
        '''return the mean group size per transcript and per gene.

        The GTF file belonging to ``track`` (``self.getFilename(track)``)
        is read twice: once grouped by ``GTF.transcript_iterator`` and
        once by ``GTF.flat_gene_iterator``.  For each grouping the
        ``len`` of every yielded group is collected and averaged.

        ``slice`` is accepted but not used.

        Returns an ordered dictionary with the keys ``transcript`` and
        ``gene``.  ``np.mean`` of an empty list is ``nan``, so an empty
        file yields ``nan`` for both values.
        '''
        filename = self.getFilename(track)

        # each pass gets its own handle, closed once the pass is complete
        with IOTools.openFile(filename) as inf:
            c_transcript = [len(transcript)
                            for transcript in GTF.transcript_iterator(GTF.iterator(inf))]

        with IOTools.openFile(filename) as inf:
            c_gene = [len(gene)
                      for gene in GTF.flat_gene_iterator(GTF.iterator(inf))]

        return odict( ( ("transcript", np.mean(c_transcript)), ("gene",np.mean(c_gene) )) )
开发者ID:pombredanne,项目名称:cgat,代码行数:10,代码来源:CPC.py

示例4: configToDictionary

def configToDictionary( config ):
    '''flatten a parsed configuration into a single dictionary.

    Every option is stored under ``<section>_<key>``.  Options of the
    sections ``general`` and ``DEFAULT`` and all entries of
    ``config.defaults()`` are additionally stored under the plain key.
    All values are passed through ``IOTools.convertValue``.

    :param config: object providing ``sections()``, ``items(section)``
        and ``defaults()`` (e.g. a ConfigParser instance).
    :return: dictionary mapping option names to converted values.
    '''

    p = {}
    for section in config.sections():
        for key, value in config.items( section ):
            v = IOTools.convertValue( value )
            p["%s_%s" % (section, key)] = v
            if section in ( "general", "DEFAULT" ):
                p["%s" % (key)] = v

    # items() is available on both Python 2 and Python 3 dictionaries,
    # whereas iteritems() only exists on Python 2
    for key, value in config.defaults().items():
        p["%s" % (key)] = IOTools.convertValue( value )

    return p
开发者ID:pombredanne,项目名称:cgat,代码行数:14,代码来源:Pipeline.py

示例5: __call__

 def __call__(self, track, slice = None):
     '''return the mean summed interval length per transcript or gene.

     For ``slice == "transcript"`` the GTF file of ``track`` is grouped
     with ``GTF.transcript_iterator``, for ``slice == "gene"`` with
     ``GTF.flat_gene_iterator``.  The length of a group is the sum of
     ``end - start`` over its entries; the mean over all groups is
     returned.

     Any other value of ``slice`` returns ``None`` without opening the
     file.
     '''
     if slice == "transcript":
         grouper = GTF.transcript_iterator
     elif slice == "gene":
         grouper = GTF.flat_gene_iterator
     else:
         return None

     # the handle is closed once all groups have been consumed
     with IOTools.openFile(self.getFilename(track)) as inf:
         lengths = [sum(gtf.end - gtf.start for gtf in group)
                    for group in grouper(GTF.iterator(inf))]

     return np.mean(lengths)
开发者ID:pombredanne,项目名称:cgat,代码行数:15,代码来源:LncRNACounts.py

示例6: buildSummaryMapping

def buildSummaryMapping(infiles, outfile):
    '''collect the ``bam_stats`` table of every track into one file.

    For each track in ``TRACKS`` all rows of
    ``medip_<track>.bam_stats`` are fetched and written to ``outfile``
    as tab-separated lines, prefixed with the track in a ``metatrack``
    column.  The header is taken from the first track; all other
    tracks must return identical column names.

    :param infiles: not used.
    :param outfile: name of the output file.
    '''

    dbh = connect()
    cc = dbh.cursor()

    table = "bam_stats"
    statement = """SELECT *
                         FROM medip_%(track)s.%(table)s"""

    colnames = None
    # the context manager closes the output even if a query fails
    with IOTools.openFile(outfile, "w") as outf:
        for track in TRACKS:

            data = cc.execute(
                statement % {"track": track, "table": table}).fetchall()
            _colnames = [x[0] for x in cc.description]

            if colnames is None:
                # first track: remember its columns and emit the header
                colnames = _colnames
                outf.write("\t".join(["metatrack"] + colnames) + "\n")

            assert colnames == _colnames, \
                "columns of track %s differ from the first track" % track

            for row in data:
                outf.write("\t".join(map(str, (track,) + row)) + "\n")
开发者ID:jmadzo,项目名称:cgat,代码行数:27,代码来源:metapipeline_medip.py

示例7: getParameters

def getParameters( filename = "pipeline.ini" ):
    '''read a config file and return as a dictionary.

    Sections and keys are combined with an underscore. If
    a key without section does not exist, it will be added
    plain; the first section defining a key therefore wins.

    For example::

       [general]
       input=input1.file

       [special]
       input=input2.file

    will be entered as { 'general_input' : "input1.file",
    'input' : "input1.file", 'special_input' : "input2.file" }

    All values are passed through ``IOTools.convertValue``.

    This function also updates the module-wide parameter map
    ``PARAMS``.

    :param filename: name of the configuration file to read.
    :return: dictionary with the parameters found in ``filename``.
    '''
    p = {}

    config = ConfigParser.ConfigParser()
    # "r" is the mode for open(), not an argument of readfp(); readfp()
    # takes the name used in error messages from the file object itself.
    # The handle is closed as soon as the file has been parsed.
    with open(filename, "r") as inf:
        config.readfp(inf)

    for section in config.sections():
        for key, value in config.items( section ):
            v = IOTools.convertValue( value )
            if key not in p:
                p[key] = v
            p["%s_%s" % (section, key)] = v

    PARAMS.update( p )

    return p
开发者ID:Rfam,项目名称:rfam-website,代码行数:35,代码来源:Pipeline.py

示例8: buildSummaryCalledDMRs

def buildSummaryCalledDMRs(infiles, outfile):
    '''build summary of differentially methylated regions.

    For every track in ``TRACKS`` all tables of ``medip_<track>`` whose
    definition mentions both ``control_mean`` and ``treatment_mean``
    are summarised.  One row per table is written with the number of
    rows tested, rows with status ``OK``, significant rows and
    significant rows with an ``l2fold`` below -1 or above 1.

    :param infiles: not used.
    :param outfile: name of the tab-separated output file.
    '''

    dbh = connect()
    cc = dbh.cursor()

    # %% survives the %-formatting below as the SQL wildcard %
    select_tables = """SELECT name FROM medip_%s.sqlite_master
            WHERE type='table' and sql LIKE '%%control_mean%%' and sql LIKE '%%treatment_mean%%'"""

    statement = """SELECT
                         COUNT(*) as ntested,
                         SUM(CASE WHEN status='OK' THEN 1 ELSE 0 END) AS nok,
                         SUM(CASE WHEN significant THEN 1 ELSE 0 END) AS nsignificant,
                         SUM(CASE WHEN significant AND (l2fold < -1 OR l2fold > 1) THEN 1 ELSE 0 END) as n2fold
                         FROM medip_%(track)s.%(table)s"""

    # the context manager closes the output even if a query fails
    with IOTools.openFile(outfile, "w") as outf:
        outf.write("metatrack\ttest\tntested\tnok\tnsignificant\tn2fold\n")

        for track in TRACKS:
            tables = [x[0] for x in cc.execute(select_tables % track).fetchall()]

            for table in tables:
                ntested, nok, nsignificant, n2fold = cc.execute(
                    statement % {"track": track, "table": table}).fetchone()

                outf.write(
                    "\t".join(map(str, (track, table, ntested, nok, nsignificant, n2fold))) + "\n")
开发者ID:jmadzo,项目名称:cgat,代码行数:30,代码来源:metapipeline_medip.py

示例9: buildSummaryCpGCoverage

def buildSummaryCpGCoverage(infiles, outfile):
    '''build summary of the coverage tables of all tracks.

    For every track in ``TRACKS`` all tables of ``medip_<track>`` whose
    name contains ``coveredpos`` are read and their ``coverage``,
    ``ncovered`` and ``pcovered`` columns are written to ``outfile``,
    prefixed with the track (``metatrack``) and the table name
    (``track``).

    :param infiles: not used.
    :param outfile: name of the tab-separated output file.
    '''

    dbh = connect()
    cc = dbh.cursor()

    # %% survives the %-formatting below as the SQL wildcard %
    select_tables = """SELECT name FROM medip_%s.sqlite_master
            WHERE type='table' and name LIKE '%%coveredpos%%' """

    statement = """SELECT '%(track)s' as metatrack,
                         '%(table)s' as track,
                         coverage, ncovered, pcovered FROM medip_%(track)s.%(table)s"""

    # the context manager closes the output even if a query fails
    with IOTools.openFile(outfile, "w") as outf:
        outf.write("metatrack\ttrack\tcoverage\tncovered\tpcovered\n")

        for track in TRACKS:
            tables = [x[0] for x in cc.execute(select_tables % track).fetchall()]

            for table in tables:
                for row in cc.execute(statement % {"track": track, "table": table}):
                    outf.write("\t".join(map(str, row)) + "\n")
开发者ID:jmadzo,项目名称:cgat,代码行数:25,代码来源:metapipeline_medip.py

示例10: __call__

    def __call__(self, track, slice = None):
        '''return the peak shape matrix of ``track`` and ``slice``.

        The matrix is read from
        ``<DATADIR>/<track>.peakshape.tsv.gz.matrix_<slice>.gz``.  If it
        has more than 1000 rows, a regularly spaced subset of rows is
        returned instead of the full matrix.

        Returns ``None`` if the file does not exist or holds no rows,
        otherwise an ordered dictionary with the keys ``matrix``,
        ``rows`` and ``columns``.
        '''
        fn = os.path.join( DATADIR, "%(track)s.peakshape.tsv.gz.matrix_%(slice)s.gz" % locals() )
        if not os.path.exists( fn ):
            return None

        # close the handle as soon as the matrix has been read
        with IOTools.openFile( fn ) as inf:
            matrix, rownames, colnames = IOTools.readMatrix( inf )

        nrows = len(rownames)
        if nrows == 0:
            return None

        if nrows > 1000:
            # NOTE(review): ``nrows / 1000`` is an integer step under
            # Python 2 and a float step under Python 3, so the number of
            # rows kept differs between the two -- confirm the intent.
            take = numpy.array( numpy.floor( numpy.arange( 0, nrows, nrows / 1000 ) ), dtype = int )
            rownames = [ rownames[x] for x in take ]
            matrix = matrix[ take ]

        return odict( (('matrix', matrix),
                       ('rows', rownames),
                       ('columns', colnames)) )
开发者ID:siping,项目名称:cgat,代码行数:17,代码来源:Intervals.py

示例11: getReferenceLincRNA

    def getReferenceLincRNA(self, reference_gtf):
        '''return the number of distinct lincRNA genes in a GTF file.

        :param reference_gtf: name of the GTF file to scan.
        :return: number of different ``gene_id`` values among the
            entries whose ``source`` equals ``"lincRNA"``.
        '''
        # a set gives constant-time duplicate detection; the handle is
        # closed once the file has been scanned
        with IOTools.openFile(reference_gtf) as inf:
            lincs = set(entry.gene_id
                        for entry in GTF.iterator(inf)
                        if entry.source == "lincRNA")
        return len(lincs)
开发者ID:BioXiao,项目名称:CGATPipelines,代码行数:8,代码来源:TranscriptConstruction.py

示例12: checkBlastRuns

def checkBlastRuns( infiles, outfile ):
    '''check if output files are complete.

    For every blast output ``<chunk>.blast.gz`` in ``infiles`` one
    tab-separated row is written to ``outfile``.  It combines

    * the first and last query of the chunk, read from the first line
      and the second-to-last line of ``<chunk>.fasta``,
    * the smallest and largest identifier found in the first column of
      the blast output, the number of distinct identifiers and the
      number of result lines (the first line and lines starting with
      ``#//`` are ignored),
    * the ``has_finished`` flag of the last record parsed from
      ``<infile>.log``, the number of records and all fields of that
      last record.

    A blast file without result lines raises ``ValueError`` and an
    empty log raises ``IndexError``; neither case is handled here.
    '''

    # the context manager closes the output even if an input is broken
    with IOTools.openFile( outfile, "w" ) as outf:

        outf.write( "chunkid\tquery_first\tquery_last\tfound_first\tfound_last\tfound_total\tfound_results\thas_finished\tattempts\t%s\n" %\
                        "\t".join(Logfile.RuntimeInformation._fields))

        for infile in infiles:
            E.debug( "processing %s" % infile)
            chunkid = P.snip( os.path.basename( infile ), ".blast.gz" )
            logfile = infile + ".log"
            chunkfile = P.snip( infile, ".blast.gz" ) + ".fasta"

            ids = set()
            total_results = 0
            with IOTools.openFile( infile ) as inf:
                # the first line is skipped
                inf.readline()
                for line in inf:
                    if line.startswith("#//"):
                        continue
                    ids.add( int(line.split("\t")[0] ) )
                    total_results += 1

            found_first = min(ids)
            found_last = max(ids)
            found_total = len(ids)

            # drop the leading character and the trailing newline
            first_line = IOTools.getFirstLine( chunkfile )
            query_first = first_line[1:-1]
            # of the last two lines, the first one holds the title
            last_lines = IOTools.getLastLine( chunkfile, nlines = 2).split("\n")
            query_last = last_lines[0][1:]

            logresults = Logfile.parse( logfile )

            outf.write( "\t".join( map(str, (
                            chunkid, query_first, query_last,
                            found_first, found_last,
                            found_total, total_results,
                            logresults[-1].has_finished,
                            len(logresults),
                            "\t".join( map(str, logresults[-1]) ) ) ) ) + "\n" )
开发者ID:AndreasHeger,项目名称:adda,代码行数:43,代码来源:pairsdb.py

示例13: __call__

 def __call__(self, track, slice = None):
     '''return the peak shape matrix of ``track`` and ``slice``.

     The matrix is read from
     ``<DATADIR>/liver_vs_testes/<track><self.pattern>.matrix_<slice>.gz``.
     If it has more than ``self.scale`` rows, a regularly spaced subset
     of rows is returned instead of the full matrix.

     Returns ``None`` if the file does not exist or holds no rows,
     otherwise an ordered dictionary with the keys ``matrix``,
     ``rows`` and ``columns``.
     '''
     # ``pattern`` has to be a local name for the locals() lookup below
     pattern = self.pattern
     fn = os.path.join( DATADIR, "liver_vs_testes/%(track)s%(pattern)s.matrix_%(slice)s.gz" % locals() )
     if not os.path.exists( fn ):
         return None

     # close the handle as soon as the matrix has been read
     with IOTools.openFile( fn ) as inf:
         matrix, rownames, colnames = IOTools.readMatrix( inf )

     nrows = len(rownames)
     if nrows == 0:
         return None

     if nrows > self.scale:
         # regularly spaced row indices with a fractional step
         step = float(nrows + 1) / self.scale
         take = numpy.array( numpy.floor( numpy.arange( 0, nrows, step ) ), dtype = int )
         rownames = [ rownames[i] for i in take ]
         matrix = matrix[ take ]

     return odict( (('matrix', matrix),
                    ('rows', rownames),
                    ('columns', colnames)) )
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:19,代码来源:peakshape.py

示例14: getNumColumns

def getNumColumns( filename ):
    '''count the tab-separated fields of the first entry in a bed-file.

    Lines starting with ``#`` or ``track`` are skipped.  The last
    character of the first remaining line is dropped before the line
    is split at tabs.

    Returns 0 if the file contains no such line.
    '''
    with IOTools.openFile( filename ) as handle:
        for record in handle:
            is_header = record.startswith("#") or record.startswith("track")
            if not is_header:
                fields = record[:-1].split("\t")
                return len(fields)
    return 0
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:12,代码来源:Bed.py

示例15: buildNrdb50

def buildNrdb50( infile, outfile ):
    '''build nrdb50

    Renumber sequences: every entry of the fasta file ``infile`` is
    written to ``outfile`` under a running number starting at 1.  A
    companion table ``<outfile>.tsv`` records, for each number, the
    fields parsed from the original title together with the value of
    ``computeHID`` for the sequence.

    Titles must look like
    ``<name> <description> n=<size> Tax=<taxon> RepID=<repid>``; a
    title that does not match raises ``AttributeError`` because the
    match result is used unchecked.
    '''

    # raw string so that the backslashes reach the regex engine untouched
    rx = re.compile( r"(\S+) (.*) n=(\d+) Tax=(.*) RepID=(\S+)" )

    # the context managers close all three files even if parsing fails
    with IOTools.openFile( outfile, "w" ) as outf_fasta, \
            IOTools.openFile( outfile + ".tsv", "w" ) as outf_table:
        outf_table.write("nid\tpid\thid\tdescription\tcluster_size\ttaxon\trepid\n" )

        with IOTools.openFile( infile ) as inf:
            for nid, entry in enumerate( FastaIterator.iterate( inf ), 1 ):
                outf_fasta.write(">%i\n%s\n" % (nid, entry.sequence ) )
                cluster_name, description, cluster_size, taxon, repid = rx.match( entry.title ).groups()
                hid = computeHID( entry.sequence )
                outf_table.write( "\t".join( (str(nid), cluster_name, hid, description, cluster_size, taxon, repid)) + "\n" )
开发者ID:AndreasHeger,项目名称:adda,代码行数:21,代码来源:pairsdb.py


注:本文中的IOTools类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。