当前位置: 首页>>代码示例>>Python>>正文


Python Pipeline.snip方法代码示例

本文整理汇总了 Python 中 Pipeline.snip 方法的典型用法代码示例。如果您正苦于以下问题:Python Pipeline.snip 方法的具体用法是什么?Python Pipeline.snip 怎么用?有哪些 Python Pipeline.snip 的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 Pipeline 的用法示例。


下文一共展示了 Pipeline.snip 方法的 9 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Python 代码示例。

示例1: chimeraTargets

# 需要导入模块: import Pipeline [as 别名]
# 或者: from Pipeline import snip [as 别名]
def chimeraTargets(alignment_files, contig_files):
    '''
    build the task parameters for scoring chimericity

    Every alignment file is paired with every contig file.

    Returns a list (not a generator) of ``[outfile, alignment, contig]``
    entries, where outfile is
    ``chimeras.dir/<alignment without ".bam">.chimeras``. Note that
    outfile depends on the alignment only, so several contig files
    yield entries that share the same outfile.
    '''
    parameters = []
    # iterate over the alignment_files argument; the previous code
    # referenced an undefined name (genome_files) here
    for alignment, contig in itertools.product(alignment_files, contig_files):
        outfile = os.path.join(
            "chimeras.dir", P.snip(alignment, ".bam") + ".chimeras")
        parameters.append([outfile, alignment, contig])
    return parameters
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:12,代码来源:pipeline_metagenomebenchmark.py

示例2: checkBlastRuns

# 需要导入模块: import Pipeline [as 别名]
# 或者: from Pipeline import snip [as 别名]
def checkBlastRuns( infiles, outfile ):
    '''check if output files are complete.

    For every blast result file in *infiles* (names ending in
    ``.blast.gz``) one tab-separated row is written to *outfile*. The
    row holds the chunk id, the first and last query read from the
    matching ``.fasta`` chunk file, the smallest and largest id found
    in the results, the number of distinct ids, the number of result
    lines, and the fields of the last entry parsed from ``<infile>.log``.

    Raises ValueError (from min/max) if a result file contains no
    result lines.
    '''

    # the with-block guarantees the summary file is closed even if one
    # of the input files cannot be processed
    with IOTools.openFile( outfile, "w" ) as outf:

        outf.write(
            "chunkid\tquery_first\tquery_last\tfound_first\tfound_last\t"
            "found_total\tfound_results\thas_finished\tattempts\t%s\n" %
            "\t".join(Logfile.RuntimeInformation._fields))

        for infile in infiles:
            E.debug( "processing %s" % infile)
            chunkid = P.snip( os.path.basename( infile ), ".blast.gz" )
            logfile = infile + ".log"
            chunkfile = P.snip( infile, ".blast.gz" ) + ".fasta"

            ids = set()
            total_results = 0
            with IOTools.openFile( infile ) as inf:
                # the first line is skipped unconditionally
                # (presumably a header line -- confirm)
                inf.readline()
                for line in inf:
                    if line.startswith("#//"):
                        continue
                    ids.add( int(line.split("\t")[0]) )
                    total_results += 1

            found_first = min(ids)
            found_last = max(ids)
            found_total = len(ids)

            # drop the first and the last character of the first line
            # (presumably the fasta ">" marker and the newline)
            query_first = IOTools.getFirstLine( chunkfile )[1:-1]
            last_lines = IOTools.getLastLine( chunkfile, nlines = 2).split("\n")
            query_last = last_lines[0][1:]

            logresults = Logfile.parse( logfile )
            last_run = logresults[-1]

            fields = (chunkid, query_first, query_last,
                      found_first, found_last,
                      found_total, total_results,
                      last_run.has_finished,
                      len(logresults),
                      "\t".join( map(str, last_run) ))
            outf.write( "\t".join( map(str, fields) ) + "\n" )
开发者ID:AndreasHeger,项目名称:adda,代码行数:45,代码来源:pairsdb.py

示例3: filterContigsByCoverage

# 需要导入模块: import Pipeline [as 别名]
# 或者: from Pipeline import snip [as 别名]
def filterContigsByCoverage(infiles, outfile):
    '''
    filter contigs by their average base coverage

    infiles[0] is taken as the contig file; the remaining entries are
    iterated over. outfile is not used in the visible part of the body.

    NOTE(review): only the beginning of this function appears to be
    shown. As written it merely prints one line per remaining input
    file, and fcoverage, dbh and cc are assigned but never read.
    Presumably the actual filtering follows in code that is not
    visible here -- confirm against the full source before changing
    anything.
    '''
    # coverage threshold and database location come from the pipeline
    # configuration
    fcoverage = PARAMS["coverage_filter"]
    contig_file = infiles[0]
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()
    for infile in infiles[1:]:
        # Python 2 print statement: writes the contig file and the
        # input's base name without its ".load" suffix
        print contig_file, P.snip(os.path.basename(infile), ".load")
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:12,代码来源:pipeline_metagenomebenchmark.py

示例4: alignmentTargets

# 需要导入模块: import Pipeline [as 别名]
# 或者: from Pipeline import snip [as 别名]
def alignmentTargets(genome_files, contig_files):
    '''
    build the task parameters for aligning contigs to known ncbi
    genomes

    Every genome file is paired with every contig file.

    Returns a list (not a generator) of ``[outfile, genome, contig]``
    entries, where outfile is
    ``alignment.dir/<contig without ".contigs.fa">_vs_<genome base name
    without ".fna">.delta``.
    '''
    parameters = []
    for genome, contig in itertools.product(genome_files, contig_files):
        contig_track = P.snip(contig, ".contigs.fa")
        genome_track = P.snip(os.path.basename(genome), ".fna")
        outfile = os.path.join(
            "alignment.dir", contig_track + "_vs_" + genome_track) + ".delta"
        # the unused add_inputs(contig) assignment was dropped: its
        # result never reached the returned parameters
        parameters.append([outfile, genome, contig])
    return parameters
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:13,代码来源:pipeline_metagenomebenchmark.py

示例5: collectGenomeSizes

# 需要导入模块: import Pipeline [as 别名]
# 或者: from Pipeline import snip [as 别名]
def collectGenomeSizes(infile, outfile):
    '''
    output the genome sizes for each genome

    Writes a tab-separated table with the columns ``genome`` and
    ``length`` to outfile. One row is written per fasta entry found in
    infile; the genome name is always the base name of infile without
    its ".fna" suffix, so a multi-entry file yields several rows with
    the same name.
    '''
    # not read anywhere in this function; kept as is
    # (presumably a pipeline convention flag -- confirm)
    to_cluster = True

    # with-blocks make sure both handles are closed, also on errors;
    # previously the input handle was never closed at all
    with open(outfile, "w") as outf:
        outf.write("genome\tlength\n")
        with IOTools.openFile(infile) as inf:
            # assume single fasta entry
            for fasta in FastaIterator.iterate(inf):
                name = P.snip(os.path.basename(infile), ".fna")
                # NOTE(review): if fasta.sequence is a plain string,
                # len(fasta.sequence) would avoid building the list
                length = len(list(fasta.sequence))
                outf.write("%s\t%s\n" % (name, length))
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:15,代码来源:pipeline_metagenomebenchmark.py

示例6: alignContigsToReference

# 需要导入模块: import Pipeline [as 别名]
# 或者: from Pipeline import snip [as 别名]
def alignContigsToReference(outfile, param1, param2):
    '''
    align the contigs to the reference genomes
    using nucmer

    param1 is used as the reference file and param2 as the contig
    file. The nucmer output prefix is the base name of outfile without
    its ".delta" suffix; afterwards the file named like the base name
    of outfile is moved into alignment.dir.

    NOTE(review): P.run() is called without arguments, so it presumably
    picks up ``statement``, ``to_cluster`` and the values for the
    %(name)s placeholders from the local variables of this function.
    Do not rename or remove these locals without confirming that.
    '''
    to_cluster = True

    reffile, contigfile = param1, param2
    pattern = P.snip(os.path.basename(outfile), ".delta")
    # first command: run nucmer with the output prefix
    statement = '''nucmer -p %(pattern)s %(reffile)s %(contigfile)s'''
    P.run()
    # second command: move the result from the working directory
    # into alignment.dir
    outf = os.path.basename(outfile)
    statement = '''mv %(outf)s alignment.dir'''
    P.run()
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:16,代码来源:pipeline_metagenomebenchmark.py

示例7: buildAlignmentSizes

# 需要导入模块: import Pipeline [as 别名]
# 或者: from Pipeline import snip [as 别名]
def buildAlignmentSizes(infiles, outfile):
    '''
    use bed files to sum the total number of bases
    that are aligned to the genomes

    Writes a tab-separated table with the columns ``genome`` and
    ``size`` to outfile, one row per file in infiles. The genome name
    is the base name of the input file without its ".bed.gz" suffix
    and the size is the sum of ``end - start`` over all its intervals.
    With no input files only the header line is written.
    '''
    # with-blocks make sure the output and every input handle are
    # closed, also on errors; the inputs were never closed before
    with open(outfile, "w") as outf:
        outf.write("genome\tsize\n")
        for infile in infiles:
            genome = P.snip(os.path.basename(infile), ".bed.gz")
            with IOTools.openFile(infile) as inf:
                aligned = sum(
                    bed.end - bed.start for bed in Bed.iterator(inf))
            outf.write("%s\t%s\n" % (genome, aligned))
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:17,代码来源:pipeline_metagenomebenchmark.py

示例8: calculateFalsePositiveRate

# 需要导入模块: import Pipeline [as 别名]
# 或者: from Pipeline import snip [as 别名]
def calculateFalsePositiveRate(infiles, outfile):
    '''
    calculate the false positive rate in taxonomic
    abundances

    infiles[0] is the file describing the true species; the remaining
    entries are estimate files. An estimate file is used only if its
    base name, with the first len("metaphlan_") characters removed,
    equals the base name of the true file. Species names are read from
    the database tables derived from the two file names; estimated
    taxa containing "_unclassified" are ignored.

    Writes a single-row, tab-separated table to outfile with
        tp_rate = shared species / estimated species
        fp_rate = estimated-only species / estimated species
        fn_rate = true-only species / true species

    Raises ZeroDivisionError if no estimated or no true species were
    collected; in that case outfile is not created.
    '''

    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    try:
        cc = dbh.cursor()

        true_file = infiles[0]
        true_name = os.path.basename(true_file)
        true_set = set()
        estimate_set = set()
        for estimate_file in infiles[1:]:
            estimate_name = os.path.basename(estimate_file)
            if estimate_name[len("metaphlan_"):] != true_name:
                continue
            # table names cannot be bound as SQL parameters, hence the
            # string interpolation
            true_table = P.toTable(true_name)
            estimate_table = P.toTable(estimate_name)

            for row in cc.execute("""SELECT species_name FROM %s""" % true_table).fetchall():
                true_set.add(row[0])
            for row in cc.execute("""SELECT taxon FROM %s WHERE taxon_level == 'species'""" % estimate_table).fetchall():
                if "_unclassified" in row[0]:
                    continue
                estimate_set.add(row[0])
    finally:
        # the connection used to be left open
        dbh.close()

    total_estimate = len(estimate_set)
    total_true = len(true_set)

    E.info("counting false positives and false negatives")
    false_positives = estimate_set.difference(true_set)
    # single-argument form prints the same under Python 2 and 3
    print(false_positives)
    nfp = len(false_positives)
    nfn = len(true_set.difference(estimate_set))
    ntp = len(estimate_set.intersection(true_set))

    E.info("writing results")
    track = P.snip(os.path.basename(true_file), ".load")
    # build the row before opening the file so that a failing division
    # does not leave a header-only output file behind
    row = "\t".join(map(str, [track,
                              float(ntp)/total_estimate,
                              float(nfp)/total_estimate,
                              float(nfn)/total_true])) + "\n"
    with open(outfile, "w") as outf:
        outf.write("track\ttp_rate\tfp_rate\tfn_rate\n")
        outf.write(row)
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:40,代码来源:pipeline_metagenomebenchmark.py

示例9: plotRelativeAbundanceCorrelations

# 需要导入模块: import Pipeline [as 别名]
# 或者: from Pipeline import snip [as 别名]
def plotRelativeAbundanceCorrelations(infiles, outfile):
    '''
    plot the correlation between the estimated
    relative abundance of species and the true
    relative abundances - done on the shared set

    infiles[0] is the file describing the true abundances; the
    remaining entries are estimate files. An estimate file is used
    only if its base name, with the first len("metaphlan_") characters
    removed, equals the base name of the true file. The paired values
    are written to a temporary file, read into R and plotted to the
    png file outfile; the estimates are divided by 100 before
    plotting. The temporary file is removed afterwards.
    '''
    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    pairs = []
    try:
        cc = dbh.cursor()

        true_name = os.path.basename(infiles[0])
        for estimate_file in infiles[1:]:
            estimate_name = os.path.basename(estimate_file)
            if estimate_name[len("metaphlan_"):] != true_name:
                continue
            true_table = P.toTable(true_name)
            estimate_table = P.toTable(estimate_name)
            # get data
            # 'species' is single-quoted: SQLite resolves a
            # double-quoted token as a column name first, which would
            # silently change the filter if such a column existed
            statement = """SELECT a.relab, b.rel_abundance
                           FROM %s as a, %s as b
                           WHERE b.taxon_level == 'species'
                           AND a.species_name == b.taxon""" % (true_table, estimate_table)
            pairs.extend(cc.execute(statement).fetchall())
    finally:
        # the connection used to be left open
        dbh.close()

    temp = P.getTempFile()
    inf = temp.name
    try:
        try:
            temp.write("true\testimate\n")
            for true, estimate in pairs:
                temp.write("%f\t%f\n" % (true, estimate))
        finally:
            temp.close()
        # single-argument form prints the same under Python 2 and 3
        print(inf)

        main_name = P.snip(outfile, ".png")
        R('''data <- read.csv("%s", header = T, stringsAsFactors = F, sep = "\t")''' % inf)
        R('''png("%s")''' % outfile)
        try:
            R('''data$estimate <- data$estimate/100''')
            R('''plot(data$estimate, data$true, pch = 16, main = "%s", xlab = "estimated relative abundance", ylab = "observed relative abundance")''' % main_name)
            R('''text(0.05, y = 0.35, labels = paste("r = ", round(cor(data$estimate, data$true),2)), cex = 2)''')
        finally:
            # always close the png device, also if plotting fails
            R["dev.off"]()
    finally:
        # remove the temporary table even if something above failed
        os.unlink(inf)
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:38,代码来源:pipeline_metagenomebenchmark.py


注:本文中的Pipeline.snip方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。