当前位置: 首页>>代码示例>>Python>>正文


Python Pipeline.toTable方法代码示例

本文整理汇总了Python中Pipeline.toTable方法的典型用法代码示例。如果您正苦于以下问题:Python Pipeline.toTable方法的具体用法?Python Pipeline.toTable怎么用?Python Pipeline.toTable使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Pipeline的用法示例。


在下文中一共展示了Pipeline.toTable方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: calculateFalsePositiveRate

# 需要导入模块: import Pipeline as P (示例代码通过别名 P 调用, 如 P.toTable)
# 或者: from Pipeline import toTable (此时需将代码中的 P.toTable 改为 toTable)
def calculateFalsePositiveRate(infiles, outfile):
    '''
    calculate the true positive, false positive and false negative
    rates in taxonomic abundances

    infiles[0] is the "true" file, the remaining entries are estimate
    files. An estimate file is only used if its basename, with the
    first len("metaphlan_") characters removed, equals the basename
    of the true file. Species names are read from the database tables
    that P.toTable() derives from the two basenames.

    A header line and a single tab-separated result line are written
    to outfile: track, tp_rate, fp_rate, fn_rate. tp_rate and fp_rate
    are fractions of the estimated species set, fn_rate is a fraction
    of the true species set. A rate whose denominator is zero is
    written as "NA" instead of raising ZeroDivisionError.
    '''

    def _rate(count, total):
        # an empty species set (e.g. no estimate file matched the
        # true file) would otherwise divide by zero
        if not total:
            return "NA"
        return float(count) / total

    true_file = infiles[0]
    true_base = os.path.basename(true_file)
    prefix_length = len("metaphlan_")

    true_set = set()
    estimate_set = set()

    # connect to database; closed again once both sets are collected
    dbh = sqlite3.connect(PARAMS["database"])
    try:
        cc = dbh.cursor()
        for estimate_file in infiles[1:]:
            estimate_base = os.path.basename(estimate_file)
            if estimate_base[prefix_length:] != true_base:
                continue

            true_table = P.toTable(true_base)
            estimate_table = P.toTable(estimate_base)

            for row in cc.execute("""SELECT species_name FROM %s""" % true_table).fetchall():
                true_set.add(row[0])
            for row in cc.execute("""SELECT taxon FROM %s WHERE taxon_level == 'species'""" % estimate_table).fetchall():
                # unclassified taxa cannot be matched against the truth
                if "_unclassified" in row[0]:
                    continue
                estimate_set.add(row[0])
    finally:
        dbh.close()

    total_estimate = len(estimate_set)
    total_true = len(true_set)

    E.info("counting false positives and false negatives")
    false_positives = estimate_set.difference(true_set)
    E.info("false positive species: %s" % ", ".join(sorted(false_positives)))
    nfp = len(false_positives)
    nfn = len(true_set.difference(estimate_set))
    ntp = len(estimate_set.intersection(true_set))

    E.info("writing results")
    # presumably strips the ".load" suffix to obtain the track name
    track = P.snip(true_base, ".load")
    fields = [track,
              _rate(ntp, total_estimate),
              _rate(nfp, total_estimate),
              _rate(nfn, total_true)]
    with open(outfile, "w") as outf:
        outf.write("track\ttp_rate\tfp_rate\tfn_rate\n")
        outf.write("\t".join(map(str, fields)) + "\n")
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:40,代码来源:pipeline_metagenomebenchmark.py

示例2: plotRelativeAbundanceCorrelations

# 需要导入模块: import Pipeline as P (示例代码通过别名 P 调用, 如 P.toTable)
# 或者: from Pipeline import toTable (此时需将代码中的 P.toTable 改为 toTable)
def plotRelativeAbundanceCorrelations(infiles, outfile):
    '''
    plot the correlation between the estimated
    relative abundance of species and the true
    relative abundances - done on the shared set

    infiles[0] is the "true" file, the remaining entries are estimate
    files. An estimate file is only used if its basename, with the
    first len("metaphlan_") characters removed, equals the basename
    of the true file. The (true, estimate) abundance pairs of species
    present in both tables are written to a temporary tab-separated
    file, which is read into R and plotted as a png to outfile.
    The temporary file is removed even if the query or plot fails.
    '''
    true_file = infiles[0]
    true_base = os.path.basename(true_file)
    prefix_length = len("metaphlan_")

    temp = P.getTempFile()
    inf = temp.name
    try:
        # connect to database
        dbh = sqlite3.connect(PARAMS["database"])
        try:
            cc = dbh.cursor()
            temp.write("true\testimate\n")
            for estimate_file in infiles[1:]:
                estimate_base = os.path.basename(estimate_file)
                if estimate_base[prefix_length:] != true_base:
                    continue

                true_table = P.toTable(true_base)
                estimate_table = P.toTable(estimate_base)
                # get data; 'species' is single-quoted so that SQLite
                # always treats it as a string literal, never as a
                # column name
                statement = """SELECT a.relab, b.rel_abundance
                           FROM %s as a, %s as b
                           WHERE b.taxon_level == 'species'
                           AND a.species_name == b.taxon""" % (true_table, estimate_table)
                for true_abundance, estimate in cc.execute(statement).fetchall():
                    temp.write("%f\t%f\n" % (true_abundance, estimate))
        finally:
            # close the file before R reads it so all rows are flushed
            temp.close()
            dbh.close()

        R('''data <- read.csv("%s", header = T, stringsAsFactors = F, sep = "\t")''' % inf)
        R('''png("%s")''' % outfile)
        try:
            main_name = P.snip(outfile, ".png")
            # estimates are divided by 100, presumably converting
            # percentages to fractions - confirm against the table
            R('''data$estimate <- data$estimate/100''')
            R('''plot(data$estimate, data$true, pch = 16, main = "%s", xlab = "estimated relative abundance", ylab = "observed relative abundance")''' % main_name)
            R('''text(0.05, y = 0.35, labels = paste("r = ", round(cor(data$estimate, data$true),2)), cex = 2)''')
        finally:
            # always release the png device once it has been opened
            R["dev.off"]()
    finally:
        os.unlink(inf)
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:38,代码来源:pipeline_metagenomebenchmark.py

示例3: loadCoverageData

# 需要导入模块: import Pipeline as P (示例代码通过别名 P 调用, 如 P.toTable)
# 或者: from Pipeline import toTable (此时需将代码中的 P.toTable 改为 toTable)
def loadCoverageData(infile, outfile):
    '''
    load coverage data into database

    The average coverage per contig is selected from the table that
    P.toTable(outfile) names, written to a temporary tab-separated
    file with the columns contig_id and acoverage, and passed to
    P.load() together with outfile. infile is not read by this
    function. The temporary file is removed even if the query or the
    load fails.
    '''
    # NOTE(review): not referenced in this function body; presumably
    # picked up by the pipeline framework - confirm before removing
    to_cluster = True
    tablename = P.toTable(outfile)
    database = os.path.join(PARAMS["results_resultsdir"], PARAMS["database"])

    temp = P.getTempFile()
    try:
        dbh = sqlite3.connect(database)
        try:
            cc = dbh.cursor()
            temp.write("contig_id\tacoverage\n")
            for data in cc.execute("""SELECT contig_id, AVG(coverage) FROM %s GROUP BY contig_id""" % tablename).fetchall():
                # AVG() yields a number, so every field has to be
                # converted to a string before joining
                temp.write("\t".join(map(str, data)) + "\n")
        finally:
            # close the file before loading so all rows are flushed
            temp.close()
            dbh.close()
        P.load(temp.name, outfile)
    finally:
        os.unlink(temp.name)
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:18,代码来源:pipeline_metagenomebenchmark.py


注:本文中的Pipeline.toTable方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。