This page collects typical usage examples of the Python method Pipeline.snip. If you are wondering what Pipeline.snip does and how to use it, the hand-picked code examples below may help; you can also explore the containing Pipeline module for more context.
Nine code examples of Pipeline.snip are shown below, ordered by popularity by default.
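For context: every example below uses P.snip to strip a known suffix from a filename. What follows is only a minimal sketch of the behaviour these examples rely on, not the CGAT implementation (the real Pipeline.snip also supports alternative extensions):

import os

def snip(filename, extension=None):
    # Minimal sketch of Pipeline.snip (an approximation): return
    # filename without the given extension, or without whatever
    # extension os.path.splitext finds if none is given.
    if extension is None:
        return os.path.splitext(filename)[0]
    if not filename.endswith(extension):
        raise ValueError("'%s' does not end in '%s'" % (filename, extension))
    return filename[:-len(extension)]

print(snip("sample1.contigs.fa", ".contigs.fa"))  # sample1
print(snip("chunk_0001.blast.gz", ".blast.gz"))   # chunk_0001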
Example 1: chimeraTargets
# Required import: import Pipeline as P
# Or: from Pipeline import snip
def chimeraTargets(alignment_files, contig_files):
    '''
    build the list of (outfile, alignment, contig) parameter
    triples used for scoring chimericity
    '''
    parameters = []
    for alignment, contig in itertools.product(alignment_files, contig_files):
        outfile = os.path.join("chimeras.dir",
                               P.snip(alignment, ".bam") + ".chimeras")
        parameters.append([outfile, alignment, contig])
    return parameters
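A quick illustration of the parameter triples this produces, using hypothetical input names:

alignment_files = ["sample1.bam", "sample2.bam"]  # hypothetical inputs
contig_files = ["assembly.contigs.fa"]
for outfile, alignment, contig in chimeraTargets(alignment_files, contig_files):
    print(outfile, alignment, contig)
# chimeras.dir/sample1.chimeras sample1.bam assembly.contigs.fa
# chimeras.dir/sample2.chimeras sample2.bam assembly.contigs.fa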
Example 2: checkBlastRuns
# Required import: import Pipeline as P
# Or: from Pipeline import snip
def checkBlastRuns(infiles, outfile):
    '''check that the chunked BLAST output files are complete.
    '''
    outf = IOTools.openFile(outfile, "w")
    outf.write("chunkid\tquery_first\tquery_last\tfound_first\tfound_last\t"
               "found_total\tfound_results\thas_finished\tattempts\t%s\n" %
               "\t".join(Logfile.RuntimeInformation._fields))
    for infile in infiles:
        E.debug("processing %s" % infile)
        chunkid = P.snip(os.path.basename(infile), ".blast.gz")
        logfile = infile + ".log"
        chunkfile = P.snip(infile, ".blast.gz") + ".fasta"
        with IOTools.openFile(infile) as inf:
            l = inf.readline()  # skip the first line
            ids = set()
            total_results = 0
            for l in inf:
                if l.startswith("#//"):
                    continue
                ids.add(int(l.split("\t")[0]))
                total_results += 1
            found_first = min(ids)
            found_last = max(ids)
            found_total = len(ids)
        l = IOTools.getFirstLine(chunkfile)
        query_first = l[1:-1]  # strip ">" and the trailing newline
        l2 = IOTools.getLastLine(chunkfile, nlines=2).split("\n")
        query_last = l2[0][1:]  # strip ">"
        logresults = Logfile.parse(logfile)
        outf.write("\t".join(map(str, (
            chunkid, query_first, query_last,
            found_first, found_last,
            found_total, total_results,
            logresults[-1].has_finished,
            len(logresults),
            "\t".join(map(str, logresults[-1]))))) + "\n")
    outf.close()
Example 3: filterContigsByCoverage
# Required import: import Pipeline as P
# Or: from Pipeline import snip
def filterContigsByCoverage(infiles, outfile):
    '''
    filter contigs by their average base coverage
    '''
    fcoverage = PARAMS["coverage_filter"]
    contig_file = infiles[0]
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()
    for infile in infiles[1:]:
        print(contig_file, P.snip(os.path.basename(infile), ".load"))
Example 4: alignmentTargets
# Required import: import Pipeline as P
# Or: from Pipeline import snip
def alignmentTargets(genome_files, contig_files):
    '''
    build the list of (outfile, genome, contig) parameter
    triples for aligning contigs to known NCBI genomes
    '''
    parameters = []
    for genome, contig in itertools.product(genome_files, contig_files):
        outfile = os.path.join(
            "alignment.dir",
            P.snip(contig, ".contigs.fa") + "_vs_" +
            P.snip(os.path.basename(genome), ".fna")) + ".delta"
        parameters.append([outfile, genome, contig])
    return parameters
Example 5: collectGenomeSizes
# Required import: import Pipeline as P
# Or: from Pipeline import snip
def collectGenomeSizes(infile, outfile):
    '''
    output the genome size for each genome
    '''
    to_cluster = True
    outf = open(outfile, "w")
    outf.write("genome\tlength\n")
    # assume a single fasta entry per genome file
    for fasta in FastaIterator.iterate(IOTools.openFile(infile)):
        name = P.snip(os.path.basename(infile), ".fna")
        length = len(fasta.sequence)
        outf.write("%s\t%s\n" % (name, str(length)))
    outf.close()
Example 6: alignContigsToReference
# Required import: import Pipeline as P
# Or: from Pipeline import snip
def alignContigsToReference(outfile, param1, param2):
    '''
    align the contigs to the reference genomes
    using nucmer
    '''
    to_cluster = True
    reffile, contigfile = param1, param2
    pattern = P.snip(os.path.basename(outfile), ".delta")
    statement = '''nucmer -p %(pattern)s %(reffile)s %(contigfile)s'''
    P.run()
    # nucmer writes <pattern>.delta into the working directory;
    # move it into place
    outf = os.path.basename(outfile)
    statement = '''mv %(outf)s alignment.dir'''
    P.run()
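Note on the P.run() idiom above: in CGAT pipelines, `statement` is a magic local variable. P.run() picks it up from the calling function, fills the %(name)s placeholders from the caller's locals (here `pattern`, `reffile`, `contigfile`), and executes the command, honouring flags such as `to_cluster`. A minimal sketch of that convention, assuming the simple local-execution case only:

import inspect
import subprocess

def run():
    # Sketch only, not the CGAT implementation: grab `statement` from
    # the caller's frame, interpolate %(name)s placeholders from the
    # caller's locals, then run it in a shell.  The real P.run() also
    # submits to a compute cluster when to_cluster is set.
    caller = inspect.currentframe().f_back
    statement = caller.f_locals["statement"] % caller.f_locals
    subprocess.check_call(statement, shell=True)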
Example 7: buildAlignmentSizes
# Required import: import Pipeline as P
# Or: from Pipeline import snip
def buildAlignmentSizes(infiles, outfile):
    '''
    use bed files to sum the total number of bases
    aligned to each genome
    '''
    outf = open(outfile, "w")
    outf.write("genome\tsize\n")
    for infile in infiles:
        genome = P.snip(os.path.basename(infile), ".bed.gz")
        c = 0
        inf = IOTools.openFile(infile)
        for bed in Bed.iterator(inf):
            c += bed.end - bed.start
        outf.write("%s\t%s\n" % (genome, str(c)))
    outf.close()
Example 8: calculateFalsePositiveRate
# Required import: import Pipeline as P
# Or: from Pipeline import snip
def calculateFalsePositiveRate(infiles, outfile):
    '''
    calculate the false positive rate in the taxonomic
    abundance estimates
    '''
    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()
    true_file = infiles[0]
    true_set = set()
    estimate_set = set()
    for estimate_file in infiles[1:]:
        if os.path.basename(estimate_file)[len("metaphlan_"):] == os.path.basename(true_file):
            tablenames = [P.toTable(os.path.basename(true_file)),
                          P.toTable(os.path.basename(estimate_file))]
            for species in cc.execute("""SELECT species_name FROM %s""" % tablenames[0]).fetchall():
                true_set.add(species[0])
            for species in cc.execute("""SELECT taxon FROM %s
                                          WHERE taxon_level == 'species'""" % tablenames[1]).fetchall():
                if species[0].find("_unclassified") != -1:
                    continue
                estimate_set.add(species[0])
    total_estimate = len(estimate_set)
    total_true = len(true_set)

    E.info("counting false positives and false negatives")
    print(estimate_set.difference(true_set))
    nfp = len(estimate_set.difference(true_set))
    nfn = len(true_set.difference(estimate_set))
    ntp = len(estimate_set.intersection(true_set))

    E.info("writing results")
    track = P.snip(os.path.basename(true_file), ".load")
    outf = open(outfile, "w")
    outf.write("track\ttp_rate\tfp_rate\tfn_rate\n")
    outf.write("\t".join(map(str, [track,
                                   float(ntp) / total_estimate,
                                   float(nfp) / total_estimate,
                                   float(nfn) / total_true])) + "\n")
    outf.close()
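The three rates written out reduce to simple set arithmetic; a worked example with hypothetical species sets:

true_set = {"E_coli", "B_subtilis", "S_aureus"}             # hypothetical
estimate_set = {"E_coli", "B_subtilis", "L_monocytogenes"}  # hypothetical
ntp = len(estimate_set & true_set)  # 2 true positives
nfp = len(estimate_set - true_set)  # 1 false positive
nfn = len(true_set - estimate_set)  # 1 false negative
print(float(ntp) / len(estimate_set))  # tp_rate ~ 0.67
print(float(nfp) / len(estimate_set))  # fp_rate ~ 0.33
print(float(nfn) / len(true_set))      # fn_rate ~ 0.33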
Example 9: plotRelativeAbundanceCorrelations
# Required import: import Pipeline as P
# Or: from Pipeline import snip
def plotRelativeAbundanceCorrelations(infiles, outfile):
    '''
    plot the correlation between the estimated and the true
    relative abundances of species - computed on the shared set
    '''
    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()
    true_file = infiles[0]
    temp = P.getTempFile()
    temp.write("true\testimate\n")
    for estimate_file in infiles[1:]:
        if os.path.basename(estimate_file)[len("metaphlan_"):] == os.path.basename(true_file):
            tablenames = [P.toTable(os.path.basename(true_file)),
                          P.toTable(os.path.basename(estimate_file))]
            # get data
            statement = """SELECT a.relab, b.rel_abundance
                           FROM %s as a, %s as b
                           WHERE b.taxon_level == "species"
                           AND a.species_name == b.taxon""" % (tablenames[0], tablenames[1])
            for data in cc.execute(statement).fetchall():
                true, estimate = data[0], data[1]
                temp.write("%f\t%f\n" % (true, estimate))
    temp.close()

    print(temp.name)
    inf = temp.name
    R('''data <- read.csv("%s", header = T, stringsAsFactors = F, sep = "\t")''' % inf)
    R('''png("%s")''' % outfile)
    main_name = P.snip(outfile, ".png")
    R('''data$estimate <- data$estimate/100''')
    R('''plot(data$estimate, data$true, pch = 16, main = "%s",
              xlab = "estimated relative abundance",
              ylab = "observed relative abundance")''' % main_name)
    R('''text(0.05, y = 0.35, labels = paste("r = ", round(cor(data$estimate, data$true), 2)), cex = 2)''')
    R["dev.off"]()
    os.unlink(inf)
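The R(...) calls in this example go through rpy2; the binding assumed here (not shown in the snippet) would be along these lines:

# Assumed rpy2 binding for the R(...) calls above:
from rpy2.robjects import r as R  # R('''...''') evaluates R code;
                                  # R["dev.off"]() looks up the R function dev.off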