本文整理汇总了Python中extern.run函数的典型用法代码示例。如果您正苦于以下问题：Python run函数的具体用法？Python run怎么用？Python run使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了run函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_query_with_otu_table_two_samples_same_sequence
def test_query_with_otu_table_two_samples_same_sequence(self):
    '''Build a database from an OTU table in which the 'maximal' and
    'minimal' samples carry the identical sequence, query the database with
    that same table, and expect all four query/hit sample pairings to be
    reported with divergence 0.'''
    marker = 'ribosomal_protein_L11_rplK_gpkg'
    sequence = 'CGTCGTTGGAACCCAAAAATGAAATAATATATCTTCACTGAGAGAAATGGTATTTATATA'
    # (sample, taxonomy) pairs; both samples share `sequence`.
    samples = [
        ('maximal', 'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'),
        ('minimal', 'Root; k__Bacteria; p__Firmicutes; c__Bacilli'),
    ]

    otu_rows = [self.headers] + [
        [marker, sample, sequence, '7', '4.95', taxonomy]
        for sample, taxonomy in samples]
    otu_table_text = "\n".join("\t".join(row) for row in otu_rows)

    # Every query sample hits every subject sample, query-major order.
    expected = [['query_name','query_sequence','divergence','num_hits','sample','marker','hit_sequence','taxonomy']]
    for query_sample, _ in samples:
        for hit_sample, hit_taxonomy in samples:
            expected.append([
                query_sample + ';' + marker,
                sequence,
                '0',
                '7',
                hit_sample,
                marker,
                sequence,
                hit_taxonomy])

    with tempfile.NamedTemporaryFile() as otu_table_file:
        otu_table_file.write(otu_table_text)
        otu_table_file.flush()

        with tempdir.TempDir() as db_dir:
            makedb_cmd = "{} makedb --db {}/sdb --otu_table {}".format(
                path_to_script, db_dir, otu_table_file.name)
            extern.run(makedb_cmd)

            query_cmd = "{} query --query_otu_table {} --db {}/sdb".format(
                path_to_script,
                otu_table_file.name,
                db_dir)
            observed = subprocess.check_output(query_cmd, shell=True)
            self.assertEqualOtuTable(expected, observed)
示例2: _align_sequences
def _align_sequences(self, input_sequences_path, output_alignment_path,
                     threads):
    '''Align sequences with mafft, writing the alignment to a file.

    The input sequences are read, stripped of '*' characters and piped to
    mafft on stdin; mafft's stdout is redirected to output_alignment_path.

    Parameters
    ----------
    input_sequences_path: str
        path to input sequences in fasta format
    output_alignment_path: str
        path the alignment is written to
    threads: str
        number of threads to use

    Returns
    -------
    Nothing
    '''
    logging.debug("Aligning sequences using mafft")
    # The redirect target is quoted so that output paths containing spaces
    # or other shell-special characters are passed through intact.
    cmd = "mafft --anysymbol --thread %s --auto /dev/stdin > '%s'" % (
        threads,
        output_alignment_path)

    inputs = []
    with open(input_sequences_path) as f:
        for name, seq, _ in SequenceIO().each(f):
            inputs.append('>%s' % name)
            # Do not include * characters in the HMM, as this means tree
            # insertion fails.
            inputs.append(seq.replace('*', ''))

    extern.run(cmd, stdin="\n".join(inputs))
示例3: test_no_clustering
def test_no_clustering(self):
    '''With --clustering_divergence 0, makedb should keep sequences that
    differ by a single base as separate entries in the smafa database.'''
    l11_marker = 'ribosomal_protein_L11_rplK_gpkg'
    seq_ending_c = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    # Differs from seq_ending_c in the last base only
    seq_ending_a = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATA'
    # seq_ending_c with its first base changed from G to A
    query_seq = 'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'

    rows = [
        self.headers,
        [l11_marker, 'minimal', seq_ending_c, '7', '4.95',
         'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        [l11_marker, 'minimal', seq_ending_a, '6', '4.95',
         'Root; k__Bacteria; p__Firmicutes; c__Bacilli'],
        ['ribosomal_protein_S17_gpkg', 'minimal',
         'GCTAAATTAGGAGACATTGTTAAAATTCAAGAAACTCGTCCTTTATCAGCAACAAAACGT',
         '9', '4.95',
         'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
    ]
    otu_table_text = "\n".join("\t".join(row) for row in rows)

    # One output line per database hit: name, query, hit, divergence, length
    expected_hits = "".join(
        "\t".join(['seq1', query_seq, hit_seq, divergence, '60']) + "\n"
        for hit_seq, divergence in [(seq_ending_a, '2'), (seq_ending_c, '1')])

    with tempfile.NamedTemporaryFile() as otu_table_file:
        otu_table_file.write(otu_table_text)
        otu_table_file.flush()

        with tempdir.TempDir() as workdir:
            makedb_cmd = "{} makedb --db_path {}/db --otu_table {} --clustering_divergence 0".format(
                path_to_script, workdir, otu_table_file.name)
            extern.run(makedb_cmd)

            with tempfile.NamedTemporaryFile() as query_fasta:
                query_fasta.write(">seq1\n" + query_seq + "\n")
                query_fasta.flush()

                # Querying the smafadb directly should show no clustering
                smafa_db = os.path.join(
                    workdir, 'db', 'ribosomal_protein_L11_rplK_gpkg.smafadb')
                smafa_cmd = "smafa query {} {}".format(smafa_db, query_fasta.name)
                self.assertEqual(extern.run(smafa_cmd), expected_hits)
示例4: run
def run(self, input_sequence_file, input_sequence_type, daa_file_basename=None):
    '''Run input sequences in either blastp or blastx mode against the
    database specified in __init__.

    Parameters
    ----------
    input_sequence_file: str
        path to query sequences
    input_sequence_type: either 'nucleotide' or 'protein'
        the input_sequences are this kind of sequence
    daa_file_basename: str or None
        basename (without the '.daa' suffix) of the diamond output file.
        When None, a temporary basename is used and the resulting .daa
        file is removed before returning.

    Returns
    -------
    DiamondSearchResult

    Raises
    ------
    Exception
        if input_sequence_type is neither of the known sequence types
    '''
    if input_sequence_type == UnpackRawReads.PROTEIN_SEQUENCE_TYPE:
        search_mode = 'blastp'
    elif input_sequence_type == UnpackRawReads.NUCLEOTIDE_SEQUENCE_TYPE:
        search_mode = 'blastx'
    else:
        raise Exception("Programming error")

    basename = daa_file_basename
    if basename is None:
        with tempfile.NamedTemporaryFile(prefix='graftm_diamond') as t:
            # we are just stealing the name, don't need the file itself
            basename = t.name

    cmd_list = ["diamond",
                search_mode,
                '-k 1',
                "-d",
                self._database,
                "-q",
                "%s" % input_sequence_file,
                "-a",
                basename]
    if self._threads:
        cmd_list.extend(["--threads", str(self._threads)])
    if self._evalue:
        cmd_list.extend(["--evalue", str(self._evalue)])
    cmd = ' '.join(cmd_list)

    daa_name = "%s.daa" % basename
    try:
        extern.run(cmd)
        return DiamondSearchResult.import_from_daa_file(daa_name)
    finally:
        # Diamond makes an extra file; when we chose the basename ourselves
        # it must be removed, even if the search or the import failed. The
        # existence check avoids masking the original error when diamond
        # never got as far as writing the file.
        if daa_file_basename is None and os.path.exists(daa_name):
            os.remove(daa_name)
示例5: test_biom_hello_world
def test_biom_hello_world(self):
    '''summarise --biom_prefix should write one biom file per marker gene,
    and converting that file back to TSV should list both OTUs with their
    counts and taxonomy.'''
    gene = '4.12.ribosomal_protein_L11_rplK'
    taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
    # (sequence, count in the OTU table, abundance reported by biom convert)
    otus = [
        ('CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG', '1', '1.0'),
        ('CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTtttCAAGCAGGTGTG', '2', '2.0'),
    ]
    biom_name = 'mybiom.4.12.ribosomal_protein_L11_rplK.biom'

    insert_otu_table = [self.headers]
    for sequence, count, _ in otus:
        insert_otu_table.append(
            [gene, 'insert', sequence, count, str(float(count) * 2.44) if False else
             {'1': '2.44', '2': '2.94'}[count], taxonomy])

    expected_tsv = "\n".join(
        ['# Constructed from biom file', '#OTU ID\tinsert\ttaxonomy'] +
        ["\t".join([taxonomy + '; ' + sequence, abundance, taxonomy])
         for sequence, _, abundance in otus])

    with tempdir.TempDir() as tmp:
        with tempfile.NamedTemporaryFile(suffix='.otu_table.csv') as otu_table_file:
            # Trailing '' gives the table a final newline
            otu_table_file.write(
                "\n".join(["\t".join(row) for row in insert_otu_table] + ['']))
            otu_table_file.flush()

            summarise_cmd = "%s summarise --biom_prefix '%s' --input_otu_tables '%s'" % (
                path_to_script, os.path.join(tmp, "mybiom"), otu_table_file.name)
            extern.run(summarise_cmd)

            self.assertEqual([biom_name], os.listdir(tmp))

            convert_cmd = "biom convert -i '%s' -o /dev/stdout --to-tsv --header-key taxonomy" % \
                os.path.join(tmp, biom_name)
            self.assertEqual(expected_tsv, extern.run(convert_cmd))
示例6: test_jplace_output
def test_jplace_output(self):
    '''pipe --output_jplace should write a jplace file whose fields,
    placements and version match the expected values, and which guppy can
    read back. The 'tree' and 'metadata' entries are overwritten with
    placeholders before comparison.'''
    expected_jplace = {u'fields': [u'classification',
                                   u'distal_length',
                                   u'edge_num',
                                   u'like_weight_ratio',
                                   u'likelihood',
                                   u'pendant_length'],
                       u'metadata': 'the_metadata',
                       u'placements':
                       [{
                           u'nm': [[u'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
                                    2]],
                           u'p': [[u'o__Bacillales',
                                   0.0874346630859,
                                   13,
                                   0.333351177694,
                                   -631.301684875,
                                   0.150831104822],
                                  [u'o__Bacillales',
                                   0.0643521435547,
                                   14,
                                   0.333326655502,
                                   -631.301758441,
                                   0.15083915761],
                                  [u'p__Firmicutes',
                                   5.97534179688e-06,
                                   15,
                                   0.333322166804,
                                   -631.301771907,
                                   0.150839131805]]}],
                       u'tree': 'tree_thanks',
                       u'version': 3}

    with tempdir.TempDir() as d:
        cmd = "%s pipe --sequences %s --otu_table /dev/null --output_jplace %s"\
              " --singlem_packages %s" % (
                  path_to_script,
                  os.path.join(path_to_data,'1_pipe','jplace_test.fna'),
                  os.path.join(d, "my_jplace"),
                  os.path.join(path_to_data,'4.12.22seqs.spkg'))
        extern.run(cmd)

        jplace_path = os.path.join(d, 'my_jplace_jplace_test_4.12.22seqs.jplace')
        # Use a context manager so the file handle is closed before the
        # temporary directory is cleaned up.
        with open(jplace_path) as jplace_file:
            j = json.load(jplace_file)
        # Replace the entries that are not compared with fixed placeholders
        j['tree'] = 'tree_thanks'
        j['metadata'] = 'the_metadata'
        self.assertEqual(expected_jplace, j)

        # Make sure the guppy sing does not croak
        extern.run("guppy sing -o /dev/null '%s'" % jplace_path)
示例7: _create_dmnd_database
def _create_dmnd_database(self, unaligned_sequences_path, daa_output):
    '''
    Run `diamond makedb` to build a diamond database.

    Parameters
    ----------
    unaligned_sequences_path: str
        path to a FASTA file containing unaligned sequences
    daa_output: str
        Name of output database.
    '''
    logging.debug("Building diamond database")
    makedb_args = (unaligned_sequences_path, daa_output)
    extern.run("diamond makedb --in '%s' -d '%s'" % makedb_args)
示例8: global_search
def global_search(self, query_otu_table_collection,
                  subject_otu_table_collection, cluster_identity):
    '''Search a query OTU table against a subject OTU table, yield over
    UCEntry objects that have been modified so that the query
    and subject are the relevant OtuTableEntry objects rather than
    strings. Or they are None if there are no hits, since
    --output_no_hits is used.

    Both collections are read into lists once, and the FASTA records
    written for vsearch are named by each OTU's index in those lists, so
    that hits can be mapped back to the cached OTU objects.

    query_otu_table_collection: OtuTableCollection
    subject_otu_table_collection: OtuTableCollection
    cluster_identity: float or str
        reject hits if have lower identity than this (implemented with vsearch --id).
    '''
    logging.info("Caching query OTUs")
    query_otus = list(query_otu_table_collection)
    logging.info("Caching target OTUs")
    subject_otus = list(subject_otu_table_collection)

    def name_to_index(name):
        # Sequence names look like "<index>;size=<count>"
        return int(name.split(';')[0])

    # write out fasta file numbered to corresponding to the OTU info
    with tempfile.NamedTemporaryFile(prefix='singlem_q_for_vsearch') as query_f:
        for i, u in enumerate(query_otus):
            query_f.write(">%i;size=%i\n" % (i, u.count))
            query_f.write(u.sequence.replace('-','')+"\n")
        query_f.flush()

        with tempfile.NamedTemporaryFile(prefix='singlem_db_for_vsearch') as db_f:
            # Iterate the cached list, not the collection again: the
            # indices written here are later looked up in subject_otus, and
            # a collection that can only be iterated once would otherwise
            # yield an empty database.
            for i, u in enumerate(subject_otus):
                db_f.write(">%i;size=%i\n" % (i, u.count))
                db_f.write(u.sequence.replace('-','')+"\n")
            db_f.flush()

            with tempfile.NamedTemporaryFile(prefix='singlem_uc') as uc:
                command = "vsearch --usearch_global %s --db %s --uc %s --id %s --output_no_hits" % (
                    query_f.name,
                    db_f.name,
                    uc.name,
                    str(cluster_identity))
                logging.info("Running search")
                extern.run(command)
                logging.info("Finished running search")

                with open(uc.name) as uc_read:
                    for uc_entry in UCFile(uc_read):
                        uc_entry.query = query_otus[name_to_index(uc_entry.query)]
                        if uc_entry.target is not None:
                            uc_entry.target = subject_otus[name_to_index(uc_entry.target)]
                        yield uc_entry
示例9: test_cluster_across_samples_via_script
def test_cluster_across_samples_via_script(self):
    '''summarise --cluster should merge two OTUs from different samples
    whose sequences differ only in the last base, reporting both samples
    under the sequence and gene of the more abundant OTU.'''
    e = [['gene','sample','sequence','num_hits','coverage','taxonomy'],
         ['4.11.ribosomal_protein_L10','minimal','TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACT','2','4.88','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
         ['4.12.ribosomal_protein_L11_rplK','ma','TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA','4','9.76','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales']
         ]

    with tempfile.NamedTemporaryFile(prefix='singlem_cluster') as f:
        cmd = "%s summarise --cluster --cluster_id %f --input_otu_tables %s --output_otu_table /dev/stdout" % (
            path_to_script, 58.5/60, f.name)
        for row in e:
            f.write("\t".join(row) + "\n")
        f.flush()
        output = extern.run(cmd)

    out_clusters = [o.split("\t") for o in output.split("\n")]
    self.assertEqual(
        [['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
         ['4.12.ribosomal_protein_L11_rplK',
          'ma',
          'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
          '4',
          '9.76',
          'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
         ['4.12.ribosomal_protein_L11_rplK',
          'minimal',
          'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
          '2',
          '4.88',
          'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
         # The output ends with a newline, so the final split piece is empty
         ['']],
        out_clusters)
示例10: test_seqs_dna
def test_seqs_dna(self):
    '''Run `seqs` on a small DNA alignment with a 20-column window and
    check, via the debug log on stderr, that the best window is reported
    as starting from 14.'''
    aln = '''>s1
ga-------------TATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGACGCTGATGTG
>s2 asdas
ca---------GAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGACGCTGA----
>s3
ga-------------TATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGGGCTGATGTG-
>d4
-g----------AGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGACGCTGATG--
'''
    # NOTE(review): `expected` is never asserted against; stdout is
    # compared with '' below. Confirm whether an assertion is missing.
    expected = '''TATGGAGGAACACCAGTGGC
TATGGAGGAACACCAGTGGC
TATGGAGGAACACCAGTGGC
TATGGAGGAACACCAGTGGC
'''
    with tempfile.NamedTemporaryFile() as a:
        a.write(aln)
        a.flush()
        with tempfile.NamedTemporaryFile() as stderr:
            cmd = "%s --debug seqs --alignment %s --alignment_type dna"\
                  " --window_size 20 2>%s" % (
                      path_to_script, a.name, stderr.name)
            self.assertEqual('', extern.run(cmd))

            # Read the captured stderr through a context manager so the
            # handle is closed rather than left to the garbage collector.
            with open(stderr.name) as stderr_read:
                debug_log = stderr_read.read()
            # This includes ignored columns at the front, which were messing things up.
            self.assertIn(
                'Found best section of the alignment starting from 14\n',
                debug_log)
示例11: test_paired_reads_one_read_each_diamond_example
def test_paired_reads_one_read_each_diamond_example(self):
    '''Run pipe on paired reads where each pair has a matching read in only
    one direction, using the diamond_example assignment method with extra
    output columns, and compare the OTU table with the expected single
    row.'''
    # Reads should be merged
    # NOTE(review): the data row is split on "\t" below; confirm that the
    # field separators inside this literal are real tab characters.
    expected = [
        "\t".join(self.headers_with_extras),
        '4.11.22seqs TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA 2 4.88 2524614704 HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 seq2 60 60 False',
        '']
    inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
>seq2
AAAAAAAAAAAAAAAAA
'''
    # seq2 here is the reverse complement of the first forward read, so
    # the two should collapse.
    inseqs_reverse = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
AAAAAAAAAAAAAAAAA
>seq2
TTCAGCTGCACGACGTACCATAGTGTTTTTGTATACTTTATACTCAACACCAGCTTCACGTAATTGTGAACGTAAGTCAGTAACTTCAGCTACTGTTAAT
'''
    with tempfile.NamedTemporaryFile(suffix='.fa') as forward_fasta:
        forward_fasta.write(inseqs)
        forward_fasta.flush()
        with tempfile.NamedTemporaryFile(suffix='.fa') as reverse_fasta:
            reverse_fasta.write(inseqs_reverse)
            reverse_fasta.flush()

            package = os.path.join(path_to_data,'4.11.22seqs.gpkg.spkg')
            cmd = "{} pipe --sequences {} --otu_table /dev/stdout --singlem_packages {} --reverse {} --output_extras --assignment_method diamond_example".format(
                path_to_script,
                forward_fasta.name,
                package,
                reverse_fasta.name)

            expected_rows = [line.split("\t") for line in expected]
            # The sample name derives from the random temp file name, so
            # blank it out of the output before comparing.
            sample_name = os.path.basename(forward_fasta.name).replace('.fa','')
            observed = extern.run(cmd).replace(sample_name, '')
            self.assertEqualOtuTable(expected_rows, observed)
示例12: test_paired_reads_hello_world
def test_paired_reads_hello_world(self):
    '''Run pipe on a forward read and its reverse complement given via
    --reverse, and expect the pair to be reported as a single OTU.'''
    # Reads should be merged
    # NOTE(review): the data row is split on "\t" below; confirm that the
    # field separators inside this literal are real tab characters.
    expected = [
        "\t".join(self.headers),
        '4.11.22seqs TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA 1 2.44 Root; d__Bacteria; p__Firmicutes',
        '']
    inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
'''
    # reverse complement of the forward, so should collapse.
    inseqs_reverse = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
TTCAGCTGCACGACGTACCATAGTGTTTTTGTATACTTTATACTCAACACCAGCTTCACGTAATTGTGAACGTAAGTCAGTAACTTCAGCTACTGTTAAT
'''
    with tempfile.NamedTemporaryFile(suffix='.fa') as forward_fasta:
        forward_fasta.write(inseqs)
        forward_fasta.flush()
        with tempfile.NamedTemporaryFile(suffix='.fa') as reverse_fasta:
            reverse_fasta.write(inseqs_reverse)
            reverse_fasta.flush()

            package = os.path.join(path_to_data,'4.11.22seqs.gpkg.spkg')
            cmd = "{} pipe --sequences {} --otu_table /dev/stdout --singlem_packages {} --reverse {}".format(
                path_to_script,
                forward_fasta.name,
                package,
                reverse_fasta.name)

            expected_rows = [line.split("\t") for line in expected]
            # The sample name derives from the random temp file name, so
            # blank it out of the output before comparing.
            sample_name = os.path.basename(forward_fasta.name).replace('.fa','')
            observed = extern.run(cmd).replace(sample_name, '')
            self.assertEqualOtuTable(expected_rows, observed)
示例13: test_known_sequence_taxonomy
def test_known_sequence_taxonomy(self):
    '''Run pipe with --no_assign_taxonomy and a --known_sequence_taxonomy
    file, and compare the resulting OTU table lines with the expected
    ones.'''
    # NOTE(review): the data row is compared against tab-delimited output;
    # confirm that the field separators inside this literal are real tab
    # characters.
    expected = [
        "\t".join(self.headers),
        '4.11.22seqs TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA 2 4.88 mytax; yeh',
        '']
    inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
>another
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
'''
    # Read name to taxonomy, one tab-separated pair per line
    known_taxonomy_lines = [
        "HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482\tmytax; yeh\n",
        "another\tmytax; yeh; 2\n",
    ]
    with tempfile.NamedTemporaryFile(suffix='.fa') as reads_fasta:
        reads_fasta.write(inseqs)
        reads_fasta.flush()
        with tempfile.NamedTemporaryFile() as taxonomy_file:
            for line in known_taxonomy_lines:
                taxonomy_file.write(line)
            taxonomy_file.flush()

            package = os.path.join(path_to_data,'4.11.22seqs.gpkg.spkg')
            cmd = ("%s pipe --sequences %s --otu_table /dev/stdout --singlem_packages %s "
                   "--no_assign_taxonomy --known_sequence_taxonomy %s" % (
                       path_to_script, reads_fasta.name, package,
                       taxonomy_file.name))

            # The sample name derives from the random temp file name, so
            # blank it out of the output before comparing.
            sample_name = os.path.basename(reads_fasta.name).replace('.fa','')
            observed_lines = extern.run(cmd).replace(sample_name, '').split("\n")
            self.assertEqual(expected, observed_lines)
示例14: test_two_nucleotide_packages
def test_two_nucleotide_packages(self):
    '''Run pipe with two nucleotide packages at once and expect one OTU
    table row from each package.'''
    # NOTE(review): the data rows are split on "\t" below; confirm that the
    # field separators inside these literals are real tab characters.
    expected = [
        "\t".join(self.headers),
        '61_otus.v3 GGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGACTGACGCTGATGTGCGAAAGCG 2 5.13 Root; k__Bacteria; p__Proteobacteria',
        '61_otus.second.v3 TTAGGTAGTTGCTGGGGTAACGTCCCAACAAGCCGATAATCGGTACGGGTTGTGAGAGCA 1 1.66 Root; k__Archaea; p__Euryarchaeota',
        '']
    inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1105:6981:63483 1:N:0:AAGAGGCAAAGGAGTA
GATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACGCTGATGTGCGAAAGCGTGGGGATCAAACAGGATTAGATACCCTGGTAGT
>HWI-ST1243:156:D1K83ACXX:7:1105:6981:63483_revcom
ACTACCAGGGTATCTAATCCTGTTTGATCCCCACGCTTTCGCACATCAGCGTCAGTTACAGACCAGAAAGTCGCCTTCGCCACTGGTGTTCCTCCATATC
>NS500333:10:H0V2GAGXX:2:13211:8623:16289 1:N:0:GATCAG
ATTAGGTAGTTGCTGGGGTAACGTCCCAACAAGCCGATAATCGGTACGGGTTGTGAGAGCAAGAGCCCGGAGATGGATTCTGAGACACGAATCCAGGTCCTACGGGGCGCAGCAGGCGCGAAAACTTTACACTGCGCGAAAGCGCGATA
'''
    first_package = os.path.join(path_to_data,'61_otus.v3.gpkg.spkg')
    second_package = os.path.join(path_to_data,'second_packge.spkg')

    with tempfile.NamedTemporaryFile(suffix='.fa') as reads_fasta:
        reads_fasta.write(inseqs)
        reads_fasta.flush()

        cmd = "%s pipe --sequences %s --otu_table /dev/stdout --singlem_packages %s %s" % (
            path_to_script,
            reads_fasta.name,
            first_package,
            second_package)

        expected_rows = [line.split("\t") for line in expected]
        # The sample name derives from the random temp file name, so blank
        # it out of the output before comparing.
        sample_name = os.path.basename(reads_fasta.name).replace('.fa','')
        observed = extern.run(cmd).replace(sample_name, '')
        self.assertEqualOtuTable(expected_rows, observed)
示例15: summarise
def summarise(**kwargs):
    '''Summarise an OTU table as a krona plot.

    One temporary text file is written per (sample, gene) combination that
    has data, each line holding a coverage followed by the tab-separated
    taxonomy levels, and all of them are handed to ktImportText.

    Keyword arguments (both required)
    ---------------------------------
    krona_output:
        output path passed to `ktImportText -o`
    table_collection:
        OTU tables, passed to
        Summariser._collapse_otu_table_into_gene_to_sample_to_taxonomy_to_count

    Raises
    ------
    KeyError
        if either required keyword argument is missing
    Exception
        if any other keyword argument is given
    '''
    krona_output_file = kwargs.pop('krona_output')
    table_collection = kwargs.pop('table_collection')
    if len(kwargs) > 0:
        raise Exception("Unexpected arguments detected: %s" % kwargs)

    # prep the array
    gene_to_sample_to_taxonomy_to_count = Summariser._collapse_otu_table_into_gene_to_sample_to_taxonomy_to_count(table_collection)

    # Re-key the counts by sample first, so files are emitted sample-major
    sample_to_gene_to_taxonomy_to_count = {}
    all_sample_names = set()
    all_gene_names = set()
    for gene, sample_to_taxonomy_to_count in gene_to_sample_to_taxonomy_to_count.items():
        all_gene_names.add(gene)
        for sample, taxonomy_to_count in sample_to_taxonomy_to_count.items():
            all_sample_names.add(sample)
            sample_to_gene_to_taxonomy_to_count.setdefault(sample, {})[gene] = taxonomy_to_count
    is_more_than_one_sample = len(sample_to_gene_to_taxonomy_to_count) > 1

    # write the output krona files
    logging.info("Writing krona %s" % krona_output_file)
    cmd = 'ktImportText -o %s' % krona_output_file
    sample_tempfiles = []
    try:
        for sample in sorted(all_sample_names):
            gene_to_taxonomy_to_count = sample_to_gene_to_taxonomy_to_count[sample]
            for gene in sorted(all_gene_names):
                if gene not in gene_to_taxonomy_to_count:
                    continue
                # The temp files must stay open (and so on disk) until
                # ktImportText has read them.
                f = tempfile.NamedTemporaryFile(prefix='singlem_for_krona')
                sample_tempfiles.append(f)

                for taxonomy, coverage in gene_to_taxonomy_to_count[gene].items():
                    tax_split = taxonomy.split('; ')
                    # Drop a leading 'Root' level unless it is the only one
                    if tax_split[0] == 'Root' and len(tax_split) > 1:
                        tax_split = tax_split[1:]
                    f.write('\t'.join([str(coverage)]+tax_split))
                    f.write('\n')
                f.flush()

                if is_more_than_one_sample:
                    display_name = '%s: %s' % (sample, gene)
                else:
                    display_name = gene
                cmd += " %s,'%s'" % (f.name, display_name)

        extern.run(cmd)
    finally:
        # Close (and thereby delete) the temp files even when writing them
        # or running ktImportText fails.
        for f in sample_tempfiles:
            f.close()