当前位置: 首页>>代码示例>>Python>>正文


Python extern.run函数代码示例

本文整理汇总了Python中extern.run函数的典型用法代码示例。如果您正苦于以下问题：Python run函数的具体用法？Python run怎么用？Python run使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了run函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_query_with_otu_table_two_samples_same_sequence

    def test_query_with_otu_table_two_samples_same_sequence(self):
        # The OTU table holds the same marker sequence in two samples
        # ('maximal' and 'minimal'); only the taxonomy strings differ. The
        # table is used both to build the database and as the query.
        marker = 'ribosomal_protein_L11_rplK_gpkg'
        sequence = 'CGTCGTTGGAACCCAAAAATGAAATAATATATCTTCACTGAGAGAAATGGTATTTATATA'
        maximal_taxonomy = 'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
        minimal_taxonomy = 'Root; k__Bacteria; p__Firmicutes; c__Bacilli'

        with tempfile.NamedTemporaryFile() as otu_table_file:
            rows = [
                self.headers,
                [marker, 'maximal', sequence, '7', '4.95', maximal_taxonomy],
                [marker, 'minimal', sequence, '7', '4.95', minimal_taxonomy],
            ]
            otu_table_file.write("\n".join("\t".join(row) for row in rows))
            otu_table_file.flush()

            with tempdir.TempDir() as db_dir:
                makedb_cmd = "{} makedb --db {}/sdb --otu_table {}".format(
                    path_to_script, db_dir, otu_table_file.name)
                extern.run(makedb_cmd)

                query_cmd = "{} query --query_otu_table {} --db {}/sdb".format(
                    path_to_script, otu_table_file.name, db_dir)

                # Each query sample is expected to hit the entry of each
                # sample with a divergence of 0.
                expected = [['query_name','query_sequence','divergence','num_hits','sample','marker','hit_sequence','taxonomy']]
                hits = (('maximal', maximal_taxonomy),
                        ('minimal', minimal_taxonomy))
                for query_sample in ('maximal', 'minimal'):
                    for hit_sample, hit_taxonomy in hits:
                        expected.append([
                            '%s;%s' % (query_sample, marker),
                            sequence,
                            '0',
                            '7',
                            hit_sample,
                            marker,
                            sequence,
                            hit_taxonomy])

                observed = subprocess.check_output(query_cmd, shell=True)
                self.assertEqualOtuTable(expected, observed)
开发者ID:wwood,项目名称:singlem,代码行数:29,代码来源:test_makedb_and_query.py

示例2: _align_sequences

    def _align_sequences(self, input_sequences_path, output_alignment_path,
                         threads):
        '''Align the given sequences with mafft, writing the result to a file.

        The sequences are read from the input file, stripped of '*'
        characters and fed to mafft on stdin; mafft's stdout is redirected by
        the shell to the output path.

        Parameters
        ----------
        input_sequences_path: str
            path to input sequences in fasta format
        output_alignment_path: str
            path the alignment is written to
        threads: str
            value given to mafft's --thread option
        Returns
        -------
        Nothing
        '''
        logging.debug("Aligning sequences using mafft")
        mafft_cmd = "mafft --anysymbol --thread %s --auto /dev/stdin > %s" % (
            threads, output_alignment_path)

        fasta_lines = []
        with open(input_sequences_path) as sequences_file:
            for seq_name, sequence, _ in SequenceIO().each(sequences_file):
                # '*' characters are removed because including them in the
                # HMM makes tree insertion fail.
                fasta_lines.extend(['>%s' % seq_name,
                                    sequence.replace('*','')])

        extern.run(mafft_cmd, stdin="\n".join(fasta_lines))
开发者ID:geronimp,项目名称:graftM,代码行数:28,代码来源:create.py

示例3: test_no_clustering

    def test_no_clustering(self):
        # Builds a database with --clustering_divergence 0 and queries the
        # per-marker smafadb directly with smafa.
        l11 = 'ribosomal_protein_L11_rplK_gpkg'
        bacilli = 'Root; k__Bacteria; p__Firmicutes; c__Bacilli'
        first_seq = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        # differs from first_seq only in the last base
        second_seq = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATA'
        # differs from first_seq only in the first base (G -> A)
        query_seq = 'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'

        rows = [
            self.headers,
            [l11, 'minimal', first_seq, '7', '4.95', bacilli + '; o__Bacillales'],
            [l11, 'minimal', second_seq, '6', '4.95', bacilli],
            ['ribosomal_protein_S17_gpkg', 'minimal',
             'GCTAAATTAGGAGACATTGTTAAAATTCAAGAAACTCGTCCTTTATCAGCAACAAAACGT',
             '9', '4.95',
             bacilli + '; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
        ]
        otu_table_text = "\n".join("\t".join(row) for row in rows)

        with tempfile.NamedTemporaryFile() as otu_table_file:
            otu_table_file.write(otu_table_text)
            otu_table_file.flush()

            with tempdir.TempDir() as workdir:
                makedb_cmd = "{} makedb --db_path {}/db --otu_table {} --clustering_divergence 0".format(
                    path_to_script, workdir, otu_table_file.name)
                extern.run(makedb_cmd)

                with tempfile.NamedTemporaryFile() as query_fasta:
                    query_fasta.write(">seq1\n" + query_seq + "\n")
                    query_fasta.flush()

                    # Querying the smafadb directly should show no
                    # clustering: both L11 sequences are reported as
                    # separate hits.
                    smafa_cmd = "smafa query {} {}".format(
                        os.path.join(workdir, 'db', l11 + '.smafadb'),
                        query_fasta.name)
                    out = extern.run(smafa_cmd)

                    expected_out = (
                        "\t".join(['seq1', query_seq, second_seq, '2', '60']) + "\n" +
                        "\t".join(['seq1', query_seq, first_seq, '1', '60']) + "\n")
                    self.assertEqual(out, expected_out)
开发者ID:wwood,项目名称:singlem,代码行数:29,代码来源:test_makedb_and_query.py

示例4: run

    def run(self, input_sequence_file, input_sequence_type, daa_file_basename=None):
        '''Run input sequences in either blastp or blastx mode against the
        database specified in __init__.

        Parameters
        ----------
        input_sequence_file: str
            path to query sequences
        input_sequence_type: either 'nucleotide' or 'protein'
            the input_sequences are this kind of sequence
        daa_file_basename: str or None
            basename handed to diamond's -a option; the results are read back
            from '<basename>.daa'. If None, a temporary name is used and the
            resulting .daa file is removed before returning.

        Returns
        -------
        DiamondSearchResult

        Raises
        ------
        Exception
            if input_sequence_type is neither of the two known sequence types
        '''
        if input_sequence_type == UnpackRawReads.PROTEIN_SEQUENCE_TYPE:
            search_mode = 'blastp'
        elif input_sequence_type == UnpackRawReads.NUCLEOTIDE_SEQUENCE_TYPE:
            search_mode = 'blastx'
        else:
            raise Exception("Programming error")

        basename = daa_file_basename
        if basename is None:
            with tempfile.NamedTemporaryFile(prefix='graftm_diamond') as t:
                # we are just stealing the name, don't need the file itself
                basename = t.name

        cmd_list = ["diamond",
                    search_mode,
                    '-k 1',
                    "-d",
                    self._database,
                    "-q",
                    "%s" % input_sequence_file,
                    "-a",
                    basename]
        if self._threads:
            cmd_list.append("--threads")
            cmd_list.append(str(self._threads))
        if self._evalue:
            cmd_list.append("--evalue")
            cmd_list.append(str(self._evalue))

        cmd = ' '.join(cmd_list)
        daa_name = "%s.daa" % basename
        try:
            extern.run(cmd)
            res = DiamondSearchResult.import_from_daa_file(daa_name)
        finally:
            # Diamond makes an extra file; when the name was generated here
            # nobody else knows about it, so remove it even if the search or
            # the import failed. The existence check avoids masking the
            # original error when diamond never got as far as creating it.
            if daa_file_basename is None and os.path.exists(daa_name):
                os.remove(daa_name)

        return res
            
            
            
                
开发者ID:geronimp,项目名称:graftM,代码行数:56,代码来源:diamond.py

示例5: test_biom_hello_world

 def test_biom_hello_world(self):
     # Summarises a two-row OTU table into BIOM output, then converts the
     # single resulting .biom file back to TSV and compares the text.
     gene = '4.12.ribosomal_protein_L11_rplK'
     taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
     first_seq = 'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
     second_seq = 'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTtttCAAGCAGGTGTG'
     rows = [self.headers,
             [gene, 'insert', first_seq, '1', '2.44', taxonomy],
             [gene, 'insert', second_seq, '2', '2.94', taxonomy]]
     biom_name = 'mybiom.' + gene + '.biom'

     with tempdir.TempDir() as tmp:
         with tempfile.NamedTemporaryFile(suffix='.otu_table.csv') as otu_file:
             lines = ["\t".join(row) for row in rows]
             # the trailing '' makes the written text end with a newline
             otu_file.write("\n".join(lines + ['']))
             otu_file.flush()

             summarise_cmd = "%s summarise --biom_prefix '%s' --input_otu_tables '%s'" % (
                 path_to_script, os.path.join(tmp,"mybiom"), otu_file.name)
             extern.run(summarise_cmd)
             self.assertEqual([biom_name], os.listdir(tmp))

             expected_tsv = (
                 '# Constructed from biom file\n#OTU ID\tinsert\ttaxonomy\n' +
                 '%s; %s\t1.0\t%s\n' % (taxonomy, first_seq, taxonomy) +
                 '%s; %s\t2.0\t%s' % (taxonomy, second_seq, taxonomy))
             convert_cmd = "biom convert -i '%s' -o /dev/stdout --to-tsv --header-key taxonomy" % os.path.join(tmp, biom_name)
             self.assertEqual(expected_tsv, extern.run(convert_cmd))
开发者ID:wwood,项目名称:singlem,代码行数:14,代码来源:test_summariser.py

示例6: test_jplace_output

    def test_jplace_output(self):
        # Runs `pipe` with --output_jplace and compares the parsed jplace file
        # with the expected placements. The 'tree' and 'metadata' entries of
        # the parsed file are overwritten with fixed placeholders before the
        # comparison, so only the remaining keys are really checked.
        expected_jplace = {u'fields': [u'classification',
                                       u'distal_length',
                                       u'edge_num',
                                       u'like_weight_ratio',
                                       u'likelihood',
                                       u'pendant_length'],
                           u'metadata': 'the_metadata',
                           u'placements':
                           [{
                            u'nm': [[u'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
                                      2]],
                             u'p': [[u'o__Bacillales',
                                     0.0874346630859,
                                     13,
                                     0.333351177694,
                                     -631.301684875,
                                     0.150831104822],
                                    [u'o__Bacillales',
                                     0.0643521435547,
                                     14,
                                     0.333326655502,
                                     -631.301758441,
                                     0.15083915761],
                                    [u'p__Firmicutes',
                                     5.97534179688e-06,
                                     15,
                                     0.333322166804,
                                     -631.301771907,
                                     0.150839131805]]}],
                           u'tree': 'tree_thanks',
                           u'version': 3}

        with tempdir.TempDir() as d:
            cmd = "%s pipe --sequences %s --otu_table /dev/null --output_jplace %s"\
                  " --singlem_packages %s" % (
                      path_to_script,
                      os.path.join(path_to_data,'1_pipe','jplace_test.fna'),
                      os.path.join(d, "my_jplace"),
                      os.path.join(path_to_data,'4.12.22seqs.spkg'))
            extern.run(cmd)
            jplace_path = os.path.join(d, 'my_jplace_jplace_test_4.12.22seqs.jplace')
            # Close the file deterministically rather than relying on garbage
            # collection of the anonymous file object.
            with open(jplace_path) as jplace_file:
                j = json.load(jplace_file)
            j['tree'] = 'tree_thanks'
            j['metadata'] = 'the_metadata'
            self.assertEqual(expected_jplace, j)

            # Make sure the guppy sing does not croak
            extern.run("guppy sing -o /dev/null '%s'" % jplace_path)
开发者ID:wwood,项目名称:singlem,代码行数:49,代码来源:test_pipe.py

示例7: _create_dmnd_database

    def _create_dmnd_database(self, unaligned_sequences_path, daa_output):
        '''
        Invoke `diamond makedb` to build a diamond database.

        Parameters
        ----------
        unaligned_sequences_path: str
            path to a FASTA file containing unaligned sequences, given to
            diamond's --in option
        daa_output: str
            name of the output database, given to diamond's -d option
        '''
        logging.debug("Building diamond database")

        # both paths are wrapped in single quotes inside the shell command
        makedb_template = "diamond makedb --in '%s' -d '%s'"
        makedb_args = (unaligned_sequences_path, daa_output)
        extern.run(makedb_template % makedb_args)
开发者ID:geronimp,项目名称:graftM,代码行数:15,代码来源:create.py

示例8: global_search

    def global_search(self, query_otu_table_collection,
                     subject_otu_table_collection, cluster_identity):
        '''Search a query OTU table against a subject OTU table, yield over
        UCEntry objects that have been modified so that the query
        and subject are the relevant OtuTableEntry objects rather than
        strings. Or they are None if there are no hits, since
        --output_no_hits is used.

        This is a generator: nothing is cached, written or run until the
        result is iterated.

        query_otu_table_collection: OtuTableCollection
        subject_otu_table_collection: OtuTableCollection
        cluster_identity: float or str
            reject hits if have lower identity than this (implemented with vsearch --id).
        '''
        logging.info("Caching query OTUs")
        query_otus = list(query_otu_table_collection)
        logging.info("Caching target OTUs")
        subject_otus = list(subject_otu_table_collection)

        def name_to_index(name):
            # FASTA names are written below as '<index>;size=<count>'
            return int(name.split(';')[0])

        def write_fasta(handle, otus):
            # Number each record by its position in the cached list so that
            # hits can be mapped back to the OTU objects afterwards.
            for i, u in enumerate(otus):
                handle.write(">%i;size=%i\n" % (i, u.count))
                handle.write(u.sequence.replace('-','')+"\n")
            handle.flush()

        with tempfile.NamedTemporaryFile(prefix='singlem_q_for_vsearch') as query_f:
            write_fasta(query_f, query_otus)

            with tempfile.NamedTemporaryFile(prefix='singlem_db_for_vsearch') as db_f:
                # Write the cached list, not the collection itself: the
                # indices must match subject_otus, and iterating the
                # collection a second time would yield nothing if it can only
                # be consumed once.
                write_fasta(db_f, subject_otus)

                with tempfile.NamedTemporaryFile(prefix='singlem_uc') as uc:
                    command = "vsearch --usearch_global %s --db %s --uc %s --id %s --output_no_hits" % (
                        query_f.name,
                        db_f.name,
                        uc.name,
                        str(cluster_identity))
                    logging.info("Running search")
                    extern.run(command)
                    logging.info("Finished running search")
                    with open(uc.name) as uc_read:
                        for uc_entry in UCFile(uc_read):
                            uc_entry.query = query_otus[name_to_index(uc_entry.query)]
                            if uc_entry.target is not None:
                                uc_entry.target = subject_otus[name_to_index(uc_entry.target)]
                            yield uc_entry
开发者ID:wwood,项目名称:singlem,代码行数:48,代码来源:sequence_searcher.py

示例9: test_cluster_across_samples_via_script

    def test_cluster_across_samples_via_script(self):
        # Writes a two-OTU table (header row included) to a temporary file,
        # runs `summarise --cluster` on it and compares the tab-split stdout
        # with the expected rows.
        e = [['gene','sample','sequence','num_hits','coverage','taxonomy'],
            ['4.11.ribosomal_protein_L10','minimal','TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACT','2','4.88','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
            ['4.12.ribosomal_protein_L11_rplK','ma','TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA','4','9.76','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales']
            ]

        with tempfile.NamedTemporaryFile(prefix='singlem_cluster') as f:
            cmd = "%s summarise --cluster --cluster_id %f --input_otu_tables %s --output_otu_table /dev/stdout" % (
                path_to_script, 58.5/60, f.name)
            for l in ["\t".join(o) for o in e]:
                f.write(l+"\n")
            f.flush()
            output = extern.run(cmd)
            out_clusters = [o.split("\t") for o in output.split("\n")]
            self.assertEqual(
                [['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
                 ['4.12.ribosomal_protein_L11_rplK',
                  'ma',
                  'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
                  '4',
                  '9.76',
                  'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
                 ['4.12.ribosomal_protein_L11_rplK',
                  'minimal',
                  'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
                  '2',
                  '4.88',
                  'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
                 # the output ends with a newline, so splitting on "\n"
                 # leaves a final empty field
                 ['']],
                out_clusters)
开发者ID:wwood,项目名称:singlem,代码行数:31,代码来源:test_clusterer.py

示例10: test_seqs_dna

    def test_seqs_dna(self):
        aln = '''>s1
ga-------------TATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGACGCTGATGTG
>s2 asdas
ca---------GAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGACGCTGA----
>s3
ga-------------TATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGGGCTGATGTG-
>d4
-g----------AGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGACGCTGATG--
'''
        expected = '''TATGGAGGAACACCAGTGGC
TATGGAGGAACACCAGTGGC
TATGGAGGAACACCAGTGGC
TATGGAGGAACACCAGTGGC
'''
        with tempfile.NamedTemporaryFile() as a:
            a.write(aln)
            a.flush()
            with tempfile.NamedTemporaryFile() as stderr:
                cmd = "%s --debug seqs --alignment %s --alignment_type dna"\
                      " --window_size 20 2>%s" % (
                          path_to_script, a.name, stderr.name)
                self.assertEqual('', extern.run(cmd))
                # This includes ignored columns at the front, which were messing things up.
                self.assertTrue('Found best section of the alignment starting from 14\n' in \
                                open(stderr.name).read())
开发者ID:wwood,项目名称:singlem,代码行数:26,代码来源:test_seqs.py

示例11: test_paired_reads_one_read_each_diamond_example

    def test_paired_reads_one_read_each_diamond_example(self):
        # Reads should be merged
        expected = [
            "\t".join(self.headers_with_extras),
            '4.11.22seqs		TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA	2	4.88	2524614704	HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 seq2	60 60	False',
            '']
        inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
>seq2
AAAAAAAAAAAAAAAAA
'''
        inseqs_reverse = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
AAAAAAAAAAAAAAAAA
>seq2
TTCAGCTGCACGACGTACCATAGTGTTTTTGTATACTTTATACTCAACACCAGCTTCACGTAATTGTGAACGTAAGTCAGTAACTTCAGCTACTGTTAAT
''' # reverse complement of the forward, so should collapse.
        with tempfile.NamedTemporaryFile(suffix='.fa') as n:
            n.write(inseqs)
            n.flush()
            with tempfile.NamedTemporaryFile(suffix='.fa') as n2:
                n2.write(inseqs_reverse)
                n2.flush()

                cmd = "{} pipe --sequences {} --otu_table /dev/stdout --singlem_packages {} --reverse {} --output_extras --assignment_method diamond_example".format(
                    path_to_script,
                    n.name,
                    os.path.join(path_to_data,'4.11.22seqs.gpkg.spkg'),
                    n2.name)
                self.assertEqualOtuTable(
                    list([line.split("\t") for line in expected]),
                    extern.run(cmd).replace(os.path.basename(n.name).replace('.fa',''),''))
开发者ID:wwood,项目名称:singlem,代码行数:31,代码来源:test_pipe.py

示例12: test_paired_reads_hello_world

    def test_paired_reads_hello_world(self):
        # Reads should be merged
        expected = [
            "\t".join(self.headers),
            '4.11.22seqs		TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA	1	2.44	Root; d__Bacteria; p__Firmicutes',
            '']
        inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
'''
        inseqs_reverse = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
TTCAGCTGCACGACGTACCATAGTGTTTTTGTATACTTTATACTCAACACCAGCTTCACGTAATTGTGAACGTAAGTCAGTAACTTCAGCTACTGTTAAT
''' # reverse complement of the forward, so should collapse.
        with tempfile.NamedTemporaryFile(suffix='.fa') as n:
            n.write(inseqs)
            n.flush()
            with tempfile.NamedTemporaryFile(suffix='.fa') as n2:
                n2.write(inseqs_reverse)
                n2.flush()

                cmd = "{} pipe --sequences {} --otu_table /dev/stdout --singlem_packages {} --reverse {}".format(
                    path_to_script,
                    n.name,
                    os.path.join(path_to_data,'4.11.22seqs.gpkg.spkg'),
                    n2.name)
                self.assertEqualOtuTable(
                    list([line.split("\t") for line in expected]),
                    extern.run(cmd).replace(os.path.basename(n.name).replace('.fa',''),''))
开发者ID:wwood,项目名称:singlem,代码行数:27,代码来源:test_pipe.py

示例13: test_known_sequence_taxonomy

    def test_known_sequence_taxonomy(self):
        expected = [
            "\t".join(self.headers),
            '4.11.22seqs		TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA	2	4.88	mytax; yeh',
            '']
        inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
>another
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
'''
        with tempfile.NamedTemporaryFile(suffix='.fa') as n:
            n.write(inseqs)
            n.flush()
            with tempfile.NamedTemporaryFile() as taxf:
                taxf.write("HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482\tmytax; yeh\n")
                taxf.write("another\tmytax; yeh; 2\n")
                taxf.flush()

                cmd = "%s pipe --sequences %s --otu_table /dev/stdout --singlem_packages %s "\
                      "--no_assign_taxonomy --known_sequence_taxonomy %s"% (
                          path_to_script, n.name, os.path.join(path_to_data,'4.11.22seqs.gpkg.spkg'),
                          taxf.name)
                self.assertEqual(expected,
                                 extern.run(cmd).replace(
                                     os.path.basename(n.name).replace('.fa',''),
                                     '').split("\n"))
开发者ID:wwood,项目名称:singlem,代码行数:26,代码来源:test_pipe.py

示例14: test_two_nucleotide_packages

    def test_two_nucleotide_packages(self):
        expected = [
            "\t".join(self.headers),
            '61_otus.v3		GGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGACTGACGCTGATGTGCGAAAGCG	2	5.13	Root; k__Bacteria; p__Proteobacteria',
            '61_otus.second.v3		TTAGGTAGTTGCTGGGGTAACGTCCCAACAAGCCGATAATCGGTACGGGTTGTGAGAGCA	1	1.66	Root; k__Archaea; p__Euryarchaeota',
            '']
        inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1105:6981:63483 1:N:0:AAGAGGCAAAGGAGTA
GATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACGCTGATGTGCGAAAGCGTGGGGATCAAACAGGATTAGATACCCTGGTAGT
>HWI-ST1243:156:D1K83ACXX:7:1105:6981:63483_revcom
ACTACCAGGGTATCTAATCCTGTTTGATCCCCACGCTTTCGCACATCAGCGTCAGTTACAGACCAGAAAGTCGCCTTCGCCACTGGTGTTCCTCCATATC
>NS500333:10:H0V2GAGXX:2:13211:8623:16289 1:N:0:GATCAG
ATTAGGTAGTTGCTGGGGTAACGTCCCAACAAGCCGATAATCGGTACGGGTTGTGAGAGCAAGAGCCCGGAGATGGATTCTGAGACACGAATCCAGGTCCTACGGGGCGCAGCAGGCGCGAAAACTTTACACTGCGCGAAAGCGCGATA
'''
        with tempfile.NamedTemporaryFile(suffix='.fa') as n:
            n.write(inseqs)
            n.flush()

            cmd = "%s pipe --sequences %s --otu_table /dev/stdout --singlem_packages %s %s" % (
                path_to_script,
                n.name,
                os.path.join(path_to_data,'61_otus.v3.gpkg.spkg'),
                os.path.join(path_to_data,'second_packge.spkg'))
            self.assertEqualOtuTable(
                list([line.split("\t") for line in expected]),
                extern.run(cmd).replace(os.path.basename(n.name).replace('.fa',''),''))
开发者ID:wwood,项目名称:singlem,代码行数:25,代码来源:test_pipe.py

示例15: summarise

    def summarise(**kwargs):
        '''Summarise an OTU table as a krona plot built with ktImportText.

        One temporary text file is written per (sample, gene) pair that has
        data, and all of them are handed to a single ktImportText call.

        Keyword arguments (both required, no others accepted)
        -----------------------------------------------------
        krona_output:
            given to ktImportText's -o option
        table_collection:
            passed to
            Summariser._collapse_otu_table_into_gene_to_sample_to_taxonomy_to_count

        Raises
        ------
        KeyError
            if either required keyword argument is missing
        Exception
            if any other keyword argument is supplied
        '''
        krona_output_file = kwargs.pop('krona_output')
        table_collection = kwargs.pop('table_collection')
        if len(kwargs) > 0:
            raise Exception("Unexpected arguments detected: %s" % kwargs)

        # prep the array
        gene_to_sample_to_taxonomy_to_count = Summariser._collapse_otu_table_into_gene_to_sample_to_taxonomy_to_count(table_collection)

        # write the output krona files
        logging.info("Writing krona %s" % krona_output_file)
        cmd = 'ktImportText -o %s' % krona_output_file

        # Re-key gene -> sample -> counts as sample -> gene -> counts so the
        # datasets can be emitted grouped by sample.
        sample_to_gene_to_taxonomy_to_count = {}
        all_sample_names = set()
        all_gene_names = set()
        for gene, sample_to_taxonomy_to_count in gene_to_sample_to_taxonomy_to_count.items():
            all_gene_names.add(gene)
            for sample, taxonomy_to_count in sample_to_taxonomy_to_count.items():
                all_sample_names.add(sample)
                if sample not in sample_to_gene_to_taxonomy_to_count:
                    sample_to_gene_to_taxonomy_to_count[sample] = {}
                sample_to_gene_to_taxonomy_to_count[sample][gene] = taxonomy_to_count
        is_more_than_one_sample = len(sample_to_gene_to_taxonomy_to_count) > 1

        sample_tempfiles = []
        try:
            for sample in sorted(all_sample_names):
                for gene in sorted(all_gene_names):
                    if gene not in sample_to_gene_to_taxonomy_to_count[sample]:
                        continue
                    # The files must stay open until ktImportText has run,
                    # so they are collected and closed together afterwards.
                    f = tempfile.NamedTemporaryFile(prefix='singlem_for_krona')
                    sample_tempfiles.append(f)

                    taxonomy_to_count = sample_to_gene_to_taxonomy_to_count[sample][gene]
                    for taxonomy, coverage in taxonomy_to_count.items():
                        tax_split = taxonomy.split('; ')
                        # drop a leading 'Root' unless it is the only level
                        if tax_split[0] == 'Root' and len(tax_split) > 1: tax_split = tax_split[1:]
                        f.write('\t'.join([str(coverage)]+tax_split))
                        f.write('\n')
                    f.flush()
                    if is_more_than_one_sample:
                        display_name = '%s: %s' % (sample, gene)
                    else:
                        display_name = gene
                    cmd += " %s,'%s'" % (f.name, display_name)
            extern.run(cmd)
        finally:
            # Close the temporary files even when writing them or running
            # ktImportText fails.
            for f in sample_tempfiles:
                f.close()
开发者ID:wwood,项目名称:singlem,代码行数:47,代码来源:summariser.py


注:本文中的extern.run函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。