当前位置: 首页>>代码示例>>Python>>正文


Python skbio.read函数代码示例

本文整理汇总了Python中skbio.read函数的典型用法代码示例。如果您正苦于以下问题:Python read函数的具体用法?Python read怎么用?Python read使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了read函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_invalid_files

    def test_invalid_files(self):
        """Check that malformed qseq inputs raise the expected errors.

        Each entry of ``self.invalid_files`` is unpacked as a file, a
        collection of keyword-argument dicts, the message fragments the
        error must contain, and the exception type that must be raised.
        Every entry is tried with each sequence class.
        """
        sequence_classes = (BiologicalSequence, NucleotideSequence,
                            DNASequence, RNASequence, ProteinSequence)
        for seq_cls in sequence_classes:
            for bad_file, kwarg_sets, fragments, exc_type in \
                    self.invalid_files:
                with self.assertRaises(exc_type) as caught:
                    for options in kwarg_sets:
                        # "constructor" and "filter" are handed to
                        # _drop_kwargs before the rest is forwarded
                        # (presumably they do not apply with `into`).
                        _drop_kwargs(options, "constructor", "filter")

                        read(bad_file, format="qseq", verify=False,
                             into=seq_cls, **options)
                for fragment in fragments:
                    self.assertIn(fragment, str(caught.exception))
开发者ID:Kleptobismol,项目名称:scikit-bio,代码行数:10,代码来源:test_qseq.py

示例2: test_invalid_files

    def test_invalid_files(self):
        """Malformed qseq files must fail with the documented error text.

        ``self.invalid_files`` supplies, per fixture, the file, the
        keyword-argument dicts to try, the substrings expected in the error
        message and the exception class expected from ``read``.
        """
        for target_cls in (Sequence, DNA, RNA, Protein):
            for fixture in self.invalid_files:
                bad_file, kwarg_sets, expected_msgs, exc_cls = fixture
                with self.assertRaises(exc_cls) as ctx:
                    for read_kwargs in kwarg_sets:
                        # Pass the dict through _drop_kwargs for these two
                        # keys before forwarding the remainder to read().
                        _drop_kwargs(read_kwargs, 'constructor', 'filter')

                        read(bad_file, format='qseq', verify=False,
                             into=target_cls, **read_kwargs)
                for expected_msg in expected_msgs:
                    self.assertIn(expected_msg, str(ctx.exception))
开发者ID:Achuth17,项目名称:scikit-bio,代码行数:11,代码来源:test_qseq.py

示例3: test_dna_iterator_to_dna_fasta_format

    def test_dna_iterator_to_dna_fasta_format(self):
        """Round-trip DNA sequences through the DNAIterator -> FASTA transformer.

        The sequences written by the transformer are read back and compared,
        one by one, with a fresh read of the source fixture.
        """
        transformer = self.get_transformer(DNAIterator, DNAFASTAFormat)
        filepath = self.get_data_path('dna-sequences.fasta')
        generator = skbio.read(filepath, format='fasta', constructor=skbio.DNA)
        seqs_in = DNAIterator(generator)

        obs = transformer(seqs_in)
        self.assertIsInstance(obs, DNAFASTAFormat)
        obs_seqs = list(
            skbio.read(str(obs), format='fasta', constructor=skbio.DNA))

        # The iterator handed to the transformer wraps a generator that has
        # already been consumed, so it cannot serve as the expected side:
        # zip() would stop immediately and the loop would assert nothing.
        # Re-read the fixture instead.
        exp_seqs = list(
            skbio.read(filepath, format='fasta', constructor=skbio.DNA))

        # zip() truncates silently, so pin the number of records first.
        self.assertEqual(len(obs_seqs), len(exp_seqs))
        for act, exp in zip(obs_seqs, exp_seqs):
            self.assertEqual(act, exp)
开发者ID:BenKaehler,项目名称:q2-types,代码行数:12,代码来源:test_transformer.py

示例4: test_pair_dna_sequences_directory_format_to_pair_dna_iterator

    def test_pair_dna_sequences_directory_format_to_pair_dna_iterator(self):
        """Transform a paired FASTA directory and compare with direct reads.

        The pairs yielded by the transformed object must equal the pairs
        obtained by reading the left and right fixtures with ``skbio.read``.
        """
        fasta_names = ('left-dna-sequences.fasta', 'right-dna-sequences.fasta')
        _, observed = self.transform_format(
            PairedDNASequencesDirectoryFormat, PairedDNAIterator,
            filenames=fasta_names)

        expected_left, expected_right = [
            skbio.read(self.get_data_path(name), format='fasta',
                       constructor=skbio.DNA)
            for name in fasta_names
        ]
        expected_pairs = zip(expected_left, expected_right)
        for observed_pair, expected_pair in zip(observed, expected_pairs):
            self.assertEqual(observed_pair, expected_pair)
        self.assertIsInstance(observed, PairedDNAIterator)
开发者ID:BenKaehler,项目名称:q2-types,代码行数:13,代码来源:test_transformer.py

示例5: _annotate_fp

    def _annotate_fp(self, fp, aligner='blastp', evalue=0.001, cpus=1,
                     outfmt='tab', params=None) -> pd.DataFrame:
        '''Annotate the sequences in the file.

        The databases in ``self.dat`` are searched one after another. After
        each search, the sequences that have not produced a hit so far are
        written to a temporary fasta file, which becomes the query for the
        next database. The search stops early once no sequence is left.

        Parameters
        ----------
        fp : str
            Path of the fasta file holding the query sequences.
        aligner : str
            Aligner name forwarded to ``run_blast``.
        evalue : float
            E-value forwarded to ``run_blast``.
        cpus : int
            Number of CPUs forwarded to ``run_blast``.
        outfmt : str
            Value passed to ``run_view`` as its ``--outfmt`` parameter.
        params : dict-like
            Parameters for diamond blastp/blastx that pass to ``run_blast``.

        Returns
        -------
        pd.DataFrame
            The rows returned by ``parse_tabular`` for every database that
            was searched, concatenated in search order. Empty if
            ``self.dat`` is empty.
        '''
        # ids that already have a hit; a set keeps the membership test in
        # the filtering loop below O(1)
        found = set()
        hits = []
        for db in self.dat:
            out_prefix = splitext(basename(db))[0]
            daa_fp = join(self.out_dir, '%s.daa' % out_prefix)
            out_fp = join(self.out_dir, '%s.diamond' % out_prefix)
            self.run_blast(fp, daa_fp, db, aligner=aligner,
                           evalue=evalue, cpus=cpus, params=params)
            self.run_view(daa_fp, out_fp, params={'--outfmt': outfmt})
            hit = self.parse_tabular(out_fp)
            hits.append(hit)
            found.update(hit.index)
            # save to a tmp file the seqs that do not hit current database
            new_fp = join(self.tmp_dir, '%s.fa' % out_prefix)
            with open(new_fp, 'w') as f:
                for seq in read(fp, format='fasta'):
                    if seq.metadata['id'] not in found:
                        seq.write(f, format='fasta')
            # no seq left
            if stat(new_fp).st_size == 0:
                break
            fp = new_fp
        # DataFrame.append was removed in pandas 2.0; concatenate once.
        return pd.concat(hits) if hits else pd.DataFrame()
开发者ID:tkosciol,项目名称:micronota,代码行数:32,代码来源:diamond.py

示例6: setUp

 def setUp(self):
     """Prepare the blast fixture tuple and the sequence cache for the tests.

     ``self.blast`` holds the aligner name, the query file path and the path
     of the matching ``.diamond`` file; ``self.cache`` is a ``DiamondCache``
     built from the sequences read from ``cache.faa``.
     """
     super().setUp()
     aligner, query_fn = 'blastp', 'WP_009885814.faa'
     query_fp = get_data_path(query_fn)
     diamond_fp = _get_named_data_path('%s.diamond' % query_fn)
     self.blast = (aligner, query_fp, diamond_fp)
     cached_seqs = skbio.read(_get_named_data_path('cache.faa'),
                              format='fasta')
     self.cache = DiamondCache(list(cached_seqs))
开发者ID:elsherbini,项目名称:micronota,代码行数:7,代码来源:test_diamond.py

示例7: test_valid_files

    def test_valid_files(self):
        """Read each valid qseq fixture and compare with a hand-built object.

        For every sequence class and every keyword-argument dict of a
        fixture, the record selected by ``seq_num`` (first record when
        absent) is built directly from its components and must equal what
        ``read`` returns.
        """
        metadata_keys = ('id', 'machine_name', 'run_number', 'lane_number',
                         'tile_number', 'x', 'y', 'index', 'read_number')
        for seq_cls in (Sequence, DNA, RNA, Protein):
            for path, kwarg_sets, records in self.valid_files:
                for read_kwargs in kwarg_sets:
                    ctor_kwargs = {}
                    # Currently not validating the alphabet for qseq
                    # files that are read in for this test.
                    if hasattr(seq_cls, 'alphabet'):
                        read_kwargs['validate'] = False
                        ctor_kwargs['validate'] = False
                    _drop_kwargs(read_kwargs, 'constructor', 'filter')

                    # seq_num is 1-based
                    record = records[read_kwargs.get('seq_num', 1) - 1]
                    sequence = record['sequence']
                    metadata = {key: record[key] for key in metadata_keys}
                    quality = np.array(record['quality'], np.uint8)
                    expected = seq_cls(
                        sequence,
                        metadata=metadata,
                        positional_metadata={'quality': quality},
                        **ctor_kwargs)

                    observed = read(path, into=seq_cls, format='qseq',
                                    verify=False, **read_kwargs)
                    self.assertEqual(observed, expected)
开发者ID:Achuth17,项目名称:scikit-bio,代码行数:33,代码来源:test_qseq.py

示例8: body_site

def body_site(coords, mapping_file, output, filename, sample):
    """Generates a bodysite figure for a sample in the coordinates file

    Every sample is drawn as a dot coloured by its ``TITLE_BODY_SITE``
    value, and the requested sample is drawn on top as a larger dot.

    Parameters
    ----------
    coords : str or file handle
        Ordination results, read into ``OrdinationResults``.
    mapping_file : str or file handle
        Tab-separated mapping file with a ``#SampleID`` column and a
        ``TITLE_BODY_SITE`` column.
    output : str
        Directory in which the figure is saved.
    filename : str
        File name of the figure inside ``output``.
    sample : str
        Identifier of the sample to highlight.

    Raises
    ------
    ValueError
        If ``sample`` is not one of the ids in the ordination results.
    """
    o = read(coords, into=OrdinationResults)

    # coordinates
    c_df = pd.DataFrame(o.site, o.site_ids)

    # mapping file
    mf = pd.read_csv(mapping_file, sep='\t', dtype=str)
    mf.set_index('#SampleID', inplace=True)

    mf = mf.loc[o.site_ids]

    if sample not in o.site_ids:
        raise ValueError("Sample %s not found" % sample)

    # build the palette once instead of once per colour
    paired = sns.color_palette('Paired', 12)
    color_hmp_fecal = paired[10]  # light brown
    color_agp_fecal = paired[11]  # dark brown
    color_hmp_oral = paired[0]    # light blue
    color_agp_oral = paired[1]    # dark blue
    color_hmp_skin = paired[2]    # light green
    color_agp_skin = paired[3]    # dark green

    grp_colors = {'AGP-FECAL': color_agp_fecal,
                  'AGP-ORAL':  color_agp_oral,
                  'AGP-SKIN':  color_agp_skin,
                  'HMP-FECAL': color_hmp_fecal,
                  'GG-FECAL':  color_hmp_fecal,
                  'PGP-FECAL': color_hmp_fecal,
                  'HMP-ORAL':  color_hmp_oral,
                  'PGP-ORAL':  color_hmp_oral,
                  'HMP-SKIN':  color_hmp_skin,
                  'PGP-SKIN':  color_hmp_skin}

    # plot categories as 50 slices with random zorder
    # (dict.items() works on Python 2 and 3; iteritems() is Python 2 only)
    for grp, color in grp_colors.items():
        sub_coords = c_df[mf.TITLE_BODY_SITE == grp].values
        for i in np.array_split(sub_coords, 50):
            if i.size == 0:
                continue
            plt.scatter(i[:, 0], i[:, 1], color=color,
                        edgecolor=np.asarray(color)*0.6, lw=LINE_WIDTH,
                        alpha=ALPHA, zorder=np.random.rand())

    # plot participant's dot: a white-rimmed dot with a slightly smaller,
    # dark-rimmed dot of the same colour on top of it
    sample_coords = c_df.loc[sample]
    sample_x, sample_y = sample_coords[0], sample_coords[1]
    sample_color = grp_colors[mf.loc[sample]['TITLE_BODY_SITE']]
    plt.scatter(sample_x, sample_y, color=sample_color,
                s=270, edgecolor='w', zorder=1, lw=LINE_WIDTH_WHITE)
    plt.scatter(sample_x, sample_y, color=sample_color,
                s=250, edgecolor=np.asarray(sample_color)*0.6,
                zorder=2, lw=LINE_WIDTH_BLACK)

    plt.axis('off')
    my_dpi = 72
    out_file = os.path.join(output, filename)
    # NOTE(review): the original also passed figsize=(1000 / dpi, 1000 / dpi)
    # here, but figsize is not a savefig parameter: older Matplotlib ignored
    # it and recent releases reject unknown keywords. It is dropped so the
    # saved image keeps the size it always had; set the size on the figure
    # itself if a 1000x1000 px output is actually wanted.
    plt.savefig(out_file, dpi=my_dpi)
    plt.close()
开发者ID:Pratap5387,项目名称:American-Gut,代码行数:60,代码来源:mod2_pcoa.py

示例9: test_fastq_to_sequence

    def test_fastq_to_sequence(self):
        """Read single records from valid fastq files into each sequence type.

        The record selected by ``seq_num`` (first record when absent) is
        built by hand from its components and compared with what ``read``
        returns for the same file and keyword arguments.
        """
        factories = [partial(Sequence), partial(DNA, validate=False),
                     partial(RNA, validate=False),
                     partial(Protein, validate=False)]
        for make_seq in factories:
            for paths, kwarg_sets, records in self.valid_configurations:
                for path in paths:
                    # skip empty file case since we cannot read a specific
                    # sequence from an empty file
                    if len(records) == 0:
                        continue

                    for options in kwarg_sets:
                        _drop_kwargs(options, 'constructor')

                        # seq_num is 1-based
                        record = records[options.get('seq_num', 1) - 1]
                        sequence = record[2]
                        metadata = {'id': record[0],
                                    'description': record[1]}
                        quality = np.array(record[3], dtype=np.uint8)
                        expected = make_seq(
                            sequence, metadata=metadata,
                            positional_metadata={'quality': quality})

                        # read() gets the bare class behind the partial
                        observed = read(path, into=make_seq.func,
                                        format='fastq', verify=False,
                                        **options)
                        self.assertEqual(observed, expected)
开发者ID:7924102,项目名称:scikit-bio,代码行数:26,代码来源:test_fastq.py

示例10: fungi_from_fasta

def fungi_from_fasta(fasta_fh, accession_fh, taxonomy_fh):
    """Filter SILVA sequences to keep only fungi.

    Filters a fasta file of aligned or unaligned sequences to include only
    fungi. Only keeps sequences that have accession numbers that can be mapped
    to a fungal taxonomy string that ends at the genus rank. Sequences whose
    identifier is missing from the accession map are skipped.

    Parameters
    ----------
    fasta_fh : filehandle
        Fasta file of aligned or unaligned SILVA sequences. Each sequence
        identifier must be an accession number.
    accession_fh : filehandle
        A tab-separated file mapping accession numbers to a mapping number in
        `taxonomy_fh`. This file should contain exactly two columns:
        accession number and mapping number.
    taxonomy_fh : filehandle
        A tab-separated file that identifies the taxonomy and rank of a
        mapping number in `accession_fh`. This file should contain exactly
        five columns beginning with taxonomy, mapping number and rank. The
        last two columns are ignored.

    Returns
    -------
    generator
        Yields ``skbio.BiologicalSequence`` objects.

    """
    accession_map = _parse_accession_map(accession_fh)
    taxonomy_map = _parse_taxonomy_map(taxonomy_fh)
    for seq in skbio.read(fasta_fh, format="fasta"):
        # An accession that is absent from the map cannot be mapped to a
        # taxonomy, so the sequence is filtered out instead of raising
        # KeyError part-way through the stream.
        if seq.id not in accession_map:
            continue
        if accession_map[seq.id] in taxonomy_map:
            yield seq
开发者ID:wasade,项目名称:ghost-tree,代码行数:34,代码来源:filter.py

示例11: _parse_fasta_dictionary

 def _parse_fasta_dictionary(self):
     """Map each record id in the fasta file to a converted sequence.

     Every record read from ``self.fasta_path`` is passed through
     ``self.sequence_type`` and stored under the record's ``"id"`` metadata
     entry; a repeated id keeps the last record seen.
     """
     convert = self.sequence_type
     return {
         record.metadata["id"]: convert(record)
         for record in read(self.fasta_path, format="fasta")
     }
开发者ID:gravity226,项目名称:pyensembl,代码行数:7,代码来源:sequence_data.py

示例12: test_dna_fasta_format_to_dna_iterator

    def test_dna_fasta_format_to_dna_iterator(self):
        """Transform a DNA fasta fixture and compare with a direct read.

        The sequences produced by the transformation must match, in order,
        the ones ``skbio.read`` parses from the same source file.
        """
        source, transformed = self.transform_format(
            DNAFASTAFormat, DNAIterator, filename='dna-sequences.fasta')

        expected_seqs = skbio.read(str(source), format='fasta',
                                   constructor=skbio.DNA)

        for pair in zip(transformed, expected_seqs):
            self.assertEqual(*pair)
开发者ID:BenKaehler,项目名称:q2-types,代码行数:8,代码来源:test_transformer.py

示例13: gradient

def gradient(coords, mapping_file, color, output, filename, sample):
    """Generates a gradient figure for a sample in the coordinates file

    Samples whose ``color`` metadata value is numeric are drawn with a
    red-blue colour gradient; all other samples are drawn in gray. The
    requested sample is drawn on top as a larger dot.

    Parameters
    ----------
    coords : str or file handle
        Ordination results, read into ``OrdinationResults``.
    mapping_file : str or file handle
        Tab-separated mapping file with a ``#SampleID`` column.
    color : str
        Name of the mapping file column used to colour the samples.
    output : str
        Directory in which the figure is saved.
    filename : str
        File name of the figure inside ``output``.
    sample : str
        Identifier of the sample to highlight.

    Raises
    ------
    ValueError
        If ``sample`` is not one of the ids in the ordination results.
    """
    o = read(coords, into=OrdinationResults)

    # coordinates
    c_df = pd.DataFrame(o.site, o.site_ids)

    # mapping file
    mf = pd.read_csv(mapping_file, sep='\t', converters=defaultdict(str),
                     dtype=str)
    mf.set_index('#SampleID', inplace=True)
    mf = mf.loc[o.site_ids]
    # Series.convert_objects was removed from pandas; to_numeric with
    # errors='coerce' likewise turns unparsable values into NaN.
    mf[color] = pd.to_numeric(mf[color], errors='coerce')

    if sample not in o.site_ids:
        raise ValueError("Sample %s not found" % sample)

    is_missing = pd.isnull(mf[color])
    numeric = mf[~is_missing]
    non_numeric = mf[is_missing]

    color_array = plt.cm.RdBu(numeric[color]/max(numeric[color]))

    # plot numeric metadata as colored gradient
    ids = numeric.index
    x, y = c_df.loc[ids][0], c_df.loc[ids][1]
    plt.scatter(x, y, c=numeric[color], cmap=plt.get_cmap('RdBu'),
                alpha=ALPHA, lw=LINE_WIDTH, edgecolor=color_array*0.6)

    # plot non-numeric metadata as gray
    ids = non_numeric.index
    x, y = c_df.loc[ids][0], c_df.loc[ids][1]
    plt.scatter(x, y, c='0.5', alpha=ALPHA, lw=LINE_WIDTH, edgecolor='0.3')

    # plot individual's dot: gray unless the sample has a numeric value
    try:
        _color = color_array[numeric.index.tolist().index(sample)]
    except ValueError:
        _color = (0.5, 0.5, 0.5)

    sample_coords = c_df.loc[sample]
    sample_x, sample_y = sample_coords[0], sample_coords[1]
    plt.scatter(sample_x, sample_y,
                color=_color, s=270, edgecolor='w', lw=LINE_WIDTH_WHITE)
    plt.scatter(sample_x, sample_y,
                color=_color, s=250, edgecolor=np.asarray(_color)*0.6,
                lw=LINE_WIDTH_BLACK)

    plt.axis('off')
    my_dpi = 72
    out_file = os.path.join(output, filename)
    # NOTE(review): the original also passed figsize=(1000 / dpi, 1000 / dpi)
    # here, but figsize is not a savefig parameter: older Matplotlib ignored
    # it and recent releases reject unknown keywords. It is dropped so the
    # saved image keeps the size it always had; set the size on the figure
    # itself if a 1000x1000 px output is actually wanted.
    plt.savefig(out_file, dpi=my_dpi)
    plt.close()
开发者ID:Pratap5387,项目名称:American-Gut,代码行数:56,代码来源:mod2_pcoa.py

示例14: sort_uniref

def sort_uniref(db_fp, uniref_fp, out_d, resolution, force=False):
    '''Sort UniRef sequences into different partitions.

    Each sequence is written to one fasta file in ``out_d`` chosen from the
    status and kingdom stored for its accession in the metadata database.
    One file is created for every combination of ``_status`` and
    ``_kingdom``, named ``uniref<resolution>_<status>_<kingdom>.fasta``,
    plus ``uniref<resolution>__other.fasta`` for sequences whose accession
    has no row in the database. ``make_db`` is then run on every file that
    is not empty.

    Parameters
    ----------
    db_fp : str
        The database file created by ``prepare_metadata``.
    uniref_fp : str
        The UniRef100 fasta file. gzipped or not.
    out_d : str
        The output directory to place the resulting fasta files.
    resolution : int
        The UniRef resolution; used in the output file names and to strip
        the ``UniRef<resolution>_`` prefix from sequence ids.
    force : bool
        Passed to ``_overwrite`` together with ``out_d``.
    '''
    _overwrite(out_d, force)
    makedirs(out_d)
    logger = getLogger(__name__)
    logger.info('Sorting UniRef sequences')
    fns = ['%s_%s' % (i, j) for i, j in product(_status, _kingdom)]
    fns.append('_other')
    fps = [join(out_d, 'uniref%d_%s.fasta' % (resolution, f)) for f in fns]

    files = {}
    try:
        for fn, fp in zip(fns, fps):
            files[fn] = open(fp, 'w')

        with connect(db_fp) as conn:
            cursor = conn.cursor()
            for seq in read(uniref_fp, format='fasta', constructor=Sequence):
                seq_id = seq.metadata['id']
                ac = seq_id.replace('UniRef%d_' % resolution, '')
                # default partition for accessions missing from the db;
                # joins to '_other'
                group = ['', 'other']
                cursor.execute('''SELECT * FROM metadata
                                  WHERE ac = ?''',
                               (ac,))
                for _, s, k in cursor.fetchall():
                    group[0] = _status[s]
                    group[1] = _kingdom[k]
                seq.write(files['_'.join(group)])
    finally:
        # close every handle that was opened, even if sorting failed
        for fh in files.values():
            fh.close()

    for fp in fps:
        # if the fasta file is not empty
        if stat(fp).st_size > 0:
            make_db(fp)
开发者ID:elsherbini,项目名称:micronota,代码行数:56,代码来源:_uniref.py

示例15: test_pair_dna_iterator_to_pair_dna_sequences_directory_format

    def test_pair_dna_iterator_to_pair_dna_sequences_directory_format(self):
        """Write paired DNA sequences to a directory and read them back.

        The left/right pairs read from the transformer's output directory
        must equal the pairs that were fed into the transformer.
        """
        transformer = self.get_transformer(PairedDNAIterator,
                                           PairedDNASequencesDirectoryFormat)

        # Materialise the fixtures: the generators returned by skbio.read
        # can be walked only once, and the sequences are needed both as
        # transformer input and as the expected values below.
        l_seqs = list(
            skbio.read(self.get_data_path('left-dna-sequences.fasta'),
                       format='fasta', constructor=skbio.DNA))
        r_seqs = list(
            skbio.read(self.get_data_path('right-dna-sequences.fasta'),
                       format='fasta', constructor=skbio.DNA))
        paired_input = PairedDNAIterator(zip(l_seqs, r_seqs))

        obs = transformer(paired_input)
        obs_l = skbio.read('%s/left-dna-sequences.fasta' % str(obs),
                           format='fasta', constructor=skbio.DNA)
        obs_r = skbio.read('%s/right-dna-sequences.fasta' % str(obs),
                           format='fasta', constructor=skbio.DNA)

        obs_pairs = list(zip(obs_l, obs_r))
        exp_pairs = list(zip(l_seqs, r_seqs))
        # zip() truncates silently, so pin the number of pairs first.
        self.assertEqual(len(obs_pairs), len(exp_pairs))
        for act, exp in zip(obs_pairs, exp_pairs):
            self.assertEqual(act, exp)
        self.assertIsInstance(obs, PairedDNASequencesDirectoryFormat)
开发者ID:BenKaehler,项目名称:q2-types,代码行数:19,代码来源:test_transformer.py


注:本文中的skbio.read函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。