当前位置: 首页>>代码示例>>Python>>正文


Python parse.fields_to_dict函数代码示例

本文整理汇总了Python中qiime.parse.fields_to_dict函数的典型用法代码示例。如果您正苦于以下问题：Python fields_to_dict函数的具体用法？Python fields_to_dict怎么用？Python fields_to_dict使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了fields_to_dict函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: map_otu_map_files

def map_otu_map_files(otu_files, failures_file=None):
    """Fold a series of OTU map files into a single mapping.

    The first entry of otu_files is parsed with fields_to_dict and becomes
    the initial result. Each following entry is parsed the same way and
    combined with the running result through expand_otu_map_seq_ids, in the
    order given. When failures_file is truthy, the combined result is
    finally passed through expand_failures.

    Arguments:
        otu_files - sequence of OTU map inputs accepted by fields_to_dict;
            must contain at least one entry (otu_files[0] is always read)
        failures_file - optional input handed to expand_failures
    """
    # delim=None makes the parser split on any whitespace, so files mixing
    # tabs and spaces are handled
    merged = fields_to_dict(otu_files[0], delim=None)
    for later_file in otu_files[1:]:
        later_map = fields_to_dict(later_file, delim=None)
        merged = expand_otu_map_seq_ids(later_map, merged)
    if not failures_file:
        return merged
    return expand_failures(failures_file, merged)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:10,代码来源:pick_otus.py

示例2: rewrite_otu_table_with_taxonomy

def rewrite_otu_table_with_taxonomy(taxon_lines, otu_lines, id_map_lines=None,
    outfile=stdout):
    """Write the OTU table to outfile with a taxonomy column appended.

    Arguments:
        taxon_lines - taxonomy lines, parsed with fields_to_dict; only the
            first whitespace-delimited token of each key is used as OTU id
        otu_lines - lines of the OTU table to rewrite
        id_map_lines - optional tab-delimited lines of (old id, new id)
            pairs; when given, taxonomy entries are re-keyed by new id and
            entries whose id is not in the map are dropped
        outfile - writable file-like object receiving the rewritten table

    The '#OTU ID' header line gains a 'Consensus Lineage' column, other
    '#' lines are copied unchanged, and every data line gets the taxonomy
    recorded for its id (the string 'None' when there is none).
    """
    # keys sometimes carry extra fields after the OTU id; keep the id only
    taxonomy = dict([(key.split()[0], value)
        for key, value in fields_to_dict(taxon_lines).items()])
    taxonomy = fix_taxonomy_delimiters(taxonomy)

    if id_map_lines:
        id_map = dict(map(strip, row.split('\t')) for row in id_map_lines)
        remapped = {}
        for old_id, lineage in taxonomy.items():
            if old_id in id_map:
                remapped[id_map[old_id]] = lineage
        assert remapped != taxonomy
        taxonomy = remapped

    for line in otu_lines:
        # normalise so every line ends with exactly one trailing newline
        if not line.endswith('\n'):
            line = line + '\n'
        body = line[:-1]
        if line.startswith('#OTU ID'):
            outfile.write(body + '\tConsensus Lineage\n')
            continue
        if line.startswith('#'):
            outfile.write(line)
            continue
        otu_id, _ = line.split('\t', 1)
        lineage = taxonomy.get(otu_id, 'None')
        outfile.write(body + '\t' + lineage + '\n')
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:30,代码来源:add_taxa.py

示例3: main

def main():
    """Run a Mantel test on every pair of input distance matrices.

    Reads the command line options described by script_info, optionally
    loads a sample id map (first value of each fields_to_dict entry), and
    writes one tab-delimited result row per unordered pair of distance
    matrix files to opts.output_fp. Pairs whose compatible matrices share
    fewer than two samples are reported as 'Too few samples'.

    All files are opened through context managers so that their handles
    are released even when parsing or the Mantel test raises.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    sample_id_map_fp = opts.sample_id_map_fp
    if sample_id_map_fp:
        with open(sample_id_map_fp, "U") as sample_id_map_f:
            sample_id_map = dict([(k, v[0]) for k, v in
                                  fields_to_dict(sample_id_map_f).items()])
    else:
        sample_id_map = None

    input_dm_fps = opts.input_dms.split(',')
    num_iterations = opts.num_iterations
    with open(opts.output_fp, 'w') as output_f:
        output_f.write(comment)
        output_f.write('DM1\tDM2\tNumber of entries\tMantel p-value\n')
        for i, fp1 in enumerate(input_dm_fps):
            for fp2 in input_dm_fps[i+1:]:
                # Both matrices are parsed and made compatible while their
                # files are still open, so no assumption is made about when
                # parse_distmat actually reads its input.
                with open(fp1, 'U') as dm1_f:
                    with open(fp2, 'U') as dm2_f:
                        (dm1_labels, dm1), (dm2_labels, dm2) =\
                            make_compatible_distance_matrices(
                                parse_distmat(dm1_f),
                                parse_distmat(dm2_f),
                                lookup=sample_id_map)
                if len(dm1_labels) < 2:
                    output_f.write('%s\t%s\t%d\tToo few samples\n' %
                                   (fp1, fp2, len(dm1_labels)))
                    continue
                p = mantel(dm1, dm2, n=num_iterations)
                p_str = format_p_value_for_num_iters(p, num_iterations)
                output_f.write('%s\t%s\t%d\t%s\n' %
                               (fp1, fp2, len(dm1_labels), p_str))
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:29,代码来源:compare_distance_matrices.py

示例4: test_parallel_rdp_taxonomy_assigner

 def test_parallel_rdp_taxonomy_assigner(self):
     """ parallel_rdp_taxonomy_assigner functions as expected """

     params = {'id_to_taxonomy_fp':self.id_to_taxonomy_file.name,
       'rdp_max_memory':1500,
       'rdp_classifier_fp':getenv('RDP_JAR_PATH'),
       'confidence':0.80,
       'reference_seqs_fp':self.reference_seqs_file.name
     }

     app = ParallelRdpTaxonomyAssigner()
     # the return value is not inspected; results are read back from disk
     app(self.tmp_seq_filepath,
         self.test_out,
         params,
         job_prefix='RDPTEST',
         poll_directly=True,
         suppress_submit_jobs=False)
     result_fps = glob(join(self.test_out, '*_tax_assignments.txt'))
     # close the assignments file as soon as it has been parsed
     with open(result_fps[0], 'U') as results_f:
         results = fields_to_dict(results_f)
     # some basic sanity checks: we should get the same number of sequences
     # as our input with the same seq IDs. We should have a taxonomy string
     # and a confidence value for each seq as well.
     self.assertEqual(len(results), 2)
     self.assertEqual(len(results['X67228 some description']), 2)
     self.assertEqual(len(results['EF503697']), 2)
开发者ID:DDomogala3,项目名称:qiime,代码行数:25,代码来源:test_assign_taxonomy.py

示例5: _parse_taxonomic_information

def _parse_taxonomic_information(tax_map_lines, taxonomic_levels=8):
    """Parses a taxonomy mapping file to return mapping of seq ID to taxonomy.
    
    Returns a dictionary with sequence ID as the key and a list containing the
    taxonomy at each level. Empty taxonomic levels (i.e. ';;' or levels
    containing only whitespace) are ignored.

    Arguments:
        tax_map_lines - list of lines from the taxonomy mapping file (the
            result of calling readlines() on the open file handle)
        taxonomic_levels - the number of taxonomic levels in the taxonomy
            strings found in the taxonomy mapping file. All taxonomy strings
            must have this number of levels (excluding empty taxonomic levels)
    """
    tax_info = {}

    if tax_map_lines[0] != \
            "ID Number\tGenBank Number\tNew Taxon String\tSource\n":
        raise ValueError("The taxonomy map file appears to be invalid "
                         "because it is either missing the header or has a "
                         "corrupt header.")
    for seq_id, seq_info in fields_to_dict(tax_map_lines[1:]).items():
        if len(seq_info) != 3:
            raise ValueError("The taxonomy map file appears to be invalid "
                             "because it does not have exactly 4 columns.")
        # Split at each level and remove any empty levels or levels that
        # contain only whitespace.
        taxonomy = [level for level in seq_info[1].split(';') \
                    if level.strip() != '']
        if len(taxonomy) != taxonomic_levels:
            raise ValueError("Encountered invalid taxonomy '%s'. Valid "
                    "taxonomy strings must have %d levels separated by "
                    "semicolons." % (seq_info[1], taxonomic_levels))
        tax_info[seq_id] = taxonomy
    return tax_info
开发者ID:infotroph,项目名称:nested_reference_otus,代码行数:35,代码来源:summarize_taxonomic_agreement.py

示例6: get_seqs_to_keep_lookup_from_otu_map

def get_seqs_to_keep_lookup_from_otu_map(seqs_to_keep_f):
    """Build a membership lookup of every sequence ID in an OTU map.

    seqs_to_keep_f is parsed with fields_to_dict; each sequence ID found in
    any of the resulting values becomes a key of the returned dictionary,
    with None as its value.
    """
    lookup = {}
    for otu_members in fields_to_dict(seqs_to_keep_f).values():
        for seq_id in otu_members:
            lookup[seq_id] = None
    return lookup
开发者ID:Honglongwu,项目名称:qiime,代码行数:7,代码来源:filter_fasta.py

示例7: test_write_otu_map_prefix

    def test_write_otu_map_prefix(self):
        """write_otu_map functions as expected w otu prefix """
        write_otu_map(self.otu_map1, self.tmp_fp1, "my.otu.")
        # Register the file before reading it back, so it is still listed
        # in files_to_remove if parsing or the assertion below fails
        # (presumably tearDown deletes the listed files -- confirm).
        self.files_to_remove.append(self.tmp_fp1)
        # Close the handle as soon as the map has been parsed.
        with open(self.tmp_fp1) as otu_map_f:
            actual = fields_to_dict(otu_map_f)

        exp = {"my.otu.0": ["seq1", "seq2", "seq5"], "my.otu.1": ["seq3", "seq4"], "my.otu.2": ["seq6", "seq7", "seq8"]}
        self.assertEqual(actual, exp)
开发者ID:Gaby1212,项目名称:qiime,代码行数:8,代码来源:test_format.py

示例8: _generate_taxonomic_agreement_summary

def _generate_taxonomic_agreement_summary(otu_map_lines, tax_map_lines,
                                         taxonomic_levels=8):
    """Computes a summary of taxonomic agreement between ref and its seqs.

    Returns a dictionary with OTU ID as the key. The value is a four-element
    list. The first element is the size of the OTU (i.e. the number of seqs in
    the OTU, including the reference). The second element is a list of sequence
    identifiers for each sequence in the OTU. The reference sequence ID will
    always be listed first, followed by the sequence IDs of the other members
    of the OTU as they appear in the OTU map. The third element is a list
    containing percent agreement at each taxonomic level (a list of floats).
    The fourth element is a list containing all taxonomic values that were
    encountered at each level. The reference taxonomic value will always be
    listed first. The third and fourth elements of the top-level list will
    always be the same length (taxonomic_levels) because they each contain
    information for each taxonomic level.

    Arguments:
        otu_map_lines - list of lines in the OTU map (the result of calling
            readlines() on the open file handle)
        tax_map_lines - list of lines from the taxonomy mapping file (the
            result of calling readlines() on the open file handle)
        taxonomic_levels - the number of taxonomic levels in the taxonomy
            strings found in the taxonomy mapping file. All taxonomy strings
            must have this number of levels to prevent inconsistent results in
            the summary

    Raises:
        KeyError - if a sequence ID listed in the OTU map has no entry in
            the taxonomy map
    """
    tax_map = _parse_taxonomic_information(tax_map_lines, taxonomic_levels)
    otu_map = fields_to_dict(otu_map_lines)

    taxonomic_agreement = {}
    for otu_id, seq_ids in otu_map.items():
        otu_size = len(seq_ids)
        taxonomic_agreement[otu_id] = [otu_size, seq_ids, [], []]

        # The reference sequence is always the first sequence listed in the OTU
        # map.
        ref_seq_id = seq_ids[0]
        ref_seq_tax = tax_map[ref_seq_id]

        # Calculate percent agreement for each taxonomic level. If the OTU only
        # contains a reference sequence, the percent agreement will be 100%.
        # Also keep track of all unique taxonomic values that are encountered
        # for each level (with the reference's taxonomic value listed first).
        for level_idx, ref_level in enumerate(ref_seq_tax):
            agreement_count = 0
            encountered_levels = []
            for seq_id in seq_ids:
                seq_level = tax_map[seq_id][level_idx]
                if ref_level == seq_level:
                    agreement_count += 1
                if seq_level not in encountered_levels:
                    encountered_levels.append(seq_level)
            # Force float division: with plain ints, Python 2 would truncate
            # the ratio to 0 or 1 unless the module enables true division.
            taxonomic_agreement[otu_id][2].append(
                    (float(agreement_count) / otu_size) * 100)
            taxonomic_agreement[otu_id][3].append(encountered_levels)
    return taxonomic_agreement
开发者ID:infotroph,项目名称:nested_reference_otus,代码行数:57,代码来源:summarize_taxonomic_agreement.py

示例9: test_write_otu_map_prefix

 def test_write_otu_map_prefix(self):
     """write_otu_map functions as expected w otu prefix """
     write_otu_map(self.otu_map1,self.tmp_fp1,'my.otu.')
     # Register the file before reading it back, so it is still listed
     # in files_to_remove if parsing or the assertion below fails
     # (presumably tearDown deletes the listed files -- confirm).
     self.files_to_remove.append(self.tmp_fp1)
     # Close the handle as soon as the map has been parsed.
     with open(self.tmp_fp1) as otu_map_f:
         actual = fields_to_dict(otu_map_f)

     exp = {'my.otu.0':['seq1','seq2','seq5'],
            'my.otu.1':['seq3','seq4'],
            'my.otu.2':['seq6','seq7','seq8']}
     self.assertEqual(actual,exp)
开发者ID:oscaredd,项目名称:qiime,代码行数:10,代码来源:test_format.py

示例10: main

def main():
    """Run the selected distance matrix comparison and write its results.

    Depending on opts.method ('mantel', 'partial_mantel' or 'mantel_corr'),
    the matching test is run over the input distance matrices and its
    textual result is written to a results file inside opts.output_dir.
    For 'mantel_corr', each returned correlogram is additionally saved as
    an image in the output directory.

    Every file opened here is tracked and closed on exit, including when
    an error is raised. An unrecognised method is reported through
    option_parser.error instead of failing on an unbound output handle.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        if not path.exists(opts.output_dir):
            create_dir(opts.output_dir)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not reported as an output directory problem.
        option_parser.error("Could not create or access output directory " "specified with the -o option.")

    opened_files = []

    def open_tracked(fp, mode):
        # Open fp and remember the handle so it is closed in the finally.
        handle = open(fp, mode)
        opened_files.append(handle)
        return handle

    try:
        sample_id_map = None
        if opts.sample_id_map_fp:
            sample_id_map = dict(
                [(k, v[0]) for k, v in fields_to_dict(open_tracked(opts.sample_id_map_fp, "U")).items()]
            )
        input_dm_fps = opts.input_dms
        distmats = [parse_distmat(open_tracked(dm_fp, "U")) for dm_fp in input_dm_fps]

        if opts.method == "mantel":
            output_f = open_tracked(path.join(opts.output_dir, "mantel_results.txt"), "w")
            output_f.write(
                run_mantel_test(
                    "mantel",
                    input_dm_fps,
                    distmats,
                    opts.num_permutations,
                    opts.tail_type,
                    comment_mantel_pmantel,
                    sample_id_map=sample_id_map,
                )
            )
        elif opts.method == "partial_mantel":
            output_f = open_tracked(path.join(opts.output_dir, "partial_mantel_results.txt"), "w")
            output_f.write(
                run_mantel_test(
                    "partial_mantel",
                    input_dm_fps,
                    distmats,
                    opts.num_permutations,
                    opts.tail_type,
                    comment_mantel_pmantel,
                    control_dm_fp=opts.control_dm,
                    control_dm=parse_distmat(open_tracked(opts.control_dm, "U")),
                    sample_id_map=sample_id_map,
                )
            )
        elif opts.method == "mantel_corr":
            output_f = open_tracked(path.join(opts.output_dir, "mantel_correlogram_results.txt"), "w")
            result_str, correlogram_fps, correlograms = run_mantel_correlogram(
                input_dm_fps, distmats, opts.num_permutations, comment_corr, opts.alpha, sample_id_map=sample_id_map
            )
            output_f.write(result_str)
            for corr_fp, corr in zip(correlogram_fps, correlograms):
                corr.savefig(path.join(opts.output_dir, corr_fp + opts.image_type), format=opts.image_type)
        else:
            option_parser.error("Unrecognized method '%s'." % opts.method)
    finally:
        for handle in opened_files:
            handle.close()
开发者ID:ranjit58,项目名称:qiime,代码行数:52,代码来源:compare_distance_matrices.py

示例11: test_fields_to_dict

    def test_fields_to_dict(self):
        """fields_to_dict should make first field key, rest val"""
        # tab-delimited input lines, modelled on output from cd-hit
        test_data = [
            '0\tR27DLI_4812\tR27DLI_600\tR27DLI_727\tU1PLI_403\tU1PLI_8969'
            '\tU1PLI_9080\tU1PLI_9526\tW3Cecum_6642\tW3Cecum_8992',
            '1\tU1PLI_7889',
            '2\tW3Cecum_4858',
            '3\tR27DLI_3243\tR27DLI_4562\tR27DLI_6828\tR27DLI_9097\tU1PLI_2780'
            '\tU1PLI_67\tU9PSI_10475\tU9PSI_4341\tW3Cecum_5191',
        ]
        expected = {
            '0': ['R27DLI_4812', 'R27DLI_600', 'R27DLI_727', 'U1PLI_403',
                  'U1PLI_8969', 'U1PLI_9080', 'U1PLI_9526', 'W3Cecum_6642',
                  'W3Cecum_8992'],
            '1': ['U1PLI_7889'],
            '2': ['W3Cecum_4858'],
            '3': ['R27DLI_3243', 'R27DLI_4562', 'R27DLI_6828', 'R27DLI_9097',
                  'U1PLI_2780', 'U1PLI_67', 'U9PSI_10475', 'U9PSI_4341',
                  'W3Cecum_5191'],
        }
        observed = fields_to_dict(test_data)
        self.assertEqual(observed, expected)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:13,代码来源:test_parse.py

示例12: main

def main():
    """Load the alignment and OTU map, then filter OTUs by sample.

    Reads the input fasta alignment and the OTU map named on the command
    line, resolves the output directory (creating it when needed, './'
    when none is given), and calls filter_samples with the extraction
    prefs, the loaded data, the output directory and the fasta file's base
    name (text before the first '.').

    Problems that previously surfaced later as unbound-variable errors (an
    output directory that cannot be created, or no samples to extract) are
    reported through option_parser.error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data = {}

    fasta_file = opts.input_fasta_fp

    # load the input alignment; the records are consumed while the file is
    # still open
    with open(fasta_file) as fasta_f:
        data['aln'] = SequenceCollection.from_fasta_records(
            parse_fasta(fasta_f), DNA)

    # Load the otu file
    with open(opts.otu_map_fp, 'U') as otu_f:
        data['otus'] = fields_to_dict(otu_f)

    # Determine which samples to extract from representative seqs
    # and from otus file
    prefs = None
    if opts.samples_to_extract:
        prefs = process_extract_samples(opts.samples_to_extract)

    # base name of the input fasta: last path component up to the first '.'
    filename = fasta_file.strip().split('/')[-1]
    filename = filename.split('.')[0]

    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError as e:
                # Report the failure instead of silently continuing with
                # no usable output directory.
                option_parser.error(
                    "Could not create output directory '%s': %s"
                    % (dir_path, e))
    else:
        dir_path = './'

    try:
        action = filter_samples
    except NameError:
        action = None
    # Place this outside try/except so we don't mask NameError in action
    if action:
        if prefs is None:
            option_parser.error("No samples to extract were specified.")
        action(prefs, data, dir_path, filename)
开发者ID:AhmedAbdelfattah,项目名称:qiime,代码行数:47,代码来源:filter_otus_by_sample.py

示例13: main

def main():
    """Build an OTU table from an OTU map and write it out.

    The OTU map (opts.otu_map_fp) is parsed with fields_to_dict, optionally
    filtered with remove_otus using the file at opts.exclude_otus_fp, and
    turned into a table with make_otu_table (with taxonomy parsed from
    opts.taxonomy_fname when that option is given). The table is written
    to opts.output_fp, or to stdout when no output path is given.

    Every file opened here is closed before returning, including when an
    error is raised; stdout is never closed.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # Handles are tracked and closed together at the end, so the helpers
    # below may read from them at any point before the table is written.
    opened_files = []
    try:
        if opts.output_fp:
            outfile = open(opts.output_fp, 'w')
            opened_files.append(outfile)
        else:
            outfile = stdout

        if not opts.taxonomy_fname:
            otu_to_taxonomy = None
        else:
            infile = open(opts.taxonomy_fname, 'U')
            opened_files.append(infile)
            otu_to_taxonomy = parse_taxonomy(infile)

        otu_map_f = open(opts.otu_map_fp, 'U')
        opened_files.append(otu_map_f)
        otu_to_seqid = fields_to_dict(otu_map_f)

        if exclude_otus_fp:
            exclude_f = open(exclude_otus_fp, 'U')
            opened_files.append(exclude_f)
            otu_to_seqid = remove_otus(otu_to_seqid, exclude_f)

        outfile.write(make_otu_table(otu_to_seqid, otu_to_taxonomy))
    finally:
        for opened in opened_files:
            opened.close()
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:21,代码来源:make_otu_table.py

示例14: test_parallel_blast_taxonomy_assigner

 def test_parallel_blast_taxonomy_assigner(self):
     """ parallel_blast_taxonomy_assigner functions as expected """
     params = {'id_to_taxonomy_fp':self.id_to_taxonomy_file.name,
       'blastmat_dir':None,
       'e_value':0.001,
       'blast_db':None,
       'reference_seqs_fp':self.reference_seqs_file.name
     }

     app = ParallelBlastTaxonomyAssigner()
     # the return value is not inspected; results are read back from disk
     app(self.tmp_seq_filepath,
         self.test_out,
         params,
         job_prefix='BTATEST',
         poll_directly=True,
         suppress_submit_jobs=False)
     result_fps = glob(join(self.test_out, '*_tax_assignments.txt'))
     # close the assignments file as soon as it has been parsed
     with open(result_fps[0], 'U') as results_f:
         results = fields_to_dict(results_f)
     # some basic sanity checks: we should get the same number of sequences
     # as our input with the same seq IDs. We should have a taxonomy string
     # and a confidence value for each seq as well.
     self.assertEqual(len(results), 6)
     self.assertEqual(len(results['s1']), 3)
     self.assertEqual(len(results['s6']), 3)
开发者ID:DDomogala3,项目名称:qiime,代码行数:24,代码来源:test_assign_taxonomy.py

示例15: test_parallel_uclust_taxonomy_assigner

    def test_parallel_uclust_taxonomy_assigner(self):
        """ parallel_uclust_taxonomy_assigner functions as expected """
        params = {'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
                  'reference_seqs_fp': self.reference_seqs_file.name,
                  'min_consensus_fraction': 0.51,
                  'similarity': 0.90,
                  'uclust_max_accepts': 3
                  }

        app = ParallelUclustConsensusTaxonomyAssigner()
        # the return value is not inspected; results are read back from disk
        app(self.tmp_seq_filepath,
            self.test_out,
            params,
            job_prefix='UTATEST',
            poll_directly=True,
            suppress_submit_jobs=False)
        result_fps = glob(join(self.test_out, '*_tax_assignments.txt'))
        # close the assignments file as soon as it has been parsed
        with open(result_fps[0], 'U') as results_f:
            results = fields_to_dict(results_f)
        # some basic sanity checks: we should get the same number of sequences
        # as our input with the same seq IDs. We should have a taxonomy string
        # and a confidence value for each seq as well.
        self.assertEqual(len(results), 6)
        self.assertEqual(len(results['s1']), 3)
        self.assertEqual(len(results['s6']), 3)
开发者ID:ElDeveloper,项目名称:qiime,代码行数:24,代码来源:test_assign_taxonomy.py


注:本文中的qiime.parse.fields_to_dict函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。