当前位置: 首页>>代码示例>>Python>>正文


Python parse.parse_mapping_file函数代码示例

本文整理汇总了Python中qiime.parse.parse_mapping_file函数的典型用法代码示例。如果您正苦于以下问题:Python parse_mapping_file函数的具体用法?Python parse_mapping_file怎么用?Python parse_mapping_file使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了parse_mapping_file函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_parse_mapping_file

 def test_parse_mapping_file(self):
     """parse_mapping_file handles comments, blank lines, and quoting."""
     expected = ([['x', 'y', 'z'], ['i', 'j', 'k']],
                 ['sample', 'a', 'b'],
                 ['comment line to skip', 'more skip'])

     plain = ['#sample\ta\tb', '#comment line to skip',
              'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
     self.assertEqual(parse_mapping_file(plain), expected)

     # TODO: using the first non-comment, non-blank line as the header
     # (i.e. a header line without the leading '#') is not supported
     # yet; a disabled test case for that behavior used to live here.

     # Double quotes are stripped by default, including a quoted
     # comment marker ('"#more skip"') which still counts as a comment.
     quoted = ['#sample\ta\tb', '#comment line to skip',
               '"x "\t" y "\t z ', ' ', '"#more skip"', 'i\t"j"\tk']
     self.assertEqual(parse_mapping_file(quoted), expected)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:26,代码来源:test_parse.py

示例2: test_make_otu_table_with_sample_metadata

    def test_make_otu_table_with_sample_metadata(self):
        # Want to make sure that the order of the sample IDs in the OTU
        # map and the order of the IDs in the mapping file do not matter
        otu_map_lines = """0	ABC_0	DEF_1
1	ABC_1
x	GHI_2	GHI_3	GHI_77
z	DEF_3	XYZ_1""".split('\n')
        mapping_f = StringIO(MAPPING_FILE)
        sample_ids = ['ABC', 'DEF', 'GHI', 'XYZ']
        data = [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]]

        map_data, map_header, map_comments = parse_mapping_file(mapping_f)
        sample_metadata = mapping_file_to_dict(map_data, map_header)

        sample_md = [sample_metadata[sample_id] for sample_id in sample_ids]

        obs = make_otu_table(otu_map_lines, sample_metadata=sample_metadata)
        exp = Table(data, ['0', '1', 'x', 'z'], sample_ids,
                    sample_metadata=sample_md, input_is_dense=True)

        self.assertEqual(obs, exp)

        # Test with a mapping file that is missing a sample's metadata,
        # make sure it raises the KeyError
        mapping_f = StringIO(MAPPING_FILE_MISSING_SAMPLE)
        map_data, map_header, map_comments = parse_mapping_file(mapping_f)
        sample_metadata = mapping_file_to_dict(map_data, map_header)

        with self.assertRaises(KeyError):
            obs = make_otu_table(otu_map_lines,
                                 sample_metadata=sample_metadata)
开发者ID:AhmedAbdelfattah,项目名称:qiime,代码行数:31,代码来源:test_make_otu_table.py

示例3: split_mapping_file_on_field

def split_mapping_file_on_field(mapping_f,
                                mapping_field,
                                column_rename_ids=None,
                                include_repeat_cols=True):
    """ split mapping file based on value in field

        mapping_f: open file (or list of lines) of mapping data
        mapping_field: header whose distinct values define the splits
        column_rename_ids: optional header name whose values should be
         used to rename sample ids in each split (resolved to a column
         index below)
        include_repeat_cols: whether columns with identical values for
         all retained samples are kept in each split

        Yields (field_value_as_filename_safe_str, mapping file lines)
        for each distinct value of mapping_field.
    """
    mapping_f = list(mapping_f)
    mapping_values = get_mapping_values(mapping_f, mapping_field)

    mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)

    if column_rename_ids:
        try:
            column_rename_ids = mapping_headers.index(column_rename_ids)
        except ValueError:
            # BUG FIX: the failed lookup is column_rename_ids, but the
            # message previously reported mapping_field instead.
            raise KeyError("Field is not in mapping file (search is case " +
                           "and white-space sensitive). \n\tProvided field: " +
                           "%s. \n\tValid fields: %s" % (column_rename_ids, ' '.join(mapping_headers)))

    for v in mapping_values:
        # make the value safe for use in a file path
        v_fp_str = v.replace(' ', '_')
        sample_ids_to_keep = sample_ids_from_metadata_description(
            mapping_f, valid_states_str="%s:%s" % (mapping_field, v))

        # parse mapping file each time though the loop as filtering operates on
        # values
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data,
            mapping_headers,
            sample_ids_to_keep,
            include_repeat_cols=include_repeat_cols,
            column_rename_ids=column_rename_ids)
        yield v_fp_str, format_mapping_file(mapping_headers, mapping_data)
开发者ID:TheSchwa,项目名称:qiime,代码行数:35,代码来源:split.py

示例4: test_get_category_info

    def test_get_category_info(self):
        """get_category_info works"""
        category_mapping = """#SampleID\tcat1\tcat2
sample1\tA\t0
sample2\tB\t8.0
sample3\tC\t1.0""".split('\n')
        mapping_data, header, comments = parse_mapping_file(category_mapping)
        result, cat_vals = get_category_info(mapping_data, header, 'cat1')
        self.assertEqual(result, {'sample1': 'A', 'sample3': 'C', 'sample2': 'B'})
        self.assertEqual(cat_vals, (['A', 'B', 'C']))
        mapping_data, header, comments = parse_mapping_file(category_mapping)
        result, cat_vals = get_category_info(mapping_data, header, \
                        'cat2', threshold=5.0)
        self.assertEqual(result, {'sample1': '0', 'sample3': '0', 'sample2': '1'})
        self.assertEqual(cat_vals, (['0', '1']))
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:15,代码来源:test_otu_category_significance.py

示例5: test_parse_mapping_file

 def test_parse_mapping_file(self):
     """parse_mapping_file strips comments, blanks, and double quotes."""
     expected = ([['x', 'y', 'z'], ['i', 'j', 'k']],
                 ['sample', 'a', 'b'],
                 ['comment line to skip', 'more skip'])

     plain = ['#sample\ta\tb', '#comment line to skip',
              'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
     self.assertEqual(parse_mapping_file(plain), expected)

     # Double quotes are stripped by default.
     quoted = ['#sample\ta\tb', '#comment line to skip',
               '"x "\t" y "\t z ', ' ', '"#more skip"', 'i\t"j"\tk']
     self.assertEqual(parse_mapping_file(quoted), expected)
开发者ID:carze,项目名称:clovr-base,代码行数:15,代码来源:test_parse.py

示例6: test_longitudinal_otu_table_conversion_wrapper

    def test_longitudinal_otu_table_conversion_wrapper(self):
        """londitudinal_otu_table_conversion_wrapper works
        """
        mapping_lines = """#SampleID\tindividual\ttimepoint_zero\ttimepoint
AT0\tA\t1\t0
AT1\tA\t0\t1
AT2\tA\t0\t2
BT0\tB\t1\t0
BT1\tB\t0\t1
BT2\tB\t0\t2
""".split('\n')
        category_mapping = parse_mapping_file(mapping_lines)
        otu_table = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "2", "metadata": null}, {"id": "3", "metadata": null}, {"id": "4", "metadata": null}], "format": "Biological Observation Matrix 1.0.0", "data": [[0, 0, 1.0], [0, 1, 2.0], [0, 2, 3.0], [1, 3, 1.0], [1, 4, 2.0], [1, 5, 3.0], [2, 0, 1.0], [2, 1, 2.0], [2, 2, 3.0], [2, 4, 1.0], [2, 5, 2.0], [3, 0, 2.0], [3, 1, 4.0], [3, 2, 6.0], [3, 4, 1.0], [3, 5, 2.0], [4, 0, 3.0], [4, 1, 2.0], [4, 2, 1.0], [4, 3, 6.0], [4, 4, 4.0], [4, 5, 2.0]], "columns": [{"id": "AT0", "metadata": null}, {"id": "AT1", "metadata": null}, {"id": "AT2", "metadata": null}, {"id": "BT0", "metadata": null}, {"id": "BT1", "metadata": null}, {"id": "BT2", "metadata": null}], "generated_by": "BIOM-Format 1.0.0-dev", "matrix_type": "sparse", "shape": [5, 6], "format_url": "http://biom-format.org", "date": "2012-08-01T09:14:03.574451", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""

        otu_table = parse_biom_table_str(otu_table)
        new_otu_table = longitudinal_otu_table_conversion_wrapper(otu_table,
                                                                  category_mapping, 'individual', 'timepoint_zero')
        new_otu_table = str(new_otu_table).split('\n')
        self.assertEqual(new_otu_table[0], "# Constructed from biom file")
        data_line1 = new_otu_table[2].split('\t')
        self.assertFloatEqual(float(data_line1[0]), 0.0)
        # sets the reference to 0
        self.assertFloatEqual(float(data_line1[1]), 0.0)
        # subtracts values from same individual from the reference
        self.assertFloatEqual(float(data_line1[2]), 0.05714286)
        # sets to ignore number when not observed across a person
        self.assertFloatEqual(float(data_line1[4]), 999999999.0)
开发者ID:TheSchwa,项目名称:qiime,代码行数:27,代码来源:test_longitudinal_otu_category_significance.py

示例7: main

def main():
    """Sort an OTU table by a mapping field or an explicit sample-id list."""
    option_parser, opts, args =\
      parse_command_line_parameters(**script_info)

    otu_table_data = parse_otu_table(open(opts.input_otu_table, 'U'))
    sort_field = opts.sort_field
    mapping_fp = opts.mapping_fp
    sorted_sample_ids_fp = opts.sorted_sample_ids_fp

    if sort_field and mapping_fp:
        # sort sample columns by the values of a mapping-file field
        mapping_data = parse_mapping_file(open(mapping_fp, 'U'))
        result = sort_otu_table_by_mapping_field(otu_table_data,
                                                 mapping_data,
                                                 sort_field)
    elif sorted_sample_ids_fp:
        # sort sample columns by an explicit ordered list of sample ids
        sorted_sample_ids = sample_ids_from_f(open(sorted_sample_ids_fp, 'U'))
        result = sort_otu_table(otu_table_data,
                                sorted_sample_ids)
    else:
        # BUG FIX: this previously called 'parser.error', but 'parser' is
        # undefined here (NameError); the parser returned above is named
        # 'option_parser'.
        option_parser.error("must provide either --sort_field and "
                            "--mapping_fp OR --sorted_sample_ids_fp")

    # format and write the otu table; 'with' guarantees the handle is
    # closed even if the write fails
    result_str = format_otu_table(result[0], result[1], result[2], result[3])
    with open(opts.output_fp, 'w') as of:
        of.write(result_str)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:26,代码来源:sort_otu_table.py

示例8: merge_mapping_files

def merge_mapping_files(mapping_files, no_data_value='no_data'):
    """ Merge list of mapping files into a single mapping file

        mapping_files: open file objects containing mapping data
        no_data_value: value to be used in cases where there is no
         mapping field associated with a sample ID (default: 'no_data')

        Returns the merged mapping file as a list of tab-delimited lines
        (header line first, prefixed with '#').

        Raises ValueError if two files give conflicting values for the
        same field of the same sample.
    """
    mapping_data = defaultdict(dict)  # sample_id -> {header: value}
    all_headers = set([])

    # iterate over mapping files, parsing each
    for mapping_file in mapping_files:
        current_data, current_headers, current_comments = \
           parse_mapping_file(mapping_file, strip_quotes=False)
        all_headers.update(set(current_headers))

        for entry in current_data:
            current_values = {k: v for k, v in zip(current_headers, entry)}
            sample_id = current_values['SampleID']

            if sample_id in mapping_data:
                # if the sample id has already been seen, confirm that
                # there is no conflicting values across the different
                # mapping files (e.g., pH=5.0 and pH=6.0)- if there is,
                # raise a ValueError
                previous_data = mapping_data[sample_id]

                for key in current_values:
                    if key not in previous_data:
                        continue

                    if current_values[key] != previous_data[key]:
                        # BUG FIX: the two adjacent string literals were
                        # missing a space ("for %s forsample %s").
                        raise ValueError("Different values provided for %s "
                                         "for sample %s in different mapping "
                                         "files." % (key, sample_id))

            mapping_data[sample_id].update(current_values)

    # remove and place the fields whose order is important
    ordered_beginning = []
    for e in ['SampleID', 'BarcodeSequence', 'LinkerPrimerSequence']:
        if e in all_headers:
            all_headers.remove(e)
            ordered_beginning.append(e)

    ordered_end = []
    for e in ['Description']:
        if e in all_headers:
            all_headers.remove(e)
            ordered_end.append(e)

    # BUG FIX: iterate the remaining headers in sorted order so the output
    # column order is deterministic (iterating a bare set is arbitrary).
    ordered_headers = ordered_beginning + sorted(all_headers) + ordered_end

    # generate the mapping file lines containing all fields
    result = ['#' + '\t'.join(ordered_headers)]
    for sample_id, data in mapping_data.items():
        values = [data.get(k, no_data_value) for k in ordered_headers]
        result.append('\t'.join(values))

    return result
开发者ID:rob-knight,项目名称:qiime,代码行数:60,代码来源:merge_mapping_files.py

示例9: _collate_cluster_pcoa_plot_data

def _collate_cluster_pcoa_plot_data(coords_f, map_f, category):
    """Group PC1/PC2 coordinates by a mapping category, one color each.

    Returns a list of (xs, ys, color, state) tuples, one per distinct
    value of `category`, ordered by sorted state name.
    """
    parsed = parse_coords(coords_f)
    sample_coords = dict(zip(parsed[0], parsed[1]))

    mapping = parse_mapping_file(map_f)
    # group_by_field expects the header row first, then the data rows
    table = [mapping[1]] + mapping[0]

    state_to_sids = group_by_field(table, category)
    states = sorted(state_to_sids.keys())

    palette = get_color_pool()
    if len(states) > len(palette):
        raise ValueError("Not enough colors to uniquely color sample "
                         "groups.")

    plot_data = []
    for state, color in zip(states, palette):
        sids = state_to_sids[state]
        xs = [sample_coords[sid][0] for sid in sids]
        ys = [sample_coords[sid][1] for sid in sids]
        plot_data.append((xs, ys, color, state))

    return plot_data
开发者ID:gregcaporaso,项目名称:microbiogeo,代码行数:25,代码来源:simulate.py

示例10: sample_ids_from_metadata_description

def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """Return the sample ids matching a metadata state description."""
    data, header, _ = parse_mapping_file(mapping_f)
    valid_states = parse_metadata_state_descriptions(valid_states_str)
    return get_sample_ids(data, header, valid_states)
开发者ID:kartoffelpuffer,项目名称:qiime,代码行数:7,代码来源:filter.py

示例11: main

def main():
    """Append merged (concatenated) columns to a mapping file."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    columns_to_merge = opts.columns_to_merge
    mapping_fp = opts.mapping_fp
    output_fp = opts.output_fp

    try:
        data, headers, comments = parse_mapping_file(open(mapping_fp, 'U'))
    except Exception:
        # BUG FIX: was a bare 'except:', which also swallowed SystemExit
        # and KeyboardInterrupt; Exception lets those propagate.
        option_parser.error('Bro, that doesn\'t look like a mapping file')

    for merging in columns_to_merge:
        # columns to merge are given as e.g. "colA&&colB"; a missing
        # column name raises ValueError from headers.index
        indices = [headers.index(column) for column in merging.split('&&')]

        # new header is the concatenation of the merged headers
        headers.append(''.join([headers[element] for element in indices]))

        # each row gains the concatenation of the merged values
        for line in data:
            line.append(''.join([line[element] for element in indices]))

    # this should never happen
    assert len(headers) == len(data[0]), "Something went horribly wrong, "+\
        "that's what you get for using non-unit-tested software"

    lines = format_mapping_file(headers, data, comments)

    fd = open(output_fp, 'w')
    fd.writelines(lines)
    fd.close()
开发者ID:ElDeveloper,项目名称:apocaqiime,代码行数:30,代码来源:merge_columns_in_mapping_file.py

示例12: merge_mapping_files

def merge_mapping_files(mapping_files, no_data_value="no_data"):
    """ Merge list of mapping files into a single mapping file

        mapping_files: open file objects containing mapping data
        no_data_value: value to be used in cases where there is no
         mapping field associated with a sample ID (default: 'no_data')

        Returns the merged mapping file as a list of tab-delimited lines.
        Raises ValueError on conflicting values for the same sample/field.
    """
    mapping_data = {}   # sample_id -> {header: value}
    all_headers = []    # headers in first-seen order (deduplicated below)
    result = []

    # iterate over mapping files, parsing each
    for mapping_file in mapping_files:
        current_data, current_headers, current_comments = parse_mapping_file(mapping_file, strip_quotes=False)
        all_headers += current_headers
        for entry in current_data:
            # first column is the sample id; remaining columns pair off
            # with the remaining headers
            sample_id = entry[0]
            current_values = {}
            for header, value in zip(current_headers[1:], entry[1:]):
                current_values[header] = value
            if sample_id in mapping_data:
                # if the sample id has already been seen, confirm that
                # there is no conflicting values across the different
                # mapping files (e.g., pH=5.0 and pH=6.0)- if there is,
                # raise a ValueError
                previous_data = mapping_data[sample_id]
                for header, value in current_values.items():
                    if header in previous_data and value != previous_data[header]:
                        # BUG FIX: 'raise ValueError, "..."' is Python-2-only
                        # syntax (SyntaxError on Python 3); the call form
                        # works on both.
                        raise ValueError(
                            "Different values provided for %s for sample %s in different mapping files."
                            % (header, sample_id))
                mapping_data[sample_id].update(current_values)
            else:
                mapping_data[sample_id] = current_values
    # deduplicate headers while keeping dict semantics
    all_headers = {}.fromkeys(all_headers)

    # remove and place the fields whose order is important
    ordered_beginning = []
    for e in ["SampleID", "BarcodeSequence", "LinkerPrimerSequence"]:
        try:
            del all_headers[e]
            ordered_beginning.append(e)
        except KeyError:
            pass

    ordered_end = []
    for e in ["Description"]:
        try:
            del all_headers[e]
            ordered_end.append(e)
        except KeyError:
            pass
    ordered_headers = ordered_beginning + list(all_headers) + ordered_end

    # generate the mapping file lines containing all fields
    result.append("#" + "\t".join(ordered_headers))
    for sample_id, data in mapping_data.items():
        result.append("\t".join([sample_id] + [data.get(h, no_data_value) for h in ordered_headers[1:]]))
    return result
开发者ID:ranjit58,项目名称:qiime,代码行数:60,代码来源:merge_mapping_files.py

示例13: test_run_single_paired_T_test

    def test_run_single_paired_T_test(self):
        """run_single_paired_T_test works
        """
        cat_mapping = """#SampleID\ttimepoint_zero\tindividual
s1\t1\tA
s2\t0\tA
s3\t1\tB
s4\t0\tB
s5\t1\tC
s6\t0\tC""".split('\n')
        otu_table = """#Full OTU Counts
#OTU ID\ts1\ts2\ts3\ts4\ts5\ts6
0\t999999999.0\t999999999.0\t0.0\t0.3\t0.0\t0.2
1\t0.0\t-0.2\t999999999.0\t999999999.0\t999999999.0\t999999999.0
2\t0.0\t0.2\t0.0\t-0.7\t0.0\t0.1""".split('\n')
        sample_ids, otu_ids, otu_data, lineages = parse_otu_table(otu_table, float)
        mapping_data, header, comments = parse_mapping_file(cat_mapping)
        otu_sample_info, num_samples, taxonomy_info = \
            get_otu_table_info(sample_ids, otu_ids, otu_data, lineages)
        OTU_list = ['0', '1', '2']
        #should return the results since there should be 4 values to evaluate
        result = run_single_paired_T_test('0', mapping_data, header, \
            'individual', 'timepoint_zero', otu_ids, sample_ids, otu_data, \
            999999999.0, 4)
        self.assertEqual(len(result), 4)
        self.assertFloatEqual(result[1], 0.12566591637800242)
        self.assertFloatEqual(result[2], [0.29999999999999999, 0.20000000000000001])
        self.assertEqual(result[3], 2)
        #check the the filter works
        result = run_single_paired_T_test('0', mapping_data, header, \
            'individual', 'timepoint_zero', otu_ids, sample_ids, otu_data, \
            999999999.0, 5)
        self.assertEqual(result, None)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:33,代码来源:test_otu_category_significance.py

示例14: _collapse_metadata

def _collapse_metadata(mapping_f, collapse_fields):
    """ Load a mapping file into a DataFrame and then collapse rows

    Parameters
    ----------
    mapping_f : file handle or filepath
        The sample metadata mapping file.
    collapse_fields : iterable
        The fields to combine when collapsing samples. For each sample in the
        mapping_f, the ordered values from these columns will be tuplized and
        used as the group identifier. Samples whose tuplized values in these
        fields are identical will be grouped.

    Returns
    -------
    pd.DataFrame
        Sample metadata resulting from the collapse operation, indexed by the
        collapse_fields values; each remaining column holds a tuple of the
        grouped samples' values.

    Raises
    ------
    KeyError
        If any of the collapse fields are not column headers in mapping_f.

    """
    # DOC FIX only: the previous docstring referenced a nonexistent
    # 'sample_id_field' parameter; code is unchanged.
    mapping_data, header, _ = parse_mapping_file(mapping_f)
    sample_md = pd.DataFrame(mapping_data, columns=header)
    grouped = sample_md.groupby(collapse_fields)
    # aggregate every non-grouping column into a tuple of its values
    collapsed_md = grouped.agg(lambda x: tuple(x))
    return collapsed_md
开发者ID:ElDeveloper,项目名称:qiime,代码行数:30,代码来源:group.py

示例15: main

def main():
    """Sort a BIOM OTU table by mapping field, id list, or natural order."""
    option_parser, opts, args = \
        parse_command_line_parameters(**script_info)

    otu_table_data = parse_biom_table(open(opts.input_otu_table, 'U'))

    if opts.sort_field and opts.mapping_fp:
        # order samples by the values of a mapping-file field
        mapping_data = parse_mapping_file(open(opts.mapping_fp, 'U'))
        result = sort_otu_table_by_mapping_field(otu_table_data,
                                                 mapping_data,
                                                 opts.sort_field)
    elif opts.sorted_sample_ids_fp:
        # order samples by an explicit ordered list of sample ids
        sorted_sample_ids = sample_ids_from_f(
            open(opts.sorted_sample_ids_fp, 'U'))
        result = sort_otu_table(otu_table_data, sorted_sample_ids)
    else:
        # default: case-insensitive natural sort of the table's own ids
        result = sort_otu_table(
            otu_table_data,
            natsort_case_insensitive(otu_table_data.SampleIds))

    # format and write the otu table
    of = open(opts.output_fp, 'w')
    of.write(format_biom_table(result))
    of.close()
开发者ID:rob-knight,项目名称:qiime,代码行数:27,代码来源:sort_otu_table.py


注:本文中的qiime.parse.parse_mapping_file函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。