本文整理汇总了Python中qiime.parse.parse_mapping_file函数的典型用法代码示例。如果您正苦于以下问题:Python parse_mapping_file函数的具体用法?Python parse_mapping_file怎么用?Python parse_mapping_file使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse_mapping_file函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_parse_mapping_file
def test_parse_mapping_file(self):
    """parse_mapping_file functions as expected"""
    expected = ([['x', 'y', 'z'], ['i', 'j', 'k']],
                ['sample', 'a', 'b'],
                ['comment line to skip', 'more skip'])

    # Header line, comment lines, data rows with stray whitespace,
    # and a blank line that should be skipped.
    plain_lines = ['#sample\ta\tb', '#comment line to skip',
                   'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
    self.assertEqual(parse_mapping_file(plain_lines), expected)

    # NOTE: treating the first non-comment, non-blank line as the header
    # (i.e. a header without a leading '#') is not currently supported;
    # a test for that behavior should be added once it is.

    # Double quotes around fields (and around whole comment lines) are
    # stripped by default, so the expected result is unchanged.
    quoted_lines = ['#sample\ta\tb', '#comment line to skip',
                    '"x "\t" y "\t z ', ' ', '"#more skip"', 'i\t"j"\tk']
    self.assertEqual(parse_mapping_file(quoted_lines), expected)
示例2: test_make_otu_table_with_sample_metadata
def test_make_otu_table_with_sample_metadata(self):
    # The order of sample IDs in the OTU map and the order of the IDs
    # in the mapping file must not need to match.
    otu_map_lines = """0 ABC_0 DEF_1
1 ABC_1
x GHI_2 GHI_3 GHI_77
z DEF_3 XYZ_1""".split('\n')
    sample_ids = ['ABC', 'DEF', 'GHI', 'XYZ']
    dense_counts = [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]]

    map_data, map_header, map_comments = parse_mapping_file(
        StringIO(MAPPING_FILE))
    sample_metadata = mapping_file_to_dict(map_data, map_header)
    sample_md = [sample_metadata[sid] for sid in sample_ids]

    observed = make_otu_table(otu_map_lines, sample_metadata=sample_metadata)
    expected = Table(dense_counts, ['0', '1', 'x', 'z'], sample_ids,
                     sample_metadata=sample_md, input_is_dense=True)
    self.assertEqual(observed, expected)

    # A mapping file that is missing one sample's metadata must raise
    # a KeyError.
    map_data, map_header, map_comments = parse_mapping_file(
        StringIO(MAPPING_FILE_MISSING_SAMPLE))
    sample_metadata = mapping_file_to_dict(map_data, map_header)
    with self.assertRaises(KeyError):
        make_otu_table(otu_map_lines, sample_metadata=sample_metadata)
示例3: split_mapping_file_on_field
def split_mapping_file_on_field(mapping_f,
                                mapping_field,
                                column_rename_ids=None,
                                include_repeat_cols=True):
    """ split mapping file based on value in field

    mapping_f: mapping file lines (any iterable of lines)
    mapping_field: header of the column whose distinct values define the
     split
    column_rename_ids: optional header of a column whose values should be
     used to rename the sample ids (resolved to a column index here)
    include_repeat_cols: passed through to filter_mapping_file

    Yields (value_with_underscores, formatted_mapping_file_str) for each
    distinct value observed in mapping_field. Raises KeyError if
    column_rename_ids is not a mapping file header.
    """
    mapping_f = list(mapping_f)
    mapping_values = get_mapping_values(mapping_f, mapping_field)
    mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
    if column_rename_ids:
        try:
            column_rename_ids = mapping_headers.index(column_rename_ids)
        except ValueError:
            # Bug fix: the lookup that failed is column_rename_ids, so
            # report that value instead of mapping_field (which played no
            # part in this failure).
            raise KeyError("Field is not in mapping file (search is case "
                           "and white-space sensitive). \n\tProvided field: "
                           "%s. \n\tValid fields: %s"
                           % (column_rename_ids, ' '.join(mapping_headers)))
    for v in mapping_values:
        v_fp_str = v.replace(' ', '_')
        sample_ids_to_keep = sample_ids_from_metadata_description(
            mapping_f, valid_states_str="%s:%s" % (mapping_field, v))
        # parse mapping file each time though the loop as filtering
        # operates on values
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data,
            mapping_headers,
            sample_ids_to_keep,
            include_repeat_cols=include_repeat_cols,
            column_rename_ids=column_rename_ids)
        yield v_fp_str, format_mapping_file(mapping_headers, mapping_data)
示例4: test_get_category_info
def test_get_category_info(self):
    """get_category_info works"""
    category_mapping = """#SampleID\tcat1\tcat2
sample1\tA\t0
sample2\tB\t8.0
sample3\tC\t1.0""".split('\n')
    rows, header, _comments = parse_mapping_file(category_mapping)

    # Categorical column: per-sample values are passed through unchanged.
    sample_to_value, category_values = get_category_info(rows, header, 'cat1')
    self.assertEqual(sample_to_value,
                     {'sample1': 'A', 'sample2': 'B', 'sample3': 'C'})
    self.assertEqual(category_values, ['A', 'B', 'C'])

    # Numeric column with a threshold: values are binned to '0'/'1'
    # depending on which side of the threshold they fall.
    rows, header, _comments = parse_mapping_file(category_mapping)
    sample_to_value, category_values = get_category_info(
        rows, header, 'cat2', threshold=5.0)
    self.assertEqual(sample_to_value,
                     {'sample1': '0', 'sample2': '1', 'sample3': '0'})
    self.assertEqual(category_values, ['0', '1'])
示例5: test_parse_mapping_file
def test_parse_mapping_file(self):
    """parse_mapping_file functions as expected"""
    expected = ([['x', 'y', 'z'], ['i', 'j', 'k']],
                ['sample', 'a', 'b'],
                ['comment line to skip', 'more skip'])

    # Header, comments, data rows with stray whitespace, and a blank line.
    plain_lines = ['#sample\ta\tb', '#comment line to skip',
                   'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
    self.assertEqual(parse_mapping_file(plain_lines), expected)

    # Double quotes around fields (and around whole comment lines) are
    # stripped by default, so the expected result is unchanged.
    quoted_lines = ['#sample\ta\tb', '#comment line to skip',
                    '"x "\t" y "\t z ', ' ', '"#more skip"', 'i\t"j"\tk']
    self.assertEqual(parse_mapping_file(quoted_lines), expected)
示例6: test_longitudinal_otu_table_conversion_wrapper
def test_longitudinal_otu_table_conversion_wrapper(self):
    """longitudinal_otu_table_conversion_wrapper works
    """
    # Two individuals (A, B) with three timepoints each; the
    # timepoint_zero column flags each individual's reference sample.
    mapping_lines = """#SampleID\tindividual\ttimepoint_zero\ttimepoint
AT0\tA\t1\t0
AT1\tA\t0\t1
AT2\tA\t0\t2
BT0\tB\t1\t0
BT1\tB\t0\t1
BT2\tB\t0\t2
""".split('\n')
    category_mapping = parse_mapping_file(mapping_lines)
    # 5 OTUs x 6 samples, sparse BIOM-format JSON string.
    otu_table = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "2", "metadata": null}, {"id": "3", "metadata": null}, {"id": "4", "metadata": null}], "format": "Biological Observation Matrix 1.0.0", "data": [[0, 0, 1.0], [0, 1, 2.0], [0, 2, 3.0], [1, 3, 1.0], [1, 4, 2.0], [1, 5, 3.0], [2, 0, 1.0], [2, 1, 2.0], [2, 2, 3.0], [2, 4, 1.0], [2, 5, 2.0], [3, 0, 2.0], [3, 1, 4.0], [3, 2, 6.0], [3, 4, 1.0], [3, 5, 2.0], [4, 0, 3.0], [4, 1, 2.0], [4, 2, 1.0], [4, 3, 6.0], [4, 4, 4.0], [4, 5, 2.0]], "columns": [{"id": "AT0", "metadata": null}, {"id": "AT1", "metadata": null}, {"id": "AT2", "metadata": null}, {"id": "BT0", "metadata": null}, {"id": "BT1", "metadata": null}, {"id": "BT2", "metadata": null}], "generated_by": "BIOM-Format 1.0.0-dev", "matrix_type": "sparse", "shape": [5, 6], "format_url": "http://biom-format.org", "date": "2012-08-01T09:14:03.574451", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    otu_table = parse_biom_table_str(otu_table)
    new_otu_table = longitudinal_otu_table_conversion_wrapper(otu_table,
        category_mapping, 'individual', 'timepoint_zero')
    # Compare against the tab-delimited string form of the converted table.
    new_otu_table = str(new_otu_table).split('\n')
    self.assertEqual(new_otu_table[0], "# Constructed from biom file")
    data_line1 = new_otu_table[2].split('\t')
    self.assertFloatEqual(float(data_line1[0]), 0.0)
    # sets the reference to 0
    self.assertFloatEqual(float(data_line1[1]), 0.0)
    # subtracts values from same individual from the reference
    self.assertFloatEqual(float(data_line1[2]), 0.05714286)
    # sets to ignore number when not observed across a person
    self.assertFloatEqual(float(data_line1[4]), 999999999.0)
示例7: main
def main():
    """Sort an OTU table either by a mapping-file field or by an explicit,
    pre-sorted list of sample ids, then write the result to output_fp.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    otu_table_data = parse_otu_table(open(opts.input_otu_table, 'U'))
    sort_field = opts.sort_field
    mapping_fp = opts.mapping_fp
    sorted_sample_ids_fp = opts.sorted_sample_ids_fp
    if sort_field and mapping_fp:
        # sort by the values in a mapping file column
        mapping_data = parse_mapping_file(open(mapping_fp, 'U'))
        result = sort_otu_table_by_mapping_field(otu_table_data,
                                                 mapping_data,
                                                 sort_field)
    elif sorted_sample_ids_fp:
        # sort by an explicit, pre-sorted list of sample ids
        sorted_sample_ids = sample_ids_from_f(open(sorted_sample_ids_fp, 'U'))
        result = sort_otu_table(otu_table_data,
                                sorted_sample_ids)
    else:
        # Bug fix: `parser` was an undefined name; the parser object
        # returned above is `option_parser`.
        option_parser.error("must provide either --sort_field and "
                            "--mapping_fp OR --sorted_sample_ids_fp")
    # format and write the otu table
    result_str = format_otu_table(result[0], result[1], result[2], result[3])
    of = open(opts.output_fp, 'w')
    of.write(result_str)
    of.close()
示例8: merge_mapping_files
def merge_mapping_files(mapping_files, no_data_value='no_data'):
    """ Merge list of mapping files into a single mapping file

    mapping_files: open file objects containing mapping data
    no_data_value: value to be used in cases where there is no
     mapping field associated with a sample ID (default: 'no_data')

    Returns the merged mapping file as a list of tab-delimited lines
    (header line first). Raises ValueError when two files disagree on a
    field's value for the same sample.
    """
    mapping_data = defaultdict(dict)
    all_headers = set([])
    # iterate over mapping files, parsing each
    for mapping_file in mapping_files:
        current_data, current_headers, current_comments = \
            parse_mapping_file(mapping_file, strip_quotes=False)
        all_headers.update(set(current_headers))
        for entry in current_data:
            current_values = {k: v for k, v in zip(current_headers, entry)}
            sample_id = current_values['SampleID']
            if sample_id in mapping_data:
                # if the sample id has already been seen, confirm that
                # there is no conflicting values across the different
                # mapping files (e.g., pH=5.0 and pH=6.0)- if there is,
                # raise a ValueError
                previous_data = mapping_data[sample_id]
                for key in current_values:
                    if key not in previous_data:
                        continue
                    if current_values[key] != previous_data[key]:
                        # Bug fix: the two literals were concatenated with
                        # no separator, producing "...forsample..." in the
                        # error message.
                        raise ValueError("Different values provided for %s "
                                         "for sample %s in different mapping "
                                         "files." % (key, sample_id))
            mapping_data[sample_id].update(current_values)
    # remove and place the fields whose order is important
    ordered_beginning = []
    for e in ['SampleID', 'BarcodeSequence', 'LinkerPrimerSequence']:
        if e in all_headers:
            all_headers.remove(e)
            ordered_beginning.append(e)
    ordered_end = []
    for e in ['Description']:
        if e in all_headers:
            all_headers.remove(e)
            ordered_end.append(e)
    # NOTE: all_headers is a set, so the order of the middle fields is
    # not deterministic across runs.
    ordered_headers = ordered_beginning + list(all_headers) + ordered_end
    # generate the mapping file lines containing all fields
    result = ['#' + '\t'.join(ordered_headers)]
    for sample_id, data in mapping_data.items():
        values = [data.get(k, no_data_value) for k in ordered_headers]
        result.append('\t'.join(values))
    return result
示例9: _collate_cluster_pcoa_plot_data
def _collate_cluster_pcoa_plot_data(coords_f, map_f, category):
    """Return per-group (xs, ys, color, state) tuples for a 2-D PCoA plot.

    Groups samples by `category` from the mapping file, assigns each group
    a distinct color, and collects the first two principal coordinates of
    each group's samples. Raises ValueError when there are more groups
    than available colors.
    """
    pc_data = parse_coords(coords_f)
    coords_by_sid = dict(zip(pc_data[0], pc_data[1]))

    # group_by_field expects the header row followed by the data rows
    map_rows, map_header, _comments = parse_mapping_file(map_f)
    sid_map = group_by_field([map_header] + map_rows, category)

    sorted_states = sorted(sid_map)
    color_pool = get_color_pool()
    if len(color_pool) < len(sorted_states):
        raise ValueError("Not enough colors to uniquely color sample "
                         "groups.")

    results = []
    # zip stops at the shorter sequence, so each state gets one color
    for state, color in zip(sorted_states, color_pool):
        sids = sid_map[state]
        xs = [coords_by_sid[sid][0] for sid in sids]
        ys = [coords_by_sid[sid][1] for sid in sids]
        results.append((xs, ys, color, state))
    return results
示例10: sample_ids_from_metadata_description
def sample_ids_from_metadata_description(mapping_f, valid_states_str):
    """ Given a description of metadata, return the corresponding sample ids

    mapping_f: mapping file lines
    valid_states_str: metadata state description, e.g. 'Field:value1,value2'
    """
    map_data, map_header, _map_comments = parse_mapping_file(mapping_f)
    states = parse_metadata_state_descriptions(valid_states_str)
    return get_sample_ids(map_data, map_header, states)
示例11: main
def main():
    """Append merged columns (fields joined with '&&') to a mapping file
    and write the result to output_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    columns_to_merge = opts.columns_to_merge
    mapping_fp = opts.mapping_fp
    output_fp = opts.output_fp
    try:
        data, headers, comments = parse_mapping_file(open(mapping_fp, 'U'))
    except Exception:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed
        option_parser.error('Bro, that doesn\'t look like a mapping file')
    for merging in columns_to_merge:
        # Bug fix: `indices = map(...)` is a one-shot iterator on Python 3
        # but is iterated twice below (header build + per-row loop), which
        # left every row's merged value empty. Build a list instead; a
        # missing column name still raises ValueError as before.
        indices = [headers.index(column) for column in merging.split('&&')]
        # the merged column's header is the concatenation of the headers
        headers.append(''.join([headers[element] for element in indices]))
        # each row's merged value is the concatenation of its field values
        for line in data:
            line.append(''.join([line[element] for element in indices]))
    # this should never happen
    assert len(headers) == len(data[0]), "Something went horribly wrong, "+\
        "that's what you get for using non-unit-tested software"
    lines = format_mapping_file(headers, data, comments)
    fd = open(output_fp, 'w')
    fd.writelines(lines)
    fd.close()
示例12: merge_mapping_files
def merge_mapping_files(mapping_files, no_data_value="no_data"):
    """ Merge list of mapping files into a single mapping file

    mapping_files: open file objects containing mapping data
    no_data_value: value to be used in cases where there is no
     mapping field associated with a sample ID (default: 'no_data')

    Returns the merged mapping file as a list of tab-delimited lines
    (header line first). Raises ValueError when two files disagree on a
    field's value for the same sample.
    """
    mapping_data = {}
    all_headers = []
    result = []
    # iterate over mapping files, parsing each
    for mapping_file in mapping_files:
        current_data, current_headers, current_comments = parse_mapping_file(
            mapping_file, strip_quotes=False)
        all_headers += current_headers
        for entry in current_data:
            # first field is the sample id; remaining fields map to headers
            sample_id = entry[0]
            current_values = {}
            for header, value in zip(current_headers[1:], entry[1:]):
                current_values[header] = value
            if sample_id in mapping_data:
                # if the sample id has already been seen, confirm that
                # there is no conflicting values across the different
                # mapping files (e.g., pH=5.0 and pH=6.0)- if there is,
                # raise a ValueError
                previous_data = mapping_data[sample_id]
                for header, value in current_values.items():
                    if header in previous_data and value != previous_data[header]:
                        # Bug fix: `raise ValueError, "..."` is Python-2-only
                        # syntax and a SyntaxError on Python 3; use the
                        # call form, which works on both.
                        raise ValueError(
                            "Different values provided for %s for sample %s in different mapping files."
                            % (header, sample_id))
                mapping_data[sample_id].update(current_values)
            else:
                mapping_data[sample_id] = current_values
    # deduplicate headers, keeping dict key semantics for the deletes below
    all_headers = {}.fromkeys(all_headers)
    # remove and place the fields whose order is important
    ordered_beginning = []
    for e in ["SampleID", "BarcodeSequence", "LinkerPrimerSequence"]:
        try:
            del all_headers[e]
            ordered_beginning.append(e)
        except KeyError:
            pass
    ordered_end = []
    for e in ["Description"]:
        try:
            del all_headers[e]
            ordered_end.append(e)
        except KeyError:
            pass
    ordered_headers = ordered_beginning + list(all_headers) + ordered_end
    # generate the mapping file lines containing all fields
    result.append("#" + "\t".join(ordered_headers))
    for sample_id, data in mapping_data.items():
        result.append("\t".join(
            [sample_id] +
            [data.get(h, no_data_value) for h in ordered_headers[1:]]))
    return result
示例13: test_run_single_paired_T_test
def test_run_single_paired_T_test(self):
    """run_single_paired_T_test works
    """
    # Paired design: s1/s2 belong to individual A, s3/s4 to B, s5/s6 to C;
    # timepoint_zero flags the "before" sample of each pair.
    cat_mapping = """#SampleID\ttimepoint_zero\tindividual
s1\t1\tA
s2\t0\tA
s3\t1\tB
s4\t0\tB
s5\t1\tC
s6\t0\tC""".split('\n')
    # 999999999.0 is the sentinel for "not observed" values.
    otu_table = """#Full OTU Counts
#OTU ID\ts1\ts2\ts3\ts4\ts5\ts6
0\t999999999.0\t999999999.0\t0.0\t0.3\t0.0\t0.2
1\t0.0\t-0.2\t999999999.0\t999999999.0\t999999999.0\t999999999.0
2\t0.0\t0.2\t0.0\t-0.7\t0.0\t0.1""".split('\n')
    sample_ids, otu_ids, otu_data, lineages = parse_otu_table(otu_table, float)
    mapping_data, header, comments = parse_mapping_file(cat_mapping)
    otu_sample_info, num_samples, taxonomy_info = \
        get_otu_table_info(sample_ids, otu_ids, otu_data, lineages)
    OTU_list = ['0', '1', '2']
    #should return the results since there should be 4 values to evaluate
    result = run_single_paired_T_test('0', mapping_data, header, \
        'individual', 'timepoint_zero', otu_ids, sample_ids, otu_data, \
        999999999.0, 4)
    self.assertEqual(len(result), 4)
    # result[1] is the p-value; result[2] the non-sentinel paired values;
    # result[3] the number of pairs used.
    self.assertFloatEqual(result[1], 0.12566591637800242)
    self.assertFloatEqual(result[2], [0.29999999999999999, 0.20000000000000001])
    self.assertEqual(result[3], 2)
    # check that the minimum-observation filter works: requiring 5 values
    # when only 4 are available must return None
    result = run_single_paired_T_test('0', mapping_data, header, \
        'individual', 'timepoint_zero', otu_ids, sample_ids, otu_data, \
        999999999.0, 5)
    self.assertEqual(result, None)
示例14: _collapse_metadata
def _collapse_metadata(mapping_f, collapse_fields):
    """ Load a mapping file into a DataFrame and then collapse rows

    Parameters
    ----------
    mapping_f : file handle or filepath
        The sample metadata mapping file.
    collapse_fields : iterable
        The fields to combine when collapsing samples. Samples whose
        ordered values in these columns are identical end up in the
        same group.

    Returns
    -------
    pd.DataFrame
        Sample metadata resulting from the collapse operation, with the
        remaining columns aggregated into tuples per group.

    Raises
    ------
    KeyError
        If any of the collapse fields is not a column header in mapping_f.
    """
    rows, header, _ = parse_mapping_file(mapping_f)
    metadata = pd.DataFrame(rows, columns=header)
    # aggregate every non-key column into a tuple of the grouped values
    return metadata.groupby(collapse_fields).agg(lambda column: tuple(column))
示例15: main
def main():
    """Sort a BIOM OTU table by a mapping-file field, an explicit sample-id
    list, or (by default) a natural sort of its own sample ids, then write
    the formatted result to output_fp.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    otu_table_data = parse_biom_table(open(opts.input_otu_table, 'U'))
    sort_field = opts.sort_field
    mapping_fp = opts.mapping_fp
    sorted_sample_ids_fp = opts.sorted_sample_ids_fp

    if sort_field and mapping_fp:
        # sort by the values of a mapping file column
        mapping_data = parse_mapping_file(open(mapping_fp, 'U'))
        result = sort_otu_table_by_mapping_field(otu_table_data, mapping_data,
                                                 sort_field)
    elif sorted_sample_ids_fp:
        # sort by an explicit, pre-sorted list of sample ids
        result = sort_otu_table(
            otu_table_data,
            sample_ids_from_f(open(sorted_sample_ids_fp, 'U')))
    else:
        # default: case-insensitive natural sort of the table's sample ids
        result = sort_otu_table(
            otu_table_data,
            natsort_case_insensitive(otu_table_data.SampleIds))

    # format and write the otu table
    out_f = open(opts.output_fp, 'w')
    out_f.write(format_biom_table(result))
    out_f.close()