本文整理汇总了Python中qiime.format.format_mapping_file函数的典型用法代码示例。如果您正苦于以下问题:Python format_mapping_file函数的具体用法?Python format_mapping_file怎么用?Python format_mapping_file使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了format_mapping_file函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_replicated_mapping_file
def create_replicated_mapping_file(map_f, num_replicates, sample_ids):
    """Return a formatted mapping file string with replicated sample IDs.

    Each replicated sample ID gets an ascending integer suffix from the
    range [0, num_replicates - 1]. For example, two input sample IDs S1
    and S2 with 3 replicates each yield:
        S1.0, S1.1, S1.2, S2.0, S2.1, S2.2
    All other metadata columns are copied through unchanged, and the order
    of the input sample IDs is preserved.

    Arguments:
    map_f - input mapping file to replicate (file-like object)
    num_replicates - number of replicates at each sample
    sample_ids - only sample IDs in the mapping file that are in this list
        will be replicated; rows whose sample ID is not in this list are
        not added to the resulting mapping file
    """
    if num_replicates < 1:
        raise ValueError("Must specify at least one sample replicate (was "
                         "provided %d)." % num_replicates)
    map_data, header, comments = parse_mapping_file(map_f)

    # Build the replicated rows in one pass: outer iteration over rows,
    # inner iteration over replicate indices (same order as a nested loop).
    replicated_rows = [['%s.%i' % (row[0], rep_idx)] + row[1:]
                       for row in map_data
                       if row[0] in sample_ids
                       for rep_idx in range(num_replicates)]
    return format_mapping_file(header, replicated_rows, comments)
示例2: main
def main():
    """Filter a mapping file down to rows whose sample IDs are selected.

    Sample IDs to keep come either from a sequence-ID file
    (--sample_id_fp) or from a metadata description applied to the
    mapping file (--valid_states). The filtered mapping file is written
    to --output_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    mapping_fp = opts.input_fp
    out_mapping_fp = opts.output_fp
    valid_states = opts.valid_states

    # Determine the set of sample IDs to keep. Previously, supplying
    # neither --sample_id_fp nor --valid_states left valid_sample_ids
    # unbound and crashed with a NameError; report a usage error instead.
    if opts.sample_id_fp:
        sample_id_f = open(opts.sample_id_fp, 'U')
        valid_sample_ids = get_seqs_to_keep_lookup_from_seq_id_file(
            sample_id_f)
        sample_id_f.close()
    elif mapping_fp and valid_states:
        mapping_f = open(mapping_fp, 'U')
        valid_sample_ids = sample_ids_from_metadata_description(
            mapping_f, valid_states)
        mapping_f.close()
    else:
        option_parser.error("Must provide either --sample_id_fp or "
                            "--valid_states to define which samples "
                            "to keep.")

    data, headers, _ = parse_mapping_file(open(mapping_fp, 'U'))

    # Keep only rows whose sample ID (first column) is in the valid set.
    good_mapping_file = [line for line in data
                         if line[0] in valid_sample_ids]

    # format_mapping_file returns a single string, so write() is used.
    lines = format_mapping_file(headers, good_mapping_file)
    out_f = open(out_mapping_fp, 'w')
    out_f.write(lines)
    out_f.close()
示例3: main
def main():
    """Append computed columns to a mapping file.

    Each entry in --categories describes an operation over existing
    columns, and the result is appended under the corresponding name in
    --categories_header_names. The augmented mapping file is written to
    --output_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    mapping_fp = opts.input_fp
    categories = opts.categories
    header_names = opts.categories_header_names
    output_fp = opts.output_fp

    # Each requested category operation needs exactly one output header.
    if len(categories) != len(header_names):
        option_parser.error('The number of categories and the number of '
                            'header names must be equal.')

    data, headers, _ = parse_mapping_file(open(mapping_fp, 'U'))
    headers, data = apply_operation_on_mapping_file_columns(
        headers, data, categories, header_names)

    # format_mapping_file returns one string; use write() rather than
    # writelines(), which would iterate the string character by character.
    lines = format_mapping_file(headers, data)
    out_f = open(output_fp, 'w')
    out_f.write(lines)
    out_f.close()
示例4: split_mapping_file_on_field
def split_mapping_file_on_field(mapping_f,
                                mapping_field,
                                column_rename_ids=None,
                                include_repeat_cols=True):
    """ split mapping file based on value in field

    mapping_f - mapping file (file-like object or list of lines)
    mapping_field - header of the column whose distinct values define the
        split
    column_rename_ids - optional header of a column used to rename samples
        (resolved to a column index before filtering)
    include_repeat_cols - passed through to filter_mapping_file

    Yields (value_as_filename_safe_string, formatted_mapping_file_string)
    for each distinct value of mapping_field.
    """
    mapping_f = list(mapping_f)
    mapping_values = get_mapping_values(mapping_f, mapping_field)
    mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)

    if column_rename_ids:
        try:
            column_rename_ids = mapping_headers.index(column_rename_ids)
        except ValueError:
            # Bug fix: the header that failed the lookup is
            # column_rename_ids, not mapping_field, so report that one.
            raise KeyError("Field is not in mapping file (search is case " +
                           "and white-space sensitive). \n\tProvided field: " +
                           "%s. \n\tValid fields: %s" %
                           (column_rename_ids, ' '.join(mapping_headers)))

    for v in mapping_values:
        # Make the value safe for use in a file name.
        v_fp_str = v.replace(' ', '_')
        sample_ids_to_keep = sample_ids_from_metadata_description(
            mapping_f, valid_states_str="%s:%s" % (mapping_field, v))
        # parse mapping file each time though the loop as filtering operates on
        # values
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data,
            mapping_headers,
            sample_ids_to_keep,
            include_repeat_cols=include_repeat_cols,
            column_rename_ids=column_rename_ids)
        yield v_fp_str, format_mapping_file(mapping_headers, mapping_data)
示例5: main
def main():
    """Append merged columns (specified as 'ColA&&ColB') to a mapping file.

    Each entry in --columns_to_merge names existing columns joined by
    '&&'; a new column is appended whose header and per-row values are the
    concatenation of the named columns. The result is written to
    --output_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    columns_to_merge = opts.columns_to_merge
    mapping_fp = opts.mapping_fp
    output_fp = opts.output_fp

    try:
        data, headers, comments = parse_mapping_file(open(mapping_fp, 'U'))
    except Exception:
        # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # still propagate.
        option_parser.error('The input does not look like a valid mapping '
                            'file')

    for merge_spec in columns_to_merge:
        # headers.index raises ValueError if a requested column is missing.
        indices = [headers.index(col_name)
                   for col_name in merge_spec.split('&&')]
        # New header and per-row value are the concatenations of the
        # merged columns.
        headers.append(''.join([headers[i] for i in indices]))
        for line in data:
            line.append(''.join([line[i] for i in indices]))

    # Internal sanity check: every row must stay in sync with the headers.
    assert len(headers) == len(data[0]), ("Header and data row lengths "
                                          "differ after merging columns")

    # format_mapping_file returns one string; use write(), not writelines().
    lines = format_mapping_file(headers, data, comments)
    out_f = open(output_fp, 'w')
    out_f.write(lines)
    out_f.close()
示例6: create_personal_mapping_file
def create_personal_mapping_file(map_as_list,
                                 header,
                                 comments,
                                 personal_id_of_interest,
                                 output_fp,
                                 personal_id_index,
                                 individual_titles):
    """ creates mapping file on a per-individual basis

    map_as_list - mapping file data as a list of row lists
    header - mapping file headers
    comments - mapping file comments
    personal_id_of_interest - value identifying the individual of interest
    output_fp - path the personal mapping file is written to
    personal_id_index - column index holding the personal ID
    individual_titles - comma-separated pair of labels (e.g. 'Self,Other'),
        or None for the default ['Self', 'Other']

    Appends one column to every row: the first title for rows matching
    personal_id_of_interest, the second for all others. Writes the
    formatted file to output_fp and returns the augmented row list.
    """
    # Idiom fix: compare against None with 'is', not '=='.
    if individual_titles is None:
        individual_titles = ['Self', 'Other']
    else:
        individual_titles = individual_titles.split(',')

    personal_map = []
    for line in map_as_list:
        # Shallow-copy each row so the caller's rows are not mutated.
        row = list(line)
        if row[personal_id_index] == personal_id_of_interest:
            row.append(individual_titles[0])
        else:
            row.append(individual_titles[1])
        personal_map.append(row)

    personal_mapping_file = format_mapping_file(header, personal_map,
                                                comments)
    output_f = open(output_fp, 'w')
    output_f.write(personal_mapping_file)
    output_f.close()
    return personal_map
示例7: main
def main():
    """Filter samples from a BIOM OTU table (and optionally its mapping file).

    Samples may be selected by a metadata description (--mapping_fp +
    --valid_states), by per-sample count bounds (--min_count/--max_count),
    or by an explicit ID list (--sample_id_fp).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_fp = opts.input_fp
    output_fp = opts.output_fp
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    # --valid_states is meaningless without a mapping file to query.
    if mapping_fp is None and valid_states is not None:
        option_parser.error("--mapping_fp must be provided if --valid_states " "is passed.")
    # At least one filtering criterion must be requested.
    if not ((mapping_fp and valid_states) or min_count != 0 or not isinf(max_count) or sample_id_fp is not None):
        option_parser.error(
            "No filtering requested. Must provide either "
            "mapping_fp and valid states, min counts, "
            "max counts, or sample_id_fp (or some combination "
            "of those)."
        )
    # The two sample-selection mechanisms are mutually exclusive.
    if (mapping_fp and valid_states) and sample_id_fp:
        option_parser.error("Providing both --sample_id_fp and " "--mapping_fp/--valid_states is not supported.")
    # An output mapping file can only be produced from an input one.
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate" " output mapping file.")

    otu_table = load_table(opts.input_fp)
    negate_sample_id_fp = opts.negate_sample_id_fp
    if mapping_fp and valid_states:
        sample_ids_to_keep = sample_ids_from_metadata_description(open(mapping_fp, "U"), valid_states)
        # Negation only applies to IDs read from --sample_id_fp; when the
        # keep-set comes from the metadata description, force it off.
        negate_sample_id_fp = False
    else:
        # No metadata selection: start from every sample in the table.
        sample_ids_to_keep = otu_table.ids()

    if sample_id_fp is not None:
        # First whitespace-separated token of each non-comment line is a
        # sample ID; intersect with the current keep-set.
        o = open(sample_id_fp, "U")
        sample_id_f_ids = set([l.strip().split()[0] for l in o if not l.startswith("#")])
        o.close()
        sample_ids_to_keep = set(sample_ids_to_keep) & sample_id_f_ids

    filtered_otu_table = filter_samples_from_otu_table(
        otu_table, sample_ids_to_keep, min_count, max_count, negate_ids_to_keep=negate_sample_id_fp
    )
    try:
        write_biom_table(filtered_otu_table, output_fp)
    except EmptyBIOMTableError:
        option_parser.error(
            "Filtering resulted in an empty BIOM table. " "This indicates that no samples remained after filtering."
        )

    # filter mapping file if requested
    if output_mapping_fp:
        mapping_data, mapping_headers, _ = parse_mapping_file(open(mapping_fp, "U"))
        mapping_headers, mapping_data = filter_mapping_file(mapping_data, mapping_headers, filtered_otu_table.ids())
        open(output_mapping_fp, "w").write(format_mapping_file(mapping_headers, mapping_data))
示例8: main
def main():
    """Filter samples from a BIOM OTU table by metadata, counts, or ID list.

    Writes the filtered table to --output_fp and, if requested, a
    correspondingly filtered mapping file to --output_mapping_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    # At least one filtering criterion must have been requested.
    metadata_filter_requested = bool(mapping_fp and valid_states)
    count_filter_requested = min_count != 0 or not isinf(max_count)
    if not (metadata_filter_requested or
            count_filter_requested or
            sample_id_fp is not None):
        option_parser.error("No filtering requested. Must provide either "
                            "mapping_fp and valid states, min counts, "
                            "max counts, or sample_id_fp (or some combination "
                            "of those).")
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate"
                            " output mapping file.")

    otu_table = load_table(opts.input_fp)

    # Start from the metadata-selected samples, or from all samples.
    if metadata_filter_requested:
        sample_ids_to_keep = sample_ids_from_metadata_description(
            open(mapping_fp, 'U'), valid_states)
    else:
        sample_ids_to_keep = otu_table.ids()

    # Intersect with the explicit sample-ID list, if one was given.
    if sample_id_fp is not None:
        listed_ids = set()
        for line in open(sample_id_fp, 'U'):
            if not line.startswith('#'):
                listed_ids.add(line.strip().split()[0])
        sample_ids_to_keep = set(sample_ids_to_keep) & listed_ids

    filtered_otu_table = filter_samples_from_otu_table(otu_table,
                                                       sample_ids_to_keep,
                                                       min_count,
                                                       max_count)
    write_biom_table(filtered_otu_table, opts.output_fp)

    # filter mapping file if requested
    if output_mapping_fp:
        mapping_data, mapping_headers, _ = parse_mapping_file(
            open(mapping_fp, 'U'))
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data, mapping_headers, filtered_otu_table.ids())
        open(output_mapping_fp, 'w').write(
            format_mapping_file(mapping_headers, mapping_data))
示例9: test_format_mapping_file
def test_format_mapping_file(self):
    """format_mapping_file should produce the expected formatted string"""
    headers = ['SampleID', 'col1', 'col0', 'Description']
    samples = [
        ['bsample', 'v1_3', 'v0_3', 'd1'],
        ['asample', 'aval', 'another', 'd2'],
    ]
    comments = ['this goes after headers', 'this too']
    observed = format_mapping_file(headers, samples, comments)
    self.assertEqual(observed, example_mapping_file)
示例10: main
def main():
    """Split a BIOM table (and optionally its mapping file) by mapping fields.

    For each combination of values of --fields that has at least one
    sample, writes a per-group .biom file (and, unless suppressed, a
    per-group mapping .txt) into --output_dir.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    biom_table_fp = opts.biom_table_fp
    mapping_fp = opts.mapping_fp
    fields = opts.fields.split(',')
    output_dir = opts.output_dir
    suppress_mf = opts.suppress_mapping_file_output
    # column_rename_ids = opts.column_rename_ids
    # include_repeat_cols = opts.include_repeat_cols

    bt = load_table(biom_table_fp)
    mdata, mheaders, mcomments = parse_mapping_file(mapping_fp)
    # Convert to a numpy array so column slices (mdata[:, 0]) work.
    mdata = array(mdata)

    # check that biom file and mapping file have matching sample names. discard
    # those samples that do not appear in both.
    shared_samples = list(set(mdata[:, 0]).intersection(bt.ids(axis='sample')))
    if len(shared_samples) == 0:
        raise ValueError('Mapping file and biom table share no samples.')
    elif len(shared_samples) == len(mdata[:, 0]):
        # All mapping-file samples are shared; nothing to subset.
        mdata = array(mdata)
    else:
        # we want to preserve the order of the samples in the biom table
        ss_bt_order = [s for s in bt.ids(axis='sample') if s in
                       shared_samples]
        bt = bt.filter(ss_bt_order, axis='sample', inplace=True)
        mdata = subset_mapping_data(mdata, shared_samples)
    # check that headers in mapping data
    if not all([i in mheaders for i in fields]):
        raise ValueError('One or more of the specified fields was not found ' +\
                         'in the mapping file.')
    # create output directory and create base names
    create_dir(output_dir)
    mf_base_name = join(output_dir, splitext(split(mapping_fp)[1])[0])
    bt_base_name = join(output_dir, splitext(split(biom_table_fp)[1])[0])
    # run code and append output
    sample_groups, value_groups = make_non_empty_sample_lists(fields, mheaders,
                                                              mdata)
    for sg, vg in zip(sample_groups, value_groups):
        # Build an output-name suffix like '__Field_value_..._' from the
        # (field, value) pairs of this group.
        name_base = '__' + '%s_%s_' * len(vg) + '_'
        name_tmp = []
        for f, v in zip(fields, vg):
            name_tmp.extend([f, v])
        nb = name_base % tuple(name_tmp)
        tmp_mf_data = subset_mapping_data(mdata, sg)
        tmp_mf_str = format_mapping_file(mheaders, tmp_mf_data, mcomments)
        # Write the per-group biom table; inplace=False keeps bt intact for
        # the next group.
        write_biom_table(bt.filter(sg, axis='sample', inplace=False),
                         bt_base_name + nb + '.biom')
        if not suppress_mf:
            o = open(mf_base_name + nb + '.txt', 'w')
            o.writelines(tmp_mf_str)
            o.close()
示例11: main
def main():
    """Add alpha diversity values as (binned) columns to a mapping file.

    With --depth, the inputs in --alpha_fps are treated as collated alpha
    diversity files averaged at that rarefaction depth; otherwise exactly
    one single-metric alpha diversity file is expected. The augmented
    mapping file is written to --output_mapping_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth

    # make sure the number of bins is an integer
    try:
        number_of_bins = int(opts.number_of_bins)
    except ValueError:
        # Call-form raise (valid in Python 2 and 3) instead of the
        # Python-2-only 'raise ValueError, msg' statement form.
        raise ValueError('The number of bins must be an integer, not %s'
                         % opts.number_of_bins)

    # if using collated data, make sure they specify a depth
    if depth is not None:
        alpha_dict = {}
        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            alpha_dict[splitext(basename(single_alpha_fp))[0]] = open(
                single_alpha_fp, 'U').readlines()
        # format the collated data
        metrics, alpha_sample_ids, alpha_data = mean_alpha(alpha_dict,
                                                           depth)
    # when not using collated data, the user can only specify one input file
    else:
        if len(alpha_fps) > 1:
            option_parser.error('A comma-separated list of files should only be'
                                ' passed with the --alpha_fps option when using collated alpha '
                                'diversity data and also selecting a rarefaction depth with the'
                                ' --depth option.')
        else:
            metrics, alpha_sample_ids, alpha_data = parse_matrix(open(
                alpha_fps[0], 'U'))

    # parse the data from the files
    mapping_file_data, mapping_file_headers, comments = parse_mapping_file(
        open(mapping_fp, 'U'))

    # add the alpha diversity data to the mapping file
    out_mapping_file_data, out_mapping_file_headers = \
        add_alpha_diversity_values_to_mapping_file(
            metrics, alpha_sample_ids, alpha_data, mapping_file_headers,
            mapping_file_data, number_of_bins, binning_method,
            missing_value_name)

    # format the new data and write it down; format_mapping_file returns a
    # single string, so write() is used rather than writelines().
    lines = format_mapping_file(out_mapping_file_headers,
                                out_mapping_file_data)
    fd_out = open(output_mapping_fp, 'w')
    fd_out.write(lines)
    fd_out.close()
示例12: main
def main():
    """Reformat (canonicalize) a mapping file, rewriting it in place."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    try:
        data, headers, comments = parse_mapping_file(open(opts.input_fp, 'U'))
    except Exception:
        # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # still propagate.
        option_parser.error('That doesn\'t look like a mapping file')

    # format_mapping_file returns a single string; writing it back over
    # opts.input_fp is intentional (in-place reformat).
    lines = format_mapping_file(headers, data, comments)
    out_f = open(opts.input_fp, 'w')
    out_f.write(lines)
    out_f.close()
示例13: filter_mapping_file_from_mapping_f
def filter_mapping_file_from_mapping_f(mapping_f, sample_ids_to_keep, negate=False):
    """ Filter rows from a metadata mapping file

    mapping_f - mapping file (file-like object or list of lines)
    sample_ids_to_keep - collection of sample IDs; rows whose first column
        is in this collection are kept (or dropped when negate=True)
    negate - when True, invert the filter: keep only rows whose sample ID
        is NOT in sample_ids_to_keep

    Returns the filtered mapping file as a formatted string.
    """
    mapping_data, header, comments = parse_mapping_file(mapping_f)

    # Use a real set for O(1) membership tests (was '{}.fromkeys(...)').
    sample_ids_to_keep = set(sample_ids_to_keep)

    # Bug fix: the original appended matching rows unconditionally and
    # non-matching rows when negate was set, so negate=True kept every
    # row. Keep a row exactly when its membership XORs with negate.
    filtered_mapping_data = [
        mapping_datum for mapping_datum in mapping_data
        if (mapping_datum[0] in sample_ids_to_keep) != bool(negate)
    ]
    return format_mapping_file(header, filtered_mapping_data)
示例14: make_distance_boxplots
def make_distance_boxplots(
dm_f,
map_f,
fields,
width=None,
height=6.0,
suppress_all_within=False,
suppress_all_between=False,
suppress_individual_within=False,
suppress_individual_between=False,
y_min=0.0,
y_max=1.0,
whisker_length=1.5,
box_width=0.5,
box_color=None,
color_individual_within_by_field=None,
sort=None,
):
"""Generates various types of boxplots for distance comparisons.
Returns a list of tuples, one for each field. Each tuple contains the
following:
1) the name of the field (string)
2) a matplotlib.figure.Figure object containing the boxplots
3) a list of lists containing the raw plot data that was passed to mpl
4) a list of labels for each of the boxplots (string)
5) a list of mpl-compatible colors (one for each boxplot)
The Figure can be saved, and the raw data and labels can be useful (for
example) performing statistical tests or writing the raw data to disk.
The input arguments are exactly derived from the make_distance_boxplots.py
script (see the script options for details). To avoid duplicated effort,
their descriptions are not reproduced here.
"""
# Parse data files and do some preliminary error checking.
dm_header, dm_data = parse_distmat(dm_f)
map_data, map_header, map_comments = parse_mapping_file(map_f)
if fields is None or len(fields) < 1:
raise ValueError("You must provide at least one field to analyze.")
for field in fields:
if field not in map_header:
raise ValueError(
"The field '%s' is not in the provided mapping "
"file. Please supply correct fields "
"corresponding to fields in the mapping file." % field
)
# Make sure the y_min and y_max options make sense, as they can be either
# 'auto' or a number.
y_min = _cast_y_axis_extrema(y_min)
y_max = _cast_y_axis_extrema(y_max)
# Collate the distributions of distances that will comprise each boxplot.
# Suppress the generation of the indicated types of boxplots.
results = []
for field in fields:
plot_data = []
plot_labels = []
plot_colors = []
legend = None
# Little bit of duplicate code here... not sure it's worth the effort
# to clean up though.
if not suppress_all_within:
plot_data.append(get_all_grouped_distances(dm_header, dm_data, map_header, map_data, field, within=True))
plot_labels.append("All within %s" % field)
if color_individual_within_by_field is not None:
plot_colors.append(None)
else:
plot_colors.append(box_color)
if not suppress_all_between:
plot_data.append(get_all_grouped_distances(dm_header, dm_data, map_header, map_data, field, within=False))
plot_labels.append("All between %s" % field)
if color_individual_within_by_field is not None:
plot_colors.append(None)
else:
plot_colors.append(box_color)
if not suppress_individual_within:
within_dists = get_grouped_distances(dm_header, dm_data, map_header, map_data, field, within=True)
field_states = []
for grouping in within_dists:
plot_data.append(grouping[2])
plot_labels.append("%s vs. %s" % (grouping[0], grouping[1]))
field_states.append(grouping[0])
# If we need to color these boxplots by a field, build up a
# list of colors and a legend.
if color_individual_within_by_field is not None:
colors, color_mapping = _color_field_states(
format_mapping_file(map_header, map_data).split("\n"),
dm_header,
field,
field_states,
#.........这里部分代码省略.........
示例15: main
def main():
    """Print per-sample sequence/OTU count summaries for a BIOM OTU table.

    With --mapping_fp and --output_mapping_fp, also writes a copy of the
    mapping file with a 'NumIndividuals' column holding each sample's
    count.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    otu_table_fp = opts.otu_table_fp
    otu_table = parse_biom_table(qiime_open(otu_table_fp))
    min_counts, max_counts, median_counts, mean_counts, counts_per_sample = compute_seqs_per_library_stats(
        otu_table, opts.num_otus
    )
    num_otus = len(otu_table.ObservationIds)
    counts_per_sample_values = counts_per_sample.values()
    # median_absolute_deviation returns a tuple; only the MAD is needed.
    med_abs_dev = median_absolute_deviation(counts_per_sample_values)[0]
    even_sampling_depth = guess_even_sampling_depth(counts_per_sample_values)
    num_samples = len(counts_per_sample)
    print "Num samples: %s" % str(num_samples)
    print "Num otus: %s" % str(num_otus)
    if not opts.num_otus:
        # Counts are sequence counts in this mode, so the total is the
        # number of observed sequences.
        num_observations = sum(counts_per_sample_values)
        print "Num observations (sequences): %s" % str(num_observations)
        # port denisty functionality to a tested function. the following is broken (should be
        # count of non-zero cells rather than number of observations in the numerator)
        # print 'Table density (fraction of non-zero values): %1.4f' % (num_observations/(num_samples * num_otus))
    print
    if opts.num_otus:
        print "OTUs/sample summary:"
    else:
        print "Seqs/sample summary:"
    print " Min: %s" % str(min_counts)
    print " Max: %s" % str(max_counts)
    print " Median: %s" % str(median_counts)
    print " Mean: %s" % str(mean_counts)
    print " Std. dev.: %s" % (str(std(counts_per_sample_values)))
    print " Median Absolute Deviation: %s" % str(med_abs_dev)
    print " Default even sampling depth in\n core_qiime_analyses.py (just a suggestion): %s" % str(even_sampling_depth)
    print ""
    if opts.num_otus:
        print "OTUs/sample detail:"
    else:
        print "Seqs/sample detail:"
    # Sort by count (ascending) by putting the count first in each pair.
    sorted_counts_per_sample = [(v, k) for k, v in counts_per_sample.items()]
    sorted_counts_per_sample.sort()
    total_count = 0
    for v, k in sorted_counts_per_sample:
        total_count += v
        print " %s: %s" % (k, str(v))
    if opts.mapping_fp:
        if not opts.output_mapping_fp:
            raise RuntimeError("input mapping file supplied, but no path to" + " output file")
        f = open(opts.mapping_fp, "U")
        mapping_lines, headers, comments = parse_mapping_file(f)
        f.close()
        if len(headers) == 1:
            endoffset = 0  # if we only have the sample id, this data -> last col
        else:
            endoffset = 1  # usually make this data the penultimate column.
        headers.insert(len(headers) - endoffset, "NumIndividuals")
        for map_line in mapping_lines:
            # NOTE(review): sample_id is assigned but never used below;
            # map_line[0] is used directly for the lookup.
            sample_id = map_line
            try:
                depth = str(counts_per_sample[map_line[0]])
            except KeyError:
                # Sample appears in the mapping file but not in the OTU table.
                depth = "na"
            map_line.insert(len(map_line) - endoffset, depth)
        new_map_str = format_mapping_file(headers, mapping_lines, comments)
        f = open(opts.output_mapping_fp, "w")
        f.write(new_map_str)
        f.close()