本文整理汇总了Python中qiime.util.MetadataMap.hasSingleCategoryValue方法的典型用法代码示例。如果您正苦于以下问题：Python MetadataMap.hasSingleCategoryValue方法的具体用法？Python MetadataMap.hasSingleCategoryValue怎么用？Python MetadataMap.hasSingleCategoryValue使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类qiime.util.MetadataMap的用法示例。
在下文中一共展示了MetadataMap.hasSingleCategoryValue方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: run_core_diversity_analyses
# 需要导入模块: from qiime.util import MetadataMap [as 别名]
# 或者: from qiime.util.MetadataMap import hasSingleCategoryValue [as 别名]
def run_core_diversity_analyses(
biom_fp,
mapping_fp,
sampling_depth,
output_dir,
qiime_config,
command_handler=call_commands_serially,
tree_fp=None,
params=None,
categories=None,
arare_min_rare_depth=10,
arare_num_steps=10,
parallel=False,
suppress_taxa_summary=False,
suppress_beta_diversity=False,
suppress_alpha_diversity=False,
suppress_otu_category_significance=False,
status_update_callback=print_to_stdout,
):
"""
"""
if categories != None:
# Validate categories provided by the users
mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U"))
metadata_map = MetadataMap(mapping_data, mapping_comments)
for c in categories:
if c not in metadata_map.CategoryNames:
raise ValueError, (
"Category '%s' is not a column header "
"in your mapping file. "
"Categories are case and white space sensitive. Valid "
"choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
)
if metadata_map.hasSingleCategoryValue(c):
raise ValueError, (
"Category '%s' contains only one value. "
"Categories analyzed here require at least two values." % c
)
else:
categories = []
# prep some variables
if params == None:
params = parse_qiime_parameters([])
create_dir(output_dir)
index_fp = "%s/index.html" % output_dir
index_links = []
commands = []
# begin logging
log_fp = generate_log_fp(output_dir)
index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
input_fps = [biom_fp, mapping_fp]
if tree_fp != None:
input_fps.append(tree_fp)
log_input_md5s(logger, input_fps)
# run print_biom_table_summary.py on input BIOM table
try:
params_str = get_params_str(params["print_biom_table_summary"])
except KeyError:
params_str = ""
biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
print_biom_table_summary_cmd = "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % (
biom_fp,
biom_table_stats_output_fp,
params_str,
)
index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))
commands.append([("Generate BIOM table summary", print_biom_table_summary_cmd)])
# filter samples with fewer observations than the requested sampling_depth.
# since these get filtered for some analyses (eg beta diversity after
# even sampling) it's useful to filter them here so they're filtered
# from all analyses.
filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
biom_fp,
filtered_biom_fp,
sampling_depth,
)
commands.append(
[
(
"Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
filter_samples_cmd,
)
]
)
biom_fp = filtered_biom_fp
# run initial commands and reset the command list
command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
commands = []
if not suppress_beta_diversity:
bdiv_even_output_dir = "%s/bdiv_even%d/" % (output_dir, sampling_depth)
#.........这里部分代码省略.........
示例2: run_core_diversity_analyses
# 需要导入模块: from qiime.util import MetadataMap [as 别名]
# 或者: from qiime.util.MetadataMap import hasSingleCategoryValue [as 别名]
def run_core_diversity_analyses(
biom_fp,
mapping_fp,
sampling_depth,
output_dir,
qiime_config,
command_handler=call_commands_serially,
tree_fp=None,
params=None,
categories=None,
arare_min_rare_depth=10,
arare_num_steps=10,
parallel=False,
suppress_taxa_summary=False,
suppress_beta_diversity=False,
suppress_alpha_diversity=False,
suppress_otu_category_significance=False,
status_update_callback=print_to_stdout):
"""
"""
if categories != None:
# Validate categories provided by the users
mapping_data, mapping_comments = \
parse_mapping_file_to_dict(open(mapping_fp,'U'))
metadata_map = MetadataMap(mapping_data, mapping_comments)
for c in categories:
if c not in metadata_map.CategoryNames:
raise ValueError, ("Category '%s' is not a column header "
"in your mapping file. "
"Categories are case and white space sensitive. Valid "
"choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
if metadata_map.hasSingleCategoryValue(c):
raise ValueError, ("Category '%s' contains only one value. "
"Categories analyzed here require at least two values." % c)
else:
categories= []
# prep some variables
if params == None:
params = parse_qiime_parameters([])
create_dir(output_dir)
index_fp = '%s/index.html' % output_dir
index_links = []
commands = []
# begin logging
old_log_fps = glob(join(output_dir,'log_20*txt'))
log_fp = generate_log_fp(output_dir)
index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
for old_log_fp in old_log_fps:
index_links.append(('Previous run log',old_log_fp,_index_headers['run_summary']))
logger = WorkflowLogger(log_fp,
params=params,
qiime_config=qiime_config)
input_fps = [biom_fp,mapping_fp]
if tree_fp != None:
input_fps.append(tree_fp)
log_input_md5s(logger,input_fps)
# run 'biom summarize-table' on input BIOM table
try:
params_str = get_params_str(params['biom-summarize-table'])
except KeyError:
params_str = ''
biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
if not exists(biom_table_stats_output_fp):
biom_table_summary_cmd = \
"biom summarize-table -i %s -o %s --suppress-md5 %s" % \
(biom_fp, biom_table_stats_output_fp,params_str)
commands.append([('Generate BIOM table summary',
biom_table_summary_cmd)])
else:
logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" \
% biom_table_stats_output_fp)
index_links.append(('BIOM table statistics',
biom_table_stats_output_fp,
_index_headers['run_summary']))
# filter samples with fewer observations than the requested sampling_depth.
# since these get filtered for some analyses (eg beta diversity after
# even sampling) it's useful to filter them here so they're filtered
# from all analyses.
filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
if not exists(filtered_biom_fp):
filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
(biom_fp,filtered_biom_fp,sampling_depth)
commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
filter_samples_cmd)])
else:
logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" \
% filtered_biom_fp)
biom_fp = filtered_biom_fp
# run initial commands and reset the command list
if len(commands) > 0:
command_handler(commands,
status_update_callback,
logger,
#.........这里部分代码省略.........
示例3: run_core_diversity_analyses
# 需要导入模块: from qiime.util import MetadataMap [as 别名]
# 或者: from qiime.util.MetadataMap import hasSingleCategoryValue [as 别名]
def run_core_diversity_analyses(
biom_fp,
mapping_fp,
sampling_depth,
output_dir,
qiime_config,
command_handler=call_commands_serially,
tree_fp=None,
params=None,
categories=None,
arare_min_rare_depth=10,
arare_num_steps=10,
parallel=False,
suppress_taxa_summary=False,
suppress_beta_diversity=False,
suppress_alpha_diversity=False,
suppress_group_significance=False,
status_update_callback=print_to_stdout,
):
"""
"""
if categories is not None:
# Validate categories provided by the users
mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U"))
metadata_map = MetadataMap(mapping_data, mapping_comments)
for c in categories:
if c not in metadata_map.CategoryNames:
raise ValueError(
"Category '%s' is not a column header "
"in your mapping file. "
"Categories are case and white space sensitive. Valid "
"choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
)
if metadata_map.hasSingleCategoryValue(c):
raise ValueError(
"Category '%s' contains only one value. "
"Categories analyzed here require at least two values." % c
)
else:
categories = []
comma_separated_categories = ",".join(categories)
# prep some variables
if params is None:
params = parse_qiime_parameters([])
create_dir(output_dir)
index_fp = "%s/index.html" % output_dir
index_links = []
commands = []
# begin logging
old_log_fps = glob(join(output_dir, "log_20*txt"))
log_fp = generate_log_fp(output_dir)
index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
for old_log_fp in old_log_fps:
index_links.append(("Previous run log", old_log_fp, _index_headers["run_summary"]))
logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
input_fps = [biom_fp, mapping_fp]
if tree_fp is not None:
input_fps.append(tree_fp)
log_input_md5s(logger, input_fps)
# run 'biom summarize-table' on input BIOM table
try:
params_str = get_params_str(params["biom-summarize-table"])
except KeyError:
params_str = ""
biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
if not exists(biom_table_stats_output_fp):
biom_table_summary_cmd = "biom summarize-table -i %s -o %s --suppress-md5 %s" % (
biom_fp,
biom_table_stats_output_fp,
params_str,
)
commands.append([("Generate BIOM table summary", biom_table_summary_cmd)])
else:
logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" % biom_table_stats_output_fp)
index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))
# filter samples with fewer observations than the requested sampling_depth.
# since these get filtered for some analyses (eg beta diversity after
# even sampling) it's useful to filter them here so they're filtered
# from all analyses.
filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
if not exists(filtered_biom_fp):
filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
biom_fp,
filtered_biom_fp,
sampling_depth,
)
commands.append(
[
(
"Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
filter_samples_cmd,
)
]
)
else:
#.........这里部分代码省略.........
示例4: run_core_diversity_analyses
# 需要导入模块: from qiime.util import MetadataMap [as 别名]
# 或者: from qiime.util.MetadataMap import hasSingleCategoryValue [as 别名]
def run_core_diversity_analyses(
biom_fp,
mapping_fp,
sampling_depth,
output_dir,
qiime_config,
command_handler=call_commands_serially,
tree_fp=None,
params=None,
categories=None,
arare_min_rare_depth=10,
arare_num_steps=10,
parallel=False,
status_update_callback=print_to_stdout):
"""
"""
if categories != None:
# Validate categories provided by the users
mapping_data, mapping_comments = \
parse_mapping_file_to_dict(open(mapping_fp,'U'))
metadata_map = MetadataMap(mapping_data, mapping_comments)
for c in categories:
if c not in metadata_map.CategoryNames:
raise ValueError, ("Category '%s' is not a column header "
"in your mapping file. "
"Categories are case and white space sensitive. Valid "
"choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
if metadata_map.hasSingleCategoryValue(c):
raise ValueError, ("Category '%s' contains only one value. "
"Categories analyzed here require at least two values." % c)
else:
categories= []
# prep some variables
if params == None:
params = parse_qiime_parameters([])
create_dir(output_dir)
index_fp = '%s/index.html' % output_dir
index_links = []
commands = []
python_exe_fp = qiime_config['python_exe_fp']
script_dir = get_qiime_scripts_dir()
# begin logging
log_fp = generate_log_fp(output_dir)
index_links.append(('Master run log',log_fp,'Log files'))
logger = WorkflowLogger(log_fp,
params=params,
qiime_config=qiime_config)
input_fps = [biom_fp,mapping_fp]
if tree_fp != None:
input_fps.append(tree_fp)
log_input_md5s(logger,input_fps)
bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
even_dm_fps = run_beta_diversity_through_plots(
otu_table_fp=biom_fp,
mapping_fp=mapping_fp,
output_dir=bdiv_even_output_dir,
command_handler=command_handler,
params=params,
qiime_config=qiime_config,
sampling_depth=sampling_depth,
# force suppression of distance histograms - boxplots work better
# in this context, and are created below.
histogram_categories=[],
tree_fp=tree_fp,
parallel=parallel,
logger=logger,
status_update_callback=status_update_callback)
for bdiv_metric, dm_fp in even_dm_fps:
for category in categories:
boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric)
try:
params_str = get_params_str(params['make_distance_boxplots'])
except KeyError:
params_str = ''
boxplots_cmd = \
'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
(dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
commands.append([('Boxplots (%s)' % category,
boxplots_cmd)])
index_links.append(('Distance boxplots (%s)' % bdiv_metric,
'%s/%s_Distances.pdf' % \
(boxplots_output_dir,category),
'Beta diversity results (even sampling: %d)' % sampling_depth))
index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
'%s/%s_Stats.txt' % \
(boxplots_output_dir,category),
'Beta diversity results (even sampling: %d)' % sampling_depth))
index_links.append(('3D plot (%s, continuous coloring)' % bdiv_metric,
'%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' % \
(bdiv_even_output_dir,bdiv_metric,bdiv_metric),
'Beta diversity results (even sampling: %d)' % sampling_depth))
#.........这里部分代码省略.........
示例5: preprocess_mapping_file
# 需要导入模块: from qiime.util import MetadataMap [as 别名]
# 或者: from qiime.util.MetadataMap import hasSingleCategoryValue [as 别名]
def preprocess_mapping_file(data, headers, columns, unique=False, single=False, clones=0):
    """Process a mapping file to expand the data or remove unuseful fields

    The rows of ``data`` and the ``headers`` list are copied before any merged
    column is appended, so the caller's objects are never modified.

    Inputs:
    data: mapping file data, a list of rows where each row is a list of
    fields (fields that get merged or retagged must be strings)
    headers: mapping file headers, one name per field in a row
    columns: list of headers to keep, if one of these headers includes two
    ampersands, this function will create a new column by merging the delimited
    columns. "SampleID" is prepended when it is not already the first element
    (an empty list is treated as ["SampleID"]).
    unique: if true, REMOVE every column (other than the first) whose values
    are all unique
    single: if true, REMOVE every column (other than the first) that holds a
    single repeated value
    clones: number of times to replicate the metadata; each replica gets its
    sample id suffixed with "_<replica index>", starting at 0

    Outputs:
    data: processed mapping file data
    headers: processed mapping file headers

    Raises:
    ValueError: if a name delimited by "&&" in a merged column is not present
    in headers
    """
    # Work on copies: merged columns are appended below and the caller's rows
    # and header list must not grow as a side effect of this call.
    data = [list(line) for line in data]
    headers = list(headers)

    # The sample ID must always be there, else it's meaningless data
    if not columns or columns[0] != "SampleID":
        columns = ["SampleID"] + list(columns)

    # process concatenated columns if needed
    merge = [column for column in columns if "&&" in column]

    # each element needs several columns to be merged
    for new_column in merge:
        indices = [headers.index(header_name) for header_name in new_column.split("&&")]

        # join all the fields of the metadata that are listed in indices
        for line in data:
            line.append("".join([line[index] for index in indices]))
        headers.append(new_column)

    # remove all unique or singled valued columns
    if unique or single:
        columns_to_remove = set()
        metadata = MetadataMap(mapping_file_to_dict(data, headers), [])

        # the first header (the sample id column) is never a removal candidate
        candidates = headers[1:]

        # find columns that have values that are all unique
        if unique:
            columns_to_remove.update(
                column_name for column_name in candidates if metadata.hasUniqueCategoryValues(column_name)
            )

        # remove categories where there is only one value
        if single:
            columns_to_remove.update(
                column_name for column_name in candidates if metadata.hasSingleCategoryValue(column_name)
            )

        # remove the single or unique columns
        # NOTE(review): negate=True presumably inverts the selection so the
        # listed columns are dropped -- confirm against the helper.
        data, headers = keep_columns_from_mapping_file(data, headers, list(columns_to_remove), negate=True)

    # remove anything not specified in the input
    data, headers = keep_columns_from_mapping_file(data, headers, columns)

    # sanitize the mapping file data and headers
    data, headers = sanitize_mapping_file(data, headers)

    # clones mean: replicate the metadata retagging the sample ids with a suffix
    if clones:
        out_data = []
        for index in range(0, clones):
            out_data.extend([[element[0] + "_%d" % index] + element[1:] for element in data])
        data = out_data

    return data, headers