本文整理汇总了Python中qiime.workflow.util.WorkflowLogger类的典型用法代码示例。如果您正苦于以下问题:Python WorkflowLogger类的具体用法?Python WorkflowLogger怎么用?Python WorkflowLogger使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了WorkflowLogger类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: iterative_pick_subsampled_open_reference_otus
def iterative_pick_subsampled_open_reference_otus(
input_fps,
refseqs_fp,
output_dir,
percent_subsample,
new_ref_set_id,
command_handler,
params,
qiime_config,
prefilter_refseqs_fp=None,
prefilter_percent_id=0.60,
min_otu_size=2,
run_assign_tax=True,
run_align_and_tree=True,
step1_otu_map_fp=None,
step1_failures_fasta_fp=None,
parallel=False,
suppress_step4=False,
logger=None,
suppress_md5=False,
denovo_otu_picking_method='uclust',
reference_otu_picking_method='uclust_ref',
status_update_callback=print_to_stdout):
""" Call the pick_subsampled_open_reference_otus workflow on multiple inputs
and handle processing of the results.
"""
create_dir(output_dir)
commands = []
if logger == None:
logger = WorkflowLogger(generate_log_fp(output_dir),
params=params,
qiime_config=qiime_config)
close_logger_on_success = True
else:
close_logger_on_success = False
# if the user has not passed a different reference collection for the pre-filter,
# used the input refseqs_fp for all iterations. we want to pre-filter all data against
# the input data as lower percent identity searches with uclust can be slow, so we
# want the reference collection to stay at a reasonable size.
if prefilter_refseqs_fp == None:
prefilter_refseqs_fp = refseqs_fp
otu_table_fps = []
repset_fasta_fps = []
for i,input_fp in enumerate(input_fps):
iteration_output_dir = '%s/%d/' % (output_dir,i)
if iteration_output_exists(iteration_output_dir,min_otu_size):
# if the output from an iteration already exists, skip that
# iteration (useful for continuing failed runs)
log_input_md5s(logger,[input_fp,refseqs_fp])
logger.write('Iteration %d (input file: %s) output data already exists. '
'Skipping and moving to next.\n\n' % (i,input_fp))
else:
pick_subsampled_open_reference_otus(input_fp=input_fp,
refseqs_fp=refseqs_fp,
output_dir=iteration_output_dir,
percent_subsample=percent_subsample,
new_ref_set_id='.'.join([new_ref_set_id,str(i)]),
command_handler=command_handler,
params=params,
qiime_config=qiime_config,
run_assign_tax=False,
run_align_and_tree=False,
prefilter_refseqs_fp=prefilter_refseqs_fp,
prefilter_percent_id=prefilter_percent_id,
min_otu_size=min_otu_size,
step1_otu_map_fp=step1_otu_map_fp,
step1_failures_fasta_fp=step1_failures_fasta_fp,
parallel=parallel,
suppress_step4=suppress_step4,
logger=logger,
suppress_md5=suppress_md5,
denovo_otu_picking_method=denovo_otu_picking_method,
reference_otu_picking_method=reference_otu_picking_method,
status_update_callback=status_update_callback)
## perform post-iteration file shuffling whether the previous iteration's
## data previously existed or was just computed.
# step1 otu map and failures can only be used for the first iteration
# as subsequent iterations need to use updated refseqs files
step1_otu_map_fp = step1_failures_fasta_fp = None
new_refseqs_fp = '%s/new_refseqs.fna' % iteration_output_dir
refseqs_fp = new_refseqs_fp
otu_table_fps.append('%s/otu_table_mc%d.biom' % (iteration_output_dir,min_otu_size))
repset_fasta_fps.append('%s/rep_set.fna' % iteration_output_dir)
# Merge OTU tables - check for existence first as this step has historically
# been a frequent failure, so is sometimes run manually in failed runs.
otu_table_fp = '%s/otu_table_mc%d.biom' % (output_dir,min_otu_size)
if not (exists(otu_table_fp) and getsize(otu_table_fp) > 0):
merge_cmd = 'merge_otu_tables.py -i %s -o %s' %\
(','.join(otu_table_fps),otu_table_fp)
commands.append([("Merge OTU tables",merge_cmd)])
# Build master rep set
final_repset_fp = '%s/rep_set.fna' % output_dir
final_repset_from_iteration_repsets_fps(repset_fasta_fps,final_repset_fp)
command_handler(commands,
status_update_callback,
#.........这里部分代码省略.........
示例2: pick_subsampled_open_reference_otus
def pick_subsampled_open_reference_otus(input_fp,
refseqs_fp,
output_dir,
percent_subsample,
new_ref_set_id,
command_handler,
params,
qiime_config,
prefilter_refseqs_fp=None,
run_assign_tax=True,
run_align_and_tree=True,
prefilter_percent_id=0.60,
min_otu_size=2,
step1_otu_map_fp=None,
step1_failures_fasta_fp=None,
parallel=False,
suppress_step4=False,
logger=None,
suppress_md5=False,
denovo_otu_picking_method='uclust',
reference_otu_picking_method='uclust_ref',
status_update_callback=print_to_stdout):
""" Run the data preparation steps of Qiime
The steps performed by this function are:
- Pick reference OTUs against refseqs_fp
- Subsample the failures to n sequences.
- Pick OTUs de novo on the n failures.
- Pick representative sequences for the resulting OTUs.
- Pick reference OTUs on all failures using the
representative set from step 4 as the reference set.
"""
# for now only allowing uclust for otu picking
allowed_denovo_otu_picking_methods = ['uclust','usearch61']
allowed_reference_otu_picking_methods = ['uclust_ref','usearch61_ref']
assert denovo_otu_picking_method in allowed_denovo_otu_picking_methods,\
"Unknown de novo OTU picking method: %s. Known methods are: %s"\
% (denovo_otu_picking_method,
','.join(allowed_denovo_otu_picking_methods))
assert reference_otu_picking_method in allowed_reference_otu_picking_methods,\
"Unknown reference OTU picking method: %s. Known methods are: %s"\
% (reference_otu_picking_method,
','.join(allowed_reference_otu_picking_methods))
# Prepare some variables for the later steps
input_dir, input_filename = split(input_fp)
input_basename, input_ext = splitext(input_filename)
create_dir(output_dir)
commands = []
if logger == None:
logger = WorkflowLogger(generate_log_fp(output_dir),
params=params,
qiime_config=qiime_config)
close_logger_on_success = True
else:
close_logger_on_success = False
if not suppress_md5:
log_input_md5s(logger,[input_fp,
refseqs_fp,
step1_otu_map_fp,
step1_failures_fasta_fp])
# if the user has not passed a different reference collection for the pre-filter,
# used the main refseqs_fp. this is useful if the user wants to provide a smaller
# reference collection, or to use the input reference collection when running in
# iterative mode (rather than an iteration's new refseqs)
if prefilter_refseqs_fp == None:
prefilter_refseqs_fp = refseqs_fp
## Step 1: Closed-reference OTU picking on the input file (if not already complete)
if step1_otu_map_fp and step1_failures_fasta_fp:
step1_dir = '%s/step1_otus' % output_dir
create_dir(step1_dir)
logger.write("Using pre-existing reference otu map and failures.\n\n")
else:
if prefilter_percent_id != None:
prefilter_dir = '%s/prefilter_otus/' % output_dir
prefilter_failures_list_fp = '%s/%s_failures.txt' % \
(prefilter_dir,input_basename)
prefilter_pick_otu_cmd = pick_reference_otus(\
input_fp,prefilter_dir,reference_otu_picking_method,
prefilter_refseqs_fp,parallel,params,logger,prefilter_percent_id)
commands.append([('Pick Reference OTUs (prefilter)', prefilter_pick_otu_cmd)])
prefiltered_input_fp = '%s/prefiltered_%s%s' %\
(prefilter_dir,input_basename,input_ext)
filter_fasta_cmd = 'filter_fasta.py -f %s -o %s -s %s -n' %\
(input_fp,prefiltered_input_fp,prefilter_failures_list_fp)
commands.append([('Filter prefilter failures from input', filter_fasta_cmd)])
input_fp = prefiltered_input_fp
input_dir, input_filename = split(input_fp)
input_basename, input_ext = splitext(input_filename)
## Build the OTU picking command
step1_dir = \
'%s/step1_otus' % output_dir
#.........这里部分代码省略.........
示例3: pick_subsampled_open_reference_otus
def pick_subsampled_open_reference_otus(input_fp,
refseqs_fp,
output_dir,
percent_subsample,
new_ref_set_id,
command_handler,
params,
qiime_config,
prefilter_refseqs_fp=None,
run_assign_tax=True,
run_align_and_tree=True,
prefilter_percent_id=None,
min_otu_size=2,
step1_otu_map_fp=None,
step1_failures_fasta_fp=None,
parallel=False,
suppress_step4=False,
logger=None,
suppress_md5=False,
suppress_index_page=False,
denovo_otu_picking_method='uclust',
reference_otu_picking_method='uclust_ref',
status_update_callback=print_to_stdout,
minimum_failure_threshold=100000):
""" Run the data preparation steps of Qiime
The steps performed by this function are:
- Pick reference OTUs against refseqs_fp
- Subsample the failures to n sequences.
- Pick OTUs de novo on the n failures.
- Pick representative sequences for the resulting OTUs.
- Pick reference OTUs on all failures using the
representative set from step 4 as the reference set.
"""
# for now only allowing uclust/usearch/sortmerna+sumaclust for otu picking
allowed_denovo_otu_picking_methods = ['uclust', 'usearch61', 'sumaclust']
allowed_reference_otu_picking_methods = ['uclust_ref', 'usearch61_ref',
'sortmerna']
assert denovo_otu_picking_method in allowed_denovo_otu_picking_methods,\
"Unknown de novo OTU picking method: %s. Known methods are: %s"\
% (denovo_otu_picking_method,
','.join(allowed_denovo_otu_picking_methods))
assert reference_otu_picking_method in allowed_reference_otu_picking_methods,\
"Unknown reference OTU picking method: %s. Known methods are: %s"\
% (reference_otu_picking_method,
','.join(allowed_reference_otu_picking_methods))
# Prepare some variables for the later steps
index_links = []
input_dir, input_filename = split(input_fp)
input_basename, input_ext = splitext(input_filename)
create_dir(output_dir)
commands = []
if logger is None:
log_fp = generate_log_fp(output_dir)
logger = WorkflowLogger(log_fp,
params=params,
qiime_config=qiime_config)
close_logger_on_success = True
index_links.append(
('Run summary data',
log_fp,
_index_headers['run_summary']))
else:
close_logger_on_success = False
if not suppress_md5:
log_input_md5s(logger, [input_fp,
refseqs_fp,
step1_otu_map_fp,
step1_failures_fasta_fp])
# if the user has not passed a different reference collection for the pre-filter,
# used the main refseqs_fp. this is useful if the user wants to provide a smaller
# reference collection, or to use the input reference collection when running in
# iterative mode (rather than an iteration's new refseqs)
if prefilter_refseqs_fp is None:
prefilter_refseqs_fp = refseqs_fp
# Step 1: Closed-reference OTU picking on the input file (if not already
# complete)
if step1_otu_map_fp and step1_failures_fasta_fp:
step1_dir = '%s/step1_otus' % output_dir
create_dir(step1_dir)
logger.write("Using pre-existing reference otu map and failures.\n\n")
else:
if prefilter_percent_id is not None:
prefilter_dir = '%s/prefilter_otus/' % output_dir
prefilter_failures_list_fp = '%s/%s_failures.txt' % \
(prefilter_dir, input_basename)
prefilter_pick_otu_cmd = pick_reference_otus(
input_fp, prefilter_dir, reference_otu_picking_method,
prefilter_refseqs_fp, parallel, params, logger, prefilter_percent_id)
commands.append(
[('Pick Reference OTUs (prefilter)', prefilter_pick_otu_cmd)])
#.........这里部分代码省略.........
示例4: run_pick_closed_reference_otus
def run_pick_closed_reference_otus(
input_fp,
refseqs_fp,
output_dir,
taxonomy_fp,
command_handler,
params,
qiime_config,
parallel=False,
logger=None,
suppress_md5=False,
status_update_callback=print_to_stdout):
""" Run the data preparation steps of Qiime
The steps performed by this function are:
1) Pick OTUs;
2) Build an OTU table with optional pre-defined taxonmy.
"""
# confirm that a valid otu picking method was supplied before doing
# any work
reference_otu_picking_methods = ['blast','uclust_ref','usearch61_ref']
try:
otu_picking_method = params['pick_otus']['otu_picking_method']
except KeyError:
otu_picking_method = 'uclust_ref'
assert otu_picking_method in reference_otu_picking_methods,\
"Invalid OTU picking method supplied: %s. Valid choices are: %s"\
% (otu_picking_method,' '.join(reference_otu_picking_methods))
# Prepare some variables for the later steps
input_dir, input_filename = split(input_fp)
input_basename, input_ext = splitext(input_filename)
create_dir(output_dir)
commands = []
python_exe_fp = qiime_config['python_exe_fp']
script_dir = get_qiime_scripts_dir()
if logger == None:
logger = WorkflowLogger(generate_log_fp(output_dir),
params=params,
qiime_config=qiime_config)
close_logger_on_success = True
else:
close_logger_on_success = False
if not suppress_md5:
log_input_md5s(logger,[input_fp,refseqs_fp,taxonomy_fp])
# Prep the OTU picking command
pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
otu_fp = '%s/%s_otus.txt' % (pick_otu_dir,input_basename)
if parallel and (otu_picking_method == 'blast' or
otu_picking_method == 'uclust_ref' or
otu_picking_method == 'usearch61_ref'):
# Grab the parallel-specific parameters
try:
params_str = get_params_str(params['parallel'])
except KeyError:
params_str = ''
# Grab the OTU picker parameters
try:
# Want to find a cleaner strategy for this: the parallel script
# is method-specific, so doesn't take a --alignment_method
# option. This works for now though.
d = params['pick_otus'].copy()
if 'otu_picking_method' in d:
del d['otu_picking_method']
params_str += ' %s' % get_params_str(d)
except KeyError:
pass
otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
# Build the OTU picking command
pick_otus_cmd = '%s %s/%s -i %s -o %s -r %s -T %s' %\
(python_exe_fp,
script_dir,
otu_picking_script,
input_fp,
pick_otu_dir,
refseqs_fp,
params_str)
else:
try:
params_str = get_params_str(params['pick_otus'])
except KeyError:
params_str = ''
# Since this is reference-based OTU picking we always want to
# suppress new clusters -- force it here.
params_str+= ' --suppress_new_clusters'
logger.write("Forcing --suppress_new_clusters as this is closed-reference OTU picking.\n\n")
# Build the OTU picking command
pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s -r %s -m %s %s' %\
(python_exe_fp,
script_dir,
input_fp,
pick_otu_dir,
refseqs_fp,
otu_picking_method,
#.........这里部分代码省略.........
示例5: pick_nested_reference_otus
def pick_nested_reference_otus(input_fasta_fp,
                               input_tree_fp,
                               output_dir,
                               run_id,
                               similarity_thresholds,
                               command_handler,
                               status_update_callback=print_to_stdout):
    """ Build nested reference OTU collections at several similarity levels

        Thresholds are processed from highest to lowest. For each threshold
        the current sequence collection is clustered with
        ``pick_otus.py -m uclust``, a representative set is picked with
        ``pick_rep_set.py -m first``, the representative sequences are
        renamed via rename_rep_seqs, and (when a tree was supplied) the
        current tree is filtered to the new representative set with
        ``filter_tree.py``. The representative set (and filtered tree) written
        for one threshold is the input for the next, lower threshold.

        input_fasta_fp: sequences used as input for the first threshold
        input_tree_fp: tree to filter alongside the sequences; a false value
         (None or '') disables all tree handling
        output_dir: directory to create; receives 'otus', 'rep_set' and
         (if a tree is provided) 'trees' subdirectories plus the log file
        run_id: string embedded in the names of the rep set and tree files
        similarity_thresholds: iterable of percent similarities (e.g. 97, 94);
         each is divided by 100 to obtain the value passed to pick_otus.py -s.
         The caller's object is not modified.
        command_handler: callable invoked as
         command_handler(commands, status_update_callback, logger,
         close_logger_on_success=False)
        status_update_callback: passed through to command_handler
    """
    # Prepare the output directory layout
    create_dir(output_dir)
    otu_dir = join(output_dir, 'otus')
    create_dir(otu_dir)
    rep_set_dir = join(output_dir, 'rep_set')
    create_dir(rep_set_dir)
    # currently not doing anything with taxonomies

    # Decide once whether trees are handled, and normalise a false value
    # (e.g. '') to None, so that the directory creation here and the
    # per-threshold filtering below can never disagree (previously an empty
    # string skipped the directory creation but still entered the
    # tree-filtering branch, hitting an undefined tree_dir).
    current_tree_fp = input_tree_fp or None
    if current_tree_fp is not None:
        tree_dir = join(output_dir, 'trees')
        create_dir(tree_dir)

    logger = WorkflowLogger(generate_log_fp(output_dir))

    # Work on a sorted copy (highest threshold first) rather than sorting
    # and reversing the caller's list in place.
    ordered_thresholds = sorted(similarity_thresholds, reverse=True)

    current_inseqs_fp = input_fasta_fp

    for similarity_threshold in ordered_thresholds:
        commands = []
        files_to_remove = []
        current_inseqs_basename = splitext(split(current_inseqs_fp)[1])[0]

        # pick otus command
        otu_fp = '%s/%d_otu_map.txt' % (otu_dir, similarity_threshold)
        clusters_fp = '%s/%d_clusters.uc' % (otu_dir, similarity_threshold)
        # pick_otus.py names its outputs after the input file; those are
        # renamed to threshold-based names by the 'mv' commands below
        temp_otu_fp = '%s/%s_otus.txt' % (otu_dir, current_inseqs_basename)
        temp_log_fp = '%s/%s_otus.log' % (otu_dir, current_inseqs_basename)
        temp_clusters_fp = '%s/%s_clusters.uc' % (otu_dir,
                                                  current_inseqs_basename)
        # Divide by 100.0 (not 100): without true division enabled an
        # integer threshold such as 97 would otherwise become 0 under
        # Python 2 integer division.
        pick_otus_cmd = \
            'pick_otus.py -m uclust -DBz -i %s -s %1.2f -o %s' % (
                current_inseqs_fp,
                similarity_threshold / 100.0,
                otu_dir)
        commands.append([('Pick OTUs (%d)' % similarity_threshold,
                          pick_otus_cmd)])
        commands.append([('Rename OTU file (%d)' % similarity_threshold,
                          'mv %s %s' % (temp_otu_fp, otu_fp))])
        commands.append([('Rename uc file (%d)' % similarity_threshold,
                          'mv %s %s' % (temp_clusters_fp, clusters_fp))])
        files_to_remove.append(temp_log_fp)

        # rep set picking
        temp_rep_set_fp = get_tmp_filename(prefix='NestedReference',
                                           suffix='.fasta')
        pick_rep_set_cmd = \
            'pick_rep_set.py -m first -i %s -o %s -f %s' % (
                otu_fp,
                temp_rep_set_fp,
                current_inseqs_fp)
        commands.append([('Pick Rep Set (%d)' % similarity_threshold,
                          pick_rep_set_cmd)])

        # The logger is shared across all thresholds, so it must stay open
        command_handler(commands,
                        status_update_callback,
                        logger,
                        close_logger_on_success=False)

        # rename representative sequences
        rep_set_fp = '%s/%d_otus_%s.fasta' % (
            rep_set_dir,
            similarity_threshold,
            run_id)
        logger.write('Renaming OTU representative sequences so OTU ids are reference sequence ids.')
        # Context managers guarantee both handles are closed, including the
        # input handle (previously never closed) and on error paths.
        with open(rep_set_fp, 'w') as rep_set_f:
            with open(temp_rep_set_fp, 'U') as temp_rep_set_f:
                for e in rename_rep_seqs(temp_rep_set_f):
                    rep_set_f.write('>%s\n%s\n' % e)
        files_to_remove.append(temp_rep_set_fp)

        # filter the tree, if provided
        if current_tree_fp is not None:
            tree_fp = '%s/%d_otus_%s.tre' % (
                tree_dir,
                similarity_threshold,
                run_id)
            tree_cmd = 'filter_tree.py -i %s -f %s -o %s' %\
                (current_tree_fp, rep_set_fp, tree_fp)
            tree_commands = [[('Filter tree (%d)' % similarity_threshold,
                               tree_cmd)]]
            command_handler(tree_commands,
                            status_update_callback,
                            logger,
                            close_logger_on_success=False)
            # the filtered tree is the starting tree for the next threshold
            current_tree_fp = tree_fp

        # prep for the next iteration
        remove_files(files_to_remove)
        current_inseqs_fp = rep_set_fp

    logger.close()
示例6: run_core_diversity_analyses
def run_core_diversity_analyses(
biom_fp,
mapping_fp,
sampling_depth,
output_dir,
qiime_config,
command_handler=call_commands_serially,
tree_fp=None,
params=None,
categories=None,
arare_min_rare_depth=10,
arare_num_steps=10,
parallel=False,
suppress_taxa_summary=False,
suppress_beta_diversity=False,
suppress_alpha_diversity=False,
suppress_otu_category_significance=False,
status_update_callback=print_to_stdout):
"""
"""
if categories != None:
# Validate categories provided by the users
mapping_data, mapping_comments = \
parse_mapping_file_to_dict(open(mapping_fp,'U'))
metadata_map = MetadataMap(mapping_data, mapping_comments)
for c in categories:
if c not in metadata_map.CategoryNames:
raise ValueError, ("Category '%s' is not a column header "
"in your mapping file. "
"Categories are case and white space sensitive. Valid "
"choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
if metadata_map.hasSingleCategoryValue(c):
raise ValueError, ("Category '%s' contains only one value. "
"Categories analyzed here require at least two values." % c)
else:
categories= []
# prep some variables
if params == None:
params = parse_qiime_parameters([])
create_dir(output_dir)
index_fp = '%s/index.html' % output_dir
index_links = []
commands = []
# begin logging
old_log_fps = glob(join(output_dir,'log_20*txt'))
log_fp = generate_log_fp(output_dir)
index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
for old_log_fp in old_log_fps:
index_links.append(('Previous run log',old_log_fp,_index_headers['run_summary']))
logger = WorkflowLogger(log_fp,
params=params,
qiime_config=qiime_config)
input_fps = [biom_fp,mapping_fp]
if tree_fp != None:
input_fps.append(tree_fp)
log_input_md5s(logger,input_fps)
# run 'biom summarize-table' on input BIOM table
try:
params_str = get_params_str(params['biom-summarize-table'])
except KeyError:
params_str = ''
biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
if not exists(biom_table_stats_output_fp):
biom_table_summary_cmd = \
"biom summarize-table -i %s -o %s --suppress-md5 %s" % \
(biom_fp, biom_table_stats_output_fp,params_str)
commands.append([('Generate BIOM table summary',
biom_table_summary_cmd)])
else:
logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" \
% biom_table_stats_output_fp)
index_links.append(('BIOM table statistics',
biom_table_stats_output_fp,
_index_headers['run_summary']))
# filter samples with fewer observations than the requested sampling_depth.
# since these get filtered for some analyses (eg beta diversity after
# even sampling) it's useful to filter them here so they're filtered
# from all analyses.
filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
if not exists(filtered_biom_fp):
filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
(biom_fp,filtered_biom_fp,sampling_depth)
commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
filter_samples_cmd)])
else:
logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" \
% filtered_biom_fp)
biom_fp = filtered_biom_fp
# run initial commands and reset the command list
if len(commands) > 0:
command_handler(commands,
status_update_callback,
logger,
#.........这里部分代码省略.........
示例7: assign_taxonomy_multiple_times
def assign_taxonomy_multiple_times(input_dirs, output_dir, assignment_methods,
reference_seqs_fp, id_to_taxonomy_fp,
confidences=None, e_values=None, rtax_modes=None,
uclust_min_consensus_fractions=None, uclust_similarities=None,
uclust_max_accepts=None, input_fasta_filename='rep_set.fna',
clean_otu_table_filename='otu_table_mc2_no_pynast_failures.biom',
read_1_seqs_filename='seqs1.fna', read_2_seqs_filename='seqs2.fna',
rtax_read_id_regexes=None, rtax_amplicon_id_regexes=None,
rtax_header_id_regexes=None, rdp_max_memory=4000,
command_handler=call_commands_serially,
status_update_callback=no_status_updates, force=False):
""" Performs sanity checks on passed arguments and directories. Builds
commands for each method and sends them off to be executed. """
## Check if output directory exists
try:
create_dir(output_dir, fail_on_exist=not force)
except OSError:
raise WorkflowError("Output directory '%s' already exists. Please "
"choose a different directory, or force overwrite with -f."
% output_dir)
logger = WorkflowLogger(generate_log_fp(output_dir))
# We're going to zip these with the input directories.
num_dirs = len(input_dirs)
if rtax_read_id_regexes is None:
rtax_read_id_regexes = [None] * num_dirs
if rtax_amplicon_id_regexes is None:
rtax_amplicon_id_regexes = [None] * num_dirs
if rtax_header_id_regexes is None:
rtax_header_id_regexes = [None] * num_dirs
if num_dirs != len(rtax_read_id_regexes) or \
num_dirs != len(rtax_amplicon_id_regexes) or \
num_dirs != len(rtax_header_id_regexes):
raise WorkflowError("The number of RTAX regular expressions must "
"match the number of input directories.")
for input_dir, rtax_read_id_regex, rtax_amplicon_id_regex, \
rtax_header_id_regex in zip(input_dirs, rtax_read_id_regexes,
rtax_amplicon_id_regexes, rtax_header_id_regexes):
## Make sure the input dataset directory exists.
if not isdir(input_dir):
raise WorkflowError("The input dataset directory '%s' does not "
"exist." % input_dir)
input_dir_name = split(normpath(input_dir))[1]
output_dataset_dir = join(output_dir, input_dir_name)
input_fasta_fp = join(input_dir, input_fasta_filename)
clean_otu_table_fp = join(input_dir, clean_otu_table_filename)
read_1_seqs_fp = join(input_dir, read_1_seqs_filename)
read_2_seqs_fp = join(input_dir, read_2_seqs_filename)
logger.write("\nCreating output subdirectory '%s' if it doesn't "
"already exist.\n" % output_dataset_dir)
create_dir(output_dataset_dir)
for method in assignment_methods:
## Method is RDP
if method == 'rdp':
## Check for execution parameters required by RDP method
if confidences is None:
raise WorkflowError("You must specify at least one "
"confidence level.")
## Generate command for RDP
commands = _generate_rdp_commands(output_dataset_dir,
input_fasta_fp,
reference_seqs_fp,
id_to_taxonomy_fp,
clean_otu_table_fp,
confidences,
rdp_max_memory=rdp_max_memory)
## Method is BLAST
elif method == 'blast':
## Check for execution parameters required by BLAST method
if e_values is None:
raise WorkflowError("You must specify at least one "
"E-value.")
## Generate command for BLAST
commands = _generate_blast_commands(output_dataset_dir,
input_fasta_fp,
reference_seqs_fp,
id_to_taxonomy_fp,
clean_otu_table_fp,
e_values)
## Method is Mothur
elif method == 'mothur':
## Check for execution parameters required by Mothur method
if confidences is None:
raise WorkflowError("You must specify at least one "
"confidence level.")
## Generate command for mothur
commands = _generate_mothur_commands(output_dataset_dir,
input_fasta_fp,
reference_seqs_fp,
id_to_taxonomy_fp,
clean_otu_table_fp,
confidences)
#.........这里部分代码省略.........
示例8: pick_subsampled_open_reference_otus
def pick_subsampled_open_reference_otus(
input_fp,
refseqs_fp,
output_dir,
percent_subsample,
new_ref_set_id,
command_handler,
params,
qiime_config,
prefilter_refseqs_fp=None,
run_assign_tax=True,
run_align_and_tree=True,
prefilter_percent_id=0.60,
min_otu_size=2,
step1_otu_map_fp=None,
step1_failures_fasta_fp=None,
parallel=False,
suppress_step4=False,
logger=None,
suppress_md5=False,
denovo_otu_picking_method="uclust",
reference_otu_picking_method="uclust_ref",
status_update_callback=print_to_stdout,
):
""" Run the data preparation steps of Qiime
The steps performed by this function are:
- Pick reference OTUs against refseqs_fp
- Subsample the failures to n sequences.
- Pick OTUs de novo on the n failures.
- Pick representative sequences for the resulting OTUs.
- Pick reference OTUs on all failures using the
representative set from step 4 as the reference set.
"""
# for now only allowing uclust for otu picking
allowed_denovo_otu_picking_methods = ["uclust", "usearch61"]
allowed_reference_otu_picking_methods = ["uclust_ref", "usearch61_ref"]
assert denovo_otu_picking_method in allowed_denovo_otu_picking_methods, (
"Unknown de novo OTU picking method: %s. Known methods are: %s"
% (denovo_otu_picking_method, ",".join(allowed_denovo_otu_picking_methods))
)
assert reference_otu_picking_method in allowed_reference_otu_picking_methods, (
"Unknown reference OTU picking method: %s. Known methods are: %s"
% (reference_otu_picking_method, ",".join(allowed_reference_otu_picking_methods))
)
# Prepare some variables for the later steps
input_dir, input_filename = split(input_fp)
input_basename, input_ext = splitext(input_filename)
create_dir(output_dir)
commands = []
if logger is None:
logger = WorkflowLogger(generate_log_fp(output_dir), params=params, qiime_config=qiime_config)
close_logger_on_success = True
else:
close_logger_on_success = False
if not suppress_md5:
log_input_md5s(logger, [input_fp, refseqs_fp, step1_otu_map_fp, step1_failures_fasta_fp])
# if the user has not passed a different reference collection for the pre-filter,
# used the main refseqs_fp. this is useful if the user wants to provide a smaller
# reference collection, or to use the input reference collection when running in
# iterative mode (rather than an iteration's new refseqs)
if prefilter_refseqs_fp is None:
prefilter_refseqs_fp = refseqs_fp
# Step 1: Closed-reference OTU picking on the input file (if not already
# complete)
if step1_otu_map_fp and step1_failures_fasta_fp:
step1_dir = "%s/step1_otus" % output_dir
create_dir(step1_dir)
logger.write("Using pre-existing reference otu map and failures.\n\n")
else:
if prefilter_percent_id is not None:
prefilter_dir = "%s/prefilter_otus/" % output_dir
prefilter_failures_list_fp = "%s/%s_failures.txt" % (prefilter_dir, input_basename)
prefilter_pick_otu_cmd = pick_reference_otus(
input_fp,
prefilter_dir,
reference_otu_picking_method,
prefilter_refseqs_fp,
parallel,
params,
logger,
prefilter_percent_id,
)
commands.append([("Pick Reference OTUs (prefilter)", prefilter_pick_otu_cmd)])
prefiltered_input_fp = "%s/prefiltered_%s%s" % (prefilter_dir, input_basename, input_ext)
filter_fasta_cmd = "filter_fasta.py -f %s -o %s -s %s -n" % (
input_fp,
prefiltered_input_fp,
prefilter_failures_list_fp,
)
commands.append([("Filter prefilter failures from input", filter_fasta_cmd)])
# Call the command handler on the list of commands
#.........这里部分代码省略.........
示例9: create_personal_results
def create_personal_results(output_dir,
mapping_fp,
coord_fp,
collated_dir,
otu_table_fp,
prefs_fp,
personal_id_column,
personal_ids=None,
column_title='Self',
individual_titles=None,
category_to_split='BodySite',
time_series_category='WeeksSinceStart',
rarefaction_depth=10000,
alpha=0.05,
rep_set_fp=None,
body_site_rarefied_otu_table_dir=None,
retain_raw_data=False,
suppress_alpha_rarefaction=False,
suppress_beta_diversity=False,
suppress_taxa_summary_plots=False,
suppress_alpha_diversity_boxplots=False,
suppress_otu_category_significance=False,
command_handler=call_commands_serially,
status_update_callback=no_status_updates):
# Create our output directory and copy over the resources the personalized
# pages need (e.g. javascript, images, etc.).
create_dir(output_dir)
support_files_dir = join(output_dir, 'support_files')
if not exists(support_files_dir):
copytree(join(get_project_dir(), 'my_microbes', 'support_files'),
support_files_dir)
logger = WorkflowLogger(generate_log_fp(output_dir))
mapping_data, header, comments = parse_mapping_file(open(mapping_fp, 'U'))
try:
personal_id_index = header.index(personal_id_column)
except ValueError:
raise ValueError("Personal ID field '%s' is not a mapping file column "
"header." % personal_id_column)
try:
bodysite_index = header.index(category_to_split)
except ValueError:
raise ValueError("Category to split field '%s' is not a mapping file "
"column header." % category_to_split)
header = header[:-1] + [column_title] + [header[-1]]
# column that differentiates between body-sites within a single individual
# used for the creation of the vectors in make_3d_plots.py, this data is
# created by concatenating the two columns when writing the mapping file
site_id_category = '%s&&%s' % (personal_id_column, category_to_split)
header.insert(len(header)-1, site_id_category)
all_personal_ids = get_personal_ids(mapping_data, personal_id_index)
if personal_ids == None:
personal_ids = all_personal_ids
else:
for pid in personal_ids:
if pid not in all_personal_ids:
raise ValueError("'%s' is not a personal ID in the mapping "
"file column '%s'." %
(pid, personal_id_column))
if time_series_category not in header:
raise ValueError("Time series field '%s' is not a mapping file column "
"header." % time_series_category)
otu_table_title = splitext(basename(otu_table_fp))
output_directories = []
raw_data_files = []
raw_data_dirs = []
# Rarefy the OTU table and split by body site here (instead of on a
# per-individual basis) as we can use the same rarefied and split tables
# for each individual.
if not suppress_otu_category_significance:
rarefied_otu_table_fp = join(output_dir,
add_filename_suffix(otu_table_fp,
'_even%d' % rarefaction_depth))
if body_site_rarefied_otu_table_dir is None:
commands = []
cmd_title = 'Rarefying OTU table'
cmd = 'single_rarefaction.py -i %s -o %s -d %s' % (otu_table_fp,
rarefied_otu_table_fp, rarefaction_depth)
commands.append([(cmd_title, cmd)])
raw_data_files.append(rarefied_otu_table_fp)
per_body_site_dir = join(output_dir, 'per_body_site_otu_tables')
cmd_title = 'Splitting rarefied OTU table by body site'
cmd = 'split_otu_table.py -i %s -m %s -f %s -o %s' % (
rarefied_otu_table_fp, mapping_fp, category_to_split,
per_body_site_dir)
commands.append([(cmd_title, cmd)])
raw_data_dirs.append(per_body_site_dir)
#.........这里部分代码省略.........
示例10: run_pick_closed_reference_otus
def run_pick_closed_reference_otus(
input_fp,
refseqs_fp,
output_dir,
taxonomy_fp,
command_handler,
params,
qiime_config,
assign_taxonomy=False,
parallel=False,
logger=None,
suppress_md5=False,
status_update_callback=print_to_stdout):
""" Run the data preparation steps of Qiime
The steps performed by this function are:
1) Pick OTUs;
2) If assign_taxonomy is True, choose representative sequence
for OTUs and assign taxonomy using a classifier.
3) Build an OTU table with optional predefined taxonomy
(if assign_taxonomy=False) or taxonomic assignments from step 2
(if assign_taxonomy=True).

Only the set-up and the construction of the OTU picking command are
shown in this excerpt; the remainder of the function is omitted.

Parameters (as used by the visible part of the body):
input_fp -- path handed to the OTU picking script via -i; its base name
is also used to name the OTU map file.
refseqs_fp -- path handed to the OTU picking script via -r.
output_dir -- directory created with create_dir(); the log file and the
'<method>_picked_otus' sub-directory are placed under it.
taxonomy_fp -- included in the md5 logging of input files; any further
use is not visible in this excerpt.
command_handler, assign_taxonomy, status_update_callback -- not
referenced in the visible part of the body.
params -- nested mapping of per-script parameters; the sections
'pick_otus' and 'parallel' are read, and a missing section is
tolerated (KeyError is caught).
qiime_config -- forwarded to WorkflowLogger when a logger is created.
parallel -- if true and the method is one of blast, uclust_ref,
usearch61_ref or sortmerna, a parallel_pick_otus_<method>.py command
is built instead of a pick_otus.py command.
logger -- existing logger to write to; when None a new WorkflowLogger
is created for this run.
suppress_md5 -- if true, the md5 sums of the input files are not logged.

Raises AssertionError when the configured OTU picking method is not one
of the supported reference-based methods.
"""
# confirm that a valid otu picking method was supplied before doing
# any work
reference_otu_picking_methods = ['blast', 'uclust_ref', 'usearch61_ref',
'usearch_ref', 'sortmerna']
# Fall back to 'uclust_ref' when params has no 'pick_otus' section or that
# section does not name a method.
try:
otu_picking_method = params['pick_otus']['otu_picking_method']
except KeyError:
otu_picking_method = 'uclust_ref'
# NOTE(review): assert statements are skipped when Python runs with -O,
# so this validation disappears in optimized mode.
assert otu_picking_method in reference_otu_picking_methods,\
"Invalid OTU picking method supplied: %s. Valid choices are: %s"\
% (otu_picking_method, ' '.join(reference_otu_picking_methods))
# Prepare some variables for the later steps
input_dir, input_filename = split(input_fp)
input_basename, input_ext = splitext(input_filename)
create_dir(output_dir)
commands = []
# Create a logger only if the caller did not supply one, and remember
# whether this function created it (presumably so that only a logger
# created here is closed later -- the closing code is not in this excerpt).
if logger is None:
logger = WorkflowLogger(generate_log_fp(output_dir),
params=params,
qiime_config=qiime_config)
close_logger_on_success = True
else:
close_logger_on_success = False
if not suppress_md5:
log_input_md5s(logger, [input_fp, refseqs_fp, taxonomy_fp])
# Prep the OTU picking command
pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
# 'usearch_ref' is a valid method but is not listed here, so with
# parallel=True it still takes the serial branch below.
if parallel and (otu_picking_method == 'blast' or
otu_picking_method == 'uclust_ref' or
otu_picking_method == 'usearch61_ref' or
otu_picking_method == 'sortmerna'):
# Grab the parallel-specific parameters
try:
params_str = get_params_str(params['parallel'])
except KeyError:
params_str = ''
# Grab the OTU picker parameters
try:
# Want to find a cleaner strategy for this: the parallel script
# is method-specific, so the otu_picking_method entry is removed
# from a copy of the pick_otus parameters before they are turned
# into an option string. This works for now though.
d = params['pick_otus'].copy()
if 'otu_picking_method' in d:
del d['otu_picking_method']
params_str += ' %s' % get_params_str(d)
except KeyError:
pass
otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
# Build the OTU picking command
pick_otus_cmd = '%s -i %s -o %s -r %s -T %s' %\
(otu_picking_script,
input_fp,
pick_otu_dir,
refseqs_fp,
params_str)
else:
try:
params_str = get_params_str(params['pick_otus'])
except KeyError:
params_str = ''
# Since this is reference-based OTU picking we always want to
# suppress new clusters -- force it here.
params_str += ' --suppress_new_clusters'
logger.write(
"Forcing --suppress_new_clusters as this is "
"closed-reference OTU picking.\n\n")
# Build the OTU picking command
pick_otus_cmd = 'pick_otus.py -i %s -o %s -r %s -m %s %s' %\
(input_fp,
#......... part of the code is omitted here .........
示例11: run_core_diversity_analyses
def run_core_diversity_analyses(
biom_fp,
mapping_fp,
sampling_depth,
output_dir,
qiime_config,
command_handler=call_commands_serially,
tree_fp=None,
params=None,
categories=None,
arare_min_rare_depth=10,
arare_num_steps=10,
parallel=False,
suppress_taxa_summary=False,
suppress_beta_diversity=False,
suppress_alpha_diversity=False,
suppress_group_significance=False,
status_update_callback=print_to_stdout,
):
"""Validate the inputs and queue the first commands of the core diversity run.

Only the opening part of the function is shown in this excerpt. The
visible code does the following:
1) If categories is given, checks every category against the mapping
file: it must be a column header and must have more than one value.
2) Creates output_dir, starts a new WorkflowLogger and logs the md5 sums
of biom_fp, mapping_fp and (when given) tree_fp.
3) Queues a 'biom summarize-table' command unless its output file
already exists.
4) Queues a filter_samples_from_otu_table.py command that keeps samples
with at least sampling_depth sequences, unless the filtered table
already exists.

Parameters (as used by the visible part of the body):
biom_fp -- input BIOM table path, passed via -i to both queued commands.
mapping_fp -- mapping file path; opened and parsed only when categories
is not None.
sampling_depth -- minimum sequence count per sample; formatted with %d,
so it must be numeric.
output_dir -- directory created with create_dir(); all outputs seen here
are written beneath it.
qiime_config -- forwarded to WorkflowLogger.
tree_fp -- optional path; when not None it is added to the md5 logging.
params -- per-script parameter mapping; when None it is replaced by
parse_qiime_parameters([]).
categories -- optional list of mapping file column headers; None is
treated as an empty list.
The remaining parameters are not referenced in the visible part of the
body.

Raises ValueError if a category is not a mapping file column header or
contains only one value.
"""
if categories is not None:
# Validate categories provided by the users
# NOTE(review): the "U" mode flag was removed from open() in Python 3.11,
# and the file handle is not explicitly closed in the visible code.
mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U"))
metadata_map = MetadataMap(mapping_data, mapping_comments)
for c in categories:
if c not in metadata_map.CategoryNames:
raise ValueError(
"Category '%s' is not a column header "
"in your mapping file. "
"Categories are case and white space sensitive. Valid "
"choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
)
if metadata_map.hasSingleCategoryValue(c):
raise ValueError(
"Category '%s' contains only one value. "
"Categories analyzed here require at least two values." % c
)
else:
categories = []
comma_separated_categories = ",".join(categories)
# prep some variables
if params is None:
params = parse_qiime_parameters([])
create_dir(output_dir)
index_fp = "%s/index.html" % output_dir
# (link text, file path, section header) tuples collected for the index page.
index_links = []
commands = []
# begin logging
# Collect logs left by earlier runs before generating the new log path, so
# that they are listed as "Previous run log" entries.
old_log_fps = glob(join(output_dir, "log_20*txt"))
log_fp = generate_log_fp(output_dir)
index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
for old_log_fp in old_log_fps:
index_links.append(("Previous run log", old_log_fp, _index_headers["run_summary"]))
logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
input_fps = [biom_fp, mapping_fp]
if tree_fp is not None:
input_fps.append(tree_fp)
log_input_md5s(logger, input_fps)
# run 'biom summarize-table' on input BIOM table
# A missing "biom-summarize-table" section simply means no extra options.
try:
params_str = get_params_str(params["biom-summarize-table"])
except KeyError:
params_str = ""
biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
# Existing output is reused rather than regenerated; the skip is logged.
if not exists(biom_table_stats_output_fp):
biom_table_summary_cmd = "biom summarize-table -i %s -o %s --suppress-md5 %s" % (
biom_fp,
biom_table_stats_output_fp,
params_str,
)
commands.append([("Generate BIOM table summary", biom_table_summary_cmd)])
else:
logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" % biom_table_stats_output_fp)
index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))
# filter samples with fewer observations than the requested sampling_depth.
# since these get filtered for some analyses (eg beta diversity after
# even sampling) it's useful to filter them here so they're filtered
# from all analyses.
filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
if not exists(filtered_biom_fp):
filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
biom_fp,
filtered_biom_fp,
sampling_depth,
)
commands.append(
[
(
"Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
filter_samples_cmd,
)
]
)
else:
#......... part of the code is omitted here .........