本文整理汇总了Python中qiime.workflow.util.WorkflowLogger.close方法的典型用法代码示例。如果您正苦于以下问题:Python WorkflowLogger.close方法的具体用法?Python WorkflowLogger.close怎么用?Python WorkflowLogger.close使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类qiime.workflow.util.WorkflowLogger的用法示例。
在下文中一共展示了WorkflowLogger.close方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: iterative_pick_subsampled_open_reference_otus
# 需要导入模块: from qiime.workflow.util import WorkflowLogger [as 别名]
# 或者: from qiime.workflow.util.WorkflowLogger import close [as 别名]
def iterative_pick_subsampled_open_reference_otus(
        input_fps,
        refseqs_fp,
        output_dir,
        percent_subsample,
        new_ref_set_id,
        command_handler,
        params,
        qiime_config,
        prefilter_refseqs_fp=None,
        prefilter_percent_id=0.60,
        min_otu_size=2,
        run_assign_tax=True,
        run_align_and_tree=True,
        step1_otu_map_fp=None,
        step1_failures_fasta_fp=None,
        parallel=False,
        suppress_step4=False,
        logger=None,
        suppress_md5=False,
        denovo_otu_picking_method='uclust',
        reference_otu_picking_method='uclust_ref',
        status_update_callback=print_to_stdout):
    """Call the pick_subsampled_open_reference_otus workflow on multiple inputs
    and handle processing of the results.

    Each entry in input_fps is one iteration: OTUs are picked against the
    current reference collection, then that iteration's new reference
    sequences (new_refseqs.fna) become the reference collection for the next
    iteration. After all iterations, the per-iteration OTU tables are merged
    and a master representative set is built from the per-iteration rep sets.

    NOTE(review): this excerpt is truncated by the source page; the tail of
    the function (after the final command_handler call) is not visible here,
    so run_assign_tax / run_align_and_tree are presumably consumed in the
    omitted portion -- confirm against the full module.
    """
    create_dir(output_dir)
    commands = []
    # If no logger was passed in, this function owns the logger and is
    # responsible for closing it on success; a caller-supplied logger is
    # left open for the caller to manage.
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False
    # if the user has not passed a different reference collection for the pre-filter,
    # use the input refseqs_fp for all iterations. we want to pre-filter all data against
    # the input data as lower percent identity searches with uclust can be slow, so we
    # want the reference collection to stay at a reasonable size.
    if prefilter_refseqs_fp == None:
        prefilter_refseqs_fp = refseqs_fp
    otu_table_fps = []
    repset_fasta_fps = []
    for i,input_fp in enumerate(input_fps):
        iteration_output_dir = '%s/%d/' % (output_dir,i)
        if iteration_output_exists(iteration_output_dir,min_otu_size):
            # if the output from an iteration already exists, skip that
            # iteration (useful for continuing failed runs)
            log_input_md5s(logger,[input_fp,refseqs_fp])
            logger.write('Iteration %d (input file: %s) output data already exists. '
                         'Skipping and moving to next.\n\n' % (i,input_fp))
        else:
            # Taxonomy assignment and alignment/tree building are disabled
            # per-iteration (run_assign_tax=False, run_align_and_tree=False);
            # presumably they are performed once on the merged results in the
            # omitted tail of this function -- confirm against the full module.
            pick_subsampled_open_reference_otus(input_fp=input_fp,
                refseqs_fp=refseqs_fp,
                output_dir=iteration_output_dir,
                percent_subsample=percent_subsample,
                new_ref_set_id='.'.join([new_ref_set_id,str(i)]),
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                run_assign_tax=False,
                run_align_and_tree=False,
                prefilter_refseqs_fp=prefilter_refseqs_fp,
                prefilter_percent_id=prefilter_percent_id,
                min_otu_size=min_otu_size,
                step1_otu_map_fp=step1_otu_map_fp,
                step1_failures_fasta_fp=step1_failures_fasta_fp,
                parallel=parallel,
                suppress_step4=suppress_step4,
                logger=logger,
                suppress_md5=suppress_md5,
                denovo_otu_picking_method=denovo_otu_picking_method,
                reference_otu_picking_method=reference_otu_picking_method,
                status_update_callback=status_update_callback)
        ## perform post-iteration file shuffling whether the previous iteration's
        ## data previously existed or was just computed.
        # step1 otu map and failures can only be used for the first iteration
        # as subsequent iterations need to use updated refseqs files
        step1_otu_map_fp = step1_failures_fasta_fp = None
        new_refseqs_fp = '%s/new_refseqs.fna' % iteration_output_dir
        refseqs_fp = new_refseqs_fp
        otu_table_fps.append('%s/otu_table_mc%d.biom' % (iteration_output_dir,min_otu_size))
        repset_fasta_fps.append('%s/rep_set.fna' % iteration_output_dir)
    # Merge OTU tables - check for existence first as this step has historically
    # been a frequent failure, so is sometimes run manually in failed runs.
    otu_table_fp = '%s/otu_table_mc%d.biom' % (output_dir,min_otu_size)
    if not (exists(otu_table_fp) and getsize(otu_table_fp) > 0):
        merge_cmd = 'merge_otu_tables.py -i %s -o %s' %\
            (','.join(otu_table_fps),otu_table_fp)
        commands.append([("Merge OTU tables",merge_cmd)])
    # Build master rep set
    final_repset_fp = '%s/rep_set.fna' % output_dir
    final_repset_from_iteration_repsets_fps(repset_fasta_fps,final_repset_fp)
    command_handler(commands,
                    status_update_callback,
    # ......... remainder of this function omitted in the source excerpt .........
示例2: pick_subsampled_open_reference_otus
# 需要导入模块: from qiime.workflow.util import WorkflowLogger [as 别名]
# 或者: from qiime.workflow.util.WorkflowLogger import close [as 别名]
def pick_subsampled_open_reference_otus(input_fp,
                                        refseqs_fp,
                                        output_dir,
                                        percent_subsample,
                                        new_ref_set_id,
                                        command_handler,
                                        params,
                                        qiime_config,
                                        prefilter_refseqs_fp=None,
                                        run_assign_tax=True,
                                        run_align_and_tree=True,
                                        prefilter_percent_id=0.60,
                                        min_otu_size=2,
                                        step1_otu_map_fp=None,
                                        step1_failures_fasta_fp=None,
                                        parallel=False,
                                        suppress_step4=False,
                                        logger=None,
                                        suppress_md5=False,
                                        denovo_otu_picking_method='uclust',
                                        reference_otu_picking_method='uclust_ref',
                                        status_update_callback=print_to_stdout):
    """Run the data preparation steps of Qiime

    The steps performed by this function are:
        - Pick reference OTUs against refseqs_fp
        - Subsample the failures to n sequences.
        - Pick OTUs de novo on the n failures.
        - Pick representative sequences for the resulting OTUs.
        - Pick reference OTUs on all failures using the
          representative set from step 4 as the reference set.

    NOTE(review): this excerpt is truncated by the source page; only the
    argument validation, logger setup, and (optional) prefilter portion of
    step 1 are visible here.
    """
    # for now only allowing uclust for otu picking
    allowed_denovo_otu_picking_methods = ['uclust','usearch61']
    allowed_reference_otu_picking_methods = ['uclust_ref','usearch61_ref']
    assert denovo_otu_picking_method in allowed_denovo_otu_picking_methods,\
        "Unknown de novo OTU picking method: %s. Known methods are: %s"\
        % (denovo_otu_picking_method,
           ','.join(allowed_denovo_otu_picking_methods))
    assert reference_otu_picking_method in allowed_reference_otu_picking_methods,\
        "Unknown reference OTU picking method: %s. Known methods are: %s"\
        % (reference_otu_picking_method,
           ','.join(allowed_reference_otu_picking_methods))
    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    # If no logger was passed in, this function owns the logger (and closes
    # it on success); a caller-supplied logger is left open for the caller.
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False
    if not suppress_md5:
        log_input_md5s(logger,[input_fp,
                               refseqs_fp,
                               step1_otu_map_fp,
                               step1_failures_fasta_fp])
    # if the user has not passed a different reference collection for the pre-filter,
    # use the main refseqs_fp. this is useful if the user wants to provide a smaller
    # reference collection, or to use the input reference collection when running in
    # iterative mode (rather than an iteration's new refseqs)
    if prefilter_refseqs_fp == None:
        prefilter_refseqs_fp = refseqs_fp
    ## Step 1: Closed-reference OTU picking on the input file (if not already complete)
    if step1_otu_map_fp and step1_failures_fasta_fp:
        step1_dir = '%s/step1_otus' % output_dir
        create_dir(step1_dir)
        logger.write("Using pre-existing reference otu map and failures.\n\n")
    else:
        # Optional prefilter: discard reads that fail a low-identity search
        # against prefilter_refseqs_fp before the main OTU picking.
        if prefilter_percent_id != None:
            prefilter_dir = '%s/prefilter_otus/' % output_dir
            prefilter_failures_list_fp = '%s/%s_failures.txt' % \
                (prefilter_dir,input_basename)
            prefilter_pick_otu_cmd = pick_reference_otus(\
                input_fp,prefilter_dir,reference_otu_picking_method,
                prefilter_refseqs_fp,parallel,params,logger,prefilter_percent_id)
            commands.append([('Pick Reference OTUs (prefilter)', prefilter_pick_otu_cmd)])
            prefiltered_input_fp = '%s/prefiltered_%s%s' %\
                (prefilter_dir,input_basename,input_ext)
            filter_fasta_cmd = 'filter_fasta.py -f %s -o %s -s %s -n' %\
                (input_fp,prefiltered_input_fp,prefilter_failures_list_fp)
            commands.append([('Filter prefilter failures from input', filter_fasta_cmd)])
            # downstream steps operate on the prefiltered sequences
            input_fp = prefiltered_input_fp
            input_dir, input_filename = split(input_fp)
            input_basename, input_ext = splitext(input_filename)
        ## Build the OTU picking command
        step1_dir = \
            '%s/step1_otus' % output_dir
    # ......... remainder of this function omitted in the source excerpt .........
示例3: run_core_diversity_analyses
# 需要导入模块: from qiime.workflow.util import WorkflowLogger [as 别名]
# 或者: from qiime.workflow.util.WorkflowLogger import close [as 别名]
#.........这里部分代码省略.........
if not exists(biom_table_stats_output_fp):
biom_table_summary_cmd = \
"biom summarize-table -i %s -o %s --suppress-md5 %s" % \
(biom_fp, biom_table_stats_output_fp,params_str)
commands.append([('Generate BIOM table summary',
biom_table_summary_cmd)])
else:
logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" \
% biom_table_stats_output_fp)
index_links.append(('BIOM table statistics',
biom_table_stats_output_fp,
_index_headers['run_summary']))
# filter samples with fewer observations than the requested sampling_depth.
# since these get filtered for some analyses (eg beta diversity after
# even sampling) it's useful to filter them here so they're filtered
# from all analyses.
filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
if not exists(filtered_biom_fp):
filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
(biom_fp,filtered_biom_fp,sampling_depth)
commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
filter_samples_cmd)])
else:
logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" \
% filtered_biom_fp)
biom_fp = filtered_biom_fp
# run initial commands and reset the command list
if len(commands) > 0:
command_handler(commands,
status_update_callback,
logger,
close_logger_on_success=False)
commands = []
if not suppress_beta_diversity:
bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
# Need to check for the existence of any distance matrices, since the user
# can select which will be generated.
existing_dm_fps = glob('%s/*_dm.txt' % bdiv_even_output_dir)
if len(existing_dm_fps) == 0:
even_dm_fps = run_beta_diversity_through_plots(
otu_table_fp=biom_fp,
mapping_fp=mapping_fp,
output_dir=bdiv_even_output_dir,
command_handler=command_handler,
params=params,
qiime_config=qiime_config,
sampling_depth=sampling_depth,
tree_fp=tree_fp,
parallel=parallel,
logger=logger,
suppress_md5=True,
status_update_callback=status_update_callback)
else:
logger.write("Skipping beta_diversity_through_plots.py as %s exist(s).\n\n" \
% ', '.join(existing_dm_fps))
even_dm_fps = [(split(fp)[1].strip('_dm.txt'),fp) for fp in existing_dm_fps]
# Get make_distance_boxplots parameters
try:
params_str = get_params_str(params['make_distance_boxplots'])
except KeyError:
params_str = ''
示例4: pick_subsampled_open_reference_otus
# 需要导入模块: from qiime.workflow.util import WorkflowLogger [as 别名]
# 或者: from qiime.workflow.util.WorkflowLogger import close [as 别名]
def pick_subsampled_open_reference_otus(input_fp,
                                        refseqs_fp,
                                        output_dir,
                                        percent_subsample,
                                        new_ref_set_id,
                                        command_handler,
                                        params,
                                        qiime_config,
                                        prefilter_refseqs_fp=None,
                                        run_assign_tax=True,
                                        run_align_and_tree=True,
                                        prefilter_percent_id=None,
                                        min_otu_size=2,
                                        step1_otu_map_fp=None,
                                        step1_failures_fasta_fp=None,
                                        parallel=False,
                                        suppress_step4=False,
                                        logger=None,
                                        suppress_md5=False,
                                        suppress_index_page=False,
                                        denovo_otu_picking_method='uclust',
                                        reference_otu_picking_method='uclust_ref',
                                        status_update_callback=print_to_stdout,
                                        minimum_failure_threshold=100000):
    """Run the data preparation steps of Qiime

    The steps performed by this function are:
        - Pick reference OTUs against refseqs_fp
        - Subsample the failures to n sequences.
        - Pick OTUs de novo on the n failures.
        - Pick representative sequences for the resulting OTUs.
        - Pick reference OTUs on all failures using the
          representative set from step 4 as the reference set.

    This variant additionally collects (label, path, header) tuples in
    index_links for an HTML index page (see suppress_index_page) and accepts
    sortmerna/sumaclust as picking methods.

    NOTE(review): this excerpt is truncated by the source page; only argument
    validation, logger setup, and the start of step 1 are visible here.
    """
    # for now only allowing uclust/usearch/sortmerna+sumaclust for otu picking
    allowed_denovo_otu_picking_methods = ['uclust', 'usearch61', 'sumaclust']
    allowed_reference_otu_picking_methods = ['uclust_ref', 'usearch61_ref',
                                             'sortmerna']
    assert denovo_otu_picking_method in allowed_denovo_otu_picking_methods,\
        "Unknown de novo OTU picking method: %s. Known methods are: %s"\
        % (denovo_otu_picking_method,
           ','.join(allowed_denovo_otu_picking_methods))
    assert reference_otu_picking_method in allowed_reference_otu_picking_methods,\
        "Unknown reference OTU picking method: %s. Known methods are: %s"\
        % (reference_otu_picking_method,
           ','.join(allowed_reference_otu_picking_methods))
    # Prepare some variables for the later steps
    index_links = []
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    # If no logger was passed in, this function owns the logger (and closes
    # it on success); the log file is also linked from the index page.
    if logger is None:
        log_fp = generate_log_fp(output_dir)
        logger = WorkflowLogger(log_fp,
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
        index_links.append(
            ('Run summary data',
             log_fp,
             _index_headers['run_summary']))
    else:
        close_logger_on_success = False
    if not suppress_md5:
        log_input_md5s(logger, [input_fp,
                                refseqs_fp,
                                step1_otu_map_fp,
                                step1_failures_fasta_fp])
    # if the user has not passed a different reference collection for the pre-filter,
    # use the main refseqs_fp. this is useful if the user wants to provide a smaller
    # reference collection, or to use the input reference collection when running in
    # iterative mode (rather than an iteration's new refseqs)
    if prefilter_refseqs_fp is None:
        prefilter_refseqs_fp = refseqs_fp
    # Step 1: Closed-reference OTU picking on the input file (if not already
    # complete)
    if step1_otu_map_fp and step1_failures_fasta_fp:
        step1_dir = '%s/step1_otus' % output_dir
        create_dir(step1_dir)
        logger.write("Using pre-existing reference otu map and failures.\n\n")
    else:
        # Optional prefilter (disabled by default here: prefilter_percent_id
        # defaults to None, unlike the older variant of this function).
        if prefilter_percent_id is not None:
            prefilter_dir = '%s/prefilter_otus/' % output_dir
            prefilter_failures_list_fp = '%s/%s_failures.txt' % \
                (prefilter_dir, input_basename)
            prefilter_pick_otu_cmd = pick_reference_otus(
                input_fp, prefilter_dir, reference_otu_picking_method,
                prefilter_refseqs_fp, parallel, params, logger, prefilter_percent_id)
            commands.append(
                [('Pick Reference OTUs (prefilter)', prefilter_pick_otu_cmd)])
    # ......... remainder of this function omitted in the source excerpt .........
示例5: pick_nested_reference_otus
# 需要导入模块: from qiime.workflow.util import WorkflowLogger [as 别名]
# 或者: from qiime.workflow.util.WorkflowLogger import close [as 别名]
def pick_nested_reference_otus(input_fasta_fp,
                               input_tree_fp,
                               output_dir,
                               run_id,
                               similarity_thresholds,
                               command_handler,
                               status_update_callback=print_to_stdout):
    """Pick reference OTU sets at successively lower similarity thresholds.

    Starting from input_fasta_fp, OTUs are picked at the highest threshold;
    that threshold's representative set then becomes the input sequences for
    the next (lower) threshold, and so on. If input_tree_fp is provided, the
    tree is filtered down to each threshold's representative sequences too.

    input_fasta_fp: fasta file of input sequences
    input_tree_fp: tree relating the input sequences (may be None)
    output_dir: top-level directory for all output
    run_id: identifier embedded in output filenames
    similarity_thresholds: integer percent identities (e.g. [97, 94]);
        sorted in place, descending
    command_handler: callable that executes the generated shell commands
    status_update_callback: callable used for status reporting
    """
    # Prepare the output directory layout.
    create_dir(output_dir)
    otu_dir = join(output_dir, 'otus')
    create_dir(otu_dir)
    rep_set_dir = join(output_dir, 'rep_set')
    create_dir(rep_set_dir)
    # currently not doing anything with taxonomies and trees
    # tax_dir = join(output_dir,'taxonomies')
    # create_dir(tax_dir)
    if input_tree_fp:
        tree_dir = join(output_dir, 'trees')
        create_dir(tree_dir)
    commands = []
    files_to_remove = []
    logger = WorkflowLogger(generate_log_fp(output_dir))
    # Process thresholds from most to least similar so each rep set can seed
    # the next round.
    similarity_thresholds.sort(reverse=True)
    current_inseqs_fp = input_fasta_fp
    current_tree_fp = input_tree_fp
    for similarity_threshold in similarity_thresholds:
        current_inseqs_basename = splitext(split(current_inseqs_fp)[1])[0]
        # pick_otus.py names its outputs after the input basename; rename the
        # otu map and .uc file to threshold-based names afterwards.
        otu_fp = '%s/%d_otu_map.txt' % (otu_dir, similarity_threshold)
        clusters_fp = '%s/%d_clusters.uc' % (otu_dir, similarity_threshold)
        temp_otu_fp = '%s/%s_otus.txt' % (otu_dir, current_inseqs_basename)
        temp_log_fp = '%s/%s_otus.log' % (otu_dir, current_inseqs_basename)
        temp_clusters_fp = '%s/%s_clusters.uc' % (otu_dir,
                                                  current_inseqs_basename)
        # NOTE(review): similarity_threshold/100 must yield a float for %1.2f
        # to be meaningful -- confirm this module enables true division under
        # Python 2 (e.g. via __future__ import).
        pick_otus_cmd = \
            'pick_otus.py -m uclust -DBz -i %s -s %1.2f -o %s' % (
                current_inseqs_fp,
                similarity_threshold / 100,
                otu_dir)
        commands.append([('Pick OTUs (%d)' % similarity_threshold,
                          pick_otus_cmd)])
        commands.append([('Rename OTU file (%d)' % similarity_threshold,
                          'mv %s %s' % (temp_otu_fp, otu_fp))])
        commands.append([('Rename uc file (%d)' % similarity_threshold,
                          'mv %s %s' % (temp_clusters_fp, clusters_fp))])
        files_to_remove.append(temp_log_fp)
        # Pick one representative sequence per OTU (first member).
        temp_rep_set_fp = get_tmp_filename(prefix='NestedReference',
                                           suffix='.fasta')
        pick_rep_set_cmd = \
            'pick_rep_set.py -m first -i %s -o %s -f %s' % (
                otu_fp,
                temp_rep_set_fp,
                current_inseqs_fp)
        commands.append([('Pick Rep Set (%d)' % similarity_threshold,
                          pick_rep_set_cmd)])
        command_handler(commands, status_update_callback, logger,
                        close_logger_on_success=False)
        commands = []
        # Rename representative sequences so OTU ids are reference sequence
        # ids; use context managers so both files are closed even on error
        # (the original leaked the read handle passed to rename_rep_seqs).
        rep_set_fp = '%s/%d_otus_%s.fasta' % (
            rep_set_dir,
            similarity_threshold,
            run_id)
        logger.write('Renaming OTU representative sequences so OTU ids are reference sequence ids.')
        with open(rep_set_fp, 'w') as rep_set_f, \
                open(temp_rep_set_fp, 'U') as temp_rep_set_f:
            for e in rename_rep_seqs(temp_rep_set_f):
                rep_set_f.write('>%s\n%s\n' % e)
        files_to_remove.append(temp_rep_set_fp)
        # Filter the tree, if provided. Truthiness (not "!= None") also guards
        # the case where input_tree_fp was an empty string, in which case
        # tree_dir was never created and this branch must not run.
        if current_tree_fp:
            tree_fp = '%s/%d_otus_%s.tre' % (
                tree_dir,
                similarity_threshold,
                run_id)
            tree_cmd = 'filter_tree.py -i %s -f %s -o %s' %\
                (current_tree_fp, rep_set_fp, tree_fp)
            commands.append([('Filter tree (%d)' % similarity_threshold,
                              tree_cmd)])
            command_handler(commands, status_update_callback, logger,
                            close_logger_on_success=False)
            # prep for the next iteration
            current_tree_fp = tree_fp
        # prep for the next iteration
        remove_files(files_to_remove)
        commands = []
        files_to_remove = []
        current_inseqs_fp = rep_set_fp
    logger.close()
示例6: assign_taxonomy_multiple_times
# 需要导入模块: from qiime.workflow.util import WorkflowLogger [as 别名]
# 或者: from qiime.workflow.util.WorkflowLogger import close [as 别名]
#.........这里部分代码省略.........
## Generate command for RDP
commands = _generate_rdp_commands(output_dataset_dir,
input_fasta_fp,
reference_seqs_fp,
id_to_taxonomy_fp,
clean_otu_table_fp,
confidences,
rdp_max_memory=rdp_max_memory)
## Method is BLAST
elif method == 'blast':
## Check for execution parameters required by BLAST method
if e_values is None:
raise WorkflowError("You must specify at least one "
"E-value.")
## Generate command for BLAST
commands = _generate_blast_commands(output_dataset_dir,
input_fasta_fp,
reference_seqs_fp,
id_to_taxonomy_fp,
clean_otu_table_fp,
e_values)
## Method is Mothur
elif method == 'mothur':
## Check for execution parameters required by Mothur method
if confidences is None:
raise WorkflowError("You must specify at least one "
"confidence level.")
## Generate command for mothur
commands = _generate_mothur_commands(output_dataset_dir,
input_fasta_fp,
reference_seqs_fp,
id_to_taxonomy_fp,
clean_otu_table_fp,
confidences)
## Method is RTAX
elif method == 'rtax':
## Check for execution parameters required by RTAX method
if rtax_modes is None:
raise WorkflowError("You must specify at least one mode "
"to run RTAX in.")
for mode in rtax_modes:
if mode not in ['single', 'paired']:
raise WorkflowError("Invalid rtax mode '%s'. Must be "
"'single' or 'paired'." % mode)
## Generate command for rtax
commands = _generate_rtax_commands(output_dataset_dir,
input_fasta_fp,
reference_seqs_fp,
id_to_taxonomy_fp,
clean_otu_table_fp,
rtax_modes,
read_1_seqs_fp,
read_2_seqs_fp,
rtax_read_id_regex,
rtax_amplicon_id_regex,
rtax_header_id_regex)
## Method is uclust
elif method == 'uclust':
## Check for execution parameters required by uclust method
if uclust_min_consensus_fractions is None:
raise WorkflowError("You must specify at least one uclust "
"minimum consensus fraction.")
if uclust_similarities is None:
raise WorkflowError("You must specify at least one uclust "
"similarity.")
if uclust_max_accepts is None:
raise WorkflowError("You must specify at least one uclust "
"max accepts.")
## Generate command for uclust
commands = _generate_uclust_commands(output_dataset_dir,
input_fasta_fp,
reference_seqs_fp,
id_to_taxonomy_fp,
clean_otu_table_fp,
uclust_min_consensus_fractions,
uclust_similarities,
uclust_max_accepts)
## Unsupported method
else:
raise WorkflowError("Unrecognized or unsupported taxonomy "
"assignment method '%s'." % method)
# send command for current method to command handler
for command in commands:
start = time()
# call_commands_serially needs a list of commands so here's a
# length one commmand list.
command_handler([command], status_update_callback, logger,
close_logger_on_success=False)
end = time()
logger.write('Time (s): %d\n\n' % (end - start))
logger.close()
示例7: pick_subsampled_open_reference_otus
# 需要导入模块: from qiime.workflow.util import WorkflowLogger [as 别名]
# 或者: from qiime.workflow.util.WorkflowLogger import close [as 别名]
def pick_subsampled_open_reference_otus(
    input_fp,
    refseqs_fp,
    output_dir,
    percent_subsample,
    new_ref_set_id,
    command_handler,
    params,
    qiime_config,
    prefilter_refseqs_fp=None,
    run_assign_tax=True,
    run_align_and_tree=True,
    prefilter_percent_id=0.60,
    min_otu_size=2,
    step1_otu_map_fp=None,
    step1_failures_fasta_fp=None,
    parallel=False,
    suppress_step4=False,
    logger=None,
    suppress_md5=False,
    denovo_otu_picking_method="uclust",
    reference_otu_picking_method="uclust_ref",
    status_update_callback=print_to_stdout,
):
    """Run the data preparation steps of Qiime

    The steps performed by this function are:
        - Pick reference OTUs against refseqs_fp
        - Subsample the failures to n sequences.
        - Pick OTUs de novo on the n failures.
        - Pick representative sequences for the resulting OTUs.
        - Pick reference OTUs on all failures using the
          representative set from step 4 as the reference set.

    NOTE(review): this excerpt is truncated by the source page; only argument
    validation, logger setup, and the (optional) prefilter portion of step 1
    are visible here.
    """
    # for now only allowing uclust for otu picking
    allowed_denovo_otu_picking_methods = ["uclust", "usearch61"]
    allowed_reference_otu_picking_methods = ["uclust_ref", "usearch61_ref"]
    assert denovo_otu_picking_method in allowed_denovo_otu_picking_methods, (
        "Unknown de novo OTU picking method: %s. Known methods are: %s"
        % (denovo_otu_picking_method, ",".join(allowed_denovo_otu_picking_methods))
    )
    assert reference_otu_picking_method in allowed_reference_otu_picking_methods, (
        "Unknown reference OTU picking method: %s. Known methods are: %s"
        % (reference_otu_picking_method, ",".join(allowed_reference_otu_picking_methods))
    )
    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    # If no logger was passed in, this function owns the logger (and closes
    # it on success); a caller-supplied logger is left open for the caller.
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir), params=params, qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False
    if not suppress_md5:
        log_input_md5s(logger, [input_fp, refseqs_fp, step1_otu_map_fp, step1_failures_fasta_fp])
    # if the user has not passed a different reference collection for the pre-filter,
    # use the main refseqs_fp. this is useful if the user wants to provide a smaller
    # reference collection, or to use the input reference collection when running in
    # iterative mode (rather than an iteration's new refseqs)
    if prefilter_refseqs_fp is None:
        prefilter_refseqs_fp = refseqs_fp
    # Step 1: Closed-reference OTU picking on the input file (if not already
    # complete)
    if step1_otu_map_fp and step1_failures_fasta_fp:
        step1_dir = "%s/step1_otus" % output_dir
        create_dir(step1_dir)
        logger.write("Using pre-existing reference otu map and failures.\n\n")
    else:
        # Optional prefilter: discard reads failing a low-identity search
        # against prefilter_refseqs_fp before the main OTU picking.
        if prefilter_percent_id is not None:
            prefilter_dir = "%s/prefilter_otus/" % output_dir
            prefilter_failures_list_fp = "%s/%s_failures.txt" % (prefilter_dir, input_basename)
            prefilter_pick_otu_cmd = pick_reference_otus(
                input_fp,
                prefilter_dir,
                reference_otu_picking_method,
                prefilter_refseqs_fp,
                parallel,
                params,
                logger,
                prefilter_percent_id,
            )
            commands.append([("Pick Reference OTUs (prefilter)", prefilter_pick_otu_cmd)])
            prefiltered_input_fp = "%s/prefiltered_%s%s" % (prefilter_dir, input_basename, input_ext)
            filter_fasta_cmd = "filter_fasta.py -f %s -o %s -s %s -n" % (
                input_fp,
                prefiltered_input_fp,
                prefilter_failures_list_fp,
            )
            commands.append([("Filter prefilter failures from input", filter_fasta_cmd)])
            # Call the command handler on the list of commands
    # ......... remainder of this function omitted in the source excerpt .........
示例8: create_personal_results
# 需要导入模块: from qiime.workflow.util import WorkflowLogger [as 别名]
# 或者: from qiime.workflow.util.WorkflowLogger import close [as 别名]
#.........这里部分代码省略.........
otu_table_title = splitext(basename(otu_table_fp))
output_directories = []
raw_data_files = []
raw_data_dirs = []
# Rarefy the OTU table and split by body site here (instead of on a
# per-individual basis) as we can use the same rarefied and split tables
# for each individual.
if not suppress_otu_category_significance:
rarefied_otu_table_fp = join(output_dir,
add_filename_suffix(otu_table_fp,
'_even%d' % rarefaction_depth))
if body_site_rarefied_otu_table_dir is None:
commands = []
cmd_title = 'Rarefying OTU table'
cmd = 'single_rarefaction.py -i %s -o %s -d %s' % (otu_table_fp,
rarefied_otu_table_fp, rarefaction_depth)
commands.append([(cmd_title, cmd)])
raw_data_files.append(rarefied_otu_table_fp)
per_body_site_dir = join(output_dir, 'per_body_site_otu_tables')
cmd_title = 'Splitting rarefied OTU table by body site'
cmd = 'split_otu_table.py -i %s -m %s -f %s -o %s' % (
rarefied_otu_table_fp, mapping_fp, category_to_split,
per_body_site_dir)
commands.append([(cmd_title, cmd)])
raw_data_dirs.append(per_body_site_dir)
command_handler(commands, status_update_callback, logger,
close_logger_on_success=False)
else:
per_body_site_dir = body_site_rarefied_otu_table_dir
for person_of_interest in personal_ids:
# Files to clean up on a per-individual basis.
personal_raw_data_files = []
personal_raw_data_dirs = []
create_dir(join(output_dir, person_of_interest))
personal_mapping_file_fp = join(output_dir, person_of_interest,
'mapping_file.txt')
html_fp = join(output_dir, person_of_interest, 'index.html')
personal_mapping_data = create_personal_mapping_file(mapping_data,
person_of_interest, personal_id_index, bodysite_index,
individual_titles)
personal_mapping_f = open(personal_mapping_file_fp, 'w')
personal_mapping_f.write(
format_mapping_file(header, personal_mapping_data, comments))
personal_mapping_f.close()
personal_raw_data_files.append(personal_mapping_file_fp)
column_title_index = header.index(column_title)
column_title_values = set([e[column_title_index]
for e in personal_mapping_data])
cat_index = header.index(category_to_split)
cat_values = set([e[cat_index] for e in personal_mapping_data])
# Generate alpha diversity boxplots, split by body site, one per
# metric. We run this one first because it completes relatively
示例9: run_core_diversity_analyses
# 需要导入模块: from qiime.workflow.util import WorkflowLogger [as 别名]
# 或者: from qiime.workflow.util.WorkflowLogger import close [as 别名]
#.........这里部分代码省略.........
# filter samples with fewer observations than the requested sampling_depth.
# since these get filtered for some analyses (eg beta diversity after
# even sampling) it's useful to filter them here so they're filtered
# from all analyses.
filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
if not exists(filtered_biom_fp):
filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
biom_fp,
filtered_biom_fp,
sampling_depth,
)
commands.append(
[
(
"Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
filter_samples_cmd,
)
]
)
else:
logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" % filtered_biom_fp)
biom_fp = filtered_biom_fp
# rarify the BIOM table to sampling_depth
rarefied_biom_fp = "%s/table_even%d.biom" % (output_dir, sampling_depth)
if not exists(rarefied_biom_fp):
single_rarefaction_cmd = "single_rarefaction.py -i %s -o %s -d %d" % (biom_fp, rarefied_biom_fp, sampling_depth)
commands.append([("Rarify the OTU table to %d sequences/sample" % sampling_depth, single_rarefaction_cmd)])
else:
logger.write("Skipping single_rarefaction.py as %s exists.\n\n" % rarefied_biom_fp)
# run initial commands and reset the command list
if len(commands) > 0:
command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
commands = []
if not suppress_beta_diversity:
bdiv_even_output_dir = "%s/bdiv_even%d/" % (output_dir, sampling_depth)
# Need to check for the existence of any distance matrices, since the user
# can select which will be generated.
existing_dm_fps = glob("%s/*_dm.txt" % bdiv_even_output_dir)
if len(existing_dm_fps) == 0:
even_dm_fps = run_beta_diversity_through_plots(
otu_table_fp=rarefied_biom_fp,
mapping_fp=mapping_fp,
output_dir=bdiv_even_output_dir,
command_handler=command_handler,
params=params,
qiime_config=qiime_config,
# Note: we pass sampling depth=None here as
# we rarify the BIOM table above and pass that
# in here.
sampling_depth=None,
tree_fp=tree_fp,
parallel=parallel,
logger=logger,
suppress_md5=True,
status_update_callback=status_update_callback,
)
else:
logger.write("Skipping beta_diversity_through_plots.py as %s exist(s).\n\n" % ", ".join(existing_dm_fps))
even_dm_fps = [(split(fp)[1].strip("_dm.txt"), fp) for fp in existing_dm_fps]
# Get make_distance_boxplots parameters
try:
params_str = get_params_str(params["make_distance_boxplots"])