当前位置: 首页>>代码示例>>Python>>正文


Python util.WorkflowLogger类代码示例

本文整理汇总了Python中qiime.workflow.util.WorkflowLogger的典型用法代码示例。如果您正苦于以下问题:Python WorkflowLogger类的具体用法?Python WorkflowLogger怎么用?Python WorkflowLogger使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了WorkflowLogger类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: iterative_pick_subsampled_open_reference_otus

def iterative_pick_subsampled_open_reference_otus(
                              input_fps, 
                              refseqs_fp,
                              output_dir,
                              percent_subsample,
                              new_ref_set_id,
                              command_handler,
                              params,
                              qiime_config,
                              prefilter_refseqs_fp=None,
                              prefilter_percent_id=0.60,
                              min_otu_size=2,
                              run_assign_tax=True,
                              run_align_and_tree=True,
                              step1_otu_map_fp=None,
                              step1_failures_fasta_fp=None,
                              parallel=False,
                              suppress_step4=False,
                              logger=None,
                              suppress_md5=False,
                              denovo_otu_picking_method='uclust',
                              reference_otu_picking_method='uclust_ref',
                              status_update_callback=print_to_stdout):
    """ Call the pick_subsampled_open_reference_otus workflow on multiple inputs
         and handle processing of the results.
    """
    create_dir(output_dir)
    commands = []
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False
    
    # if the user has not passed a different reference collection for the pre-filter,
    # used the input refseqs_fp for all iterations. we want to pre-filter all data against
    # the input data as lower percent identity searches with uclust can be slow, so we 
    # want the reference collection to stay at a reasonable size.
    if prefilter_refseqs_fp == None:
       prefilter_refseqs_fp = refseqs_fp
    
    otu_table_fps = []
    repset_fasta_fps = []
    for i,input_fp in enumerate(input_fps):
        iteration_output_dir = '%s/%d/' % (output_dir,i)
        if iteration_output_exists(iteration_output_dir,min_otu_size):
            # if the output from an iteration already exists, skip that 
            # iteration (useful for continuing failed runs)
            log_input_md5s(logger,[input_fp,refseqs_fp])
            logger.write('Iteration %d (input file: %s) output data already exists. '
                         'Skipping and moving to next.\n\n' % (i,input_fp))
        else:
            pick_subsampled_open_reference_otus(input_fp=input_fp,
                                     refseqs_fp=refseqs_fp,
                                     output_dir=iteration_output_dir,
                                     percent_subsample=percent_subsample,
                                     new_ref_set_id='.'.join([new_ref_set_id,str(i)]),
                                     command_handler=command_handler,
                                     params=params,
                                     qiime_config=qiime_config,
                                     run_assign_tax=False,
                                     run_align_and_tree=False,
                                     prefilter_refseqs_fp=prefilter_refseqs_fp,
                                     prefilter_percent_id=prefilter_percent_id,
                                     min_otu_size=min_otu_size,
                                     step1_otu_map_fp=step1_otu_map_fp,
                                     step1_failures_fasta_fp=step1_failures_fasta_fp,
                                     parallel=parallel,
                                     suppress_step4=suppress_step4,
                                     logger=logger,
                                     suppress_md5=suppress_md5,
                                     denovo_otu_picking_method=denovo_otu_picking_method,
                                     reference_otu_picking_method=reference_otu_picking_method,
                                     status_update_callback=status_update_callback)
        ## perform post-iteration file shuffling whether the previous iteration's
        ## data previously existed or was just computed.
        # step1 otu map and failures can only be used for the first iteration
        # as subsequent iterations need to use updated refseqs files
        step1_otu_map_fp = step1_failures_fasta_fp = None
        new_refseqs_fp = '%s/new_refseqs.fna' % iteration_output_dir
        refseqs_fp = new_refseqs_fp
        otu_table_fps.append('%s/otu_table_mc%d.biom' % (iteration_output_dir,min_otu_size))
        repset_fasta_fps.append('%s/rep_set.fna' % iteration_output_dir)
    
    # Merge OTU tables - check for existence first as this step has historically
    # been a frequent failure, so is sometimes run manually in failed runs.
    otu_table_fp = '%s/otu_table_mc%d.biom' % (output_dir,min_otu_size)
    if not (exists(otu_table_fp) and getsize(otu_table_fp) > 0):
        merge_cmd = 'merge_otu_tables.py -i %s -o %s' %\
         (','.join(otu_table_fps),otu_table_fp)        
        commands.append([("Merge OTU tables",merge_cmd)])
    
    # Build master rep set
    final_repset_fp = '%s/rep_set.fna' % output_dir
    final_repset_from_iteration_repsets_fps(repset_fasta_fps,final_repset_fp)
    
    command_handler(commands,
            status_update_callback,
#.........这里部分代码省略.........
开发者ID:Jorge-C,项目名称:qiime,代码行数:101,代码来源:pick_open_reference_otus.py

示例2: pick_subsampled_open_reference_otus

def pick_subsampled_open_reference_otus(input_fp, 
                              refseqs_fp,
                              output_dir,
                              percent_subsample,
                              new_ref_set_id,
                              command_handler,
                              params,
                              qiime_config,
                              prefilter_refseqs_fp=None,
                              run_assign_tax=True,
                              run_align_and_tree=True,
                              prefilter_percent_id=0.60,
                              min_otu_size=2,
                              step1_otu_map_fp=None,
                              step1_failures_fasta_fp=None,
                              parallel=False,
                              suppress_step4=False,
                              logger=None,
                              suppress_md5=False,
                              denovo_otu_picking_method='uclust',
                              reference_otu_picking_method='uclust_ref',
                              status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime 
    
        The steps performed by this function are:
          - Pick reference OTUs against refseqs_fp
          - Subsample the failures to n sequences.
          - Pick OTUs de novo on the n failures.
          - Pick representative sequences for the resulting OTUs.
          - Pick reference OTUs on all failures using the 
             representative set from step 4 as the reference set.
    
    """
    # for now only allowing uclust for otu picking
    allowed_denovo_otu_picking_methods = ['uclust','usearch61']
    allowed_reference_otu_picking_methods = ['uclust_ref','usearch61_ref']
    assert denovo_otu_picking_method in allowed_denovo_otu_picking_methods,\
     "Unknown de novo OTU picking method: %s. Known methods are: %s"\
     % (denovo_otu_picking_method,
        ','.join(allowed_denovo_otu_picking_methods))

    assert reference_otu_picking_method in allowed_reference_otu_picking_methods,\
     "Unknown reference OTU picking method: %s. Known methods are: %s"\
     % (reference_otu_picking_method,
        ','.join(allowed_reference_otu_picking_methods))
    
    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False
    
    if not suppress_md5:
        log_input_md5s(logger,[input_fp,
                               refseqs_fp,
                               step1_otu_map_fp,
                               step1_failures_fasta_fp])
    
    # if the user has not passed a different reference collection for the pre-filter,
    # used the main refseqs_fp. this is useful if the user wants to provide a smaller
    # reference collection, or to use the input reference collection when running in 
    # iterative mode (rather than an iteration's new refseqs)
    if prefilter_refseqs_fp == None:
       prefilter_refseqs_fp = refseqs_fp
    
    ## Step 1: Closed-reference OTU picking on the input file (if not already complete)
    if step1_otu_map_fp and step1_failures_fasta_fp:
        step1_dir = '%s/step1_otus' % output_dir
        create_dir(step1_dir)
        logger.write("Using pre-existing reference otu map and failures.\n\n")
    else:
        if prefilter_percent_id != None:
            prefilter_dir = '%s/prefilter_otus/' % output_dir
            prefilter_failures_list_fp = '%s/%s_failures.txt' % \
             (prefilter_dir,input_basename)
            prefilter_pick_otu_cmd = pick_reference_otus(\
             input_fp,prefilter_dir,reference_otu_picking_method,
             prefilter_refseqs_fp,parallel,params,logger,prefilter_percent_id)
            commands.append([('Pick Reference OTUs (prefilter)', prefilter_pick_otu_cmd)])
            
            prefiltered_input_fp = '%s/prefiltered_%s%s' %\
             (prefilter_dir,input_basename,input_ext)
            filter_fasta_cmd = 'filter_fasta.py -f %s -o %s -s %s -n' %\
             (input_fp,prefiltered_input_fp,prefilter_failures_list_fp)
            commands.append([('Filter prefilter failures from input', filter_fasta_cmd)])
            
            input_fp = prefiltered_input_fp
            input_dir, input_filename = split(input_fp)
            input_basename, input_ext = splitext(input_filename)
            
        ## Build the OTU picking command
        step1_dir = \
         '%s/step1_otus' % output_dir
#.........这里部分代码省略.........
开发者ID:Jorge-C,项目名称:qiime,代码行数:101,代码来源:pick_open_reference_otus.py

示例3: pick_subsampled_open_reference_otus

def pick_subsampled_open_reference_otus(input_fp,
                                        refseqs_fp,
                                        output_dir,
                                        percent_subsample,
                                        new_ref_set_id,
                                        command_handler,
                                        params,
                                        qiime_config,
                                        prefilter_refseqs_fp=None,
                                        run_assign_tax=True,
                                        run_align_and_tree=True,
                                        prefilter_percent_id=None,
                                        min_otu_size=2,
                                        step1_otu_map_fp=None,
                                        step1_failures_fasta_fp=None,
                                        parallel=False,
                                        suppress_step4=False,
                                        logger=None,
                                        suppress_md5=False,
                                        suppress_index_page=False,
                                        denovo_otu_picking_method='uclust',
                                        reference_otu_picking_method='uclust_ref',
                                        status_update_callback=print_to_stdout,
                                        minimum_failure_threshold=100000):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          - Pick reference OTUs against refseqs_fp
          - Subsample the failures to n sequences.
          - Pick OTUs de novo on the n failures.
          - Pick representative sequences for the resulting OTUs.
          - Pick reference OTUs on all failures using the
             representative set from step 4 as the reference set.

    """
    # for now only allowing uclust/usearch/sortmerna+sumaclust for otu picking
    allowed_denovo_otu_picking_methods = ['uclust', 'usearch61', 'sumaclust']
    allowed_reference_otu_picking_methods = ['uclust_ref', 'usearch61_ref',
                                             'sortmerna']
    assert denovo_otu_picking_method in allowed_denovo_otu_picking_methods,\
        "Unknown de novo OTU picking method: %s. Known methods are: %s"\
        % (denovo_otu_picking_method,
           ','.join(allowed_denovo_otu_picking_methods))

    assert reference_otu_picking_method in allowed_reference_otu_picking_methods,\
        "Unknown reference OTU picking method: %s. Known methods are: %s"\
        % (reference_otu_picking_method,
           ','.join(allowed_reference_otu_picking_methods))

    # Prepare some variables for the later steps
    index_links = []
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    if logger is None:
        log_fp = generate_log_fp(output_dir)
        logger = WorkflowLogger(log_fp,
                                params=params,
                                qiime_config=qiime_config)

        close_logger_on_success = True
        index_links.append(
                ('Run summary data',
                log_fp,
                _index_headers['run_summary']))
    else:
        close_logger_on_success = False


    if not suppress_md5:
        log_input_md5s(logger, [input_fp,
                                refseqs_fp,
                                step1_otu_map_fp,
                                step1_failures_fasta_fp])

    # if the user has not passed a different reference collection for the pre-filter,
    # used the main refseqs_fp. this is useful if the user wants to provide a smaller
    # reference collection, or to use the input reference collection when running in
    # iterative mode (rather than an iteration's new refseqs)
    if prefilter_refseqs_fp is None:
        prefilter_refseqs_fp = refseqs_fp

    # Step 1: Closed-reference OTU picking on the input file (if not already
    # complete)
    if step1_otu_map_fp and step1_failures_fasta_fp:
        step1_dir = '%s/step1_otus' % output_dir
        create_dir(step1_dir)
        logger.write("Using pre-existing reference otu map and failures.\n\n")
    else:
        if prefilter_percent_id is not None:
            prefilter_dir = '%s/prefilter_otus/' % output_dir
            prefilter_failures_list_fp = '%s/%s_failures.txt' % \
                (prefilter_dir, input_basename)
            prefilter_pick_otu_cmd = pick_reference_otus(
                input_fp, prefilter_dir, reference_otu_picking_method,
                prefilter_refseqs_fp, parallel, params, logger, prefilter_percent_id)
            commands.append(
                [('Pick Reference OTUs (prefilter)', prefilter_pick_otu_cmd)])

#.........这里部分代码省略.........
开发者ID:Springbudder,项目名称:qiime,代码行数:101,代码来源:pick_open_reference_otus.py

示例4: run_pick_closed_reference_otus

def run_pick_closed_reference_otus(
                              input_fp, 
                              refseqs_fp,
                              output_dir,
                              taxonomy_fp,
                              command_handler,
                              params,
                              qiime_config,
                              parallel=False,
                              logger=None,
                              suppress_md5=False,
                              status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime 
    
        The steps performed by this function are:
          1) Pick OTUs;
          2) Build an OTU table with optional pre-defined taxonmy.
    
    """
    
    # confirm that a valid otu picking method was supplied before doing
    # any work
    reference_otu_picking_methods = ['blast','uclust_ref','usearch61_ref']

    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        otu_picking_method = 'uclust_ref'
    assert otu_picking_method in reference_otu_picking_methods,\
     "Invalid OTU picking method supplied: %s. Valid choices are: %s"\
     % (otu_picking_method,' '.join(reference_otu_picking_methods))
    
    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger,[input_fp,refseqs_fp,taxonomy_fp])

    # Prep the OTU picking command
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir,input_basename)
    if parallel and (otu_picking_method == 'blast' or 
                     otu_picking_method == 'uclust_ref' or
                     otu_picking_method == 'usearch61_ref'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''
        
        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --alignment_method
            # option. This works for now though.
            d = params['pick_otus'].copy()
            if 'otu_picking_method' in d:
                del d['otu_picking_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/%s -i %s -o %s -r %s -T %s' %\
          (python_exe_fp, 
           script_dir, 
           otu_picking_script,
           input_fp,
           pick_otu_dir,
           refseqs_fp,
           params_str)
    else:
        try:
            params_str = get_params_str(params['pick_otus'])
        except KeyError:
            params_str = ''
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str+= ' --suppress_new_clusters'
        logger.write("Forcing --suppress_new_clusters as this is closed-reference OTU picking.\n\n")
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s -r %s -m %s %s' %\
         (python_exe_fp,
          script_dir,
          input_fp,
          pick_otu_dir,
          refseqs_fp,
          otu_picking_method,
#.........这里部分代码省略.........
开发者ID:rob-knight,项目名称:qiime,代码行数:101,代码来源:upstream.py

示例5: pick_nested_reference_otus

def pick_nested_reference_otus(input_fasta_fp,
                              input_tree_fp,
                              output_dir,
                              run_id,
                              similarity_thresholds,
                              command_handler,
                              status_update_callback=print_to_stdout):


    # Prepare some variables for the later steps
    create_dir(output_dir)
    otu_dir = join(output_dir,'otus')
    create_dir(otu_dir)
    rep_set_dir = join(output_dir,'rep_set')
    create_dir(rep_set_dir)
    # currently not doing anything with taxonomies and trees
    # tax_dir = join(output_dir,'taxonomies')
    # create_dir(tax_dir)
    if input_tree_fp:
        tree_dir = join(output_dir,'trees')
        create_dir(tree_dir)
    commands = []
    files_to_remove = []
    
    logger = WorkflowLogger(generate_log_fp(output_dir))
    similarity_thresholds.sort()
    similarity_thresholds.reverse()
    
    current_inseqs_fp = input_fasta_fp
    current_tree_fp = input_tree_fp
    previous_otu_map = None
    for similarity_threshold in similarity_thresholds:
        current_inseqs_basename = splitext(split(current_inseqs_fp)[1])[0]
        
        # pick otus command
        otu_fp = '%s/%d_otu_map.txt' % (otu_dir,similarity_threshold)
        clusters_fp = '%s/%d_clusters.uc' % (otu_dir,similarity_threshold)
        temp_otu_fp = '%s/%s_otus.txt' % (otu_dir, current_inseqs_basename)
        temp_log_fp = '%s/%s_otus.log' % (otu_dir, current_inseqs_basename)
        temp_clusters_fp = '%s/%s_clusters.uc' % (otu_dir, current_inseqs_basename)
        pick_otus_cmd = \
         'pick_otus.py -m uclust -DBz -i %s -s %1.2f -o %s' % (
           current_inseqs_fp,
           similarity_threshold/100,
           otu_dir)
        
        commands.append([('Pick OTUs (%d)' % similarity_threshold,
                          pick_otus_cmd)])
        commands.append([('Rename OTU file (%d)' % similarity_threshold,
                          'mv %s %s' % (temp_otu_fp,otu_fp))])
        commands.append([('Rename uc file (%d)' % similarity_threshold,
                          'mv %s %s' % (temp_clusters_fp,clusters_fp))])
        files_to_remove.append(temp_log_fp)
        
        # rep set picking
        temp_rep_set_fp = get_tmp_filename(prefix='NestedReference',
                                           suffix='.fasta')
        pick_rep_set_cmd = \
         'pick_rep_set.py -m first -i %s -o %s -f %s' % (
          otu_fp, 
          temp_rep_set_fp,
          current_inseqs_fp)
        commands.append([('Pick Rep Set (%d)' % similarity_threshold,
                           pick_rep_set_cmd)])
        command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
        commands = []
        
        # rename representative sequences
        rep_set_fp = '%s/%d_otus_%s.fasta' % (
          rep_set_dir,
          similarity_threshold,
          run_id)
        logger.write('Renaming OTU representative sequences so OTU ids are reference sequence ids.')
        rep_set_f = open(rep_set_fp,'w')
        for e in rename_rep_seqs(open(temp_rep_set_fp,'U')):
            rep_set_f.write('>%s\n%s\n' % e)
        rep_set_f.close()
        files_to_remove.append(temp_rep_set_fp)
        
        # filter the tree, if provided
        if current_tree_fp != None:
            tree_fp = '%s/%d_otus_%s.tre' % (
              tree_dir,
              similarity_threshold,
              run_id)
            tree_cmd = 'filter_tree.py -i %s -f %s -o %s' %\
               (current_tree_fp,rep_set_fp,tree_fp)
            commands.append([('Filter tree (%d)' % similarity_threshold,tree_cmd)])
            command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
            # prep for the next iteration
            current_tree_fp = tree_fp
        
        
        # prep for the next iteration
        remove_files(files_to_remove)
        commands = []
        files_to_remove = []
        current_inseqs_fp = rep_set_fp
        
    logger.close()
开发者ID:infotroph,项目名称:nested_reference_otus,代码行数:100,代码来源:nested_reference_workflow.py

示例6: run_core_diversity_analyses

def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout):
    """
    """
    if categories != None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
         parse_mapping_file_to_dict(open(mapping_fp,'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError, ("Category '%s' is not a column header "
                 "in your mapping file. "
                 "Categories are case and white space sensitive. Valid "
                 "choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError, ("Category '%s' contains only one value. "
                 "Categories analyzed here require at least two values." % c)
            
    else:
        categories= []
    
    # prep some variables
    if params == None:
        params = parse_qiime_parameters([])
        
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []
    
    # begin logging
    old_log_fps = glob(join(output_dir,'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(('Previous run log',old_log_fp,_index_headers['run_summary']))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp,mapping_fp]
    if tree_fp != None:
        input_fps.append(tree_fp)
    log_input_md5s(logger,input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
         "biom summarize-table -i %s -o %s --suppress-md5 %s" % \
         (biom_fp, biom_table_stats_output_fp,params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" \
                     % biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))
    
    # filter samples with fewer observations than the requested sampling_depth. 
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered 
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
         (biom_fp,filtered_biom_fp,sampling_depth)
        commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
                          filter_samples_cmd)])
    else:
        logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" \
                     % filtered_biom_fp)
    biom_fp = filtered_biom_fp
    
    # run initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands, 
                        status_update_callback, 
                        logger,
#.........这里部分代码省略.........
开发者ID:jasonbogovich,项目名称:qiime,代码行数:101,代码来源:core_diversity_analyses.py

示例7: assign_taxonomy_multiple_times

def assign_taxonomy_multiple_times(input_dirs, output_dir, assignment_methods,
        reference_seqs_fp, id_to_taxonomy_fp,
        confidences=None, e_values=None, rtax_modes=None,
        uclust_min_consensus_fractions=None, uclust_similarities=None,
        uclust_max_accepts=None, input_fasta_filename='rep_set.fna',
        clean_otu_table_filename='otu_table_mc2_no_pynast_failures.biom',
        read_1_seqs_filename='seqs1.fna', read_2_seqs_filename='seqs2.fna',
        rtax_read_id_regexes=None, rtax_amplicon_id_regexes=None,
        rtax_header_id_regexes=None, rdp_max_memory=4000,
        command_handler=call_commands_serially,
        status_update_callback=no_status_updates, force=False):
    """ Performs sanity checks on passed arguments and directories. Builds 
        commands for each method and sends them off to be executed. """
    ## Check if output directory exists
    try:
        create_dir(output_dir, fail_on_exist=not force)
    except OSError:
        raise WorkflowError("Output directory '%s' already exists. Please "
                "choose a different directory, or force overwrite with -f."
                % output_dir)

    logger = WorkflowLogger(generate_log_fp(output_dir))

    # We're going to zip these with the input directories.
    num_dirs = len(input_dirs)
    if rtax_read_id_regexes is None:
        rtax_read_id_regexes = [None] * num_dirs
    if rtax_amplicon_id_regexes is None:
        rtax_amplicon_id_regexes = [None] * num_dirs
    if rtax_header_id_regexes is None:
        rtax_header_id_regexes = [None] * num_dirs

    if num_dirs != len(rtax_read_id_regexes) or \
       num_dirs != len(rtax_amplicon_id_regexes) or \
       num_dirs != len(rtax_header_id_regexes):
        raise WorkflowError("The number of RTAX regular expressions must "
                            "match the number of input directories.")

    for input_dir, rtax_read_id_regex, rtax_amplicon_id_regex, \
            rtax_header_id_regex in zip(input_dirs, rtax_read_id_regexes,
                         rtax_amplicon_id_regexes, rtax_header_id_regexes):
        ## Make sure the input dataset directory exists.
        if not isdir(input_dir):
            raise WorkflowError("The input dataset directory '%s' does not "
                                "exist." % input_dir)

        input_dir_name = split(normpath(input_dir))[1]
        output_dataset_dir = join(output_dir, input_dir_name)
        input_fasta_fp = join(input_dir, input_fasta_filename)
        clean_otu_table_fp = join(input_dir, clean_otu_table_filename)
        read_1_seqs_fp = join(input_dir, read_1_seqs_filename)
        read_2_seqs_fp = join(input_dir, read_2_seqs_filename)

        logger.write("\nCreating output subdirectory '%s' if it doesn't "
                     "already exist.\n" % output_dataset_dir)
        create_dir(output_dataset_dir)

        for method in assignment_methods:
            ## Method is RDP
            if method == 'rdp':
                ## Check for execution parameters required by RDP method
                if confidences is None:
                    raise WorkflowError("You must specify at least one "
                                        "confidence level.")
                ## Generate command for RDP
                commands = _generate_rdp_commands(output_dataset_dir,
                                                  input_fasta_fp,
                                                  reference_seqs_fp,
                                                  id_to_taxonomy_fp,
                                                  clean_otu_table_fp,
                                                  confidences,
                                                  rdp_max_memory=rdp_max_memory)
                        
            ## Method is BLAST
            elif method == 'blast':
                ## Check for execution parameters required by BLAST method
                if e_values is None:
                    raise WorkflowError("You must specify at least one "
                                        "E-value.")
                ## Generate command for BLAST
                commands = _generate_blast_commands(output_dataset_dir,
                                                    input_fasta_fp,
                                                    reference_seqs_fp,
                                                    id_to_taxonomy_fp,
                                                    clean_otu_table_fp,
                                                    e_values)
                        
            ## Method is Mothur
            elif method == 'mothur':
                ## Check for execution parameters required by Mothur method
                if confidences is None:
                    raise WorkflowError("You must specify at least one "
                                        "confidence level.")
                ## Generate command for mothur
                commands = _generate_mothur_commands(output_dataset_dir,
                                                     input_fasta_fp,
                                                     reference_seqs_fp,
                                                     id_to_taxonomy_fp,
                                                     clean_otu_table_fp,
                                                     confidences)
#.........这里部分代码省略.........
开发者ID:ebolyen,项目名称:short-read-tax-assignment,代码行数:101,代码来源:multiple_assign_taxonomy.py

示例8: pick_subsampled_open_reference_otus

def pick_subsampled_open_reference_otus(
    input_fp,
    refseqs_fp,
    output_dir,
    percent_subsample,
    new_ref_set_id,
    command_handler,
    params,
    qiime_config,
    prefilter_refseqs_fp=None,
    run_assign_tax=True,
    run_align_and_tree=True,
    prefilter_percent_id=0.60,
    min_otu_size=2,
    step1_otu_map_fp=None,
    step1_failures_fasta_fp=None,
    parallel=False,
    suppress_step4=False,
    logger=None,
    suppress_md5=False,
    denovo_otu_picking_method="uclust",
    reference_otu_picking_method="uclust_ref",
    status_update_callback=print_to_stdout,
):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          - Pick reference OTUs against refseqs_fp
          - Subsample the failures to n sequences.
          - Pick OTUs de novo on the n failures.
          - Pick representative sequences for the resulting OTUs.
          - Pick reference OTUs on all failures using the
             representative set from step 4 as the reference set.

    """
    # for now only allowing uclust for otu picking
    allowed_denovo_otu_picking_methods = ["uclust", "usearch61"]
    allowed_reference_otu_picking_methods = ["uclust_ref", "usearch61_ref"]
    assert denovo_otu_picking_method in allowed_denovo_otu_picking_methods, (
        "Unknown de novo OTU picking method: %s. Known methods are: %s"
        % (denovo_otu_picking_method, ",".join(allowed_denovo_otu_picking_methods))
    )

    assert reference_otu_picking_method in allowed_reference_otu_picking_methods, (
        "Unknown reference OTU picking method: %s. Known methods are: %s"
        % (reference_otu_picking_method, ",".join(allowed_reference_otu_picking_methods))
    )

    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir), params=params, qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [input_fp, refseqs_fp, step1_otu_map_fp, step1_failures_fasta_fp])

    # if the user has not passed a different reference collection for the pre-filter,
    # used the main refseqs_fp. this is useful if the user wants to provide a smaller
    # reference collection, or to use the input reference collection when running in
    # iterative mode (rather than an iteration's new refseqs)
    if prefilter_refseqs_fp is None:
        prefilter_refseqs_fp = refseqs_fp

    # Step 1: Closed-reference OTU picking on the input file (if not already
    # complete)
    if step1_otu_map_fp and step1_failures_fasta_fp:
        step1_dir = "%s/step1_otus" % output_dir
        create_dir(step1_dir)
        logger.write("Using pre-existing reference otu map and failures.\n\n")
    else:
        if prefilter_percent_id is not None:
            prefilter_dir = "%s/prefilter_otus/" % output_dir
            prefilter_failures_list_fp = "%s/%s_failures.txt" % (prefilter_dir, input_basename)
            prefilter_pick_otu_cmd = pick_reference_otus(
                input_fp,
                prefilter_dir,
                reference_otu_picking_method,
                prefilter_refseqs_fp,
                parallel,
                params,
                logger,
                prefilter_percent_id,
            )
            commands.append([("Pick Reference OTUs (prefilter)", prefilter_pick_otu_cmd)])

            prefiltered_input_fp = "%s/prefiltered_%s%s" % (prefilter_dir, input_basename, input_ext)
            filter_fasta_cmd = "filter_fasta.py -f %s -o %s -s %s -n" % (
                input_fp,
                prefiltered_input_fp,
                prefilter_failures_list_fp,
            )
            commands.append([("Filter prefilter failures from input", filter_fasta_cmd)])

            # Call the command handler on the list of commands
#.........这里部分代码省略.........
开发者ID:justin212k,项目名称:qiime,代码行数:101,代码来源:pick_open_reference_otus.py

示例9: create_personal_results

def create_personal_results(output_dir,
                            mapping_fp,
                            coord_fp,
                            collated_dir,
                            otu_table_fp,
                            prefs_fp,
                            personal_id_column,
                            personal_ids=None,
                            column_title='Self',
                            individual_titles=None,
                            category_to_split='BodySite',
                            time_series_category='WeeksSinceStart',
                            rarefaction_depth=10000,
                            alpha=0.05,
                            rep_set_fp=None,
                            body_site_rarefied_otu_table_dir=None,
                            retain_raw_data=False,
                            suppress_alpha_rarefaction=False,
                            suppress_beta_diversity=False,
                            suppress_taxa_summary_plots=False,
                            suppress_alpha_diversity_boxplots=False,
                            suppress_otu_category_significance=False,
                            command_handler=call_commands_serially,
                            status_update_callback=no_status_updates):
    # Create our output directory and copy over the resources the personalized
    # pages need (e.g. javascript, images, etc.).
    create_dir(output_dir)

    support_files_dir = join(output_dir, 'support_files')
    if not exists(support_files_dir):
        copytree(join(get_project_dir(), 'my_microbes', 'support_files'),
                 support_files_dir)

    logger = WorkflowLogger(generate_log_fp(output_dir))

    mapping_data, header, comments = parse_mapping_file(open(mapping_fp, 'U'))
    try:
        personal_id_index = header.index(personal_id_column)
    except ValueError:
        raise ValueError("Personal ID field '%s' is not a mapping file column "
                         "header." % personal_id_column)
    try:
        bodysite_index = header.index(category_to_split)
    except ValueError:
        raise ValueError("Category to split field '%s' is not a mapping file "
            "column header." % category_to_split)

    header = header[:-1] + [column_title] + [header[-1]]

    # column that differentiates between body-sites within a single individual
    # used for the creation of the vectors in make_3d_plots.py, this data is
    # created by concatenating the two columns when writing the mapping file
    site_id_category = '%s&&%s' % (personal_id_column, category_to_split)
    header.insert(len(header)-1, site_id_category)

    all_personal_ids = get_personal_ids(mapping_data, personal_id_index)
    if personal_ids == None: 
        personal_ids = all_personal_ids
    else:
        for pid in personal_ids:
            if pid not in all_personal_ids:
                raise ValueError("'%s' is not a personal ID in the mapping "
                                 "file column '%s'." %
                                 (pid, personal_id_column))

    if time_series_category not in header:
        raise ValueError("Time series field '%s' is not a mapping file column "
                         "header." % time_series_category)

    otu_table_title = splitext(basename(otu_table_fp))

    output_directories = []
    raw_data_files = []
    raw_data_dirs = []

    # Rarefy the OTU table and split by body site here (instead of on a
    # per-individual basis) as we can use the same rarefied and split tables
    # for each individual.
    if not suppress_otu_category_significance:
        rarefied_otu_table_fp = join(output_dir,
                add_filename_suffix(otu_table_fp,
                                    '_even%d' % rarefaction_depth))

        if body_site_rarefied_otu_table_dir is None:
            commands = []
            cmd_title = 'Rarefying OTU table'
            cmd = 'single_rarefaction.py -i %s -o %s -d %s' % (otu_table_fp,
                    rarefied_otu_table_fp, rarefaction_depth)
            commands.append([(cmd_title, cmd)])
            raw_data_files.append(rarefied_otu_table_fp)

            per_body_site_dir = join(output_dir, 'per_body_site_otu_tables')

            cmd_title = 'Splitting rarefied OTU table by body site'
            cmd = 'split_otu_table.py -i %s -m %s -f %s -o %s' % (
                    rarefied_otu_table_fp, mapping_fp, category_to_split,
                    per_body_site_dir)
            commands.append([(cmd_title, cmd)])
            raw_data_dirs.append(per_body_site_dir)

#.........这里部分代码省略.........
开发者ID:biocore,项目名称:my-microbes,代码行数:101,代码来源:util.py

示例10: run_pick_closed_reference_otus

def run_pick_closed_reference_otus(
        input_fp,
        refseqs_fp,
        output_dir,
        taxonomy_fp,
        command_handler,
        params,
        qiime_config,
        assign_taxonomy=False,
        parallel=False,
        logger=None,
        suppress_md5=False,
        status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          1) Pick OTUs;
          2) If assignment_taxonomy is True, choose representative sequence
             for OTUs and assign taxonomy using a classifier.
          3) Build an OTU table with optional predefined taxonomy
             (if assign_taxonomy=False) or taxonomic assignments from step 2
             (if assign_taxonomy=True).

    """

    # confirm that a valid otu picking method was supplied before doing
    # any work
    reference_otu_picking_methods = ['blast', 'uclust_ref', 'usearch61_ref',
                                     'usearch_ref', 'sortmerna']

    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        otu_picking_method = 'uclust_ref'
    assert otu_picking_method in reference_otu_picking_methods,\
        "Invalid OTU picking method supplied: %s. Valid choices are: %s"\
        % (otu_picking_method, ' '.join(reference_otu_picking_methods))

    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [input_fp, refseqs_fp, taxonomy_fp])

    # Prep the OTU picking command
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
    if parallel and (otu_picking_method == 'blast' or
                     otu_picking_method == 'uclust_ref' or
                     otu_picking_method == 'usearch61_ref' or
                     otu_picking_method == 'sortmerna'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --alignment_method
            # option. This works for now though.
            d = params['pick_otus'].copy()
            if 'otu_picking_method' in d:
                del d['otu_picking_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s -i %s -o %s -r %s -T %s' %\
            (otu_picking_script,
             input_fp,
             pick_otu_dir,
             refseqs_fp,
             params_str)
    else:
        try:
            params_str = get_params_str(params['pick_otus'])
        except KeyError:
            params_str = ''
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str += ' --suppress_new_clusters'
        logger.write(
            "Forcing --suppress_new_clusters as this is "
            "closed-reference OTU picking.\n\n")
        # Build the OTU picking command
        pick_otus_cmd = 'pick_otus.py -i %s -o %s -r %s -m %s %s' %\
            (input_fp,
#.........这里部分代码省略.........
开发者ID:ElDeveloper,项目名称:qiime,代码行数:101,代码来源:upstream.py

示例11: run_core_diversity_analyses

def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_group_significance=False,
    status_update_callback=print_to_stdout,
):
    """
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U"))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
                )
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." % c
                )

    else:
        categories = []
    comma_separated_categories = ",".join(categories)
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = "%s/index.html" % output_dir
    index_links = []
    commands = []

    # begin logging
    old_log_fps = glob(join(output_dir, "log_20*txt"))
    log_fp = generate_log_fp(output_dir)
    index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
    for old_log_fp in old_log_fps:
        index_links.append(("Previous run log", old_log_fp, _index_headers["run_summary"]))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params["biom-summarize-table"])
    except KeyError:
        params_str = ""
    biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = "biom summarize-table -i %s -o %s --suppress-md5 %s" % (
            biom_fp,
            biom_table_stats_output_fp,
            params_str,
        )
        commands.append([("Generate BIOM table summary", biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" % biom_table_stats_output_fp)
    index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
            biom_fp,
            filtered_biom_fp,
            sampling_depth,
        )
        commands.append(
            [
                (
                    "Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
                    filter_samples_cmd,
                )
            ]
        )
    else:
#.........这里部分代码省略.........
开发者ID:EESI,项目名称:qiime,代码行数:101,代码来源:core_diversity_analyses.py


注:本文中的qiime.workflow.util.WorkflowLogger类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。