当前位置: 首页>>代码示例>>Python>>正文


Python filter.sample_ids_from_metadata_description函数代码示例

本文整理汇总了Python中qiime.filter.sample_ids_from_metadata_description函数的典型用法代码示例。如果您正苦于以下问题:Python sample_ids_from_metadata_description函数的具体用法?Python sample_ids_from_metadata_description怎么用?Python sample_ids_from_metadata_description使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了sample_ids_from_metadata_description函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

def main():
    """Filter a distance matrix down to a selected set of samples.

    The samples to keep are taken from the first of these that is supplied:
    the sample IDs of the OTU table at ``opts.otu_table_fp``, the IDs listed
    in ``opts.sample_id_fp``, or the samples in ``opts.mapping_fp`` that match
    ``opts.valid_states``.  If none is supplied the option parser reports an
    error.  The filtered matrix is written to ``opts.output_distance_matrix``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.otu_table_fp:
        otu_table = load_table(opts.otu_table_fp)
        samples_to_keep = otu_table.ids()
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            samples_to_keep = get_seqs_to_keep_lookup_from_seq_id_file(
                sample_id_f)
    elif opts.mapping_fp and opts.valid_states:
        try:
            with open(opts.mapping_fp, 'U') as mapping_f:
                samples_to_keep = sample_ids_from_metadata_description(
                    mapping_f, opts.valid_states)
        except ValueError as e:
            # str(e) works on every Python version; e.message does not.
            option_parser.error(str(e))
    else:
        option_parser.error('must pass either --sample_id_fp, -t, or -m and '
                            '-s')

    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep these samples specified.  So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    with open(opts.input_distance_matrix, 'U') as distmat_f:
        d = filter_samples_from_distance_matrix(
            parse_distmat(distmat_f),
            samples_to_keep,
            negate=not opts.negate)

    # Open the output only once there is something to write, so a failed run
    # does not leave behind a truncated/empty output file.
    with open(opts.output_distance_matrix, 'w') as output_f:
        output_f.write(d)
开发者ID:AhmedAbdelfattah,项目名称:qiime,代码行数:34,代码来源:filter_distance_matrix.py

示例2: main

def main():
    """Write a copy of a mapping file containing only the selected samples.

    The sample IDs to keep come from ``opts.sample_id_fp`` when it is given,
    otherwise from the rows of the input mapping file that match
    ``opts.valid_states``.  If neither is supplied the option parser reports
    an error instead of failing later on an undefined variable.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    mapping_fp = opts.input_fp
    out_mapping_fp = opts.output_fp
    valid_states = opts.valid_states

    if opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            valid_sample_ids = get_seqs_to_keep_lookup_from_seq_id_file(
                sample_id_f)
    elif mapping_fp and valid_states:
        with open(mapping_fp, 'U') as mapping_f:
            valid_sample_ids = sample_ids_from_metadata_description(
                mapping_f, valid_states)
    else:
        option_parser.error('No filtering requested. Must provide either a '
                            'sample id file or valid states.')

    with open(mapping_fp, 'U') as mapping_f:
        data, headers, _ = parse_mapping_file(mapping_f)

    # The first column of every mapping file row is the sample ID.
    good_mapping_file = [line for line in data if line[0] in valid_sample_ids]

    lines = format_mapping_file(headers, good_mapping_file)

    with open(out_mapping_fp, 'w') as fd:
        fd.write(lines)
开发者ID:ElDeveloper,项目名称:apocaqiime,代码行数:26,代码来源:filter_mapping_file.py

示例3: main

def main():
    """Filter (and optionally sort) the samples of a PCoA coordinates file.

    Samples are selected with ``opts.valid_states`` against the mapping file;
    ``opts.negate`` is forwarded to ``filter_sample_ids_from_coords``.  When
    ``opts.mapping_header_name`` is given, the remaining coordinates are
    re-ordered with ``sort_sample_ids``/``sort_coords`` before being written
    to ``opts.output_fp``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    coords_fp = opts.input_coords
    mapping_fp = opts.mapping_fp
    output_fp = opts.output_fp
    valid_states = opts.valid_states
    negate = opts.negate
    mapping_header_name = opts.mapping_header_name

    # Without both of these there is no sample selection to apply; fail with
    # a clear message rather than with a NameError further down.
    if not (mapping_fp and valid_states):
        option_parser.error("A mapping file and a valid states description "
                            "are both required to filter the coordinates.")

    with open(coords_fp, "U") as coords_f:
        coords_ids, coords, eigen_values, pct_exp = parse_coords(coords_f)

    with open(mapping_fp, "U") as mapping_f:
        data, headers, _ = parse_mapping_file(mapping_f)

    with open(mapping_fp, "U") as mapping_f:
        valid_sample_ids = sample_ids_from_metadata_description(
            mapping_f, valid_states)

    valid_coords_ids, valid_coords = filter_sample_ids_from_coords(
        coords_ids, coords, valid_sample_ids, negate)

    if mapping_header_name:
        sorted_sample_ids = sort_sample_ids(data, headers, mapping_header_name)
        valid_coords_ids, valid_coords = sort_coords(
            valid_coords_ids, valid_coords, sorted_sample_ids)

    lines = format_coords(valid_coords_ids, valid_coords, eigen_values,
                          pct_exp)

    # The original called ``fd.close`` without parentheses, so the file was
    # never explicitly closed; the context manager guarantees it.
    with open(output_fp, "w") as fd:
        fd.writelines(lines)
开发者ID:ElDeveloper,项目名称:apocaqiime,代码行数:28,代码来源:filter_coords_from_pcoa.py

示例4: silly_function

    def silly_function(ui):
        """Write the per-color subject lists and per-subject trajectory files.

        For every value in ``coloring_values`` the subjects that have that
        value are written to ``color_by_<value>.txt`` inside ``output_path``.
        Unless ``suppress_trajectory_files`` is set, one file per subject is
        then produced by running ``FILTER_CMD`` followed by ``CONVERSION_CMD``.

        ui: object whose ``series`` method wraps the iterables being looped
            over (presumably for progress reporting -- confirm with caller).
        """
        for c_value in ui.series(coloring_values):
            # Re-open the mapping file for every value and close it promptly.
            with open(mapping_fp, 'U') as mapping_f:
                sample_ids = sample_ids_from_metadata_description(
                    mapping_f, '%s:%s' % (coloring_header_name, c_value))

            _headers, _data = filter_mapping_file(data, headers, sample_ids,
                                                  True)
            per_color_subject_values = list(
                set([row[subject_index] for row in _data]))

            color_fp = join(output_path, 'color_by_' + c_value + '.txt')
            with open(color_fp, 'w') as fd:
                for s in ui.series(per_color_subject_values):
                    fd.write('%s\n' % s)

            if suppress_trajectory_files:
                continue

            for s in ui.series(per_color_subject_values):
                filename = join(output_path, s + '.txt')

                if opts.verbose:
                    # Single-argument print() behaves the same on Python 2
                    # and Python 3.
                    print('Working on printing %s' % filename)

                COMMAND_CALL = FILTER_CMD % (coords_fp, mapping_fp,
                                             '%s:%s' % (subject_header_name, s),
                                             filename, sorting_category)
                o, e, r = qiime_system_call(COMMAND_CALL)
                # NOTE(review): the conversion step is only skipped when
                # verbose output is enabled; in quiet mode it still runs
                # after a filtering error. Kept as-is -- confirm intent.
                if opts.verbose and e:
                    print('Error happened on filtering step: \n%s' % e)
                    continue

                COMMAND_CALL = CONVERSION_CMD % (filename, filename)
                o, e, r = qiime_system_call(COMMAND_CALL)
                if opts.verbose and e:
                    print('Error happened on conversion step: \n%s' % e)
开发者ID:ElDeveloper,项目名称:apocaqiime,代码行数:33,代码来源:build_input_files_for_category.py

示例5: make_profiles_by_category

def make_profiles_by_category(mapping_fp, taxa_level, category):
    """ Creates a list of profiles for each unique value in the category
    Inputs:
        mapping_fp: filepath to the mapping file
        taxa_level: forwarded unchanged to make_profile_by_sid
        category: mapping file category to split data over (required; the
                  special value 'SampleID' gives one entry per sample)
    Returns a dictionary keyed by the values on that category and a list of
        profiles as values
    """
    # Parse the mapping file; the handle is closed even if parsing fails
    with open(mapping_fp, 'U') as map_f:
        mapping_data, _comments = parse_mapping_file_to_dict(map_f)

    result = {}
    if category == 'SampleID':
        # Every sample is its own group holding a single profile
        for sid in mapping_data:
            result[sid] = [make_profile_by_sid(mapping_data, sid, taxa_level)]
        return result

    # Get the unique values for the specified category
    values = set([mapping_data[sid][category] for sid in mapping_data])
    for value in values:
        # Re-open the mapping file for every value and collect the sample
        # ids that match it
        with open(mapping_fp, 'U') as map_f:
            sids = sample_ids_from_metadata_description(
                map_f, category + ":" + value)
        # Create the list with all the profiles of the sample IDs in this
        # category value
        result[value] = [make_profile_by_sid(mapping_data, sid, taxa_level)
                         for sid in sids]
    return result
开发者ID:squirrelo,项目名称:SCGM,代码行数:34,代码来源:profile.py

示例6: split_mapping_file_on_field

def split_mapping_file_on_field(mapping_f,
                                mapping_field,
                                column_rename_ids=None,
                                include_repeat_cols=True):
    """ split mapping file based on value in field

    Generator yielding one ``(name, mapping_file_text)`` pair per distinct
    value of ``mapping_field``; ``name`` is the value with spaces replaced by
    underscores and the text is produced by ``format_mapping_file``.

    mapping_f: iterable of mapping file lines (materialized into a list so it
        can be parsed repeatedly)
    mapping_field: header of the column whose values define the split
    column_rename_ids: optional header name; it is converted to its column
        index and forwarded to ``filter_mapping_file``
    include_repeat_cols: forwarded to ``filter_mapping_file``

    Raises KeyError if ``column_rename_ids`` is not a header of the mapping
    file.
    """

    mapping_f = list(mapping_f)
    mapping_values = get_mapping_values(mapping_f, mapping_field)

    # only the headers are needed here; the data is re-parsed per value below
    _, mapping_headers, _ = parse_mapping_file(mapping_f)

    if column_rename_ids:
        try:
            column_rename_ids = mapping_headers.index(column_rename_ids)
        except ValueError:
            # report the field that was actually looked up (the original
            # message printed mapping_field instead)
            raise KeyError("Field is not in mapping file (search is case "
                           "and white-space sensitive). \n\tProvided field: "
                           "%s. \n\tValid fields: %s"
                           % (column_rename_ids, ' '.join(mapping_headers)))

    for v in mapping_values:
        v_fp_str = v.replace(' ', '_')
        sample_ids_to_keep = sample_ids_from_metadata_description(
            mapping_f, valid_states_str="%s:%s" % (mapping_field, v))

        # parse mapping file each time through the loop as filtering operates
        # on values
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data,
            mapping_headers,
            sample_ids_to_keep,
            include_repeat_cols=include_repeat_cols,
            column_rename_ids=column_rename_ids)
        yield v_fp_str, format_mapping_file(mapping_headers, mapping_data)
开发者ID:TheSchwa,项目名称:qiime,代码行数:35,代码来源:split.py

示例7: split_otu_table_on_sample_metadata

def split_otu_table_on_sample_metadata(otu_table, mapping_f, mapping_field):
    """ yield one sub OTU table per distinct value of mapping_field

    Each yielded item is a ``(name, table)`` pair: ``name`` is the field value
    with spaces replaced by underscores and ``table`` holds only the samples
    whose mapping file entry has that value. Values for which filtering
    raises TableException are skipped; if nothing at all was yielded an
    OTUTableSplitError is raised.
    """
    with errstate(empty='raise'):
        mapping_lines = list(mapping_f)
        field_values = get_mapping_values(mapping_lines, mapping_field)
        produced_any = False

        for value in field_values:
            name = value.replace(' ', '_')
            state = "%s:%s" % (mapping_field, value)
            sample_ids_to_keep = sample_ids_from_metadata_description(
                mapping_lines, valid_states_str=state)

            def keep_sample(values, id_, metadata):
                return id_ in sample_ids_to_keep

            try:
                # a new table is requested (inplace=False) so the input table
                # stays intact for the next value
                subset = otu_table.filter(keep_sample, axis='sample',
                                          inplace=False)
            except TableException:
                # every sample was filtered out: nothing to emit here
                continue

            produced_any = True
            yield name, subset

        if not produced_any:
            raise OTUTableSplitError(
                "Could not split OTU tables! There are no matches between the "
                "sample identifiers in the OTU table and the mapping file.")
开发者ID:Honglongwu,项目名称:qiime,代码行数:29,代码来源:split.py

示例8: main

def main():
    """Filter the samples of an OTU table and optionally its mapping file.

    Samples can be selected by mapping file states (``--mapping_fp`` with
    ``--valid_states``), by a file of sample IDs (``--sample_id_fp``) and/or
    by minimum/maximum counts.  The filtered table is written to
    ``opts.output_fp``; when ``opts.output_mapping_fp`` is given, a mapping
    file restricted to the remaining samples is written as well.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_fp = opts.output_fp

    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    if mapping_fp is None and valid_states is not None:
        option_parser.error("--mapping_fp must be provided if --valid_states " "is passed.")

    if not ((mapping_fp and valid_states) or min_count != 0 or not isinf(max_count) or sample_id_fp is not None):
        option_parser.error(
            "No filtering requested. Must provide either "
            "mapping_fp and valid states, min counts, "
            "max counts, or sample_id_fp (or some combination "
            "of those)."
        )
    if (mapping_fp and valid_states) and sample_id_fp:
        option_parser.error("Providing both --sample_id_fp and " "--mapping_fp/--valid_states is not supported.")
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate" " output mapping file.")

    otu_table = load_table(input_fp)

    negate_sample_id_fp = opts.negate_sample_id_fp
    if mapping_fp and valid_states:
        with open(mapping_fp, "U") as mapping_f:
            sample_ids_to_keep = sample_ids_from_metadata_description(mapping_f, valid_states)
        # negation only applies to IDs supplied through --sample_id_fp
        negate_sample_id_fp = False
    else:
        sample_ids_to_keep = otu_table.ids()

        if sample_id_fp is not None:
            with open(sample_id_fp, "U") as sample_id_f:
                # the first whitespace-separated token of each line is the
                # ID; comment lines are ignored and blank lines are skipped
                # (they used to raise an IndexError)
                sample_id_f_ids = set(
                    [l.strip().split()[0] for l in sample_id_f if l.strip() and not l.startswith("#")]
                )
            sample_ids_to_keep = set(sample_ids_to_keep) & sample_id_f_ids

    filtered_otu_table = filter_samples_from_otu_table(
        otu_table, sample_ids_to_keep, min_count, max_count, negate_ids_to_keep=negate_sample_id_fp
    )

    try:
        write_biom_table(filtered_otu_table, output_fp)
    except EmptyBIOMTableError:
        option_parser.error(
            "Filtering resulted in an empty BIOM table. " "This indicates that no samples remained after filtering."
        )

    # filter mapping file if requested
    if output_mapping_fp:
        with open(mapping_fp, "U") as mapping_f:
            mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(mapping_data, mapping_headers, filtered_otu_table.ids())
        with open(output_mapping_fp, "w") as out_f:
            out_f.write(format_mapping_file(mapping_headers, mapping_data))
开发者ID:colinbrislawn,项目名称:qiime,代码行数:59,代码来源:filter_samples_from_otu_table.py

示例9: main

def main():
    """Filter the samples of an OTU table and optionally its mapping file.

    Samples can be selected by mapping file states (``opts.mapping_fp`` with
    ``opts.valid_states``), by a file of sample IDs (``opts.sample_id_fp``)
    and/or by minimum/maximum counts; when both a states description and a
    sample ID file are given, the two selections are intersected.  The
    filtered table is written to ``opts.output_fp``; when
    ``opts.output_mapping_fp`` is given, a mapping file restricted to the
    remaining samples is written as well.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_fp = opts.output_fp

    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    if not ((mapping_fp and valid_states) or
            min_count != 0 or
            not isinf(max_count) or
            sample_id_fp is not None):
        option_parser.error("No filtering requested. Must provide either "
                            "mapping_fp and valid states, min counts, "
                            "max counts, or sample_id_fp (or some combination "
                            "of those).")
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate"
                            " output mapping file.")

    otu_table = load_table(input_fp)

    if mapping_fp and valid_states:
        with open(mapping_fp, 'U') as mapping_f:
            sample_ids_to_keep = sample_ids_from_metadata_description(
                mapping_f, valid_states)
    else:
        sample_ids_to_keep = otu_table.ids()

    if sample_id_fp is not None:
        with open(sample_id_fp, 'U') as sample_id_f:
            # the first whitespace-separated token of each line is the ID;
            # comment lines are ignored and blank lines are skipped (they
            # used to raise an IndexError)
            sample_id_f_ids = set([l.strip().split()[0]
                                   for l in sample_id_f
                                   if l.strip() and not l.startswith('#')])
        sample_ids_to_keep = set(sample_ids_to_keep) & sample_id_f_ids

    filtered_otu_table = filter_samples_from_otu_table(otu_table,
                                                       sample_ids_to_keep,
                                                       min_count,
                                                       max_count)
    write_biom_table(filtered_otu_table, output_fp)

    # filter mapping file if requested
    if output_mapping_fp:
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data,
            mapping_headers,
            filtered_otu_table.ids())
        with open(output_mapping_fp, 'w') as out_f:
            out_f.write(format_mapping_file(mapping_headers, mapping_data))
开发者ID:cmokeefe,项目名称:qiime,代码行数:59,代码来源:filter_samples_from_otu_table.py

示例10: get_seqs_to_keep_lookup_from_mapping_file

def get_seqs_to_keep_lookup_from_mapping_file(fasta_f, mapping_f, valid_states):
    """Return a lookup (dict with None values) of the sequence IDs to keep.

    A sequence is kept when the part of its ID before the first underscore is
    one of the sample IDs selected from ``mapping_f`` by ``valid_states``.
    """
    wanted_samples = dict.fromkeys(
        sample_ids_from_metadata_description(mapping_f, valid_states))
    kept_seq_ids = [seq_id for seq_id, _seq in parse_fasta(fasta_f)
                    if seq_id.split("_")[0] in wanted_samples]
    return dict.fromkeys(kept_seq_ids)
开发者ID:Bonder-MJ,项目名称:qiime,代码行数:9,代码来源:filter_fasta.py

示例11: split_otu_table_on_sample_metadata

def split_otu_table_on_sample_metadata(otu_table_f, mapping_f, mapping_field):
    """ yield one formatted sub OTU table per distinct value of mapping_field

    Each yielded item is a ``(name, biom_text)`` pair: ``name`` is the field
    value with spaces replaced by underscores and ``biom_text`` is the output
    of format_biom_table for the samples having that value. Values for which
    filtering raises TableException are skipped.
    """
    mapping_lines = list(mapping_f)
    field_values = get_mapping_values(mapping_lines, mapping_field)
    otu_table = parse_biom_table(otu_table_f)

    for value in field_values:
        name = value.replace(' ', '_')
        state = "%s:%s" % (mapping_field, value)
        sample_ids_to_keep = sample_ids_from_metadata_description(
            mapping_lines, valid_states_str=state)

        def keep_sample(values, id_, metadata):
            return id_ in sample_ids_to_keep

        try:
            subset = otu_table.filterSamples(keep_sample)
        except TableException:
            # every sample was filtered out: nothing to emit for this value
            continue

        yield name, format_biom_table(subset)
开发者ID:TheSchwa,项目名称:qiime,代码行数:19,代码来源:split.py

示例12: main

def main():
    """Choose the samples to keep for filtering a distance matrix.

    The sample IDs come from the first of these that is supplied: the OTU
    table at ``opts.otu_table_fp``, the ID file at ``opts.sample_id_fp``, or
    the mapping file at ``opts.mapping_fp`` combined with
    ``opts.valid_states``.

    NOTE(review): this excerpt appears to end before the distance matrix is
    filtered and written -- ``output_f`` is opened but never used in the
    visible code. Confirm against the complete script.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    # The output file is opened (and therefore truncated) before any of the
    # inputs have been read.
    output_f = open(opts.output_distance_matrix,'w')
    if opts.otu_table_fp:
        # Keep every sample that is present in the OTU table.
        otu_table = parse_biom_table(open(opts.otu_table_fp,'U'))
        samples_to_keep = otu_table.SampleIds
        #samples_to_keep = \
        # sample_ids_from_otu_table(open(opts.otu_table_fp,'U'))
    elif opts.sample_id_fp:
        # Keep the samples listed in the sample ID file.
        samples_to_keep = \
         get_seqs_to_keep_lookup_from_seq_id_file(open(opts.sample_id_fp,'U'))
    elif opts.mapping_fp and opts.valid_states:
        # Keep the samples whose mapping file metadata matches valid_states;
        # a ValueError from the lookup is reported through the option parser.
        try:
            samples_to_keep = sample_ids_from_metadata_description(
                open(opts.mapping_fp,'U'),opts.valid_states)
        except ValueError, e:
            option_parser.error(e.message)
开发者ID:Jorge-C,项目名称:qiime,代码行数:19,代码来源:filter_distance_matrix.py

示例13: split_otu_table_on_sample_metadata

def split_otu_table_on_sample_metadata(otu_table, mapping_f, mapping_field):
    """ split otu table into sub otu tables where each represent samples
    corresponding to only a certain value in mapping_field

    Generator yielding ``(name, table)`` pairs: ``name`` is the field value
    with spaces replaced by underscores and ``table`` is a new table holding
    only the samples whose mapping file entry has that value. Values for
    which filtering raises TableException are skipped.
    """
    mapping_f = list(mapping_f)
    mapping_values = get_mapping_values(mapping_f, mapping_field)

    for v in mapping_values:
        v_fp_str = v.replace(' ', '_')
        sample_ids_to_keep = sample_ids_from_metadata_description(
            mapping_f, valid_states_str="%s:%s" % (mapping_field, v))

        try:
            # filtering cannot be inplace otherwise we lose data.
            # The predicate tests *sample* IDs, so the sample axis must be
            # filtered; with axis='observation' the IDs handed to the
            # predicate are observation IDs and never match.
            filtered_otu_table = otu_table.filter(
                lambda values, id_, metadata: id_ in sample_ids_to_keep,
                axis='sample', inplace=False)
        except TableException:
            # all samples are filtered out, so no otu table to write
            continue
        yield v_fp_str, filtered_otu_table
开发者ID:jrherr,项目名称:qiime,代码行数:20,代码来源:split.py

示例14: main


#.........这里部分代码省略.........

    # If sequence reinstatement is requested, make sure all necessary options
    # are specified

    reinstatement_options_counter = 0
    if reinstatement_stat_blank:
        reinstatement_options_counter += 1
    if reinstatement_stat_sample:
        reinstatement_options_counter += 1
    if reinstatement_differential:
        reinstatement_options_counter += 1

    if ((reinstatement_options_counter > 0) and 
        (reinstatement_options_counter < 3)):
        option_parser.error("Must provide all of "
                            "reinstatement_stats_blank, "
                            "reinstatement_stat_sample, and "
                            "reinstatement_differential, or none.")

    if ((reinstatement_options_counter == 3 and reinstatement_sample_number)
        and not reinstatement_method):
        option_parser.error("If providing sample number AND abundance criteria "
                            "for sequence reinstatement, must also provide "
                            "a method for combining results.")

    if reinstatement_options_counter == 3 or reinstatement_sample_number:
        reinstatement = True
    else:
        reinstatement = False

    # get blank sample IDs from mapping file or sample ID list

    if mapping_fp and valid_states:
        blank_sample_ids = sample_ids_from_metadata_description(
            open(mapping_fp, 'U'), valid_states)
        blanks = True
    elif blank_id_fp is not None:
        blank_id_f = open(blank_id_fp, 'Ur')
        blank_sample_ids = set([line.strip().split()[0]
                                for line in blank_id_f
                                if not line.startswith('#')])
        blank_id_f.close()
        blanks = True
    else:
        blanks = False


    # Initialize output objects

    output_dict = {}
    contaminant_types = []

    contamination_stats_dict = None
    contamination_stats_header = None
    corr_data_dict = None

    # Do blank-based stats calculations, if not there check to make sure no 
    # blank-dependent methods are requested:

    if blanks:
        if prescreen_threshold:
            low_contam_libraries = prescreen_libraries(unique_seq_biom,
                                                       blank_sample_ids,
                                                       removal_stat_sample, 
                                                       removal_stat_blank, 
                                                       removal_differential, 
开发者ID:mortonjt,项目名称:decontaminate,代码行数:67,代码来源:decontaminate_unitary.py

示例15: format_vectors_to_js

def format_vectors_to_js(mapping_file_data, mapping_file_headers, coords_data,
                        coords_headers, connected_by_header,
                        sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected; if None only the array declaration is returned
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object

    Notes:
    If using jackknifed input, the coordinates and headers that will be used are
    the ones belonging to the master coords i. e. the first element.
    """

    js_vectors_string = ['\nvar g_vectorPositions = new Array();\n']

    # nothing to connect: only the (empty) array declaration is emitted
    if connected_by_header is None:
        return ''.join(js_vectors_string)

    # check if we are processing jackknifed input, if so just get the master
    if isinstance(coords_data, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    columns_to_keep = ['SampleID', connected_by_header]

    # do not add None if sorted_by_header is None or empty
    if sorted_by_header:
        columns_to_keep.append(sorted_by_header)

    # reduce the amount of data by keeping the required fields only
    mapping_file_data, mapping_file_headers = keep_columns_from_mapping_file(
        mapping_file_data, mapping_file_headers, columns_to_keep)

    # format the mapping file to use this with the filtering function
    mf_string = format_mapping_file(mapping_file_headers, mapping_file_data)

    category_index = mapping_file_headers.index(connected_by_header)
    connected_by = list(set([line[category_index]
                             for line in mapping_file_data]))

    # the position of the sorting column does not change between categories,
    # so look it up once
    sorting_index = None
    if sorted_by_header:
        sorting_index = mapping_file_headers.index(sorted_by_header)

    for category in connected_by:
        # build a new StringIO for each iteration; else the object
        # won't be usable after the first iteration & you'll get an error
        sample_ids = sample_ids_from_metadata_description(
            StringIO(mf_string), '%s:%s' % (connected_by_header, category))

        # if there is a sorting header, sort the coords using these values
        if sorting_index is not None:
            to_sort = [line for line in mapping_file_data
                       if line[0] in sample_ids]
            to_sort.sort(key=lambda x: float(x[sorting_index]))

            # get the sorted sample ids from the sorted-reduced mapping file;
            # unlike zip(*rows)[0] this works on Python 2 and 3 and on an
            # empty selection
            sample_ids = [line[0] for line in to_sort]

        # each category value is a new vector
        js_vectors_string.append("g_vectorPositions['%s'] = new Array();\n"
                                 % (category))

        for s in sample_ids:
            coords_index = coords_headers.index(s)

            # print the first three elements of each coord for each sample
            js_vectors_string.append(
                "g_vectorPositions['%s']['%s'] = %s;\n"
                % (category, s, coords_data[coords_index, :3].tolist()))

    return ''.join(js_vectors_string)
开发者ID:jessicalmetcalf,项目名称:emperor,代码行数:80,代码来源:format.py


注:本文中的qiime.filter.sample_ids_from_metadata_description函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。