

Python format.format_distance_matrix Function Code Examples

This article collects typical usage examples of the qiime.format.format_distance_matrix function in Python. If you are wondering what format_distance_matrix does, how to call it, or want to see it used in context, the hand-picked code examples below should help.


Fifteen code examples of the format_distance_matrix function are shown below, sorted by popularity by default.
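
Before diving into the examples, here is a minimal sketch of what the function returns, assuming a QIIME 1.x installation with NumPy available: a tab-delimited distance-matrix string with a header row of sample labels followed by one labeled row per sample (this matches the assertion in Example 5 below).

from numpy import array
from qiime.format import format_distance_matrix

labels = ['A', 'B', 'C']
dm = array([[0.0, 0.5, 0.3],
            [0.5, 0.0, 0.9],
            [0.3, 0.9, 0.0]])

# Prints a header row of labels, then one tab-delimited row per sample,
# each prefixed with its label.
print(format_distance_matrix(labels, dm))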

Example 1: main

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Open the input distance matrices, parse them, find the intersection, and
    # write the two new distance matrices to the output filepaths.
    input_dm_fps = opts.input_dms.split(',')
    output_dm_fps = opts.output_dms.split(',')
    if len(input_dm_fps) != 2 or len(output_dm_fps) != 2:
        option_parser.error("You must provide exactly two input and output "
            "distance matrix filepaths.")

    # Parse each matrix once; make_compatible_distance_matrices reduces both
    # to the intersection of their sample IDs, in a consistent order.
    (dm1_labels, dm1), (dm2_labels, dm2) = make_compatible_distance_matrices(
        parse_distmat(open(input_dm_fps[0], 'U')),
        parse_distmat(open(input_dm_fps[1], 'U')))
    assert dm1_labels == dm2_labels, ("The order of sample IDs is not the "
                                      "same for the two matrices.")

    output1_f = open(output_dm_fps[0], 'w')
    output2_f = open(output_dm_fps[1], 'w')
    output1_f.write(format_distance_matrix(dm1_labels, dm1))
    output2_f.write(format_distance_matrix(dm2_labels, dm2))
    output1_f.close()
    output2_f.close()
Author: gregcaporaso, Project: microbiogeo, Lines: 26, Source: make_compatible_distance_matrices.py

Example 2: calc_shared_phylotypes

def calc_shared_phylotypes(infile, reference_sample=None):
    """Calculates number of shared phylotypes for each pair of sample.

    infile: otu table filehandle

    reference_sample: if set, will use this sample name to calculate shared OTUs
                      between reference sample, and pair of samples. Useful, 
                      e.g. when the reference sample is the Donor in a transplant study
    """

    sample_ids, otu_ids, otu_table, lineages = parse_otu_table(infile)
 
    if reference_sample:
        ref_idx = sample_ids.index(reference_sample)
    (n,m) = otu_table.shape
    result_array = zeros((m,m), dtype=int)
    for i in range(m):
        for j in range (i+1):
            if reference_sample:
                result_array[i,j] = result_array[j,i] = \
                    _calc_shared_phylotypes_multiple(otu_table, [i, j, ref_idx])
            else:  
                result_array[i,j] = result_array[j,i] = \
                    _calc_shared_phylotypes_pairwise(otu_table, i, j)
                
    return format_distance_matrix(sample_ids, result_array)+"\n"
Author: Ecogenomics, Project: FrankenQIIME, Lines: 26, Source: shared_phylotypes.py
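
The helpers _calc_shared_phylotypes_pairwise and _calc_shared_phylotypes_multiple are not shown above. As a hedged illustration only (not QIIME's implementation), a pairwise count over a dense OTU table with OTUs as rows and samples as columns could look like this:

from numpy import asarray

def shared_phylotypes_pairwise_sketch(otu_table, i, j):
    # Hypothetical stand-in for _calc_shared_phylotypes_pairwise: count the
    # OTUs (rows) observed with nonzero abundance in both sample columns i and j.
    table = asarray(otu_table)
    return int(((table[:, i] > 0) & (table[:, j] > 0)).sum())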

Example 3: distance_matrix

def distance_matrix(input_path, column):
    """ calculates distance matrix on a single column of a mapping file
    
    inputs:
     input_path (file handler)
     column (str)
    """
    data, comments = parse_mapping_file_to_dict(input_path)
    column_data = []
    column_headers = []
    for i in data:
        if column not in data[i]:
            stderr.write("\n\nNo column: '%s' in the mapping file. Existing columns are: %s\n\n" % (column,data[i].keys()))
            exit(1)
        try:
            column_data.append(float(data[i][column]))
        except ValueError:
            stderr.write("\n\nall the values in the column '%s' must be numeric but '%s' has '%s'\n\n"\
                % (column,i,data[i][column]))
            exit(1)
            
        column_headers.append(i)
    
    data_row = array(column_data)
    data_col = reshape(data_row, (1, len(data_row)))
    dist_mtx = abs(data_row-data_col.T)
    
    return format_distance_matrix(column_headers, dist_mtx)
Author: DDomogala3, Project: qiime, Lines: 28, Source: distance_matrix_from_mapping.py
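
The line abs(data_row - data_col.T) relies on NumPy broadcasting: subtracting an (n, 1) column from an (n,) row produces the full (n, n) matrix of pairwise differences |x_i - x_j|. A small self-contained check (plain NumPy, no QIIME required; the values are illustrative):

from numpy import array, reshape

values = array([1.0, 4.0, 6.0])           # one numeric metadata column
col = reshape(values, (1, len(values)))   # shape (1, 3)
dist = abs(values - col.T)                # broadcasts to a (3, 3) matrix
print(dist)
# [[ 0.  3.  5.]
#  [ 3.  0.  2.]
#  [ 5.  2.  0.]]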

Example 4: assemble_distance_matrix

def assemble_distance_matrix(dm_components):
    """ assemble distance matrix components into a complete dm string

    """
    print "I get called."
    data = {}
    # iterate over compenents
    for c in dm_components:
        # create a blank list to store the column ids
        col_ids = []
        # iterate over lines
        for line in c:
            # strip leading/trailing whitespace and split on whitespace
            fields = line.strip().split()
            if fields:
                # if no column ids seen yet, these are them
                if not col_ids:
                    col_ids = fields
                # otherwise this is a data row so add it to data
                else:
                    sid = fields[0]
                    data[sid] = dict(zip(col_ids,fields[1:]))

    # grab the col/row ids as a list so it's ordered
    labels = data.keys()
    # create an empty list to build the dm
    dm = []
    # construct the dm one row at a time
    for l1 in labels:
        dm.append([data[l1][l2] for l2 in labels])
    # create the dm string and return it
    dm = format_distance_matrix(labels,dm)
    return dm
Author: rob-knight, Project: qiime, Lines: 33, Source: beta_diversity.py
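
A hedged usage sketch of the function above: each component is an iterable of tab-delimited lines (a header row of column IDs followed by labeled data rows), typically produced by parallel beta-diversity workers, and the assembled result is again a QIIME-formatted distance-matrix string. The component contents below are invented for illustration:

component_a = ["\tS1\tS2\tS3",
               "S1\t0.0\t0.4\t0.7",
               "S2\t0.4\t0.0\t0.2"]
component_b = ["\tS1\tS2\tS3",
               "S3\t0.7\t0.2\t0.0"]

# Merges the row subsets into a single 3x3 distance-matrix string.
print(assemble_distance_matrix([component_a, component_b]))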

Example 5: test_format_distance_matrix

    def test_format_distance_matrix(self):
        """format_distance_matrix should return tab-delimited dist mat"""
        a = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        labels = [11, 22, 33]
        res = format_distance_matrix(labels, a)
        self.assertEqual(res, "\t11\t22\t33\n11\t1\t2\t3\n22\t4\t5\t6\n33\t7\t8\t9")
        self.assertRaises(ValueError, format_distance_matrix, labels[:2], a)
Author: Gaby1212, Project: qiime, Lines: 7, Source: test_format.py

Example 6: calc_shared_phylotypes

def calc_shared_phylotypes(infile, reference_sample=None):
    """Calculates number of shared phylotypes for each pair of sample.

    infile: otu table filehandle

    reference_sample: if set, will use this sample name to calculate shared OTUs
                      between reference sample, and pair of samples. Useful, 
                      e.g. when the reference sample is the Donor in a transplant study
    """

    otu_table = parse_biom_table(infile)

    if reference_sample:
        # with a BIOM table, the reference sample is addressed by ID rather than index
        ref_idx = reference_sample
    
    num_samples = len(otu_table.SampleIds)
    result_array = zeros((num_samples, num_samples), dtype=int)
    for i,samp1_id in enumerate(otu_table.SampleIds):
        for j,samp2_id in enumerate(otu_table.SampleIds[:i+1]):
            if reference_sample:
                result_array[i,j] = result_array[j,i] = \
                    _calc_shared_phylotypes_multiple(otu_table, 
                                                 [samp1_id, samp2_id, ref_idx])
            else:  
                result_array[i,j] = result_array[j,i] = \
                    _calc_shared_phylotypes_pairwise(otu_table, samp1_id, 
                                                      samp2_id)
                
    return format_distance_matrix(otu_table.SampleIds, result_array)+"\n"
Author: DDomogala3, Project: qiime, Lines: 30, Source: shared_phylotypes.py

Example 7: main

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data, comments = parse_mapping_file_to_dict(opts.input_path)
    column_headers = []
    if ',' not in opts.column:
        column_data = []
        column_name = opts.column
        for i in data:
            if column_name not in data[i]:
                raise ValueError(
                    "No column: '%s' in the mapping file. Existing columns are: %s" %
                    (column_name, data[i].keys()))

            try:
                column_data.append(float(data[i][opts.column]))
            except ValueError:
                raise ValueError(
                    "All the values in the column '%s' must be numeric but '%s' has '%s'" %
                    (column_name, i, data[i][column_name]))

            column_headers.append(i)
        dtx_mtx = compute_distance_matrix_from_metadata(column_data)
    else:
        latitudes = []
        longitudes = []
        try:
            latitude, longitude = opts.column.split(',')
        except ValueError:
            raise ValueError(
                "This script accepts a maximum of 2 colums separated by comma and you passed: %s" %
                (opts.column))

        for i in data:
            if latitude not in data[i] or longitude not in data[i]:
                raise ValueError(
                    "One of these columns or both do not exist: '%s' or '%s' in the mapping file. Existing columns are: %s" %
                    (latitude, longitude, data[i].keys()))

            try:
                latitudes.append(float(data[i][latitude]))
                longitudes.append(float(data[i][longitude]))
            except ValueError:
                raise ValueError(
                    "All the values in the columns '%s' and '%s' must be numeric, but sample '%s' has non-numeric data" %
                    (latitude, longitude, i))

            column_headers.append(i)

        dtx_mtx = calculate_dist_vincenty(latitudes, longitudes)

    dtx_txt = format_distance_matrix(column_headers, dtx_mtx)

    outfilepath = opts.output_fp
    f = open(outfilepath, 'w')
    f.write(dtx_txt)
    f.close()
Author: TheSchwa, Project: qiime, Lines: 57, Source: distance_matrix_from_mapping.py
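
calculate_dist_vincenty is QIIME's own helper for geographic distances between latitude/longitude columns and is not reproduced here. As a hedged, simplified stand-in (the haversine formula on a spherical Earth rather than Vincenty's ellipsoidal formula), a pairwise great-circle distance matrix could be built like this:

from math import radians, sin, cos, asin, sqrt
from numpy import zeros

def haversine_dist_matrix_sketch(latitudes, longitudes, radius_km=6371.0):
    # Illustrative only: great-circle distances (km) on a spherical Earth,
    # not QIIME's Vincenty-based calculate_dist_vincenty.
    n = len(latitudes)
    dm = zeros((n, n))
    for i in range(n):
        for j in range(i):
            lat1, lon1 = radians(latitudes[i]), radians(longitudes[i])
            lat2, lon2 = radians(latitudes[j]), radians(longitudes[j])
            a = (sin((lat2 - lat1) / 2) ** 2 +
                 cos(lat1) * cos(lat2) * sin((lon2 - lon1) / 2) ** 2)
            dm[i, j] = dm[j, i] = 2 * radius_km * asin(sqrt(a))
    return dm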

Example 8: main

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Open the input distance matrix and parse it. Shuffle its labels and write
    # them and the original data to the output file.
    labels, dm_data = parse_distmat(open(opts.input_distance_matrix, 'U'))
    shuffle(labels)
    output_f = open(opts.output_distance_matrix, 'w')
    output_f.write(format_distance_matrix(labels, dm_data))
    output_f.close()
Author: gregcaporaso, Project: microbiogeo, Lines: 10, Source: shuffle_distance_matrix.py

Example 9: compute_procrustes

def compute_procrustes(result_tables, expected_pc_lookup, taxonomy_level=6, num_dimensions=3, random_trials=999):
    """ Compute Procrustes M2 and p-values for a set of results
    
        result_tables: 2d list of tables to be compared to expected tables, 
         where the data in the inner list is:
          [dataset_id, reference_database_id, method_id, 
           parameter_combination_id, table_fp]
        expected_pc_lookup: 2d dict of dataset_id, reference_db_id to principal
         coordinate matrices, for the expected result coordinate matrices
        taxonomy_level: taxonomic level at which to compute results
    """
    ### Start code copied ALMOST* directly from compute_prfs - some re-factoring for re-use is
    ### in order here. *ALMOST refers to changes to parser and variable names since expected
    ### is a pc matrix here.

    for dataset_id, reference_id, method_id, params, actual_table_fp in result_tables:
        ## parse the expected table (unless taxonomy_level is specified, this should be
        ## collapsed on level 6 taxonomy)
        try:
            expected_pc_fp = expected_pc_lookup[dataset_id][reference_id]
        except KeyError:
            raise KeyError, "Can't find expected table for (%s, %s)." % (dataset_id, reference_id)

        ## parse the actual table and collapse it at the specified taxonomic level
        try:
            actual_table = parse_biom_table(open(actual_table_fp, "U"))
        except ValueError:
            raise ValueError, "Couldn't parse BIOM table: %s" % actual_table_fp
        collapse_by_taxonomy = get_taxonomy_collapser(taxonomy_level)
        actual_table = actual_table.collapseObservationsByMetadata(collapse_by_taxonomy)
        ### End code copied directly from compute_prfs.

        # Next block of code, how do I hate thee? Let me count the ways...
        # (1) dist_bray_curtis doesn't take a BIOM Table object
        # (2) pcoa takes a qiime-formatted distance matrix as a list of lines
        # (3) pcoa returns a qiime-formatted pc matrix
        # (4) procrustes_monte_carlo needs to pass through the pc "file" multiple
        #     times, so we actually *need* the pcs that get passed in to be
        #     lists of lines
        dm = dist_bray_curtis(asarray([v for v in actual_table.iterSampleData()]))
        formatted_dm = format_distance_matrix(actual_table.SampleIds, dm)
        actual_pc = pcoa(formatted_dm.split("\n")).split("\n")
        expected_pc = list(open(expected_pc_fp, "U"))

        ## run Procrustes analysis with monte carlo simulation
        actual_m_squared, trial_m_squareds, count_better, mc_p_value = procrustes_monte_carlo(
            expected_pc,
            actual_pc,
            trials=random_trials,
            max_dimensions=num_dimensions,
            sample_id_map=None,
            trial_output_dir=None,
        )

        yield (dataset_id, reference_id, method_id, params, actual_m_squared, mc_p_value)
Author: jairideout, Project: short-read-tax-assignment, Lines: 55, Source: eval_framework.py

Example 10: main

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    
    indir = opts.input_dir
    outdir = opts.output_dir
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # input
    file_names = os.listdir(indir)
    file_names = [fname for fname in file_names if not fname.startswith('.')]

    distmats = []
    headers_list = []
    for fname in file_names:
        f = open(os.path.join(indir, fname), 'U')
        headers, data = parse_distmat(f)
        f.close()
        distmats.append(data)
        headers_list.append(headers)

    # calcs
    headers, means, medians, stdevs = matrix_stats(headers_list, distmats)

    # output
    f = open(os.path.join(outdir,'means.txt'), 'w')
    f.write(format_distance_matrix(headers,means))
    f.close()

    f = open(os.path.join(outdir,'medians.txt'), 'w')
    f.write(format_distance_matrix(headers,medians))
    f.close()

    f = open(os.path.join(outdir,'stdevs.txt'), 'w')
    f.write(format_distance_matrix(headers,stdevs))
    f.close()
Author: Jorge-C, Project: qiime, Lines: 36, Source: dissimilarity_mtx_stats.py
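
matrix_stats is a QIIME helper; as a hedged sketch of the element-wise statistics it is used for here (assuming all input matrices share the same size and sample ordering), stacking the matrices in NumPy yields the same three summaries:

from numpy import array, mean, median, std

def matrix_stats_sketch(distmats):
    # Illustrative stand-in (not qiime's matrix_stats): element-wise summaries
    # across a list of equally sized, identically ordered distance matrices.
    stacked = array(distmats)                 # shape (n_matrices, n, n)
    return mean(stacked, axis=0), median(stacked, axis=0), std(stacked, axis=0)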

Example 11: test_single_file_nj

    def test_single_file_nj(self):
        """ single_file_nj should throw no errors"""

        titles = ["hi", "ho", "yo"]
        distdata = numpy.array([[0, 0.5, 0.3], [0.5, 0.0, 0.9], [0.3, 0.9, 0.0]])
        fname = get_tmp_filename(prefix="nj_", suffix=".txt")
        f = open(fname, "w")
        self._paths_to_clean_up.append(fname)
        f.write(format_distance_matrix(titles, distdata))
        f.close()

        fname2 = get_tmp_filename(prefix="nj_", suffix=".txt", result_constructor=str)
        self._paths_to_clean_up.append(fname2)
        single_file_nj(fname, fname2)
        assert os.path.exists(fname2)
Author: qinjunjie, Project: qiime, Lines: 15, Source: test_hierarchical_cluster.py

Example 12: test_single_file_nj

    def test_single_file_nj(self):
        """ single_file_nj should throw no errors"""

        titles = ['hi','ho','yo']
        distdata = numpy.array([[0,.5,.3],[.5,0.,.9],[.3,.9,0.]])
        fname = get_tmp_filename(prefix='nj_',suffix='.txt')
        f = open(fname,'w')
        self._paths_to_clean_up.append(fname)
        f.write(format_distance_matrix(titles, distdata))
        f.close()
        
        fname2 = get_tmp_filename(prefix='nj_',suffix='.txt',
            result_constructor=str)
        self._paths_to_clean_up.append(fname2)
        single_file_nj(fname,fname2)
        assert(os.path.exists(fname2))
Author: Ecogenomics, Project: FrankenQIIME, Lines: 16, Source: test_hierarchical_cluster.py

Example 13: test_single_file_upgma

    def test_single_file_upgma(self):
        """ single_file_upgma should throw no errors"""

        titles = ['hi', 'ho']
        distdata = numpy.array([[0, .5], [.5, 0.]])
        fd, fname = mkstemp(prefix='upgma_', suffix='.txt')
        close(fd)
        f = open(fname, 'w')
        self._paths_to_clean_up.append(fname)
        f.write(format_distance_matrix(titles, distdata))
        f.close()

        fd, fname2 = mkstemp(prefix='upgma_', suffix='.txt')
        close(fd)
        self._paths_to_clean_up.append(fname2)
        single_file_upgma(fname, fname2)
        assert(os.path.exists(fname2))
Author: Springbudder, Project: qiime, Lines: 17, Source: test_hierarchical_cluster.py

Example 14: test_single_file_upgma

    def test_single_file_upgma(self):
        """ single_file_upgma should throw no errors"""

        titles = ["hi", "ho"]
        distdata = numpy.array([[0, 0.5], [0.5, 0.0]])
        fd, fname = mkstemp(prefix="upgma_", suffix=".txt")
        close(fd)
        f = open(fname, "w")
        self._paths_to_clean_up.append(fname)
        f.write(format_distance_matrix(titles, distdata))
        f.close()

        fd, fname2 = mkstemp(prefix="upgma_", suffix=".txt")
        close(fd)
        self._paths_to_clean_up.append(fname2)
        single_file_upgma(fname, fname2)
        assert os.path.exists(fname2)
Author: colinbrislawn, Project: qiime, Lines: 17, Source: test_hierarchical_cluster.py

Example 15: filter_samples_from_distance_matrix

def filter_samples_from_distance_matrix(dm, samples_to_discard, negate=False):
    """ Remove specified samples from distance matrix

        dm: (sample_ids, dm_data) tuple, as returned from
         qiime.parse.parse_distmat; or a file handle that can be passed
         to qiime.parse.parse_distmat

    """
    try:
        sample_ids, dm_data = dm
    except ValueError:
        # input was provided as a file handle
        sample_ids, dm_data = parse_distmat(dm)

    sample_lookup = {}.fromkeys([e.split()[0] for e in samples_to_discard])
    temp_dm_data = []
    new_dm_data = []
    new_sample_ids = []

    if negate:

        def keep_sample(s):
            return s in sample_lookup

    else:

        def keep_sample(s):
            return s not in sample_lookup

    for row, sample_id in zip(dm_data, sample_ids):
        if keep_sample(sample_id):
            temp_dm_data.append(row)
            new_sample_ids.append(sample_id)
    temp_dm_data = array(temp_dm_data).transpose()

    for col, sample_id in zip(temp_dm_data, sample_ids):
        if keep_sample(sample_id):
            new_dm_data.append(col)
    new_dm_data = array(new_dm_data).transpose()

    return format_distance_matrix(new_sample_ids, new_dm_data)
Author: nbresnick, Project: qiime, Lines: 41, Source: filter.py
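
A brief, hypothetical usage of the function above, with invented sample IDs and distances. Note the design choice: rows are filtered first, the matrix is transposed, the (former) columns are filtered the same way, and the final transpose restores the original orientation, so both the row and the column of each discarded sample are removed while symmetry is preserved.

from numpy import array

sample_ids = ['S1', 'S2', 'S3']
dm_data = array([[0.0, 0.4, 0.7],
                 [0.4, 0.0, 0.2],
                 [0.7, 0.2, 0.0]])

# Drop sample S2; the (ids, data) tuple is accepted directly in place of a file handle.
print(filter_samples_from_distance_matrix((sample_ids, dm_data), ['S2']))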


Note: The qiime.format.format_distance_matrix examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The snippets were selected from open-source projects contributed by various developers, and copyright of the source code belongs to the original authors. Please refer to each project's License before distributing or using the code; do not reproduce this article without permission.