当前位置: 首页>>代码示例>>Python>>正文


Python BiomIO.from_json方法代码示例

本文整理汇总了Python中frogsBiom.BiomIO.from_json方法的典型用法代码示例。如果您正苦于以下问题:Python BiomIO.from_json方法的具体用法?Python BiomIO.from_json怎么用?Python BiomIO.from_json使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在frogsBiom.BiomIO的用法示例。


在下文中一共展示了BiomIO.from_json方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_retrieved_by_sample

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def get_retrieved_by_sample( biom_file, reference_by_obs_id, references_by_sample, uniq_id, uniq_id_by_sample ):
    """
    @summary: Returns by sample the number of detected observations and the number of distinct references retrieved by the non-chimeric observations.
    @param biom_file: [str] Path to the BIOM file.
    @param reference_by_obs_id: [dict] By observation ID the reference ID(s) as a string. A value containing "," is treated as a chimera and ignored for the retrieved counts.
    @param references_by_sample: [dict] By sample name the container of the reference IDs expected in the sample.
    @param uniq_id: [dict] By reference ID the identifier used to count the retrieved references (presumably the ID of the unique sequence; verify against caller).
    @param uniq_id_by_sample: [dict] By sample name and by reference ID the identifier used to count the expected retrieved references.
    @return: [dict] By sample name a dict with the keys "detected", "retrieved" and "expected_retrieved".
    """
    abundance = BiomIO.from_json( biom_file )
    results = dict()
    for sample in abundance.get_samples_names():
        detected_count = 0
        found_refs = set()
        found_expected_refs = set()
        for observation in abundance.get_observations_by_sample( sample ):
            detected_count += 1
            reference = reference_by_obs_id[observation['id']]
            if "," in reference: # Chimera: counted as detected only
                continue
            found_refs.add( reference )
            if reference in references_by_sample[sample]:
                found_expected_refs.add( reference )
        # Several references can share the same unique identifier: count each identifier once
        results[sample] = {
            "detected": detected_count,
            "retrieved": len(set( uniq_id[ref] for ref in found_refs )),
            "expected_retrieved": len(set( uniq_id_by_sample[sample][ref] for ref in found_expected_refs ))
        }
    return results
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:31,代码来源:clustersAssessment.py

示例2: biom_fasta_to_tsv

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def biom_fasta_to_tsv( input_biom, input_fasta, output_tsv, fields, list_separator ):
    """
    @summary: Convert BIOM file to TSV file with sequence.
    @param input_biom: [str] Path to the BIOM file.
    @param input_fasta: [str] Path to the sequences of the observations.
    @param output_tsv: [str] Path to the output file (format : TSV).
    @param fields: [list] Columns and their order in output. Special columns : '@observation_name', '@observation_sum', '@sample_count', '@rdp_tax_and_bootstrap', '@seed_sequence'. The others columns must be metadata title. It must contain '@seed_sequence'. The list is not modified.
    @param list_separator: [str] Separator for complex metadata.
    @raise ValueError: If '@seed_sequence' is not in fields.
    """
    biom = BiomIO.from_json( input_biom )
    # Located before opening the output so that an invalid call does not leave an empty file
    sequence_idx = fields.index("@seed_sequence")
    # Built as a new list: deleting in place would also alter the list owned by the caller
    fields_without_seq = fields[:sequence_idx] + fields[sequence_idx + 1:]
    with open( output_tsv, "w" ) as out_fh:
        # Header
        header_parts = header_line_parts( fields, biom )
        out_fh.write( "#" + "\t".join(header_parts) + "\n" )
        # Data: one line per sequence of the FASTA
        FH_in = FastaIO( input_fasta )
        for record in FH_in:
            obs_idx = biom.find_idx("observation", record.id)
            count_by_sample = biom.data.get_row_array(obs_idx)
            observation_parts = observation_line_parts( biom.rows[obs_idx], count_by_sample, fields_without_seq, list_separator )
            # The sequence is re-inserted at the position requested in fields
            observation_parts.insert( sequence_idx, record.string )
            out_fh.write( "\t".join(observation_parts) + "\n" )
开发者ID:sandrineperrin,项目名称:frogs-docker,代码行数:28,代码来源:biom2tsv.py

示例3: observations_depth

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def observations_depth( input_biom, output_depth ):
    """
    @summary : Write the depths of the observation in file.
    @param input_biom : [str] path to the biom file processed.
    @param output_depth : [str] path to the output file.
    @note : The percentages are relative to the number of observations with a count different from 0.
            Example of one output file
                #Depth<TAB>Nb_Observ_concerned<TAB>Prct_Observ_concerned
                1<TAB>65<TAB>65.000
                2<TAB>30<TAB>30.000
                3<TAB>0<TAB>0.000
                4<TAB>5<TAB>5.000
    """
    obs_depth = list() # obs_depth[d] is the number of observations with a count equal to d
    nb_observ = 0 # Number of observations with a count different from 0
    # Process depth calculation
    biom = BiomIO.from_json( input_biom )
    for observation_id, observation_count in biom.get_observations_counts():
        # Extend the histogram up to the current depth
        nb_missing = observation_count + 1 - len(obs_depth)
        if nb_missing > 0:
            obs_depth.extend( [0] * nb_missing )
        obs_depth[observation_count] += 1
        if observation_count != 0:
            nb_observ += 1
    del biom
    # Write output (the file is closed even if a write fails)
    with open( output_depth, 'w' ) as out_fh:
        out_fh.write( "#Depth\tNb_Observ_concerned\tPrct_Observ_concerned\n" )
        # Depth 0 is skipped; nb_observ cannot be 0 here because the loop only runs when a depth >= 1 exists
        for depth in range(1, len(obs_depth)):
            prct = (float(obs_depth[depth])/ nb_observ)*100
            out_fh.write( str(depth) + "\t" + str(obs_depth[depth]) + "\t" + ("%.3f" % prct) + "\n" )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:32,代码来源:biomTools.py

示例4: sampling_by_sample

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def sampling_by_sample( input_biom, output_biom, nb_sampled=None, sampled_ratio=None ):
    """
    @summary: Writes a BIOM after a random sampling in each sample.
    @param input_biom: [str] Path to the processed BIOM.
    @param output_biom: [str] Path to outputed BIOM.
    @param nb_sampled: [int] Number of sampled sequences by sample.
    @param sampled_ratio: [float] Ratio of sampled sequences by sample.
    @note: nb_sampled and sampled_ratio are mutually exclusive.
    @raise Exception: If a sample contains less sequences than the number to sample.
    """
    initial_biom = BiomIO.from_json( input_biom )
    new_biom = Biom(
                    matrix_type="sparse",
                    generated_by="Sampling " + (str(nb_sampled) if nb_sampled is not None else str(sampled_ratio) + "%" ) + " elements by sample from " + input_biom
    )
    observations_already_added = set()
    for sample_name in initial_biom.get_samples_names():
        new_biom.add_sample( sample_name, initial_biom.get_sample_metadata(sample_name) )
        sample_seq = initial_biom.get_sample_count(sample_name)
        if nb_sampled is not None:
            sample_nb_sampled = nb_sampled
        else:
            sample_nb_sampled = int(sample_seq * sampled_ratio)
        # The check uses the effective number for this sample: nb_sampled is None when a ratio is given
        if sample_seq < sample_nb_sampled:
            raise Exception( str(sample_nb_sampled) + " sequences cannot be sampled in sample '" + str(sample_name) + "'. It only contains " + str(sample_seq) + " sequences." )
        for current_nb_iter in range(sample_nb_sampled):
            # Take an observation in initial BIOM (sampling without replacement: the count is decremented)
            selected_observation = initial_biom.random_obs_by_sample(sample_name)
            selected_observation_id = selected_observation['id']
            initial_biom.subtract_count( selected_observation_id, sample_name, 1 )
            # Put in new BIOM
            if selected_observation_id not in observations_already_added:
                new_biom.add_observation( selected_observation_id, initial_biom.get_observation_metadata(selected_observation_id) )
                observations_already_added.add( selected_observation_id )
            new_biom.add_count( selected_observation_id, sample_name, 1 )
    BiomIO.write( output_biom, new_biom )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:37,代码来源:biomTools.py

示例5: excluded_obs_on_blastMetrics

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def excluded_obs_on_blastMetrics( input_biom, tag, cmp_operator, threshold, excluded_file ):
    """
    @summary: Writes the list of the observations with no affiliations with sufficient blast value.
    @param input_biom: [str] The path to the BIOM file to check.
    @param tag: [str] The metadata checked.
    @param cmp_operator: [str] The operator use in comparison (tag_value ">=" thresold or tag_value "<=" thresold ).
    @param threshold: [float] The limit for the tag value.
    @param excluded_file: [str] The path to the output file.
    @raise KeyError: If cmp_operator is not ">=" or "<=".
    @note: An observation without any affiliation (empty list or None) is written in the excluded file.
    """
    valid_operators = {
        ">=": operator.ge,
        "<=": operator.le
    }
    cmp_func = valid_operators[cmp_operator]
    biom = BiomIO.from_json( input_biom )
    with open( excluded_file, "w" ) as FH_excluded_file:
        for observation in biom.get_observations():
            alignments = observation["metadata"]["blast_affiliations"]
            if alignments is None: # No affiliation: handled like an empty list
                alignments = list()
            # The observation is kept as soon as one alignment satisfies the comparison
            is_kept = any( cmp_func(float(current_alignment[tag]), threshold) for current_alignment in alignments )
            if not is_kept:
                FH_excluded_file.write( str(observation["id"]) + "\n" )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:27,代码来源:filters.py

示例6: process

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def process( in_biom, out_biom, out_metadata ):
    """
    @summary: Writes a BIOM where the blast_affiliations metadata are moved to a separate TSV file, the list metadata are flattened in strings and the "taxonomy" metadata is set from "blast_taxonomy".
    @param in_biom: [str] Path to the processed BIOM file.
    @param out_biom: [str] Path to the output BIOM file.
    @param out_metadata: [str] Path to the output TSV file with one line per blast affiliation.
    """
    ordered_blast_keys = ["taxonomy", "subject", "evalue", "perc_identity", "perc_query_coverage", "aln_length"] # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    # The metadata file is closed at the end of the block, even on error
    with open( out_metadata, "w" ) as FH_metadata:
        FH_metadata.write( "#OTUID\t" + "\t".join(ordered_blast_keys) + "\n" )
        biom = BiomIO.from_json( in_biom )
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a copy of the keys: blast_affiliations is deleted during the loop
            for metadata_key in list(metadata.keys()):
                metadata_value = metadata[metadata_key]
                if metadata_key == "blast_affiliations": # Extract blast_affiliations metadata in metadata_file
                    if metadata_value is not None:
                        for current_affi in metadata_value:
                            if isinstance(current_affi["taxonomy"], (list, tuple)):
                                current_affi["taxonomy"] = ";".join( current_affi["taxonomy"] )
                            FH_metadata.write( observation["id"] + "\t" + "\t".join([str(current_affi[item]) for item in ordered_blast_keys]) + "\n" )
                    del metadata[metadata_key]
                elif isinstance(metadata_value, (list, tuple)): # All list are transformed in string
                    metadata[metadata_key] = ";".join( map(str, metadata_value) )
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append( observation["id"] )
                    metadata["taxonomy"] = list()
                else:
                    # blast_taxonomy is a string here: a list was joined with ";" by the loop above
                    taxonomy_ranks = metadata["blast_taxonomy"].split(";")
                    taxonomy_depth = len(taxonomy_ranks)
                    metadata["taxonomy"] = taxonomy_ranks
    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write( out_biom, biom )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:35,代码来源:biom_to_stdBiom.py

示例7: get_checked

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def get_checked( abund_file, checked_sample, taxonomy_key, expected_by_depth ):
    """
    @summary: Returns by taxonomic depth the count of each taxonomy found in the checked sample.
    @param abund_file: [str] Path to the BIOM file.
    @param checked_sample: [str] Name of the sample where the counts are read.
    @param taxonomy_key: [str] The metadata key for the taxonomy.
    @param expected_by_depth: [dict] By depth the container of expected taxonomies (ranks joined by ";"). It is used to resolve the multi-affiliations.
    @return: [dict] By depth (0-based) and by taxonomy (ranks joined by ";" up to this depth) the sum of the counts.
    @note: A multi-affiliation is replaced by a blast affiliation only if exactly one of its distinct taxonomies is expected. Otherwise a warning is emitted and the taxonomy is kept.
    """
    biom = BiomIO.from_json(abund_file)
    counts_by_depth = dict()
    for observation in biom.get_observations():
        taxonomy = getCleanedTaxonomy(observation["metadata"][taxonomy_key])
        obs_count = biom.get_count(observation["id"], checked_sample)
        if not obs_count > 0: # Observation absent from the checked sample
            continue
        last_depth = len(taxonomy) - 1
        if taxonomy[last_depth] == "Multi-affiliation":
            nb_matches = 0
            resolved_taxonomy = list()
            seen_taxonomies = list()
            expected_taxonomies = expected_by_depth[last_depth]
            for affiliation in observation["metadata"]["blast_affiliations"]:
                affi_taxonomy = ";".join(getCleanedTaxonomy(affiliation["taxonomy"]))
                if affi_taxonomy in seen_taxonomies: # Each distinct taxonomy is evaluated once
                    continue
                seen_taxonomies.append(affi_taxonomy)
                if affi_taxonomy in expected_taxonomies:
                    resolved_taxonomy = getCleanedTaxonomy(affiliation["taxonomy"])
                    nb_matches += 1
            if nb_matches == 1:
                taxonomy = resolved_taxonomy
            else:
                warnings.warn( "Multi-affiliation cannot be resolved for " + str((float(obs_count)*100)/biom.get_total_count()) + "% sequences. Possible taxonomies: '" + "', '".join(seen_taxonomies) + "'." )
        # Add the count to each rank of the lineage
        for depth in range(len(taxonomy)):
            lineage = ";".join(taxonomy[:depth + 1])
            depth_counts = counts_by_depth.setdefault(depth, dict())
            depth_counts[lineage] = depth_counts.get(lineage, 0) + obs_count
    return counts_by_depth
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:33,代码来源:assessRealMock.py

示例8: __init__

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
    def __init__( self, out_tsv, in_biom, in_fasta=None ):
        """
        @summary: Builds the biom2tsv.py command with the fields available in the BIOM.
        @param out_tsv: [str] Path to output TSV file.
        @param in_biom: [str] Path to BIOM file.
        @param in_fasta: [str] Path to the sequences file. If it is provided the '@seed_sequence' field is added.
        """
        # Options of the command except the fields
        command_options = "--input-file " + in_biom
        if in_fasta is not None:
            command_options += " --input-fasta " + in_fasta
        command_options += " --output-file " + out_tsv

        # Select the fields from the metadata found in the BIOM
        biom = BiomIO.from_json( in_biom )
        fields = list()
        if biom.has_observation_metadata( 'rdp_taxonomy' ) and biom.has_observation_metadata( 'rdp_bootstrap' ):
            fields.append( "'@rdp_tax_and_bootstrap'" )
        if biom.has_observation_metadata( 'blast_taxonomy' ):
            fields.append( "'blast_taxonomy'" )
        if biom.has_observation_metadata( 'blast_affiliations' ):
            fields.extend([
                "'@blast_subject'",
                "'@blast_perc_identity'",
                "'@blast_perc_query_coverage'",
                "'@blast_evalue'",
                "'@blast_aln_length'"
            ])
        if biom.has_observation_metadata( 'seed_id' ):
            fields.append( "'seed_id'" )
        if in_fasta is not None:
            fields.append( "'@seed_sequence'" )
        # Fields always present
        fields.extend([ "'@observation_name'", "'@observation_sum'", "'@sample_count'" ])

        # Set command
        Cmd.__init__( self,
                      'biom2tsv.py',
                      'Converts a BIOM file in TSV file.',
                      command_options + " --fields " + " ".join(fields),
                      '--version' )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:35,代码来源:biom_to_tsv.py

示例9: write_log

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def write_log(in_biom, out_biom, log):
    """
    @summary: Writes the number of OTU before and after the processing, by sample and for all samples.
    @param in_biom: [str] Path to the initial BIOM file.
    @param out_biom: [str] Path to the BIOM file produced by the processing.
    @param log: [str] Path to the output log file.
    """
    # The log is closed even if the parsing of one BIOM fails
    with open(log,"w") as FH_log:
        FH_log.write("#sample\tnb_otu_before\tnb_otu_after\n")
        initial_biom = BiomIO.from_json( in_biom )
        new_biom = BiomIO.from_json( out_biom )

        # Counts by sample (the samples are those listed in the initial BIOM)
        for sample_name in initial_biom.get_samples_names():
            nb_otu_before = len(initial_biom.get_sample_obs(sample_name))
            nb_otu_after = len(new_biom.get_sample_obs(sample_name))
            FH_log.write("Sample name: "+sample_name+"\n\tnb initials OTU: "+str(nb_otu_before)+"\n\tnb normalized OTU: "+str(nb_otu_after)+"\n")

        # Counts for all samples
        nb_initial_otu=len(initial_biom.rows)
        nb_new_otu=len(new_biom.rows)
        FH_log.write("Sample name: all samples\n\tnb initials OTU: "+str(nb_initial_otu)+"\n\tnb normalized OTU: "+str(nb_new_otu)+"\n")
开发者ID:sandrineperrin,项目名称:frogs-docker,代码行数:18,代码来源:normalisation.py

示例10: getRealTaxByRefID

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def getRealTaxByRefID( input_biom, taxonomy_key, duplication_groups ):
    """
    @summary: Returns for each reference the list of its possible taxonomies.
    @param input_biom: [str] Path to BIOM file.
    @param taxonomy_key: [str] The metadata key for taxonomy.
    @param duplication_groups: [dict] By reference ID the list of references with the same sequence. None if the duplications are not managed.
    @return: [dict] List of taxonomies by reference ID. Without duplication group the list contains only the taxonomy of the reference. With a duplication group the list contains the taxonomy of each member of the group.
             Example: 
               {
                 "MVF01000012.1.1317": [
                   ["Root", "Bacteria", "Proteobacteria", "Gammaproteobacteria", "Enterobacteriales", "Enterobacteriaceae", "Cronobacter", "Escherichia coli BIDMC 73"]
                 ],
                 "JQ607252.1.1437": [
                   ["Root", "Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Staphylococcaceae", "Staphylococcus", "bacterium NLAE-zl-P471"],
                   ["Root", "Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Staphylococcaceae", "Staphylococcus", "Staphylococcus aureus M17299"]
                 ] 
               }
    """
    biom = BiomIO.from_json( input_biom )
    # Taxonomy of each observation taken alone
    own_taxonomy = dict()
    for observation in biom.get_observations():
        own_taxonomy[observation["id"]] = getCleanedTaxonomy(observation["metadata"][taxonomy_key])
    taxonomies_by_ref = dict( (obs_id, [taxonomy]) for obs_id, taxonomy in own_taxonomy.items() )
    # The references with a duplication group take the taxonomies of all the members of the group
    if duplication_groups is not None:
        for ref_id in duplication_groups:
            taxonomies_by_ref[ref_id] = [own_taxonomy[member_id] for member_id in duplication_groups[ref_id]]
    return taxonomies_by_ref
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:33,代码来源:biomCmpTax.py

示例11: get_step_size

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
 def get_step_size(self, nb_step=35):
     """
     @summary: Returns the step size to obtain 'nb_step' steps or more in 3/4 of samples.
     @param nb_step: [int] The number of expected steps.
     @returns: [int] The step size. The minimum value is 1 (also returned if the BIOM does not contain any sample).
     """
     # Get the number of sequences by sample
     biom = BiomIO.from_json( self.in_biom )
     counts = sorted( biom.get_sample_count(sample_name) for sample_name in biom.get_samples_names() )
     del biom
     nb_samples = len(counts)
     if nb_samples == 0: # No sample: no quartile can be processed
         return 1
     # Finds the lower quartile number of sequences (floor division: the value is used as an index)
     lower_quartile_idx = nb_samples // 4
     nb_seq = counts[lower_quartile_idx]
     # If lower quartile sample is empty, take the first non-empty sample after it
     if nb_seq == 0:
         for following_count in counts[lower_quartile_idx + 1:]:
             if following_count != 0:
                 nb_seq = following_count
                 break
     step_size = int(nb_seq // nb_step)
     return max(1, step_size)
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:28,代码来源:affiliations_stat.py

示例12: samples_hclassification

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def samples_hclassification( input_biom, output_newick, distance_method, linkage_method, min_count=1 ):
    """
    @summary : Process and write an hierarchical classification from Biom.
    @param input_biom : [str] Path to the BIOM file to process.
    @param output_newick : [str] Path to the newick output file.
    @param distance_method : [str] Used distance method for classify.
    @param linkage_method : [str] Used linkage method for classify.
    @param min_count : [int] Samples with a count lower than this value are not processed.
    @raise Exception : If all the samples have a count lower than min_count.
    @note : A log with the number of samples and the excluded samples is printed on stdout.
    """
    from scipy.spatial.distance import pdist
    from scipy.cluster.hierarchy import linkage, to_tree
    data_array = list()
    processed_samples = list()
    excluded_samples = list()

    # Normalisation on count by sample
    biom = BiomIO.from_json( input_biom )
    nb_observations = len(biom.rows)
    for col_idx, current_sample in enumerate(biom.columns):
        sum_on_sample = biom.data.get_col_sum( col_idx )
        if sum_on_sample < min_count:
            excluded_samples.append( current_sample['id'] )
        else:
            processed_samples.append( current_sample['id'] )
            sample_total = float(sum_on_sample)
            data_array.append( [biom.data.nb_at(row_idx, col_idx)/sample_total for row_idx in range(nb_observations)] )
    nb_samples = len(biom.columns)
    del biom

    # Process distance
    if len(processed_samples) < 1:
        raise Exception("All samples have a count lower than threshold (" + str(min_count) + ").")
    elif len(processed_samples) == 1:
        # Only one sample: the tree is reduced to one leaf
        newick_tree = "(" + processed_samples[0] + ");"
    else:
        # Computing the distance and linkage
        data_dist = pdist( data_array, distance_method )
        data_link = linkage( data_dist, linkage_method )
        scipy_hc_tree = to_tree( data_link , rd=False )
        id_2_name = dict( enumerate(processed_samples) )
        newick_tree = to_newick(scipy_hc_tree, id_2_name)
    # Write newick
    with open( output_newick, "w" ) as out_fh:
        out_fh.write( newick_tree + "\n" )

    # Display log (print with one parenthesized string has the same output with a Python 2 or 3 interpreter)
    print( "# Hierarchical clustering log:\n" +
           "\tNumber of samples in BIOM: " + str(nb_samples) + "\n" +
           "\tNumber of processed samples: " + str(len(processed_samples)) )
    if nb_samples > len(processed_samples):
        print( "\n\tExcluded samples (count < " + str(min_count) + "): " + ", ".join(sorted(excluded_samples)) )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:59,代码来源:biomTools.py

示例13: remove_observations

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def remove_observations( removed_observations, input_biom, output_biom ):
    """
    @summary: Writes a copy of the BIOM without the specified observations.
    @param removed_observations: [list] The names of the observations to remove.
    @param input_biom: [str] The path to the input BIOM.
    @param output_biom: [str] The path to the output BIOM.
    """
    filtered_biom = BiomIO.from_json( input_biom )
    filtered_biom.remove_observations( removed_observations )
    BiomIO.write( output_biom, filtered_biom )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:12,代码来源:filters.py

示例14: filter_biom

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def filter_biom( removed_observations, in_biom, out_biom ):
    """
    @summary: Writes the BIOM cleaned of the specified observations.
    @param removed_observations: [dict] Each key is an observation name.
    @param in_biom: [str]: Path to the processed BIOM file.
    @param out_biom: [str]: Path to the cleaned BIOM file.
    """
    cleaned_biom = BiomIO.from_json(in_biom)
    cleaned_biom.remove_observations(removed_observations)
    BiomIO.write(out_biom, cleaned_biom)
开发者ID:sandrineperrin,项目名称:frogs-docker,代码行数:12,代码来源:removeConta.py

示例15: rarefaction

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def rarefaction( input_biom, interval=10000, ranks=None, taxonomy_key="taxonomy" ):
    """
    @summary: Returns the rarefaction by ranks by samples.
    @param input_biom: [str] Path to the biom file processed.
    @param interval: [int] Number of sequences extracted at each sampling step.
    @param ranks: [list] The rank(s) level for the diversity. It must be provided: the default value None raises a TypeError.
                   Example :
                     Sampled set :
                       Bacteria; Proteobacteria; Alphaproteobacteria; Sphingomonadales; Sphingomonadaceae; Sphingomonas
                       Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio; Vibrio halioticoli
                       Bacteria; Proteobacteria; Gammaproteobacteria; Legionellales; Coxiellaceae; Coxiella; Ornithodoros moubata symbiont A
                       Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Burkholderiaceae; Limnobacter; Limnobacter thiooxidans
                     Result for this set
                       With rank 1 or 2 : 1 group
                       With rank 3 : 3 different groups
                       With rank 4 or 5 or 6 : 4 different groups
    @param taxonomy_key : [str] The metadata title for the taxonomy in the input.
    @return: [dict] By ranks by samples the list of the number of different taxa after each step. Each number is stored as a string.
              Example :
                 {
                   1: {
                        "sampleA" : ["10", "20", "22", "23", "24", "25", "25", "25"],
                        "sampleB" : ["15", "25", "28", "30", "32", "34", "35", "36", "37", "37", "37", "37"]
                      }
                 }
    @warning: Each taxon with a name starting with "unknown" (case insensitive) is renamed "unknown". The taxa are compared in lower case.
    @note: NOTE(review): a group is built with the ranks [0:rank+1] of the taxonomy, so the rank looks like a 0-based index while the example of the ranks parameter counts as 1-based. Confirm the expected convention with the callers.
    """
    sample_rarefaction = dict()
    biom = BiomIO.from_json( input_biom )
    for current_rank in ranks:
        sample_rarefaction[current_rank] = dict()
    # Minimal length of the processed taxonomies (invariant: computed once)
    padded_length = max(ranks) if ranks else 0
    for sample in biom.get_samples_names():
        taxa = dict() # By rank the set of the taxa already seen in the sample
        for current_rank in ranks:
            sample_rarefaction[current_rank][sample] = list()
            taxa[current_rank] = set()
        sample_count = biom.get_sample_count( sample )
        # Number of complete steps (floor division: the value is used in range)
        expected_nb_iter = sample_count // interval
        for current_nb_iter in range(expected_nb_iter):
            selected_observations = biom.random_obs_extract_by_sample(sample, interval)
            for current_selected in selected_observations:
                observation = current_selected['observation']
                taxonomy = list()
                if observation["metadata"].get(taxonomy_key) is not None:
                    taxonomy = biom.get_observation_taxonomy( observation["id"], taxonomy_key )
                for idx, taxon in enumerate(taxonomy):
                    if taxon.lower().startswith("unknown"):
                        taxonomy[idx] = "unknown"
                # The missing ranks are completed with "unknown"
                while len(taxonomy) < padded_length:
                    taxonomy.append("unknown")
                for current_rank in ranks:
                    taxonomy_str = (';'.join(taxonomy[0:current_rank+1])).lower()
                    taxa[current_rank].add( taxonomy_str )
            # The taxa are cumulated between the steps
            for current_rank in ranks:
                sample_rarefaction[current_rank][sample].append( str(len(taxa[current_rank])) )
    return sample_rarefaction
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:57,代码来源:biomTools.py


注:本文中的frogsBiom.BiomIO.from_json方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。