Python frogsBiom.BiomIO类代码示例

本文整理汇总了Python中frogsBiom.BiomIO类的典型用法代码示例。如果您正苦于以下问题：Python BiomIO类的具体用法？Python BiomIO怎么用？Python BiomIO使用的例子？那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了BiomIO类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: sampling_by_sample

def sampling_by_sample( input_biom, output_biom, nb_sampled=None, sampled_ratio=None ):
    """
    @summary: Writes a BIOM after a random sampling in each sample.
    @param input_biom: [str] Path to the processed BIOM.
    @param output_biom: [str] Path to outputed BIOM.
    @param nb_sampled: [int] Number of sampled sequences by sample.
    @param sampled_ratio: [float] Ratio of sampled sequences by sample.
    @note: nb_sampled and sampled_ratio are mutually exclusive.
    """
    initial_biom = BiomIO.from_json( input_biom )
    new_biom = Biom(
                    matrix_type="sparse",
                    generated_by="Sampling " + (str(nb_sampled) if nb_sampled is not None else str(sampled_ratio) + "%" ) + " elements by sample from " + input_biom
    )
    observations_already_added = dict()
    for sample_name in initial_biom.get_samples_names():
        new_biom.add_sample( sample_name, initial_biom.get_sample_metadata(sample_name) )
        sample_seq = initial_biom.get_sample_count(sample_name)
        sample_nb_sampled = nb_sampled
        if nb_sampled is None:
            sample_nb_sampled = int(sample_seq * sampled_ratio)
        if sample_seq < nb_sampled:
            raise Exception( str(sample_nb_sampled) + " sequences cannot be sampled in sample '" + str(sample_name) + "'. It only contains " + str(sample_seq) + " sequences." )
        else:
            for current_nb_iter in range(sample_nb_sampled):
                # Take an observation in initial BIOM
                selected_observation = initial_biom.random_obs_by_sample(sample_name)
                selected_observation_id = selected_observation['id']
                initial_biom.subtract_count( selected_observation_id, sample_name, 1 )
                # Put in new BIOM
                if not observations_already_added.has_key(selected_observation_id):
                    new_biom.add_observation( selected_observation_id, initial_biom.get_observation_metadata(selected_observation_id) )
                    observations_already_added[selected_observation_id] = True
                new_biom.add_count( selected_observation_id, sample_name, 1 )
    BiomIO.write( output_biom, new_biom )

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:35，代码来源:biomTools.py

示例2: process

def process( in_biom, out_biom, out_metadata ):
    ordered_blast_keys = ["taxonomy", "subject", "evalue", "perc_identity", "perc_query_coverage", "aln_length"] # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    FH_metadata = open( out_metadata, "w" )
    FH_metadata.write( "#OTUID\t" + "\t".join([item for item in ordered_blast_keys]) + "\n" )
    biom = BiomIO.from_json( in_biom )
    for observation in biom.get_observations():
        for metadata_key in observation["metadata"].keys():
            if metadata_key == "blast_affiliations": # Extract blast_affiliations metadata in metadata_file
                if observation["metadata"][metadata_key] is not None:
                    for current_affi in observation["metadata"][metadata_key]:
                        if isinstance(current_affi["taxonomy"], list) or isinstance(current_affi["taxonomy"], tuple):
                            current_affi["taxonomy"] = ";".join( current_affi["taxonomy"] )
                        FH_metadata.write( observation["id"] + "\t" + "\t".join([str(current_affi[item]) for item in ordered_blast_keys]) + "\n" )
                del observation["metadata"][metadata_key]
            elif observation["metadata"][metadata_key] is not None: # All list are transformed in string
                if isinstance(observation["metadata"][metadata_key], list) or isinstance(observation["metadata"][metadata_key], tuple):
                    observation["metadata"][metadata_key] = ";".join( map(str, observation["metadata"][metadata_key]) )
        if observation["metadata"].has_key( "blast_taxonomy" ):
            if observation["metadata"]["blast_taxonomy"] is None:
                unclassified_observations.append( observation["id"] )
                observation["metadata"]["taxonomy"] = list()
            else:
                taxonomy_depth = len(observation["metadata"]["blast_taxonomy"].split(";"))
                observation["metadata"]["taxonomy"] = observation["metadata"]["blast_taxonomy"].split(";")
    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write( out_biom, biom )

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:33，代码来源:biom_to_stdBiom.py

示例3: remove_observations

def remove_observations( removed_observations, input_biom, output_biom ):
    """
    @summary: Removes the specified list of observations.
    @param removed_observations: [list] The names of the observations to remove.
    @param input_biom: [str] The path to the input BIOM.
    @param output_biom: [str] The path to the output BIOM.
    """
    biom = BiomIO.from_json( input_biom )
    biom.remove_observations( removed_observations )
    BiomIO.write( output_biom, biom )

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:10，代码来源:filters.py

示例4: filter_biom

def filter_biom( removed_observations, in_biom, out_biom ):
    """
    @summary: Removed the specified observations from BIOM.
    @param removed_observations: [dict] Each key is an observation name.
    @param in_biom: [str]: Path to the processed BIOM file.
    @param out_biom: [str]: Path to the cleaned BIOM file.
    """
    biom = BiomIO.from_json(in_biom)
    biom.remove_observations(removed_observations)
    BiomIO.write(out_biom, biom)

开发者ID:sandrineperrin，项目名称:frogs-docker，代码行数:10，代码来源:removeConta.py

示例5: to_biom

def to_biom( clusters_file, count_file, output_biom, size_separator ):
    """
    @summary : Write a biom file from swarm results.
    @param clusters_file : [str] path to the '.clstr' file.
    @param count_file : [str] path to the count file. It contains the count of
                         sequences by sample of each preclusters.
                         Line format : "Precluster_id    nb_in_sampleA    nb_in_sampleB"
    @param output_biom : [str] path to the output file.
    @param size_separator : [str] the pre-cluster abundance separator.
    """
    biom = Biom( generated_by='swarm', matrix_type="sparse" )

    # Preclusters count by sample
    preclusters_count = dict()
    count_fh = open( count_file )
    samples = count_fh.readline().strip().split()[1:]
    for line in count_fh:
        precluster_id, count_str = line.strip().split(None, 1)
        preclusters_count[precluster_id] = count_str # For large dataset store count into a string consumes minus RAM than a sparse count
    count_fh.close()

    # Add samples
    for sample_name in samples:
        biom.add_sample( sample_name )

    # Process count
    cluster_idx = 1
    clusters_fh = open( clusters_file )
    for line in clusters_fh:
        cluster_name = "Cluster_" + str(cluster_idx)
        cluster_count = {key:0 for key in samples}
        line_fields = line.strip().split()
        # Retrieve count by sample
        for seq_id in line_fields:
            real_seq_id = seq_id.rsplit(size_separator, 1)[0]
            sample_counts = preclusters_count[real_seq_id].split()
            for sample_idx, sample_name in enumerate(samples):
                cluster_count[sample_name] += int(sample_counts[sample_idx])
            preclusters_count[real_seq_id] = None
        # Add cluster on biom
        biom.add_observation( cluster_name, {'seed_id':line_fields[0].rsplit(size_separator, 1)[0]} )
        observation_idx = biom.find_idx("observation", cluster_name)
        for sample_idx, sample_name in enumerate(samples):
            if cluster_count[sample_name] > 0:
                biom.data.change( observation_idx, sample_idx, cluster_count[sample_name] )
        # Next cluster
        cluster_idx += 1

    # Write
    BiomIO.write( output_biom, biom )

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:50，代码来源:swarm2biom.py

示例6: excluded_obs_on_blastMetrics

def excluded_obs_on_blastMetrics( input_biom, tag, cmp_operator, threshold, excluded_file ):
    """
    @summary: Writes the list of the observations with no affiliations with sufficient blast value.
    @param input_biom: [str] The path to the BIOM file to check.
    @param tag: [str] The metadata checked.
    @param cmp_operator: [str] The operator use in comparison (tag_value ">=" thresold or tag_value "<=" thresold ).
    @param threshold: [float] The limit for the tag value.
    @param excluded_file: [str] The path to the output file.
    """
    valid_operators = {
        ">=": operator.__ge__,
        "<=": operator.__le__
    }
    cmp_func = valid_operators[cmp_operator]
    biom = BiomIO.from_json( input_biom )
    FH_excluded_file = open( excluded_file, "w" )
    for observation in biom.get_observations():
        alignments = observation["metadata"]["blast_affiliations"]
        is_discarded = True
        for current_alignment in alignments:
            if cmp_func(float(current_alignment[tag]), threshold):
                is_discarded = False
        if is_discarded:
            FH_excluded_file.write( str(observation["id"]) + "\n" )
    FH_excluded_file.close()

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:25，代码来源:filters.py

示例7: init

    def __init__( self, out_tsv, in_biom, in_fasta=None ):
        """
        @param in_biom: [str] Path to BIOM file.
        @param out_tsv: [str] Path to output TSV file.
        """
        # Sequence file option
        sequence_file_opt = "" if in_fasta is None else " --input-fasta " + in_fasta

        # Check the metadata
        biom = BiomIO.from_json( in_biom )
        conversion_tags = ""
        if biom.has_observation_metadata( 'rdp_taxonomy' ) and biom.has_observation_metadata( 'rdp_bootstrap' ):
            conversion_tags += "'@rdp_tax_and_bootstrap' "
        if biom.has_observation_metadata( 'blast_taxonomy' ):
            conversion_tags += "'blast_taxonomy' "
        if biom.has_observation_metadata( 'blast_affiliations' ):
            conversion_tags += "'@blast_subject' "
            conversion_tags += "'@blast_perc_identity' "
            conversion_tags += "'@blast_perc_query_coverage' "
            conversion_tags += "'@blast_evalue' "
            conversion_tags += "'@blast_aln_length' "
        if biom.has_observation_metadata( 'seed_id' ):
            conversion_tags += "'seed_id' "
        if in_fasta is not None:
            conversion_tags += "'@seed_sequence' "
        conversion_tags += "'@observation_name' '@observation_sum' '@sample_count'"

        # Set command
        Cmd.__init__( self,
                      'biom2tsv.py',
                      'Converts a BIOM file in TSV file.',
                      "--input-file " + in_biom + sequence_file_opt + " --output-file " + out_tsv + " --fields " + conversion_tags,
                      '--version' )

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:33，代码来源:biom_to_tsv.py

示例8: biom_fasta_to_tsv

def biom_fasta_to_tsv( input_biom, input_fasta, output_tsv, fields, list_separator ):
    """
    @summary: Convert BIOM file to TSV file with sequence.
    @param input_biom: [str] Path to the BIOM file.
    @param input_fasta: [str] Path to the sequences of the observations.
    @param output_tsv: [str] Path to the output file (format : TSV).
    @param fields: [list] Columns and their order in output. Special columns : '@observation_name', '@observation_sum', '@sample_count', '@rdp_tax_and_bootstrap', '@seed_sequence'. The others columns must be metadata title.
    @param list_separator: [str] Separator for complex metadata.
    """
    biom = BiomIO.from_json( input_biom )
    out_fh = open( output_tsv, "w" )
    sequence_idx = fields.index("@seed_sequence")
    # Header
    header_parts = header_line_parts( fields, biom )
    out_fh.write( "#" + "\t".join(header_parts) + "\n" )
    # Data
    fields_without_seq = fields
    del fields_without_seq[sequence_idx]
    FH_in = FastaIO( input_fasta )
    for record in FH_in:
        obs_idx = biom.find_idx("observation", record.id)
        count_by_sample = biom.data.get_row_array(obs_idx)
        observation_parts = observation_line_parts( biom.rows[obs_idx], count_by_sample, fields_without_seq, list_separator )
        observation_parts.insert( sequence_idx, record.string )
        out_fh.write( "\t".join(observation_parts) + "\n" )
    out_fh.close()

开发者ID:sandrineperrin，项目名称:frogs-docker，代码行数:26，代码来源:biom2tsv.py

示例9: get_checked

def get_checked( abund_file, checked_sample, taxonomy_key, expected_by_depth ):
    checked_by_depth = dict()
    biom = BiomIO.from_json(abund_file)
    for current_obs in biom.get_observations():
        clean_taxonomy = getCleanedTaxonomy(current_obs["metadata"][taxonomy_key])
        count = biom.get_count(current_obs["id"], checked_sample)
        if count > 0:
            if clean_taxonomy[len(clean_taxonomy)-1] == "Multi-affiliation":
                nb_selected = 0
                selected = list()
                taxonomies = list()
                expected_taxonomies = expected_by_depth[len(clean_taxonomy)-1]
                for affi_idx in range(len(current_obs["metadata"]["blast_affiliations"])):
                    affi_taxonomy = ";".join(getCleanedTaxonomy(current_obs["metadata"]["blast_affiliations"][affi_idx]["taxonomy"]))
                    if affi_taxonomy not in taxonomies:
                        taxonomies.append(affi_taxonomy)
                        if affi_taxonomy in expected_taxonomies:
                            selected = getCleanedTaxonomy(current_obs["metadata"]["blast_affiliations"][affi_idx]["taxonomy"])
                            nb_selected += 1
                if nb_selected == 1:
                    clean_taxonomy = selected
                else:
                    warnings.warn( "Multi-affiliation cannot be resolved for " + str((float(count)*100)/biom.get_total_count()) + "% sequences. Possible taxonomies: '" + "', '".join(taxonomies) + "'." )
            for rank_depth in range(len(clean_taxonomy)):
                rank_taxonomy = ";".join(clean_taxonomy[:rank_depth + 1])
                if rank_depth not in checked_by_depth:
                    checked_by_depth[rank_depth] = dict()
                if rank_taxonomy not in checked_by_depth[rank_depth]:
                    checked_by_depth[rank_depth][rank_taxonomy] = 0
                checked_by_depth[rank_depth][rank_taxonomy] += count
    return checked_by_depth

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:31，代码来源:assessRealMock.py

示例10: write_log

def write_log(in_biom, out_biom, log):
    FH_log=open(log,"w")
    FH_log.write("#sample\tnb_otu_before\tnb_otu_after\n")
    initial_biom = BiomIO.from_json( in_biom )
    new_biom = BiomIO.from_json( out_biom )

    for sample_name in initial_biom.get_samples_names():
        nb_otu_before = len(initial_biom.get_sample_obs(sample_name))
        nb_otu_after = len(new_biom.get_sample_obs(sample_name))
        FH_log.write("Sample name: "+sample_name+"\n\tnb initials OTU: "+str(nb_otu_before)+"\n\tnb normalized OTU: "+str(nb_otu_after)+"\n")

    nb_initial_otu=len(initial_biom.rows)
    nb_new_otu=len(new_biom.rows)
    FH_log.write("Sample name: all samples\n\tnb initials OTU: "+str(nb_initial_otu)+"\n\tnb normalized OTU: "+str(nb_new_otu)+"\n")

    FH_log.close()

开发者ID:sandrineperrin，项目名称:frogs-docker，代码行数:16，代码来源:normalisation.py

示例11: getRealTaxByRefID

def getRealTaxByRefID( input_biom, taxonomy_key, duplication_groups ):
    """
    @summary: Return taxonomy by reference.
    @param input_biom: [str] Path to BIOM file.
    @param taxonomy_key: [str] The metadata key for taxonomy.
    @param duplication_groups: [dict] By reference ID the list of references with the same sequence.
    @return: [dict] List of taxonomies by reference ID.
             Example: 
               {
                 "MVF01000012.1.1317": [
                   ["Root", "Bacteria", "Proteobacteria", "Gammaproteobacteria", "Enterobacteriales", "Enterobacteriaceae", "Cronobacter", "Escherichia coli BIDMC 73"]
                 ],
                 "JQ607252.1.1437": [
                   ["Root", "Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Staphylococcaceae", "Staphylococcus", "bacterium NLAE-zl-P471"],
                   ["Root", "Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Staphylococcaceae", "Staphylococcus", "Staphylococcus aureus M17299"]
                 ] 
               }
    """
    taxonomy_by_obs_id = dict()
    tmp_taxonomy_by_obs_id = dict()
    biom = BiomIO.from_json( input_biom )
    for observation in biom.get_observations():
        taxonomy_clean = getCleanedTaxonomy(observation["metadata"][taxonomy_key])
        taxonomy_by_obs_id[observation["id"]] = [taxonomy_clean]
        tmp_taxonomy_by_obs_id[observation["id"]] = taxonomy_clean
    if duplication_groups is not None:
        for obs_id in duplication_groups:
            taxonomy_by_obs_id[obs_id] = list()
            for id_duplicated_seq in duplication_groups[obs_id]: # For each duplication group member
                taxonomy_by_obs_id[obs_id].append(tmp_taxonomy_by_obs_id[id_duplicated_seq])
    return taxonomy_by_obs_id

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:31，代码来源:biomCmpTax.py

示例12: get_step_size

 def get_step_size(self, nb_step=35):
     """
     @summary: Returns the step size to obtain 'nb_step' steps or more in 3/4 of samples.
     @param nb_step: [int] The number of expected steps.
     @returns: [int] The step size.
     """
     counts = list()
     # Get the number of sequences by sample
     biom = BiomIO.from_json( self.in_biom )
     for sample_name in biom.get_samples_names():
         counts.append( biom.get_sample_count(sample_name) )
     del biom
     counts = sorted(counts)
     nb_samples = len(counts)
     # Finds the lower quartile number of sequences
     lower_quartile_idx = nb_samples/4
     nb_seq = counts[lower_quartile_idx]
     # If lower quartile sample is empty
     if nb_seq == 0:
         idx = 1
         while (lower_quartile_idx + idx) < nb_samples and counts[lower_quartile_idx + idx] == 0:
             idx += 1
         if (lower_quartile_idx + idx) < nb_samples:
             nb_seq = counts[lower_quartile_idx + idx]
     step_size = int(nb_seq/nb_step)
     return max(1, step_size)

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:26，代码来源:affiliations_stat.py

示例13: observations_depth

def observations_depth( input_biom, output_depth ):
    """
    @summary : Write the depths of the observation in file.
    @param input_biom : [str] path to the biom file processed.
    @param output_depth : [str] path to the output file.
    @note : Example of one output file
                #Depth<TAB>Nb_Observ_concerned<TAB>Prct_Observ_concerned
                1<TAB>65<TAB>65.000
                2<TAB>30<TAB>30.000
                3<TAB>0<TAB>0.000
                4<TAB>5<TAB>5.000
    """
    obs_depth = list()
    nb_observ = 0
    # Process depth calculation
    biom = BiomIO.from_json( input_biom )
    for observation_id, observation_count in biom.get_observations_counts():
        while len(obs_depth) <= observation_count:
            obs_depth.append(0)
        obs_depth[observation_count] += 1
        if observation_count != 0:
            nb_observ += 1
    del biom
    # Write output
    out_fh = open( output_depth, 'w' )
    out_fh.write( "#Depth\tNb_Observ_concerned\tPrct_Observ_concerned\n" )
    for depth in range(1, len(obs_depth)):
        prct = (float(obs_depth[depth])/ nb_observ)*100
        out_fh.write( str(depth) + "\t" + str(obs_depth[depth]) + "\t" + ("%.3f" % prct) + "\n" )
    out_fh.close()

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:30，代码来源:biomTools.py

示例14: get_retrieved_by_sample

def get_retrieved_by_sample( biom_file, reference_by_obs_id, references_by_sample, uniq_id, uniq_id_by_sample ):
    counts_by_sample = dict()
    biom = BiomIO.from_json( biom_file )
    for sample_name in biom.get_samples_names():
        nb_detected = 0
        retrieved = dict()
        expected_retrieved = dict()
        for obs in biom.get_observations_by_sample( sample_name ):
            nb_detected += 1
            if not "," in reference_by_obs_id[obs['id']]: # Is not a chimera
                ref_id = reference_by_obs_id[obs['id']]
                retrieved[ref_id] = 1
                if ref_id in references_by_sample[sample_name]:
                    expected_retrieved[ref_id] = 1
        # Uniq sequence for retrieved
        uniq_retrieved = set()
        for ref_id in retrieved:
            uniq_retrieved.add( uniq_id[ref_id] )
        # Uniq sequence for retrieved
        uniq_expected_retrieved = set()
        for ref_id in expected_retrieved:
            uniq_expected_retrieved.add( uniq_id_by_sample[sample_name][ref_id] )
        # Results
        counts_by_sample[sample_name] = {
            "detected": nb_detected,
            "retrieved": len(uniq_retrieved),
            "expected_retrieved": len(uniq_expected_retrieved)
        }
    return counts_by_sample

开发者ID:geraldinepascal，项目名称:FROGS，代码行数:29，代码来源:clustersAssessment.py

示例15: aff_to_metadata

def aff_to_metadata(reference_file, biom_in, biom_out, blast_files=None, rdp_files=None):
    """
    @summary: Add taxonomy metadata on biom file from a blast result.
    @param reference_file: [str] The path to the reference file.
    @param biom_in: [str] The path to the Biom file to process.
    @param biom_out: [str] The path to the biom output file.
    @param blast_files: [list] the list of the path to the blast results in tabular format (outfmt 6 with NCBI Blast+).
    @param rdp_files: [list] the list of path to the RDPClassifier results.
    """
    # Build an hash with the taxonomy for each gene (key=gene_id ; value=gene_taxonomy)
    taxonomy_by_reference = get_tax_from_fasta( reference_file )

    # Retrieve blast clusters annotations
    cluster_blast_annot = dict()
    if blast_files is not None:
        cluster_blast_annot = get_bests_blast_affi( blast_files, taxonomy_by_reference )
    del taxonomy_by_reference

    # Retrieve rdp clusters annotations
    cluster_rdp_annot = dict()
    if rdp_files is not None:
        cluster_rdp_annot = get_rdp_affi( rdp_files )

    # Add metadata to biom
    biom = BiomIO.from_json(biom_in)
    for cluster in biom.get_observations():
        cluster_id = cluster["id"]
        # Blast
        if blast_files is not None:
            blast_taxonomy = None
            blast_affiliations = list()
            if cluster_blast_annot.has_key(cluster_id): # Current observation has a match
                blast_taxonomy = get_tax_consensus( [alignment['taxonomy'] for alignment in cluster_blast_annot[cluster_id]['alignments']] )
                blast_affiliations = cluster_blast_annot[cluster_id]['alignments']
            biom.add_metadata( cluster_id, "blast_affiliations", blast_affiliations, "observation" )
            biom.add_metadata( cluster_id, "blast_taxonomy", blast_taxonomy, "observation" )
        # RDP
        if rdp_files is not None:
            rdp_taxonomy = None
            rdp_bootstrap = None
            if cluster_rdp_annot.has_key(cluster_id):
                rdp_taxonomy = cluster_rdp_annot[cluster_id]['taxonomy']
                rdp_bootstrap = cluster_rdp_annot[cluster_id]['bootstrap']
            biom.add_metadata(cluster_id, "rdp_taxonomy", rdp_taxonomy, "observation")
            biom.add_metadata(cluster_id, "rdp_bootstrap", rdp_bootstrap, "observation")
    BiomIO.write(biom_out, biom)

开发者ID:sandrineperrin，项目名称:frogs-docker，代码行数:46，代码来源:addAffiliation2biom.py

注：本文中的frogsBiom.BiomIO类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。