當前位置: 首頁>>代碼示例>>Python>>正文


Python BiomIO.write方法代碼示例

本文整理匯總了Python中frogsBiom.BiomIO.write方法的典型用法代碼示例。如果您正苦於以下問題:Python BiomIO.write方法的具體用法?Python BiomIO.write怎麼用?Python BiomIO.write使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在frogsBiom.BiomIO的用法示例。


在下文中一共展示了BiomIO.write方法的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: sampling_by_sample

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
def sampling_by_sample( input_biom, output_biom, nb_sampled=None, sampled_ratio=None ):
    """
    @summary: Writes a BIOM after a random sampling (without replacement) in each sample.
    @param input_biom: [str] Path to the processed BIOM.
    @param output_biom: [str] Path to the output BIOM.
    @param nb_sampled: [int] Number of sequences to draw in each sample.
    @param sampled_ratio: [float] Ratio of the sample sequences to draw in each sample.
                          Only used when nb_sampled is None.
    @note: nb_sampled and sampled_ratio are mutually exclusive.
    @raise Exception: When a sample contains fewer sequences than the number
                      that must be drawn from it.
    """
    initial_biom = BiomIO.from_json( input_biom )
    new_biom = Biom(
                    matrix_type="sparse",
                    generated_by="Sampling " + (str(nb_sampled) if nb_sampled is not None else str(sampled_ratio) + "%" ) + " elements by sample from " + input_biom
    )
    observations_already_added = set()
    for sample_name in initial_biom.get_samples_names():
        new_biom.add_sample( sample_name, initial_biom.get_sample_metadata(sample_name) )
        sample_seq = initial_biom.get_sample_count(sample_name)
        sample_nb_sampled = nb_sampled
        if nb_sampled is None:
            sample_nb_sampled = int(sample_seq * sampled_ratio)
        # Compare against the effective number of draws for THIS sample: comparing
        # against nb_sampled skipped the check whenever a ratio was used.
        if sample_seq < sample_nb_sampled:
            raise Exception( str(sample_nb_sampled) + " sequences cannot be sampled in sample '" + str(sample_name) + "'. It only contains " + str(sample_seq) + " sequences." )
        for _ in range(sample_nb_sampled):
            # Take an observation in initial BIOM and decrement its count so the
            # same sequence cannot be drawn twice
            selected_observation = initial_biom.random_obs_by_sample(sample_name)
            selected_observation_id = selected_observation['id']
            initial_biom.subtract_count( selected_observation_id, sample_name, 1 )
            # Put in new BIOM (the observation is declared on its first draw only)
            if selected_observation_id not in observations_already_added:
                new_biom.add_observation( selected_observation_id, initial_biom.get_observation_metadata(selected_observation_id) )
                observations_already_added.add( selected_observation_id )
            new_biom.add_count( selected_observation_id, sample_name, 1 )
    BiomIO.write( output_biom, new_biom )
開發者ID:geraldinepascal,項目名稱:FROGS,代碼行數:37,代碼來源:biomTools.py

示例2: process

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
def process( in_biom, out_biom, out_metadata ):
    """
    @summary: Converts a BIOM with detailed affiliation metadata into a
              standard BIOM: the 'blast_affiliations' metadata are moved to a
              TSV file, the list metadata are flattened to ';'-joined strings
              and a 'taxonomy' metadata is built from 'blast_taxonomy'.
    @param in_biom: [str] Path to the processed BIOM file.
    @param out_biom: [str] Path to the written BIOM file.
    @param out_metadata: [str] Path to the written TSV file containing one line
                         per blast affiliation.
    """
    ordered_blast_keys = ["taxonomy", "subject", "evalue", "perc_identity", "perc_query_coverage", "aln_length"] # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    # The context manager guarantees the metadata file is flushed and closed,
    # even if the BIOM parsing or an observation processing fails
    with open( out_metadata, "w" ) as FH_metadata:
        FH_metadata.write( "#OTUID\t" + "\t".join(ordered_blast_keys) + "\n" )
        biom = BiomIO.from_json( in_biom )
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a snapshot of the keys: 'blast_affiliations' is deleted in the loop
            for metadata_key in list(metadata.keys()):
                if metadata_key == "blast_affiliations": # Extract blast_affiliations metadata in metadata_file
                    if metadata[metadata_key] is not None:
                        for current_affi in metadata[metadata_key]:
                            if isinstance(current_affi["taxonomy"], (list, tuple)):
                                current_affi["taxonomy"] = ";".join( current_affi["taxonomy"] )
                            FH_metadata.write( observation["id"] + "\t" + "\t".join([str(current_affi[item]) for item in ordered_blast_keys]) + "\n" )
                    del metadata[metadata_key]
                elif metadata[metadata_key] is not None: # All list are transformed in string
                    if isinstance(metadata[metadata_key], (list, tuple)):
                        metadata[metadata_key] = ";".join( map(str, metadata[metadata_key]) )
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append( observation["id"] )
                    metadata["taxonomy"] = list()
                else:
                    # blast_taxonomy is a ';'-joined string at this point
                    split_taxonomy = metadata["blast_taxonomy"].split(";")
                    taxonomy_depth = len(split_taxonomy)
                    metadata["taxonomy"] = split_taxonomy
    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write( out_biom, biom )
開發者ID:geraldinepascal,項目名稱:FROGS,代碼行數:35,代碼來源:biom_to_stdBiom.py

示例3: filter_biom

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
def filter_biom( removed_observations, in_biom, out_biom ):
    """
    @summary: Writes a copy of the BIOM without the specified observations.
    @param removed_observations: [dict] The observations to remove (each key
                                 is an observation name).
    @param in_biom: [str] Path to the BIOM file to clean.
    @param out_biom: [str] Path to the cleaned BIOM file.
    """
    # Load, drop the unwanted observations, then save the result
    cleaned_biom = BiomIO.from_json( in_biom )
    cleaned_biom.remove_observations( removed_observations )
    BiomIO.write( out_biom, cleaned_biom )
開發者ID:sandrineperrin,項目名稱:frogs-docker,代碼行數:12,代碼來源:removeConta.py

示例4: remove_observations

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
def remove_observations( removed_observations, input_biom, output_biom ):
    """
    @summary: Writes a copy of the BIOM without the listed observations.
    @param removed_observations: [list] Names of the observations to discard.
    @param input_biom: [str] Path to the BIOM file to filter.
    @param output_biom: [str] Path to the filtered BIOM file.
    """
    # Load, drop the listed observations, then save the result
    filtered_biom = BiomIO.from_json(input_biom)
    filtered_biom.remove_observations(removed_observations)
    BiomIO.write(output_biom, filtered_biom)
開發者ID:geraldinepascal,項目名稱:FROGS,代碼行數:12,代碼來源:filters.py

示例5: to_biom

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
def to_biom( clusters_file, count_file, output_biom, size_separator ):
    """
    @summary : Write a biom file from swarm results.
    @param clusters_file : [str] path to the '.clstr' file. Each line lists the
                            members of one cluster separated by whitespace;
                            the first member is used as seed.
    @param count_file : [str] path to the count file. It contains the count of
                         sequences by sample of each preclusters.
                         Line format : "Precluster_id    nb_in_sampleA    nb_in_sampleB"
    @param output_biom : [str] path to the output file.
    @param size_separator : [str] the pre-cluster abundance separator.
    """
    biom = Biom( generated_by='swarm', matrix_type="sparse" )

    # Preclusters count by sample
    preclusters_count = dict()
    with open( count_file ) as count_fh:
        # The header gives the samples names after the first column
        samples = count_fh.readline().strip().split()[1:]
        for line in count_fh:
            precluster_id, count_str = line.strip().split(None, 1)
            preclusters_count[precluster_id] = count_str # For large dataset, storing the counts as a string consumes less RAM than a parsed structure

    # Add samples
    for sample_name in samples:
        biom.add_sample( sample_name )

    # Process count
    with open( clusters_file ) as clusters_fh:
        for cluster_idx, line in enumerate(clusters_fh, 1):
            cluster_name = "Cluster_" + str(cluster_idx)
            cluster_count = {key:0 for key in samples}
            line_fields = line.strip().split()
            # Retrieve count by sample
            for seq_id in line_fields:
                real_seq_id = seq_id.rsplit(size_separator, 1)[0]
                sample_counts = preclusters_count[real_seq_id].split()
                for sample_idx, sample_name in enumerate(samples):
                    cluster_count[sample_name] += int(sample_counts[sample_idx])
                # Release the counts string once the precluster has been consumed
                preclusters_count[real_seq_id] = None
            # Add cluster on biom
            biom.add_observation( cluster_name, {'seed_id':line_fields[0].rsplit(size_separator, 1)[0]} )
            observation_idx = biom.find_idx("observation", cluster_name)
            for sample_idx, sample_name in enumerate(samples):
                # Only the non-null counts are stored in the sparse matrix
                if cluster_count[sample_name] > 0:
                    biom.data.change( observation_idx, sample_idx, cluster_count[sample_name] )

    # Write
    BiomIO.write( output_biom, biom )
開發者ID:geraldinepascal,項目名稱:FROGS,代碼行數:52,代碼來源:swarm2biom.py

示例6: aff_to_metadata

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
def aff_to_metadata(reference_file, biom_in, biom_out, blast_files=None, rdp_files=None):
    """
    @summary: Add taxonomy metadata on biom file from blast and/or RDPClassifier results.
    @param reference_file: [str] The path to the reference file.
    @param biom_in: [str] The path to the Biom file to process.
    @param biom_out: [str] The path to the biom output file.
    @param blast_files: [list] the list of the path to the blast results in tabular format (outfmt 6 with NCBI Blast+).
                        With None the blast metadata are not added.
    @param rdp_files: [list] the list of path to the RDPClassifier results.
                      With None the RDP metadata are not added.
    """
    # Build an hash with the taxonomy for each gene (key=gene_id ; value=gene_taxonomy)
    taxonomy_by_reference = get_tax_from_fasta( reference_file )

    # Retrieve blast clusters annotations
    cluster_blast_annot = dict()
    if blast_files is not None:
        cluster_blast_annot = get_bests_blast_affi( blast_files, taxonomy_by_reference )
    # The references taxonomies are no longer needed: free the memory
    del taxonomy_by_reference

    # Retrieve rdp clusters annotations
    cluster_rdp_annot = dict()
    if rdp_files is not None:
        cluster_rdp_annot = get_rdp_affi( rdp_files )

    # Add metadata to biom
    biom = BiomIO.from_json(biom_in)
    for cluster in biom.get_observations():
        cluster_id = cluster["id"]
        # Blast
        if blast_files is not None:
            # Defaults for an observation without any blast hit
            blast_taxonomy = None
            blast_affiliations = list()
            if cluster_id in cluster_blast_annot: # Current observation has a match
                blast_affiliations = cluster_blast_annot[cluster_id]['alignments']
                blast_taxonomy = get_tax_consensus( [alignment['taxonomy'] for alignment in blast_affiliations] )
            biom.add_metadata( cluster_id, "blast_affiliations", blast_affiliations, "observation" )
            biom.add_metadata( cluster_id, "blast_taxonomy", blast_taxonomy, "observation" )
        # RDP
        if rdp_files is not None:
            # Defaults for an observation without RDP result
            rdp_taxonomy = None
            rdp_bootstrap = None
            if cluster_id in cluster_rdp_annot:
                rdp_taxonomy = cluster_rdp_annot[cluster_id]['taxonomy']
                rdp_bootstrap = cluster_rdp_annot[cluster_id]['bootstrap']
            biom.add_metadata(cluster_id, "rdp_taxonomy", rdp_taxonomy, "observation")
            biom.add_metadata(cluster_id, "rdp_bootstrap", rdp_bootstrap, "observation")
    BiomIO.write(biom_out, biom)
開發者ID:sandrineperrin,項目名稱:frogs-docker,代碼行數:48,代碼來源:addAffiliation2biom.py

示例7: process

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
def process( args ):
    """
    @summary: Runs the rarefaction and the taxonomy tree commands on the input
              BIOM and writes the summary. When the BIOM holds multiple
              affiliations without consensus tag, a temporary BIOM is first
              produced with the taxonomy of the first affiliation.
    @param args: [Namespace] The script parameters (reads output_file,
                 input_biom, taxonomy_tag, multiple_tag, tax_consensus_tag,
                 taxonomic_ranks, rarefaction_ranks, log_file and debug).
    """
    output_dir = os.path.split(args.output_file)[0]
    tmp_files = TmpFiles( output_dir )

    try:
        # Select the BIOM and the taxonomy tag given to the sub-commands
        processed_biom = args.input_biom
        tax_tag = args.taxonomy_tag
        has_multiple = args.multiple_tag is not None
        if has_multiple and args.tax_consensus_tag is not None:
            tax_tag = args.tax_consensus_tag
        elif has_multiple:
            # No consensus: store the taxonomy of the first affiliation in a temporary BIOM
            tax_tag = "Used_taxonomy_FROGS-affi"
            processed_biom = tmp_files.add( "tax.biom" )
            biom = BiomIO.from_json( args.input_biom )
            for observation in biom.get_observations():
                obs_metadata = observation["metadata"]
                affiliations = obs_metadata[args.multiple_tag]
                if len(affiliations) > 0:
                    obs_metadata[tax_tag] = affiliations[0][args.taxonomy_tag]
            BiomIO.write( processed_biom, biom )
            del biom

        # Rarefaction
        tax_depth = list()
        for rank in args.rarefaction_ranks:
            tax_depth.append( args.taxonomic_ranks.index(rank) )
        rarefaction_cmd = Rarefaction(processed_biom, tmp_files, tax_tag, tax_depth)
        rarefaction_cmd.submit( args.log_file )
        rarefaction_files = rarefaction_cmd.output_files

        # Taxonomy tree
        tree_count_file = tmp_files.add( "taxCount.enewick" )
        tree_ids_file = tmp_files.add( "taxCount_ids.tsv" )
        tree_cmd = TaxonomyTree(processed_biom, tax_tag, tree_count_file, tree_ids_file)
        tree_cmd.submit( args.log_file )

        # Writes summary
        write_summary( args.output_file, args.input_biom, tree_count_file, tree_ids_file, rarefaction_files, args )
    finally:
        # Temporary files are kept only in debug mode
        if not args.debug:
            tmp_files.deleteAll()
開發者ID:geraldinepascal,項目名稱:FROGS,代碼行數:38,代碼來源:affiliations_stat.py

示例8: FastaIO

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
    cmd_grinder2biom = os.path.join(os.path.dirname(os.path.abspath(__file__)), "grinder2biom.py") + \
        " --affiliation " + os.path.abspath(args.databank) + \
        " --output " + real_biom + \
        " --samples"
    for current_sample in samples:
        cmd_grinder2biom += " '" + current_sample['name'] + ":" + current_sample['path'] + "'"
    subprocess.check_call( cmd_grinder2biom, shell=True )

    # Add reference id in checked BIOM
    biom = BiomIO.from_json( args.checked_biom )
    fasta = FastaIO( args.checked_fasta )
    for record in fasta:
        reference = re.search("reference=([^\s]+)", record.description).group(1)
        biom.add_metadata( record.id, "grinder_source", reference, "observation" )
    fasta.close()
    BiomIO.write( checked_biom, biom )
    del(biom)

    # Compare expected to obtained
    for current_sample in samples:
        print current_sample['name']
        cmd_compareSample = os.path.join(os.path.dirname(os.path.abspath(__file__)), "biomCmpTax.py") \
            + " --real-biom " + os.path.abspath(real_biom) \
            + " --real-tax-key 'real_taxonomy'" \
            + " --checked-biom " + os.path.abspath(checked_biom) \
            + " --checked-tax-key '" + args.taxonomy_key + "'" \
            + (" --multi-affiliations" if args.multi_affiliations else "") \
            + (" --uniq-groups " + args.uniq_groups if args.uniq_groups is not None else "") \
            + " --sample " + current_sample['name']
        print subprocess.check_output( cmd_compareSample, shell=True )
        print ""
開發者ID:geraldinepascal,項目名稱:FROGS,代碼行數:33,代碼來源:affiliationsAssessment.py

示例9: remove_chimera_biom

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
def remove_chimera_biom( samples, in_biom_file, out_biom_file, lenient_filter, global_report, bySample_report ):
    """
    @summary: Removes the chimera observation from BIOM.
    @param samples: [dict] The chimera observations by sample. Example for
                    sample splA: sample['splA']['chimera_path'] where the value
                    is the path to the file containing the list of the chimera
                    observations names (one name per line).
    @param in_biom_file: [str] The path to the BIOM file to filter.
    @param out_biom_file: [str] The path to the BIOM after filter.
    @param lenient_filter: [bool] True: removes one sequence in all samples
                           only if it is detected as chimera in all samples
                           where it is present. With False removes one
                           sequence in all samples if it is detected as chimera
                           in at least one sample.
    @param global_report: [dict] This dictionary is updated with the global
                          number of removed observations, the global removed
                          abundance, ... The keys 'nb_ambiguous',
                          'abundance_ambiguous', 'nb_removed',
                          'abundance_removed', 'nb_kept' and 'abundance_kept'
                          must already exist: they are incremented.
    @param bySample_report: [dict] This dictionary is updated to add by sample the
                            number of removed observations, the removed
                            abundance, ... The entry of each sample of
                            'samples' is (re)initialised.
    """
    nb_sample_by_chimera = dict()

    # Init bySample_report
    for sample_name in samples:
        bySample_report[sample_name] = {
            'nb_kept': 0,
            'kept_abundance': 0,
            'nb_removed': 0,
            'removed_abundance': 0,
            'removed_max_abundance': 0
        }

    # Retrieve chimera: count in how many samples each observation is flagged
    for sample_name in samples:
        with open( samples[sample_name]['chimera_path'] ) as chimera_fh:
            for line in chimera_fh:
                observation_name = line.strip()
                nb_sample_by_chimera[observation_name] = nb_sample_by_chimera.get(observation_name, 0) + 1

    # Remove chimera
    removed_chimera = list()
    biom = BiomIO.from_json(in_biom_file)
    for chimera_name, nb_sample_flagged in nb_sample_by_chimera.items():
        is_always_chimera = True
        # Materialised once: it is used for the count and for the by sample metrics
        samples_with_obs = list( biom.get_samples_by_observation(chimera_name) )
        observation_abundance = biom.get_observation_count(chimera_name)
        if len(samples_with_obs) != nb_sample_flagged:
            is_always_chimera = False
            global_report['nb_ambiguous'] += 1
            global_report['abundance_ambiguous'] += observation_abundance
            # Parenthesised single argument: same output with the Python 2 statement and the Python 3 function
            print( "'" + chimera_name + "' is not interpreted as chimera in all samples where it is present." )
        if not lenient_filter or is_always_chimera:
            removed_chimera.append(chimera_name)
            # Global metrics
            global_report['nb_removed'] += 1
            global_report['abundance_removed'] += observation_abundance
            # By sample metrics
            for sample in samples_with_obs:
                sample_report = bySample_report[sample['id']]
                sample_count = biom.get_count(chimera_name, sample['id'])
                sample_report['nb_removed'] += 1
                sample_report['removed_abundance'] += sample_count
                sample_report['removed_max_abundance'] = max(sample_report['removed_max_abundance'], sample_count)
    biom.remove_observations(removed_chimera)

    # Nb non-chimera
    for observation_name in biom.get_observations_names():
        global_report['nb_kept'] += 1
        global_report['abundance_kept'] += biom.get_observation_count(observation_name)
        # By sample metrics
        for sample in biom.get_samples_by_observation(observation_name):
            sample_count = biom.get_count(observation_name, sample['id'])
            bySample_report[sample['id']]['nb_kept'] += 1
            bySample_report[sample['id']]['kept_abundance'] += sample_count
    BiomIO.write(out_biom_file, biom)
開發者ID:sandrineperrin,項目名稱:frogs-docker,代碼行數:80,代碼來源:parallelChimera.py

示例10: Biom

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
    biom = Biom( generated_by="grinder", matrix_type="sparse" )

    # Set observations count
    for sample_name in args.samples:
        biom.add_sample( sample_name )
        fh_abund = open( args.samples[sample_name] )
        for line in fh_abund: # Content format: "# rank<TAB>seq_id<TAB>rel_abund_perc"
            if not line.startswith('#'):
                fields = line.strip().split()
                try:
                    biom.add_observation( fields[1] )
                except: # already exist
                    pass
                biom.change_count( fields[1], sample_name, int(float(fields[2])*100000000000000) )################## depend de la precision grinder
        fh_abund.close()

    # Set taxonomy metadata
    fh_classif = FastaIO( args.affiliation )
    for record in fh_classif:
        try:
            metadata = biom.get_observation_metadata( record.id )
            if metadata is None or not metadata.has_key( taxonomy_key ):
                taxonomy = getCleanedTaxonomy(record.description)
                biom.add_metadata( record.id, taxonomy_key, taxonomy, "observation" )
        except ValueError: # is not in BIOM
            pass
    fh_classif.close()

    # Write BIOM
    BiomIO.write( args.output, biom )
開發者ID:geraldinepascal,項目名稱:FROGS,代碼行數:32,代碼來源:grinder2biom.py

示例11: tsv_to_biom

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
def tsv_to_biom( input_tsv, multi_hit_dict, fields, samples_names, output_biom, output_fasta ):
    """
    @summary: Convert TSV file to Biom file.
    @param input_tsv: [str] Path to the TSV file.
    @param multi_hit_dict: [dict] Dictionary describing equivalent multi blast hit :
    dict[observation_name]=[ {"blast_taxonomy":taxonomy, "blast_subject":subject, "blast_perc_identity": per_id, "blast_perc_query_coverage":per_cov, "blast_evalue":eval, "blast_aln_length":aln}]
                           With None the multi-subject lines are processed as single hit lines.
    @param fields: [list] column name to include as metadata (must at least contain observation_name): observation_sum and seed_sequence will be excluded, rdp_tax_and_bootstrap will be split in two metadata
    @param samples_names: [list] list of sample names.
    @param output_biom: [str] Path to the output file (format : BIOM).
    @param output_fasta: [str] Path to the output file (format : fasta). With None the seeds sequences are not written.
    @raise Exception: When output_fasta is required but the TSV has no seed
                      sequence column, or when the multi-hit information is
                      missing or inconsistent for a multi-subject observation.
    @note: Only the first part of the function is visible here: it fills
           clusters_count and clusters_metadata. The remaining code is elided.
    """
#     biom = Biom( generated_by='frogs', matrix_type="sparse" )
    biom = Biom( matrix_type="sparse" )

    seed_seq_idx = -1 
    metadata_index = dict()
    sample_index = dict()
    clusters_count = dict()     # By cluster name: the dict of count by sample
    clusters_metadata = dict()  # By cluster name: the dict of metadata
    # NOTE(review): in_fh is not closed in the visible part of the function - confirm it is closed in the elided part
    in_fh = open( input_tsv )

    if not output_fasta is None:
        Fasta_fh=FastaIO(output_fasta , "w" )

    # parse header and store column index (the leading "#" of the header is ignored)
    header=in_fh.readline()
    if header.startswith("#"):
        header=header[1:]
    header = header.strip()
    # metadata_index and sample_index are keyed by column index (see the lookups below)
    seed_seq_idx, metadata_index, sample_index = header_line_dict(fields,header,samples_names)
    if not output_fasta is None and seed_seq_idx == -1:
        raise Exception("\nYou want to extract seed fasta sequence but there is no seed_sequence column in your TSV file\n\n")

    # count by sample, and metadata
    for line in in_fh:

        cluster_name=""
        line_list=line.strip().split("\t")
        count_by_sample = {}
        metadata_dict = {}
        # parse columns
        for idx,val in enumerate(line_list):
            # recover metadata
            if idx in metadata_index:
                if metadata_index[idx]=="observation_name" :
                    cluster_name = val
                else:
                    metadata_dict[metadata_index[idx]] = val
            # recover samples count
            # NOTE(review): val is a str (it comes from split), so "val > 0" compares a
            # str with an int: always True on Python 2 (null counts are stored too) and
            # a TypeError on Python 3. "int(val) > 0" looks intended - confirm against
            # the elided code before changing.
            elif idx in sample_index and val > 0:
                count_by_sample[sample_index[idx]] = int(val)
            # recover seed sequence
            elif idx == seed_seq_idx:
                seed_seq = val

        # if fasta output file => store the seed sequence
        # NOTE(review): seed_seq is not reset between lines; a line without the seed column would reuse the previous value
        if not output_fasta is None:
            seq = Sequence( cluster_name, seed_seq) 
            Fasta_fh.write(seq)

        # the taxonomy is stored as a list of ranks
        if "taxonomy" in metadata_dict:
            metadata_dict["taxonomy"] = metadata_dict["taxonomy"].split(";")

        # format rdp taxonomy to fit BIOM format
        if "rdp_tax_and_bootstrap" in metadata_dict:
            metadata_dict["rdp_taxonomy"]=[]
            metadata_dict["rdp_bootstrap"]=[]
            # the ';'-separated fields are read by pairs: a taxon followed by its bootstrap between parentheses
            tax = metadata_dict["rdp_tax_and_bootstrap"].rstrip(";").split(";")
            for i in range(0,len(tax),2):
                metadata_dict["rdp_taxonomy"].append(tax[i])
                metadata_dict["rdp_bootstrap"].append(tax[i+1].replace("(","").replace(")",""))
            metadata_dict.pop("rdp_tax_and_bootstrap")

        # format blast taxonomy to fit BIOM format (one consensus blast_taxonomy and possibly several detailed blast_affiliations)
        if "blast_taxonomy" in metadata_dict:
            metadata_dict["blast_taxonomy"] = metadata_dict["blast_taxonomy"].split(";")

            # check multihit blast : filter non consistent taxonomy hit with blast_taxonomy (if TSV modified), and compute consensus tax (if multihit line suppressed)
            if metadata_dict["blast_subject"] == "multi-subject" and not multi_hit_dict is None:
                if not cluster_name in multi_hit_dict:
                    raise Exception("\n"+cluster_name+" has multi-subject tag but is not present in your multi-hit TSV file. Please, provide the original multi-hit TSV file.\n\n")
                else:
                    metadata_dict["blast_taxonomy"], metadata_dict["blast_affiliations"] = observation_blast_parts(metadata_dict, multi_hit_dict[cluster_name])
                    if metadata_dict["blast_affiliations"] == []:
                        raise Exception("\nyour multihit TSV file is no more consistent with your abundance TSV file for (at least) "+cluster_name+"\n\n")
            # no multi tag= blast affiliation is equal to blast_taxonomy
            else:
                # the single affiliation is built from every "blast*" metadata, without the "blast_" prefix
                blast_dict={key.replace("blast_",""):metadata_dict[key] for key in metadata_dict if key.startswith("blast")}
                metadata_dict["blast_affiliations"]=[blast_dict]

            # filter blast metadata which are moved to blast_affiliations (only blast_taxonomy stays at the observation level)
            for metadata in metadata_dict["blast_affiliations"][0]:
                if not metadata == "taxonomy":
                    metadata_dict.pop("blast_"+metadata)

        # add cluster and count to clusters_count dict
        clusters_count[cluster_name] = count_by_sample
        # ok print clusters_count[cluster_name].keys(), "CDT0#LOT05" in clusters_count[cluster_name], "CDT0#LOT02" in clusters_count[cluster_name]
        # add cluster and metadata to clusters_metadata dict
        clusters_metadata[cluster_name] = metadata_dict
#.........這裏部分代碼省略.........
開發者ID:sandrineperrin,項目名稱:frogs-docker,代碼行數:103,代碼來源:tsv2biom.py

示例12: UTAX

# 需要導入模塊: from frogsBiom import BiomIO [as 別名]
# 或者: from frogsBiom.BiomIO import write [as 別名]
#
##################################################################################################################################################
if __name__ == "__main__":
    # Manage parameters
    parser = argparse.ArgumentParser(description="Add taxonomy from UTAX result in BIOM file.")
    parser.add_argument( '-t', '--taxonomy-tag', default="taxonomy", help="The taxonomy tag in BIOM file. [Default: taxonomy]")
    parser.add_argument( '-v', '--version', action='version', version=__version__)
    # Inputs
    group_input = parser.add_argument_group('Inputs')
    group_input.add_argument('-f', '--input-fasta', required=True, help='Path to the sequence file outputed by UTAX (format: fasta).')
    group_input.add_argument('-b', '--input-biom', required=True, help='Path to the abundance file (format: BIOM).')
    # Outputs
    group_output = parser.add_argument_group('Outputs')
    group_output.add_argument('-o', '--output-biom', required=True, help='Path to the abundance file with taxonomy (format: BIOM).')
    args = parser.parse_args()

    # Process
    # Compiled once, from a raw string: the previous non-raw pattern relied on
    # invalid escape sequences (\; \= \d) which newer Pythons warn about.
    # Group 1 is the observation ID, group 2 the UTAX taxonomy.
    utax_id_pattern = re.compile( r"^([^;]+);size=\d+;tax=(.+)$" )
    biom = BiomIO.from_json( args.input_biom )
    fasta = FastaIO( args.input_fasta )
    try:
        for record in fasta:
            # record.id example: Cluster_1;size=19714;tax=d:Bacteria(1.0000),p:"Proteobacteria"(0.9997),c:Alphaproteobacteria(0.9903),o:Rhodospirillales(0.9940),f:Acetobacteraceae(0.9887),g:Humitalea(0.9724);
            match = utax_id_pattern.search( record.id )
            if match is None:
                raise Exception("ID and taxonomy cannot be retrieved from '" + record.id + "'")
            observation_id, utax_taxonomy = match.groups()
            biom.add_metadata( observation_id, args.taxonomy_tag, utax_taxonomy, "observation" )
    finally:
        # The FASTA handle is released whatever happens (malformed ID, unknown observation, ...)
        fasta.close()
    BiomIO.write( args.output_biom, biom )
開發者ID:geraldinepascal,項目名稱:FROGS,代碼行數:32,代碼來源:addUtaxFromFasta.py


注:本文中的frogsBiom.BiomIO.write方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。