当前位置: 首页>>代码示例>>Python>>正文


Python BiomIO.write方法代码示例

本文整理汇总了Python中frogsBiom.BiomIO.write方法的典型用法代码示例。如果您正苦于以下问题:Python BiomIO.write方法的具体用法?Python BiomIO.write怎么用?Python BiomIO.write使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在frogsBiom.BiomIO的用法示例。


在下文中一共展示了BiomIO.write方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: sampling_by_sample

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
def sampling_by_sample( input_biom, output_biom, nb_sampled=None, sampled_ratio=None ):
    """
    @summary: Writes a BIOM after a random sampling in each sample.
    @param input_biom: [str] Path to the processed BIOM.
    @param output_biom: [str] Path to outputed BIOM.
    @param nb_sampled: [int] Number of sampled sequences by sample.
    @param sampled_ratio: [float] Ratio of sampled sequences by sample.
    @note: nb_sampled and sampled_ratio are mutually exclusive.
    @raise Exception: If the number of sequences to draw in a sample is
                      greater than the number of sequences it contains.
    """
    initial_biom = BiomIO.from_json( input_biom )
    new_biom = Biom(
                    matrix_type="sparse",
                    generated_by="Sampling " + (str(nb_sampled) if nb_sampled is not None else str(sampled_ratio) + "%" ) + " elements by sample from " + input_biom
    )
    # Observations already declared in the new BIOM (an observation can be drawn in several samples)
    observations_already_added = set()
    for sample_name in initial_biom.get_samples_names():
        new_biom.add_sample( sample_name, initial_biom.get_sample_metadata(sample_name) )
        sample_seq = initial_biom.get_sample_count(sample_name)
        sample_nb_sampled = nb_sampled
        if nb_sampled is None:
            sample_nb_sampled = int(sample_seq * sampled_ratio)
        # The check uses the effective number of draws: nb_sampled is None in ratio mode
        if sample_seq < sample_nb_sampled:
            raise Exception( str(sample_nb_sampled) + " sequences cannot be sampled in sample '" + str(sample_name) + "'. It only contains " + str(sample_seq) + " sequences." )
        for _ in range(sample_nb_sampled):
            # Take an observation in initial BIOM (sampling without replacement: the drawn sequence is removed)
            selected_observation = initial_biom.random_obs_by_sample(sample_name)
            selected_observation_id = selected_observation['id']
            initial_biom.subtract_count( selected_observation_id, sample_name, 1 )
            # Put in new BIOM
            if selected_observation_id not in observations_already_added:
                new_biom.add_observation( selected_observation_id, initial_biom.get_observation_metadata(selected_observation_id) )
                observations_already_added.add( selected_observation_id )
            new_biom.add_count( selected_observation_id, sample_name, 1 )
    BiomIO.write( output_biom, new_biom )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:37,代码来源:biomTools.py

示例2: process

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
def process( in_biom, out_biom, out_metadata ):
    """
    @summary: Converts a BIOM with blast affiliations metadata into a BIOM
              where every list metadata is a string and where the 'taxonomy'
              metadata is set from 'blast_taxonomy'. The detailed
              'blast_affiliations' are moved in a separated TSV file.
    @param in_biom: [str] Path to the processed BIOM.
    @param out_biom: [str] Path to the outputed BIOM.
    @param out_metadata: [str] Path to the outputed TSV file containing one
                         line by blast affiliation.
    """
    ordered_blast_keys = ["taxonomy", "subject", "evalue", "perc_identity", "perc_query_coverage", "aln_length"] # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    # The context manager guarantees that the metadata file is flushed and closed, even on error
    with open( out_metadata, "w" ) as FH_metadata:
        FH_metadata.write( "#OTUID\t" + "\t".join(ordered_blast_keys) + "\n" )
        biom = BiomIO.from_json( in_biom )
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a copy of the keys: 'blast_affiliations' is deleted during the loop
            for metadata_key in list(metadata.keys()):
                metadata_value = metadata[metadata_key]
                if metadata_key == "blast_affiliations": # Extract blast_affiliations metadata in metadata_file
                    if metadata_value is not None:
                        for current_affi in metadata_value:
                            if isinstance(current_affi["taxonomy"], (list, tuple)):
                                current_affi["taxonomy"] = ";".join( current_affi["taxonomy"] )
                            FH_metadata.write( observation["id"] + "\t" + "\t".join([str(current_affi[item]) for item in ordered_blast_keys]) + "\n" )
                    del metadata[metadata_key]
                elif isinstance(metadata_value, (list, tuple)): # All list are transformed in string
                    metadata[metadata_key] = ";".join( map(str, metadata_value) )
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append( observation["id"] )
                    metadata["taxonomy"] = list()
                else:
                    taxonomy_ranks = metadata["blast_taxonomy"].split(";")
                    taxonomy_depth = len(taxonomy_ranks) # The depth of the last classified observation is kept
                    metadata["taxonomy"] = taxonomy_ranks
    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write( out_biom, biom )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:35,代码来源:biom_to_stdBiom.py

示例3: filter_biom

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
def filter_biom( removed_observations, in_biom, out_biom ):
    """
    @summary: Loads a BIOM, removes the specified observations and writes the result.
    @param removed_observations: [dict] The observations to remove (each key is an observation name).
    @param in_biom: [str] Path to the BIOM file to read.
    @param out_biom: [str] Path to the BIOM file written after the removal.
    """
    cleaned_biom = BiomIO.from_json(in_biom)
    cleaned_biom.remove_observations(removed_observations)
    BiomIO.write(out_biom, cleaned_biom)
开发者ID:sandrineperrin,项目名称:frogs-docker,代码行数:12,代码来源:removeConta.py

示例4: remove_observations

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
def remove_observations( removed_observations, input_biom, output_biom ):
    """
    @summary: Writes the input BIOM deprived of the listed observations.
    @param removed_observations: [list] Names of the observations to discard.
    @param input_biom: [str] Path to the BIOM file to read.
    @param output_biom: [str] Path to the BIOM file to write.
    """
    filtered_biom = BiomIO.from_json( input_biom )
    filtered_biom.remove_observations( removed_observations )
    BiomIO.write( output_biom, filtered_biom )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:12,代码来源:filters.py

示例5: to_biom

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
def to_biom( clusters_file, count_file, output_biom, size_separator ):
    """
    @summary : Write a biom file from swarm results.
    @param clusters_file : [str] path to the '.clstr' file. Each line lists
                            the members of one cluster separated by
                            whitespaces; the first member is used as seed.
    @param count_file : [str] path to the count file. It contains the count of
                         sequences by sample of each preclusters.
                         Line format : "Precluster_id    nb_in_sampleA    nb_in_sampleB"
    @param output_biom : [str] path to the output file.
    @param size_separator : [str] the pre-cluster abundance separator.
    """
    biom = Biom( generated_by='swarm', matrix_type="sparse" )

    # Preclusters count by sample
    preclusters_count = dict()
    with open( count_file ) as count_fh:
        samples = count_fh.readline().strip().split()[1:] # The header's first field is the ID column title
        for line in count_fh:
            precluster_id, count_str = line.strip().split(None, 1)
            preclusters_count[precluster_id] = count_str # For large dataset store count into a string consumes minus RAM than a sparse count

    # Add samples
    for sample_name in samples:
        biom.add_sample( sample_name )

    # Process count (clusters are named Cluster_1, Cluster_2, ... in file order)
    with open( clusters_file ) as clusters_fh:
        for cluster_idx, line in enumerate(clusters_fh, 1):
            cluster_name = "Cluster_" + str(cluster_idx)
            cluster_count = {key:0 for key in samples}
            line_fields = line.strip().split()
            # Retrieve count by sample
            for seq_id in line_fields:
                real_seq_id = seq_id.rsplit(size_separator, 1)[0] # Remove the abundance suffix
                sample_counts = preclusters_count[real_seq_id].split()
                for sample_idx, sample_name in enumerate(samples):
                    cluster_count[sample_name] += int(sample_counts[sample_idx])
                preclusters_count[real_seq_id] = None # The count string is no longer needed: release it
            # Add cluster on biom
            biom.add_observation( cluster_name, {'seed_id':line_fields[0].rsplit(size_separator, 1)[0]} )
            observation_idx = biom.find_idx("observation", cluster_name)
            for sample_idx, sample_name in enumerate(samples):
                if cluster_count[sample_name] > 0:
                    biom.data.change( observation_idx, sample_idx, cluster_count[sample_name] )

    # Write
    BiomIO.write( output_biom, biom )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:52,代码来源:swarm2biom.py

示例6: aff_to_metadata

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
def aff_to_metadata(reference_file, biom_in, biom_out, blast_files=None, rdp_files=None):
    """
    @summary: Add taxonomy metadata on biom file from a blast result.
    @param reference_file: [str] The path to the reference file.
    @param biom_in: [str] The path to the Biom file to process.
    @param biom_out: [str] The path to the biom output file.
    @param blast_files: [list] the list of the path to the blast results in tabular format (outfmt 6 with NCBI Blast+).
    @param rdp_files: [list] the list of path to the RDPClassifier results.
    @note: When blast_files is provided, every observation receives the
           'blast_affiliations' and 'blast_taxonomy' metadata (empty list and
           None for an observation without match). When rdp_files is
           provided, every observation receives the 'rdp_taxonomy' and
           'rdp_bootstrap' metadata (None without match).
    """
    # Build an hash with the taxonomy for each gene (key=gene_id ; value=gene_taxonomy)
    taxonomy_by_reference = get_tax_from_fasta( reference_file )

    # Retrieve blast clusters annotations
    cluster_blast_annot = dict()
    if blast_files is not None:
        cluster_blast_annot = get_bests_blast_affi( blast_files, taxonomy_by_reference )
    del taxonomy_by_reference # No longer needed: release it before loading the BIOM

    # Retrieve rdp clusters annotations
    cluster_rdp_annot = dict()
    if rdp_files is not None:
        cluster_rdp_annot = get_rdp_affi( rdp_files )

    # Add metadata to biom
    biom = BiomIO.from_json(biom_in)
    for cluster in biom.get_observations():
        cluster_id = cluster["id"]
        # Blast
        if blast_files is not None:
            blast_taxonomy = None
            blast_affiliations = list()
            if cluster_id in cluster_blast_annot: # Current observation has a match
                alignments = cluster_blast_annot[cluster_id]['alignments']
                blast_taxonomy = get_tax_consensus( [alignment['taxonomy'] for alignment in alignments] )
                blast_affiliations = alignments
            biom.add_metadata( cluster_id, "blast_affiliations", blast_affiliations, "observation" )
            biom.add_metadata( cluster_id, "blast_taxonomy", blast_taxonomy, "observation" )
        # RDP
        if rdp_files is not None:
            rdp_taxonomy = None
            rdp_bootstrap = None
            if cluster_id in cluster_rdp_annot: # Current observation has a match
                rdp_taxonomy = cluster_rdp_annot[cluster_id]['taxonomy']
                rdp_bootstrap = cluster_rdp_annot[cluster_id]['bootstrap']
            biom.add_metadata(cluster_id, "rdp_taxonomy", rdp_taxonomy, "observation")
            biom.add_metadata(cluster_id, "rdp_bootstrap", rdp_bootstrap, "observation")
    BiomIO.write(biom_out, biom)
开发者ID:sandrineperrin,项目名称:frogs-docker,代码行数:48,代码来源:addAffiliation2biom.py

示例7: process

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
def process( args ):
    """
    @summary: Runs the rarefaction and the taxonomy tree steps on the input
              BIOM, then writes the summary. When a multiple affiliations tag
              is given without consensus tag, a temporary BIOM is created in
              which the taxonomy of the first affiliation of each observation
              is stored under a dedicated tag.
    @param args: [Namespace] The script arguments (output_file, input_biom,
                 taxonomy_tag, multiple_tag, tax_consensus_tag,
                 taxonomic_ranks, rarefaction_ranks, log_file, debug).
    """
    tmp_files = TmpFiles( os.path.split(args.output_file)[0] )

    try:
        # Choose the BIOM and the taxonomy tag used by the downstream steps
        working_biom = args.input_biom
        taxonomy_tag = args.taxonomy_tag
        has_multiple_tag = args.multiple_tag is not None
        if has_multiple_tag:
            taxonomy_tag = args.tax_consensus_tag
        if has_multiple_tag and taxonomy_tag is None:
            # Multiple affiliations without consensus: build a temporary taxonomy
            taxonomy_tag = "Used_taxonomy_FROGS-affi"
            working_biom = tmp_files.add( "tax.biom" )
            loaded_biom = BiomIO.from_json( args.input_biom )
            for observation in loaded_biom.get_observations():
                obs_metadata = observation["metadata"]
                affiliations = obs_metadata[args.multiple_tag]
                if len(affiliations) > 0:
                    obs_metadata[taxonomy_tag] = affiliations[0][args.taxonomy_tag]
            BiomIO.write( working_biom, loaded_biom )
            del loaded_biom

        # Rarefaction
        tax_depth = list()
        for rank in args.rarefaction_ranks:
            tax_depth.append( args.taxonomic_ranks.index(rank) )
        rarefaction_cmd = Rarefaction(working_biom, tmp_files, taxonomy_tag, tax_depth)
        rarefaction_cmd.submit( args.log_file )
        rarefaction_files = rarefaction_cmd.output_files

        # Taxonomy tree
        tree_count_file = tmp_files.add( "taxCount.enewick" )
        tree_ids_file = tmp_files.add( "taxCount_ids.tsv" )
        tree_cmd = TaxonomyTree(working_biom, taxonomy_tag, tree_count_file, tree_ids_file)
        tree_cmd.submit( args.log_file )

        # Writes summary
        write_summary( args.output_file, args.input_biom, tree_count_file, tree_ids_file, rarefaction_files, args )
    finally:
        # Temporary files are kept only in debug mode
        if not args.debug:
            tmp_files.deleteAll()
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:38,代码来源:affiliations_stat.py

示例8: FastaIO

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
    # Build the command line that runs grinder2biom.py (looked up in this
    # script's own directory) to create the expected ("real") BIOM from the
    # databank and from one 'name:path' argument by sample.
    # NOTE(review): `real_biom`, `samples` and `checked_biom` are defined outside this excerpt.
    # NOTE(review): the command is a concatenated string executed with shell=True;
    # sample names or paths containing quotes or shell metacharacters would break
    # (or alter) the command -- confirm that the inputs are trusted.
    cmd_grinder2biom = os.path.join(os.path.dirname(os.path.abspath(__file__)), "grinder2biom.py") + \
        " --affiliation " + os.path.abspath(args.databank) + \
        " --output " + real_biom + \
        " --samples"
    for current_sample in samples:
        cmd_grinder2biom += " '" + current_sample['name'] + ":" + current_sample['path'] + "'"
    # check_call raises CalledProcessError if the command exits with a non-zero status
    subprocess.check_call( cmd_grinder2biom, shell=True )

    # Add reference id in checked BIOM
    # The value following "reference=" in each FASTA record description is stored
    # as the "grinder_source" metadata of the observation named like the record.
    # NOTE(review): re.search() returns None when the description has no
    # "reference=" field, in which case .group(1) raises AttributeError.
    biom = BiomIO.from_json( args.checked_biom )
    fasta = FastaIO( args.checked_fasta )
    for record in fasta:
        reference = re.search("reference=([^\s]+)", record.description).group(1)
        biom.add_metadata( record.id, "grinder_source", reference, "observation" )
    fasta.close()
    # The annotated BIOM is read from args.checked_biom but written to `checked_biom`
    BiomIO.write( checked_biom, biom )
    del(biom)

    # Compare expected to obtained
    # For each sample: print its name, run biomCmpTax.py (looked up in this
    # script's own directory) on the real and checked BIOM files, then print the
    # command output followed by an empty line.
    for current_sample in samples:
        print current_sample['name']
        cmd_compareSample = os.path.join(os.path.dirname(os.path.abspath(__file__)), "biomCmpTax.py") \
            + " --real-biom " + os.path.abspath(real_biom) \
            + " --real-tax-key 'real_taxonomy'" \
            + " --checked-biom " + os.path.abspath(checked_biom) \
            + " --checked-tax-key '" + args.taxonomy_key + "'" \
            + (" --multi-affiliations" if args.multi_affiliations else "") \
            + (" --uniq-groups " + args.uniq_groups if args.uniq_groups is not None else "") \
            + " --sample " + current_sample['name']
        print subprocess.check_output( cmd_compareSample, shell=True )
        print ""
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:33,代码来源:affiliationsAssessment.py

示例9: remove_chimera_biom

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
def remove_chimera_biom( samples, in_biom_file, out_biom_file, lenient_filter, global_report, bySample_report ):
    """
    @summary: Removes the chimera observation from BIOM.
    @param samples: [dict] The chimera observations by sample. Example for
                    sample splA: sample['splA']['chimera_path'] where the value
                    is the path to the file containing the list of the chimera
                    observations names.
    @param in_biom_file: [str] The path to the BIOM file to filter.
    @param out_biom_file: [str] The path to the BIOM after filter.
    @param lenient_filter: [bool] True: removes one sequence in all samples
                           only if it is detected as chimera in all samples
                           where it is present. With False removes one
                           sequence in all samples if it is detected as chimera
                           in at least one sample.
    @param global_report: [dict] This dictionary is update with the global
                          number of removed observations, the global removed
                          abundance, ...
    @param bySample_report: [dict] This dictionary is update for add by sample the
                            number of removed observations, the removed
                            abundance, ...
    """
    nb_sample_by_chimera = dict()

    # Init bySample_report
    for sample_name in samples.keys():
        bySample_report[sample_name] = {
            'nb_kept': 0,
            'kept_abundance': 0,
            'nb_removed': 0,
            'removed_abundance': 0,
            'removed_max_abundance': 0
        }

    # Retrieve chimera
    for sample_name in samples.keys():
        chimera_fh = open( samples[sample_name]['chimera_path'] )
        for line in chimera_fh:
            observation_name = line.strip()
            if not nb_sample_by_chimera.has_key(observation_name):
                nb_sample_by_chimera[observation_name] = 0
            nb_sample_by_chimera[observation_name] += 1
        chimera_fh.close()

    # Remove chimera
    removed_chimera = list()
    biom = BiomIO.from_json(in_biom_file)
    for chimera_name in nb_sample_by_chimera.keys():
        is_always_chimera = True
        nb_sample_with_obs = sum( 1 for sample in biom.get_samples_by_observation(chimera_name) )
        observation_abundance = biom.get_observation_count(chimera_name)
        if nb_sample_with_obs != nb_sample_by_chimera[chimera_name]:
            is_always_chimera = False
            global_report['nb_ambiguous'] += 1
            global_report['abundance_ambiguous'] += observation_abundance
            print "'" + chimera_name + "' is not interpreted as chimera in all samples where it is present."
        if not lenient_filter or is_always_chimera:
            removed_chimera.append(chimera_name)
            # Global metrics
            global_report['nb_removed'] += 1
            global_report['abundance_removed'] += observation_abundance
            # By sample metrics
            for sample in biom.get_samples_by_observation(chimera_name):
                sample_count = biom.get_count(chimera_name, sample['id'])
                bySample_report[sample['id']]['nb_removed'] += 1
                bySample_report[sample['id']]['removed_abundance'] += sample_count
                bySample_report[sample['id']]['removed_max_abundance'] = max(bySample_report[sample['id']]['removed_max_abundance'], sample_count)
    biom.remove_observations(removed_chimera)

    # Nb non-chimera
    for observation_name in biom.get_observations_names():
        global_report['nb_kept'] += 1
        global_report['abundance_kept'] += biom.get_observation_count(observation_name)
        # By sample metrics
        for sample in biom.get_samples_by_observation(observation_name):
            sample_count = biom.get_count(observation_name, sample['id'])
            bySample_report[sample['id']]['nb_kept'] += 1
            bySample_report[sample['id']]['kept_abundance'] += sample_count
    BiomIO.write(out_biom_file, biom)
开发者ID:sandrineperrin,项目名称:frogs-docker,代码行数:80,代码来源:parallelChimera.py

示例10: Biom

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
    # Build a BIOM from grinder abundance files, then annotate its observations
    # with the taxonomy found in the affiliation FASTA file.
    # NOTE(review): `args` and `taxonomy_key` are defined outside this excerpt.
    biom = Biom( generated_by="grinder", matrix_type="sparse" )

    # Set observations count
    # args.samples is iterated for its sample names and indexed by name to get the
    # abundance file path (presumably a dict {sample_name: path} -- confirm with the argument parser).
    for sample_name in args.samples:
        biom.add_sample( sample_name )
        fh_abund = open( args.samples[sample_name] )
        for line in fh_abund: # Content format: "# rank<TAB>seq_id<TAB>rel_abund_perc"
            if not line.startswith('#'):
                fields = line.strip().split()
                # The observation is declared the first time it is seen in any sample.
                # NOTE(review): the bare except hides every error raised by
                # add_observation (and by fields[1] on a malformed line), not only
                # the "already exists" case.
                try:
                    biom.add_observation( fields[1] )
                except: # already exist
                    pass
                # The relative abundance (float) is scaled by 1e14 and truncated to get an integer count
                biom.change_count( fields[1], sample_name, int(float(fields[2])*100000000000000) )################## depends on the grinder precision
        fh_abund.close()

    # Set taxonomy metadata
    # The taxonomy is added only to observations that do not already have the
    # `taxonomy_key` metadata. Records raising ValueError on metadata lookup are skipped.
    fh_classif = FastaIO( args.affiliation )
    for record in fh_classif:
        try:
            metadata = biom.get_observation_metadata( record.id )
            if metadata is None or not metadata.has_key( taxonomy_key ):
                taxonomy = getCleanedTaxonomy(record.description)
                biom.add_metadata( record.id, taxonomy_key, taxonomy, "observation" )
        except ValueError: # is not in BIOM
            pass
    fh_classif.close()

    # Write BIOM
    BiomIO.write( args.output, biom )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:32,代码来源:grinder2biom.py

示例11: tsv_to_biom

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
def tsv_to_biom( input_tsv, multi_hit_dict, fields, samples_names, output_biom, output_fasta ):
    """
    @summary: Convert TSV file to Biom file.
    @param input_tsv: [str] Path to the TSV file.
    @param multi_hit_dict: [dict] Dictionary describing equivalent multi blast hit (None when no multi-hit file is provided):
    dict[observation_name]=[ {"blast_taxonomy":taxonomy, "blast_subject":subject, "blast_perc_identity": per_id, "blast_perc_query_coverage":per_cov, "blast_evalue":eval, "blast_aln_length":aln}]
    @param fields: [list] column name to include as metadata (must at least contain observation_name): observation_sum and seed_sequence will be excluded, rdp_tax_and_bootstrap will be split in two metadata
    @param samples_names: [list] list of sample names.
    @param output_biom: [str] Path to the output file (format : BIOM).
    @param output_fasta: [str] Path to the output file (format : fasta). None to skip the seeds extraction.
    @raise Exception: If a fasta output is requested but the TSV has no
                      seed_sequence column; if a "multi-subject" cluster is
                      missing from multi_hit_dict; if no blast affiliation
                      remains for a "multi-subject" cluster.
    """
#     biom = Biom( generated_by='frogs', matrix_type="sparse" )
    biom = Biom( matrix_type="sparse" )

    seed_seq_idx = -1 
    metadata_index = dict()
    sample_index = dict()
    clusters_count = dict() # By cluster name: the count by sample name
    clusters_metadata = dict() # By cluster name: the metadata dictionary
    in_fh = open( input_tsv )

    if not output_fasta is None:
        Fasta_fh=FastaIO(output_fasta , "w" )

    # parse header and store column index 
    # The leading "#" of the header line is optional.
    header=in_fh.readline()
    if header.startswith("#"):
        header=header[1:]
    header = header.strip()
    # From their use below: metadata_index and sample_index map a column index to
    # a metadata name / a sample name, and seed_seq_idx stays at -1 when there is
    # no seed sequence column (header_line_dict is defined outside this excerpt).
    seed_seq_idx, metadata_index, sample_index = header_line_dict(fields,header,samples_names)
    if not output_fasta is None and seed_seq_idx == -1:
        raise Exception("\nYou want to extract seed fasta sequence but there is no seed_sequence column in your TSV file\n\n")

    # count by sample, and metadata
    for line in in_fh:

        cluster_name=""
        line_list=line.strip().split("\t")
        count_by_sample = {}
        metadata_dict = {}
        # parse columns
        for idx,val in enumerate(line_list):
            # recover metadata
            if idx in metadata_index:
                if metadata_index[idx]=="observation_name" :
                    cluster_name = val
                else:
                    metadata_dict[metadata_index[idx]] = val
            # recover samples count
            # NOTE(review): val is a string here, so `val > 0` compares a str with an
            # int: always True in Python 2 (zero counts are then stored too) and a
            # TypeError in Python 3. `int(val) > 0` was presumably intended -- confirm.
            elif idx in sample_index and val > 0:
                count_by_sample[sample_index[idx]] = int(val)
            # recover seed sequence
            elif idx == seed_seq_idx:
                seed_seq = val

        # if fasta output file => store the seed sequence
        if not output_fasta is None:
            seq = Sequence( cluster_name, seed_seq) 
            Fasta_fh.write(seq)

        # The taxonomy string is split into a list of ranks
        if "taxonomy" in metadata_dict:
            metadata_dict["taxonomy"] = metadata_dict["taxonomy"].split(";")

        # format rdp taxonomy to fit BIOM format
        # The field alternates taxon and bootstrap items separated by ";"; the
        # parentheses are removed from the bootstrap items.
        if "rdp_tax_and_bootstrap" in metadata_dict:
            metadata_dict["rdp_taxonomy"]=[]
            metadata_dict["rdp_bootstrap"]=[]
            tax = metadata_dict["rdp_tax_and_bootstrap"].rstrip(";").split(";")
            for i in range(0,len(tax),2):
                metadata_dict["rdp_taxonomy"].append(tax[i])
                metadata_dict["rdp_bootstrap"].append(tax[i+1].replace("(","").replace(")",""))
            metadata_dict.pop("rdp_tax_and_bootstrap")

        # format blast taxonomy to fit BIOM format (one consensus blast_taxonomy and possible multiples blast_affiliation detailed
        if "blast_taxonomy" in metadata_dict:
            metadata_dict["blast_taxonomy"] = metadata_dict["blast_taxonomy"].split(";")

            # check multihit blast : filter non consistent taxonomy hit with blast_taxonomy (if TSV modified), and compute consensus tax (if multihit line suppressed)
            if metadata_dict["blast_subject"] == "multi-subject" and not multi_hit_dict is None:
                if not cluster_name in multi_hit_dict:
                    raise Exception("\n"+cluster_name+" has multi-subject tag but is not present in your multi-hit TSV file. Please, provide the original multi-hit TSV file.\n\n")
                else:
                    metadata_dict["blast_taxonomy"], metadata_dict["blast_affiliations"] = observation_blast_parts(metadata_dict, multi_hit_dict[cluster_name])
                    if metadata_dict["blast_affiliations"] == []:
                        raise Exception("\nyour multihit TSV file is no more consistent with your abundance TSV file for (at least) "+cluster_name+"\n\n")
            # no multi tag= blast affiliation is equal to blast_taxonomy
            else:
                # Every "blast*" metadata is gathered, without its "blast_" prefix, in a single affiliation
                blast_dict={key.replace("blast_",""):metadata_dict[key] for key in metadata_dict if key.startswith("blast")}
                metadata_dict["blast_affiliations"]=[blast_dict]

            # filter blast metadata which are moved to blast_affiliations
            # blast_taxonomy is the only "blast_*" metadata kept at the observation level
            for metadata in metadata_dict["blast_affiliations"][0]:
                if not metadata == "taxonomy":
                    metadata_dict.pop("blast_"+metadata)

        # add cluster and count to clusters_count dict
        clusters_count[cluster_name] = count_by_sample
        # ok print clusters_count[cluster_name].keys(), "CDT0#LOT05" in clusters_count[cluster_name], "CDT0#LOT02" in clusters_count[cluster_name]
        # add cluster and metadata to clusters_metadata dict
        clusters_metadata[cluster_name] = metadata_dict
#.........这里部分代码省略.........
开发者ID:sandrineperrin,项目名称:frogs-docker,代码行数:103,代码来源:tsv2biom.py

示例12: UTAX

# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
#
##################################################################################################################################################
if __name__ == "__main__":
    # Manage parameters
    parser = argparse.ArgumentParser(description="Add taxonomy from UTAX result in BIOM file.")
    parser.add_argument( '-t', '--taxonomy-tag', default="taxonomy", help="The taxonomy tag in BIOM file. [Default: taxonomy]")
    parser.add_argument( '-v', '--version', action='version', version=__version__)
    # Inputs
    group_input = parser.add_argument_group('Inputs')
    group_input.add_argument('-f', '--input-fasta', required=True, help='Path to the sequence file outputed by UTAX (format: fasta).')
    group_input.add_argument('-b', '--input-biom', required=True, help='Path to the abundance file (format: BIOM).')
    # Outputs
    group_output = parser.add_argument_group('Outputs')
    group_output.add_argument('-o', '--output-biom', required=True, help='Path to the abundance file with taxonomy (format: BIOM).')
    args = parser.parse_args()

    # Process
    # The pattern captures the observation ID (group 1) and the taxonomy (group 2).
    # It is a raw string (no invalid escape sequences) compiled once for all records.
    id_and_tax_pattern = re.compile(r"^([^\;]+)\;size\=\d+\;tax=(.+)$")
    biom = BiomIO.from_json( args.input_biom )
    fasta = FastaIO( args.input_fasta )
    try:
        for record in fasta:
            # record.id example: Cluster_1;size=19714;tax=d:Bacteria(1.0000),p:"Proteobacteria"(0.9997),c:Alphaproteobacteria(0.9903),o:Rhodospirillales(0.9940),f:Acetobacteraceae(0.9887),g:Humitalea(0.9724);
            match = id_and_tax_pattern.search(record.id)
            if match is None:
                raise Exception("ID and taxonomy cannot be retrieved from '" + record.id + "'")
            observation_id = match.group(1)
            taxonomy = match.group(2)
            biom.add_metadata( observation_id, args.taxonomy_tag, taxonomy, "observation" )
    finally:
        # The FASTA handle is closed on every path, including unexpected errors
        fasta.close()
    BiomIO.write( args.output_biom, biom )
开发者ID:geraldinepascal,项目名称:FROGS,代码行数:32,代码来源:addUtaxFromFasta.py


注:本文中的frogsBiom.BiomIO.write方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。