This article collects typical usage examples of the Python method frogsBiom.BiomIO.write. If you are unsure what BiomIO.write does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore the containing class, frogsBiom.BiomIO, for more context.
Twelve code examples of BiomIO.write are shown below, ordered by popularity by default.
Example 1: sampling_by_sample
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
def sampling_by_sample( input_biom, output_biom, nb_sampled=None, sampled_ratio=None ):
"""
@summary: Writes a BIOM after a random sampling in each sample.
@param input_biom: [str] Path to the processed BIOM.
@param output_biom: [str] Path to outputed BIOM.
@param nb_sampled: [int] Number of sampled sequences by sample.
@param sampled_ratio: [float] Ratio of sampled sequences by sample.
@note: nb_sampled and sampled_ratio are mutually exclusive.
"""
initial_biom = BiomIO.from_json( input_biom )
new_biom = Biom(
matrix_type="sparse",
generated_by="Sampling " + (str(nb_sampled) if nb_sampled is not None else str(sampled_ratio) + "%" ) + " elements by sample from " + input_biom
)
observations_already_added = dict()
for sample_name in initial_biom.get_samples_names():
new_biom.add_sample( sample_name, initial_biom.get_sample_metadata(sample_name) )
sample_seq = initial_biom.get_sample_count(sample_name)
sample_nb_sampled = nb_sampled
if nb_sampled is None:
sample_nb_sampled = int(sample_seq * sampled_ratio)
if sample_seq < sample_nb_sampled: # compare against the per-sample target (nb_sampled may be None when sampled_ratio is used)
raise Exception( str(sample_nb_sampled) + " sequences cannot be sampled in sample '" + str(sample_name) + "'. It only contains " + str(sample_seq) + " sequences." )
else:
for current_nb_iter in range(sample_nb_sampled):
# Take an observation in initial BIOM
selected_observation = initial_biom.random_obs_by_sample(sample_name)
selected_observation_id = selected_observation['id']
initial_biom.subtract_count( selected_observation_id, sample_name, 1 )
# Put in new BIOM
if selected_observation_id not in observations_already_added:
new_biom.add_observation( selected_observation_id, initial_biom.get_observation_metadata(selected_observation_id) )
observations_already_added[selected_observation_id] = True
new_biom.add_count( selected_observation_id, sample_name, 1 )
BiomIO.write( output_biom, new_biom )
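A minimal usage sketch for this function; the file paths and sampling depths are hypothetical:
# Hypothetical usage: subsample an abundance BIOM (illustrative paths).
sampling_by_sample( "abundance.biom", "subsampled_500.biom", nb_sampled=500 )    # 500 sequences per sample
sampling_by_sample( "abundance.biom", "subsampled_10pc.biom", sampled_ratio=0.1 )  # 10% of each sample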
Example 2: process
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
def process( in_biom, out_biom, out_metadata ):
ordered_blast_keys = ["taxonomy", "subject", "evalue", "perc_identity", "perc_query_coverage", "aln_length"] # Keys in blast_affiliations metadata
taxonomy_depth = 0
unclassified_observations = list()
FH_metadata = open( out_metadata, "w" )
FH_metadata.write( "#OTUID\t" + "\t".join([item for item in ordered_blast_keys]) + "\n" )
biom = BiomIO.from_json( in_biom )
for observation in biom.get_observations():
for metadata_key in list(observation["metadata"].keys()): # copy the keys: entries may be deleted during iteration
if metadata_key == "blast_affiliations": # Extract blast_affiliations metadata in metadata_file
if observation["metadata"][metadata_key] is not None:
for current_affi in observation["metadata"][metadata_key]:
if isinstance(current_affi["taxonomy"], list) or isinstance(current_affi["taxonomy"], tuple):
current_affi["taxonomy"] = ";".join( current_affi["taxonomy"] )
FH_metadata.write( observation["id"] + "\t" + "\t".join([str(current_affi[item]) for item in ordered_blast_keys]) + "\n" )
del observation["metadata"][metadata_key]
elif observation["metadata"][metadata_key] is not None: # All list are transformed in string
if isinstance(observation["metadata"][metadata_key], list) or isinstance(observation["metadata"][metadata_key], tuple):
observation["metadata"][metadata_key] = ";".join( map(str, observation["metadata"][metadata_key]) )
if observation["metadata"].has_key( "blast_taxonomy" ):
if observation["metadata"]["blast_taxonomy"] is None:
unclassified_observations.append( observation["id"] )
observation["metadata"]["taxonomy"] = list()
else:
taxonomy_depth = len(observation["metadata"]["blast_taxonomy"].split(";"))
observation["metadata"]["taxonomy"] = observation["metadata"]["blast_taxonomy"].split(";")
# Add "Unclassified" ranks in unclassified observations
if taxonomy_depth > 0:
for observation_id in unclassified_observations:
observation_metadata = biom.get_observation_metadata(observation_id)
observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
BiomIO.write( out_biom, biom )
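A usage sketch with hypothetical file names; the function writes one TSV line per blast affiliation and a BIOM in which the remaining list metadata are flattened to strings:
# Hypothetical usage (illustrative paths).
process( "affiliated.biom", "standardized.biom", "blast_affiliations.tsv" )
# The TSV starts with: #OTUID<TAB>taxonomy<TAB>subject<TAB>evalue<TAB>perc_identity<TAB>perc_query_coverage<TAB>aln_length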
Example 3: filter_biom
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
def filter_biom( removed_observations, in_biom, out_biom ):
"""
@summary: Removes the specified observations from the BIOM.
@param removed_observations: [dict] Each key is an observation name.
@param in_biom: [str]: Path to the processed BIOM file.
@param out_biom: [str]: Path to the cleaned BIOM file.
"""
biom = BiomIO.from_json(in_biom)
biom.remove_observations(removed_observations)
BiomIO.write(out_biom, biom)
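A hypothetical call; the observation names are illustrative:
removed = { "Cluster_12": True, "Cluster_47": True }  # keys are observation names
filter_biom( removed, "clusters.biom", "clusters_filtered.biom" )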
Example 4: remove_observations
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
def remove_observations( removed_observations, input_biom, output_biom ):
"""
@summary: Removes the specified list of observations.
@param removed_observations: [list] The names of the observations to remove.
@param input_biom: [str] The path to the input BIOM.
@param output_biom: [str] The path to the output BIOM.
"""
biom = BiomIO.from_json( input_biom )
biom.remove_observations( removed_observations )
BiomIO.write( output_biom, biom )
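The same operation as Example 3, but taking a list of names; a hypothetical call:
remove_observations( ["Cluster_12", "Cluster_47"], "clusters.biom", "clusters_filtered.biom" )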
Example 5: to_biom
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
def to_biom( clusters_file, count_file, output_biom, size_separator ):
"""
@summary : Write a biom file from swarm results.
@param clusters_file : [str] path to the '.clstr' file.
@param count_file : [str] path to the count file. It contains the count of
sequences by sample of each preclusters.
Line format : "Precluster_id nb_in_sampleA nb_in_sampleB"
@param output_biom : [str] path to the output file.
@param size_separator : [str] the pre-cluster abundance separator.
"""
biom = Biom( generated_by='swarm', matrix_type="sparse" )
# Preclusters count by sample
preclusters_count = dict()
count_fh = open( count_file )
samples = count_fh.readline().strip().split()[1:]
for line in count_fh:
precluster_id, count_str = line.strip().split(None, 1)
preclusters_count[precluster_id] = count_str # For large datasets, storing counts as a string consumes less RAM than a sparse count
count_fh.close()
# Add samples
for sample_name in samples:
biom.add_sample( sample_name )
# Process count
cluster_idx = 1
clusters_fh = open( clusters_file )
for line in clusters_fh:
cluster_name = "Cluster_" + str(cluster_idx)
cluster_count = {key:0 for key in samples}
line_fields = line.strip().split()
# Retrieve count by sample
for seq_id in line_fields:
real_seq_id = seq_id.rsplit(size_separator, 1)[0]
sample_counts = preclusters_count[real_seq_id].split()
for sample_idx, sample_name in enumerate(samples):
cluster_count[sample_name] += int(sample_counts[sample_idx])
preclusters_count[real_seq_id] = None
# Add cluster on biom
biom.add_observation( cluster_name, {'seed_id':line_fields[0].rsplit(size_separator, 1)[0]} )
observation_idx = biom.find_idx("observation", cluster_name)
for sample_idx, sample_name in enumerate(samples):
if cluster_count[sample_name] > 0:
biom.data.change( observation_idx, sample_idx, cluster_count[sample_name] )
# Next cluster
cluster_idx += 1
# Write
BiomIO.write( output_biom, biom )
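A usage sketch; the file layouts in the comments follow the formats described in the docstring and are purely illustrative, as is the size separator:
# counts.tsv (illustrative):        clusters.clstr (illustrative, one cluster per line):
#   #id   splA  splB                  seq1;size=10 seq7;size=3
#   seq1  8     2                     seq2;size=5
#   seq7  0     3
to_biom( "clusters.clstr", "counts.tsv", "clusters.biom", size_separator=";size=" )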
Example 6: aff_to_metadata
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
def aff_to_metadata(reference_file, biom_in, biom_out, blast_files=None, rdp_files=None):
"""
@summary: Add taxonomy metadata on biom file from a blast result.
@param reference_file: [str] The path to the reference file.
@param biom_in: [str] The path to the Biom file to process.
@param biom_out: [str] The path to the biom output file.
@param blast_files: [list] the list of the path to the blast results in tabular format (outfmt 6 with NCBI Blast+).
@param rdp_files: [list] the list of path to the RDPClassifier results.
"""
# Build a hash with the taxonomy for each gene (key=gene_id ; value=gene_taxonomy)
taxonomy_by_reference = get_tax_from_fasta( reference_file )
# Retrieve blast clusters annotations
cluster_blast_annot = dict()
if blast_files is not None:
cluster_blast_annot = get_bests_blast_affi( blast_files, taxonomy_by_reference )
del taxonomy_by_reference
# Retrieve rdp clusters annotations
cluster_rdp_annot = dict()
if rdp_files is not None:
cluster_rdp_annot = get_rdp_affi( rdp_files )
# Add metadata to biom
biom = BiomIO.from_json(biom_in)
for cluster in biom.get_observations():
cluster_id = cluster["id"]
# Blast
if blast_files is not None:
blast_taxonomy = None
blast_affiliations = list()
if cluster_id in cluster_blast_annot: # Current observation has a match
blast_taxonomy = get_tax_consensus( [alignment['taxonomy'] for alignment in cluster_blast_annot[cluster_id]['alignments']] )
blast_affiliations = cluster_blast_annot[cluster_id]['alignments']
biom.add_metadata( cluster_id, "blast_affiliations", blast_affiliations, "observation" )
biom.add_metadata( cluster_id, "blast_taxonomy", blast_taxonomy, "observation" )
# RDP
if rdp_files is not None:
rdp_taxonomy = None
rdp_bootstrap = None
if cluster_id in cluster_rdp_annot:
rdp_taxonomy = cluster_rdp_annot[cluster_id]['taxonomy']
rdp_bootstrap = cluster_rdp_annot[cluster_id]['bootstrap']
biom.add_metadata(cluster_id, "rdp_taxonomy", rdp_taxonomy, "observation")
biom.add_metadata(cluster_id, "rdp_bootstrap", rdp_bootstrap, "observation")
BiomIO.write(biom_out, biom)
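A hypothetical call annotating clusters with both blast and RDP results; all paths are illustrative:
aff_to_metadata( "reference.fasta", "clusters.biom", "clusters_tax.biom",
                 blast_files=["blast_results.tsv"], rdp_files=["rdp_results.txt"] )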
Example 7: process
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
def process( args ):
tmp_files = TmpFiles( os.path.split(args.output_file)[0] )
try:
# Add temp taxonomy if multiple and without consensus
tmp_biom = args.input_biom
used_taxonomy_tag = args.taxonomy_tag
if args.multiple_tag is not None:
used_taxonomy_tag = args.tax_consensus_tag
if args.tax_consensus_tag is None:
used_taxonomy_tag = "Used_taxonomy_FROGS-affi"
tmp_biom = tmp_files.add( "tax.biom" )
biom = BiomIO.from_json( args.input_biom )
for observation in biom.get_observations():
metadata = observation["metadata"]
if len(metadata[args.multiple_tag]) > 0:
metadata[used_taxonomy_tag] = metadata[args.multiple_tag][0][args.taxonomy_tag]
BiomIO.write( tmp_biom, biom )
del biom
# Rarefaction
tax_depth = [args.taxonomic_ranks.index(rank) for rank in args.rarefaction_ranks]
rarefaction_cmd = Rarefaction(tmp_biom, tmp_files, used_taxonomy_tag, tax_depth)
rarefaction_cmd.submit( args.log_file )
rarefaction_files = rarefaction_cmd.output_files
# Taxonomy tree
tree_count_file = tmp_files.add( "taxCount.enewick" )
tree_ids_file = tmp_files.add( "taxCount_ids.tsv" )
TaxonomyTree(tmp_biom, used_taxonomy_tag, tree_count_file, tree_ids_file).submit( args.log_file )
# Writes summary
write_summary( args.output_file, args.input_biom, tree_count_file, tree_ids_file, rarefaction_files, args )
finally:
if not args.debug:
tmp_files.deleteAll()
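This function expects an argparse-style namespace and assumes the FROGS helpers used above (TmpFiles, Rarefaction, TaxonomyTree, write_summary) are in scope; a hypothetical driver, with attribute names taken from the function body and values purely illustrative:
import argparse
args = argparse.Namespace(
    input_biom="clusters_tax.biom", output_file="affiliations_stat.html",
    taxonomy_tag="blast_taxonomy", multiple_tag=None, tax_consensus_tag=None,
    taxonomic_ranks=["Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"],
    rarefaction_ranks=["Genus"], log_file="process.log", debug=False )
process( args )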
Example 8: FastaIO
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
cmd_grinder2biom = os.path.join(os.path.dirname(os.path.abspath(__file__)), "grinder2biom.py") + \
" --affiliation " + os.path.abspath(args.databank) + \
" --output " + real_biom + \
" --samples"
for current_sample in samples:
cmd_grinder2biom += " '" + current_sample['name'] + ":" + current_sample['path'] + "'"
subprocess.check_call( cmd_grinder2biom, shell=True )
# Add reference id in checked BIOM
biom = BiomIO.from_json( args.checked_biom )
fasta = FastaIO( args.checked_fasta )
for record in fasta:
reference = re.search("reference=([^\s]+)", record.description).group(1)
biom.add_metadata( record.id, "grinder_source", reference, "observation" )
fasta.close()
BiomIO.write( checked_biom, biom )
del(biom)
# Compare expected to obtained
for current_sample in samples:
print( current_sample['name'] )
cmd_compareSample = os.path.join(os.path.dirname(os.path.abspath(__file__)), "biomCmpTax.py") \
+ " --real-biom " + os.path.abspath(real_biom) \
+ " --real-tax-key 'real_taxonomy'" \
+ " --checked-biom " + os.path.abspath(checked_biom) \
+ " --checked-tax-key '" + args.taxonomy_key + "'" \
+ (" --multi-affiliations" if args.multi_affiliations else "") \
+ (" --uniq-groups " + args.uniq_groups if args.uniq_groups is not None else "") \
+ " --sample " + current_sample['name']
print( subprocess.check_output( cmd_compareSample, shell=True ) )
print( "" )
Example 9: remove_chimera_biom
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
def remove_chimera_biom( samples, in_biom_file, out_biom_file, lenient_filter, global_report, bySample_report ):
"""
@summary: Removes the chimera observation from BIOM.
@param samples: [dict] The chimera observations by sample. Example for
sample splA: sample['splA']['chimera_path'] where the value
is the path to the file containing the list of the chimera
observations names.
@param in_biom_file: [str] The path to the BIOM file to filter.
@param out_biom_file: [str] The path to the BIOM after filter.
@param lenient_filter: [bool] True: remove a sequence from all samples
only if it is flagged as a chimera in every sample
where it is present. False: remove a sequence from
all samples if it is flagged as a chimera in at
least one sample.
@param global_report: [dict] This dictionary is updated with the global
number of removed observations, the global removed
abundance, etc.
@param bySample_report: [dict] This dictionary is updated with, for each
sample, the number of removed observations, the
removed abundance, etc.
"""
nb_sample_by_chimera = dict()
# Init bySample_report
for sample_name in samples.keys():
bySample_report[sample_name] = {
'nb_kept': 0,
'kept_abundance': 0,
'nb_removed': 0,
'removed_abundance': 0,
'removed_max_abundance': 0
}
# Retrieve chimera
for sample_name in samples.keys():
chimera_fh = open( samples[sample_name]['chimera_path'] )
for line in chimera_fh:
observation_name = line.strip()
if observation_name not in nb_sample_by_chimera:
nb_sample_by_chimera[observation_name] = 0
nb_sample_by_chimera[observation_name] += 1
chimera_fh.close()
# Remove chimera
removed_chimera = list()
biom = BiomIO.from_json(in_biom_file)
for chimera_name in nb_sample_by_chimera.keys():
is_always_chimera = True
nb_sample_with_obs = sum( 1 for sample in biom.get_samples_by_observation(chimera_name) )
observation_abundance = biom.get_observation_count(chimera_name)
if nb_sample_with_obs != nb_sample_by_chimera[chimera_name]:
is_always_chimera = False
global_report['nb_ambiguous'] += 1
global_report['abundance_ambiguous'] += observation_abundance
print "'" + chimera_name + "' is not interpreted as chimera in all samples where it is present."
if not lenient_filter or is_always_chimera:
removed_chimera.append(chimera_name)
# Global metrics
global_report['nb_removed'] += 1
global_report['abundance_removed'] += observation_abundance
# By sample metrics
for sample in biom.get_samples_by_observation(chimera_name):
sample_count = biom.get_count(chimera_name, sample['id'])
bySample_report[sample['id']]['nb_removed'] += 1
bySample_report[sample['id']]['removed_abundance'] += sample_count
bySample_report[sample['id']]['removed_max_abundance'] = max(bySample_report[sample['id']]['removed_max_abundance'], sample_count)
biom.remove_observations(removed_chimera)
# Nb non-chimera
for observation_name in biom.get_observations_names():
global_report['nb_kept'] += 1
global_report['abundance_kept'] += biom.get_observation_count(observation_name)
# By sample metrics
for sample in biom.get_samples_by_observation(observation_name):
sample_count = biom.get_count(observation_name, sample['id'])
bySample_report[sample['id']]['nb_kept'] += 1
bySample_report[sample['id']]['kept_abundance'] += sample_count
BiomIO.write(out_biom_file, biom)
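A hypothetical call; global_report must be pre-initialised with the counters the function increments (key names read from the function body), while bySample_report is filled in by the function itself:
samples = { "splA": {"chimera_path": "splA_chimera.txt"} }  # illustrative
global_report = { "nb_removed": 0, "abundance_removed": 0, "nb_kept": 0,
                  "abundance_kept": 0, "nb_ambiguous": 0, "abundance_ambiguous": 0 }
bySample_report = dict()
remove_chimera_biom( samples, "clusters.biom", "clusters_nochim.biom",
                     lenient_filter=True, global_report=global_report, bySample_report=bySample_report )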
示例10: Biom
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import write [as 别名]
biom = Biom( generated_by="grinder", matrix_type="sparse" )
# Set observations count
for sample_name in args.samples:
biom.add_sample( sample_name )
fh_abund = open( args.samples[sample_name] )
for line in fh_abund: # Content format: "# rank<TAB>seq_id<TAB>rel_abund_perc"
if not line.startswith('#'):
fields = line.strip().split()
try:
biom.add_observation( fields[1] )
except Exception: # observation already exists
pass
biom.change_count( fields[1], sample_name, int(float(fields[2])*100000000000000) ) # the scaling factor depends on grinder's output precision
fh_abund.close()
# Set taxonomy metadata
fh_classif = FastaIO( args.affiliation )
for record in fh_classif:
try:
metadata = biom.get_observation_metadata( record.id )
if metadata is None or taxonomy_key not in metadata:
taxonomy = getCleanedTaxonomy(record.description)
biom.add_metadata( record.id, taxonomy_key, taxonomy, "observation" )
except ValueError: # is not in BIOM
pass
fh_classif.close()
# Write BIOM
BiomIO.write( args.output, biom )
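The count written to the BIOM is the grinder relative abundance scaled to an integer; a worked example of that line with an illustrative percentage:
rel_abund_perc = "12.5"  # illustrative value from a grinder rank file
count = int(float(rel_abund_perc) * 100000000000000)
print( count )  # 1250000000000000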
Example 11: tsv_to_biom
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
def tsv_to_biom( input_tsv, multi_hit_dict, fields, samples_names, output_biom, output_fasta ):
"""
@summary: Convert TSV file to Biom file.
@param input_tsv: [str] Path to the TSV file.
@param multi_hit_dict: [dict] Dictionary describing equivalent multiple blast hits:
dict[observation_name]=[ {"blast_taxonomy":taxonomy, "blast_subject":subject, "blast_perc_identity": per_id, "blast_perc_query_coverage":per_cov, "blast_evalue":eval, "blast_aln_length":aln}]
@param fields: [list] Column names to include as metadata (must at least contain observation_name); observation_sum and seed_sequence are excluded, and rdp_tax_and_bootstrap is split into two metadata entries.
@param samples_names: [list] list of sample names.
@param output_biom: [str] Path to the output file (format : BIOM).
@param output_fasta: [str] Path to the output file (format : fasta).
"""
# biom = Biom( generated_by='frogs', matrix_type="sparse" )
biom = Biom( matrix_type="sparse" )
seed_seq_idx = -1
metadata_index = dict()
sample_index = dict()
clusters_count = dict()
clusters_metadata = dict()
in_fh = open( input_tsv )
if output_fasta is not None:
Fasta_fh = FastaIO( output_fasta, "w" )
# parse header and store column index
header=in_fh.readline()
if header.startswith("#"):
header=header[1:]
header = header.strip()
seed_seq_idx, metadata_index, sample_index = header_line_dict(fields,header,samples_names)
if output_fasta is not None and seed_seq_idx == -1:
raise Exception("\nYou want to extract seed fasta sequence but there is no seed_sequence column in your TSV file\n\n")
# count by sample, and metadata
for line in in_fh:
cluster_name=""
line_list=line.strip().split("\t")
count_by_sample = {}
metadata_dict = {}
# parse columns
for idx,val in enumerate(line_list):
# recover metadata
if idx in metadata_index:
if metadata_index[idx]=="observation_name" :
cluster_name = val
else:
metadata_dict[metadata_index[idx]] = val
# recover samples count
elif idx in sample_index and int(val) > 0: # val is a string; compare numerically
count_by_sample[sample_index[idx]] = int(val)
# recover seed sequence
elif idx == seed_seq_idx:
seed_seq = val
# if a FASTA output file is requested => store the seed sequence
if output_fasta is not None:
seq = Sequence( cluster_name, seed_seq)
Fasta_fh.write(seq)
if "taxonomy" in metadata_dict:
metadata_dict["taxonomy"] = metadata_dict["taxonomy"].split(";")
# format rdp taxonomy to fit BIOM format
if "rdp_tax_and_bootstrap" in metadata_dict:
metadata_dict["rdp_taxonomy"]=[]
metadata_dict["rdp_bootstrap"]=[]
tax = metadata_dict["rdp_tax_and_bootstrap"].rstrip(";").split(";")
for i in range(0,len(tax),2):
metadata_dict["rdp_taxonomy"].append(tax[i])
metadata_dict["rdp_bootstrap"].append(tax[i+1].replace("(","").replace(")",""))
metadata_dict.pop("rdp_tax_and_bootstrap")
# format blast taxonomy to fit the BIOM format (one consensus blast_taxonomy and possibly multiple detailed blast_affiliations)
if "blast_taxonomy" in metadata_dict:
metadata_dict["blast_taxonomy"] = metadata_dict["blast_taxonomy"].split(";")
# check multi-hit blast: filter hits whose taxonomy is inconsistent with blast_taxonomy (if the TSV was modified) and compute the consensus taxonomy (if multi-hit lines were removed)
if metadata_dict["blast_subject"] == "multi-subject" and not multi_hit_dict is None:
if not cluster_name in multi_hit_dict:
raise Exception("\n"+cluster_name+" has multi-subject tag but is not present in your multi-hit TSV file. Please, provide the original multi-hit TSV file.\n\n")
else:
metadata_dict["blast_taxonomy"], metadata_dict["blast_affiliations"] = observation_blast_parts(metadata_dict, multi_hit_dict[cluster_name])
if metadata_dict["blast_affiliations"] == []:
raise Exception("\nyour multihit TSV file is no more consistent with your abundance TSV file for (at least) "+cluster_name+"\n\n")
# no multi-subject tag => the blast affiliation equals blast_taxonomy
else:
blast_dict={key.replace("blast_",""):metadata_dict[key] for key in metadata_dict if key.startswith("blast")}
metadata_dict["blast_affiliations"]=[blast_dict]
# filter blast metadata which are moved to blast_affiliations
for metadata in metadata_dict["blast_affiliations"][0]:
if not metadata == "taxonomy":
metadata_dict.pop("blast_"+metadata)
# add cluster and count to clusters_count dict
clusters_count[cluster_name] = count_by_sample
# add cluster and metadata to clusters_metadata dict
clusters_metadata[cluster_name] = metadata_dict
#......... the rest of this example's code is omitted .........
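Although the end of this example is omitted, the signature above supports a usage sketch; the field and sample names are hypothetical, and multi_hit_dict may be None when the TSV contains no multi-subject rows:
fields = ["observation_name", "blast_taxonomy", "blast_subject", "seed_sequence"]
samples_names = ["splA", "splB"]
tsv_to_biom( "abundance.tsv", None, fields, samples_names, "rebuilt.biom", "seeds.fasta" )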
Example 12: UTAX
# Required module import: from frogsBiom import BiomIO [as alias]
# Or: from frogsBiom.BiomIO import write [as alias]
#
##################################################################################################################################################
if __name__ == "__main__":
# Manage parameters
parser = argparse.ArgumentParser(description="Add taxonomy from UTAX result in BIOM file.")
parser.add_argument( '-t', '--taxonomy-tag', default="taxonomy", help="The taxonomy tag in BIOM file. [Default: taxonomy]")
parser.add_argument( '-v', '--version', action='version', version=__version__)
# Inputs
group_input = parser.add_argument_group('Inputs')
group_input.add_argument('-f', '--input-fasta', required=True, help='Path to the sequence file outputed by UTAX (format: fasta).')
group_input.add_argument('-b', '--input-biom', required=True, help='Path to the abundance file (format: BIOM).')
# Outputs
group_output = parser.add_argument_group('Outputs')
group_output.add_argument('-o', '--output-biom', required=True, help='Path to the abundance file with taxonomy (format: BIOM).')
args = parser.parse_args()
# Process
biom = BiomIO.from_json( args.input_biom )
fasta = FastaIO( args.input_fasta )
for record in fasta:
# record.id example: Cluster_1;size=19714;tax=d:Bacteria(1.0000),p:"Proteobacteria"(0.9997),c:Alphaproteobacteria(0.9903),o:Rhodospirillales(0.9940),f:Acetobacteraceae(0.9887),g:Humitalea(0.9724);
match = re.search("^([^\;]+)\;size\=\d+\;tax=(.+)$", record.id)
if match is None:
fasta.close()
raise Exception("ID and taxonomy cannot be retrieved from '" + record.id + "'")
record.id = match.group(1)
record.description = match.group(2)
biom.add_metadata( record.id, args.taxonomy_tag, record.description, "observation" )
fasta.close()
BiomIO.write( args.output_biom, biom )
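The regular expression above splits the cluster id from the UTAX taxonomy string; a standalone sketch on the header format quoted in the comment:
import re
rec_id = 'Cluster_1;size=19714;tax=d:Bacteria(1.0000),p:"Proteobacteria"(0.9997);'
match = re.search("^([^\;]+)\;size\=\d+\;tax=(.+)$", rec_id)
print( match.group(1) )  # Cluster_1
print( match.group(2) )  # d:Bacteria(1.0000),p:"Proteobacteria"(0.9997);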