本文整理汇总了Python中frogsBiom.BiomIO.from_json方法的典型用法代码示例。如果您正苦于以下问题:Python BiomIO.from_json方法的具体用法?Python BiomIO.from_json怎么用?Python BiomIO.from_json使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类frogsBiom.BiomIO的用法示例。
在下文中一共展示了BiomIO.from_json方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_retrieved_by_sample
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def get_retrieved_by_sample( biom_file, reference_by_obs_id, references_by_sample, uniq_id, uniq_id_by_sample ):
    """
    @summary: Counts, for each sample of the BIOM, the detected observations and the distinct reference sequences they retrieve.
    @param biom_file: [str] Path to the BIOM file.
    @param reference_by_obs_id: [dict] By observation ID, the reference string of the observation. A value containing "," is treated as a chimera and is only counted as detected.
    @param references_by_sample: [dict] By sample name, the container of the reference IDs expected in the sample.
    @param uniq_id: [dict] By reference ID, the identifier used to count the distinct retrieved sequences.
    @param uniq_id_by_sample: [dict] By sample name then by reference ID, the identifier used to count the distinct expected and retrieved sequences.
    @return: [dict] By sample name, a dictionary with the keys "detected", "retrieved" and "expected_retrieved".
    """
    biom = BiomIO.from_json( biom_file )
    counts_by_sample = dict()
    for sample_name in biom.get_samples_names():
        nb_detected = 0
        retrieved_refs = set()
        expected_refs = set()
        for obs in biom.get_observations_by_sample( sample_name ):
            nb_detected += 1
            ref_id = reference_by_obs_id[obs['id']]
            if "," in ref_id:  # Chimera: counted as detected only
                continue
            retrieved_refs.add( ref_id )
            if ref_id in references_by_sample[sample_name]:
                expected_refs.add( ref_id )
        # Several references can share the same unique identifier: count distinct identifiers
        uniq_retrieved = set( uniq_id[ref_id] for ref_id in retrieved_refs )
        uniq_expected_retrieved = set( uniq_id_by_sample[sample_name][ref_id] for ref_id in expected_refs )
        # Results
        counts_by_sample[sample_name] = {
            "detected": nb_detected,
            "retrieved": len(uniq_retrieved),
            "expected_retrieved": len(uniq_expected_retrieved)
        }
    return counts_by_sample
示例2: biom_fasta_to_tsv
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def biom_fasta_to_tsv( input_biom, input_fasta, output_tsv, fields, list_separator ):
    """
    @summary: Convert BIOM file to TSV file with sequence.
    @param input_biom: [str] Path to the BIOM file.
    @param input_fasta: [str] Path to the sequences of the observations.
    @param output_tsv: [str] Path to the output file (format : TSV).
    @param fields: [list] Columns and their order in output. Special columns : '@observation_name', '@observation_sum', '@sample_count', '@rdp_tax_and_bootstrap', '@seed_sequence'. The others columns must be metadata title. The list must contain '@seed_sequence' (a ValueError is raised otherwise) and is left unchanged.
    @param list_separator: [str] Separator for complex metadata.
    """
    biom = BiomIO.from_json( input_biom )
    # Resolve the sequence column before creating the output: a missing '@seed_sequence' then fails without leaving a file behind
    sequence_idx = fields.index("@seed_sequence")
    # Work on a copy: deleting from 'fields' itself would alter the caller's list
    fields_without_seq = list(fields)
    del fields_without_seq[sequence_idx]
    with open( output_tsv, "w" ) as out_fh:
        # Header (built from the complete fields, sequence column included)
        header_parts = header_line_parts( fields, biom )
        out_fh.write( "#" + "\t".join(header_parts) + "\n" )
        # Data: one line per FASTA record, the sequence is re-inserted at its column
        FH_in = FastaIO( input_fasta )
        for record in FH_in:
            obs_idx = biom.find_idx("observation", record.id)
            count_by_sample = biom.data.get_row_array(obs_idx)
            observation_parts = observation_line_parts( biom.rows[obs_idx], count_by_sample, fields_without_seq, list_separator )
            observation_parts.insert( sequence_idx, record.string )
            out_fh.write( "\t".join(observation_parts) + "\n" )
示例3: observations_depth
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def observations_depth( input_biom, output_depth ):
    """
    @summary : Write the depths of the observation in file.
    @param input_biom : [str] path to the biom file processed.
    @param output_depth : [str] path to the output file.
    @note : Example of one output file
        #Depth<TAB>Nb_Observ_concerned<TAB>Prct_Observ_concerned
        1<TAB>65<TAB>65.000
        2<TAB>30<TAB>30.000
        3<TAB>0<TAB>0.000
        4<TAB>5<TAB>5.000
    """
    obs_depth = list()  # obs_depth[d] is the number of observations with a count equal to d
    nb_observ = 0  # Number of observations with a count different from zero
    # Process depth calculation
    biom = BiomIO.from_json( input_biom )
    for observation_id, observation_count in biom.get_observations_counts():
        nb_missing = observation_count + 1 - len(obs_depth)
        if nb_missing > 0:  # Grow the histogram up to the current depth
            obs_depth.extend( [0] * nb_missing )
        obs_depth[observation_count] += 1
        if observation_count != 0:
            nb_observ += 1
    del biom
    # Write output (the handle is closed even if a write fails)
    with open( output_depth, 'w' ) as out_fh:
        out_fh.write( "#Depth\tNb_Observ_concerned\tPrct_Observ_concerned\n" )
        # Depth 0 is skipped; the range is empty when no observation has a positive count, so nb_observ is never 0 here
        for depth in range(1, len(obs_depth)):
            prct = (float(obs_depth[depth]) / nb_observ) * 100
            out_fh.write( str(depth) + "\t" + str(obs_depth[depth]) + "\t" + ("%.3f" % prct) + "\n" )
示例4: sampling_by_sample
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def sampling_by_sample( input_biom, output_biom, nb_sampled=None, sampled_ratio=None ):
    """
    @summary: Writes a BIOM after a random sampling in each sample.
    @param input_biom: [str] Path to the processed BIOM.
    @param output_biom: [str] Path to outputed BIOM.
    @param nb_sampled: [int] Number of sampled sequences by sample.
    @param sampled_ratio: [float] Ratio of sampled sequences by sample. It is used as a multiplier of the sample count when nb_sampled is None.
    @note: nb_sampled and sampled_ratio are mutually exclusive.
    @note: An Exception is raised when a sample contains fewer sequences than the number to draw.
    """
    initial_biom = BiomIO.from_json( input_biom )
    new_biom = Biom(
        matrix_type="sparse",
        generated_by="Sampling " + (str(nb_sampled) if nb_sampled is not None else str(sampled_ratio) + "%" ) + " elements by sample from " + input_biom
    )
    observations_already_added = set()
    for sample_name in initial_biom.get_samples_names():
        new_biom.add_sample( sample_name, initial_biom.get_sample_metadata(sample_name) )
        sample_seq = initial_biom.get_sample_count(sample_name)
        sample_nb_sampled = nb_sampled
        if nb_sampled is None:
            sample_nb_sampled = int(sample_seq * sampled_ratio)
        # Compare with the number effectively drawn: nb_sampled is None in ratio mode
        if sample_seq < sample_nb_sampled:
            raise Exception( str(sample_nb_sampled) + " sequences cannot be sampled in sample '" + str(sample_name) + "'. It only contains " + str(sample_seq) + " sequences." )
        for current_nb_iter in range(sample_nb_sampled):
            # Take an observation in initial BIOM (the count is decremented in the initial BIOM after each draw)
            selected_observation = initial_biom.random_obs_by_sample(sample_name)
            selected_observation_id = selected_observation['id']
            initial_biom.subtract_count( selected_observation_id, sample_name, 1 )
            # Put in new BIOM
            if selected_observation_id not in observations_already_added:
                new_biom.add_observation( selected_observation_id, initial_biom.get_observation_metadata(selected_observation_id) )
                observations_already_added.add( selected_observation_id )
            new_biom.add_count( selected_observation_id, sample_name, 1 )
    BiomIO.write( output_biom, new_biom )
示例5: excluded_obs_on_blastMetrics
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def excluded_obs_on_blastMetrics( input_biom, tag, cmp_operator, threshold, excluded_file ):
    """
    @summary: Writes the list of the observations with no affiliations with sufficient blast value.
    @param input_biom: [str] The path to the BIOM file to check.
    @param tag: [str] The metadata checked.
    @param cmp_operator: [str] The operator use in comparison (tag_value ">=" thresold or tag_value "<=" thresold ). Any other value raises a KeyError.
    @param threshold: [float] The limit for the tag value.
    @param excluded_file: [str] The path to the output file.
    """
    valid_operators = {
        ">=": operator.__ge__,
        "<=": operator.__le__
    }
    cmp_func = valid_operators[cmp_operator]
    biom = BiomIO.from_json( input_biom )
    # The handle is closed even if a metadata lookup fails in the middle of the loop
    with open( excluded_file, "w" ) as FH_excluded_file:
        for observation in biom.get_observations():
            alignments = observation["metadata"]["blast_affiliations"]
            # One alignment satisfying the comparison is enough to keep the observation
            is_kept = any( cmp_func(float(current_alignment[tag]), threshold) for current_alignment in alignments )
            if not is_kept:
                FH_excluded_file.write( str(observation["id"]) + "\n" )
示例6: process
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def process( in_biom, out_biom, out_metadata ):
    """
    @summary: Rewrites a BIOM with flattened metadata and extracts the blast affiliations in a separate TSV file.
    @param in_biom: [str] Path to the BIOM file to process.
    @param out_biom: [str] Path to the output BIOM file.
    @param out_metadata: [str] Path to the output TSV file. It contains one line per blast affiliation.
    @note: In each observation the metadata "blast_affiliations" is removed, the other list or tuple metadata are joined with ";" and, when "blast_taxonomy" exists, a "taxonomy" list is added. The observations without blast_taxonomy receive "Unclassified" ranks; the depth used is the one of the last classified observation met.
    """
    ordered_blast_keys = ["taxonomy", "subject", "evalue", "perc_identity", "perc_query_coverage", "aln_length"] # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()
    biom = None
    # The metadata file is closed (and flushed) when leaving the block, even on error
    with open( out_metadata, "w" ) as FH_metadata:
        FH_metadata.write( "#OTUID\t" + "\t".join(ordered_blast_keys) + "\n" )
        biom = BiomIO.from_json( in_biom )
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a snapshot of the keys: the dictionary is modified in the loop
            for metadata_key in list(metadata.keys()):
                if metadata_key == "blast_affiliations": # Extract blast_affiliations metadata in metadata_file
                    if metadata[metadata_key] is not None:
                        for current_affi in metadata[metadata_key]:
                            if isinstance(current_affi["taxonomy"], (list, tuple)):
                                current_affi["taxonomy"] = ";".join( current_affi["taxonomy"] )
                            FH_metadata.write( observation["id"] + "\t" + "\t".join([str(current_affi[item]) for item in ordered_blast_keys]) + "\n" )
                    del metadata[metadata_key]
                elif metadata[metadata_key] is not None: # All list are transformed in string
                    if isinstance(metadata[metadata_key], (list, tuple)):
                        metadata[metadata_key] = ";".join( map(str, metadata[metadata_key]) )
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append( observation["id"] )
                    metadata["taxonomy"] = list()
                else:
                    taxonomy_ranks = metadata["blast_taxonomy"].split(";")
                    taxonomy_depth = len(taxonomy_ranks)
                    metadata["taxonomy"] = taxonomy_ranks
    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write( out_biom, biom )
示例7: get_checked
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def get_checked( abund_file, checked_sample, taxonomy_key, expected_by_depth ):
    """
    @summary: Returns, for one sample, the cumulated count of each taxon at each taxonomic depth.
    @param abund_file: [str] Path to the BIOM file.
    @param checked_sample: [str] Name of the sample whose counts are used.
    @param taxonomy_key: [str] The metadata key for the taxonomy of the observations.
    @param expected_by_depth: [list or dict] By depth, the container of the expected taxonomies (ranks joined by ";"). It is used to resolve the multi-affiliations.
    @return: [dict] By depth, the count by taxonomy (ranks joined by ";" up to this depth).
    @note: When the last rank of an observation is "Multi-affiliation" and exactly one of its distinct blast affiliations is expected, this affiliation replaces the taxonomy. Otherwise a warning is emitted and the taxonomy is kept.
    """
    checked_by_depth = dict()
    biom = BiomIO.from_json(abund_file)
    for current_obs in biom.get_observations():
        clean_taxonomy = getCleanedTaxonomy(current_obs["metadata"][taxonomy_key])
        count = biom.get_count(current_obs["id"], checked_sample)
        if count <= 0:  # Observation missing in the checked sample
            continue
        last_depth = len(clean_taxonomy) - 1
        if clean_taxonomy[last_depth] == "Multi-affiliation":
            expected_taxonomies = expected_by_depth[last_depth]
            taxonomies = list()  # Distinct affiliations of the observation
            matching = list()  # Distinct affiliations found in the expected taxonomies
            for affiliation in current_obs["metadata"]["blast_affiliations"]:
                affi_taxonomy = ";".join(getCleanedTaxonomy(affiliation["taxonomy"]))
                if affi_taxonomy in taxonomies:
                    continue
                taxonomies.append(affi_taxonomy)
                if affi_taxonomy in expected_taxonomies:
                    matching.append(getCleanedTaxonomy(affiliation["taxonomy"]))
            if len(matching) == 1:
                clean_taxonomy = matching[0]
            else:
                warnings.warn( "Multi-affiliation cannot be resolved for " + str((float(count)*100)/biom.get_total_count()) + "% sequences. Possible taxonomies: '" + "', '".join(taxonomies) + "'." )
        # Cumulate the count on each rank of the lineage
        for rank_depth in range(len(clean_taxonomy)):
            rank_taxonomy = ";".join(clean_taxonomy[:rank_depth + 1])
            counts_at_depth = checked_by_depth.setdefault(rank_depth, dict())
            counts_at_depth[rank_taxonomy] = counts_at_depth.get(rank_taxonomy, 0) + count
    return checked_by_depth
示例8: __init__
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def __init__( self, out_tsv, in_biom, in_fasta=None ):
    """
    @summary: Builds the 'biom2tsv.py' command from the metadata available in the BIOM.
    @param out_tsv: [str] Path to output TSV file.
    @param in_biom: [str] Path to BIOM file.
    @param in_fasta: [str] Path to the sequences file. When it is provided the option '--input-fasta' and the field '@seed_sequence' are added.
    """
    # Check the metadata to select the exported fields
    biom = BiomIO.from_json( in_biom )
    fields = list()
    if biom.has_observation_metadata( 'rdp_taxonomy' ) and biom.has_observation_metadata( 'rdp_bootstrap' ):
        fields.append( "'@rdp_tax_and_bootstrap'" )
    if biom.has_observation_metadata( 'blast_taxonomy' ):
        fields.append( "'blast_taxonomy'" )
    if biom.has_observation_metadata( 'blast_affiliations' ):
        fields.extend([
            "'@blast_subject'",
            "'@blast_perc_identity'",
            "'@blast_perc_query_coverage'",
            "'@blast_evalue'",
            "'@blast_aln_length'"
        ])
    if biom.has_observation_metadata( 'seed_id' ):
        fields.append( "'seed_id'" )
    if in_fasta is not None:
        fields.append( "'@seed_sequence'" )
    fields.extend( ["'@observation_name'", "'@observation_sum'", "'@sample_count'"] )
    conversion_tags = " ".join( fields )
    # Set command
    parameters = "--input-file " + in_biom
    if in_fasta is not None:  # Sequence file option
        parameters += " --input-fasta " + in_fasta
    parameters += " --output-file " + out_tsv + " --fields " + conversion_tags
    Cmd.__init__( self,
                  'biom2tsv.py',
                  'Converts a BIOM file in TSV file.',
                  parameters,
                  '--version' )
示例9: write_log
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def write_log(in_biom, out_biom, log):
    """
    @summary: Writes the number of OTU before and after a process, for each sample and for the whole BIOM.
    @param in_biom: [str] Path to the BIOM file before the process.
    @param out_biom: [str] Path to the BIOM file after the process.
    @param log: [str] Path to the output log file.
    """
    # The log is closed even if the parsing of one of the BIOM files fails
    with open(log, "w") as FH_log:
        FH_log.write("#sample\tnb_otu_before\tnb_otu_after\n")
        initial_biom = BiomIO.from_json( in_biom )
        new_biom = BiomIO.from_json( out_biom )
        # Counts by sample
        for sample_name in initial_biom.get_samples_names():
            nb_otu_before = len(initial_biom.get_sample_obs(sample_name))
            nb_otu_after = len(new_biom.get_sample_obs(sample_name))
            FH_log.write("Sample name: "+sample_name+"\n\tnb initials OTU: "+str(nb_otu_before)+"\n\tnb normalized OTU: "+str(nb_otu_after)+"\n")
        # Counts for the whole BIOM
        nb_initial_otu = len(initial_biom.rows)
        nb_new_otu = len(new_biom.rows)
        FH_log.write("Sample name: all samples\n\tnb initials OTU: "+str(nb_initial_otu)+"\n\tnb normalized OTU: "+str(nb_new_otu)+"\n")
示例10: getRealTaxByRefID
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def getRealTaxByRefID( input_biom, taxonomy_key, duplication_groups ):
    """
    @summary: Returns the taxonomies associated with each reference.
    @param input_biom: [str] Path to BIOM file.
    @param taxonomy_key: [str] The metadata key for taxonomy.
    @param duplication_groups: [dict] By reference ID the list of references with the same sequence. None when there is no duplication to take into account.
    @return: [dict] List of taxonomies by reference ID.
             Example:
               {
                 "MVF01000012.1.1317": [
                   ["Root", "Bacteria", "Proteobacteria", "Gammaproteobacteria", "Enterobacteriales", "Enterobacteriaceae", "Cronobacter", "Escherichia coli BIDMC 73"]
                 ],
                 "JQ607252.1.1437": [
                   ["Root", "Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Staphylococcaceae", "Staphylococcus", "bacterium NLAE-zl-P471"],
                   ["Root", "Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Staphylococcaceae", "Staphylococcus", "Staphylococcus aureus M17299"]
                 ]
               }
    """
    biom = BiomIO.from_json( input_biom )
    # Cleaned taxonomy of each observation of the BIOM
    cleaned_by_obs_id = dict()
    for observation in biom.get_observations():
        cleaned_by_obs_id[observation["id"]] = getCleanedTaxonomy(observation["metadata"][taxonomy_key])
    # By default each reference only has its own taxonomy
    taxonomy_by_obs_id = dict()
    for obs_id in cleaned_by_obs_id:
        taxonomy_by_obs_id[obs_id] = [cleaned_by_obs_id[obs_id]]
    # A reference listed in duplication groups receives the taxonomy of each member of its group
    if duplication_groups is not None:
        for obs_id in duplication_groups:
            taxonomy_by_obs_id[obs_id] = [cleaned_by_obs_id[id_duplicated_seq] for id_duplicated_seq in duplication_groups[obs_id]]
    return taxonomy_by_obs_id
示例11: get_step_size
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def get_step_size(self, nb_step=35):
    """
    @summary: Returns the step size to obtain 'nb_step' steps or more in 3/4 of samples.
    @param nb_step: [int] The number of expected steps.
    @returns: [int] The step size. It is never lower than 1; 1 is returned when the BIOM does not contain any sample.
    """
    # Get the number of sequences by sample
    biom = BiomIO.from_json( self.in_biom )
    counts = sorted( biom.get_sample_count(sample_name) for sample_name in biom.get_samples_names() )
    del biom
    nb_samples = len(counts)
    if nb_samples == 0:  # No sample: fall back to the minimal step
        return 1
    # Finds the lower quartile number of sequences (floor division keeps an integer index)
    lower_quartile_idx = nb_samples // 4
    nb_seq = counts[lower_quartile_idx]
    # If lower quartile sample is empty: use the first non-empty sample after it (counts are sorted)
    if nb_seq == 0:
        for current_count in counts[lower_quartile_idx + 1:]:
            if current_count != 0:
                nb_seq = current_count
                break
    step_size = int(nb_seq // nb_step)
    return max(1, step_size)
示例12: samples_hclassification
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def samples_hclassification( input_biom, output_newick, distance_method, linkage_method, min_count=1 ):
    """
    @summary : Process and write an hierarchical classification from Biom.
    @param input_biom : [str] Path to the BIOM file to process.
    @param output_newick : [str] Path to the newick output file.
    @param distance_method : [str] Used distance method for classify (given to scipy pdist).
    @param linkage_method : [str] Used linkage method for classify (given to scipy linkage).
    @param min_count : [int] Samples with a count lower than this value are not processed.
    @note : An Exception is raised when every sample is under min_count. A log is printed on the standard output.
    """
    from scipy.spatial.distance import pdist
    from scipy.cluster.hierarchy import linkage, to_tree
    data_array = list()
    processed_samples = list()
    excluded_samples = list()
    # Normalisation on count by sample
    biom = BiomIO.from_json( input_biom )
    for col_idx, current_sample in enumerate(biom.columns):
        sum_on_sample = biom.data.get_col_sum( col_idx )
        if sum_on_sample < min_count:
            excluded_samples.append( current_sample['id'] )
        else:
            processed_samples.append( current_sample['id'] )
            # Relative abundance of each observation in the sample
            OTUs_norm = [biom.data.nb_at(row_idx, col_idx) / float(sum_on_sample) for row_idx in range(len(biom.rows))]
            data_array.append( OTUs_norm )
    nb_samples = len(biom.columns)
    del biom
    # Process distance
    if len(processed_samples) < 1:
        raise Exception("All samples have a count lower than threshold (" + str(min_count) + ").")
    elif len(processed_samples) == 1:
        # A tree cannot be computed on a single sample
        newick = "(" + processed_samples[0] + ");"
    else:
        # Computing the distance and linkage
        data_dist = pdist( data_array, distance_method )
        data_link = linkage( data_dist, linkage_method )
        scipy_hc_tree = to_tree( data_link, rd=False )
        id_2_name = dict( zip(range(len(processed_samples)), processed_samples) )
        newick = to_newick(scipy_hc_tree, id_2_name)
    # Write newick
    with open( output_newick, "w" ) as out_fh:
        out_fh.write( newick + "\n" )
    # Display log (print is called with one argument: same output with Python 2 and Python 3)
    print( "# Hierarchical clustering log:\n" +
           "\tNumber of samples in BIOM: " + str(nb_samples) + "\n" +
           "\tNumber of processed samples: " + str(len(processed_samples)) )
    if nb_samples > len(processed_samples):
        print( "\n\tExcluded samples (count < " + str(min_count) + "): " + ", ".join(sorted(excluded_samples)) )
示例13: remove_observations
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def remove_observations( removed_observations, input_biom, output_biom ):
    """
    @summary: Writes a copy of the BIOM without the specified observations.
    @param removed_observations: [list] The names of the observations to remove.
    @param input_biom: [str] The path to the input BIOM.
    @param output_biom: [str] The path to the output BIOM.
    """
    filtered_biom = BiomIO.from_json( input_biom )
    filtered_biom.remove_observations( removed_observations )
    BiomIO.write( output_biom, filtered_biom )
示例14: filter_biom
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def filter_biom( removed_observations, in_biom, out_biom ):
    """
    @summary: Writes a cleaned BIOM: the input BIOM without the specified observations.
    @param removed_observations: [dict] Each key is an observation name.
    @param in_biom: [str]: Path to the processed BIOM file.
    @param out_biom: [str]: Path to the cleaned BIOM file.
    """
    cleaned_biom = BiomIO.from_json(in_biom)
    cleaned_biom.remove_observations(removed_observations)
    BiomIO.write(out_biom, cleaned_biom)
示例15: rarefaction
# 需要导入模块: from frogsBiom import BiomIO [as 别名]
# 或者: from frogsBiom.BiomIO import from_json [as 别名]
def rarefaction( input_biom, interval=10000, ranks=None, taxonomy_key="taxonomy" ):
    """
    @summary: Returns the rarefaction by ranks by samples.
    @param input_biom: [str] Path to the biom file processed.
    @param interval: [int] Number of sequences extracted at each step. The number of steps of a sample is its count divided (floor) by this value.
    @param ranks: [list] The rank(s) level for the diversity. When it is None or empty an empty dictionary is returned.
                  Example :
                    Sampled set :
                      Bacteria; Proteobacteria; Alphaproteobacteria; Sphingomonadales; Sphingomonadaceae; Sphingomonas
                      Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio; Vibrio halioticoli
                      Bacteria; Proteobacteria; Gammaproteobacteria; Legionellales; Coxiellaceae; Coxiella; Ornithodoros moubata symbiont A
                      Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Burkholderiaceae; Limnobacter; Limnobacter thiooxidans
                    Result for this set
                      With rank 1 or 2 : 1 group
                      With rank 3 : 3 different groups
                      With rank 4 or 5 or 6 : 4 different groups
    @param taxonomy_key : [str] The metadata title for the taxonomy in the input.
    @return: [dict] By ranks by samples the list of differents taxa for each steps. Each number is stored as a string.
             Example :
                {
                    1: {
                         "sampleA" : ["10", "20", "22", "23", "24", "25", "25", "25"],
                         "sampleB" : ["15", "25", "28", "30", "32", "34", "35", "36", "37", "37", "37", "37"]
                    }
                }
    @warning: The taxa with name starting with unknown used as complete new name 'unknown'.
    @note: NOTE(review): a rank r groups the observations on taxonomy[0:r+1] while the lineages are padded to max(ranks) elements only; confirm the expected rank numbering against the example above.
    """
    sample_rarefaction = dict()
    if not ranks:  # No rank requested (default): nothing to compute
        return sample_rarefaction
    max_rank = max(ranks)  # Invariant: computed once instead of at each padding test
    biom = BiomIO.from_json( input_biom )
    for current_rank in ranks:
        sample_rarefaction[current_rank] = dict()
    for sample in biom.get_samples_names():
        taxa = dict()  # By rank, the distinct taxa already seen in the sample
        for current_rank in ranks:
            sample_rarefaction[current_rank][sample] = list()
            taxa[current_rank] = dict()
        sample_count = biom.get_sample_count( sample )
        # Floor division: range() needs an integer with Python 2 and Python 3
        expected_nb_iter = int(sample_count // interval)
        for current_nb_iter in range(expected_nb_iter):
            selected_observations = biom.random_obs_extract_by_sample(sample, interval)
            for current_selected in selected_observations:
                taxonomy = list()
                observation_metadata = current_selected['observation']["metadata"]
                if taxonomy_key in observation_metadata and observation_metadata[taxonomy_key] is not None:
                    taxonomy = biom.get_observation_taxonomy( current_selected['observation']["id"], taxonomy_key )
                # Every taxon starting with "unknown" is merged in the same taxon
                for idx, taxon in enumerate(taxonomy):
                    if taxon.lower().startswith("unknown"):
                        taxonomy[idx] = "unknown"
                # Incomplete lineages are completed with "unknown"
                while len(taxonomy) < max_rank:
                    taxonomy.append("unknown")
                for current_rank in ranks:
                    taxonomy_str = (';'.join(taxonomy[0:current_rank+1])).lower()
                    taxa[current_rank][taxonomy_str] = True
            # One point per step: number of distinct taxa seen so far
            for current_rank in ranks:
                sample_rarefaction[current_rank][sample].append( str(len(taxa[current_rank])) )
    return sample_rarefaction