本文整理匯總了Python中biom.Table方法的典型用法代碼示例。如果您正苦於以下問題:Python biom.Table方法的具體用法?Python biom.Table怎麼用?Python biom.Table使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類biom的用法示例。
在下文中一共展示了biom.Table方法的11個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: denoise_single
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def denoise_single(demultiplexed_seqs: SingleLanePerSampleSingleEndFastqDirFmt,
                   trunc_len: int, trim_left: int = 0, max_ee: float = 2.0,
                   trunc_q: int = 2, pooling_method: str = 'independent',
                   chimera_method: str = 'consensus',
                   min_fold_parent_over_abundance: float = 1.0,
                   n_threads: int = 1, n_reads_learn: int = 1000000,
                   hashed_feature_ids: bool = True
                   ) -> (biom.Table, DNAIterator, qiime2.Metadata):
    """Run single-end denoising by delegating to ``_denoise_single``.

    All caller-supplied arguments are forwarded unchanged under the same
    keyword names. Three settings are not exposed to the caller and are
    pinned here: ``max_len=0``, ``homopolymer_gap_penalty='NULL'`` and
    ``band_size='16'``.

    Returns
    -------
    Whatever ``_denoise_single`` returns; annotated as a
    ``(biom.Table, DNAIterator, qiime2.Metadata)`` triple.
    """
    # Keys are listed in the order the helper's keywords were originally
    # passed, so the call is equivalent even for an order-sensitive callee.
    call_kwargs = dict(
        demultiplexed_seqs=demultiplexed_seqs,
        trunc_len=trunc_len,
        trim_left=trim_left,
        max_ee=max_ee,
        trunc_q=trunc_q,
        max_len=0,
        pooling_method=pooling_method,
        chimera_method=chimera_method,
        min_fold_parent_over_abundance=min_fold_parent_over_abundance,
        n_threads=n_threads,
        n_reads_learn=n_reads_learn,
        hashed_feature_ids=hashed_feature_ids,
        homopolymer_gap_penalty='NULL',
        band_size='16',
    )
    return _denoise_single(**call_kwargs)
示例2: denoise_pyro
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def denoise_pyro(demultiplexed_seqs: SingleLanePerSampleSingleEndFastqDirFmt,
                 trunc_len: int, trim_left: int = 0, max_ee: float = 2.0,
                 trunc_q: int = 2, max_len: int = 0,
                 pooling_method: str = 'independent',
                 chimera_method: str = 'consensus',
                 min_fold_parent_over_abundance: float = 1.0,
                 n_threads: int = 1, n_reads_learn: int = 250000,
                 hashed_feature_ids: bool = True
                 ) -> (biom.Table, DNAIterator, qiime2.Metadata):
    """Run the "pyro" flavour of denoising via ``_denoise_single``.

    All caller-supplied arguments, including ``max_len``, are forwarded
    unchanged under the same keyword names. Two settings are pinned here:
    ``homopolymer_gap_penalty='-1'`` and ``band_size='32'``.

    Returns
    -------
    Whatever ``_denoise_single`` returns; annotated as a
    ``(biom.Table, DNAIterator, qiime2.Metadata)`` triple.
    """
    # Keys are listed in the order the helper's keywords were originally
    # passed, so the call is equivalent even for an order-sensitive callee.
    call_kwargs = dict(
        demultiplexed_seqs=demultiplexed_seqs,
        trunc_len=trunc_len,
        trim_left=trim_left,
        max_ee=max_ee,
        trunc_q=trunc_q,
        max_len=max_len,
        pooling_method=pooling_method,
        chimera_method=chimera_method,
        min_fold_parent_over_abundance=min_fold_parent_over_abundance,
        n_threads=n_threads,
        n_reads_learn=n_reads_learn,
        hashed_feature_ids=hashed_feature_ids,
        homopolymer_gap_penalty='-1',
        band_size='32',
    )
    return _denoise_single(**call_kwargs)
示例3: pandas2biom
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def pandas2biom(file_biom, table):
    """ Writes a Pandas.DataFrame into a biom file.

    Rows of the frame become observations and columns become samples.
    Both sets of labels are converted to ``str`` before the table is built,
    so frames with non-string labels (e.g. integer column names) are
    handled the same way on both axes.

    Parameters
    ----------
    file_biom: str
        The filename of the BIOM file to be created.
    table: a Pandas.DataFrame
        The table that should be written as BIOM.

    Returns
    -------
    Nothing
    """
    # The original only stringified the row labels; column labels were
    # passed through as a raw pandas Index, so non-string sample IDs reached
    # biom unconverted. Treat both axes identically.
    observation_ids = [str(label) for label in table.index]
    sample_ids = [str(label) for label in table.columns]

    bt = biom.Table(table.values,
                    observation_ids=observation_ids,
                    sample_ids=sample_ids)

    with biom_open(file_biom, 'w') as f:
        # "example" is passed as the second positional argument of to_hdf5.
        bt.to_hdf5(f, "example")
示例4: paired_heatmap
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def paired_heatmap(output_dir: str,
                   ranks: pd.DataFrame,
                   microbes_table: biom.Table,
                   metabolites_table: biom.Table,
                   features: str = None,
                   top_k_microbes: int = 2,
                   keep_top_samples: bool = True,
                   microbe_metadata: qiime2.CategoricalMetadataColumn = None,
                   normalize: str = 'log10',
                   color_palette: str = 'magma',
                   top_k_metabolites: int = 50,
                   level: int = -1,
                   row_center: bool = True) -> None:
    """Render paired heatmaps and their data tables into ``output_dir``.

    The ranks are transposed (and, when ``row_center`` is true, have their
    per-column mean subtracted after the transpose) and handed to
    ``paired_heatmaps`` together with the two tables and the plotting
    options. The resulting figure is saved as ``heatmap.pdf`` and
    ``heatmap.png``, the two selected tables as tab-separated files, and
    finally the ``index.html`` template is rendered into ``output_dir``.
    """
    # The plotting helper receives a Series (or None), not the metadata column.
    taxa = None if microbe_metadata is None else microbe_metadata.to_series()

    oriented_ranks = ranks.T
    if row_center:
        oriented_ranks = oriented_ranks - oriented_ranks.mean(axis=0)

    select_microbes, select_metabolites, hotmaps = paired_heatmaps(
        oriented_ranks, microbes_table, metabolites_table, taxa, features,
        top_k_microbes, top_k_metabolites, keep_top_samples, level, normalize,
        color_palette)

    # Figure first (PDF, then PNG), then the two TSV tables.
    for image_name in ('heatmap.pdf', 'heatmap.png'):
        hotmaps.savefig(join(output_dir, image_name), bbox_inches='tight')

    microbes_tsv = join(output_dir, 'select_microbes.tsv')
    select_microbes.to_csv(microbes_tsv, sep='\t')
    metabolites_tsv = join(output_dir, 'select_metabolites.tsv')
    select_metabolites.to_csv(metabolites_tsv, sep='\t')

    template_context = {
        'title': 'Paired Feature Abundance Heatmaps',
        'pdf_fp': 'heatmap.pdf',
        'png_fp': 'heatmap.png',
        'table1_fp': 'select_microbes.tsv',
        'download1_text': 'Download microbe abundances as TSV',
        'table2_fp': 'select_metabolites.tsv',
        'download2_text': 'Download top k metabolite abundances as TSV'}
    index = join(TEMPLATES, 'index.html')
    q2templates.render(index, output_dir, context=template_context)
示例5: setUp
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def setUp(self):
    """Build the rank, taxonomy and feature-table fixtures for the tests.

    Sets ``self.ranks``, ``self.taxa``, ``self.metabolites`` and
    ``self.microbes``; the ranks and both tables are imported as Artifacts.
    """
    feature_ids = list('ABCD')
    sample_ids = ['s1', 's2', 's3']

    # Built with features as rows, then transposed before import.
    rank_values = [[4.1, 1.3, 2.1],
                   [0.1, 0.3, 0.2],
                   [2.2, 4.3, 3.2],
                   [-6.3, -4.4, 2.1]]
    _ranks = pd.DataFrame(rank_values,
                          index=pd.Index(feature_ids, name='id'),
                          columns=['m1', 'm2', 'm3']).T
    self.ranks = Artifact.import_data('FeatureData[Conditional]', _ranks)

    taxonomy_strings = [
        'k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; '
        'o__Desulfobacterales; f__Desulfobulbaceae; g__; s__',
        'k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Streptophyta',
        'k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; '
        'o__Rickettsiales; f__mitochondria; g__Lardizabala; s__biternata',
        'k__Archaea; p__Euryarchaeota; c__Methanomicrobia; '
        'o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina']
    taxonomy = pd.Series(taxonomy_strings,
                         index=pd.Index(feature_ids, name='feature-id'),
                         name='Taxon')
    self.taxa = CategoricalMetadataColumn(taxonomy)

    metabolite_counts = np.array(
        [[9, 8, 2], [2, 1, 2], [9, 4, 5], [8, 8, 7]])
    metabolites = biom.Table(
        metabolite_counts,
        sample_ids=sample_ids,
        observation_ids=['m1', 'm2', 'm3', 'm4'])
    self.metabolites = Artifact.import_data(
        'FeatureTable[Frequency]', metabolites)

    microbe_counts = np.array(
        [[1, 2, 3], [3, 6, 3], [1, 9, 9], [8, 8, 7]])
    microbes = biom.Table(
        microbe_counts,
        sample_ids=sample_ids,
        observation_ids=feature_ids)
    self.microbes = Artifact.import_data(
        'FeatureTable[Frequency]', microbes)
示例6: setUp
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def setUp(self):
    """Simulate a multimodal dataset and derive the expected ranks.

    Seeds NumPy's global RNG, draws a dataset from ``random_multimodal``,
    stores the simulated parameters on ``self``, converts the two abundance
    frames into ``biom.Table`` objects, and computes ``self.exp_ranks``
    from the simulated ``U``/``V`` factors and their biases.
    """
    np.random.seed(1)
    simulated = random_multimodal(
        num_microbes=8, num_metabolites=8, num_samples=150,
        latent_dim=2, sigmaQ=2,
        microbe_total=1000, metabolite_total=10000, seed=1
    )
    (microbe_frame, metabolite_frame, self.X, self.B,
     self.U, self.Ubias, self.V, self.Vbias) = simulated

    # Second axis of the microbe frame; read before the frame is replaced
    # by a biom.Table below.
    _, d1 = microbe_frame.shape

    def _frame_to_table(frame):
        # Transpose the values and use the frame's columns as observation
        # IDs and its index as sample IDs.
        return biom.Table(frame.values.T,
                          observation_ids=frame.columns,
                          sample_ids=frame.index)

    self.microbes = _frame_to_table(microbe_frame)
    self.metabolites = _frame_to_table(metabolite_frame)

    # Augment U with a ones column + bias, and V with bias + a ones row,
    # so the product includes both bias terms.
    ones_col = np.ones((self.U.shape[0], 1))
    ones_row = np.ones((1, self.V.shape[1]))
    U_ = np.hstack((ones_col, self.Ubias, self.U))
    V_ = np.vstack((self.Vbias, ones_row, self.V))
    uv = U_ @ V_

    # Prepend a zero column before applying clr_inv.
    zero_col = np.zeros((d1, 1))
    self.exp_ranks = clr_inv(np.hstack((zero_col, uv)))
示例7: setUp
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def setUp(self):
    """Create small OTU / metabolite tables and matching sample metadata.

    Sets ``self.otu_table`` (7 observations), ``self.metabolite_table``
    (9 observations, values scaled by ``10e6``) and ``self.metadata``.
    All three share the same five sample IDs.
    """
    otu_counts = np.array([
        [104, 10, 2, 0, 0],
        [4, 100, 20, 0, 0],
        [0, 1, 0, 0, 4],
        [4, 0, 21, 0, 2],
        [40, 0, 2, 1, 39],
        [0, 0, 32, 10, 3],
        [59, 1, 0, 0, 3]
    ])
    metabolite_counts = np.array([
        [104, 1, 31, 0, 8],
        [4, 100, 20, 0, 0],
        [0, 8, 0, 0, 4],
        [0, 0, 2, 1, 2],
        [0, 0, 20, 10, 3],
        [0, 8, 0, 0, 4],
        [0, 0, 2, 10, 3],
        [0, 0, 320, 139, 3],
        [59, 9, 0, 0, 33]
    ]) * 10e6

    n_otus = otu_counts.shape[0]
    n_metabolites, n_samples = metabolite_counts.shape
    oids = ['o' + str(i) for i in range(n_otus)]
    mids = ['m' + str(i) for i in range(n_metabolites)]
    # NOTE(review): sample IDs carry the same 'm' prefix as the metabolite
    # IDs; kept as-is because the expected values may depend on it.
    sids = ['m' + str(i) for i in range(n_samples)]

    self.otu_table = Table(otu_counts, oids, sids)
    self.metabolite_table = Table(metabolite_counts, mids, sids)

    metadata_columns = {
        'testing': ['Train', 'Test', 'Train', 'Test', 'Train'],
        'bad': [True, False, True, False, True]
    }
    self.metadata = pd.DataFrame(metadata_columns, index=sids)
示例8: format_barplots
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def format_barplots(table: biom.Table, normalize: bool):
    """Serialise a feature table as tab-separated ``DATASET_MULTIBAR`` text.

    Parameters
    ----------
    table : biom.Table
        Table to export. When ``normalize`` is true it is first passed
        through ``table.norm(axis='observation', inplace=False)``.
    normalize : bool
        Whether to normalise before export.

    Returns
    -------
    str
        Header lines followed by a ``DATA`` section with one tab-joined
        line per row of the dense dataframe (row label first), joined with
        newlines.
    """
    if normalize:
        table = table.norm(axis='observation', inplace=False)
    frame = table.to_dataframe(dense=True)

    field_labels = list(frame.columns)
    field_colors = values_to_colors(field_labels, 'husl').values()
    labels_joined = '\t'.join(field_labels)
    colors_joined = '\t'.join(field_colors)
    shapes_joined = '\t'.join(['1'] * len(field_colors))

    lines = [
        'DATASET_MULTIBAR',
        'SEPARATOR TAB',
        'DATASET_LABEL\tRelative Abundance',
        'FIELD_COLORS\t' + colors_joined,
        'FIELD_LABELS\t' + labels_joined,
        'LEGEND_TITLE\tRelative Abundance',
        'LEGEND_SHAPES\t' + shapes_joined,
        'LEGEND_COLORS\t' + colors_joined,
        'LEGEND_LABELS\t' + labels_joined,
        'WIDTH\t100',
        'DATA',
    ]

    # reset_index moves the row labels into the first column so each data
    # line starts with its label.
    rows = frame.reset_index()
    lines.extend('\t'.join(rows.loc[row_key].apply(str))
                 for row_key in rows.index)
    return '\n'.join(lines)
示例9: gibbs
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def gibbs(table_fp: Table,
          mapping_fp: pd.DataFrame,
          output_dir: str,
          loo: bool,
          jobs: int,
          alpha1: float,
          alpha2: float,
          beta: float,
          source_rarefaction_depth: int,
          sink_rarefaction_depth: int,
          restarts: int,
          draws_per_restart: int,
          burnin: int,
          delay: int,
          per_sink_feature_assignments: bool,
          sample_with_replacement: bool,
          source_sink_column: str,
          source_column_value: str,
          sink_column_value: str,
          source_category_column: str):
    """Gibb's sampler for Bayesian estimation of microbial sample sources.

    For details, see the project README file.

    ``mapping_fp`` is opened as a text file and ``table_fp`` is handed to
    ``load_table``, so both are used as file paths here regardless of their
    annotations. All remaining arguments are forwarded positionally to
    ``gibbs_helper``.

    Side effects: creates ``output_dir`` (``os.mkdir`` raises if it already
    exists) and writes ``mixing_proportions.txt``,
    ``mixing_proportions_stds.txt`` and ``mixing_proportions.pdf`` into it,
    plus one ``<sink>.feature_table.txt`` per sink when the helper returns
    feature assignments.
    """
    # Create results directory. Click has already checked if it exists, and
    # failed if so.
    os.mkdir(output_dir)

    # Load the metadata file and feature table.
    # The 'U' open mode was removed in Python 3.11 (it now raises
    # ValueError); default text mode already applies universal newlines.
    # The context manager also closes the handle, which was leaked before.
    # NOTE(review): assumes parse_sample_metadata reads the file eagerly.
    with open(mapping_fp) as mapping_fh:
        sample_metadata = parse_sample_metadata(mapping_fh)
    feature_table = biom_to_df(load_table(table_fp))

    # run the gibbs sampler helper function (same used for q2)
    results = gibbs_helper(feature_table, sample_metadata, loo, jobs,
                           alpha1, alpha2, beta, source_rarefaction_depth,
                           sink_rarefaction_depth, restarts, draws_per_restart,
                           burnin, delay, per_sink_feature_assignments,
                           sample_with_replacement, source_sink_column,
                           source_column_value, sink_column_value,
                           source_category_column)

    # import the results (will change based on per_sink_feature_assignments)
    if len(results) == 3:
        mpm, mps, fas = results
        # write the feature tables from fas
        for sink, fa in zip(mpm.columns, fas):
            fa.to_csv(os.path.join(output_dir, sink + '.feature_table.txt'),
                      sep='\t')
    else:
        # get the results (without fas)
        mpm, mps = results

    # Write results.
    mpm.to_csv(os.path.join(output_dir, 'mixing_proportions.txt'), sep='\t')
    mps.to_csv(os.path.join(output_dir, 'mixing_proportions_stds.txt'),
               sep='\t')

    # Plot contributions.
    fig, ax = plot_heatmap(mpm.T)
    fig.savefig(os.path.join(output_dir, 'mixing_proportions.pdf'), dpi=300)
示例10: _denoise_helper
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def _denoise_helper(biom_fp, track_fp, hashed_feature_ids):
    """Load DADA2 outputs and convert them into table, sequences and stats.

    Parameters
    ----------
    biom_fp : path to the TSV feature table, read with
        ``biom.Table.from_tsv``.
    track_fp : path to the tab-separated per-sample tracking file.
    hashed_feature_ids : when true, feature IDs (the sequences) are
        replaced by their MD5 hex digests.

    Returns
    -------
    (table, rep_sequences, metadata) where ``table`` has relabelled sample
    (and optionally feature) IDs, ``rep_sequences`` is a ``DNAIterator``
    over the sequences, and ``metadata`` wraps the tracking statistics with
    added percentage columns.
    """
    _check_featureless_table(biom_fp)
    with open(biom_fp) as biom_fh:
        table = biom.Table.from_tsv(biom_fh, None, None, None)

    pct_filtered = 'percentage of input passed filter'
    pct_non_chimeric = 'percentage of input non-chimeric'

    stats = pd.read_csv(track_fp, sep='\t', index_col=0)
    stats.index.name = 'sample-id'
    stats = stats.rename(index=_filepath_to_sample)

    stats[pct_filtered] = stats['filtered'] / stats['input'] * 100
    stats[pct_non_chimeric] = stats['non-chimeric'] / stats['input'] * 100
    decimals = {pct_filtered: 2, pct_non_chimeric: 2}
    column_order = ['input', 'filtered', pct_filtered, 'denoised',
                    'non-chimeric', pct_non_chimeric]

    # only calculate percentage of input merged if paired end
    if 'merged' in stats:
        pct_merged = 'percentage of input merged'
        decimals[pct_merged] = 2
        stats[pct_merged] = stats['merged'] / stats['input'] * 100
        # Slot the merged columns in right after 'denoised'.
        column_order[4:4] = ['merged', pct_merged]

    stats = stats[column_order].fillna(0).round(decimals)
    metadata = qiime2.Metadata(stats)

    # Currently the sample IDs in DADA2 are the file names. We make
    # them the sample id part of the filename here.
    sample_renames = {}
    for filename in table.ids(axis='sample'):
        sample_renames[filename] = _filepath_to_sample(filename)
    table.update_ids(sample_renames, axis='sample', inplace=True)

    # The feature IDs in DADA2 are the sequences themselves.
    if not hashed_feature_ids:
        rep_sequences = DNAIterator(
            (skbio.DNA(seq, metadata={'id': seq})
             for seq in table.ids(axis='observation')))
        return table, rep_sequences, metadata

    # Make feature IDs the md5 sums of the sequences.
    digest_by_sequence = {}
    for seq in table.ids(axis='observation'):
        digest_by_sequence[seq] = hashlib.md5(seq.encode('utf-8')).hexdigest()
    table.update_ids(digest_by_sequence, axis='observation', inplace=True)
    rep_sequences = DNAIterator(
        (skbio.DNA(seq, metadata={'id': digest})
         for seq, digest in digest_by_sequence.items()))
    return table, rep_sequences, metadata
# Since `denoise-single` and `denoise-pyro` are almost identical, break out
# the bulk of the functionality to this helper util. Typechecking is assumed
# to have occurred in the calling functions, this is primarily for making
# sure that DADA2 is able to do what it needs to do.
示例11: denoise_paired
# 需要導入模塊: import biom [as 別名]
# 或者: from biom import Table [as 別名]
def denoise_paired(demultiplexed_seqs: SingleLanePerSamplePairedEndFastqDirFmt,
                   trunc_len_f: int, trunc_len_r: int,
                   trim_left_f: int = 0, trim_left_r: int = 0,
                   max_ee_f: float = 2.0, max_ee_r: float = 2.0,
                   trunc_q: int = 2, pooling_method: str = 'independent',
                   chimera_method: str = 'consensus',
                   min_fold_parent_over_abundance: float = 1.0,
                   n_threads: int = 1, n_reads_learn: int = 1000000,
                   hashed_feature_ids: bool = True
                   ) -> (biom.Table, DNAIterator, qiime2.Metadata):
    """Denoise paired-end reads by running ``run_dada_paired.R``.

    Inputs are validated, the forward (``R1_001.fastq``) and reverse
    (``R2_001.fastq``) files are duplicated into separate scratch
    directories, the R script is invoked through ``run_commands``, and its
    outputs are post-processed by ``_denoise_helper``.

    Raises
    ------
    ValueError
        If a ``trim_left_*`` is not smaller than its non-zero
        ``trunc_len_*``, or if the R script exits with return code 2.
    Exception
        If the R script exits with any other non-zero return code.
    """
    # Must stay the first statement: locals() has to contain only the
    # function's parameters at this point.
    _check_inputs(**locals())

    if trunc_len_f != 0 and trim_left_f >= trunc_len_f:
        raise ValueError("trim_left_f (%r) must be smaller than trunc_len_f"
                         " (%r)" % (trim_left_f, trunc_len_f))
    if trunc_len_r != 0 and trim_left_r >= trunc_len_r:
        raise ValueError("trim_left_r (%r) must be smaller than trunc_len_r"
                         " (%r)" % (trim_left_r, trunc_len_r))

    with tempfile.TemporaryDirectory() as temp_dir:
        tmp_forward, tmp_reverse, filt_forward, filt_reverse = (
            os.path.join(temp_dir, dirname)
            for dirname in ('forward', 'reverse', 'filt_f', 'filt_r'))
        biom_fp = os.path.join(temp_dir, 'output.tsv.biom')
        track_fp = os.path.join(temp_dir, 'track.tsv')

        for scratch_dir in (tmp_forward, tmp_reverse,
                            filt_forward, filt_reverse):
            os.mkdir(scratch_dir)

        # Sort each read file into the forward or reverse scratch directory
        # based on its file name; anything else is ignored.
        views = demultiplexed_seqs.sequences.iter_views(FastqGzFormat)
        for rel_path, view in views:
            if 'R1_001.fastq' in rel_path.name:
                target_dir = tmp_forward
            elif 'R2_001.fastq' in rel_path.name:
                target_dir = tmp_reverse
            else:
                continue
            qiime2.util.duplicate(
                str(view), os.path.join(target_dir, rel_path.name))

        # Positional arguments for the R script: paths first, then every
        # numeric/option parameter converted to its string form.
        script_paths = [tmp_forward, tmp_reverse, biom_fp, track_fp,
                        filt_forward, filt_reverse]
        script_options = [trunc_len_f, trunc_len_r,
                          trim_left_f, trim_left_r,
                          max_ee_f, max_ee_r, trunc_q,
                          pooling_method,
                          chimera_method, min_fold_parent_over_abundance,
                          n_threads, n_reads_learn]
        cmd = ['run_dada_paired.R']
        cmd.extend(script_paths)
        cmd.extend(str(option) for option in script_options)

        try:
            run_commands([cmd])
        except subprocess.CalledProcessError as e:
            if e.returncode != 2:
                raise Exception("An error was encountered while running DADA2"
                                " in R (return code %d), please inspect stdout"
                                " and stderr to learn more." % e.returncode)
            raise ValueError(
                "No reads passed the filter. trunc_len_f (%r) or"
                " trunc_len_r (%r) may be individually longer than"
                " read lengths, or trunc_len_f + trunc_len_r may be"
                " shorter than the length of the amplicon + 12"
                " nucleotides (the length of the overlap). Alternatively,"
                " other arguments (such as max_ee or trunc_q) may be"
                " preventing reads from passing the filter."
                % (trunc_len_f, trunc_len_r))

        # Outputs live inside temp_dir, so they must be read before the
        # context manager removes it.
        return _denoise_helper(biom_fp, track_fp, hashed_feature_ids)