本文整理汇总了Python中biom.Table.ids方法的典型用法代码示例。如果您正苦于以下问题:Python Table.ids方法的具体用法?Python Table.ids怎么用?Python Table.ids使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类biom.Table的用法示例。
在下文中一共展示了Table.ids方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: merge
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def merge(table1: biom.Table, table2: biom.Table) -> biom.Table:
    """Merge two feature tables whose sample IDs do not overlap.

    Parameters
    ----------
    table1, table2 : biom.Table
        The tables to combine.

    Returns
    -------
    biom.Table
        The result of ``table1.merge(table2)``.

    Raises
    ------
    ValueError
        If at least one sample ID occurs in both tables.
    """
    # Compute the intersection once; it drives both the check and the
    # error message.
    shared_sids = (set(table1.ids(axis='sample')) &
                   set(table2.ids(axis='sample')))
    if shared_sids:
        # Sets iterate in arbitrary order, so sort to keep the message
        # reproducible between runs.
        raise ValueError('Some samples are present in both tables: %s' %
                         ', '.join(sorted(shared_sids)))
    return table1.merge(table2)
示例2: beta_phylogenetic
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def beta_phylogenetic(table: biom.Table, phylogeny: skbio.TreeNode,
                      metric: str, n_jobs: int=1)-> skbio.DistanceMatrix:
    """Compute a beta-diversity distance matrix with a phylogenetic metric.

    Parameters
    ----------
    table : biom.Table
        Feature table providing the counts, sample IDs and feature IDs.
    phylogeny : skbio.TreeNode
        Tree handed to ``skbio.diversity.beta_diversity`` as ``tree``.
    metric : str
        Must be a member of ``phylogenetic_metrics()``.
    n_jobs : int
        Forwarded to the pairwise distance function. Must be 1 when
        ``metric`` is ``'weighted_unifrac'``.

    Returns
    -------
    skbio.DistanceMatrix
        Whatever ``skbio.diversity.beta_diversity`` returns.

    Raises
    ------
    ValueError
        If the metric is unknown, the table is empty, or parallel execution
        is requested for weighted UniFrac.
    skbio.tree.MissingNodeError
        Re-raised with its message reworded to use this function's
        vocabulary ('feature_ids', 'phylogeny').
    """
    if metric not in phylogenetic_metrics():
        raise ValueError("Unknown phylogenetic metric: %s" % metric)
    if table.is_empty():
        raise ValueError("The provided table object is empty")
    if n_jobs != 1 and metric == 'weighted_unifrac':
        raise ValueError("Weighted UniFrac is not parallelizable")

    # Dense, integer-cast, transposed copy of the table's sparse matrix.
    counts = table.matrix_data.toarray().astype(int).T
    sample_ids = table.ids(axis='sample')
    feature_ids = table.ids(axis='observation')

    try:
        results = skbio.diversity.beta_diversity(
            metric=metric,
            counts=counts,
            ids=sample_ids,
            otu_ids=feature_ids,
            tree=phylogeny,
            pairwise_func=sklearn.metrics.pairwise_distances,
            n_jobs=n_jobs
        )
    except skbio.tree.MissingNodeError as e:
        # Translate skbio's argument names into the names used here.
        message = str(e).replace('otu_ids', 'feature_ids')
        message = message.replace('tree', 'phylogeny')
        # Chain explicitly so the original error is kept as the cause.
        raise skbio.tree.MissingNodeError(message) from e

    return results
示例3: group
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def group(table: biom.Table, axis: str,
          metadata: qiime2.CategoricalMetadataColumn, mode: str) -> biom.Table:
    """Collapse one axis of ``table`` by the values of a metadata column.

    Parameters
    ----------
    table : biom.Table
        Table to collapse; must not be empty.
    axis : str
        ``'feature'`` is translated to biom's ``'observation'``; any other
        value is passed to biom unchanged.
    metadata : qiime2.CategoricalMetadataColumn
        Column supplying the group value for each ID on the chosen axis.
    mode : str
        Key into ``_mode_lookup`` selecting the collapse function.

    Returns
    -------
    biom.Table
        The collapsed table, ordered by first appearance of each group
        value in the metadata.

    Raises
    ------
    ValueError
        If ``table`` is empty.
    """
    if table.is_empty():
        raise ValueError("Cannot group an empty table.")

    biom_axis = 'observation' if axis == 'feature' else axis

    metadata = _munge_metadata_column(
        metadata, table.ids(axis=biom_axis), axis)

    def _group_value(axis_id, _):
        # The second argument supplied by ``collapse`` is not needed.
        return metadata.get_value(axis_id)

    collapsed = table.collapse(
        _group_value,
        collapse_f=_mode_lookup[mode],
        axis=biom_axis,
        norm=False,
        include_collapsed_metadata=False)

    # Order the axis by the first unique appearance of each group value in
    # the metadata (stable for identity mappings and easier to test).
    # TODO use CategoricalMetadataColumn API for retrieving categories/groups,
    # when the API exists.
    group_order = metadata.to_series().unique()
    return collapsed.sort_order(group_order, axis=biom_axis)
示例4: beta
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def beta(table: biom.Table, metric: str,
         pseudocount: int=1, n_jobs: int=1)-> skbio.DistanceMatrix:
    """Compute a beta-diversity distance matrix with a non-phylogenetic metric.

    Parameters
    ----------
    table : biom.Table
        Feature table providing the counts and sample IDs.
    metric : str
        Must be a member of ``non_phylogenetic_metrics()``. The value
        ``'aitchison'`` is handled locally (see below).
    pseudocount : int
        Added to every count, only when ``metric`` is ``'aitchison'``.
    n_jobs : int
        Forwarded to the pairwise distance function.

    Returns
    -------
    skbio.DistanceMatrix
        Whatever ``skbio.diversity.beta_diversity`` returns.

    Raises
    ------
    ValueError
        If the metric is unknown or the table is empty.
    """
    if metric not in non_phylogenetic_metrics():
        raise ValueError("Unknown metric: %s" % metric)
    # Validate before densifying so an empty table fails fast.
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    # Dense, transposed copy of the table's sparse matrix.
    counts = table.matrix_data.toarray().T

    def aitchison(x, y, **kwds):
        # Euclidean distance between the clr-transformed vectors.
        return euclidean(clr(x), clr(y))

    if metric == 'aitchison':
        # The pseudocount is applied before the callable replaces the name.
        counts += pseudocount
        metric = aitchison

    sample_ids = table.ids(axis='sample')

    return skbio.diversity.beta_diversity(
        metric=metric,
        counts=counts,
        ids=sample_ids,
        validate=True,
        pairwise_func=sklearn.metrics.pairwise_distances,
        n_jobs=n_jobs
    )
示例5: filter_table
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def filter_table(table: biom.Table, tree: skbio.TreeNode) -> biom.Table:
    """Drop features of ``table`` whose IDs are not tip names in ``tree``.

    The table is filtered in place and the same object is returned.
    """
    tip_names = {tip.name for tip in tree.tips()}
    # Intersect so that only IDs actually present in the table are requested.
    shared_ids = tip_names.intersection(table.ids(axis='observation'))
    table.filter(shared_ids, axis='observation', inplace=True)
    return table
示例6: test_collapse_full
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def test_collapse_full(self):
    # Expected values of the single 'average' column, one per observation.
    expected_values = [0.00769230769231, 0.0282051282051,
                       0.0487179487179, 0.0692307692308,
                       0.0897435897436, 0.110256410256,
                       0.130769230769, 0.151282051282,
                       0.171794871795, 0.192307692308]

    obs = collapse_full(table)
    exp = Table(array([[value] for value in expected_values]),
                observ_ids, ['average'],
                observation_metadata=observ_metadata)

    # Compare cell by cell with a floating-point tolerance.
    for row in range(len(expected_values)):
        assert_almost_equal(obs[row, 0], exp[row, 0])

    self.assertEqual(obs.ids(), exp.ids())
    self.assertItemsEqual(obs.ids('observation'), exp.ids('observation'))

    # Observation metadata must match the expected metadata.
    collected_meta = [md for _, _, md in obs.iter(axis='observation')]
    self.assertItemsEqual(collected_meta, observ_metadata)
示例7: alpha
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def alpha(table: biom.Table, metric: str) -> pd.Series:
    """Compute alpha diversity per sample with a non-phylogenetic metric.

    Parameters
    ----------
    table : biom.Table
        Feature table providing the counts and sample IDs.
    metric : str
        Must be a member of ``non_phylogenetic_metrics()``.

    Returns
    -------
    pd.Series
        Result of ``skbio.diversity.alpha_diversity`` with its ``name`` set
        to ``metric``.

    Raises
    ------
    ValueError
        If the metric is unknown or the table is empty.
    """
    if metric not in non_phylogenetic_metrics():
        raise ValueError("Unknown metric: %s" % metric)
    # Reject empty input explicitly, as the beta-diversity functions do.
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    # Dense, integer-cast, transposed copy of the table's sparse matrix.
    counts = table.matrix_data.toarray().astype(int).T
    sample_ids = table.ids(axis='sample')

    result = skbio.diversity.alpha_diversity(metric=metric, counts=counts,
                                             ids=sample_ids)
    result.name = metric
    return result
示例8: alpha_phylogenetic
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def alpha_phylogenetic(table: biom.Table, phylogeny: skbio.TreeNode,
                       metric: str) -> pd.Series:
    """Compute alpha diversity per sample with a phylogenetic metric.

    Parameters
    ----------
    table : biom.Table
        Feature table providing the counts, sample IDs and feature IDs.
    phylogeny : skbio.TreeNode
        Tree handed to ``skbio.diversity.alpha_diversity`` as ``tree``.
    metric : str
        Must be a member of ``phylogenetic_metrics()``.

    Returns
    -------
    pd.Series
        Result of ``skbio.diversity.alpha_diversity`` with its ``name`` set
        to ``metric``.

    Raises
    ------
    ValueError
        If the metric is unknown or the table is empty.
    skbio.tree.MissingNodeError
        Re-raised with its message reworded to use this function's
        vocabulary ('feature_ids', 'phylogeny').
    """
    if metric not in phylogenetic_metrics():
        raise ValueError("Unknown phylogenetic metric: %s" % metric)
    # Reject empty input explicitly, as the beta-diversity functions do.
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    # Dense, integer-cast, transposed copy of the table's sparse matrix.
    counts = table.matrix_data.toarray().astype(int).T
    sample_ids = table.ids(axis='sample')
    feature_ids = table.ids(axis='observation')

    try:
        result = skbio.diversity.alpha_diversity(metric=metric,
                                                 counts=counts,
                                                 ids=sample_ids,
                                                 otu_ids=feature_ids,
                                                 tree=phylogeny)
    except skbio.tree.MissingNodeError as e:
        # Translate skbio's argument names into the names used here.
        message = str(e).replace('otu_ids', 'feature_ids')
        message = message.replace('tree', 'phylogeny')
        # Chain explicitly so the original error is kept as the cause.
        raise skbio.tree.MissingNodeError(message) from e

    result.name = metric
    return result
示例9: beta
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def beta(table: biom.Table, metric: str, n_jobs: int=1)-> skbio.DistanceMatrix:
    """Compute a beta-diversity distance matrix with a non-phylogenetic metric.

    Raises ``ValueError`` when ``metric`` is not a member of
    ``non_phylogenetic_metrics()`` or when ``table`` is empty. Otherwise
    returns the result of ``skbio.diversity.beta_diversity``.
    """
    if metric not in non_phylogenetic_metrics():
        raise ValueError("Unknown metric: %s" % metric)
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    dense = table.matrix_data.toarray()
    # Collect the keyword arguments first, then make a single call.
    options = dict(
        metric=metric,
        counts=dense.astype(int).T,
        ids=table.ids(axis='sample'),
        pairwise_func=sklearn.metrics.pairwise_distances,
        n_jobs=n_jobs,
    )
    return skbio.diversity.beta_diversity(**options)
示例10: beta
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def beta(table: biom.Table, metric: str,
         pseudocount: int = 1, n_jobs: int = 1) -> skbio.DistanceMatrix:
    """Compute a beta-diversity distance matrix with a non-phylogenetic metric.

    Parameters
    ----------
    table : biom.Table
        Feature table providing the counts and sample IDs.
    metric : str
        Must be a member of ``non_phylogenetic_metrics()``. The values
        ``'aitchison'`` and ``'canberra_adkins'`` are implemented locally
        and passed on as callables.
    pseudocount : int
        Added to every count, only when ``metric`` is ``'aitchison'``.
    n_jobs : int
        Forwarded to the pairwise distance function.

    Returns
    -------
    skbio.DistanceMatrix
        Whatever ``skbio.diversity.beta_diversity`` returns.

    Raises
    ------
    ValueError
        If the metric is unknown or the table is empty. The
        ``canberra_adkins`` callable also raises it for negative values.
    """
    if metric not in non_phylogenetic_metrics():
        raise ValueError("Unknown metric: %s" % metric)
    # Validate before densifying so an empty table fails fast.
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    # Dense, transposed copy of the table's sparse matrix.
    counts = table.matrix_data.toarray().T

    def aitchison(x, y, **kwds):
        # Euclidean distance between the clr-transformed vectors.
        return euclidean(clr(x), clr(y))

    def canberra_adkins(x, y, **kwds):
        if (x < 0).any() or (y < 0).any():
            raise ValueError("Canberra-Adkins is only defined over positive "
                             "values.")

        # Restrict to positions where at least one vector is non-zero.
        nz = ((x > 0) | (y > 0))
        x_ = x[nz]
        y_ = y[nz]
        nnz = nz.sum()

        # NOTE(review): nnz is 0 when both vectors are entirely zero, which
        # makes 1. / nnz a division by zero -- confirm whether such pairs
        # can reach this function.
        return (1. / nnz) * np.sum(np.abs(x_ - y_) / (x_ + y_))

    if metric == 'aitchison':
        # The pseudocount is applied before the callable replaces the name.
        counts += pseudocount
        metric = aitchison
    elif metric == 'canberra_adkins':
        metric = canberra_adkins

    sample_ids = table.ids(axis='sample')

    return skbio.diversity.beta_diversity(
        metric=metric,
        counts=counts,
        ids=sample_ids,
        validate=True,
        pairwise_func=sklearn.metrics.pairwise_distances,
        n_jobs=n_jobs
    )
示例11: filter_seqs
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def filter_seqs(data: pd.Series, table: biom.Table=None,
                metadata: qiime2.Metadata=None, where: str=None,
                exclude_ids: bool=False) -> pd.Series:
    """Filter ``data`` by the IDs of a table or of a metadata selection.

    Parameters
    ----------
    data : pd.Series
        Series whose index holds the IDs to filter on.
    table : biom.Table, optional
        When given, its observation IDs form the selection.
    metadata : qiime2.Metadata, optional
        When given, ``metadata.get_ids(where=where)`` forms the selection.
    where : str, optional
        Only used together with ``metadata``.
    exclude_ids : bool
        When exactly ``True``, the selection is inverted: entries whose ID
        is in the selection are dropped instead of kept.

    Returns
    -------
    pd.Series
        The entries of ``data`` that survive filtering.

    Raises
    ------
    ValueError
        If both or neither of ``table`` and ``metadata`` are given, or if
        nothing remains after filtering.
    """
    if table is not None and metadata is not None:
        raise ValueError('Filtering with metadata and filtering with a table '
                         'are mutually exclusive.')
    if table is None and metadata is None:
        raise ValueError('No filtering requested. Must provide either table '
                         'or metadata.')

    # Exactly one of the two sources is set from here on.
    if table is not None:
        selected_ids = table.ids(axis='observation')
    else:
        # Note, no need to check for missing feature IDs in the metadata,
        # because that is basically the point of this method.
        selected_ids = metadata.get_ids(where=where)

    if exclude_ids is True:
        selected_ids = set(data.index) - set(selected_ids)

    kept = data[data.index.isin(selected_ids)]
    if kept.empty is True:
        raise ValueError('All features were filtered out of the data.')
    return kept
示例12: create_non_rarefied_biom_artifact
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table):
"""Creates the initial non-rarefied BIOM artifact of the analysis
Parameters
----------
analysis : dict
Dictionary with the analysis information
biom_data : dict
Dictionary with the biom file information
rarefied_table : biom.Table
The rarefied BIOM table
Returns
-------
int
The id of the new artifact
"""
# The non rarefied biom artifact is the initial biom table of the analysis.
# This table does not currently exist anywhere, so we need to actually
# create the BIOM file. To create this BIOM file we need: (1) the samples
# and artifacts they come from and (2) whether the samples were
# renamed or not. (1) is on the database, but we need to infer (2) from
# the existing rarefied BIOM table. Fun, fun...
with TRN:
# Get the samples included in the BIOM table grouped by artifact id
# Note that the analysis contains a BIOM table per data type included
# in it, and the table analysis_sample does not differentiate between
# datatypes, so we need to check the data type in the artifact table
sql = """SELECT artifact_id, array_agg(sample_id)
FROM qiita.analysis_sample
JOIN qiita.artifact USING (artifact_id)
WHERE analysis_id = %s AND data_type_id = %s
GROUP BY artifact_id"""
TRN.add(sql, [analysis['analysis_id'], biom_data['data_type_id']])
samples_by_artifact = TRN.execute_fetchindex()
# Create an empty BIOM table to be the new master table
new_table = Table([], [], [])
ids_map = {}
for a_id, samples in samples_by_artifact:
# Get the filepath of the BIOM table from the artifact
artifact = Artifact(a_id)
biom_fp = None
for _, fp, fp_type in artifact.filepaths:
if fp_type == 'biom':
biom_fp = fp
# Note that we are sure that the biom table exists for sure, so
# no need to check if biom_fp is undefined
biom_table = load_table(biom_fp)
samples = set(samples).intersection(biom_table.ids())
biom_table.filter(samples, axis='sample', inplace=True)
# we need to check if the table has samples left before merging
if biom_table.shape[0] != 0 and biom_table.shape[1] != 0:
new_table = new_table.merge(biom_table)
ids_map.update({sid: "%d.%s" % (a_id, sid)
for sid in biom_table.ids()})
# Check if we need to rename the sample ids in the biom table
new_table_ids = set(new_table.ids())
if not new_table_ids.issuperset(rarefied_table.ids()):
# We need to rename the sample ids
new_table.update_ids(ids_map, 'sample', True, True)
sql = """INSERT INTO qiita.artifact
(generated_timestamp, data_type_id, visibility_id,
artifact_type_id, submitted_to_vamps)
VALUES (%s, %s, %s, %s, %s)
RETURNING artifact_id"""
# Magic number 4 -> visibility sandbox
# Magic number 7 -> biom artifact type
TRN.add(sql, [analysis['timestamp'], biom_data['data_type_id'],
4, 7, False])
artifact_id = TRN.execute_fetchlast()
# Associate the artifact with the analysis
sql = """INSERT INTO qiita.analysis_artifact
(analysis_id, artifact_id)
VALUES (%s, %s)"""
TRN.add(sql, [analysis['analysis_id'], artifact_id])
# Link the artifact with its file
dd_id, mp = get_mountpoint('BIOM')[0]
dir_fp = join(get_db_files_base_dir(), mp, str(artifact_id))
if not exists(dir_fp):
makedirs(dir_fp)
new_table_fp = join(dir_fp, "biom_table.biom")
with biom_open(new_table_fp, 'w') as f:
new_table.to_hdf5(f, "Generated by Qiita")
sql = """INSERT INTO qiita.filepath
(filepath, filepath_type_id, checksum,
checksum_algorithm_id, data_directory_id)
VALUES (%s, %s, %s, %s, %s)
RETURNING filepath_id"""
# Magic number 7 -> filepath_type_id = 'biom'
# Magic number 1 -> the checksum algorithm id
TRN.add(sql, [basename(new_table_fp), 7,
compute_checksum(new_table_fp), 1, dd_id])
fp_id = TRN.execute_fetchlast()
sql = """INSERT INTO qiita.artifact_filepath
#.........这里部分代码省略.........
示例13: _table_to_dataframe
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def _table_to_dataframe(table: biom.Table) -> pd.DataFrame:
    """Return the table as a dense DataFrame.

    The frame is built from the transposed dense matrix, indexed by the
    table's sample IDs, with its observation IDs as columns.
    """
    return pd.DataFrame(
        table.matrix_data.toarray().T,
        index=table.ids(axis='sample'),
        columns=table.ids(axis='observation'))
示例14: alpha_rarefaction
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def alpha_rarefaction(output_dir: str, table: biom.Table, max_depth: int,
phylogeny: skbio.TreeNode = None, metrics: set = None,
metadata: qiime2.Metadata = None, min_depth: int = 1,
steps: int = 10, iterations: int = 10) -> None:
if metrics is None:
metrics = {'observed_otus', 'shannon'}
if phylogeny is not None:
metrics.add('faith_pd')
elif not metrics:
raise ValueError('`metrics` was given an empty set.')
else:
phylo_overlap = phylogenetic_metrics() & metrics
if phylo_overlap and phylogeny is None:
raise ValueError('Phylogenetic metric %s was requested but '
'phylogeny was not provided.' % phylo_overlap)
if max_depth <= min_depth:
raise ValueError('Provided max_depth of %d must be greater than '
'provided min_depth of %d.' % (max_depth, min_depth))
possible_steps = max_depth - min_depth
if possible_steps < steps:
raise ValueError('Provided number of steps (%d) is greater than the '
'steps possible between min_depth and '
'max_depth (%d).' % (steps, possible_steps))
if table.is_empty():
raise ValueError('Provided table is empty.')
max_frequency = max(table.sum(axis='sample'))
if max_frequency < max_depth:
raise ValueError('Provided max_depth of %d is greater than '
'the maximum sample total frequency of the '
'feature_table (%d).' % (max_depth, max_frequency))
if metadata is None:
columns, filtered_columns = set(), set()
else:
# Filter metadata to only include sample IDs present in the feature
# table. Also ensures every feature table sample ID is present in the
# metadata.
metadata = metadata.filter_ids(table.ids(axis='sample'))
# Drop metadata columns that aren't categorical, or consist solely of
# missing values.
pre_filtered_cols = set(metadata.columns)
metadata = metadata.filter_columns(column_type='categorical',
drop_all_missing=True)
filtered_columns = pre_filtered_cols - set(metadata.columns)
metadata_df = metadata.to_dataframe()
if metadata_df.empty or len(metadata.columns) == 0:
raise ValueError("All metadata filtered after dropping columns "
"that contained non-categorical data.")
metadata_df.columns = pd.MultiIndex.from_tuples(
[(c, '') for c in metadata_df.columns])
columns = metadata_df.columns.get_level_values(0)
data = _compute_rarefaction_data(table, min_depth, max_depth,
steps, iterations, phylogeny, metrics)
filenames = []
for m, data in data.items():
metric_name = quote(m)
filename = '%s.csv' % metric_name
if metadata is None:
n_df = _compute_summary(data, 'sample-id')
jsonp_filename = '%s.jsonp' % metric_name
_alpha_rarefaction_jsonp(output_dir, jsonp_filename, metric_name,
n_df, '')
filenames.append(jsonp_filename)
else:
merged = data.join(metadata_df, how='left')
for column in columns:
column_name = quote(column)
reindexed_df, counts = _reindex_with_metadata(column,
columns,
merged)
c_df = _compute_summary(reindexed_df, column, counts=counts)
jsonp_filename = "%s-%s.jsonp" % (metric_name, column_name)
_alpha_rarefaction_jsonp(output_dir, jsonp_filename,
metric_name, c_df, column)
filenames.append(jsonp_filename)
with open(os.path.join(output_dir, filename), 'w') as fh:
data.columns = ['depth-%d_iter-%d' % (t[0], t[1])
for t in data.columns.values]
if metadata is not None:
data = data.join(metadata.to_dataframe(), how='left')
data.to_csv(fh, index_label=['sample-id'])
index = os.path.join(TEMPLATES, 'alpha_rarefaction_assets', 'index.html')
q2templates.render(index, output_dir,
context={'metrics': list(metrics),
'filenames': [quote(f) for f in filenames],
'columns': list(columns),
'steps': steps,
'filtered_columns': sorted(filtered_columns)})
shutil.copytree(os.path.join(TEMPLATES, 'alpha_rarefaction_assets',
'dist'),
#.........这里部分代码省略.........
示例15: beta_rarefaction
# 需要导入模块: from biom import Table [as 别名]
# 或者: from biom.Table import ids [as 别名]
def beta_rarefaction(output_dir: str, table: biom.Table, metric: str,
                     clustering_method: str, metadata: qiime2.Metadata,
                     sampling_depth: int, iterations: int=10,
                     phylogeny: skbio.TreeNode=None,
                     correlation_method: str='spearman',
                     color_scheme: str='BrBG') -> None:
    """Write a beta-rarefaction report into ``output_dir``.

    The function obtains several distance matrices from
    ``_get_multiple_rarefaction`` and writes these outputs:

    * ``heatmap.svg`` and ``rarefaction-iteration-correlation.tsv``
    * ``sample-clustering-<clustering_method>.tre``
    * ``emperor/index.html`` plus Emperor support files
    * the rendered ``beta_rarefaction_assets`` templates

    Raises
    ------
    ValueError
        If a phylogenetic metric is requested without ``phylogeny``, or if
        ``table`` is empty.
    """
    # Pick the distance function that matches the kind of metric.
    if metric not in phylogenetic_metrics():
        beta_func = beta
    elif phylogeny is None:
        raise ValueError("A phylogenetic metric (%s) was requested, "
                         "but a phylogenetic tree was not provided. "
                         "Phylogeny must be provided when using a "
                         "phylogenetic diversity metric." % metric)
    else:
        beta_func = functools.partial(beta_phylogenetic, phylogeny=phylogeny)

    if table.is_empty():
        raise ValueError("Input feature table is empty.")

    # Filter metadata to only include sample IDs present in the feature table.
    # Also ensures every feature table sample ID is present in the metadata.
    metadata = metadata.filter_ids(table.ids(axis='sample'))

    distance_matrices = _get_multiple_rarefaction(
        beta_func, metric, iterations, table, sampling_depth)
    # The first matrix is the primary one; the rest serve as support.
    primary = distance_matrices[0]
    support = distance_matrices[1:]

    heatmap_fig, similarity_df = _make_heatmap(
        distance_matrices, metric, correlation_method, color_scheme)
    heatmap_path = os.path.join(output_dir, 'heatmap.svg')
    heatmap_fig.savefig(heatmap_path)
    correlation_path = os.path.join(
        output_dir, 'rarefaction-iteration-correlation.tsv')
    similarity_df.to_csv(correlation_path, sep='\t')

    tree = _cluster_samples(primary, support, clustering_method)
    tree_path = os.path.join(
        output_dir, 'sample-clustering-%s.tre' % clustering_method)
    tree.write(tree_path)

    emperor = _jackknifed_emperor(primary, support, metadata)
    emperor_dir = os.path.join(output_dir, 'emperor')
    emperor.copy_support_files(emperor_dir)
    with open(os.path.join(emperor_dir, 'index.html'), 'w') as fh:
        fh.write(emperor.make_emperor(standalone=True))

    asset_dir = os.path.join(TEMPLATES, 'beta_rarefaction_assets')
    page_names = ['index.html', 'heatmap.html', 'tree.html', 'emperor.html']
    templates = [os.path.join(asset_dir, page) for page in page_names]

    tab_specs = (('emperor.html', 'PCoA'),
                 ('heatmap.html', 'Heatmap'),
                 ('tree.html', 'Clustering'))
    context = {
        'metric': metric,
        'clustering_method': clustering_method,
        'tabs': [{'url': url, 'title': title} for url, title in tab_specs],
    }
    q2templates.render(templates, output_dir, context=context)