本文整理汇总了Python中biom.load_table方法的典型用法代码示例。如果您正苦于以下问题：Python biom.load_table方法的具体用法？Python biom.load_table怎么用？Python biom.load_table使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类biom的用法示例。
在下文中一共展示了biom.load_table方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_create_otu_table
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def test_create_otu_table(self):
    """Check the table written by ``create_otu_table`` for two input files.

    The table is written to ``all.biom`` under ``self.output_dir``, read
    back with ``load_table`` and checked for its shape and for the counts
    of one feature sequence.
    """
    fp_table = join(self.output_dir, 'all.biom')
    # (input file path, sample id) pairs handed to create_otu_table
    sample_files = [
        (join(self.data_dir, 'usecase_mixedchars',
              '1.SKB7.640196.fastq.trim.derep.'
              'no_artifacts.msa.deblur.no_chimeras'), '1.SKB7.640196'),
        (join(self.data_dir, 'usecase_mixedchars',
              '1.SKB8.640193.fastq.trim.derep.'
              'no_artifacts.deblur.no_chimeras'), '1.SKB8.640193')]
    create_otu_table(fp_table, sample_files,
                     outputfasta_fp=join(self.output_dir, 'all.seqs'),
                     minreads=0)

    table = load_table(fp_table)
    # should produce a table with two samples and two features
    self.assertEqual(table.shape, (2, 2))

    # assert that counts from different case entries are collapsed
    feature_seq = (
        'TACGGGGGGGGTTAGCGTTATTCAATGATATTTGGCGTAAAGTGCATGTAGATGGTGTTAC'
        'AAGTTAAAAAAATAAAAACTAAGGACAAATCTTTTCGTT')
    counts = list(table.to_dataframe().to_dense().loc[feature_seq, :].values)
    # assertEqual (rather than assertTrue on ``==``) reports the actual
    # counts when the expectation is not met.
    self.assertEqual(counts, [60, 0])
示例2: read_taxonomic_profile
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def read_taxonomic_profile(biom_profile, config, no_samples=None):
    """Read a BIOM file into a ``{observation id: (lineage, abundances)}`` dict.

    :param biom_profile: path handed to ``biom.load_table``
    :param config: object whose ``set(section, option, value)`` is called to
        record the number of samples actually used
    :param no_samples: requested number of samples; ``None`` means all
        samples found in the table
    :return: dict mapping each observation id to a tuple of its "taxonomy"
        metadata (split on ";" when it is a string) and the list of its
        values in the first ``no_samples`` samples
    """
    table = biom.load_table(biom_profile)
    observation_ids = table.ids(axis="observation")
    sample_ids = table.ids()
    available = len(sample_ids)

    if no_samples is None:
        no_samples = available

    # A request that is neither "all samples" nor "one sample" is reported,
    # and capped at the number of samples present in the table.
    if no_samples != available and no_samples != 1:
        _log.warning("Number of samples (%s) does not match number of samples in biom file (%s)" % (no_samples, available))
        if no_samples > available:
            no_samples = available
        _log.warning("Using the first %s samples" % no_samples)

    config.set("Main", "number_of_samples", str(no_samples))

    profile = {}
    for otu in observation_ids:
        lineage = table.metadata(otu, axis="observation")["taxonomy"]
        try:
            # a single string is split into ranks; objects without
            # ``split`` (e.g. an existing list) are kept as they are
            lineage = lineage.split(";")
        except AttributeError:
            pass
        abundances = [table.get_value_by_ids(otu, sample)
                      for sample in sample_ids[:no_samples]]
        profile[otu] = (lineage, abundances)
    return profile
示例3: setUp
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def setUp(self):
    """Load the soil microbe/metabolite tables and build train/test splits.

    Both tables are converted to data frames and transposed; the microbe
    frame is re-ordered by the metabolite frame's index, and the last two
    rows of each are held out as the test set.
    """
    self.microbes = load_table(get_data_path('soil_microbes.biom'))
    self.metabolites = load_table(get_data_path('soil_metabolites.biom'))

    metabolite_df = self.metabolites.to_dataframe().T
    microbe_df = self.microbes.to_dataframe().T.loc[metabolite_df.index]

    self.trainX, self.testX = microbe_df.iloc[:-2], microbe_df.iloc[-2:]
    self.trainY, self.testY = metabolite_df.iloc[:-2], metabolite_df.iloc[-2:]
示例4: test_featureMatch1
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def test_featureMatch1(self):
    """Every fingerprint-table feature must exist in the feature table."""
    goodcsi = self.goodcsi.view(CSIDirFmt)
    tablefp = collate_fingerprint(goodcsi)
    features = load_table(self.featureTable)
    allfeatrs = set(features.ids(axis='observation'))
    fpfeatrs = set(tablefp.index)
    # Equivalent to ``fpfeatrs <= allfeatrs``, but a failure now lists the
    # ids that are missing from the feature table.
    self.assertEqual(fpfeatrs - allfeatrs, set())
示例5: setUp
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def setUp(self):
    """Load the fixtures stored in the ``data`` directory next to this file."""
    here = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(here, 'data')

    # tab-separated feature data
    self.feature_data = pd.read_csv(
        os.path.join(data_dir, 'feature_data_itol.txt'), sep='\t')
    # BIOM feature table
    self.grouped_table = biom.load_table(
        os.path.join(data_dir, 'grouped_feature_table.biom'))
示例6: setUp
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def setUp(self):
    """Prepare empty, malformed and valid fixtures for the tests."""
    THIS_DIR = os.path.dirname(os.path.abspath(__file__))

    # deliberately unusable inputs: an empty frame and a frame with
    # placeholder labels
    self.emptyfps = pd.DataFrame()
    self.wrongtips = pd.DataFrame(index=['a', 'b', 'c'],
                                  data=['a', 'b', 'c'])

    # valid inputs read from the data directory
    self.features = load_table(
        os.path.join(THIS_DIR, 'data/features_formated.biom'))
    smiles_fp = os.path.join(THIS_DIR, 'data/features_smiles.txt')
    self.smiles = pd.read_csv(
        smiles_fp, dtype=str, sep='\t').set_index('#featureID')
    self.tablefp = collate_fingerprint(
        os.path.join(THIS_DIR, 'data/goodcsi'))
示例7: setUp
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def setUp(self):
    """Load the feature tables, MS2 match table and CSI artifacts."""
    THIS_DIR = os.path.dirname(os.path.abspath(__file__))

    # empty table built from no data and no ids
    self.emptyfeatures = Table({}, [], [])

    self.features = load_table(
        os.path.join(THIS_DIR, 'data/features_formated.biom'))

    ms2_match_fp = os.path.join(THIS_DIR, 'data/ms2_match.txt')
    self.ms2_match = pd.read_csv(ms2_match_fp, sep='\t',
                                 index_col='cluster index')

    self.features2 = load_table(
        os.path.join(THIS_DIR, 'data/features2_formated.biom'))

    self.goodcsi = qiime2.Artifact.load(
        os.path.join(THIS_DIR, 'data/csiFolder.qza'))
    self.goodcsi2 = qiime2.Artifact.load(
        os.path.join(THIS_DIR, 'data/csiFolder2.qza'))
示例8: test_norm_by_marker_copies
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def test_norm_by_marker_copies(self):
    '''Compare the normalized abundance table against the expected one.'''
    dense_counts = biom.load_table(seqtab_biom).to_dataframe(dense=True)
    # Put the rows in the same order as the expected table.
    dense_counts = dense_counts.reindex(exp_norm_in.index)

    observed = norm_by_marker_copies(input_seq_counts=dense_counts,
                                     input_marker_num=marker_predict_in,
                                     norm_filename=None)

    # check_like=True ignores the order of index and columns.
    pd.testing.assert_frame_equal(observed, exp_norm_in, check_like=True)
示例9: test_rare_4_reads
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def test_rare_4_reads(self):
    '''Sequences flagged as rare with a read cut-off of 4 match the
    expected ids.'''
    dense_counts = biom.load_table(seqtab_biom).to_dataframe(dense=True)
    flagged = set(id_rare_seqs(dense_counts, 4, 1))
    self.assertSetEqual(flagged, {"2558860574", "extra"})
示例10: test_rare_2_samp
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def test_rare_2_samp(self):
    '''Sequences flagged as rare with a sample cut-off of 2 match the
    expected ids.'''
    dense_counts = biom.load_table(seqtab_biom).to_dataframe(dense=True)
    flagged = set(id_rare_seqs(dense_counts, 1, 2))
    self.assertSetEqual(flagged, {"2558860574", "2571042244"})
示例11: test_pandas2biom
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def test_pandas2biom(self):
    """Round-trip a data frame through ``pandas2biom`` and ``load_table``.

    Also checks that writing to an unusable path raises ``IOError``.
    """
    import os

    fh, filename = mkstemp()
    # mkstemp returns an already-open OS-level descriptor; only the path is
    # needed here, so close it instead of leaking it.
    os.close(fh)
    try:
        p = pd.read_csv(get_data_path('float.tsv'), sep='\t', index_col=0)
        with self.assertRaisesRegex(IOError, 'Unable to create file'):
            pandas2biom('/dev/', p)
        pandas2biom(filename, p)
        b = biom.load_table(filename)
        self.assertCountEqual(b.ids(), p.columns)
        self.assertCountEqual(b.ids(axis='observation'), p.index)
    finally:
        # mkstemp never deletes the file it creates.
        os.remove(filename)
示例12: validate_results
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def validate_results(self, table_name, orig_fasta_name):
    """Assert the table's observation ids equal the trimmed input sequences.

    The expected ids are the second element of every item yielded by
    ``sequence_generator(orig_fasta_name)``, upper-cased and cut to
    ``self.trim_length`` characters, in the same order.
    """
    observed = list(load_table(table_name).ids(axis='observation'))
    expected = [record[1].upper()[:self.trim_length]
                for record in sequence_generator(orig_fasta_name)]
    self.assertListEqual(observed, expected)
示例13: test_filter_minreads_samples_from_table
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def test_filter_minreads_samples_from_table(self):
    """ Exercise filter_minreads_samples_from_table() on the s4 biom table:
    samples with too few reads are dropped, and the ``inplace`` flag
    controls whether the input table itself is modified.
    """
    biom_fp = join(self.test_data_dir, 'final.s4.biom')
    table = load_table(biom_fp)

    # default threshold: the sample is kept
    kept = filter_minreads_samples_from_table(table)
    self.assertEqual(kept.shape[1], 1)

    # threshold of 182 with inplace=False: the returned table loses the
    # sample while the input table keeps it
    filtered_copy = filter_minreads_samples_from_table(
        table, minreads=182, inplace=False)
    self.assertEqual(filtered_copy.shape[1], 0)
    self.assertEqual(table.shape[1], 1)

    # threshold of 200 with inplace=True: the input table loses the sample
    filter_minreads_samples_from_table(table, minreads=200, inplace=True)
    self.assertEqual(table.shape[1], 0)
示例14: gibbs
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def gibbs(table_fp: Table,
          mapping_fp: pd.DataFrame,
          output_dir: str,
          loo: bool,
          jobs: int,
          alpha1: float,
          alpha2: float,
          beta: float,
          source_rarefaction_depth: int,
          sink_rarefaction_depth: int,
          restarts: int,
          draws_per_restart: int,
          burnin: int,
          delay: int,
          per_sink_feature_assignments: bool,
          sample_with_replacement: bool,
          source_sink_column: str,
          source_column_value: str,
          sink_column_value: str,
          source_category_column: str):
    '''Gibbs sampler for Bayesian estimation of microbial sample sources.

    Loads the feature table and sample metadata, runs ``gibbs_helper`` and
    writes the mixing proportions, their standard deviations, a heatmap and
    (optionally) per-sink feature tables into ``output_dir``.

    For details on the sampler parameters, see the project README file.

    NOTE(review): ``table_fp`` and ``mapping_fp`` are used below as file
    paths (passed to ``load_table`` and ``open``), despite their
    annotations - confirm and correct the annotations if nothing
    introspects them.

    Raises whatever ``os.mkdir`` raises when ``output_dir`` cannot be
    created (e.g. it already exists).
    '''
    # Create results directory. Click has already checked if it exists, and
    # failed if so.
    os.mkdir(output_dir)

    # Load the metadata file and feature table. Default text mode already
    # applies universal newlines; the legacy 'U' flag was removed in Python
    # 3.11. The context manager closes the handle once parsing is done.
    with open(mapping_fp) as mapping_fh:
        sample_metadata = parse_sample_metadata(mapping_fh)
    feature_table = biom_to_df(load_table(table_fp))

    # run the gibbs sampler helper function (same used for q2)
    results = gibbs_helper(feature_table, sample_metadata, loo, jobs,
                           alpha1, alpha2, beta, source_rarefaction_depth,
                           sink_rarefaction_depth, restarts, draws_per_restart,
                           burnin, delay, per_sink_feature_assignments,
                           sample_with_replacement, source_sink_column,
                           source_column_value, sink_column_value,
                           source_category_column)

    # import the results (will change based on per_sink_feature_assignments)
    if len(results) == 3:
        mpm, mps, fas = results
        # write the feature tables from fas, one file per column of mpm
        for sink, fa in zip(mpm.columns, fas):
            fa.to_csv(os.path.join(output_dir, sink + '.feature_table.txt'),
                      sep='\t')
    else:
        # get the results (without fas)
        mpm, mps = results

    # Write results.
    mpm.to_csv(os.path.join(output_dir, 'mixing_proportions.txt'), sep='\t')
    mps.to_csv(os.path.join(output_dir, 'mixing_proportions_stds.txt'),
               sep='\t')

    # Plot contributions.
    fig, ax = plot_heatmap(mpm.T)
    fig.savefig(os.path.join(output_dir, 'mixing_proportions.pdf'), dpi=300)
示例15: read_seqabun
# 需要导入模块: import biom [as 别名]
# 或者: from biom import load_table [as 别名]
def read_seqabun(infile):
    '''Read a sequence abundance table in TSV, BIOM, or mothur shared format.

    The format is chosen as follows:

    * a ".biom" extension is loaded with ``biom.load_table``;
    * a tab-separated file whose first three columns are "label", "Group"
      and "numOtus" (with at least one more column) is treated as a mothur
      shared file: the two bookkeeping columns are dropped, "Group" becomes
      the row labels and the table is transposed;
    * anything else is read as a plain TSV with its first column as index.

    Returns a pandas DataFrame. Errors from the final ``read_csv`` /
    ``load_table`` call propagate to the caller.
    '''
    # First check extension of input file. If extension is "biom" then read in
    # as BIOM table and return. This is expected to be the most common input.
    _, in_ext = splitext(infile)
    if in_ext == ".biom":
        input_seqabun = biom.load_table(infile).to_dataframe(dense=True)
        # astype returns a new index; assign it back so the conversion
        # actually takes effect.
        input_seqabun.index = input_seqabun.index.astype(str)
        return input_seqabun

    # Next check if input file is a mothur shared file or not by reading in
    # the first row only. This probe is best-effort: if it fails, the file is
    # read as a plain table below, where any real problem is raised.
    try:
        header = list(pd.read_csv(filepath_or_buffer=infile, sep="\t",
                                  nrows=1).columns.values)
    except Exception:
        header = []
    mothur_format = (len(header) >= 4 and
                     header[:3] == ["label", "Group", "numOtus"])

    # If identified to be mothur format then remove extra columns, set "Group"
    # to be index (i.e. row) names and then transpose.
    if mothur_format:
        input_seqabun = pd.read_csv(filepath_or_buffer=infile, sep="\t",
                                    dtype={'Group': str}, low_memory=False)
        input_seqabun.drop(labels=["label", "numOtus"], axis=1, inplace=True)
        input_seqabun.set_index(keys="Group", drop=True, inplace=True)
        input_seqabun.index.name = None
        input_seqabun = input_seqabun.transpose()
        input_seqabun.index = input_seqabun.index.astype(str)
        return input_seqabun

    # Plain TSV: read the header alone to learn the name of the first column
    # so that it can be parsed as string and used as the index.
    first_col = str(pd.read_csv(infile, sep="\t", nrows=0).columns[0])
    input_seqabun = pd.read_csv(filepath_or_buffer=infile, sep="\t",
                                dtype={first_col: str}, low_memory=False)
    input_seqabun.set_index(first_col, drop=True, inplace=True)
    return input_seqabun