本文整理汇总了Python中skbio.tree.TreeNode.read方法的典型用法代码示例。如果您正苦于以下问题：Python TreeNode.read方法的具体用法？Python TreeNode.read怎么用？Python TreeNode.read使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类skbio.tree.TreeNode的用法示例。
在下文中一共展示了TreeNode.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: depth_partition
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def depth_partition(self, input_tree, percentile, output_tree):
    '''
    Partition a tree into clades whose internal distances are small
    relative to the whole tree, using a percentile cutoff.

    The tree is read from `input_tree` and re-rooted at its midpoint. The
    cutoff is the `percentile`-th percentile of the distribution returned
    by self._node_dist for the root (presumably tip-to-tip distances, per
    the method's original description -- confirm against _node_dist).
    Internal nodes are then visited in preorder: every node that is not
    already inside a partition and whose median distance is <= the cutoff
    becomes a new partition, and all nodes beneath it are excluded from
    further consideration.

    Parameters
    ----------
    input_tree:
        Source of the tree; passed unchanged to skbio's TreeNode.read
        http://scikit-bio.org/docs/latest/generated/skbio.tree.TreeNode.html
    percentile: float
        Percentile of the whole-tree distribution used as the cutoff.
    output_tree:
        Destination passed to TreeNode.write; the tree is written in
        newick format after each partition node has been handed to
        self._rename together with its partition name.

    Returns
    -------
    dict
        "partition_<n>" -> list of tip names (spaces replaced with
        underscores) for every partition found, plus self.UNCLUSTERED ->
        list of tip names that ended up in no partition.

    Clustering method modified from Prosperi et al method:
    Prosperi, M.C.F., et al. A novel methodology for large-scale phylogeny
    partition. Nat. Commun. 2:321 doi: 10.1038/ncomms1325 (2011).
    http://www.nature.com/ncomms/journal/v2/n5/full/ncomms1325.html
    '''
    rooted = TreeNode.read(input_tree)
    rooted = rooted.root_at_midpoint()

    next_index = 1
    assigned = set()
    partitions = {}

    logging.debug("Calculating %ith percentile cutoff from root"
                  % (percentile))
    cutoff = np.percentile(self._node_dist(rooted), percentile)
    logging.debug("Cutoff (%ith percentile): %f" % (percentile, cutoff))

    for node in rooted.preorder():
        # Skip anything already swallowed by a partition, and bare tips.
        if node in assigned or node.is_tip():
            continue

        node_median = np.median(self._node_dist(node))
        logging.debug("Median of node: %f" % node_median)

        if node_median <= cutoff:
            logging.debug("Cluster found!")
            label = "partition_%i" % (next_index)
            # Tip names are captured before the node is renamed.
            partitions[label] = [tip.name.replace(' ', '_')
                                 for tip in node.tips()]
            self._rename(node, label)
            next_index += 1
            # Mark the node and everything beneath it as taken.
            assigned.update(node.traverse())

    logging.info("%i depth cluster(s) found in tree" % (next_index - 1))
    rooted.write(output_tree, "newick")

    logging.debug("Recording tips that were not partitioned")
    partitions[self.UNCLUSTERED] = []
    for tip in rooted.tips():
        if tip not in assigned:
            partitions[self.UNCLUSTERED].append(tip.name.replace(' ', '_'))
    return partitions
示例2: testFindParents
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testFindParents(self):
    # Exercise find_named_parent on trees with progressively fewer named
    # ancestors above the query node.
    annotator = TreeAnnotator()

    newick = "(((A:1, B:2)'g__genus1':3, (C:4, D:5)'g__genus2':6)'f__family':10)root;"
    tree = TreeNode.read(StringIO(newick))
    found = annotator.find_named_parent(tree, tree.find('B'))
    assert_equals('g__genus1', found.name, 'self is named')

    newick = "(((A:1, 2475:2)'g__genus1':3, (C:4, D:5)'g__genus2':6)'f__family':10)root;"
    tree = TreeNode.read(StringIO(newick))
    found = annotator.find_named_parent(tree, tree.find('2475'))
    assert_equals('g__genus1', found.name, 'parent directly above')

    newick = "(((A:1, 2475:2):3, (C:4, D:5)'g__genus2':6)'f__family':10)root;"
    tree = TreeNode.read(StringIO(newick))
    found = annotator.find_named_parent(tree, tree.find('2475'))
    assert_equals('f__family', found.name, 'parent 2 above')

    # The remaining two cases start from a node with no named ancestor.
    newick = "(((A:1, 2475:2):3, (C:4, D:5)'g__genus2':6)'f__family':10);"
    tree = TreeNode.read(StringIO(newick))
    start = tree.find('f__family').parent
    assert_equals(None, annotator.find_named_parent(tree, start),
                  'parent of root')

    newick = "(((A:1, 2475:2):3, (C:4, D:5)'g__genus2':6):10);"
    tree = TreeNode.read(StringIO(newick))
    start = tree.find('g__genus2').parent
    assert_equals(None, annotator.find_named_parent(tree, start),
                  'no parent before root')
示例3: generate_html_summary
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def generate_html_summary(qclient, job_id, parameters, out_dir):
    """Build the HTML summary of a BIOM artifact and attach it in Qiita

    Parameters
    ----------
    qclient : qiita_client.QiitaClient
        Client used for the REST calls to the Qiita server
    job_id : str
        The job id (not referenced inside this function body)
    parameters : dict
        Job parameters; 'input_data' holds the artifact id
    out_dir : str
        Output directory handed to _generate_html_summary

    Returns
    -------
    bool, None, str
        Whether the job is successful
        Ignored (always None)
        The error message, empty if the patch call succeeded
    """
    # Step 1: ask Qiita for the artifact description
    artifact_url = "/qiita_db/artifacts/%s/" % parameters['input_data']
    info = qclient.get(artifact_url)

    # Step 2: fetch the metadata; the endpoint depends on whether the
    # artifact belongs to an analysis
    is_analysis = info['analysis'] is not None
    if is_analysis:
        metadata_url = '/qiita_db/analysis/%s/metadata/' % info['analysis']
        md = qclient.get(metadata_url)
    else:
        metadata_url = ('/qiita_db/prep_template/%s/' %
                        info['prep_information'][0])
        md = qclient.get(metadata_url)['qiime-map']

    # A phylogenetic tree is optional; it is parsed only when present
    if 'plain_text' in info['files']:
        tree = TreeNode.read(info['files']['plain_text'][0])
    else:
        tree = None

    # Step 3: generate the HTML summary
    # reaching this point means the artifact is a biom file with exactly
    # one element
    index_fp, viz_fp, qza_fp = _generate_html_summary(
        info['files']['biom'][0], md, out_dir, is_analysis, tree)

    # Step 4: register the new files on the artifact through the REST api;
    # any failure is reported back through the return value, not raised
    try:
        qclient.patch(artifact_url, 'add', '/html_summary/',
                      value=dumps({'html': index_fp, 'dir': viz_fp}))
    except Exception as e:
        return False, None, str(e)
    return True, None, ""
示例4: testTipToCluster
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testTipToCluster(self):
    # Cluster one tree at two thresholds, then check which cluster each
    # tip is mapped to in each of the resulting cluster sets.
    newick = '((F:20, ((A:11, B:12):10, (H:8, D:9):3):20)G:30)root;'
    tree = TreeNode.read(StringIO(newick))
    cluster_sets = Tree2Tax().named_clusters_for_several_thresholds(
        tree, [40, 25])

    expected_sets = [
        [25, [['F'], _('A B'), _('D H')]],
        [40, [['F'], _('A B D H')]],
    ]
    self.assertSameClusterSets(expected_sets, cluster_sets)
    assert_equals(_('G.3 G.1 G.2'),
                  [cluster.name() for cluster in cluster_sets[0].clusters])
    assert_equals(_('G.2 G.1'),
                  [cluster.name() for cluster in cluster_sets[1].clusters])

    # (tip name, expected cluster name in set 0, expected name in set 1)
    expectations = (
        ('F', ('G.3', 'G.2')),
        ('D', ('G.2', 'G.1')),
    )
    for tip_name, expected_names in expectations:
        tip = tree.find(tip_name)
        for index, expected in enumerate(expected_names):
            assert_equals(
                expected, cluster_sets[index].tip_to_cluster(tip).name())
示例5: _open_tree
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def _open_tree(self, tree_path):
    '''
    Open a tree file, determine what decorations are already present and
    strip unwanted decoration.

    Every named internal node is expected to carry a numeric label (a
    bootstrap/confidence value). The first internal node whose name does
    not parse as a float marks the tree as already decorated: the tree is
    passed through self._strip_tree and inspection stops. Internal nodes
    without any name trigger a single warning.

    Parameters
    ----------
    tree_path: str
        Path to a file containing a phylogenetic tree, in Newick format.

    Returns
    -------
    skbio TreeNode object
        The parsed tree, or the result of self._strip_tree when a
        non-numeric internal node name was found.
    '''
    # Use a context manager so the file handle is closed once the tree
    # has been parsed, instead of leaking it.
    with open(tree_path) as tree_file:
        tree_obj = TreeNode.read(tree_file)

    bootstrapped = True
    for node in tree_obj.non_tips():
        if node.name:
            try:
                float(node.name)
            except (TypeError, ValueError):
                # Only a failed numeric conversion means "decorated";
                # unrelated errors such as KeyboardInterrupt propagate.
                logging.debug("Tree is decorated already. Stripping all "
                              "previous decoration from the tree.")
                bootstrapped = False
                tree_obj = self._strip_tree(tree_obj)
                break
        elif bootstrapped:
            # Unnamed internal node: warn once, then keep scanning so a
            # later decorated node can still trigger the strip above.
            logging.warning("This tree doesn't appear correctly "
                            "formatted or there is information missing. "
                            "No boostrap value or decoration "
                            "found for bare node. ")
            bootstrapped = False
    if bootstrapped:
        logging.debug("Tree is bootstrap or has confidence values "
                      "assigned to the nodes.")
    return tree_obj
示例6: testClusterNamingWithBootstraps
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testClusterNamingWithBootstraps(self):
    # The internal node label '0.7:G' presumably combines a bootstrap
    # value with a name; cluster names are expected to derive from 'G'.
    newick = "((F:20, ((A:11, B:12):10, (H:8, D:9):3):20)'0.7:G':30)root;"
    tree = TreeNode.read(StringIO(newick))
    named = Tree2Tax().named_clusters(tree, 40)

    self.assertSameClusters([['F'], _('A B D H')], named)
    expected_names = _('G.2 G.1')
    assert_equals(expected_names, [cluster.name() for cluster in named])
示例7: testNamingWithBootstraps
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testNamingWithBootstraps(self):
    # The only internal label is numeric (0.091); the expected cluster
    # names are all of the form 'Root.<n>'.
    newick = '((A:0.11, B:0.12)0.091:0.1, D:0.2)root;'
    tree = TreeNode.read(StringIO(newick))
    named = Tree2Tax().named_clusters(tree, 0.05)

    self.assertSameClusters([['A'], ['B'], ['D']], named)
    expected_names = _('Root.1 Root.2 Root.3')
    assert_equals(expected_names, [cluster.name() for cluster in named])
示例8: testClusterNamingOnTwoInternalNodesReverseOrder
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testClusterNamingOnTwoInternalNodesReverseOrder(self):
    # Tip F is listed before the larger clade in the newick string; the
    # expected cluster names are 'G.2' then 'G.1'.
    newick = '((F:20, ((A:11, B:12):10, (H:8, D:9):3):20)G:30)root;'
    tree = TreeNode.read(StringIO(newick))
    named = Tree2Tax().named_clusters(tree, 40)

    self.assertSameClusters([['F'], _('A B D H')], named)
    expected_names = _('G.2 G.1')
    assert_equals(expected_names, [cluster.name() for cluster in named])
示例9: testClusterIntoThree
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testClusterIntoThree(self):
    # With a threshold of 25 the tree is expected to split into three
    # clusters: {A, B}, {D, H} and {F}.
    newick = '((((A:11, B:12)C:10, (H:8, D:9)I:3)E:20, F:20)G:30)root;'
    tree = TreeNode.read(StringIO(newick))
    named = Tree2Tax().named_clusters(tree, 25)

    expected = [_('A B'), _('D H'), ['F']]
    self.assertSameClusters(expected, named)
示例10: testClusterOnTwoInternalNodes
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testClusterOnTwoInternalNodes(self):
    # With a threshold of 40 the tree is expected to split into two
    # clusters: {A, B, D, H} and {F}.
    newick = '((((A:11, B:12)C:10, (H:8, D:9)I:3)E:20, F:20)G:30)root;'
    tree = TreeNode.read(StringIO(newick))
    named = Tree2Tax().named_clusters(tree, 40)

    expected = [_('A B D H'), ['F']]
    self.assertSameClusters(expected, named)
示例11: testTreeSubtree2
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testTreeSubtree2(self):
    '''one genus is a subtree of another, and the longest branch is in both subtrees'''
    newick = "((((A:1, B:52)'g__genus1':3, D:50)'g__genus2':6)'f__family':10)root;"
    tree = TreeNode.read(StringIO(newick))
    found = ThresholdFinder().find_examples(tree, 'f', 'g')

    # One example is expected: the family, both nested genera, and 105.0.
    expected = [['f__family', 'g__genus1', 'g__genus2', 105.0]]
    self.assertSameCladeDistanceSet(expected, found)
示例12: validate
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
#.........这里部分代码省略.........
table = load_table(biom_fp)
metadata_ids = set(metadata)
biom_sample_ids = set(table.ids())
if not metadata_ids.issuperset(biom_sample_ids):
# The BIOM sample ids are different from the ones in the prep template
qclient.update_job_step(job_id, "Step 3: Fixing BIOM sample ids")
# Attempt 1: the user provided the run prefix column - in this case
# the run prefix column holds the sample ids present in the BIOM file
if 'run_prefix' in metadata[next(iter(metadata_ids))]:
id_map = {v['run_prefix']: k for k, v in metadata.items()}
else:
# Attemp 2: the sample ids in the BIOM table are the same that in
# the prep template but without the prefix
prefix = next(iter(metadata_ids)).split('.', 1)[0]
prefixed = set("%s.%s" % (prefix, s) for s in biom_sample_ids)
if metadata_ids.issuperset(prefixed):
id_map = {s: "%s.%s" % (prefix, s) for s in biom_sample_ids}
else:
# There is nothing we can do. The samples in the BIOM table do
# not match the ones in the prep template and we can't fix it
error_msg = ('The sample ids in the BIOM table do not match '
'the ones in the prep information. Please, '
'provide the column "run_prefix" in the prep '
'information to map the existing sample ids to '
'the prep information sample ids.')
return False, None, error_msg
# Fix the sample ids
try:
table.update_ids(id_map, axis='sample')
except TableException:
missing = biom_sample_ids - set(id_map)
error_msg = ('Your prep information is missing samples that are '
'present in your BIOM table: %s' % ', '.join(missing))
return False, None, error_msg
new_biom_fp = join(out_dir, basename(biom_fp))
with biom_open(new_biom_fp, 'w') as f:
table.to_hdf5(f, "Qiita BIOM type plugin")
filepaths = [(new_biom_fp, 'biom')]
# Validate the representative set, if it exists
if 'preprocessed_fasta' in files:
repset_fp = files['preprocessed_fasta'][0]
# The observations ids of the biom table should be the same
# as the representative sequences ids found in the representative set
observation_ids = table.ids(axis='observation').tolist()
extra_ids = []
for record in load([repset_fp], constructor=FastaIterator):
rec_id = record['SequenceID'].split()[0]
try:
observation_ids.remove(rec_id)
except ValueError:
extra_ids.append(rec_id)
error_msg = []
if extra_ids:
error_msg.append("The representative set sequence file includes "
"observations not found in the BIOM table: %s"
% ', '.join(extra_ids))
if observation_ids:
error_msg.append("The representative set sequence file is missing "
"observation ids found in the BIOM tabe: %s" %
', '.join(observation_ids))
if error_msg:
return False, None, '\n'.join(error_msg)
filepaths.append((repset_fp, 'preprocessed_fasta'))
# Validate the sequence specific phylogenetic tree (e.g. generated
# by SEPP for Deblur), if it exists
tree = None
if 'plain_text' in files:
phylogeny_fp = files['plain_text'][0]
try:
tree = TreeNode.read(phylogeny_fp)
filepaths.append((phylogeny_fp, 'plain_text'))
except Exception:
return False, None, ("Phylogenetic tree cannot be parsed "
"via scikit-biom")
for fp_type, fps in files.items():
if fp_type not in ('biom', 'preprocessed_fasta', 'plain_text'):
for fp in fps:
filepaths.append((fp, fp_type))
index_fp, viz_fp, qza_fp = _generate_html_summary(
new_biom_fp, md, join(out_dir), is_analysis, tree)
filepaths.append((index_fp, 'html_summary'))
filepaths.append((viz_fp, 'html_summary_dir'))
if 'qza' not in files:
filepaths.append((qza_fp, 'qza'))
return True, [ArtifactInfo(None, 'BIOM', filepaths)], ""
示例13: testFullTaxonomy
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testFullTaxonomy(self):
    # Tip D sits under 'g__genus2', which sits under 'f__family'; the
    # expected taxonomy string joins the two with '; '.
    annotator = TreeAnnotator()
    newick = "(((A:1, B:2):3, (C:4, D:5)'g__genus2':6)'f__family':10)root;"
    tree = TreeNode.read(StringIO(newick))

    taxonomy = annotator.full_taxonomy(tree, tree.find('D'))
    assert_equals('f__family; g__genus2', taxonomy)
示例14: testSistersOneIncompleteSister
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def testSistersOneIncompleteSister(self):
    # Only one of the two sibling sub-clades (E, F) carries a name, 'g3';
    # that is the single sister expected for tip B.
    annotator = TreeAnnotator()
    newick = "((A:1, B:2):3, ((C:1,D:1):1, (E:1,F:5)'g3':6):10)root;"
    tree = TreeNode.read(StringIO(newick))
    print(tree.ascii_art())

    sisters = annotator.find_sisters(tree, tree.find('B'))
    expected_names = sorted(['g3'])
    observed_names = sorted([sister.name for sister in sisters])
    assert_equals(expected_names, observed_names)
示例15: test_missing_taxonomy
# 需要导入模块: from skbio.tree import TreeNode [as 别名]
# 或者: from skbio.tree.TreeNode import read [as 别名]
def test_missing_taxonomy(self):
    # Each case: (expected names, first node name, second node name),
    # passed to missing_taxonomy in that order.
    newick = '((((A:11, B:12)C:10, D:9)E:20, F:20)G:30)root;'
    tree = TreeNode.read(StringIO(newick))

    cases = (
        (['C'], 'A', 'E'),
        ([], 'A', 'A'),
        (['E', 'C'], 'A', 'G'),
    )
    for expected, first_name, second_name in cases:
        observed = TaxonomyFunctions().missing_taxonomy(
            tree, tree.find(first_name), tree.find(second_name))
        assert_equals(expected, observed)