本文整理汇总了 Python 中 dendropy.Tree.get 方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.get 方法的具体用法?Python Tree.get 怎么用?Python Tree.get 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 dendropy.Tree 的用法示例。
在下文中一共展示了Tree.get方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: generate_ATT_from_files
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def generate_ATT_from_files(seqaln,
                            mattype,
                            workdir,
                            treefile,
                            otu_json,
                            ingroup_mrca=None):
    """Build an AlignTreeTax object from local files, without phylesystem.

    Parameters
    ----------
    seqaln : str
        Path to the sequence alignment file.
    mattype : str
        Schema name of the alignment, passed to ``DnaCharacterMatrix.get``.
    workdir : str
        Working directory handed on to ``AlignTreeTax``.
    treefile : str
        Path to a newick tree file.
    otu_json : str
        Path to a JSON file; every alignment label must be one of its keys.
    ingroup_mrca : optional
        OTT id of the ingroup MRCA. If not provided (or falsy), the MRCA is
        computed from the ``'^ot:ottId'`` entries of every record in the
        JSON file.

    Returns
    -------
    The ``AlignTreeTax`` built from the tree, the otu dictionary and the
    alignment.
    """
    aln = DnaCharacterMatrix.get(path=seqaln, schema=mattype)
    for tax in aln.taxon_namespace:
        # Force all spaces in labels to underscores.
        tax.label = tax.label.replace(" ", "_")
    # The tree shares the alignment's taxon namespace. It only needs to be
    # parsed once: nothing between here and as_string() changes the labels.
    tre = Tree.get(path=treefile,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)
    with open(otu_json) as data_file:
        otu_dict = json.load(data_file)
    for tax in aln:
        assert tax.label in otu_dict
    otu_newick = tre.as_string(schema="newick")
    if ingroup_mrca:
        ott_mrca = int(ingroup_mrca)
    else:
        # .get is a method call; the original subscripted it, which raised
        # TypeError whenever no ingroup_mrca was supplied.
        ott_ids = [otu_dict[otu].get('^ot:ottId') for otu in otu_dict]
        ott_mrca = get_mrca_ott(ott_ids)
    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir)
示例2: __init__
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def __init__(self, **kwargs):
    '''
    Load the tree to be re-rooted and, optionally, a reference tree.

    Parameters
    ----------
    reference_tree_path: str
        Path to the file containing the reference tree, which is used to
        re-root the tree provided via tree_path. Optional; when it is
        missing or falsy, ``self.reference_tree`` is set to that same
        value and no file is read.
    tree_path: str
        Path to the file containing the tree to be re-rooted. This tree
        will be re-rooted at the same position as the reference tree.
        Required.

    Raises
    ------
    KeyError
        If tree_path is not supplied.
    Exception
        If any keyword argument other than the two above is supplied.
    '''
    tree_path = kwargs.pop('tree_path')
    reference_tree_path = kwargs.pop('reference_tree_path', None)

    logging.debug("Importing old tree from file: %s" % tree_path)
    self.tree = Tree.get(path=tree_path, schema='newick')

    if not reference_tree_path:
        self.reference_tree = reference_tree_path
    else:
        logging.debug("Importing reference tree from file: %s" % reference_tree_path)
        self.reference_tree = Tree.get(path=reference_tree_path, schema='newick')

    # Anything still left in kwargs was not recognised.
    if kwargs:
        raise Exception("Unexpected arguments provided to Decorator class: %s" % kwargs)
示例3: main
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def main(OT_filehandle, OTTs_to_keep, outfile):
    """Prune a newick tree to the wanted nodes and their ancestors, then write it.

    Nodes are visited in postorder. A node is retained when
    ``node_label_in(node, OTTs_to_keep)`` is true or when one of its
    descendants was retained (marked through a ``keep`` attribute on the
    parent). Every other node is detached from its parent.

    Parameters
    ----------
    OT_filehandle : file-like object holding the newick tree.
    OTTs_to_keep : collection passed to ``node_label_in`` to decide whether
        a node is wanted.
    outfile : file-like object the pruned tree is written to.
    """
    # Read in the tree, but don't create taxa (faster).
    tree = Tree.get(stream=OT_filehandle, schema="newick", suppress_leaf_node_taxa=True)
    for node in tree.postorder_node_iter():
        parent = node.parent_node
        if hasattr(node, 'keep') or node_label_in(node, OTTs_to_keep):
            if parent:  # this is not the root
                parent.keep = True
        elif parent is not None:
            # The root has no parent to be removed from; without this guard
            # a tree in which nothing matched crashed with AttributeError.
            parent.remove_child(node, suppress_unifurcations=False)
    tree.write(file=outfile, schema='newick', suppress_leaf_node_labels=False)
示例4: generate_ATT_from_phylesystem
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def generate_ATT_from_phylesystem(aln,
                                  workdir,
                                  study_id,
                                  tree_id,
                                  phylesystem_loc='api'):
    """Gather tree, alignment and study info into an AlignTreeTax object.

    Taxon labels in the alignment are forced to otu ids.

    Parameters
    ----------
    aln : DnaCharacterMatrix
        The alignment; it must be a DendroPy DNA character matrix.
    workdir : str
        Working directory handed on to ``AlignTreeTax``.
    study_id, tree_id :
        Study ID and tree ID from OpenTree identifying the tree to fetch.
    phylesystem_loc : str
        Passed to ``get_nexson`` together with the study id.

    Returns
    -------
    The ``AlignTreeTax`` built from the relabelled tree, the otu
    dictionary and the alignment.
    """
    #TODO CHECK ARGS
    assert isinstance(aln, datamodel.charmatrixmodel.DnaCharacterMatrix)
    for tax in aln.taxon_namespace:
        # Force all spaces in labels to underscores.
        tax.label = tax.label.replace(" ", "_")
    nexson = get_nexson(study_id, phylesystem_loc)
    ott_ids = get_subtree_otus(nexson,
                               tree_id=tree_id,
                               subtree_id="ingroup",
                               return_format="ottid")
    ott_mrca = get_mrca_ott(ott_ids)
    newick = extract_tree(nexson,
                          tree_id,
                          PhyloSchema('newick',
                                      output_nexml2json='1.2.1',
                                      content="tree",
                                      tip_label="ot:originalLabel"))
    # Very heavy handed; the same replacement is applied to the alignment
    # labels above so that both sides agree.
    newick = newick.replace(" ", "_")
    # Parsed before relabelling: the tree shares the alignment's namespace,
    # so the label changes below show up when the tree is serialised.
    tre = Tree.get(data=newick,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)
    otus = get_subtree_otus(nexson, tree_id=tree_id)
    otu_dict = {}
    orig_lab_to_otu = {}
    for otu_id in otus:
        otu_info = extract_otu_nexson(nexson, otu_id)[otu_id]
        otu_info['^physcraper:status'] = "original"
        otu_info['^physcraper:last_blasted'] = "1900/01/01"
        otu_dict[otu_id] = otu_info
        orig = otu_info.get(u'^ot:originalLabel').replace(" ", "_")
        orig_lab_to_otu[orig] = otu_id
    for tax in aln.taxon_namespace:
        try:
            # NOTE(review): on Python 3 .encode() yields bytes, not str - confirm
            # which interpreter this is meant to run on.
            tax.label = orig_lab_to_otu[tax.label].encode('ascii')
        except KeyError:
            # NOTE(review): the message says the taxon is removed, but nothing
            # in this function removes it - confirm where the pruning happens.
            sys.stderr.write("{} doesn't have an otu id. It is being removed from the alignement. This may indicate a mismatch between tree and alignement\n".format(tax.label))
    #need to prune tree to seqs and seqs to tree...
    otu_newick = tre.as_string(schema="newick")
    # newick should be bare, but the alignment should be a DnaCharacterMatrix
    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir)
示例5: read_matrix_and_tree
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def read_matrix_and_tree(char_file_path,
                         tree_file_path,
                         char_type=DnaCharacterMatrix,
                         char_schema='fasta',
                         tree_schema='newick'):
    """Read a character matrix (optional) and a tree.

    When char_file_path is truthy the matrix is read with ``char_type.get``
    and its taxon namespace is made immutable before being used to read
    the tree. Otherwise no matrix is read and the tree is read with
    ``taxon_namespace=None``.

    Returns the tuple ``(matrix_or_None, tree)``.
    """
    matrix = None
    namespace = None
    if char_file_path:
        matrix = char_type.get(path=char_file_path, schema=char_schema)
        namespace = matrix.taxon_namespace
        namespace.is_mutable = False
    tree = Tree.get(path=tree_file_path,
                    schema=tree_schema,
                    preserve_underscores=True,
                    taxon_namespace=namespace)
    return matrix, tree
示例6: generate_streamed_alignment
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def generate_streamed_alignment(self):
    """Run the key steps, then replace the tree and alignment with the expanded ones.

    Reads the blast results and checkpoints the object. If new sequences
    survive filtering, they are aligned and placed, a full tree is
    estimated and loaded into ``self.data.tre``, and the run's files are
    moved into ``<workdir>/previous_run``. ``self.repeat`` is set to 1
    when the tree was updated and to 0 otherwise. The object and its otu
    dictionary are pickled into workdir at the end.
    """
    self.read_blast()
    # Use context managers so the pickle files are closed (and flushed)
    # deterministically instead of leaking the handles.
    with open('{}/scrape.p'.format(self.workdir), 'wb') as handle:
        pickle.dump(self, handle)
    if len(self.new_seqs) > 0:
        self.remove_identical_seqs()
        self.data.write_files()  # should happen before aligning in case of pruning
        if len(self.new_seqs_otu_id) > 0:  # TODO rename to something more intuitive
            self.write_query_seqs()
            self.align_query_seqs()
            self.data.reconcile()
            self.place_query_seqs()
            self.est_full_tree()
            self.data.tre = Tree.get(path="{}/RAxML_bestTree.{}".format(self.workdir, self.date),
                                     schema="newick",
                                     preserve_underscores=True,
                                     taxon_namespace=self.data.aln.taxon_namespace)
            self.data.write_files()
            previous_run = "{}/previous_run".format(self.workdir)
            if os.path.exists(previous_run):
                # Archive the older run under a name that is not taken yet.
                prev_dir = "{}/previous_run{}".format(self.workdir, self.date)
                i = 0
                while os.path.exists(prev_dir):
                    i += 1
                    # Stay inside workdir; the original fell back to a path
                    # relative to the current directory here.
                    prev_dir = "{}/previous_run{}".format(self.workdir, i)
                os.rename(previous_run, prev_dir)
            os.rename(self.blast_subdir, previous_run)
            if os.path.exists("{}/last_completed_update".format(self.workdir)):
                os.rename(self.tmpfi, "{}/last_completed_update".format(self.workdir))
            for pattern in ('{}/RAxML*', '{}/papara*'):
                for filename in glob.glob(pattern.format(self.workdir)):
                    # basename works for nested or absolute workdirs, where
                    # split("/")[1] picked a directory component instead.
                    os.rename(filename, "{}/{}".format(previous_run, os.path.basename(filename)))
            os.rename("{}/{}".format(self.workdir, self.newseqs_file),
                      "{}/newseqs.fasta".format(previous_run))
            self.data.write_labelled()
            self.new_seqs = {}  # Wipe for next run
            self.new_seqs_otu_id = {}
            self.repeat = 1
        else:
            sys.stdout.write("No new sequences after filtering.\n")
            self.repeat = 0
    else:
        sys.stdout.write("No new sequences found.\n")
        self.repeat = 0
    self.reset_markers()
    with open('{}/scrape.p'.format(self.workdir), 'wb') as handle:
        pickle.dump(self, handle)
    with open('{}/otu_dict.p'.format(self.workdir), 'wb') as handle:
        pickle.dump(self.data.otu_dict, handle)
示例7: __init__
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def __init__(self, newick, otu_dict, alignment, ingroup_mrca, workdir):
    """Store the alignment, tree and otu information and prepare workdir.

    The newick string is parsed into ``self.tre`` using the alignment's
    taxon namespace, names are reconciled via ``self._reconcile_names()``,
    and ``workdir`` is created if it does not exist yet. The unmodified
    alignment and newick string are kept as ``orig_aln`` / ``orig_newick``.

    Raises AssertionError when ``int(ingroup_mrca)`` is zero.
    """
    self.aln = alignment
    self.tre = Tree.get(
        data=newick,
        schema="newick",
        preserve_underscores=True,
        taxon_namespace=alignment.taxon_namespace,
    )
    self.otu_dict = otu_dict
    self.ps_otu = 1  # counter used for new otu IDs
    self._reconcile_names()

    self.workdir = workdir  # TODO - is this where the workdir should live?
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    assert int(ingroup_mrca)
    self.ott_mrca = ingroup_mrca

    # References to the inputs as they were handed in.
    self.orig_newick = newick
    self.orig_aln = alignment
    self.gi_dict = {}
    self.orig_seqlen = []  # FIXME
示例8: mutable_read_matrix_and_tree
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def mutable_read_matrix_and_tree(char_file_path,
                                 tree_file_path,
                                 char_type=DnaCharacterMatrix,
                                 char_schema='fasta',
                                 tree_schema='newick'):
    '''Read a character matrix and a tree that share one mutable namespace.

    Because the namespace is mutable, the names in the tree may not match
    those in the matrix. Returns ``(char_mat, tree)``, or ``(None, None)``
    when char_file_path is falsy (the tree is not read in that case).
    '''
    if not char_file_path:
        return None, None

    char_mat = char_type.get(path=char_file_path, schema=char_schema)
    # Make the taxon namespace mutable so that the tree can be read even if
    # its labels differ from the matrix's.
    namespace = char_mat.taxon_namespace
    namespace.is_mutable = True
    tree = Tree.get(path=tree_file_path,
                    schema=tree_schema,
                    preserve_underscores=True,
                    taxon_namespace=namespace)
    return char_mat, tree
示例9: write_labelled
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def write_labelled(self, label='^ot:ottTaxonName', treepath="labelled.tre", alnpath="labelled.fas"):
    """Write the tree and alignment with human-readable labels.

    Works on copies made by serialising and re-parsing the tree and the
    alignment (not memory efficient at all). For each taxon the new label
    is the first truthy value among: the requested ``label`` field,
    ``"^ot:originalLabel"``, and ``"ncbi <id>"`` built from
    ``"^ncbi:taxon"``. A taxon with none of these keeps its label. When a
    label was already used, the taxon's current label is appended to keep
    it unique. Output goes to ``treepath`` and ``alnpath`` inside workdir.
    """
    assert label in ['^ot:ottTaxonName', "^ot:originalLabel", "^ot:ottId", "^ncbi:taxon"]
    tmp_tre = Tree.get(data=self.tre.as_string(schema="newick"),
                       schema="newick",
                       preserve_underscores=True)
    # The copied alignment shares the copied tree's namespace, so
    # relabelling the taxa below affects both outputs.
    tmp_aln = DnaCharacterMatrix.get(data=self.aln.as_string(schema="fasta"),
                                     schema="fasta",
                                     taxon_namespace=tmp_tre.taxon_namespace)
    used_labels = set()
    for taxon in tmp_tre.taxon_namespace:
        otu_info = self.otu_dict[taxon.label]
        candidate = otu_info.get(label)
        if not candidate:
            candidate = otu_info.get("^ot:originalLabel")
        if not candidate:
            ncbi_id = otu_info.get("^ncbi:taxon")
            if not ncbi_id:
                continue  # nothing to relabel with
            candidate = " ".join(["ncbi", str(ncbi_id)])
        if candidate in used_labels:
            candidate = " ".join([candidate, taxon.label])
        used_labels.add(candidate)
        taxon.label = candidate
    tmp_tre.write(path="{}/{}".format(self.workdir, treepath),
                  schema="newick",
                  unquoted_underscores=True,
                  suppress_edge_lengths=False)
    tmp_aln.write(path="{}/{}".format(self.workdir, alnpath),
                  schema="fasta")
示例10: place_query_seqs
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def place_query_seqs(self):
    """Run raxml to place the new query sequences on the tree.

    Uses the combined alignment that includes the new query sequences.
    The result is just for placement, to be used as a starting tree: the
    labelled tree raxml produces is read back, polytomies are resolved,
    the ``QUERY___`` prefix is stripped from taxon labels, and the tree is
    written to ``place_resolve.tre``. All files are read and written
    inside ``self.workdir``. Sets ``self._query_seqs_placed`` to 1.
    """
    start_dir = os.getcwd()
    os.chdir(self.workdir)
    try:
        # Move a stale output out of the way. This must happen inside
        # workdir, where raxml writes; the original checked before the
        # chdir and renamed via an undefined name (NameError).
        if os.path.exists("RAxML_labelledTree.PLACE"):
            os.rename("RAxML_labelledTree.PLACE", "RAxML_labelledTreePLACE.tmp")
        sys.stdout.write("placing query sequences \n")
        subprocess.call(["raxmlHPC", "-m", "GTRCAT",
                         "-f", "v",
                         "-s", "papara_alignment.extended",
                         "-t", "random_resolve.tre",
                         "-n", "PLACE"])
        placetre = Tree.get(path="RAxML_labelledTree.PLACE",
                            schema="newick",
                            preserve_underscores=True)
        placetre.resolve_polytomies()
        for taxon in placetre.taxon_namespace:
            if taxon.label.startswith("QUERY"):
                taxon.label = taxon.label.replace("QUERY___", "")
        placetre.write(path="place_resolve.tre", schema="newick", unquoted_underscores=True)
    finally:
        # Return to where we started; chdir('..') was only correct for a
        # one-level relative workdir.
        os.chdir(start_dir)
    self._query_seqs_placed = 1
示例11: range
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
else:
onlyfiles.append(AllFiles[j])
for j in range(len(onlyfiles)):
try:
path = file_path+'/'+onlyfiles[j]
fil = open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+path, 'r')
lines_list=fil.readlines()
fil.close()
test = SeqDic(lines_list) # If this dose not worke we do not have a FASTA file
##
# Makes a newick tree and checks if the referense tree is recovered. The none reducing file.
##
line = 'cat /home/4/u1we1f44/Documents/appbio15/projekt/data/'+path+' | fastprot -I fasta -O phylip | fnj -I phylip -O "newick" -o "Treeout.txt"'
os.system(line)
TreePath=file_path+'/'+RefTree
t1=Tree.get(file=open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+TreePath,'r'),schema="newick",tree_offset=0)
t2=Tree.get(file=open('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt','r'),schema="newick",tree_offset=0,taxon_namespace=t1.taxon_namespace)
t1.encode_bipartitions()
t2.encode_bipartitions()
if treecompare.symmetric_difference(t1, t2)==0:
NotFixedCount += 1
os.remove('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt')
Total += 1
else:
Total += 1
os.remove('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt')
##
# Makes a temporary file. In the temporary file with data with the nosie columns remoeved. MAkes a newick tree and checks if the refernse tree is recovered.
# The nosie columns removed.
##
os.system("touch temp.fa")
示例12: build_subsets_tree
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
def build_subsets_tree(self, curr_tmp_dir_par,build_min_tree=True):
# uym2 added: add option for MST
if build_min_tree:
_LOG.debug("START building Minimum Spanning Tree")
grouping = {}
groupName2jobName = {}
for node in self.tree._tree.leaf_node_iter():
groupName = self.pasta_team.subsets[node.taxon.label].tmp_dir_par[len(curr_tmp_dir_par)+1:]
grouping[node.taxon.label] = groupName.replace("/","")
groupName2jobName[groupName] = self.pasta_team.subsets[node.taxon.label]
subsets_tree = build_groups_MST(self.tree._tree,grouping)
for node in subsets_tree.postorder_node_iter():
if node.is_leaf():
node.taxon.label = node.taxon.label.replace("d","/d")
node.label = node.label.replace("d","/d")
self.pasta_team.subsets = groupName2jobName
MST = PhylogeneticTree(subsets_tree)
_LOG.debug("Spanning tree is:\n %s" %MST)
return MST
###################################
_LOG.debug("START building heuristic spanning tree")
translate={}
t2 = {}
for node in self.tree._tree.leaf_node_iter():
nalsj = self.pasta_team.subsets[node.taxon.label]
newname = nalsj.tmp_dir_par[len(curr_tmp_dir_par)+1:]
translate[node.taxon.label] = newname
t2[newname] = set([nalsj])
subsets_tree = PhylogeneticTree(Tree.get(data=self.tree_str,schema='newick'))
for node in subsets_tree._tree.leaf_node_iter():
node.alignment_subset_job = t2[translate[node.taxon.label]]
#node.alignment_subset_job = t2[node.taxon]
del t2
del translate
_LOG.debug("leafs labeled")
#subsets_tree._tree.infer_taxa()
#_LOG.debug("fake taxa inferred")
#Then make sure the tree is rooted at a branch (not at a node).
if len(subsets_tree._tree.seed_node.child_nodes()) > 2:
for c in subsets_tree._tree.seed_node.child_nodes():
if c.edge.is_internal():
break
subsets_tree._tree.is_rooted = True
subsets_tree._tree.reroot_at_edge(c.edge,length1=c.edge.length/2.,
length2=c.edge.length/2., suppress_unifurcations=False)
_LOG.debug("Subset Labeling (start):\n%s" %str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
#_LOG.debug("Subset Labeling (start):\n%s" %str(len(subsets_tree._tree.seed_node.child_nodes())))
# Then label internal branches based on their children, and collapse redundant edges.
for node in subsets_tree._tree.postorder_internal_node_iter():
# my label is the intersection of my children,
# unless the intersection is empty, in which case it is the union
if not hasattr(node, "alignment_subset_job") or node.alignment_subset_job is None:
node.alignment_subset_job = set.intersection(*[c.alignment_subset_job for c in node.child_nodes()])
if not node.alignment_subset_job:
node.alignment_subset_job = set.union(*[c.alignment_subset_job for c in node.child_nodes()])
# Now go ahead and prune any child whose label encompasses my label.
# Use indexing instead of iteration, because with each collapse,
# new children can be added, and we want to process them as well.
i = 0;
while i < len(node.child_nodes()):
c = node.child_nodes()[i]
if node.alignment_subset_job.issubset(c.alignment_subset_job):
# Dendropy does not collapsing and edge that leads to a tip. Remove instead
if c.child_nodes():
c.edge.collapse()
else:
node.remove_child(c)
else:
i += 1
node.label = "+".join(nj.tmp_dir_par[len(curr_tmp_dir_par)+1:] for nj in node.alignment_subset_job)
if node.is_leaf():
node.taxon = subsets_tree._tree.taxon_namespace.new_taxon(label=node.label)
_LOG.debug("Before final round, the tree is:\n %s" %str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
# Now, the remaining edges have multiple labels. These need to
# be further resolved. Do it by minimum length
# First find all candidate edges that we might want to contract
candidate_edges = set()
for e in subsets_tree._tree.postorder_edge_iter():
if e.tail_node and e.head_node.alignment_subset_job.intersection(e.tail_node.alignment_subset_job):
candidate_edges.add( (e.length,e) )
# Then sort the edges, and start removing them one by one
# only if an edge is still having intersecting labels at the two ends
candidate_edges = sorted(candidate_edges, key=lambda x: x[0] if x[0] else -1)
for (el, edge) in candidate_edges:
I = edge.tail_node.alignment_subset_job.intersection(edge.head_node.alignment_subset_job)
if I:
edge.tail_node.alignment_subset_job = I
if edge.head_node.child_nodes():
#edge.collapse(adjust_collapsed_head_children_edge_lengths=True)
edge.collapse()
else:
#.........这里部分代码省略.........
示例13: warn
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
from dendropy import Tree
# Node labels of interest, each mapped to an integer index.
label_nodes = {'Other':0, 'Chloroplastida_ott361838':1, 'Metazoa_ott691846':2, 'Fungi_ott352914':3, 'Bacteria_ott844192':4}
# Filled in later: label -> the tree node carrying that label.
target_nodes = {}
# index -> label with its "_ott<digits>" suffix stripped. The pattern is a
# raw string so that \d is not treated as an (invalid) string escape.
names = {index: re.sub(r"_ott\d+", "", k) for k, index in label_nodes.items()}
parser = argparse.ArgumentParser(description='Count the number of unnamed nodes in a tree')
parser.add_argument('treefile', type=argparse.FileType('r'), help='A newick-format tree')
args = parser.parse_args()
def warn(*objs):
    """Write the given objects to standard error, space-separated, ending with a newline."""
    sys.stderr.write(" ".join(str(obj) for obj in objs) + "\n")
tree = Tree.get(file=args.treefile, schema='newick', preserve_underscores=True, suppress_leaf_node_taxa=True)
# Give every node an n_leaves attribute: the number of leaves at or below it.
# Postorder guarantees all children are counted before their parent.
for node in tree.postorder_node_iter():
    if node.is_leaf():
        node.n_leaves = 1
    elif node.label in label_nodes:
        target_nodes[node.label] = node
    parent = node._parent_node
    if parent is None:
        continue  # the root has nothing to propagate to
    # The first child to report initialises the parent's counter; later
    # children add to it. This replaces two nested bare excepts that also
    # hid unrelated errors.
    if hasattr(parent, 'n_leaves'):
        parent.n_leaves += node.n_leaves
    else:
        parent.n_leaves = node.n_leaves
示例14: SeqDic
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
lines_list = fil.readlines()
fil.close()
test = SeqDic(lines_list) # If this dose not worke we do not have a FASTA file
##
# Makes a newick tree and checks if the referense tree is recovered. The none reducing file.
##
line = (
"cat /home/4/u1we1f44/Documents/appbio15/project/data/"
+ path
+ ' | fastprot -I fasta -O phylip | fnj -I phylip -O "newick" -o "Treeout.txt"'
)
os.system(line)
TreePath = file_path + "/" + RefTree
t1 = Tree.get(
file=open("/home/4/u1we1f44/Documents/appbio15/project/data/" + TreePath, "r"),
schema="newick",
tree_offset=0,
)
t2 = Tree.get(
file=open("/home/4/u1we1f44/Documents/appbio15/project/src/Treeout.txt", "r"),
schema="newick",
tree_offset=0,
taxon_namespace=t1.taxon_namespace,
)
t1.encode_bipartitions()
t2.encode_bipartitions()
if treecompare.symmetric_difference(t1, t2) == 0:
NotFixedCount += 1
os.remove("/home/4/u1we1f44/Documents/appbio15/project/src/Treeout.txt")
Total += 1
else:
示例15: max
# 需要导入模块: from dendropy import Tree [as 别名]
# 或者: from dendropy.Tree import get [as 别名]
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import argparse
from dendropy import Tree
if __name__ == '__main__':
    # Command-line entry point: read a tree, rescale all of its edges so
    # that the largest node-to-root distance equals --max-height, and
    # write the result out.
    parser = argparse.ArgumentParser(description='Rescale tree height')
    parser.add_argument(
        '--max-height', type=float, metavar='FLOAT', default=0.1,
        help='Scale longest branch to max height [0.1]',
    )
    parser.add_argument(
        '--if', dest='input_format', default='newick',
        choices=['newick', 'nexus', 'nexml'],
        help='Input tree format [newick]',
    )
    parser.add_argument(
        '--of', dest='output_format', default='newick',
        choices=['newick', 'nexus', 'nexml'],
        help='output tree format [newick]',
    )
    parser.add_argument(
        'input', type=argparse.FileType('r'), default='-',
        help='Input tree',
    )
    parser.add_argument(
        'output', type=argparse.FileType('w'), default='-', nargs='?',
        help='Output tree [stdout]',
    )
    args = parser.parse_args()

    tr = Tree.get(file=args.input, schema=args.input_format)
    # Uniform scaling factor derived from the largest root distance.
    scale = args.max_height / max(tr.calc_node_root_distances())
    tr.scale_edges(scale)
    tr.write_to_stream(args.output, args.output_format)