本文整理汇总了Python中ete3.Tree.set_outgroup方法的典型用法代码示例。如果您正苦于以下问题：Python Tree.set_outgroup方法的具体用法？Python Tree.set_outgroup怎么用？Python Tree.set_outgroup使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类ete3.Tree的用法示例。
在下文中一共展示了Tree.set_outgroup方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: smart_reroot
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
def smart_reroot(treefile, outgroupfile, outfile, format=0):
    """
    Reroot a Newick tree on the first outgroup prefix that matches a leaf.

    Each non-blank line of ``outgroupfile`` is treated as a name prefix. The
    lines are tried in order and the first one matching at least one leaf
    defines the outgroup. The tree is rooted on the common ancestor of all
    matching leaves, falling back to the first matching leaf when the
    ancestor lookup raises ``ValueError``.

    Tree reading format options see here:
    http://packages.python.org/ete2/tutorial/tutorial_trees.html#reading-newick-trees

    Args:
        treefile: Newick input handed to ``Tree``.
        outgroupfile: file of candidate outgroup prefixes, one per line.
        outfile: path the rerooted tree is written to.
        format: Newick flavour used for both reading and writing.

    Returns:
        ``outfile`` when the tree was rerooted, otherwise ``treefile``
        unchanged (a message is printed to stderr in that case).
    """
    tree = Tree(treefile, format=format)
    # Leaf names are scanned in reverse order, as in the original code.
    leaves = [t.name for t in tree.get_leaves()][::-1]
    outgroup = []
    for o in must_open(outgroupfile):
        o = o.strip()
        if not o:
            # An empty prefix matches every leaf, which would select the
            # whole tree as its own outgroup; skip blank lines instead.
            continue
        outgroup = [leaf for leaf in leaves if leaf.startswith(o)]
        if outgroup:
            break

    if not outgroup:
        print("Outgroup not found. Tree {0} cannot be rerooted.".format(treefile), file=sys.stderr)
        return treefile

    try:
        tree.set_outgroup(tree.get_common_ancestor(*outgroup))
    except ValueError:
        # Fall back to rooting on the first matching leaf name.
        tree.set_outgroup(outgroup[0])
    tree.write(outfile=outfile, format=format)

    logging.debug("Rerooted tree printed to {0}".format(outfile))
    return outfile
示例2: check_hgt
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
def check_hgt(tree_path, taxid_dict, bootstrap_threshold=70.0):
    """Reroot a tree away from its "dpapi" leaf and test that leaf's sisters.

    The tree is loaded from ``tree_path``, filtered with ``remove_bad_nodes``
    and tagged with ``get_tags_leaves``. The first leaf tagged ``"dpapi"`` is
    kept; any further one only triggers a warning on stdout. The tree is then
    rooted on the parent of the node topologically farthest from that leaf
    and written to ``tree_path + "_edited.tree"``.

    Returns:
        False when the farthest node's parent is already the root (nothing is
        written in that case); otherwise True only if
        ``check_sisters_bacteria`` holds for both the dpapi leaf and its
        parent.

    Note:
        If no leaf is tagged ``"dpapi"`` the lookup of the farthest node is
        attempted on ``None`` and fails with ``AttributeError``.
    """
    tree = remove_bad_nodes(Tree(tree_path), support_threshold=bootstrap_threshold)
    leaf_tags = get_tags_leaves(tree, taxid_dict)

    dpapi_leaf = None
    for leaf in tree.iter_leaves():
        if leaf_tags[leaf.name] != "dpapi":
            continue
        if dpapi_leaf:
            print ("More than one Dpapi leaf!\n" + tree_path + "\n")
        else:
            dpapi_leaf = leaf

    farthest_node = dpapi_leaf.get_farthest_node(topology_only=True)[0]
    if farthest_node.up == tree:
        return False

    tree.set_outgroup(farthest_node.up)
    tree.write(outfile=tree_path + "_edited.tree")

    return bool(check_sisters_bacteria(dpapi_leaf, leaf_tags)
                and check_sisters_bacteria(dpapi_leaf.up, leaf_tags))
示例3: refdistance
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
def refdistance(trees, reftree, coords, outgroup):
    """Write the normalized RF distance of each tree to a reference topology.

    Results go to the file ``rf.reftree`` in the current working directory.

    Parameters
    ------
    trees: obj, obj from loadtrees
    reftree: string, path of the file holding the reference topology; every
        line is parsed and, if there are several, the last one is used
    coords: list, list of genome coordinates ("start-stop") for each tree;
        when empty only the distances are written, one per line
    outgroup: string, name of outgroup used to root the reference (optional)

    Returns
    ------
    None

    Raises
    ------
    ValueError
        If the reference file contains no line at all.
    """
    print("getting RF distances from ref")
    # Keep the parsed tree under its own name instead of rebinding the
    # ``reftree`` path argument.
    ref = None
    with open(reftree, 'r') as tt:
        for line in tt:
            ref = Tree(line)
            if outgroup:
                ref.set_outgroup(outgroup)
    if ref is None:
        raise ValueError("no reference tree found in {}".format(reftree))

    rfnorm = []
    for t in trees:
        try:
            rf = t.compare(ref)
        except Exception as e:
            if "unrooted" not in str(e):
                # Unknown failure: propagate it rather than dropping into a
                # debugger and continuing with a stale/undefined ``rf``.
                raise
            # NOTE(review): ete3's Tree.compare() names this flag
            # ``unrooted`` (``unrooted_trees`` belongs to robinson_foulds).
            rf = t.compare(ref, unrooted=True)
            print("setting as unrooted, consider using --outgroup")
        rfnorm.append(rf["norm_rf"])

    with open("rf.reftree", 'w') as f:
        if coords:
            f.write("start\tstop\trfnorm\n")
            for x, y in zip(coords, rfnorm):
                parts = x.split("-")
                f.write("{}\t{}\t{}\n".format(parts[0], parts[1], y))
        else:
            for y in rfnorm:
                f.write("{}\n".format(y))
    return None
示例4: get_tree
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
def get_tree(n, keys, wscaf, wsta, wsto, seq, outg, fnw, fout, lgi, vb, kali=kali, notree=notree):
    """Write one window alignment, build its NJ tree with seaview and log it.

    Args:
        n: running tree counter; incremented on entry and decremented again
            when seaview returns no tree.
        keys: sequence identifiers to export, used as keys into ``seq``.
        wscaf, wsta, wsto: scaffold name, window start and window stop.
        seq: mapping from identifier to sequence string.
        outg: outgroup passed to ``Tree.set_outgroup``; falsy to skip rooting.
        fnw: writable handle receiving one Newick string per line.
        fout: writable handle receiving one tab-separated window record.
        lgi: value written as the last column of the window record.
        vb: when truthy, echo every FASTA record to stdout.
        kali: when truthy, the alignment gets a per-window file name;
            otherwise a shared ``ali.temp.fasta`` is reused.
        notree: when truthy, stop after writing the alignment.

    Returns:
        The updated tree counter.
    """
    # increment tree number
    n += 1
    # prepare ali
    desc = "%s:%s-%s" % (wscaf, wsta, wsto)
    if kali:
        fnom = "%s/%s.%s-%s.fasta" % (prefali, wscaf, wsta, wsto)
    else:
        fnom = "%s/ali.temp.fasta" % (prefali)
    with open(fnom, "w") as o:
        for k in keys:
            record = SeqRecord(Seq(seq[k], IUPAC.ambiguous_dna),
                               id=k, description=desc)
            if vb:
                print(record.format("fasta").strip())
            o.write(record.upper().format("fasta"))
    if notree:
        return n
    # compute and retrieve tree using seaview...
    cmd = "seaview -build_tree -distance observed -NJ -o - %s" % fnom
    print(cmd)
    tr = os.popen(cmd).read().strip()
    if tr == '':
        # No tree produced: undo the counter and drop the alignment file.
        n -= 1
        os.remove(fnom)
        return n
    # The Newick string is taken from after the first "] " in seaview's output.
    tr = Tree(tr.split("] ")[1])
    # root tree
    if outg:
        tr.set_outgroup(outg)
    # write tree in a gz file
    tr = tr.write(format=1)
    fnw.write(tr + "\n")
    # ``//`` keeps the window midpoint an integer on both Python 2 and 3.
    fout.write("\t".join([str(x) for x in [wscaf, wsta, wsto, (int(wsta) + int(wsto)) // 2,
                                           int(wsto) - int(wsta), lgi]]) + "\n")
    return n
示例5: phyldogSpeciesTree
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
def phyldogSpeciesTree(phyldogTreeFile, brlenTreeFile, outgroupName, codon = False):
    """Copy branch lengths onto a PHYLDOG tree from an equivalent topology.

    Nodes of the two trees are paired by their sorted tuple of leaf names.

    Args:
        phyldogTreeFile (string): path of the PHYLDOG output tree.
        brlenTreeFile (string): path of the tree carrying branch lengths.
        outgroupName (string): leaf name used to root the branch-length tree.
        codon (bool): if True, branch lengths are divided by 3 (codon models).

    Returns:
        The PHYLDOG tree with translated leaf names, an ``ND`` feature on
        every node and ``dist`` taken from the branch-length tree.

    Raises:
        KeyError: if a clade of the PHYLDOG tree has no counterpart in the
            branch-length tree.
    """
    brlenTree = Tree(brlenTreeFile)
    brlenTree.set_outgroup(brlenTree & outgroupName)

    # Map "sorted leaf names of a clade" -> length of the branch above it.
    brlenByClade = {}
    for node in brlenTree.traverse():
        clade = tuple(sorted(leaf.name for leaf in node.get_leaves()))
        # nucleotide substitutions per codon site vs. per site
        brlenByClade[clade] = node.dist / 3. if codon else node.dist

    speciesTree = Tree(phyldogTreeFile)
    for node in speciesTree.traverse():
        if node.is_leaf():
            # The numeric id is the trailing group of the numbered leaf name.
            nodeId = re.sub(r'.+_.+_(\d+)', r'\1', node.name)
            node.name = numbered2name(node.name)
        elif node.is_root():
            nodeId = '0'
        else:
            nodeId = str(int(node.support))
        node.add_feature("ND", nodeId)

        clade = tuple(sorted(numbered2name(leaf.name) for leaf in node.get_leaves()))
        node.dist = brlenByClade[clade]
    return speciesTree
示例6: loadtrees
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
def loadtrees(newickfile, topo, outgroup):
    """Read the Newick trees of a file into a list of ete3 trees

    Parameters
    ------
    newickfile: file, file of newick trees, 1 per line
    topo: bool, if true every stripped line is passed through ``removebl``
        before parsing; otherwise lines starting with "NA" are skipped
    outgroup: string, outgroup species; each tree is rooted on it when given

    Returns
    ------
    treelist: list of ete3 Tree objects, in file order
    """
    print("loading trees...")
    if topo:
        with open(newickfile, 'r') as handle:
            stripped = [line.strip() for line in handle]
        newicks = removebl(stripped)
    else:
        with open(newickfile, 'r') as handle:
            newicks = [line for line in handle if not line.startswith("NA")]

    treelist = []
    for newick in newicks:
        parsed = Tree(newick)
        if outgroup:
            parsed.set_outgroup(outgroup)
        treelist.append(parsed)
    return treelist
示例7: outgroup
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
parser.add_argument(
    '--verbose', action='store_true',
    help=('Print information about the outgroup (if any) taxa to standard '
          'error'))

args = parser.parse_args()
tree = Tree(args.treeFile.read())

if args.outgroupRegex:
    from re import compile
    regex = compile(args.outgroupRegex)
    # Leaves whose name matches the regex (anchored at the start) form the outgroup.
    taxa = [leaf.name for leaf in tree.iter_leaves() if regex.match(leaf.name)]

    if taxa:
        ca = tree.get_common_ancestor(taxa)

        if args.verbose:
            print('Taxa for outgroup:', taxa, file=sys.stderr)
            print('Common ancestor:', ca.name, file=sys.stderr)
            print('Common ancestor is tree:', tree == ca, file=sys.stderr)

        # Choose the node to root on:
        #   one taxon          -> that leaf
        #   ancestor is root   -> midpoint outgroup (cannot root on the root)
        #   otherwise          -> common ancestor of the matched taxa
        if len(taxa) == 1:
            outgroupNode = tree & taxa[0]
        elif ca == tree:
            outgroupNode = tree.get_midpoint_outgroup()
        else:
            outgroupNode = tree.get_common_ancestor(taxa)
        tree.set_outgroup(outgroupNode)

print(tree.get_ascii())
示例8: recurseTreeMakingJSON
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
# --------------------------------------------------------------------------------------------------------------
# so everyone in the tree can only have one parent. This is great, because PHYLIP text files are horrible
# NOTE(review): this excerpt lost its indentation; the nesting of the statements
# below (in particular which ones sit inside the `if` / `while`) must be confirmed
# against the upstream source before the code is changed.
# Walk from each leaf towards the root and give unnamed nodes the name recorded
# in node_to_parent for the previously visited node name.
for leaf in adult_tree:
node = leaf
old_name = leaf.name
if leaf.is_leaf():
while node:
if node.name == "" and old_name in node_to_parent:
node.name = node_to_parent[old_name]
old_name = node.name
node = node.up
# --------------------------------------------------------------------------------------------------------------
# reroot the tree and get a new node -> parent relationship dict
adult_tree.set_outgroup(args.rootname)
# Maps a node name to the name of its parent after rerooting; nodes without a
# parent get no entry.
new_node_to_parent = {}
for leaf in adult_tree:
node = leaf
while node:
if node.up:
new_node_to_parent[node.name] = node.up.name
node = node.up
# --------------------------------------------------------------------------------------------------------------
# output the final annotated tree
#
myTreeNode = recurseTreeMakingJSON(adult_tree, annotations, args.rootname, cladeAssignments, nameToColor, args.splitToken, parent="null", curHeight = 0)
# NOTE(review): opened without a context manager; the matching close is not
# visible in this excerpt.
out2 = open(args.outputtree,"w")
示例9: main
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
#.........这里部分代码省略.........
os.system(andi_c)
#Read in the andi dist matrix, convert to lower triangle
dm = read_file_lines(andi_mat)[1:]
dm = lower_tri(dm)
#Correct the names in the matrix
for iso in isos:
#Could do it this way, but this is slower than a nested loop
#dm.names[dm.names.index(iso_ID_trans[iso])] = iso
#real 0m9.417s
#user 1m18.576s
#sys 0m2.620s
#Nested loop is faster
for i in range(0, len(dm.names)):
#iso_ID_trans[iso] is the short_id
if dm.names[i] == iso_ID_trans[iso]:
dm.names[i] = iso
#real 0m8.789s
#user 1m14.637s
#sys 0m2.420s
#From the distance matrix in dm, infer the NJ tree
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
constructor = DistanceTreeConstructor()
njtree = constructor.nj(dm)
njtree.rooted = True
from Bio import Phylo
Phylo.write(njtree, 'temp.tre', 'newick')
from ete3 import Tree
t = Tree('temp.tre', format=1)
#Get rid of negative branch lengths (an artefact, not an error, of NJ)
for node in t.traverse():
node.dist = abs(node.dist)
t.set_outgroup(t.get_midpoint_outgroup())
t_out = base+'_andi_NJ_'+ARGS.model_andi_distance+'dist.nwk.tre'
t.write(format=1, outfile=t_out)
print('Final tree (midpoint-rooted, NJ under '+\
ARGS.model_andi_distance+' distance) looks like this:')
#Print the ascii tree
print(t)
#Remove the temp.tre
os.remove('temp.tre')
print('Tree (NJ under '+ARGS.model_andi_distance+\
' distance, midpoint-rooted) written to '+t_out+'.')
#Run roary?
if ARGS.roary_run:
roary_keepers = [
"accessory.header.embl",
"accessory.tab",
"accessory_binary_genes.fa",
"accessory_binary_genes.fa.newick",
"accessory_binary_genes_midpoint.nwk.tre",
"accessory_graph.dot",
"blast_identity_frequency.Rtab",
"clustered_proteins",
"core_accessory.header.embl",
"core_accessory.tab",
"core_accessory_graph.dot",
"core_gene_alignment.aln",
"gene_presence_absence.Ltab.csv",
"gene_presence_absence.Rtab",
"gene_presence_absence.csv",
"number_of_conserved_genes.Rtab",
"number_of_genes_in_pan_genome.Rtab",
"number_of_new_genes.Rtab",
示例10: Tree
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
#!/usr/bin/env python
"""Reroot a tree on a named leaf and shorten "|"-separated leaf names.

Usage: script.py TREE ROOT_LEAF_NAME

The rerooted tree is printed to stdout as Newick.
"""
from ete3 import Tree
import sys

tree = sys.argv[1]
root = sys.argv[2]

t = Tree(tree)
t.set_outgroup(t & root)

for leaf in t.iter_leaves():
    cols = leaf.name.split("|")
    if len(cols) < 2:
        # Names without a "|" have no second field to inspect; keep them as is.
        continue
    if cols[0] == 'EBOV':
        leaf.name = cols[1]
    elif cols[1] == 'SLE':
        leaf.name = cols[0]

print(t.write())
示例11: clade
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
if group_assignments[leaf.name] in euk_supergroups:
eukaryote_seqs.append(leaf.name)
leaf.add_features(domain="Eukaryote")
else:
leaf.add_features(domain="Other")
else:
leaf.add_features(domain="Other")
#print eukaryote_seqs
#root the tree on a clade (the biggest?) of bacteria, to avoid ridiculous problems with arbitrary roots on trees
biggest_other_node = 0
for node in tree.get_monophyletic(values=['Other'], target_attr="domain"):
if len(node) > biggest_other_node:
biggest_other_node = len(node)
tree.set_outgroup(node)
#test the various phylogenetic criteria for LGT.
print "Tree\tResult\tEuksInTree\tSupportEukMonophyly\tEuksInTargetGroup\tDistanceToClosestEukClade\tSupergroupsInTargetGroup"
#euk sequence is a singleton nested within a clade of bacteria, and there is only one eukaryote sequence in the tree
if len(eukaryote_seqs) == 1: #this is, I guess, an LGT candidate
print sys.argv[1] + "\tSingleton\t1\tN/A\tN/A\tN/A\t1"
#euk sequence is a singleton nested within a clade of bacteria, and the eukaryotes are not monophyletic in the tree
#print len(eukaryote_seqs)
else:
try:
answer = tree.check_monophyly(values=eukaryote_seqs, target_attr="name")
if answer[0] == True:
ca = tree.get_common_ancestor(eukaryote_seqs)
target_group_sgs = {}
for leaf in ca:
示例12: Tree
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
#! /usr/bin/python
from ete3 import Tree

# Load the tree from test.nw, root it on the leaf named "X" and print the
# resulting Newick string.
t = Tree('test.nw')
t.set_outgroup(t & "X")
print(t.write())
示例13:
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
# /-A
# |
# | /-H
#---------|---------|
# | \-F
# |
# | /-B
# \--------|
# | /-E
# \--------|
# \-D
#
# Let's define the common ancestor of E and D as the tree outgroup. Of
# course, the definition of an outgroup will depend on user criteria.
ancestor = t.get_common_ancestor("E","D")
t.set_outgroup(ancestor)
# print() with a single argument behaves the same on Python 2 and Python 3.
print("Tree rooteda at E and D's ancestor is more basal that the others.")
print(t)
#
# /-B
# /--------|
# | | /-A
# | \--------|
# | | /-H
#---------| \--------|
# | \-F
# |
# | /-E
# \--------|
# \-D
#
示例14: open
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
# NOTE(review): this excerpt lost its indentation and starts mid-script; `outg`
# and `outgroups` are set up before the visible part. Confirm the nesting of the
# loop body (notably whether the print belongs to the `if`) against the upstream
# source before changing the code.
outg.close()
# Target taxa: one name per line of the file given as second argument.
target_taxa = []
tt = open(sys.argv[2])
for line in tt:
target_taxa.append(line.rstrip())
tt.close()
#now read in a collection of trees, calc branch lengths over sample, summarise and print out
branch_lengths = defaultdict(list) #key = taxa, value = list of brlens
# NOTE(review): no close() for this handle is visible in the excerpt.
treefile = open(sys.argv[3])
for line in treefile:
curr_tree = Tree(line.rstrip())
# Root on the common ancestor of the outgroups unless that ancestor is already
# the tree itself.
root_node = curr_tree.get_common_ancestor(outgroups)
if curr_tree != root_node:
curr_tree.set_outgroup(root_node)
# NOTE(review): Python 2 print statement.
print curr_tree
#bundle = curr_tree.check_monophyly(values=outgroups,target_attr='name')
#print bundle
#if bundle[0] == False:
# continue
#find common ancestor of the target taxa, and use this as the reference node for calculating branch lengths. This might not always be the measure you want!
reference_node = curr_tree.get_common_ancestor(target_taxa)
#if reference_node != curr_tree:
# curr_tree.set_outgroup(reference_node)
#calc distance from root to each branch of interest
for taxon in target_taxa:
dist = curr_tree.get_distance(taxon, reference_node)
branch_lengths[taxon].append(dist)
#now compute the credible intervals of the branch length for each of the target taxa
示例15: __init__
# 需要导入模块: from ete3 import Tree [as 别名]
# 或者: from ete3.Tree import set_outgroup [as 别名]
class exponential_mixture:
    """ML search PTP, to use: __init__(), search() and count_species()"""

    def __init__(self, tree, sp_rate = 0, fix_sp_rate = False, max_iters = 20000, min_br = 0.0001):
        """Load the tree, resolve polytomies and evaluate the null model.

        Args:
            tree: Newick input handed to ``Tree`` (read with ``format=1``).
            sp_rate: value stored as ``fix_spe``.
            fix_sp_rate: value stored as ``fix_spe_rate``.
            max_iters: value stored as ``max_num_search``.
            min_br: branches no longer than this are ignored by the null model.
        """
        self.min_brl = min_br
        self.tree = Tree(tree, format = 1)
        self.tree.resolve_polytomy(recursive=True)
        self.tree.dist = 0.0
        self.fix_spe_rate = fix_sp_rate
        self.fix_spe = sp_rate
        self.max_logl = float("-inf")
        self.max_setting = None
        self.null_logl = 0.0
        # Overwrites null_logl with the log-likelihood of the null model.
        self.null_model()
        self.species_list = None
        self.counter = 0
        self.setting_set = set()
        self.max_num_search = max_iters

    def null_model(self):
        """Fit one exponential to all branches longer than ``min_brl``.

        Stores the summed log-likelihood in ``null_logl`` and returns the
        fitted rate.
        """
        long_branches = [node.dist
                         for node in self.tree.get_descendants()
                         if node.dist > self.min_brl]
        fitted = exp_distribution(long_branches)
        self.null_logl = fitted.sum_log_l()
        return fitted.rate

    def __compare_node(self, node):
        """Sort key: the branch length of ``node``."""
        return node.dist

    def re_rooting(self):
        """Root the tree on the descendant with the longest branch."""
        by_length = self.tree.get_descendants()
        # Ascending stable sort: the last element is the longest branch.
        by_length.sort(key=self.__compare_node)
        self.tree.set_outgroup(by_length[-1])
        self.tree.dist = 0.0

    def comp_num_comb(self):
        """Annotate every node with ``cnt`` and return the root's value.

        Leaves get 1.0; an internal node gets the product of its children's
        ``cnt`` plus 1.0 (children are visited first, in post-order).
        """
        for node in self.tree.traverse(strategy='postorder'):
            if node.is_leaf():
                node.add_feature("cnt", 1.0)
                continue
            product = 1.0
            for child in node.get_children():
                product = product * child.cnt
            node.add_feature("cnt", product + 1.0)
        return self.tree.cnt

    def next(self, sp_setting):
        """Recursively explore the settings reachable from ``sp_setting``.

        The setting is recorded as visited and kept as ``max_setting`` if its
        log-likelihood beats the current best. Each non-leaf active node is
        then expanded into a new setting (its children plus the current
        speciation nodes), which is explored unless already visited.
        """
        self.setting_set.add(frozenset(sp_setting.spe_nodes))
        logl = sp_setting.get_log_l()
        if logl > self.max_logl:
            self.max_logl = logl
            self.max_setting = sp_setting

        for node in sp_setting.active_nodes:
            if node.is_leaf():
                continue
            sp_nodes = list(node.get_children())
            sp_nodes.extend(sp_setting.spe_nodes)
            new_sp_setting = species_setting(spe_nodes = sp_nodes, root = sp_setting.root, sp_rate = sp_setting.spe_rate, fix_sp_rate = sp_setting.fix_spe_rate, minbr = self.min_brl)
            if frozenset(sp_nodes) not in self.setting_set:
                self.next(new_sp_setting)

    def H0(self, reroot = True):
        """Run H1, H2 and H3 in sequence; only H1 receives ``reroot``."""
        self.H1(reroot)
        self.H2(reroot = False)
        self.H3(reroot = False)
def H1(self, reroot = True):
if reroot:
self.re_rooting()
#self.init_tree()
sorted_node_list = self.tree.get_descendants()
sorted_node_list.sort(key=self.__compare_node)
sorted_node_list.reverse()
first_node_list = []
first_node_list.append(self.tree)
first_childs = self.tree.get_children()
for child in first_childs:
first_node_list.append(child)
first_setting = species_setting(spe_nodes = first_node_list, root = self.tree, sp_rate = self.fix_spe, fix_sp_rate = self.fix_spe_rate, minbr = self.min_brl)
#.........这里部分代码省略.........