本文整理汇总了Python中Bio.Phylo.TreeConstruction.DistanceTreeConstructor.nj方法的典型用法代码示例。如果您正苦于以下问题：Python DistanceTreeConstructor.nj方法的具体用法？Python DistanceTreeConstructor.nj怎么用？Python DistanceTreeConstructor.nj使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Phylo.TreeConstruction.DistanceTreeConstructor的用法示例。
在下文中一共展示了DistanceTreeConstructor.nj方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: distance_matrix
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
def distance_matrix(cls, cluster_list):
    """Build a neighbor-joining tree for ``cluster_list`` and return it as Newick text.

    Pairwise distances are looked up from ``Distance`` rows whose
    ``rep_accnum1`` and ``rep_accnum2`` are both in ``cluster_list``; a pair
    may be stored in either order. The lower-triangular matrix (zero
    diagonal) is handed to ``_DistanceMatrix`` and the resulting NJ tree is
    ladderized before being serialized.

    Raises:
        ValueError: if no distance is stored for some pair of names.
    """
    print(cluster_list)
    dists = Distance.objects.filter(rep_accnum1__in=cluster_list, rep_accnum2__in=cluster_list)
    # Key on the (accnum1, accnum2) tuple rather than an "a_b" string: names
    # that themselves contain "_" would make concatenated keys ambiguous.
    distance_pairs = {(g.rep_accnum1, g.rep_accnum2): g.distance for g in dists.all()}
    matrix = []
    for i in range(len(cluster_list)):
        row = []
        for j in range(i):
            pair = (cluster_list[i], cluster_list[j])
            if pair in distance_pairs:
                row.append(distance_pairs[pair])
            elif pair[::-1] in distance_pairs:
                row.append(distance_pairs[pair[::-1]])
            else:
                # Raise a real exception; raising a bare string is itself a TypeError.
                raise ValueError("Error, can't find pair! (%s, %s)" % pair)
        # Diagonal entry: distance of a name to itself.
        row.append(0)
        matrix.append(row)
    cluster_list = [s.encode('ascii', 'ignore') for s in cluster_list]
    matrix_obj = _DistanceMatrix(names=cluster_list, matrix=matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(matrix_obj)
    tree.ladderize()
    output = StringIO.StringIO()
    Phylo.write(tree, output, 'newick')
    tree_str = output.getvalue()
    return tree_str
示例2: DistanceTreeConstructorTest
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor"""

    def setUp(self):
        """Load the PHYLIP alignment and derive the blosum62 distance matrix."""
        # Close the alignment file deterministically instead of leaking the handle.
        with open('TreeConstruction/msa.phy') as handle:
            self.aln = AlignIO.read(handle, 'phylip')
        calculator = DistanceCalculator('blosum62')
        self.dm = calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(calculator)

    def _assert_matches_reference(self, tree, ref_path):
        """Check that ``tree`` serializes to the first line of ``ref_path``."""
        self.assertIsInstance(tree, BaseTree.Tree)
        tree_file = StringIO.StringIO()
        Phylo.write(tree, tree_file, 'newick')
        # ``with`` closes the reference file even when the assertion fails.
        with open(ref_path) as ref_tree:
            self.assertEqual(tree_file.getvalue(), ref_tree.readline())

    def test_upgma(self):
        """UPGMA on the precomputed distance matrix matches upgma.tre."""
        tree = self.constructor.upgma(self.dm)
        self._assert_matches_reference(tree, './TreeConstruction/upgma.tre')

    def test_nj(self):
        """NJ on the precomputed distance matrix matches nj.tre."""
        tree = self.constructor.nj(self.dm)
        self._assert_matches_reference(tree, './TreeConstruction/nj.tre')

    def test_built_tree(self):
        """build_tree() on the alignment matches nj.tre."""
        tree = self.constructor.build_tree(self.aln)
        self._assert_matches_reference(tree, './TreeConstruction/nj.tre')
示例3: DistanceTreeConstructorTest
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor"""

    def setUp(self):
        """Read the PHYLIP alignment and compute its blosum62 distance matrix."""
        self.aln = AlignIO.read('TreeConstruction/msa.phy', 'phylip')
        blosum_calc = DistanceCalculator('blosum62')
        self.dm = blosum_calc.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(blosum_calc)

    def _check_against(self, built, reference_path):
        """Assert ``built`` is a Tree with the same topology as the Newick file."""
        self.assertTrue(isinstance(built, BaseTree.Tree))
        expected = Phylo.read(reference_path, 'newick')
        self.assertTrue(Consensus._equal_topology(built, expected))

    def test_upgma(self):
        """UPGMA tree built from the distance matrix."""
        self._check_against(self.constructor.upgma(self.dm),
                            './TreeConstruction/upgma.tre')

    def test_nj(self):
        """NJ tree built from the distance matrix."""
        self._check_against(self.constructor.nj(self.dm),
                            './TreeConstruction/nj.tre')

    def test_built_tree(self):
        """Tree built directly from the alignment via build_tree()."""
        self._check_against(self.constructor.build_tree(self.aln),
                            './TreeConstruction/nj.tre')
示例4: build_nj_tree
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
def build_nj_tree(self):
    """Return the NJ tree of ``self.distance_matrix()`` as a Newick string."""
    matrix = self.distance_matrix()
    nj_tree = DistanceTreeConstructor().nj(matrix)
    # Serialize into an in-memory buffer and hand back its text.
    buf = StringIO.StringIO()
    Phylo.write(nj_tree, buf, 'newick')
    newick = buf.getvalue()
    buf.close()
    return newick
示例5: get_dn_ds_tree
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
def get_dn_ds_tree(self, dn_ds_method="NG86", tree_method="UPGMA"):
    """Construct the dn tree and the ds tree.

    Arguments:
     - dn_ds_method - Available methods include NG86, LWL85, YN00
       and ML.
     - tree_method - Available methods include UPGMA and NJ.

    Returns a ``(dn_tree, ds_tree)`` tuple.

    Raises RuntimeError if ``tree_method`` is neither "UPGMA" nor "NJ".
    """
    # Reject an unsupported method up front, before the (potentially
    # expensive) dn/ds matrices are computed.
    if tree_method not in ("UPGMA", "NJ"):
        raise RuntimeError("Unknown tree method ({0}). Only NJ and UPGMA "
                           "are accepted.".format(tree_method))
    from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
    dn_dm, ds_dm = self.get_dn_ds_matrix(method=dn_ds_method)
    dn_constructor = DistanceTreeConstructor()
    ds_constructor = DistanceTreeConstructor()
    if tree_method == "UPGMA":
        dn_tree = dn_constructor.upgma(dn_dm)
        ds_tree = ds_constructor.upgma(ds_dm)
    else:
        dn_tree = dn_constructor.nj(dn_dm)
        ds_tree = ds_constructor.nj(ds_dm)
    return dn_tree, ds_tree
示例6: construct_tree
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
def construct_tree(X_2d, acc, title):
    """Write the NJ tree of the pairwise distances of ``X_2d`` to ``title + ".nwk"``.

    ``acc`` supplies the names passed to ``_DistanceMatrix``. NaN distances
    are replaced by 0 before the lower triangle is extracted.
    """
    labels = list(acc)
    dist = pairwise_distances(X_2d).astype('float')
    dist[np.isnan(dist)] = 0
    # Lower triangle including the diagonal, one growing row per sample.
    lower_triangle = [
        [dist[row, col] for col in range(row + 1)]
        for row in range(dist.shape[0])
    ]
    matrix = _DistanceMatrix(labels, matrix=lower_triangle)
    nj_tree = DistanceTreeConstructor().nj(matrix)
    Phylo.write(nj_tree, title + ".nwk", 'newick')
示例7: nj_tree
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
def nj_tree(distanceMatrix):
    """Build the NJ tree for ``distanceMatrix`` and write it to geneContentTree.newick."""
    print("Constructing Neighbor Joining Tree")
    built = DistanceTreeConstructor().nj(distanceMatrix)
    Phylo.write(built, "geneContentTree.newick", "newick")
    print("Done constructing tree")
示例8: main
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
def main(argv):
input_file=''
title='Title'
label_internal_nodes = False
label_leaves = False
out_file=''
width=750
out_file_xml=''
plot_rectangular = False
common_kmer_data_path=''
taxonomic_names_on_leaves = False
try:
opts, args = getopt.getopt(argv,"h:i:lnrto:w:x:D:",["Help=","InputCommonKmerXFile=","LabelLeaves=", "LabelInternalNodes=","Rectangular=", "TaxonomicNamesOnLeaves=", "OutFile=","Width=","OutFileXML=","CommonKmerDataPath="])
except getopt.GetoptError:
print 'Unknown option, call using: ./PlotNJTree.py -i <InputCommonKmerXFile> -D <CommonKmerDataPath> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -r <RectangularPlotFlag> -t <TaxonomicNamesOnLeavesFlag> -o <OutFile.png> -x <Outfile.xml> -w <Width>'
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print './PlotNJTree.py -i <InputCommonKmerXFile> -D <CommonKmerDataPath> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -r <RectangularPlotFlag> -t <TaxonomicNamesOnLeavesFlag> -o <OutFile.png> -x <Outfile.xml> -w <Width>'
sys.exit(2)
elif opt in ("-i", "--InputCommonKmerXFile"):
input_file = arg
elif opt in ("-l", "--LabelLeaves"):
label_leaves = True
elif opt in ("-n","--LabelInternalNodes"):
label_internal_nodes = True
elif opt in ("-o", "--OutFile"):
out_file = arg
elif opt in ("-w", "--Width"):
width = int(arg)
elif opt in ("-x", "--OutFileXML"):
out_file_xml = arg
elif opt in ("-D", "--CommonKmerDataPath"):
common_kmer_data_path = arg
elif opt in ("-r", "--Rectangular"):
plot_rectangular = True
elif opt in ("-t", "--TaxonomicNamesOnLeaves"):
taxonomic_names_on_leaves = True
#Read in the x vector
fid = open(input_file,'r')
x = map(lambda y: float(y),fid.readlines())
fid.close()
#Normalize the x vector
#x = map(lambda y: y/sum(x),x)
#Read in the taxonomy
taxonomy = list()
fid = open(os.path.join(common_kmer_data_path,"Taxonomy.txt"),'r')
for line in fid:
taxonomy.append('_'.join(line.split()[0].split("_")[1:])) #Just take the first line of the taxonomy (erasing the taxID)
fid.close()
#Read in the basis for the ckm matrices
x_file_names = list()
fid = open(os.path.join(common_kmer_data_path,"FileNames.txt"),'r')
for line in fid:
x_file_names.append(os.path.basename(line.strip()))
fid.close()
#Read in the common kmer matrix
f=h5py.File(os.path.join(common_kmer_data_path,'CommonKmerMatrix-30mers.h5'),'r')
ckm30=np.array(f['common_kmers'],dtype=np.float64)
f.close()
f=h5py.File(os.path.join(common_kmer_data_path,'CommonKmerMatrix-50mers.h5'),'r')
ckm50=np.array(f['common_kmers'],dtype=np.float64)
f.close()
ckm30_norm = np.multiply(ckm30,1/np.diag(ckm30))
ckm50_norm = np.multiply(ckm50,1/np.diag(ckm50))
num_rows = ckm30_norm.shape[0]
num_cols = ckm30_norm.shape[1]
names = x_file_names
matrix=list()
for i in range(num_rows):
matrix.append([.5*(1-.5*ckm30_norm[i,j]-.5*ckm30_norm[j,i])+.5*(1-.5*ckm50_norm[i,j]-.5*ckm50_norm[j,i]) for j in range(i+1)])
#Construct the tree. Note I could use RapidNJ here, but a few tests have shown that the trees that RapidNJ creates are rubbish.
dm = _DistanceMatrix(names, matrix)
constructor = DistanceTreeConstructor()
tree = constructor.nj(dm)
t=Tree(tree.format('newick'),format=1)
#tree.format('newick')
#Phylo.draw_ascii(tree)
#Now I will put internal nodes in a certain phylogenetic distance between the root and a given node.
#Function to insert a node at a given distance
def insert_node(t, name_to_insert, insert_above, dist_along):
    """Splice a node named ``name_to_insert`` into the branch above ``insert_above``.

    The new node becomes a child of the original parent with
    ``dist=dist_along``; the node found under the name ``insert_above`` is
    detached and re-attached beneath it with the remaining branch length.

    Raises ValueError if ``dist_along`` exceeds the parent-to-node distance.
    """
    target = t.search_nodes(name=insert_above)[0]
    parent = (t & insert_above).up
    branch_length = t.get_distance(target, parent)
    if dist_along > branch_length:
        raise ValueError("error: dist_along larger than orig_branch_length")
    detached = target.detach()
    detached.dist = branch_length - dist_along
    parent.add_child(name=name_to_insert, dist=dist_along).add_child(detached)
#Function to insert a node some % along a branch
#.........这里部分代码省略.........
示例9: MakePlot
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
def MakePlot(x, org_names, ckm30, ckm50, outgroup, outfile, outfilexml, sum_x):
#Make sure names are unique
names = org_names
for name in names:
if names.count(name)>1:
temp_name = name
i=1
for dummy in range(0,names.count(name)-1): #Don't change the last one, just to make sure we don't conflict with the outgroup
names[names.index(temp_name)] = temp_name + "_" + str(i)
i = i +1
#Normalize the x vector
x = map(lambda y: y/sum(x),x)
ckm30_norm = np.multiply(ckm30,1/np.diag(ckm30))
ckm50_norm = np.multiply(ckm50,1/np.diag(ckm50))
num_rows = ckm30_norm.shape[0]
num_cols = ckm30_norm.shape[1]
matrix=list()
for i in range(num_rows):
matrix.append([.5*(1-.5*ckm30_norm[i,j]-.5*ckm30_norm[j,i])+.5*(1-.5*ckm50_norm[i,j]-.5*ckm50_norm[j,i]) for j in range(i+1)])
#Make the list of distances (ave of the two ckm matrices)
ckm_ave_train = .5*ckm30_norm+.5*ckm50_norm
ckm_ave_train_dist = dict()
for i in range(len(org_names)):
ckm_ave_train_dist[org_names[i]] = [.5*ckm_ave_train[i,j]+.5*ckm_ave_train[j,i] for j in range(len(org_names))]
#Construct the tree. Note I could use RapidNJ here, but a few tests have shown that the trees that RapidNJ creates are rubbish.
dm = _DistanceMatrix(names, matrix)
constructor = DistanceTreeConstructor()
tree = constructor.nj(dm)
t=Tree(tree.format('newick'),format=1)
#tree.format('newick')
#Phylo.draw_ascii(tree)
#Now I will put internal nodes in a certain phylogenetic distance between the root and a given node.
#Function to insert a node at a given distance
def insert_node(t, name_to_insert, insert_above, dist_along):
    """Splice a node named ``name_to_insert`` into the branch above ``insert_above``.

    The new node becomes a child of the original parent with
    ``dist=dist_along``; the node found under the name ``insert_above`` is
    detached and re-attached beneath it with the remaining branch length.

    Raises ValueError if ``dist_along`` exceeds the parent-to-node distance.
    """
    target = t.search_nodes(name=insert_above)[0]
    parent = (t & insert_above).up
    branch_length = t.get_distance(target, parent)
    if dist_along > branch_length:
        raise ValueError("error: dist_along larger than orig_branch_length in PlotPackage.py")
    detached = target.detach()
    detached.dist = branch_length - dist_along
    parent.add_child(name=name_to_insert, dist=dist_along).add_child(detached)
#Function to insert a node some % along a branch, taking into account the ckm distances and nodes already created in the NJ tree (and what distance their descendants are from everyone else)
def insert_hyp_node(t, leaf_name, percent, ckm_ave_train_dist, org_names):
    """Insert a hypothetical node named ``leaf_name + "_" + str(percent)`` into ``t``.

    The organism whose entry in ``ckm_ave_train_dist[leaf_name]`` is closest
    to ``percent`` is selected, and the common ancestor (LCA) of that
    organism and ``leaf_name`` anchors the computation of ``percent_dist``,
    a target distance from the root. Starting at the chosen child node the
    code walks up until the ancestor is no farther from the root than
    ``percent_dist`` and then calls ``insert_node`` on that branch.

    NOTE(review): the block structure (in particular that the ``while``
    loop runs after the whole if/elif/else chain) is reconstructed from a
    listing that had lost its indentation - confirm against the original.
    """
    leaf_dists = ckm_ave_train_dist[leaf_name]
    # Build real lists: ``map`` returns an iterator on Python 3, which has no
    # ``.index`` and cannot be averaged by ``np.mean``.
    dists = [abs(y - percent) for y in leaf_dists]
    # Only the single closest organism is used. The original also carried a
    # disabled variant that collected every organism within 0.05 of
    # ``percent``; its branch was unreachable and has been dropped.
    nearby_names = [org_names[dists.index(min(dists))]]
    mean_dist = np.mean([leaf_dists[org_names.index(y)] for y in nearby_names])
    nearby_names.append(leaf_name)
    LCA = t.get_common_ancestor(nearby_names)
    LCA_to_leaf_dist = t.get_distance(LCA, leaf_name)
    # Divide the dist to the right/left of the LCA node by the number of
    # percentage points in there.
    if LCA.name == t.name:
        percent_dist = percent * LCA_to_leaf_dist
        if mean_dist <= percent:
            child_node = (t & leaf_name)
        else:
            # This means "go up from root" in the direction of the nearest guy.
            child_node = (t & nearby_names[0])
    elif mean_dist <= percent:
        percent_dist = t.get_distance(LCA) + abs(percent - mean_dist) * (LCA_to_leaf_dist) / (1 - mean_dist)
        child_node = (t & leaf_name)
    else:
        percent_dist = t.get_distance(LCA) - abs(percent - mean_dist) * (t.get_distance(LCA)) / (mean_dist)
        child_node = (t & leaf_name)
    ancestor_node = (t & child_node.name).up
    # Climb towards the root until the branch containing percent_dist is found.
    while t.get_distance(t.name, ancestor_node) > percent_dist:
        child_node = ancestor_node
        ancestor_node = (t & child_node.name).up
    insert_node(t, leaf_name + "_" + str(percent), child_node.name,
                percent_dist - t.get_distance(t.name, ancestor_node))
#Set outgroup
if outgroup in names:
t.set_outgroup(t&outgroup) #I will need to check that this outgroup is actually one of the names...
else:
print("WARNING: the chosen outgroup " + outgroup + " is not in the given taxonomy: ")
print(names)
print("Proceeding without setting an outgroup. This may cause results to be uninterpretable.")
#Insert hypothetical nodes
hyp_node_names = dict()
cutoffs = [.9,.8,.7,.6,.5,.4,.3,.2,.1]
#.........这里部分代码省略.........
示例10: hamming
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
from Bio import Phylo
from Bio.Phylo.TreeConstruction import _DistanceMatrix
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
from io import StringIO
import re
def hamming(seq1, seq2):
    """Return the number of aligned positions at which the two sequences differ.

    Positions are paired with ``zip``, so any surplus tail of the longer
    sequence is ignored rather than counted.
    """
    mismatches = 0
    for left, right in zip(seq1, seq2):
        if left != right:
            mismatches += 1
    return int(mismatches)
# Read the species names (first line) and the character table (remaining
# whitespace-separated rows); the context manager closes the input file.
with open('rosalind_chbp.txt') as f:
    species = f.readline().rstrip().split()
    # Transpose the rows so that each entry of ``table`` is one joined column.
    table = [''.join(i) for i in zip(*f.read().rstrip().split())]
n = len(table)
# For the Phylo.TreeConstruction to work, integers in the distance matrix
# must be Python int and not numpy.int64
dm = [[hamming(table[i], table[j]) for j in range(i+1)] for i in range(n)]
constructor = DistanceTreeConstructor()
tree = constructor.nj(_DistanceMatrix(names=species, matrix=dm))
handle = StringIO()
Phylo.write(tree, handle, format='newick', plain=True)
result = handle.getvalue()
# Strip the "Inner<N>" labels from the serialized tree.
result = re.sub('Inner[0-9]+', '', result)
# Write through a context manager so the output is flushed and closed.
with open('rosalind_chbp_sub.txt', 'wt') as out:
    out.write(result)
示例11: best_elements_order_tree
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
def best_elements_order_tree(relations, elements = None, filter_order = None):
    """Return the best-scoring element order derived from a neighbor-joining tree.

    The distance between two elements is the number of properties that
    contain exactly one of them. An NJ tree is built from those distances,
    candidate orders are enumerated from the tree with ``all_orders`` and
    ``all_combinations``, and ``best`` selects one using ``score_order``.
    The distance matrix, the tree and the number of candidate orders are
    printed to stderr. ``filter_order`` is accepted but not used here.
    """
    (present_elements, present_element_groups, properties, property_groups,
     element_2_property_2_relation, property_2_element_2_relation) = relations_2_model(relations)
    if not elements:
        elements = present_elements

    def kept_pairs():
        # Every (first, second) pair of distinct elements with id(first) <= id(second).
        for first in elements:
            for second in elements:
                if (first is second) or (id(first) > id(second)):
                    continue
                yield first, second

    distances = {}
    for first, second in kept_pairs():
        # One unit per property that holds exactly one of the two elements.
        mismatch = sum(
            1.0
            for prop in properties[:]
            if (first in property_2_element_2_relation[prop]) != (second in property_2_element_2_relation[prop])
        )
        distances[first, second] = distances[second, first] = mismatch

    label_2_element = { element.label : element for element in elements }

    from Bio.Phylo.TreeConstruction import _DistanceMatrix as DistanceMatrix, DistanceTreeConstructor
    dm = DistanceMatrix([element.label for element in elements])
    for first, second in kept_pairs():
        dm[first.label, second.label] = distances[first, second]
    print(dm, file = sys.stderr)

    treebuilder = DistanceTreeConstructor(None)
    tree = treebuilder.nj(dm)
    print(tree, file = sys.stderr)

    def walker(clade):
        # Leaves map back to their element; inner clades combine their children's orders.
        if not clade.clades:
            return [ [label_2_element[clade.name]] ]
        child_orders = [walker(child) for child in clade.clades]
        collected = []
        for ordered_parts in all_orders(child_orders):
            collected.extend(all_combinations(ordered_parts))
        return collected

    orders = walker(tree.root)
    print(len(orders), file = sys.stderr)

    def score_order(order):
        # Higher is better: fewer properties with gaps, then fewer and shorter gaps.
        nb_hole = 0
        nb_prop_with_hole = 0
        total_hole_length = 0
        for prop in properties:
            start = end = None
            in_hole = False
            for i, element in enumerate(order):
                if element in property_2_element_2_relation[prop]:
                    if start is None:
                        start = i
                    end = i
                    in_hole = False
                elif (start is not None) and (not in_hole):
                    in_hole = True
                    nb_hole += 1
            # After end, it is not a hole!
            if end != i:
                nb_hole -= 1
            if end is not None:
                length = end - start + 1
                if length > len(property_2_element_2_relation[prop]):
                    total_hole_length += length - len(property_2_element_2_relation[prop])
                    nb_prop_with_hole += 1
        return (-nb_prop_with_hole, -nb_hole * 2 + -total_hole_length)

    order, score = best(orders, score_order, score0 = (-sys.maxsize, -sys.maxsize))
    return order
示例12: D
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
# rosalind_ba7b
'''
Limb Length Problem
Find the limb length for a leaf in a tree.
Given: An integer n, followed by an integer j between 0 and n - 1,
followed by a space-separated additive distance matrix D (whose elements are integers).
Return: The limb length of the leaf in Tree(D) corresponding to row j of this
distance matrix (use 0-based indexing).
'''
import numpy as np
from Bio.Phylo.TreeConstruction import _DistanceMatrix
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
# Read n, the leaf index j, and the n x n distance matrix; the context
# manager closes the input file once parsing is done.
with open('rosalind_ba7b.txt') as f:
    n = int(f.readline().rstrip())
    j = int(f.readline().rstrip())
    D = np.fromfile(f, sep=' ', dtype=int).reshape(n, n)
# For the Phylo.TreeConstruction to work, integers must be Python int and not numpy.int64.
# The loop variables are deliberately not named ``j``: on Python 2 a list
# comprehension leaks its variable and would overwrite the leaf index read above.
dm = [[int(D[row, col]) for col in range(row + 1)] for row in range(n)]
names = [str(i) for i in range(n)]
constructor = DistanceTreeConstructor()
tree = constructor.nj(_DistanceMatrix(names, dm))
# The limb length is the branch length of the leaf named after row j.
print(round(tree.find_any(str(j)).branch_length))
示例13: dm_to_tree
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
from Bio import Phylo
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
from Bio.Phylo.TreeConstruction import _DistanceMatrix
def dm_to_tree(dm):
    """Build a neighbor-joining tree from a square distance table.

    ``dm`` is cast to float, its lower triangle (diagonal included) is passed
    to ``_DistanceMatrix`` with the stringified column labels as names, and
    the names of all non-terminal clades of the NJ tree are cleared.

    If ``_DistanceMatrix`` rejects the input, diagnostics about the labels
    and the triangular matrix are printed and the exception is re-raised.
    """
    frame = dm.astype(float)
    distance_triangular = [list(frame.values[i, : i + 1]) for i in range(len(frame))]
    try:
        matrix = _DistanceMatrix(names=[str(i) for i in frame.columns], matrix=distance_triangular)
    except Exception:
        print(list(frame.columns))
        print([type(i) for i in frame.columns])
        print(type(distance_triangular))
        print(type(distance_triangular[0]))
        print(set([str(type(i)) for j in distance_triangular for i in j]))
        print(distance_triangular)
        # Bare ``raise`` keeps the original traceback (``raise e`` resets it on Python 2).
        raise
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(matrix)
    for c in tree.get_nonterminals():
        c.name = None
    return tree
示例14: DistanceCalculator
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
# CAGTTCGCCACAA Gamma
# Several things can be done with the alignment: get a distance matrix from it:
dstcalc = DistanceCalculator('identity')
dm = dstcalc.get_distance(aln)
# DistanceMatrix(names=['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon'], matrix=[[0], [0.23076923076923073, 0], [0.3846153846153846, 0.23076923076923073, 0], [0.5384615384615384, 0.5384615384615384, 0.5384615384615384, 0], [0.6153846153846154, 0.3846153846153846, 0.46153846153846156, 0.15384615384615385, 0]])
print "What's the get_distance(aln) from DistanceCalculator('identity') object?"
print type(dm)
print dm
# Alpha 0
# Beta 0.230769230769 0
# Gamma 0.384615384615 0.230769230769 0
# Delta 0.538461538462 0.538461538462 0.538461538462 0
# Epsilon 0.615384615385 0.384615384615 0.461538461538 0.153846153846 0
# Build a tree from it.
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
construc0 = DistanceTreeConstructor(dstcalc, 'nj')
tre0 = construc0.build_tree(aln)
print type(tre0)
# As you can see from above, dstcalc is needed for the constructor, and then
# the alignment is needed to build the tree. That's two things which need to
# originate from the same thing. A bit of a tall order.
# You can build the tree from a distance matrix only, leaving out the aln argument:
# do not use the build_tree method on the constructor, but rather the .nj method.
construc2 = DistanceTreeConstructor()
tre2 = construc2.nj(dm)
print type(tre2)
示例15: main
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import nj [as 别名]
#.........这里部分代码省略.........
#Run andi?
if ARGS.andi_run:
#Run andi
andi_mat = 'andi_'+ARGS.model_andi_distance+'dist_'+base+'.mat'
andi_c = 'nice andi -j -m '+ARGS.model_andi_distance+' -t '+\
str(ARGS.threads)+' '+assembly_tempdir+'/*_contigs.fa > '+\
andi_mat
print('\nRunning andi with: \''+andi_c+'\'')
os.system(andi_c)
#Read in the andi dist matrix, convert to lower triangle
dm = read_file_lines(andi_mat)[1:]
dm = lower_tri(dm)
#Correct the names in the matrix
for iso in isos:
#Could do it this way, but this is slower than a nested loop
#dm.names[dm.names.index(iso_ID_trans[iso])] = iso
#real 0m9.417s
#user 1m18.576s
#sys 0m2.620s
#Nested loop is faster
for i in range(0, len(dm.names)):
#iso_ID_trans[iso] is the short_id
if dm.names[i] == iso_ID_trans[iso]:
dm.names[i] = iso
#real 0m8.789s
#user 1m14.637s
#sys 0m2.420s
#From the distance matrix in dm, infer the NJ tree
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
constructor = DistanceTreeConstructor()
njtree = constructor.nj(dm)
njtree.rooted = True
from Bio import Phylo
Phylo.write(njtree, 'temp.tre', 'newick')
from ete3 import Tree
t = Tree('temp.tre', format=1)
#Get rid of negative branch lengths (an artefact, not an error, of NJ)
for node in t.traverse():
node.dist = abs(node.dist)
t.set_outgroup(t.get_midpoint_outgroup())
t_out = base+'_andi_NJ_'+ARGS.model_andi_distance+'dist.nwk.tre'
t.write(format=1, outfile=t_out)
print('Final tree (midpoint-rooted, NJ under '+\
ARGS.model_andi_distance+' distance) looks like this:')
#Print the ascii tree
print(t)
#Remove the temp.tre
os.remove('temp.tre')
print('Tree (NJ under '+ARGS.model_andi_distance+\
' distance, midpoint-rooted) written to '+t_out+'.')
#Run roary?
if ARGS.roary_run:
roary_keepers = [
"accessory.header.embl",
"accessory.tab",
"accessory_binary_genes.fa",
"accessory_binary_genes.fa.newick",
"accessory_binary_genes_midpoint.nwk.tre",
"accessory_graph.dot",
"blast_identity_frequency.Rtab",
"clustered_proteins",
"core_accessory.header.embl",