本文整理汇总了Python中Bio.Phylo.TreeConstruction.DistanceTreeConstructor.upgma方法的典型用法代码示例。如果您正苦于以下问题：Python DistanceTreeConstructor.upgma方法的具体用法？Python DistanceTreeConstructor.upgma怎么用？Python DistanceTreeConstructor.upgma使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Phylo.TreeConstruction.DistanceTreeConstructor的用法示例。
在下文中一共展示了DistanceTreeConstructor.upgma方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: measure_D_net
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def measure_D_net(G,qmod,qcon):
    """Build a network-based distance matrix for G and write its UPGMA tree.

    For each node ``u`` (in sorted order) and every node ``v`` up to and
    including ``u``, the distance is
    ``1.0 - G.dmc_likelihood(u, v, qmod, qcon)``.  The resulting
    lower-triangular rows are wrapped in a ``_DistanceMatrix`` whose labels
    are ``"Taxon" + str(node)``; a UPGMA tree is built from it and written
    to ``ph_dmc.nre`` in Newick format.

    Args:
        G: iterable of sortable nodes that also exposes ``dmc_likelihood``.
        qmod: forwarded unchanged to ``G.dmc_likelihood``.
        qcon: forwarded unchanged to ``G.dmc_likelihood``.

    Returns:
        The ``_DistanceMatrix`` holding the pairwise distances.
    """
    nodes = sorted(G)
    # Labels must follow the same (sorted) order as the matrix rows; the raw
    # iteration order of G is not guaranteed to be sorted.
    names = ['Taxon' + str(u) for u in nodes]
    D_net = []
    for i, u in enumerate(nodes):
        # Lower triangle only: the nodes that sort at or before u.
        row = [1.0 - G.dmc_likelihood(u, v, qmod, qcon) for v in nodes[:i + 1]]
        D_net.append(row)
        # Debug trace: one space-separated row followed by a blank line.
        print(' '.join(str(d) for d in row) + ' \n')
    print(names)
    print(D_net)
    D_net_final = _DistanceMatrix(names, D_net)
    constructor = DistanceTreeConstructor()
    tree_dmc = constructor.upgma(D_net_final)
    Phylo.write(tree_dmc, 'ph_dmc.nre', 'newick')
    return D_net_final
示例2: DistanceTreeConstructorTest
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
class DistanceTreeConstructorTest(unittest.TestCase):
    """Checks trees built by DistanceTreeConstructor against reference topologies."""

    def setUp(self):
        # Alignment, its blosum62 distance matrix and a constructor sharing
        # the same calculator are rebuilt for every test.
        self.aln = AlignIO.read('TreeConstruction/msa.phy', 'phylip')
        blosum_calculator = DistanceCalculator('blosum62')
        self.dm = blosum_calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(blosum_calculator)

    def _check_against_reference(self, built, reference_path):
        """Assert *built* is a Tree with the topology stored at *reference_path*."""
        self.assertTrue(isinstance(built, BaseTree.Tree))
        expected = Phylo.read(reference_path, 'newick')
        self.assertTrue(Consensus._equal_topology(built, expected))

    def test_upgma(self):
        self._check_against_reference(
            self.constructor.upgma(self.dm), './TreeConstruction/upgma.tre')

    def test_nj(self):
        self._check_against_reference(
            self.constructor.nj(self.dm), './TreeConstruction/nj.tre')

    def test_built_tree(self):
        # build_tree starts from the alignment and is compared with the NJ
        # reference tree.
        self._check_against_reference(
            self.constructor.build_tree(self.aln), './TreeConstruction/nj.tre')
示例3: DistanceTreeConstructorTest
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor by comparing Newick output with reference files."""

    def setUp(self):
        # Close the alignment file as soon as it has been parsed instead of
        # leaving the handle open for the lifetime of the test.
        with open('TreeConstruction/msa.phy') as handle:
            self.aln = AlignIO.read(handle, 'phylip')
        calculator = DistanceCalculator('blosum62')
        self.dm = calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(calculator)

    def _assert_newick_matches(self, tree, reference_path):
        """Assert *tree* serialises to the first line of *reference_path*."""
        self.assertTrue(isinstance(tree, BaseTree.Tree))
        tree_file = StringIO.StringIO()
        Phylo.write(tree, tree_file, 'newick')
        # Read the expected text inside a context manager so the reference
        # file is closed even when the comparison below fails.
        with open(reference_path) as ref_tree:
            expected = ref_tree.readline()
        self.assertEqual(tree_file.getvalue(), expected)

    def test_upgma(self):
        tree = self.constructor.upgma(self.dm)
        self._assert_newick_matches(tree, './TreeConstruction/upgma.tre')

    def test_nj(self):
        tree = self.constructor.nj(self.dm)
        self._assert_newick_matches(tree, './TreeConstruction/nj.tre')

    def test_built_tree(self):
        # build_tree starts from the alignment and is compared with the NJ
        # reference file.
        tree = self.constructor.build_tree(self.aln)
        self._assert_newick_matches(tree, './TreeConstruction/nj.tre')
示例4: D_seq_matrix
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def D_seq_matrix(fasta_file):
    """Compute the 'identity' distance matrix of a FASTA alignment.

    As a side effect the UPGMA tree of that matrix is written to
    ``ph_seq.nre`` in Newick format and the matrix names are printed.

    Returns:
        The distance matrix produced by the calculator.
    """
    alignment = AlignIO.read(fasta_file, 'fasta')
    seq_distances = DistanceCalculator('identity').get_distance(alignment)
    upgma_tree = DistanceTreeConstructor().upgma(seq_distances)
    Phylo.write(upgma_tree, 'ph_seq.nre', 'newick')
    print(seq_distances.names)
    return seq_distances
示例5: phyloxml_from_msa
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def phyloxml_from_msa(msa, phyloxml):
    """Write the UPGMA tree of a FASTA alignment as phyloXML.

    Args:
        msa: FASTA alignment source handed to ``AlignIO.read``.
        phyloxml: destination handed to ``Phylo.write``.
    """
    # Biopython is imported lazily, only when the function is called.
    from Bio import AlignIO
    from Bio import Phylo
    from Bio.Phylo.TreeConstruction import DistanceCalculator
    from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

    alignment = AlignIO.read(msa, "fasta")
    distances = DistanceCalculator("ident").get_distance(alignment)
    upgma_tree = DistanceTreeConstructor().upgma(distances)
    Phylo.write(upgma_tree, phyloxml, "phyloxml")
示例6: get_dn_ds_tree
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def get_dn_ds_tree(self, dn_ds_method="NG86", tree_method="UPGMA"):
    """Construct a dn tree and a ds tree.

    Argument:
     - dn_ds_method - Available methods include NG86, LWL85, YN00
       and ML.
     - tree_method - Available methods include UPGMA and NJ.

    Returns a ``(dn_tree, ds_tree)`` tuple built from the matrices returned
    by ``self.get_dn_ds_matrix``.

    Raises RuntimeError when ``tree_method`` is neither "UPGMA" nor "NJ".
    """
    # Fail fast: reject an unsupported tree method before the dn/ds matrices
    # are computed.
    if tree_method not in ("UPGMA", "NJ"):
        raise RuntimeError("Unknown tree method ({0}). Only NJ and UPGMA "
                           "are accepted.".format(tree_method))
    from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
    dn_dm, ds_dm = self.get_dn_ds_matrix(method=dn_ds_method)
    dn_constructor = DistanceTreeConstructor()
    ds_constructor = DistanceTreeConstructor()
    if tree_method == "UPGMA":
        dn_tree = dn_constructor.upgma(dn_dm)
        ds_tree = ds_constructor.upgma(ds_dm)
    else:
        dn_tree = dn_constructor.nj(dn_dm)
        ds_tree = ds_constructor.nj(ds_dm)
    return dn_tree, ds_tree
示例7: build_tree
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def build_tree(dist_matrix, names_list, clust):
    """Build a ``Tree`` from a distance matrix with the requested clustering.

    Args:
        dist_matrix: pairwise distances; passed to ``DistanceMatrix`` for
            'nj' and through ``condense_matrix`` for 'upgma'.
        names_list: taxon labels matching ``dist_matrix``.
        clust: clustering method, either 'nj' or 'upgma'.

    Returns:
        A ``Tree`` constructed from the string form of the clustering result.

    Any other ``clust`` value prints a message and terminates the process
    with exit status 1.
    """
    if clust == 'nj':
        dm = DistanceMatrix(dist_matrix, names_list)
        # result_constructor=str makes nj return the tree as a string
        # (presumably Newick -- confirm against the nj implementation in use).
        tree_scikit = nj(dm, result_constructor=str)
        return Tree(tree_scikit)
    if clust == 'upgma':
        dm = _DistanceMatrix(names=names_list, matrix=condense_matrix(dist_matrix))
        constructor = DistanceTreeConstructor()
        tree_biopython = constructor.upgma(dm)
        # remove InnerNode names so they do not end up in the Newick text
        for inner in tree_biopython.get_nonterminals():
            inner.name = None
        output = StringIO()
        Phylo.write(tree_biopython, output, "newick")
        return Tree(output.getvalue())
    print("Unknown tree clustering method ! Aborting")
    # A bare sys.exit() reports status 0 (success); an abort must be non-zero.
    sys.exit(1)
示例8: D_F_matrix
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def D_F_matrix(D_Seq,D_net,final_tree):
    """Average two distance matrices and write the UPGMA tree of the result.

    Every pair of taxa present in both matrices gets the distance
    ``0.5 * D_net[a, b] + 0.5 * D_Seq[a, b]``.  Taxa that appear in only one
    of the two matrices are left out.  The combined lower-triangular matrix
    is printed, turned into a ``_DistanceMatrix``, and its UPGMA tree is
    written to ``final_tree`` in Newick format.

    Args:
        D_Seq: distance matrix exposing ``names`` and ``[name, name]`` lookup.
        D_net: distance matrix exposing ``names`` and ``[name, name]`` lookup;
            its name order defines the order of the result.
        final_tree: destination handed to ``Phylo.write``.

    Returns:
        The combined ``_DistanceMatrix``.
    """
    seq_names = set(D_Seq.names)
    # Both the row taxon and the column taxon must exist in D_Seq, otherwise
    # the D_Seq lookup below cannot succeed; keep D_net's ordering.
    shared = [name for name in D_net.names if name in seq_names]
    D_F = []
    for i, key1 in enumerate(shared):
        # Lower triangle including the diagonal.
        D_F.append([
            (0.5*D_net[key1,key2] + 0.5*D_Seq[key1,key2])
            for key2 in shared[:i + 1]
        ])
    print(D_F)
    D_F_final = _DistanceMatrix(shared, D_F)
    constructor = DistanceTreeConstructor()
    tree_D_F = constructor.upgma(D_F_final)
    Phylo.write(tree_D_F, final_tree, 'newick')
    return D_F_final
示例9: D_F_matrix
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def D_F_matrix(D_Seq,D_net,final_tree, alpha):
    """Blend two distance matrices and write the UPGMA tree of the result.

    Every pair of taxa present in both matrices gets the distance
    ``(1 - alpha) * D_net[a, b] + alpha * D_Seq[a, b]``.  Taxa that appear in
    only one of the two matrices are left out.  The combined lower-triangular
    matrix is printed, turned into a ``_DistanceMatrix``, and its UPGMA tree
    is written to ``final_tree`` in Newick format.

    Args:
        D_Seq: distance matrix exposing ``names`` and ``[name, name]`` lookup.
        D_net: distance matrix exposing ``names`` and ``[name, name]`` lookup;
            its name order defines the order of the result.
        final_tree: destination handed to ``Phylo.write``.
        alpha: weight of ``D_Seq`` in the blend (``D_net`` gets
            ``1 - alpha``); intended to lie between 0 and 1.

    Returns:
        The combined ``_DistanceMatrix``.
    """
    seq_names = set(D_Seq.names)
    # Both the row taxon and the column taxon must exist in D_Seq, otherwise
    # the D_Seq lookup below cannot succeed; keep D_net's ordering.
    shared = [name for name in D_net.names if name in seq_names]
    D_F = []
    for i, key1 in enumerate(shared):
        # Lower triangle including the diagonal.
        D_F.append([
            ((1-alpha) * D_net[key1,key2]) + (alpha * D_Seq[key1,key2])
            for key2 in shared[:i + 1]
        ])
    print(D_F)
    D_F_final = _DistanceMatrix(shared, D_F)
    constructor = DistanceTreeConstructor()
    tree_D_F = constructor.upgma(D_F_final)
    Phylo.write(tree_D_F, final_tree, 'newick')
    return D_F_final
示例10: noFeasibleTest
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def noFeasibleTest(FASTAFile, sampleSize, outputDir):
    """Search for a low-parsimony tree with NNI moves, ignoring feasibility.

    Takes a FASTAFile, constructs a UPGMA tree from the file data, converts
    this tree to RLR format, then repeatedly scores a random sample of the
    current tree's NNI neighbours and moves to the lowest-scoring one for as
    long as that lowers the parsimony score.  Progress is logged to a file
    in ``outputDir`` named after ``FASTAFile`` with ".align" replaced by
    ".out".

    Args:
        FASTAFile: path of the FASTA alignment to read.
        sampleSize: number of neighbours to score per iteration; lowered to
            ``len(NNIs) - 1`` when fewer neighbours are available.
        outputDir: directory that receives the log file.

    Returns:
        None.
    """
    random.seed(0)
    outputFile = FASTAFile.replace(".align", ".out")
    if "/" in outputFile:
        outputFile = outputFile[outputFile.rfind("/"):]
    # The context manager closes (and flushes) the log even if the search
    # raises part-way through.
    with open(outputDir + "/" + outputFile, 'w') as output:
        output.write("*****************RUN STARTS HERE!*****************")
        #start time
        startTime = time.clock()
        output.write("\n" + "Filename: " + FASTAFile + "\n")
        output.write("Program Start: {:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now()) + "\n")
        output.write("Sample Size: " + str(sampleSize) + "\n\n")
        # Import fasta alignment file
        myAlignment = AlignIO.read(FASTAFile, "fasta")
        # Create a tip mapping from the fasta file
        tipMapping = {}
        for record in myAlignment:
            tipMapping[record.id] = str(record.seq)
        # Compute a distance matrix and construct tree
        calculator = DistanceCalculator("identity")
        myMatrix = calculator.get_distance(myAlignment)
        constructor = DistanceTreeConstructor()
        upgmaTree = constructor.upgma(myMatrix)
        # Convert the tree to a nested Python structure: quote the tip names,
        # write plain Newick to a buffer, strip the "Inner<n>" labels and the
        # trailing ";", then let literal_eval parse what is left.
        for clade in upgmaTree.get_terminals():
            clade.name = "\"" + clade.name + "\""
        buf = cStringIO.StringIO()
        Phylo.write(upgmaTree, buf, 'newick', plain = True)
        tree = buf.getvalue()
        tree = re.sub(r'Inner\d*', '', tree)
        tree = tree.replace(";", "")
        tree = literal_eval(tree) #newick format
        # RLR tree required for maxParsimony function
        tree = NNI.NewicktoRLR(tree)
        score = NNI.maxParsimony(tree, tipMapping)
        # Perform NNI heuristic
        loopCounter = 0
        while True:
            loopCounter += 1
            output.write("Loop Iteration: " + str(loopCounter) + "\n")
            output.write("Loop Start Time: {:%H:%M:%S}".format(datetime.datetime.now()) + "\n")
            output.write("Current Tree\nScore: " + str(score) + "\nTree:\n" + str(tree) + "\n\n")
            NNIs = NNI.allNNIs(tree)
            if len(NNIs)-1 < sampleSize:
                sampleSize = len(NNIs)-1
            toScore = random.sample(NNIs, sampleSize)
            scoredList = map(lambda x: (NNI.maxParsimony(x, tipMapping), x), toScore)
            sortedlist = sorted(scoredList)
            # An empty sample offers nothing better, so it ends the search
            # instead of failing on sortedlist[0].
            if sortedlist and sortedlist[0][0] < score:
                score = sortedlist[0][0]
                tree = sortedlist[0][1]
                output.write("Found A More Parsimonious Tree!\n\n")
            else:
                break
        output.write("No Neighbors With Better Scores Found\n\n")
        output.write("Final Tree:\n" + str(tree) + "\nScore: " + str(score) + "\n\n")
        endTime = (time.clock() - startTime)
        output.write("Program End: " + str(endTime) + " seconds\n\n")
    return
示例11: DistanceCalculator
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
import copy


def _jukes_cantor_corrected(dm):
    """Return a Jukes-Cantor corrected deep copy of *dm*; *dm* is not modified.

    Each entry ``x`` becomes ``-3/4 * ln(1 - 4/3 * x)``.  Float literals are
    used so the factors are not truncated by integer division.
    """
    corrected = copy.deepcopy(dm)
    for row in corrected.matrix:
        row[:] = [-0.75 * np.log(1 - 4.0 / 3.0 * x) for x in row]
    return corrected


# Creates the distance matrix
calculator = DistanceCalculator('ident')
dm_ape = calculator.get_distance(alignApe)
dm_hiv = calculator.get_distance(alignHIV)
# Jukes Cantor corrections, applied to copies so that the uncorrected
# matrices (and the trees built from them) keep the raw distances
dm_ape_corrected = _jukes_cantor_corrected(dm_ape)
dm_hiv_corrected = _jukes_cantor_corrected(dm_hiv)
# Constructs the tree using the upgma algorithm
constructor = DistanceTreeConstructor()
tree_ape = constructor.upgma(dm_ape)
tree_ape_corrected = constructor.upgma(dm_ape_corrected)
tree_hiv = constructor.upgma(dm_hiv)
tree_hiv_corrected = constructor.upgma(dm_hiv_corrected)
# Outputs the trees as a xml
Phylo.write(tree_ape, 'treeApe.xml', 'phyloxml')
Phylo.write(tree_ape_corrected, 'treeApe_corrected.xml', 'phyloxml')
Phylo.write(tree_hiv, 'treeHIV.xml', 'phyloxml')
Phylo.write(tree_hiv_corrected, 'treeHIV_corrected.xml', 'phyloxml')
示例12: len
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
# Disabled alternative kept for reference: pad every record to a common
# length, write the padded records to a FASTA file and read that file back
# as the alignment. None of the commented-out statements below are executed.
## pad sequences so that they all have the same length
#for record in records:
#    if len(record.seq) != maxlen:
#        sequence = str(record.seq).ljust(maxlen, '.')
#        record.seq = Seq.Seq(sequence)
#assert all(len(record.seq) == maxlen for record in records)
## write to temporary file and do alignment
#output_file = '{}_padded.fasta'.format(os.path.splitext(input_file)[0])
#with open(output_file, 'w') as f:
#    SeqIO.write(records, f, 'fasta')
#alignment = AlignIO.read(output_file, "fasta")
#cline = ClustalwCommandline("clustalw2", infile=input_file)
#print(cline)
#print type(cline)
# Active path: build the MUSCLE command line for input_file; calling the
# command-line object yields the captured stdout and stderr.
muscle_cline = MuscleCommandline(input=input_file)
stdout, stderr = muscle_cline()
# The alignment is parsed from the captured stdout, read as FASTA.
alignment = AlignIO.read(StringIO(stdout), "fasta")
print(alignment)
#alignment = AlignIO.read('../data/ls_orchid.fasta', 'fasta')
#print alignment
# Pairwise distances of the alignment under the 'ident' model.
calculator = DistanceCalculator('ident')
dm = calculator.get_distance(alignment)
# Build the UPGMA tree and save it to phyloxml.xml in phyloXML format.
constructor = DistanceTreeConstructor()
tree = constructor.upgma(dm)
Phylo.write(tree, 'phyloxml.xml', 'phyloxml')
示例13: compute_tree
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def compute_tree(options, mat, names):
    """ build the upgma hierarchical clustering of the distances in mat and
    save it as newick, png and graphviz dot next to tree_path(options)
    """
    def _nudge(dist):
        # tree constructor writes 0-distances as 1s for some reason, so
        # exact 0s and exact 1s are moved slightly and restored further down
        if dist == 0.:
            return 1e-10
        if dist == 1.:
            return 1.1
        return dist

    def _hide_inner(label):
        # internal nodes are drawn without a label
        return "" if "Inner" in str(label) else label

    # oops, convert to biopython matrix (lower triangle incl. diagonal)
    lower_triangle = []
    for i, row_name in enumerate(names):
        lower_triangle.append(
            [_nudge(float(mat[row_name][col_name])) for col_name in names[:i + 1]])

    # upgma tree
    tree = DistanceTreeConstructor().upgma(_DistanceMatrix(names, lower_triangle))
    robust_makedirs(os.path.dirname(tree_path(options)))
    Phylo.write(tree, tree_path(options), "newick")

    # png tree -- note : doesn't work in toil
    Phylo.draw_graphviz(tree, label_func = _hide_inner, node_size=1000, node_shape="s", font_size=10)
    pylab.savefig(tree_path(options).replace("newick", "png"))

    # graphviz: undirected networkx graph with the names pushed into a
    # "label" attribute on integer-numbered nodes
    nxgraph = nx.convert_node_labels_to_integers(
        nx.Graph(Phylo.to_networkx(tree)), label_attribute="label")

    for node_id in nxgraph.nodes():
        attrs = nxgraph.node[node_id]
        if "Inner" not in str(attrs["label"]):
            attrs["fontsize"] = 18
            continue
        attrs["label"] = "\"\""
        attrs["width"] = 0.001
        attrs["height"] = 0.001

    for end_a, end_b in ((e[0], e[1]) for e in nxgraph.edges()):
        attrs = nxgraph.edge[end_a][end_b]
        # in graphviz, weight means something else, so make it a label
        weight = float(attrs["weight"])
        # undo hack from above: same outcome as clamping values above 1 to 1
        # and then zeroing anything that is <= 1e-10 or exactly 1
        # NOTE(review): every weight >= 1 therefore ends up as 0 -- confirm
        # that this is intended
        if weight >= 1. or weight <= 1e-10:
            weight = 0.
        attrs["weight"] = None
        attrs["label"] = "{0:.3g}".format(weight * 100.)
        attrs["fontsize"] = 14
        attrs["len"] = draw_len(weight)

    nx.write_dot(nxgraph, tree_path(options).replace("newick", "dot"))
示例14: NNIheuristic
# 需要导入模块: from Bio.Phylo.TreeConstruction import DistanceTreeConstructor [as 别名]
# 或者: from Bio.Phylo.TreeConstruction.DistanceTreeConstructor import upgma [as 别名]
def NNIheuristic(FASTAFile, sampleSize, threshold, outputDir):
""""Find the maximum parsimony score for that tree"""
random.seed(0)
outputFile = FASTAFile.replace(".align", ".out")
if "/" in outputFile:
outputFile = outputFile[outputFile.rfind("/"):]
output = open(outputDir + "/" + outputFile, 'w')
output.write("*****************RUN STARTS HERE!*****************")
#start time
startTime = time.clock()
output.write("\n" + "Filename: " + FASTAFile + "\n")
output.write("Program Start: {:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now()) + "\n")
output.write("Sample Size: " + str(sampleSize) + "\nThreshold: " + str(threshold) + "\n\n")
# Import fasta alignment file
myAlignment = AlignIO.read(FASTAFile, "fasta")
# Create a tip mapping from the fasta file
tipMapping = {}
for record in myAlignment:
tipMapping[record.id] = str(record.seq)
# Compute a distance matrix and construct tree
calculator = DistanceCalculator("identity")
myMatrix = calculator.get_distance(myAlignment)
output.write("matrix constructed here")
constructor = DistanceTreeConstructor()
upgmaTree = constructor.upgma(myMatrix)
output.write("constructed upgma tree")
# Convert phyloxml tree to newick
# biopython does not provide a function to do this so it was necessary
# to write to a buffer in newick to convert then get rid of unneeded info
for clade in upgmaTree.get_terminals():
clade.name = "\"" + clade.name + "\""
buf = cStringIO.StringIO()
Phylo.write(upgmaTree, buf, 'newick', plain = True)
tree = buf.getvalue()
tree = re.sub(r'Inner\d*', '', tree)
tree = tree.replace(";", "")
tree = literal_eval(tree) #newick format
output.write("created the original tree into newick format")
# RLR tree required for maxParsimony function
tree = NewicktoRLR(tree)
score = maxParsimony(tree, tipMapping)
graph = nx.Graph()
makeGraph(graph, tree)
output.write("made a graph")
leaves = getLeaves(tree)
currentFeasible = isFeasible(graph,leaves)
output.write("tested isFeasible")
# Perform NNI heuristic
counter = 0
loopCounter = 0
while True:
output.write("in the while loop")
loopCounter += 1
output.write("Loop Iteration: " + str(loopCounter) + "\n")
output.write("Loop Start Time: {:%H:%M:%S}".format(datetime.datetime.now()) + "\n")
output.write("Current Tree\nFeasibility: " + str(currentFeasible) + "\nScore: " + str(score) + "\nTree:\n" + str(tree) + "\n\n")
NNIs = allNNIs(tree)
if len(NNIs)-1 < sampleSize:
sampleSize = len(NNIs)-1
toScore = random.sample(NNIs, sampleSize)
# add feasibility test
output.write("starting feasibility test")
feasible = []
infeasible = []
for tree in toScore:
graph = nx.Graph()
makeGraph(graph, tree)
leaves = getLeaves(tree)
if isFeasible(graph, leaves): #if this tree is possible
feasible.append(tree)
else:
infeasible.append(tree) #if this tree is not possible
output.write("Number of Feasible Neighbor Trees: " + str(len(feasible)) + "\n")
output.write("Number of Infeasible Neighbor Trees: " + str(len(infeasible)) + "\n")
if len(feasible) != 0: #if feasible trees were found
if isFeasible(graph, leaves): #if this NNI is possible
feasible.append(tree)
else:
infeasible.append(tree) #if this NNI is not possible
if len(feasible) != 0: #if feasible NNIs were found
scoredList = map(lambda x: (maxParsimony(x, tipMapping), x), feasible)
sortedList = sorted(scoredList)
counter = 0
if not currentFeasible or sortedList[0][0] < score:
score = sortedList[0][0]
tree = sortedList[0][1]
currentFeasible = True
output.write("Found a New Feasible Tree!\n\n")
else:
output.write("Best Possible Feasible Tree Found\n" + str(tree) + "\n" + "Score: " + str(score) + "\n\n")
break
else: #if no possible trees we're found
#.........这里部分代码省略.........