本文整理汇总了Python中Bio.Phylo.read方法的典型用法代码示例。如果您正苦于以下问题：Python Phylo.read方法的具体用法？Python Phylo.read怎么用？Python Phylo.read使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Phylo的用法示例。
在下文中一共展示了Phylo.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_filenames_strains
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def get_filenames_strains(self, file_path_template_newick_tree):
    """
    Derive the fasta file names of the simulated genomes from a template newick tree

    @attention: a leaf named 'ancestor' (in any letter case) is treated as the original sequence and left out

    @param file_path_template_newick_tree: File path to newick file
    @type file_path_template_newick_tree: str | unicode

    @return: one '<leaf name>.fasta' entry per remaining leaf, in the order the tree yields its terminals
    @rtype: list[str|unicode]
    """
    assert self.validate_file(file_path_template_newick_tree)
    template_tree = Phylo.read(file_path_template_newick_tree, 'newick')
    leaf_names = (terminal.name for terminal in template_tree.get_terminals())
    return [
        "{prefix}.fasta".format(prefix=leaf_name)
        for leaf_name in leaf_names
        if leaf_name.lower() != "ancestor"
    ]
示例2: save_tree_with_new_names
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def save_tree_with_new_names(tree_in, tree_out, name_subs):
    """
    Rewrite a tree with its tip labels substituted and save it in Newick format.

    The input format is auto-detected by trying each supported parser in turn;
    the first one that reads the input without raising ValueError is used.

    Args:
        tree_in: input tree, passed unchanged to Phylo.read.
        tree_out: destination, passed unchanged to Phylo.write.
        name_subs: lookup from current tip name (as str) to its replacement;
            expected to be a mapping.

    Exits the program via sys.exit when no parser accepts the input or when a
    tip name has no entry in name_subs.
    """
    print()
    print('Writing Microreact tree to: {}'.format(tree_out))

    tree = None
    for tree_format in ('newick', 'nexus', 'nexml', 'phyloxml', 'cdao'):
        try:
            # Keep the tree from the successful attempt instead of parsing the
            # input a second time once the format is known.
            tree = Phylo.read(tree_in, tree_format)
        except ValueError:
            continue
        break
    if tree is None:
        sys.exit('Error: could not read input tree')

    for node in tree.get_terminals():
        name = str(node.name)
        try:
            node.name = name_subs[name]
        except (KeyError, IndexError):
            # A mapping reports a missing key with KeyError; IndexError is still
            # caught so anything the previous handler covered keeps working.
            sys.exit('Error: sample name in tree not in Kleborate data: ' + name)
    Phylo.write(tree, tree_out, 'newick')
示例3: simulate_strains
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def simulate_strains(
        self, meta_table, genome_id_to_amounts, genome_id_to_file_path_genome, genome_id_to_file_path_gff=None):
    """
    Generate strain-level diversity around isolate assemblies (via sgEvolver)
    and add randomly picked strains to genome_id_to_file_path_genome and the metadata table.

    @attention genome_id_to_file_path_genome: Will be extended with IDs and file paths to the strains

    @param meta_table: Metadata table containing genome information
    @type meta_table: MetadataTable
    @param genome_id_to_amounts: Mapping from genome id to the amount of strains
    @type genome_id_to_amounts: dict[str, int]
    @param genome_id_to_file_path_genome: Mapping from genome id to the file path of the genome
    @type genome_id_to_file_path_genome: dict[str, str]
    @param genome_id_to_file_path_gff: Mapping from genome id to the file path of the gene annotations of a genome
    @type genome_id_to_file_path_gff: dict[str, str]

    @return: Nothing
    @rtype: None
    """
    assert isinstance(meta_table, MetadataTable)
    assert isinstance(genome_id_to_amounts, dict)
    assert isinstance(genome_id_to_file_path_genome, dict)
    assert genome_id_to_file_path_gff is None or isinstance(genome_id_to_file_path_gff, dict)

    has_annotations = genome_id_to_file_path_gff is not None
    if not has_annotations:
        self._logger.warning(
            "No gff file (gene annotation) was given. Simulating strains without such a file can break genes.")

    # every genome file is validated first, then every annotation file (if any)
    for genome_path in genome_id_to_file_path_genome.values():
        self.validate_file(genome_path)
    if has_annotations:
        for gff_path in genome_id_to_file_path_gff.values():
            self.validate_file(gff_path)

    # read file and generate strain diversity for each assembly
    self._simulate_strains(genome_id_to_amounts, genome_id_to_file_path_genome, genome_id_to_file_path_gff)
    # then subsample the strains
    self._pick_random_strains(meta_table, genome_id_to_amounts, genome_id_to_file_path_genome)
示例4: get_root_sequence
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def get_root_sequence(root_node, ref=None, translations=None):
    '''
    Collect the sequence of the root, as nucleotides and as translations,
    in one json-compatible dict so that every state can be looked up,
    including positions that never vary.

    Parameters
    ----------
    root_node : dict
        data associated with the root node; its "sequence" and
        "aa_sequences" entries are used unless both files are given
    ref : str, optional
        filename of the root sequence (fasta)
    translations : str, optional
        file name of translations (fasta)

    Returns
    -------
    dict
        the nucleotide sequence under "nuc" plus one entry per translation
    '''
    if not (ref and translations):
        # without both files, fall back to what is stored on the node itself
        sequences = {"nuc": root_node["sequence"]}
        sequences.update(root_node["aa_sequences"])
        return sequences

    from Bio import SeqIO
    sequences = {'nuc': str(SeqIO.read(ref, 'fasta').seq)}
    for record in SeqIO.parse(translations, 'fasta'):
        sequences[record.id] = str(record.seq)
    return sequences
示例5: set_description
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def set_description(data_json, cmd_line_description_file):
    """
    Read the Markdown file at *cmd_line_description_file* (decoded as UTF-8)
    and set `meta.description` in *data_json* to the text it contains.

    If the file does not exist, `fatal` is called with an explanatory message
    and *data_json* is not modified.
    """
    try:
        with open(cmd_line_description_file, encoding='utf-8') as description_file:
            markdown_text = description_file.read()
    except FileNotFoundError:
        fatal("Provided description file {} does not exist".format(cmd_line_description_file))
    else:
        # only reached when the file was read successfully
        data_json['meta']['description'] = markdown_text
示例6: load_alignments
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def load_alignments(sequence_files, gene_names):
    """
    Read one fasta alignment per gene.

    *sequence_files* and *gene_names* are paired positionally with zip, so
    surplus entries of the longer one are ignored; if a gene name occurs more
    than once, the alignment read last is the one kept.

    Returns a dict mapping each gene name to the object returned by
    AlignIO.read for its file.
    """
    from Bio import AlignIO

    return {
        gene: AlignIO.read(fname, 'fasta')
        for fname, gene in zip(sequence_files, gene_names)
    }
示例7: run
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def run(args):
    """
    Fit a titer TreeModel on the newick tree in *args.tree* using
    *args.titers* and write the result as json to *args.output*.

    When training raises InsufficientDataException, an empty model (no
    potencies, no avidities) is written if *args.allow_empty_model* is set;
    otherwise the process exits with status 1.
    """
    from .titer_model import TreeModel

    tree = Phylo.read(args.tree, 'newick')
    model = TreeModel(tree, args.titers)

    def node_titers():
        # titer attributes of every clade, keyed by clade name
        return {clade.name: {"dTiter": clade.dTiter, "cTiter": clade.cTiter}
                for clade in tree.find_clades()}

    try:
        model.prepare()
        model.train()
        tree_model = {'titers': model.compile_titers(),
                      'potency': model.compile_potencies(),
                      'avidity': model.compile_virus_effects(),
                      'nodes': node_titers()}
    except InsufficientDataException:
        print("Unable to train tree model.", file=sys.stderr)
        if not args.allow_empty_model:
            print("Exiting.")
            sys.exit(1)
        print("Writing empty model.", file=sys.stderr)
        tree_model = {'titers': model.compile_titers(),
                      'potency': {},
                      'avidity': {},
                      'nodes': node_titers()}

    # export the tree model
    write_json(tree_model, args.output)

    print("\nInferred titer model of type 'TreeModel' using augur:"
          "\n\tNeher et al. Prediction, dynamics, and visualization of antigenic phenotypes of seasonal influenza viruses."
          "\n\tPNAS, vol 113, 10.1073/pnas.1525578113\n")
    print("results written to", args.output)
示例8: draw_tree
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def draw_tree(input_number: int, output_dir: str, tag: str) -> str:
    """ Build a tree with fasttree from "trimmed_alignment<input_number>.fasta"
        and render it as a PNG named after the tag

        Arguments:
            input_number: the number inserted into the alignment file name
            output_dir: the directory the image is written into
            tag: the image's base name; the label equal to it is drawn in green

        Raises:
            RuntimeError: if fasttree did not run successfully

        Returns:
            the filename of the image generated, or an empty string if the
            fasttree output could not be parsed as newick
    """
    matplotlib.use('Agg')
    alignment = "trimmed_alignment%d.fasta" % input_number
    run_result = subprocessing.execute(["fasttree", "-quiet", "-fastest", "-noml", alignment])
    if not run_result.successful():
        raise RuntimeError("Fasttree failed to run successfully:", run_result.stderr)

    newick_text = StringIO(run_result.stdout)
    tree_filename = os.path.join(output_dir, tag + '.png')
    try:
        tree = Phylo.read(newick_text, 'newick')
    except NewickError:
        logging.debug('Invalid newick tree for %r', tag)
        return ''

    # enforce a minimum distance between branches: every branch is made
    # non-negative and lengthened by 1/20 of the largest root-to-tip distance
    padding = max(tree.distance(leaf) for leaf in tree.get_terminals()) / 20
    for clade in tree.get_nonterminals() + tree.get_terminals():
        length = clade.branch_length
        clade.branch_length = abs(length) + padding if length else padding

    # change the colour of the query gene
    Phylo.draw(tree, do_show=False, label_colors={tag: 'green'},
               label_func=lambda node: str(node).replace("|", " "))

    pyplot = matplotlib.pyplot
    fig = pyplot.gcf()
    fig.set_size_inches(20, (tree.count_terminals() / 3))
    pyplot.axis('off')
    fig.savefig(tree_filename, bbox_inches='tight')
    pyplot.close(fig)
    return tree_filename
示例9: build_newick_fasttree
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def build_newick_fasttree(aln_fname, nuc=True):
    """
    Build a tree from an alignment with fasttree and return it.

    fasttree is started directly from an argument list (no shell), so
    alignment paths containing spaces or shell metacharacters are passed
    through verbatim. Its standard output is written to "tmp.nwk" and its
    standard error to "fasttree_stderr", both in the current working directory.

    Parameters
    ----------
    aln_fname : str
        path of the alignment handed to fasttree
    nuc : bool, optional
        if true, fasttree is called with "-nt"

    Returns
    -------
    the tree that Phylo.read parses from "tmp.nwk"

    Notes
    -----
    The exit status of fasttree is not checked; a failed run only shows up
    when "tmp.nwk" cannot be parsed. If the fasttree executable cannot be
    started at all, subprocess raises OSError.
    """
    import subprocess
    from Bio import Phylo

    print("Building tree with fasttree")
    tree_cmd = ["fasttree"]
    if nuc:
        tree_cmd.append("-nt")
    tree_cmd.append(aln_fname)

    # explicit file handles replace the former "1> tmp.nwk 2> fasttree_stderr"
    # shell redirections
    with open("tmp.nwk", "w") as tree_out, open("fasttree_stderr", "w") as err_out:
        subprocess.call(tree_cmd, stdout=tree_out, stderr=err_out)
    return Phylo.read("tmp.nwk", 'newick')
示例10: build_newick_raxml
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def build_newick_raxml(aln_fname, nthreads=2, raxml_bin="raxml", **kwargs):
    """
    Build a tree from a fasta alignment with raxml and return it.

    The alignment is converted to relaxed phylip ("temp.phyx"), raxml is run
    through the shell with run name "tre", and the tree is read back from
    "RAxML_bestTree.tre". All files live in the current working directory.

    Parameters
    ----------
    aln_fname : str
        path of the fasta alignment
    nthreads : int, optional
        value passed to raxml's -T option
    raxml_bin : str, optional
        raxml executable (start of the shell command line)
    **kwargs
        accepted but not used
    """
    import os
    import shutil
    print("Building tree with raxml")
    from Bio import AlignIO, Phylo

    alignment = AlignIO.read(aln_fname, 'fasta')
    AlignIO.write(alignment, "temp.phyx", "phylip-relaxed")
    raxml_call = " ".join((
        raxml_bin,
        "-f d -T",
        str(nthreads),
        "-m GTRCAT -c 25 -p 235813 -n tre -s temp.phyx",
    ))
    os.system(raxml_call)
    return Phylo.read('RAxML_bestTree.tre', "newick")
示例11: tree
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def tree(self, in_tree):
    '''
    Assign a tree to the internal self._tree variable and prepare it for use.

    Parameters
    ----------
    in_tree : Phylo.BaseTree.Tree or str
        Either a tree object, which is used as is, or the path of an existing
        file. A file is read as newick first; if that fails and the file
        extension is 'nexus' or 'nex', it is read as nexus instead.

    Returns
    -------
    ttconf.SUCCESS

    Raises
    ------
    ValueError
        If in_tree is neither a tree nor the path of an existing file, if the
        file cannot be read as newick and has no nexus extension, or if the
        tree has fewer than 3 tips.
    '''
    from os.path import isfile

    self._tree = None
    if isinstance(in_tree, Phylo.BaseTree.Tree):
        self._tree = in_tree
    # isinstance (rather than an exact type match) also accepts str subclasses;
    # tuple() makes it work whether string_types is a list or a tuple
    elif isinstance(in_tree, tuple(string_types)) and isfile(in_tree):
        try:
            self._tree = Phylo.read(in_tree, 'newick')
        except Exception:
            # not a bare except: KeyboardInterrupt/SystemExit must propagate
            fmt = in_tree.split('.')[-1]
            if fmt in ['nexus', 'nex']:
                self._tree = Phylo.read(in_tree, 'nexus')
            else:
                raise ValueError('TreeAnc: could not load tree, format needs to be nexus or newick! input was '+str(in_tree))
    else:
        raise ValueError('TreeAnc: could not load tree! input was '+str(in_tree))

    n_tips = self._tree.count_terminals()
    if n_tips < 3:
        raise ValueError('TreeAnc: tree in %s has only %d tips. Please check your tree!'%(str(in_tree), n_tips))

    # remove all existing sequence attributes and normalize branch lengths
    for node in self._tree.find_clades():
        # missing (None) or zero branch lengths become 0.0
        node.branch_length = node.branch_length or 0.0
        if hasattr(node, "_cseq"):
            delattr(node, "_cseq")
        node.original_length = node.branch_length
        node.mutation_length = node.branch_length

    self.prepare_tree()
    if self.data:
        self._check_alignment_tree_gtr_consistency()

    return ttconf.SUCCESS
示例12: prepare_tree
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def prepare_tree(self):
    """
    Bring the tree into a consistent state: reset the root branch, ladderize,
    prepare all nodes and rebuild the leaf lookup by name.

    Should be run once the tree is read and after every rerooting,
    topology change or branch length optimizations.
    """
    self.sequence_reconstruction = False

    # the root always gets the same fixed, small branch length
    root = self.tree.root
    root.branch_length = 0.001
    root.mutation_length = root.branch_length

    self.tree.ladderize()
    self._prepare_nodes()
    self._leaves_lookup = dict((leaf.name, leaf) for leaf in self.tree.get_terminals())
示例13: infer_branch_associations
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def infer_branch_associations(path, metainfo_fpath, meta_data_config,
    total_strains_count, strain_fraction_branch_association):
    """
    Compute branch associations between gene-cluster trees and metadata.

    For every cluster whose last entry in the sorted cluster list (the gene
    count, per the layout comment below) is at least
    total_strains_count*strain_fraction_branch_association, the cluster tree
    "<path>/geneCluster/<clusterID>.nwk" is read and, for every metadata
    category marked with associate == 'yes', the significance returned by
    BranchAssociation is stored. Values are log-transformed first when the
    category has log_scale == 'yes'.

    The nested dict {clusterID: {meta_category: value}} is pickled to
    "<path>/branch_association.cpk".
    """
    from sf_geneCluster_align_makeTree import load_sorted_clusters
    from sf_coreTree_json import metadata_load

    association_dict = defaultdict(dict)
    metadata = Metadata(metainfo_fpath, meta_data_config)
    metadata_dict = metadata.to_dict()

    ## layout of the sorted clusters: [(clusterID, [ count_strains,[memb1,...],count_genes]),...]
    for clusterID, gene in load_sorted_clusters(path):
        if not gene[-1]>=total_strains_count*strain_fraction_branch_association:
            continue
        print(clusterID)
        cluster_tree = Phylo.read("%s/geneCluster/%s.nwk"%(path, clusterID), 'newick')
        assoc = BranchAssociation(cluster_tree, metadata_dict)
        for col, d in metadata.data_description.iterrows():
            if d['associate']!='yes':
                continue
            use_log = 'log_scale' in d and d['log_scale']=='yes'
            transform = (lambda x:np.log(x)) if use_log else (lambda x:x)
            category = d["meta_category"]
            assoc.calc_up_down_averages(category, transform = transform)
            association_dict[clusterID][category] = assoc.calc_significance()

    write_pickle("%s/branch_association.cpk"%path, association_dict)
示例14: infer_presence_absence_associations
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def infer_presence_absence_associations(path, metainfo_fpath, meta_data_config,
    total_strains_count, min_strain_fraction_association, max_strain_fraction_association):
    """
    Compute associations between gene presence/absence and metadata.

    The strain tree "geneCluster/strain_tree.nwk" below *path* is read once.
    For every cluster whose last entry in the sorted cluster list (the gene
    count, per the layout comment below) lies strictly between
    total_strains_count*min_strain_fraction_association and
    total_strains_count*max_strain_fraction_association, the gain/loss pattern
    is loaded and, for every metadata category marked with associate == 'yes',
    the absolute association score is stored (0.0 if the score is infinite).
    Values are log-transformed first when the category has log_scale == 'yes'.

    The nested dict {clusterID: {meta_category: score}} is pickled to
    "<path>/presence_absence_association.cpk".
    """
    import os
    from sf_geneCluster_align_makeTree import load_sorted_clusters
    from sf_coreTree_json import metadata_load

    data_description = meta_data_config
    association_dict = defaultdict(dict)
    metadata = Metadata(metainfo_fpath, data_description)
    metadata_dict = metadata.to_dict()
    min_strains_association = total_strains_count*min_strain_fraction_association
    max_strains_association = total_strains_count*max_strain_fraction_association
    sorted_genelist = load_sorted_clusters(path)
    ## sorted_genelist: [(clusterID, [ count_strains,[memb1,...],count_genes]),...]
    # TODO fix vis
    # os.path.join supplies the separator that the former "%sgeneCluster/..."
    # pattern lacked, so path works with or without a trailing slash
    tree = Phylo.read(os.path.join(path, "geneCluster", "strain_tree.nwk"), 'newick')
    assoc = PresenceAbsenceAssociation(tree, metadata_dict)
    for clusterID, gene in sorted_genelist:
        if min_strains_association < gene[-1] < max_strains_association:
            print(clusterID)
            gl = load_gain_loss(path, clusterID)
            for col, d in metadata.data_description.iterrows():
                if d['associate']=='yes':
                    if 'log_scale' in d and d['log_scale']=='yes':
                        t = lambda x:np.log(x)
                    else:
                        t = lambda x:x
                    assoc.set_gain_loss(gl)
                    score = assoc.calc_association_simple(d["meta_category"], transform = t)
                    if np.isinf(score):
                        association_dict[clusterID][d["meta_category"]] = 0.0
                    else:
                        association_dict[clusterID][d["meta_category"]] = np.abs(score)

    write_pickle("%s/presence_absence_association.cpk"%path, association_dict)
示例15: explore_paralogs
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def explore_paralogs(path, nstrains, paralog_branch_cutoff, paralog_frac_cutoff=0.3, plot=0):
    '''
    gather paralog statistics for all trees and plot if desired

    Every "*nwk" file in <path>geneCluster/ is read as a newick tree and
    passed to find_best_split. For each tree with a split, the file name, the
    split branch length and the number of paralog nodes are recorded and
    written, tab-separated, to <path>geneCluster/paralogy_statistics.txt.
    Trees that cannot be read are reported and skipped.

    parameters:
    path -- directory prefix; 'geneCluster/' is appended as is, so it is
            expected to end with a path separator
    nstrains -- not used in this function
    paralog_branch_cutoff -- cutoff used to determined whether or not to split cluster.
                             (not used in this function)
    paralog_frac_cutoff -- cutoff for paralog splitting as fraction of total strains.
                           (default 0.3 -- that is 30%; not used in this function)
    plot -- if true, save a scatter plot of branch length against number of
            paralogs to <path>geneCluster/explore_paralogs.pdf
    '''
    cluster_seqs_path=path+'geneCluster/'
    paralog_stat = []
    for fname in glob.iglob(cluster_seqs_path+'*nwk'):
        try:
            tree = Phylo.read(fname, 'newick')
        except Exception:
            # single pre-formatted argument: prints the same text under the
            # print statement and the print function
            print('(explore_paralogs) read tree problem:  %s' % fname)
            # without this, the code below would run on an undefined tree or
            # on the tree left over from the previous file
            continue
        best_split = find_best_split(tree)
        if best_split is not None:
            paralog_stat.append([fname, best_split.split_bl, len(best_split.para_nodes)])

    # NOTE(review): binary mode with a str payload only works on Python 2;
    # switch to 'w' when porting to Python 3
    with open(cluster_seqs_path+'paralogy_statistics.txt','wb') as paralogy_statistics:
        for x,y,z in paralog_stat:
            paralogy_statistics.write('%s\t%s\t%s\n'%(x.split('/')[-1],y,z))

    def plot_paralogs(path):
        '''
        plot branch length against # of paralogs across trees
        '''
        import matplotlib.pyplot as plt
        plt.ion()
        plt.figure()
        plt.scatter([x[1] for x in paralog_stat], [x[2] for x in paralog_stat])
        plt.ylabel('# paralogs')
        plt.xlabel('branch length')
        plt.savefig(path+'explore_paralogs.pdf')

    if plot: plot_paralogs(cluster_seqs_path)