当前位置: 首页>>代码示例>>Python>>正文


Python Phylo.read方法代码示例

本文整理汇总了 Python 中 Bio.Phylo.read 方法的典型用法代码示例。如果您正苦于以下问题：Python Phylo.read 方法的具体用法？Python Phylo.read 怎么用？Python Phylo.read 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 Bio.Phylo 的用法示例。


在下文中一共展示了Phylo.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_filenames_strains

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def get_filenames_strains(self, file_path_template_newick_tree):
    """
    Derive the fasta file names of the simulated strains from a template newick tree.

    Every terminal node of the tree contributes one '<leaf name>.fasta' entry,
    except a leaf whose name equals 'ancestor' (compared case-insensitively).

    @attention: 'ancestor' is assumed to be part of tree as original sequence and will not be included

    @param file_path_template_newick_tree: File path to newick file
    @type file_path_template_newick_tree: str | unicode

    @return: list of file names of simulated genomes
    @rtype: list[str|unicode]
    """
    assert self.validate_file(file_path_template_newick_tree)
    template_tree = Phylo.read(file_path_template_newick_tree, 'newick')
    # lazily walk the leaf names; the 'ancestor' leaf is filtered out below
    leaf_names = (terminal.name for terminal in template_tree.get_terminals())
    return [
        "{prefix}.fasta".format(prefix=leaf_name)
        for leaf_name in leaf_names
        if leaf_name.lower() != "ancestor"
    ]
开发者ID:CAMI-challenge,项目名称:CAMISIM,代码行数:23,代码来源:strainsimulationwrapper.py

示例2: save_tree_with_new_names

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def save_tree_with_new_names(tree_in, tree_out, name_subs):
    """
    Rename the tips of a tree and write the result in Newick format.

    The input format is detected by trying each candidate format in turn and
    keeping the first tree that Phylo.read returns without raising ValueError.
    The tree parsed during detection is reused, so the input is read only once
    per attempted format.

    Parameters
    ----------
    tree_in : input tree, passed straight to Phylo.read
    tree_out : destination passed straight to Phylo.write
    name_subs : lookup from the current tip name (as str) to the new tip name

    Exits the program (sys.exit) when no format can be read or when a tip name
    has no entry in *name_subs*.
    """
    print()
    print('Writing Microreact tree to: {}'.format(tree_out))
    tree = None
    for try_tree_format in ['newick', 'nexus', 'nexml', 'phyloxml', 'cdao']:
        try:
            tree = Phylo.read(tree_in, try_tree_format)
        except ValueError:
            continue
        break
    if tree is None:
        sys.exit('Error: could not read input tree')

    for node in tree.get_terminals():
        name = str(node.name)
        try:
            node.name = name_subs[name]
        except (KeyError, IndexError):
            # a mapping lookup raises KeyError; IndexError is kept so that any
            # caller relying on the previous handling still gets the same exit
            sys.exit('Error: sample name in tree not in Kleborate data: ' + name)
    Phylo.write(tree, tree_out, 'newick')
开发者ID:katholt,项目名称:Kleborate,代码行数:24,代码来源:kleborate_to_microreact.py

示例3: simulate_strains

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def simulate_strains(
        self, meta_table, genome_id_to_amounts, genome_id_to_file_path_genome, genome_id_to_file_path_gff=None):
    """
    Uses sgEvolver to generate strain-level diversity around an isolate assembly
    and add randomly picked strains to genome_id_to_file_path_genome and metadata table.

    @attention genome_id_to_file_path_genome: Will be extended with IDs and file paths to the strains

    @param meta_table: Metadata table containing genome information
    @type meta_table: MetadataTable
    @param genome_id_to_amounts: Mapping from genome id to the amount of strains
    @type genome_id_to_amounts: dict[str, int]
    @param genome_id_to_file_path_genome: Mapping from genome id to the file path of the genome
    @type genome_id_to_file_path_genome: dict[str, str]
    @param genome_id_to_file_path_gff: Mapping from genome id to the file path of the gene annotations of a genome
    @type genome_id_to_file_path_gff: dict[str, str]

    @return: Nothing
    @rtype: None
    """
    has_annotations = genome_id_to_file_path_gff is not None

    assert isinstance(meta_table, MetadataTable)
    assert isinstance(genome_id_to_amounts, dict)
    assert isinstance(genome_id_to_file_path_genome, dict)
    assert not has_annotations or isinstance(genome_id_to_file_path_gff, dict)

    if not has_annotations:
        self._logger.warning(
            "No gff file (gene annotation) was given. Simulating strains without such a file can break genes.")

    # every genome file is passed through validate_file; annotation files too when given
    for genome_file_path in genome_id_to_file_path_genome.values():
        self.validate_file(genome_file_path)
    if has_annotations:
        for gff_file_path in genome_id_to_file_path_gff.values():
            self.validate_file(gff_file_path)

    # read file and generate strain diversity for each assembly
    self._simulate_strains(genome_id_to_amounts, genome_id_to_file_path_genome, genome_id_to_file_path_gff)
    # then subsample the strains
    self._pick_random_strains(meta_table, genome_id_to_amounts, genome_id_to_file_path_genome)
开发者ID:CAMI-challenge,项目名称:CAMISIM,代码行数:39,代码来源:strainsimulationwrapper.py

示例4: get_root_sequence

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def get_root_sequence(root_node, ref=None, translations=None):
    '''
    Assemble the sequence of the root, as nucleotides and as translations,
    into one dict so that every state (variable or not) can be looked up.

    Parameters
    ----------
    root_node : dict
        data associated with the node; its "sequence" and "aa_sequences"
        entries are used when *ref* and *translations* are not both given
    ref : str, optional
        filename of the root sequence (fasta)
    translations : str, optional
        file name of translations (fasta)

    Returns
    -------
    dict
        nucleotide sequence under the key "nuc" plus one entry per translation
    '''
    if not (ref and translations):
        # no complete pair of files: fall back to what the node itself carries
        sequences = {"nuc": root_node["sequence"]}
        sequences.update(root_node["aa_sequences"])
        return sequences

    from Bio import SeqIO
    sequences = {'nuc': str(SeqIO.read(ref, 'fasta').seq)}
    for record in SeqIO.parse(translations, 'fasta'):
        sequences[record.id] = str(record.seq)
    return sequences
开发者ID:nextstrain,项目名称:augur,代码行数:34,代码来源:export_v1.py

示例5: set_description

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def set_description(data_json, cmd_line_description_file):
    """
    Store the contents of a Markdown file as the dataset description.

    The file named by *cmd_line_description_file* is read as UTF-8 and its
    full text is assigned to ``data_json['meta']['description']``.  When the
    file does not exist, ``fatal`` is called with an explanatory message.
    """
    try:
        with open(cmd_line_description_file, encoding='utf-8') as handle:
            data_json['meta']['description'] = handle.read()
    except FileNotFoundError:
        message = "Provided desciption file {} does not exist"
        fatal(message.format(cmd_line_description_file))
开发者ID:nextstrain,项目名称:augur,代码行数:13,代码来源:export_v2.py

示例6: load_alignments

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def load_alignments(sequence_files, gene_names):
    """
    Read one fasta alignment per gene.

    *sequence_files* and *gene_names* are paired positionally with ``zip``,
    so surplus entries of the longer argument are ignored.

    Returns a dict mapping each gene name to the object returned by
    ``AlignIO.read`` for the matching file.
    """
    from Bio import AlignIO
    return {
        gene: AlignIO.read(fname, 'fasta')
        for fname, gene in zip(sequence_files, gene_names)
    }
开发者ID:nextstrain,项目名称:augur,代码行数:8,代码来源:reconstruct_sequences.py

示例7: run

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def run(args):
    """
    Fit a TreeModel to the tree and titers named in *args* and write it as JSON.

    Reads ``args.tree`` (newick) and ``args.titers``, writes the model to
    ``args.output``.  When InsufficientDataException is raised while preparing
    or training, an empty model is written if ``args.allow_empty_model`` is
    set; otherwise the process exits with status 1.
    """
    from .titer_model import TreeModel

    def node_titers(tree):
        # per-clade dTiter/cTiter values keyed by clade name
        return {clade.name: {"dTiter": clade.dTiter, "cTiter": clade.cTiter}
                for clade in tree.find_clades()}

    T = Phylo.read(args.tree, 'newick')
    TM_tree = TreeModel(T, args.titers)
    try:
        TM_tree.prepare()
        TM_tree.train()
        tree_model = {
            'titers': TM_tree.compile_titers(),
            'potency': TM_tree.compile_potencies(),
            'avidity': TM_tree.compile_virus_effects(),
            'nodes': node_titers(T),
        }
    except InsufficientDataException:
        print("Unable to train tree model.", file=sys.stderr)
        if not args.allow_empty_model:
            print("Exiting.")
            sys.exit(1)
        else:
            print("Writing empty model.", file=sys.stderr)
            tree_model = {
                'titers': TM_tree.compile_titers(),
                'potency': {},
                'avidity': {},
                'nodes': node_titers(T),
            }

    # export the tree model
    write_json(tree_model, args.output)
    citation = ("\nInferred titer model of type 'TreeModel' using augur:"
                "\n\tNeher et al. Prediction, dynamics, and visualization of antigenic phenotypes of seasonal influenza viruses."
                "\n\tPNAS, vol 113, 10.1073/pnas.1525578113\n")
    print(citation)
    print("results written to", args.output)
开发者ID:nextstrain,项目名称:augur,代码行数:33,代码来源:titers.py

示例8: draw_tree

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def draw_tree(input_number: int, output_dir: str, tag: str) -> str:
    """ Construct a PNG for display via fasttree

        Runs fasttree on 'trimmed_alignment<input_number>.fasta', pads every
        branch length so that branches stay visually separated, and saves the
        drawing as '<tag>.png' inside output_dir.

        Returns:
            the filename of the image generated, or an empty string when the
            fasttree output cannot be parsed as newick
    """
    matplotlib.use('Agg')
    alignment_name = "trimmed_alignment%d.fasta" % input_number
    run_result = subprocessing.execute(["fasttree", "-quiet", "-fastest", "-noml", alignment_name])
    if not run_result.successful():
        raise RuntimeError("Fasttree failed to run successfully:", run_result.stderr)

    handle = StringIO(run_result.stdout)
    tree_filename = os.path.join(output_dir, tag + '.png')
    try:
        tree = Phylo.read(handle, 'newick')
    except NewickError:
        logging.debug('Invalid newick tree for %r', tag)
        return ''

    # enforce a minimum distance between branches: every clade gets an extra
    # twentieth of the largest root-to-tip distance
    max_size = max(tree.distance(node) for node in tree.get_terminals())
    padding = max_size / 20
    for clade in tree.get_nonterminals() + tree.get_terminals():
        current = clade.branch_length
        clade.branch_length = padding if not current else abs(current) + padding

    def spaced_label(node):
        return str(node).replace("|", " ")

    # change the colour of the query gene
    Phylo.draw(tree, do_show=False, label_colors={tag: 'green'},
               label_func=spaced_label)
    fig = matplotlib.pyplot.gcf()
    fig.set_size_inches(20, (tree.count_terminals() / 3))
    matplotlib.pyplot.axis('off')
    fig.savefig(tree_filename, bbox_inches='tight')
    matplotlib.pyplot.close(fig)
    return tree_filename
开发者ID:antismash,项目名称:antismash,代码行数:40,代码来源:trees.py

示例9: build_newick_fasttree

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def build_newick_fasttree(aln_fname, nuc=True):
    """
    Build a tree from an alignment with the external ``fasttree`` program.

    fasttree's standard output is written to 'tmp.nwk' and its standard error
    to 'fasttree_stderr' (both relative to the current working directory);
    'tmp.nwk' is then parsed as newick and returned.

    The program is started with an argument list instead of a shell command
    line, so alignment paths containing spaces or shell metacharacters are
    passed through unchanged.

    Parameters
    ----------
    aln_fname : str
        path of the alignment handed to fasttree
    nuc : bool, optional
        when true, '-nt' is added to the fasttree call

    Returns
    -------
    the tree read from 'tmp.nwk' by Phylo.read
    """
    import subprocess
    from Bio import Phylo
    print("Building tree with fasttree")
    tree_cmd = ["fasttree"]
    if nuc:
        tree_cmd.append("-nt")
    tree_cmd.append(aln_fname)

    with open("tmp.nwk", "w") as tree_out, open("fasttree_stderr", "w") as log_out:
        try:
            # the exit status is deliberately not checked: a failed run leaves
            # 'tmp.nwk' without a tree and Phylo.read below reports that
            subprocess.call(tree_cmd, stdout=tree_out, stderr=log_out)
        except OSError as err:
            # e.g. fasttree is not installed: record the reason where the shell
            # used to put its own error message
            log_out.write("could not run fasttree: %s\n" % err)
    return Phylo.read("tmp.nwk", 'newick')
开发者ID:neherlab,项目名称:treetime,代码行数:12,代码来源:utils.py

示例10: build_newick_raxml

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def build_newick_raxml(aln_fname, nthreads=2, raxml_bin="raxml", **kwargs):
    """
    Build a tree with RAxML and return it.

    The fasta alignment *aln_fname* is rewritten as relaxed phylip into
    'temp.phyx', *raxml_bin* is launched on that file through the shell with
    *nthreads* threads, and 'RAxML_bestTree.tre' is read back as newick.
    Additional keyword arguments are accepted but not used.
    """
    import os
    print("Building tree with raxml")
    from Bio import Phylo, AlignIO
    alignment = AlignIO.read(aln_fname, 'fasta')
    AlignIO.write(alignment, "temp.phyx", "phylip-relaxed")
    raxml_call = " ".join([
        raxml_bin,
        "-f d -T",
        str(nthreads),
        "-m GTRCAT -c 25 -p 235813 -n tre -s temp.phyx",
    ])
    os.system(raxml_call)
    return Phylo.read('RAxML_bestTree.tre', "newick")
开发者ID:neherlab,项目名称:treetime,代码行数:10,代码来源:utils.py

示例11: tree

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def tree(self, in_tree):
    '''
    assigns a tree to the internal self._tree variable. The tree is either
    loaded from file (if in_tree is str) or assigned (if in_tree is a Phylo.tree)

    A file is first read as newick; if that fails and the file extension is
    'nexus' or 'nex', it is read as nexus instead.  After loading, missing
    branch lengths are set to 0.0, cached '_cseq' attributes are removed,
    original_length/mutation_length are initialised from the branch length
    and prepare_tree() is called.

    Raises ValueError when the input is neither a tree nor an existing file,
    when the file cannot be read as newick or nexus, or when the tree has
    fewer than 3 tips.

    Returns ttconf.SUCCESS.
    '''
    from os.path import isfile
    self._tree = None
    if isinstance(in_tree, Phylo.BaseTree.Tree):
        self._tree = in_tree
    # isinstance also accepts subclasses of the string types; tuple() makes the
    # check work whether string_types (defined outside this block) is a list or a tuple
    elif isinstance(in_tree, tuple(string_types)) and isfile(in_tree):
        try:
            self._tree = Phylo.read(in_tree, 'newick')
        except Exception:
            # only parse failures trigger the nexus fallback; KeyboardInterrupt
            # and SystemExit are no longer swallowed here
            fmt = in_tree.split('.')[-1]
            if fmt in ['nexus', 'nex']:
                self._tree = Phylo.read(in_tree, 'nexus')
            else:
                raise ValueError('TreeAnc: could not load tree, format needs to be nexus or newick! input was '+str(in_tree))
    else:
        raise ValueError('TreeAnc: could not load tree! input was '+str(in_tree))

    n_tips = self._tree.count_terminals()
    if n_tips < 3:
        raise ValueError('TreeAnc: tree in %s as only %d tips. Please check your tree!'%(str(in_tree), n_tips))

    # remove all existing sequence attributes
    for node in self._tree.find_clades():
        node.branch_length = node.branch_length if node.branch_length else 0.0
        if hasattr(node, "_cseq"):
            delattr(node, "_cseq")
        node.original_length = node.branch_length
        node.mutation_length = node.branch_length
    self.prepare_tree()

    if self.data:
        self._check_alignment_tree_gtr_consistency()

    return ttconf.SUCCESS
开发者ID:neherlab,项目名称:treetime,代码行数:39,代码来源:treeanc.py

示例12: prepare_tree

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def prepare_tree(self):
    """
    Set link to parent and calculate distance to root for all tree nodes.
    Should be run once the tree is read and after every rerooting,
    topology change or branch length optimizations.

    Concretely: resets the sequence_reconstruction flag, gives the root a
    branch/mutation length of 0.001, ladderizes the tree, delegates the
    per-node work to _prepare_nodes() and rebuilds the name -> leaf lookup.
    """
    self.sequence_reconstruction = False

    tree_root = self.tree.root
    tree_root.branch_length = 0.001
    tree_root.mutation_length = tree_root.branch_length

    self.tree.ladderize()
    self._prepare_nodes()
    self._leaves_lookup = dict(
        (leaf.name, leaf) for leaf in self.tree.get_terminals()
    )
开发者ID:neherlab,项目名称:treetime,代码行数:14,代码来源:treeanc.py

示例13: infer_branch_associations

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def infer_branch_associations(path, metainfo_fpath, meta_data_config,
    total_strains_count, strain_fraction_branch_association):
    """
    Compute branch associations between gene-cluster trees and metadata.

    For every cluster whose last list entry reaches
    total_strains_count*strain_fraction_branch_association, the cluster tree
    '<path>/geneCluster/<clusterID>.nwk' is read and, for each metadata
    column flagged with associate == 'yes', the value returned by
    calc_significance() is stored under [clusterID][meta_category].  Columns
    flagged with log_scale == 'yes' are log-transformed first.

    The collected results are pickled to '<path>/branch_association.cpk'.
    """
    from sf_geneCluster_align_makeTree import load_sorted_clusters
    from sf_coreTree_json import metadata_load

    def log_transform(x):
        return np.log(x)

    def identity(x):
        return x

    association_dict = defaultdict(dict)
    metadata = Metadata(metainfo_fpath, meta_data_config)
    metadata_dict = metadata.to_dict()

    ## load_sorted_clusters yields: [(clusterID, [ count_strains,[memb1,...],count_genes]),...]
    for clusterID, gene in load_sorted_clusters(path):
        if not (gene[-1] >= total_strains_count*strain_fraction_branch_association):
            continue
        print(clusterID)
        tree = Phylo.read("%s/geneCluster/%s.nwk"%(path, clusterID), 'newick')
        assoc = BranchAssociation(tree, metadata_dict)
        for col, d in metadata.data_description.iterrows():
            if d['associate'] != 'yes':
                continue
            use_log_scale = 'log_scale' in d and d['log_scale']=='yes'
            transform = log_transform if use_log_scale else identity
            assoc.calc_up_down_averages(d["meta_category"], transform = transform)
            association_dict[clusterID][d["meta_category"]] = assoc.calc_significance()

    write_pickle("%s/branch_association.cpk"%path, association_dict)
开发者ID:neherlab,项目名称:pan-genome-analysis,代码行数:29,代码来源:sf_association.py

示例14: infer_presence_absence_associations

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def infer_presence_absence_associations(path, metainfo_fpath, meta_data_config,
    total_strains_count, min_strain_fraction_association, max_strain_fraction_association):
    """
    Compute presence/absence associations between gene clusters and metadata.

    The strain tree '<path>/geneCluster/strain_tree.nwk' is read once.  For
    every cluster whose last list entry lies strictly between
    total_strains_count*min_strain_fraction_association and
    total_strains_count*max_strain_fraction_association, the gain/loss data
    is loaded and, for each metadata column flagged with associate == 'yes',
    the absolute association score is stored under [clusterID][meta_category]
    (an infinite score is stored as 0.0).  Columns flagged with
    log_scale == 'yes' are log-transformed first.

    The collected results are pickled to
    '<path>/presence_absence_association.cpk'.

    The strain tree path is built with os.path.join so that *path* works with
    or without a trailing separator, like the other paths in this function.
    """
    import os
    from sf_geneCluster_align_makeTree import load_sorted_clusters
    from sf_coreTree_json import metadata_load

    def log_transform(x):
        return np.log(x)

    def identity(x):
        return x

    data_description = meta_data_config
    association_dict = defaultdict(dict)
    metadata = Metadata(metainfo_fpath, data_description)
    metadata_dict = metadata.to_dict()
    min_strains_association = total_strains_count*min_strain_fraction_association
    max_strains_association = total_strains_count*max_strain_fraction_association
    sorted_genelist = load_sorted_clusters(path)
    ## sorted_genelist: [(clusterID, [ count_strains,[memb1,...],count_genes]),...]
    # TODO fix vis
    tree = Phylo.read(os.path.join(path, "geneCluster", "strain_tree.nwk"), 'newick')
    assoc = PresenceAbsenceAssociation(tree, metadata_dict)
    for clusterID, gene in sorted_genelist:
        if gene[-1]>min_strains_association and gene[-1]<max_strains_association:
            print(clusterID)
            gl = load_gain_loss(path, clusterID)
            for col, d in metadata.data_description.iterrows():
                if d['associate']=='yes':
                    if 'log_scale' in d and d['log_scale']=='yes':
                        t = log_transform
                    else:
                        t = identity
                    # kept inside the loop on purpose: the gain/loss data is
                    # re-set before every association call, as before
                    assoc.set_gain_loss(gl)
                    score = assoc.calc_association_simple(d["meta_category"], transform = t)
                    if np.isinf(score):
                        association_dict[clusterID][d["meta_category"]] = 0.0
                    else:
                        association_dict[clusterID][d["meta_category"]] = np.abs(score)

    write_pickle("%s/presence_absence_association.cpk"%path, association_dict)
开发者ID:neherlab,项目名称:pan-genome-analysis,代码行数:35,代码来源:sf_association.py

示例15: explore_paralogs

# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import read [as 别名]
def explore_paralogs(path, nstrains, paralog_branch_cutoff, paralog_frac_cutoff=0.3, plot=0):
    '''
    gather paralog statistics for all trees and plot if desired

    Every '*nwk' file in path+'geneCluster/' is read as newick and passed to
    find_best_split; for each tree with a split, the file name, split branch
    length and number of paralog nodes are recorded and written tab-separated
    to 'paralogy_statistics.txt' in the same directory.  Trees that cannot be
    read are reported and skipped.

    parameters:
    path -- run directory; must end with a path separator because
            'geneCluster/' is appended directly
    nstrains -- not used by this function
    paralog_branch_cutoff -- cutoff used to determined whether or not to split cluster.
                             (not used by this function)
    paralog_frac_cutoff  -- cutoff for paralog splitting as fraction of total strains.
                            (default 0.3 -- that is 30%; not used by this function)
    plot -- when truthy, save a scatter plot of branch length against number
            of paralogs as 'explore_paralogs.pdf' in the geneCluster directory
    '''
    cluster_seqs_path=path+'geneCluster/'
    paralog_stat = []
    for fname in glob.iglob(cluster_seqs_path+'*nwk'):
        try:
            tree = Phylo.read(fname, 'newick')
        except Exception:
            # two spaces after the colon reproduce the output of the former
            # print statement; written so it runs on Python 2 and 3
            print('(explore_paralogs) read tree problem:  %s' % (fname,))
            # skip this file: without this, the tree of the previous
            # iteration would be analysed again under the wrong file name
            continue
        best_split = find_best_split(tree)
        if best_split is not None:
            paralog_stat.append([fname, best_split.split_bl, len(best_split.para_nodes)])
    # text mode: the rows are str, which a binary-mode file rejects on Python 3
    with open(cluster_seqs_path+'paralogy_statistics.txt','w') as paralogy_statistics:
        for tree_fname, split_bl, n_para_nodes in paralog_stat:
            paralogy_statistics.write('%s\t%s\t%s\n'%(tree_fname.split('/')[-1], split_bl, n_para_nodes))

    def plot_paralogs(path):
        '''
        plot branch length against # of paralogs across trees
        '''
        import matplotlib.pyplot as plt
        plt.ion()
        plt.figure()
        plt.scatter([x[1] for x in paralog_stat], [x[2] for x in paralog_stat])
        plt.ylabel('# paralogs')
        plt.xlabel('branch length')
        plt.savefig(path+'explore_paralogs.pdf')

    if plot: plot_paralogs(cluster_seqs_path)
开发者ID:neherlab,项目名称:pan-genome-analysis,代码行数:39,代码来源:sf_split_paralogy.py


注：本文中的 Bio.Phylo.read 方法示例由纯净天空整理自 GitHub/MSDocs 等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。