當前位置: 首頁>>代碼示例>>Python>>正文


Python Phylo.read方法代碼示例

本文整理匯總了Python中Bio.Phylo.read方法的典型用法代碼示例。如果您正苦於以下問題:Python Phylo.read方法的具體用法?Python Phylo.read怎麼用?Python Phylo.read使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊Bio.Phylo的用法示例。


在下文中一共展示了Phylo.read方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: get_filenames_strains

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def get_filenames_strains(self, file_path_template_newick_tree):
		"""
		List the fasta file names of the simulated strains named in a newick tree.

		Every terminal node of the tree contributes one "<leaf name>.fasta" entry,
		in the order the tree reports its terminals.

		@attention: a leaf called 'ancestor' (compared case-insensitively) is taken
			to be the original sequence and is left out of the result

		@param file_path_template_newick_tree: File path to newick file
		@type file_path_template_newick_tree: str | unicode

		@return: list of file names of simulated genomes
		@rtype: list[str|unicode]
		"""
		assert self.validate_file(file_path_template_newick_tree)
		template_tree = Phylo.read(file_path_template_newick_tree, 'newick')
		leaf_names = (terminal.name for terminal in template_tree.get_terminals())
		return [
			"{prefix}.fasta".format(prefix=leaf_name)
			for leaf_name in leaf_names
			if leaf_name.lower() != "ancestor"
		]
開發者ID:CAMI-challenge,項目名稱:CAMISIM,代碼行數:23,代碼來源:strainsimulationwrapper.py

示例2: save_tree_with_new_names

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def save_tree_with_new_names(tree_in, tree_out, name_subs):
    """
    Rename the tips of a tree and write the result in Newick format.

    The input format is detected by trying each candidate format in turn until
    one is read without a ValueError. The tree parsed during detection is the
    one that gets renamed, so the input is only read once per successful format.

    Parameters
    ----------
    tree_in
        Input tree, passed straight to ``Phylo.read``.
    tree_out
        Destination passed straight to ``Phylo.write``.
    name_subs
        Lookup from current tip name (as ``str``) to replacement name;
        indexed as ``name_subs[name]``.

    The program exits via ``sys.exit`` with an error message when no format
    can read the tree or when a tip name has no entry in ``name_subs``.
    """
    print()
    print('Writing Microreact tree to: {}'.format(tree_out))
    tree = None
    for tree_format in ['newick', 'nexus', 'nexml', 'phyloxml', 'cdao']:
        try:
            tree = Phylo.read(tree_in, tree_format)
            break
        except ValueError:
            # Deliberate best-effort: this format did not fit, try the next one.
            pass
    if tree is None:
        sys.exit('Error: could not read input tree')

    for node in tree.get_terminals():
        name = str(node.name)
        try:
            node.name = name_subs[name]
        except LookupError:
            # LookupError covers KeyError (mappings) as well as IndexError
            # (sequences); the original caught only IndexError, so a missing
            # key in a dict escaped as an unhandled KeyError traceback.
            sys.exit('Error: sample name in tree not in Kleborate data: ' + name)
    Phylo.write(tree, tree_out, 'newick')
開發者ID:katholt,項目名稱:Kleborate,代碼行數:24,代碼來源:kleborate_to_microreact.py

示例3: simulate_strains

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def simulate_strains(
		self, meta_table, genome_id_to_amounts, genome_id_to_file_path_genome, genome_id_to_file_path_gff=None):
		"""
		Generate strain-level diversity around isolate assemblies (sgEvolver, according to
		the original documentation) and add randomly picked strains to
		genome_id_to_file_path_genome and the metadata table.

		@attention genome_id_to_file_path_genome: Will be extended with IDs and file paths to the strains

		@param meta_table: Metadata table containing genome information
		@type meta_table: MetadataTable
		@param genome_id_to_amounts: Mapping from genome id to the amount of strains
		@type genome_id_to_amounts: dict[str, int]
		@param genome_id_to_file_path_genome: Mapping from genome id to the file path of the genome
		@type genome_id_to_file_path_genome: dict[str, str]
		@param genome_id_to_file_path_gff: Mapping from genome id to the file path of the gene annotations of a genome
		@type genome_id_to_file_path_gff: dict[str, str]

		@return: Nothing
		@rtype: None
		"""
		assert isinstance(meta_table, MetadataTable)
		assert isinstance(genome_id_to_amounts, dict)
		assert isinstance(genome_id_to_file_path_genome, dict)
		gff_given = genome_id_to_file_path_gff is not None
		assert not gff_given or isinstance(genome_id_to_file_path_gff, dict)
		if not gff_given:
			self._logger.warning(
				"No gff file (gene annotation) was given. Simulating strains without such a file can break genes.")

		# NOTE(review): the result of validate_file is not inspected here - confirm that is intended
		for genome_path in genome_id_to_file_path_genome.values():
			self.validate_file(genome_path)
		if gff_given:
			for gff_path in genome_id_to_file_path_gff.values():
				self.validate_file(gff_path)

		# read file and generate strain diversity for each assembly
		self._simulate_strains(genome_id_to_amounts, genome_id_to_file_path_genome, genome_id_to_file_path_gff)
		# then subsample the strains
		self._pick_random_strains(meta_table, genome_id_to_amounts, genome_id_to_file_path_genome)
開發者ID:CAMI-challenge,項目名稱:CAMISIM,代碼行數:39,代碼來源:strainsimulationwrapper.py

示例4: get_root_sequence

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def get_root_sequence(root_node, ref=None, translations=None):
    '''
    Build the json-ready record of the root sequence: the nucleotide sequence
    under the key "nuc" plus one entry per translation. Having the full root
    sequence allows look-up of every state, including invariant ones.

    Parameters
    ----------
    root_node : dict
        data associated with the node; its "sequence" and "aa_sequences"
        entries are used unless both file names below are given
    ref : str, optional
        filename of the root sequence (fasta)
    translations : str, optional
        file name of translations (fasta)

    Returns
    -------
    dict
        dict of nucleotide sequence and translations
    '''
    # Without both files, everything comes from the node's own data.
    if not (ref and translations):
        sequences = {"nuc": root_node["sequence"]}
        sequences.update(root_node["aa_sequences"])
        return sequences

    from Bio import SeqIO
    sequences = {'nuc': str(SeqIO.read(ref, 'fasta').seq)}
    for record in SeqIO.parse(translations, 'fasta'):
        sequences[record.id] = str(record.seq)
    return sequences
開發者ID:nextstrain,項目名稱:augur,代碼行數:34,代碼來源:export_v1.py

示例5: set_description

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def set_description(data_json, cmd_line_description_file):
    """
    Load the Markdown text stored in *cmd_line_description_file* (read as
    UTF-8) and store it under `meta.description` of *data_json*.

    A missing file is reported through `fatal`.
    """
    try:
        with open(cmd_line_description_file, encoding='utf-8') as handle:
            data_json['meta']['description'] = handle.read()
    except FileNotFoundError:
        fatal("Provided desciption file {} does not exist".format(cmd_line_description_file))
開發者ID:nextstrain,項目名稱:augur,代碼行數:13,代碼來源:export_v2.py

示例6: load_alignments

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def load_alignments(sequence_files, gene_names):
    """
    Read one fasta alignment per gene.

    *sequence_files* and *gene_names* are paired up positionally; the result
    maps each gene name to the alignment read from the matching file.
    """
    from Bio import AlignIO
    return {
        gene: AlignIO.read(fasta_path, 'fasta')
        for fasta_path, gene in zip(sequence_files, gene_names)
    }
開發者ID:nextstrain,項目名稱:augur,代碼行數:8,代碼來源:reconstruct_sequences.py

示例7: run

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def run(args):
        """
        Fit a TreeModel on the newick tree in args.tree with the titers in
        args.titers and write the resulting model to args.output as JSON.

        If training raises InsufficientDataException, either an empty model
        (no potencies, no avidities) is written when args.allow_empty_model is
        set, or the program exits with status 1.
        """
        from .titer_model import TreeModel
        T = Phylo.read(args.tree, 'newick')
        TM_tree = TreeModel(T, args.titers)

        def node_titers():
            # per-clade titer values, keyed by clade name
            return {n.name: {"dTiter": n.dTiter, "cTiter": n.cTiter}
                    for n in T.find_clades()}

        try:
            TM_tree.prepare()
            TM_tree.train()
            tree_model = {
                'titers': TM_tree.compile_titers(),
                'potency': TM_tree.compile_potencies(),
                'avidity': TM_tree.compile_virus_effects(),
                'nodes': node_titers(),
            }
        except InsufficientDataException:
            print("Unable to train tree model.", file=sys.stderr)
            if not args.allow_empty_model:
                print("Exiting.")
                sys.exit(1)
            print("Writing empty model.", file=sys.stderr)
            tree_model = {
                'titers': TM_tree.compile_titers(),
                'potency': {},
                'avidity': {},
                'nodes': node_titers(),
            }

        # export the tree model
        write_json(tree_model, args.output)
        print("\nInferred titer model of type 'TreeModel' using augur:"
              "\n\tNeher et al. Prediction, dynamics, and visualization of antigenic phenotypes of seasonal influenza viruses."
              "\n\tPNAS, vol 113, 10.1073/pnas.1525578113\n")
        print("results written to", args.output)
開發者ID:nextstrain,項目名稱:augur,代碼行數:33,代碼來源:titers.py

示例8: draw_tree

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def draw_tree(input_number: int, output_dir: str, tag: str) -> str:
    """ Run fasttree on "trimmed_alignment<input_number>.fasta" and render the
        resulting tree as "<tag>.png" inside the output directory

        Arguments:
            input_number: number used to build the alignment file name
            output_dir: directory the image is written to
            tag: base name of the image, also the label drawn in green

        Returns:
            the filename of the image generated, or an empty string when
            the fasttree output is not a valid newick tree
    """
    matplotlib.use('Agg')
    alignment_name = "trimmed_alignment%d.fasta" % input_number
    run_result = subprocessing.execute(["fasttree", "-quiet", "-fastest", "-noml", alignment_name])
    if not run_result.successful():
        raise RuntimeError("Fasttree failed to run successfully:", run_result.stderr)

    newick_text = StringIO(run_result.stdout)
    tree_filename = os.path.join(output_dir, tag + '.png')
    try:
        tree = Phylo.read(newick_text, 'newick')
    except NewickError:
        logging.debug('Invalid newick tree for %r', tag)
        return ''

    # enforce a minimum distance between branches: every branch is lengthened
    # by a twentieth of the largest root-to-leaf distance
    longest = max(tree.distance(leaf) for leaf in tree.get_terminals())
    for clade in tree.get_nonterminals() + tree.get_terminals():
        current = clade.branch_length
        clade.branch_length = abs(current) + longest / 20 if current else longest / 20

    # the query gene is drawn in a different colour
    Phylo.draw(tree, do_show=False, label_colors={tag: 'green'},
               label_func=lambda node: str(node).replace("|", " "))
    figure = matplotlib.pyplot.gcf()
    figure.set_size_inches(20, (tree.count_terminals() / 3))
    matplotlib.pyplot.axis('off')
    figure.savefig(tree_filename, bbox_inches='tight')
    matplotlib.pyplot.close(figure)
    return tree_filename
開發者ID:antismash,項目名稱:antismash,代碼行數:40,代碼來源:trees.py

示例9: build_newick_fasttree

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def build_newick_fasttree(aln_fname, nuc=True):
    """
    Build a tree from an alignment with the external ``fasttree`` program.

    fasttree's standard output is written to ``tmp.nwk`` and its standard
    error to ``fasttree_stderr`` (both in the current working directory);
    ``tmp.nwk`` is then parsed as newick and returned. The exit status of
    fasttree is not checked.

    Parameters
    ----------
    aln_fname : str
        Path of the alignment file handed to fasttree.
    nuc : bool, optional
        When true, ``-nt`` is passed to fasttree.

    Returns
    -------
    The tree read from ``tmp.nwk`` by ``Bio.Phylo.read``.

    Raises
    ------
    OSError
        If the ``fasttree`` executable cannot be started.
    """
    import subprocess
    from Bio import Phylo
    print("Building tree with fasttree")
    tree_cmd = ["fasttree"]
    if nuc:
        tree_cmd.append("-nt")
    tree_cmd.append(aln_fname)

    # The command is passed as an argument list (no shell), so an alignment
    # path containing spaces or shell metacharacters is handed over verbatim
    # instead of being re-parsed by a shell. Redirections are done explicitly.
    with open("tmp.nwk", "w") as tree_out, open("fasttree_stderr", "w") as err_out:
        subprocess.call(tree_cmd, stdout=tree_out, stderr=err_out)
    return Phylo.read("tmp.nwk", 'newick')
開發者ID:neherlab,項目名稱:treetime,代碼行數:12,代碼來源:utils.py

示例10: build_newick_raxml

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def build_newick_raxml(aln_fname, nthreads=2, raxml_bin="raxml", **kwargs):
    """
    Build a tree from a fasta alignment with the external RAxML program.

    The alignment is first rewritten as relaxed phylip to ``temp.phyx`` in the
    current working directory, RAxML is run on that file with run name
    ``tre``, and ``RAxML_bestTree.tre`` is parsed as newick and returned.
    The exit status of RAxML is not checked.

    Parameters
    ----------
    aln_fname : str
        Path of the fasta alignment.
    nthreads : int, optional
        Value passed to RAxML's ``-T`` option.
    raxml_bin : str, optional
        RAxML executable (name on PATH or a path). It is passed as a single
        program name, so a path containing spaces works.
    **kwargs
        Accepted for interface compatibility; not used.

    Returns
    -------
    The tree read from ``RAxML_bestTree.tre`` by ``Bio.Phylo.read``.

    Raises
    ------
    OSError
        If the RAxML executable cannot be started.
    """
    import subprocess
    print("Building tree with raxml")
    from Bio import Phylo, AlignIO
    AlignIO.write(AlignIO.read(aln_fname, 'fasta'), "temp.phyx", "phylip-relaxed")
    # Argument list instead of a concatenated shell string: nothing in the
    # arguments is re-interpreted by a shell.
    cmd = [
        raxml_bin,
        "-f", "d",
        "-T", str(nthreads),
        "-m", "GTRCAT",
        "-c", "25",
        "-p", "235813",
        "-n", "tre",
        "-s", "temp.phyx",
    ]
    subprocess.call(cmd)
    return Phylo.read('RAxML_bestTree.tre', "newick")
開發者ID:neherlab,項目名稱:treetime,代碼行數:10,代碼來源:utils.py

示例11: tree

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def tree(self, in_tree):
        '''
        Assign a tree to the internal self._tree variable and prepare it.

        The tree is either taken as is (if in_tree is a Phylo.BaseTree.Tree)
        or loaded from file (if in_tree is a string naming an existing file).
        Files are read as newick first; if that fails and the file extension
        is 'nexus' or 'nex', they are read as nexus.

        After loading, missing branch lengths are set to 0.0, any "_cseq"
        attribute is removed from the nodes, original_length and
        mutation_length are initialised from the branch length, and
        prepare_tree() is called. If self.data is set, the consistency check
        between alignment, tree and GTR is run.

        Returns ttconf.SUCCESS.

        Raises ValueError if the input cannot be loaded or if the tree has
        fewer than 3 tips.
        '''
        from os.path import isfile
        self._tree = None
        if isinstance(in_tree, Phylo.BaseTree.Tree):
            self._tree = in_tree
        elif type(in_tree) in string_types and isfile(in_tree):
            try:
                self._tree=Phylo.read(in_tree, 'newick')
            except Exception:
                # Was a bare "except:", which also intercepted
                # KeyboardInterrupt/SystemExit. Any ordinary parsing failure
                # still falls through to the nexus attempt below.
                fmt = in_tree.split('.')[-1]
                if fmt in ['nexus', 'nex']:
                    self._tree=Phylo.read(in_tree, 'nexus')
                else:
                    raise ValueError('TreeAnc: could not load tree, format needs to be nexus or newick! input was '+str(in_tree))
        else:
            raise ValueError('TreeAnc: could not load tree! input was '+str(in_tree))

        n_tips = self._tree.count_terminals()
        if n_tips<3:
            raise ValueError('TreeAnc: tree in %s has only %d tips. Please check your tree!'%(str(in_tree), n_tips))

        # remove all existing sequence attributes
        for node in self._tree.find_clades():
            # missing (None) or zero branch lengths become 0.0
            node.branch_length = node.branch_length if node.branch_length else 0.0
            if hasattr(node, "_cseq"):
                delattr(node, "_cseq")
            node.original_length = node.branch_length
            node.mutation_length = node.branch_length
        self.prepare_tree()

        if self.data:
            self._check_alignment_tree_gtr_consistency()

        return ttconf.SUCCESS
開發者ID:neherlab,項目名稱:treetime,代碼行數:39,代碼來源:treeanc.py

示例12: prepare_tree

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def prepare_tree(self):
        """
        Bring the tree into a consistent state for later computations.

        Resets the sequence_reconstruction flag, gives the root a branch
        length (and mutation length) of 0.001, ladderizes the tree, runs
        _prepare_nodes() and rebuilds the name -> leaf lookup table.
        Intended to be run once the tree is read and after every rerooting,
        topology change or branch length optimization.
        """
        self.sequence_reconstruction = False

        root = self.tree.root
        root.branch_length = 0.001
        root.mutation_length = root.branch_length

        self.tree.ladderize()
        self._prepare_nodes()

        leaves_by_name = {}
        for leaf in self.tree.get_terminals():
            leaves_by_name[leaf.name] = leaf
        self._leaves_lookup = leaves_by_name
開發者ID:neherlab,項目名稱:treetime,代碼行數:14,代碼來源:treeanc.py

示例13: infer_branch_associations

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def infer_branch_associations(path, metainfo_fpath, meta_data_config,
    total_strains_count, strain_fraction_branch_association):
    """
    Compute branch associations between gene-cluster trees and metadata.

    For every gene cluster whose last list entry is at least
    total_strains_count*strain_fraction_branch_association, the cluster tree
    "<path>/geneCluster/<clusterID>.nwk" is read and, for each metadata
    category flagged with associate=='yes', the significance returned by
    BranchAssociation is stored. Values are log-transformed when the category
    has log_scale=='yes'. The nested dict clusterID -> category -> value is
    pickled to "<path>/branch_association.cpk".
    """
    from sf_geneCluster_align_makeTree import load_sorted_clusters
    from sf_coreTree_json import metadata_load
    association_dict = defaultdict(dict)
    metadata = Metadata(metainfo_fpath, meta_data_config)
    metadata_dict = metadata.to_dict()

    ## entries look like: (clusterID, [ count_strains,[memb1,...],count_genes])
    for clusterID, gene in load_sorted_clusters(path):
        if not gene[-1]>=total_strains_count*strain_fraction_branch_association:
            continue
        print(clusterID)
        tree = Phylo.read("%s/geneCluster/%s.nwk"%(path, clusterID), 'newick')
        assoc = BranchAssociation(tree, metadata_dict)
        for _col, d in metadata.data_description.iterrows():
            if not d['associate']=='yes':
                continue
            use_log = 'log_scale' in d and d['log_scale']=='yes'
            t = (lambda x:np.log(x)) if use_log else (lambda x:x)
            category = d["meta_category"]
            assoc.calc_up_down_averages(category, transform = t)
            association_dict[clusterID][category] = assoc.calc_significance()

    write_pickle("%s/branch_association.cpk"%path, association_dict)
開發者ID:neherlab,項目名稱:pan-genome-analysis,代碼行數:29,代碼來源:sf_association.py

示例14: infer_presence_absence_associations

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def infer_presence_absence_associations(path, metainfo_fpath, meta_data_config,
    total_strains_count, min_strain_fraction_association, max_strain_fraction_association):
    """
    Compute associations between gene presence/absence and metadata.

    The strain tree "<path>geneCluster/strain_tree.nwk" is read once. For
    every gene cluster whose last list entry lies strictly between the
    min/max strain fractions (scaled by total_strains_count), the gain/loss
    pattern is loaded and, for each metadata category flagged with
    associate=='yes', the absolute association score is stored (0.0 when the
    score is infinite). Values are log-transformed when the category has
    log_scale=='yes'. The nested dict clusterID -> category -> score is
    pickled to "<path>/presence_absence_association.cpk".
    """
    from sf_geneCluster_align_makeTree import load_sorted_clusters
    from sf_coreTree_json import metadata_load
    association_dict = defaultdict(dict)
    metadata = Metadata(metainfo_fpath, meta_data_config)
    metadata_dict = metadata.to_dict()
    lower_bound = total_strains_count*min_strain_fraction_association
    upper_bound = total_strains_count*max_strain_fraction_association
    ## entries look like: (clusterID, [ count_strains,[memb1,...],count_genes])
    sorted_genelist = load_sorted_clusters(path)
    # TODO fix vis
    tree = Phylo.read("%sgeneCluster/strain_tree.nwk"%(path), 'newick')
    assoc = PresenceAbsenceAssociation(tree, metadata_dict)
    for clusterID, gene in sorted_genelist:
        if not (gene[-1]>lower_bound and gene[-1]<upper_bound):
            continue
        print(clusterID)
        gl = load_gain_loss(path, clusterID)
        for _col, d in metadata.data_description.iterrows():
            if not d['associate']=='yes':
                continue
            use_log = 'log_scale' in d and d['log_scale']=='yes'
            t = (lambda x:np.log(x)) if use_log else (lambda x:x)
            assoc.set_gain_loss(gl)
            category = d["meta_category"]
            score = assoc.calc_association_simple(category, transform = t)
            association_dict[clusterID][category] = 0.0 if np.isinf(score) else np.abs(score)

    write_pickle("%s/presence_absence_association.cpk"%path, association_dict)
開發者ID:neherlab,項目名稱:pan-genome-analysis,代碼行數:35,代碼來源:sf_association.py

示例15: explore_paralogs

# 需要導入模塊: from Bio import Phylo [as 別名]
# 或者: from Bio.Phylo import read [as 別名]
def explore_paralogs(path, nstrains, paralog_branch_cutoff, paralog_frac_cutoff=0.3, plot=0):
    '''
    gather paralog statistics for all trees and plot if desired

    Every file matching "<path>geneCluster/*nwk" is read as a newick tree and
    passed to find_best_split; for each tree that yields a split, the file
    name, the split branch length and the number of paralog nodes are
    recorded. The records are written tab-separated to
    "<path>geneCluster/paralogy_statistics.txt". Trees that cannot be read
    are reported on stdout and skipped.

    parameters:
    path -- project folder; concatenated directly with 'geneCluster/', so it
            is expected to end with a path separator
    nstrains -- not used in this function body
    paralog_branch_cutoff -- cutoff used to determined whether or not to split cluster.
                             (not used in this function body)
    paralog_frac_cutoff  -- cutoff for paralog splitting as fraction of total strains.
                            (default 0.3 -- that is 30%; not used in this function body)
    plot -- if true, save a scatter plot of branch length against number of
            paralogs to "<path>geneCluster/explore_paralogs.pdf"
    '''
    cluster_seqs_path=path+'geneCluster/'
    paralog_stat = []
    for fname in glob.iglob(cluster_seqs_path+'*nwk'):
        try:
            tree = Phylo.read(fname, 'newick')
        except Exception:
            # single-argument print() behaves the same on Python 2 and 3
            print('(explore_paralogs) read tree problem:  %s' % fname)
            # skip this file: without this, the code below would run on the
            # tree of the previous file (or on an undefined name)
            continue
        best_split = find_best_split(tree)
        if best_split is not None:
            paralog_stat.append([fname, best_split.split_bl, len(best_split.para_nodes)])
    # text mode: the rows written below are text strings, not bytes
    with open(cluster_seqs_path+'paralogy_statistics.txt','w') as paralogy_statistics:
        for x,y,z in paralog_stat:
            paralogy_statistics.write('%s\t%s\t%s\n'%(x.split('/')[-1],y,z))

    def plot_paralogs(path):
        '''
        plot branch length against # of paralogs across trees
        '''
        import matplotlib.pyplot as plt
        plt.ion()
        plt.figure()
        plt.scatter([x[1] for x in paralog_stat], [x[2] for x in paralog_stat])
        plt.ylabel('# paralogs')
        plt.xlabel('branch length')
        plt.savefig(path+'explore_paralogs.pdf')

    if plot: plot_paralogs(cluster_seqs_path)
    #return paralog_split_list
開發者ID:neherlab,項目名稱:pan-genome-analysis,代碼行數:39,代碼來源:sf_split_paralogy.py


注:本文中的Bio.Phylo.read方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。