本文整理汇总了Python中Bio.Phylo.write方法的典型用法代码示例。如果您正苦于以下问题:Python Phylo.write方法的具体用法?Python Phylo.write怎么用?Python Phylo.write使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Bio.Phylo的用法示例。
在下文中一共展示了Phylo.write方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: save_tree_with_new_names
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def save_tree_with_new_names(tree_in, tree_out, name_subs):
    """Rename the tips of a tree and write the result in Newick format.

    The input format is not known in advance, so every format in the list
    below is tried with ``Phylo.read`` until one parses without a
    ``ValueError``.

    Parameters:
        tree_in: input tree, passed straight to ``Phylo.read``.
        tree_out: destination, passed straight to ``Phylo.write``.
        name_subs: lookup from the tip name found in the tree (as ``str``)
            to the name that should be written out.

    Exits via ``sys.exit`` when no format parses the input or when a tip
    name has no entry in ``name_subs``.
    """
    print()
    print('Writing Microreact tree to: {}'.format(tree_out))

    # Keep the first successfully parsed tree instead of parsing twice.
    tree = None
    for try_tree_format in ['newick', 'nexus', 'nexml', 'phyloxml', 'cdao']:
        try:
            tree = Phylo.read(tree_in, try_tree_format)
            break
        except ValueError:
            pass
    if tree is None:
        sys.exit('Error: could not read input tree')

    for node in tree.get_terminals():
        name = str(node.name)
        try:
            node.name = name_subs[name]
        except (KeyError, IndexError):
            # A mapping raises KeyError for a missing name; IndexError is
            # still caught so any sequence-like lookup keeps working.
            sys.exit('Error: sample name in tree not in Kleborate data: ' + name)
    Phylo.write(tree, tree_out, 'newick')
示例2: polytomies_midpointRooting
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def polytomies_midpointRooting(infileName, outfileName, clusterID):
    """Resolve polytomies, midpoint-root and ladderize a Newick tree with ete2.

    Parameters:
        infileName: path of the input file; only its first line is parsed.
        outfileName: file name of the output tree, written relative to the
            current working directory ('./<outfileName>').
        clusterID: not used by the function body (it only appears in the
            disabled diagnostic message below).

    Internal nodes that come out of ete2 with an empty name are renamed to
    'NODE_0<index>', where <index> is their position in the post-order
    traversal of the descendants.
    """
    # use ete2 to solve polytomies and midpoint rooting
    from ete2 import Tree
    # Context manager so the input handle is closed even if parsing fails.
    with open(infileName, 'rb') as infile:
        newickString = infile.readline().rstrip()
    tree = Tree(newickString, format=1)
    tree.resolve_polytomy(recursive=True)
    try:
        tree.set_outgroup(tree.get_midpoint_outgroup())
    except Exception:
        # Best effort: when midpoint rooting is not possible the tree is
        # written with its existing root. KeyboardInterrupt/SystemExit are
        # no longer swallowed.
        # print clusterID, ' can not conduct midpoint rooting'
        pass
    tree.ladderize()

    ## adding the missing node.name
    for ind, node in enumerate(tree.iter_descendants("postorder")):
        if node.name == '':
            node.name = '%s%s' % ('NODE_0', ind)

    with open('./%s' % outfileName, 'wb') as outfile:
        outfile.write(tree.write(format=1))
示例3: make_nj_tree
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def make_nj_tree(dist_mat, dArgs, aSampleNames):
    '''
    Uses Biopython.Phylo to make a neighbour joining tree from a distance matrix

    Parameters
    ----------
    dist_mat: dict
        distance matrix as a dict of dicts
        distance_a_to_b = dist_mat[a][b]
    dArgs: dict
        input argument dictionary; dArgs['tree'] is the output location
    aSampleNames: list
        list of sample names

    Returns
    -------
    returns 0
    also writes tree file to dArgs['tree'] in newick format
    '''

    # Lower-triangular matrix: row i holds the distances from sample i to
    # samples 0..i-1 followed by the zero on the diagonal.
    aSimpleMatrix = [
        [dist_mat[sample_1][sample_2] for sample_2 in aSampleNames[:i]] + [0]
        for i, sample_1 in enumerate(aSampleNames)
    ]

    # Prefer the public class name when the installed Biopython provides it
    # and fall back to the private name this code was written against.
    matrix_class = getattr(TreeConstruction, 'DistanceMatrix', None)
    if matrix_class is None:
        matrix_class = TreeConstruction._DistanceMatrix
    oDistMat = matrix_class(aSampleNames, aSimpleMatrix)

    constructor = TreeConstruction.DistanceTreeConstructor()
    oTree = constructor.nj(oDistMat)
    Phylo.write(oTree, dArgs['tree'], 'newick')
    logging.info("Tree file written.")
    return 0
# --------------------------------------------------------------------------------------------------
示例4: register_arguments
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def register_arguments(parser):
    """Add the tree-refinement command line options to ``parser``.

    ``parser`` only needs to provide ``add_argument`` and ``set_defaults``
    (for example an ``argparse.ArgumentParser``). ``--tree`` is the only
    required option. ``--covariance`` and ``--no-covariance`` share the
    destination ``covariance``, which defaults to True.

    Note that ``--root`` yields a list when given on the command line
    (``nargs="+"``) but the plain string 'best' when omitted.
    """
    parser.add_argument('--alignment', '-a', help="alignment in fasta or VCF format")
    parser.add_argument('--tree', '-t', required=True, help="prebuilt Newick")
    parser.add_argument('--metadata', type=str, help="tsv/csv table with meta data for sequences")
    parser.add_argument('--output-tree', type=str, help='file name to write tree to')
    parser.add_argument('--output-node-data', type=str, help='file name to write branch lengths as node data')
    parser.add_argument('--timetree', action="store_true", help="produce timetree using treetime")
    parser.add_argument('--coalescent', help="coalescent time scale in units of inverse clock rate (float), optimize as scalar ('opt'), or skyline ('skyline')")
    parser.add_argument('--gen-per-year', default=50, type=float, help="number of generations per year, relevant for skyline output('skyline')")
    parser.add_argument('--clock-rate', type=float, help="fixed clock rate")
    parser.add_argument('--clock-std-dev', type=float, help="standard deviation of the fixed clock_rate estimate")
    # The help text previously read "least-squares'" with no opening quote.
    parser.add_argument('--root', nargs="+", default='best', help="rooting mechanism ('best', 'least-squares', 'min_dev', 'oldest') "
                                "OR node to root by OR two nodes indicating a monophyletic group to root by. "
                                "Run treetime -h for definitions of rooting methods.")
    parser.add_argument('--keep-root', action="store_true", help="do not reroot the tree; use it as-is. "
                                "Overrides anything specified by --root.")
    parser.add_argument('--covariance', dest='covariance', action='store_true', help="Account for covariation when estimating "
                                "rates and/or rerooting. "
                                "Use --no-covariance to turn off.")
    parser.add_argument('--no-covariance', dest='covariance', action='store_false')  # If you set help here, it displays 'default: True' - which is confusing!
    parser.add_argument('--keep-polytomies', action='store_true', help='Do not attempt to resolve polytomies')
    parser.add_argument('--precision', type=int, choices=[0,1,2,3], help="precision used by TreeTime to determine the number of grid points that are used for the evaluation of the branch length interpolation objects. Values range from 0 (rough) to 3 (ultra fine) and default to 'auto'.")
    parser.add_argument('--date-format', default="%Y-%m-%d", help="date format")
    parser.add_argument('--date-confidence', action="store_true", help="calculate confidence intervals for node dates")
    parser.add_argument('--date-inference', default='joint', choices=["joint", "marginal"],
                                help="assign internal nodes to their marginally most likely dates, not jointly most likely")
    parser.add_argument('--branch-length-inference', default='auto', choices = ['auto', 'joint', 'marginal', 'input'],
                                help='branch length mode of treetime to use')
    parser.add_argument('--clock-filter-iqd', type=float, help='clock-filter: remove tips that deviate more than n_iqd '
                                'interquartile ranges from the root-to-tip vs time regression')
    parser.add_argument('--vcf-reference', type=str, help='fasta file of the sequence the VCF was mapped to')
    parser.add_argument('--year-bounds', type=int, nargs='+', help='specify min or max & min prediction bounds for samples with XX in year')
    parser.add_argument('--divergence-units', type=str, choices=['mutations', 'mutations-per-site'],
                                default='mutations-per-site', help='Units in which sequence divergences is exported.')
    parser.add_argument('--seed', type=int, help='seed for random number generation')
    # Both covariance switches write to the same destination; on by default.
    parser.set_defaults(covariance=True)
示例5: register_arguments_beast
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def register_arguments_beast(subparsers):
    """
    Register the `beast` sub-command (used as `augur import beast`) and its options.

    `--mcc`, `--output-tree` and `--output-node-data` are mandatory; every
    other option has a default.
    """
    cmd = subparsers.add_parser('beast', help="Import beast analysis")
    add = cmd.add_argument

    # hidden flag, used to disambiguate subcommands
    add("--beast", default=True, help=SUPPRESS)
    add('--mcc', help="BEAST MCC tree", required=True)
    add('--most-recent-tip-date', type=float, default=0,
        help='Numeric date of most recent tip in tree (--tip-date-regex, --tip-date-format and --tip-date-delimeter are ignored if this is set)')
    add('--tip-date-regex', type=str, default=r'[0-9]{4}(\-[0-9]{2})*(\-[0-9]{2})*$',
        help='regex to extract dates from tip names')
    add('--tip-date-format', type=str, default="%Y-%m-%d",
        help='Format of date (if extracted by regex)')
    add('--tip-date-delimeter', type=str, default="-",
        help='delimeter used in tip-date-format. Used to match partial dates.')
    add('--verbose', action="store_true",
        help="Display verbose output. Only useful for debugging.")
    add('--recursion-limit', type=int, default=False,
        help="Set a custom recursion limit (dangerous!)")
    add('--output-tree', type=str, required=True,
        help='file name to write tree to')
    add('--output-node-data', type=str, required=True,
        help='file name to write (temporal) branch lengths & BEAST traits as node data')
示例6: run_beast
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def run_beast(args):
    '''
    Convert a BEAST MCC tree into a newick tree plus a node-data JSON file
    so that it can be processed / exported further by augur.

    Reads `mcc`, `verbose`, `recursion_limit`, the `most_recent_tip_date` /
    `tip_date_*` options and the two output paths from `args`.
    '''
    verbose = args.verbose
    mcc_path = args.mcc
    print("importing from BEAST MCC tree", mcc_path)

    limit = args.recursion_limit
    if limit:
        print("Setting recursion limit to %d"%(limit))
        sys.setrecursionlimit(limit)

    # this dict is what ends up in the exported json
    node_data = dict(
        comment="Imported from a BEAST MCC tree using `augur import beast`",
        mcc_file=mcc_path,
    )

    tree = parse_nexus(tree_path=mcc_path, verbose=verbose)
    summarise_parsed_traits(tree)

    # TreeAnc is created solely to give the internal nodes names (!):
    # it works on `tree` itself, which is modified in-place
    tree_anc = TreeAnc(tree=tree, aln=fake_alignment(tree), ref=None, gtr='JC69', verbose=1)

    # pull the date information out of the tree
    root_date_offset, most_recent_tip = calc_tree_dates(
        tree,
        args.most_recent_tip_date,
        args.tip_date_regex,
        args.tip_date_format,
        args.tip_date_delimeter,
    )
    compute_entropies_for_discrete_traits(tree)
    node_data['nodes'] = collect_node_data(tree, root_date_offset, most_recent_tip)

    Phylo.write(tree, args.output_tree, 'newick', format_branch_length='%1.8f')
    write_json(node_data, args.output_node_data)

    print_what_to_do_next(
        nodes=node_data['nodes'],
        mcc_path=mcc_path,
        tree_path=args.output_tree,
        node_data_path=args.output_node_data,
    )
示例7: print_save_plot_skyline
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def print_save_plot_skyline(tt, n_std=2.0, screen=True, save='', plot=''):
    """Print, save and/or plot the skyline inferred by ``tt.merger_model``.

    Parameters:
        tt: object whose ``merger_model.skyline_inferred(gen=50, confidence=n_std)``
            returns ``(skyline, conf)``; ``skyline.x`` / ``skyline.y`` and
            ``conf[0]`` / ``conf[1]`` are iterated in lockstep and reported
            under the columns date, N_e, lower and upper.
        n_std: forwarded as ``confidence`` and quoted in the header line.
        screen: when true, print the table to stdout.
        save: when non-empty, path of a UTF-8 text file the table is written
            to (header lines are prefixed with '#').
        plot: when non-empty, path the matplotlib figure is saved to;
            matplotlib is only imported in that case.
    """
    if plot:
        import matplotlib.pyplot as plt

    skyline, conf = tt.merger_model.skyline_inferred(gen=50, confidence=n_std)

    header1 = "Skyline assuming 50 gen/year and approximate confidence bounds (+/- %f standard deviations of the LH)\n"%n_std
    header2 = "date \tN_e \tlower \tupper"

    fh = open(save, 'w', encoding='utf-8') if save else None
    try:
        if screen:
            print('\t'+header1+'\t'+header2)
        if fh is not None:
            fh.write("#"+ header1+'#'+header2+'\n')
        for (x, y, y1, y2) in zip(skyline.x, skyline.y, conf[0], conf[1]):
            if screen:
                print("\t%1.1f\t%1.1f\t%1.1f\t%1.1f"%(x, y, y1, y2))
            if fh is not None:
                fh.write("%1.1f\t%1.1f\t%1.1f\t%1.1f\n"%(x, y, y1, y2))
        if fh is not None:
            print("\n --- written skyline to %s\n"%save)
    finally:
        # Close the output file even when formatting or writing a row fails.
        if fh is not None:
            fh.close()

    if plot:
        plt.figure()
        plt.fill_between(skyline.x, conf[0], conf[1], color=(0.8, 0.8, 0.8))
        plt.plot(skyline.x, skyline.y, label='maximum likelihood skyline')
        plt.yscale('log')
        plt.legend()
        plt.ticklabel_format(axis='x', useOffset=False)
        plt.savefig(plot)
示例8: tree_inference
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def tree_inference(aln_fname, tree_fname, tmp_dir=None,
                   methods=('iqtree', 'fasttree', 'raxml'), **kwargs):
    """Build a tree from an alignment and write it to ``tree_fname`` as Newick.

    The builders named in ``methods`` are tried in order (case-insensitive:
    'iqtree', 'fasttree', 'raxml'); the first one that returns a truthy tree
    wins. A builder that raises is skipped, an unknown name is reported.

    Parameters:
        aln_fname: path of the alignment file. If it does not exist a message
            is printed and nothing else is done.
        tree_fname: output path; it is resolved after the original working
            directory has been restored.
        tmp_dir: optional run directory. It is created if needed, the
            alignment is copied into it and the builders run from there.
        methods: iterable of builder names to try.
        **kwargs: accepted for compatibility, not used.

    Returns None. Failure of every builder is reported on stdout.
    """
    import os
    import shutil

    if not os.path.isfile(aln_fname):
        print("alignment file does not exist")
        # Nothing can be built without the input, so stop here.
        return

    cwd = os.getcwd()
    if tmp_dir:
        if not os.path.isdir(tmp_dir):
            try:
                os.makedirs(tmp_dir)
            except OSError as e:
                print("Cannot create run_dir",e)
        aln_fname_base = os.path.basename(aln_fname)
        shutil.copyfile(aln_fname, os.path.join(tmp_dir, aln_fname_base))
        aln_fname = aln_fname_base
        os.chdir(tmp_dir)

    # Defined before the loop so an empty `methods` reports a failure
    # instead of raising NameError.
    T = None
    try:
        for method in methods:
            T = None
            try:
                if method.lower()=='iqtree':
                    T = build_newick_iqtree(aln_fname)
                elif method.lower()=='fasttree':
                    T = build_newick_fasttree(aln_fname, nuc=True)
                elif method.lower()=='raxml':
                    T = build_newick_raxml(aln_fname)
                else:
                    print("Method not supported",method)
                if T:
                    break
            except Exception:
                # Best effort: a failing builder just means the next one is
                # tried. KeyboardInterrupt/SystemExit are not swallowed.
                continue
    finally:
        # Always return to the caller's working directory.
        os.chdir(cwd)

    if T is None:
        print("tree building failed. tried", ", ".join(methods), "but none worked")
    else:
        # Imported only when there is a tree to write.
        from Bio import Phylo
        Phylo.write(T, tree_fname, 'newick')
示例9: build_newick_raxml
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def build_newick_raxml(aln_fname, nthreads=2, raxml_bin="raxml", **kwargs):
    """Run RAxML on a fasta alignment and return the resulting tree.

    The alignment is rewritten as relaxed phylip to 'temp.phyx' in the
    current directory, the `raxml_bin` executable is launched through the
    shell with `nthreads` threads, and 'RAxML_bestTree.tre' is parsed as
    newick and returned. Extra keyword arguments are accepted and ignored.
    """
    import os
    import shutil
    print("Building tree with raxml")
    from Bio import AlignIO, Phylo

    alignment = AlignIO.read(aln_fname, 'fasta')
    AlignIO.write(alignment, "temp.phyx", "phylip-relaxed")

    raxml_call = " ".join((
        raxml_bin,
        "-f d -T",
        str(nthreads),
        "-m GTRCAT -c 25 -p 235813 -n tre -s temp.phyx",
    ))
    os.system(raxml_call)

    best_tree = Phylo.read('RAxML_bestTree.tre', "newick")
    return best_tree
示例10: main
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def main():
    """Produce the Microreact tree and CSV table from Kleborate results.

    The tree is rewritten with substituted sample names first; the
    tab-separated Kleborate table is then converted line by line (header
    first) into comma-separated rows and written to the CSV output.
    """
    args = get_arguments()
    name_subs = name_substitution(args.kleborate_in)
    check_for_unique_names(name_subs)
    save_tree_with_new_names(args.tree_in, args.tree_out, name_subs)
    autocolour_columns = get_autocolour_columns(args.kleborate_in)

    rows = []
    original_header = new_header = None
    with open(args.kleborate_in, 'rt') as kleborate_results:
        for raw_line in kleborate_results:
            text = raw_line.rstrip('\n')
            if original_header is not None:
                # every line after the first is a data row
                fields = get_data(text, name_subs, original_header, new_header)
            else:
                # the first line is the header
                original_header = text.split('\t')
                new_header = get_new_header(original_header, autocolour_columns)
                fields = new_header
            rows.append(','.join(fields))

    print()
    print('Writing Microreact table to: {}'.format(args.csv_out))
    with open(args.csv_out, 'wt') as output_csv:
        output_csv.writelines(row + '\n' for row in rows)
    print()
示例11: codon_align
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def codon_align(self, alignment_tool="mafft", prune=True, discard_premature_stops=False):
    '''
    takes a nucleotide alignment, translates it, aligns the amino acids, pads the gaps
    note that this suppresses any compensated frameshift mutations

    Parameters:
    - alignment_tool: ['mafft', 'muscle'] the commandline tool to use
    - prune, discard_premature_stops: a sequence with a '*' before its last
      translated position is dropped only when discard_premature_stops is
      true and prune is not False

    The work is done inside self.run_dir, which is created first and removed
    afterwards. On success the result is stored in self.aln. For an
    unsupported alignment_tool a message is printed and self.aln is left
    untouched.
    '''
    cwd = os.getcwd()
    make_dir(self.run_dir)
    os.chdir(self.run_dir)
    try:
        # translate
        aa_seqs = {}
        for seq in self.seqs.values():
            tempseq = seq.seq.translate(table="Bacterial")
            # use only sequences that translate without trouble
            if not discard_premature_stops or '*' not in str(tempseq)[:-1] or prune==False:
                aa_seqs[seq.id]=SeqRecord(tempseq,id=seq.id)
            else:
                print(seq.id,"has premature stops, discarding")

        tmpfname = 'temp_in.fasta'
        SeqIO.write(aa_seqs.values(), tmpfname,'fasta')

        aln_aa = None
        if alignment_tool=='mafft':
            os.system('mafft --reorder --amino temp_in.fasta 1> temp_out.fasta')
            aln_aa = AlignIO.read('temp_out.fasta', "fasta")
        elif alignment_tool=='muscle':
            from Bio.Align.Applications import MuscleCommandline
            cline = MuscleCommandline(input=tmpfname, out=tmpfname[:-5]+'aligned.fasta')
            cline()
            aln_aa = AlignIO.read(tmpfname[:-5]+'aligned.fasta', "fasta")
        else:
            # print() call instead of the Python 2 print statement; a single
            # parenthesised argument prints the same text on Python 2 and 3.
            print('Alignment tool not supported:'+alignment_tool)

        if aln_aa is not None:
            # generate nucleotide alignment
            self.aln = pad_nucleotide_sequences(aln_aa, self.seqs)
    finally:
        # Never leave the process inside the run directory.
        os.chdir(cwd)
    remove_dir(self.run_dir)
示例12: align
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def align(self):
    '''
    align sequences in self.seqs using mafft

    The sequences are written to 'temp_in.fasta' inside self.run_dir, mafft
    is run through the shell (stdout to 'temp_out.fasta', stderr to
    'mafft.log') and the result is stored in self.aln. The run directory is
    created first and removed again on success.
    '''
    cwd = os.getcwd()
    make_dir(self.run_dir)
    os.chdir(self.run_dir)
    try:
        SeqIO.write(self.seqs.values(), "temp_in.fasta", "fasta")
        os.system('mafft --reorder --anysymbol temp_in.fasta 1> temp_out.fasta 2> mafft.log')
        self.aln = AlignIO.read('temp_out.fasta', 'fasta')
    finally:
        # Restore the working directory even if the mafft output is missing
        # or unreadable; the run directory is then kept for inspection.
        os.chdir(cwd)
    remove_dir(self.run_dir)
示例13: cluster_align_makeTree
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def cluster_align_makeTree( path, folders_dict, parallel, disable_cluster_postprocessing, simple_tree):
    """
    create gene clusters as nucleotide/ amino_acid fasta files
    and build individual gene trees based on fna files

    Parameters:
        path: base directory; it is concatenated directly with
            'geneCluster/' and 'protein_faa/diamond_matches/'.
        folders_dict: forwarded to create_geneCluster_fa.
        parallel: forwarded to multips.
        disable_cluster_postprocessing: when true, the diversity pickle is
            updated and allclusters.tsv / allclusters.cpk are copied to
            their final names.
        simple_tree: forwarded to align_and_makeTree via multips.

    File removal and copying use os / shutil rather than shell commands, so
    paths containing spaces work and a failed copy raises instead of being
    silently ignored.
    """
    import shutil

    # create_geneCluster_fa runs in a separate process that is awaited
    # before continuing.
    proc= multiprocessing.Process(target=create_geneCluster_fa, args=(path, folders_dict))
    proc.start(); proc.join()

    ## align, build_tree, make_geneTree_json
    cluster_seqs_path = path+'geneCluster/'
    diversity_file = cluster_seqs_path+'gene_diversity.txt'
    if os.path.isfile(diversity_file):
        os.remove(diversity_file)

    # Disabled: header for a cluster correlation statistics file.
    if 0:
        with open(cluster_seqs_path+'cluster_correl_stats.txt', 'wb') as cluster_correl_stats_file:
            cluster_correl_stats_file.write('%s\n'%'\t'.join(
                        ['clusterID', 'random_alnID', 'diversity_nuc', \
                        'mean_seqLen', 'std_seqLen', 'bestSplit_paraNodes', 'bestSplit_branchLen'
                        ]))

    fna_file_list=glob.glob(cluster_seqs_path+"*.fna")
    multips(align_and_makeTree, parallel, fna_file_list,
        cluster_seqs_path, simple_tree)

    ## if cluster_postprocessing skipped, rename allclusters.tsv and allclusters.cpk as the final cluster file
    if disable_cluster_postprocessing:
        update_diversity_cpk(path)
        clustering_path= '%s%s'%(path,'protein_faa/diamond_matches/')
        shutil.copy(clustering_path+'allclusters.tsv', clustering_path+'allclusters_final.tsv')
        shutil.copy(clustering_path+'allclusters.cpk', clustering_path+'allclusters_postprocessed.cpk')
示例14: update_diversity_cpk
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def update_diversity_cpk(path):
    """Convert geneCluster/gene_diversity.txt into gene_diversity.cpk.

    Each non-blank line of the text file is split on tabs; the first field
    becomes the key and the second field (kept as a string) the value of
    the dictionary handed to write_pickle. A non-blank line with fewer than
    two fields raises IndexError.

    `path` is concatenated directly with 'geneCluster/'.
    """
    ## write gene_diversity_Dt cpk file
    output_path = path+'geneCluster/'
    gene_diversity = {}
    # Text mode: the lines are split with a str separator.
    with open(output_path+'gene_diversity.txt', 'r') as infile:
        for line in infile:
            stripped = line.rstrip()
            if not stripped:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            fields = stripped.split('\t')
            gene_diversity[fields[0]] = fields[1]
    write_pickle(output_path+'gene_diversity.cpk', gene_diversity)
示例15: write_final_cluster
# 需要导入模块: from Bio import Phylo [as 别名]
# 或者: from Bio.Phylo import write [as 别名]
def write_final_cluster(path):
    """Write the sorted clusters to protein_faa/diamond_matches/allclusters_final.tsv.

    load_sorted_clusters(path) is iterated as (clusterID, cluster_stat)
    pairs; for each pair the strings in cluster_stat[1] are written as one
    tab-separated line. `path` is concatenated directly with the relative
    location of the output file.
    """
    geneCluster_dt=load_sorted_clusters(path)
    outfileName='allclusters_final.tsv'
    # Text mode: the rows are str, not bytes.
    with open(path+'protein_faa/diamond_matches/'+outfileName, 'w') as outfile:
        for clusterID, cluster_stat in geneCluster_dt:
            outfile.write('\t'.join(cluster_stat[1]) + '\n')