本文整理汇总了 Python 中 Bio.Align.Applications.MafftCommandline 类的典型用法代码示例。如果您正苦于以下问题:Python MafftCommandline 类的具体用法?Python MafftCommandline 怎么用?Python MafftCommandline 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 MafftCommandline 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_Mafft_with_Clustalw_output
def test_Mafft_with_Clustalw_output(self):
    """Run MAFFT on the first input file and expect CLUSTAL-style output."""
    cmdline = MafftCommandline(mafft_exe)
    # Configure the run through the wrapper's property interface.
    cmdline.input = self.infile1
    cmdline.clustalout = True
    # repr() of the wrapper must evaluate back to an equivalent command line.
    rebuilt = eval(repr(cmdline))
    self.assertEqual(str(rebuilt), str(cmdline))
    stdoutdata, stderrdata = cmdline()
    # The banner wording differs between MAFFT releases, e.g.
    # "CLUSTAL format alignment by MAFFT ..." or
    # "CLUSTAL (-like) formatted alignment by MAFFT FFT-NS-2 (v6.240)",
    # so only the leading keyword is checked.
    self.assertTrue(stdoutdata.startswith("CLUSTAL"), stdoutdata)
    # stderr must not contain the "$#=0" marker.
    self.assertTrue("$#=0" not in stderrdata)
示例2: test_Mafft_with_Clustalw_output
def test_Mafft_with_Clustalw_output(self):
    """Simple round-trip through app with clustal output.

    Builds a MAFFT command line for ``self.infile1`` with ``clustalout``
    enabled, runs it through ``Application.generic_run`` and checks the
    return code, the start of stdout, the content of stderr and the exact
    command line that was executed.
    """
    cmdline = MafftCommandline(mafft_exe)
    #Use some properties:
    cmdline.input = self.infile1
    cmdline.clustalout = True
    # repr() of the wrapper must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    # The first returned item carries return_code and _cl, i.e. it is a
    # result object rather than a stdin stream, hence the name.
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    # assertTrue replaces the deprecated assert_ alias (removed in Python 3.12).
    self.assertTrue(
        stdout.read().startswith("CLUSTAL format alignment by MAFFT"))
    self.assertTrue("$#=0" not in stderr.read())
    self.assertEqual(str(result._cl),
                     mafft_exe + " --clustalout Fasta/f002")
示例3: mafft_align
def mafft_align(fa_path, afa_path):
    """Align amino acid FASTA file.

    Takes amino-acid seqs from fa_path and writes aligned amino-acids
    to afa_path.  Whatever MAFFT printed on stderr is written next to the
    alignment as "<afa_path>.err".

    Args:
        fa_path: path of the unaligned FASTA input handed to MAFFT.
        afa_path: path the alignment (MAFFT's stdout) is written to.

    Returns:
        None.
    """
    mafft_call = MafftCommandline(input=fa_path)
    mafft_call.maxiterate = 1000
    mafft_call.retree = 2
    stdout, stderr = mafft_call()
    # Context managers close (and therefore flush) both files deterministically;
    # the previous bare open(...).write(...) calls left that to the garbage
    # collector.
    with open(afa_path, "w") as alignment_handle:
        alignment_handle.write(stdout)
    with open("%s.err" % afa_path, "w") as error_handle:
        error_handle.write(stderr)
示例4: test_Mafft_with_Clustalw_output
def test_Mafft_with_Clustalw_output(self):
    """Simple round-trip through app with clustal output.

    Runs MAFFT on ``self.infile1`` via ``Application.generic_run`` with
    ``clustalout`` enabled and checks the return code, the start of stdout,
    the content of stderr and the exact command line that was executed.
    """
    cmdline = MafftCommandline(mafft_exe)
    #Use some properties:
    cmdline.input = self.infile1
    cmdline.clustalout = True
    # repr() of the wrapper must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    output = stdout.read()
    #e.g. "CLUSTAL format alignment by MAFFT ..."
    #or "CLUSTAL (-like) formatted alignment by MAFFT FFT-NS-2 (v6.240)"
    # assertTrue replaces the deprecated assert_ alias (removed in Python 3.12).
    self.assertTrue(output.startswith("CLUSTAL"), output)
    self.assertTrue("$#=0" not in stderr.read())
    self.assertEqual(str(result._cl),
                     mafft_exe + " --clustalout Fasta/f002")
示例5: test_Mafft_with_Clustalw_output
def test_Mafft_with_Clustalw_output(self):
    """Simple round-trip through app with clustal output.

    Runs the MAFFT command line for ``self.infile1`` in a child process and
    checks its exit status, the start of stdout and the content of stderr.
    """
    cmdline = MafftCommandline(mafft_exe)
    #Use some properties:
    cmdline.input = self.infile1
    cmdline.clustalout = True
    # repr() of the wrapper must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    # universal_newlines=True makes both pipes return text, so the str
    # comparisons below also work on Python 3 (pipes yield bytes otherwise).
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # communicate() drains both pipes while waiting for the child.  Calling
    # wait() first and read() afterwards can deadlock once the child fills
    # an OS pipe buffer.
    output, errors = child.communicate()
    self.assertEqual(child.returncode, 0)
    #e.g. "CLUSTAL format alignment by MAFFT ..."
    #or "CLUSTAL (-like) formatted alignment by MAFFT FFT-NS-2 (v6.240)"
    # assertTrue replaces the deprecated assert_ alias (removed in Python 3.12).
    self.assertTrue(output.startswith("CLUSTAL"), output)
    self.assertTrue("$#=0" not in errors)
    del child
示例6: align_cluster
def align_cluster(self, cluster_file):
    """
    Worker function for align_clusters.

    Takes the path of a FASTA file holding one unaligned sequence cluster,
    aligns it with MAFFT (--auto --adjustdirection) and writes MAFFT's
    stdout to a file under "alignments".  Returns the path of that file.
    """
    mafft_cline = MafftCommandline(input=cluster_file)
    for flag in ("--auto", "--adjustdirection"):
        mafft_cline.set_parameter(flag, True)
    # Echo the command line before running it.
    palette = Color()
    print(palette.red + str(mafft_cline) + palette.done)
    sys.stdout.flush()
    # Keep everything from the first "/" onwards; a bare file name is placed
    # directly inside "alignments/".
    slash_at = cluster_file.find("/")
    if slash_at == -1:
        alignment_file = "alignments/" + cluster_file
    else:
        alignment_file = "alignments" + cluster_file[slash_at:]
    stdout, stderr = mafft_cline()
    with open(alignment_file, "w") as handle:
        handle.write(stdout)
    return alignment_file
示例7: test_Mafft_with_options
def test_Mafft_with_options(self):
    """Run MAFFT with an input file plus options; the result arrives on stdout."""
    cmdline = MafftCommandline(mafft_exe)
    # Apply the settings in a fixed order through set_parameter.
    settings = (
        ("input", self.infile1),
        ("maxiterate", 100),
        ("--localpair", True),
    )
    for option, value in settings:
        cmdline.set_parameter(option, value)
    # repr() of the wrapper must evaluate back to an equivalent command line.
    rebuilt = eval(repr(cmdline))
    self.assertEqual(str(rebuilt), str(cmdline))
    stdoutdata, stderrdata = cmdline()
    # stdout is expected to begin with this FASTA header.
    self.assertTrue(stdoutdata.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertNotIn("$#=0", stderrdata)
示例8: test_Mafft_with_options
def test_Mafft_with_options(self):
    """Simple round-trip through app with infile and options.

    Result passed to stdout.  The command is run through
    ``Application.generic_run``; the return code, the start of stdout, the
    content of stderr and the executed command line are all checked.
    """
    cmdline = MafftCommandline(mafft_exe)
    cmdline.set_parameter("input", self.infile1)
    cmdline.set_parameter("maxiterate", 100)
    cmdline.set_parameter("--localpair", True)
    # repr() of the wrapper must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    # assertTrue replaces the deprecated assert_ alias (removed in Python 3.12).
    self.assertTrue(
        stdout.read().startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in stderr.read())
    self.assertEqual(str(result._cl),
                     mafft_exe + " --localpair --maxiterate 100 Fasta/f002")
示例9: test_Mafft_with_options
def test_Mafft_with_options(self):
    """Simple round-trip through app with infile and options.

    Result passed to stdout.  The command line is run in a child process
    and its exit status, the start of stdout and the content of stderr are
    checked.
    """
    cmdline = MafftCommandline(mafft_exe)
    cmdline.set_parameter("input", self.infile1)
    cmdline.set_parameter("maxiterate", 100)
    cmdline.set_parameter("--localpair", True)
    # repr() of the wrapper must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    # universal_newlines=True makes both pipes return text, so the str
    # comparisons below also work on Python 3 (pipes yield bytes otherwise).
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # communicate() drains both pipes while waiting for the child.  Calling
    # wait() first and read() afterwards can deadlock once the child fills
    # an OS pipe buffer.
    output, errors = child.communicate()
    self.assertEqual(child.returncode, 0)
    # assertTrue replaces the deprecated assert_ alias (removed in Python 3.12).
    self.assertTrue(output.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in errors)
    del child
示例10: sleep
# NOTE(review): this excerpt begins part-way through the script; the first three
# lines presumably sit inside an atpA download loop like the rbcL loop below.
# The original indentation has been lost, so block nesting is not visible here.
atpA_records.append(SeqIO.read(handle, 'fasta'))
handle.close()
# Short pause after the request.
sleep(0.02)
# Save every collected atpA record to one unaligned FASTA file.
SeqIO.write(atpA_records, "atpA_unaligned.fasta", "fasta")
# Fetch one FASTA record from the Entrez "nucleotide" database for each
# non-blank rbcL accession.
for accession in rbcL_accessions:
if accession.strip() != '':
handle = Entrez.efetch(db='nucleotide', rettype='fasta', retmode='text', id=accession)
rbcL_records.append(SeqIO.read(handle, 'fasta'))
handle.close()
# Short pause after the request.
sleep(0.02)
# Save every collected rbcL record to one unaligned FASTA file.
SeqIO.write(rbcL_records, "rbcL_unaligned.fasta", "fasta")
print("Aligning atpA with MAFFT...")
# Build the MAFFT command line with the --auto and --adjustdirection flags on.
mafft_cline = MafftCommandline(input="atpA_unaligned.fasta")
mafft_cline.set_parameter("--auto", True)
mafft_cline.set_parameter("--adjustdirection", True)
# Show the command line that is about to be run.
print(str(mafft_cline))
# Calling the wrapper runs MAFFT; the alignment comes back as stdout text.
stdout, stderr = mafft_cline()
print("Writing atpA alignment to FASTA file...")
with open("atpA_aligned.fasta", "w") as handle:
handle.write(stdout)
print("Aligning rbcL with MAFFT...")
# Same procedure for the rbcL sequences.
mafft_cline = MafftCommandline(input="rbcL_unaligned.fasta")
mafft_cline.set_parameter("--auto", True)
mafft_cline.set_parameter("--adjustdirection", True)
print(str(mafft_cline))
# NOTE(review): the excerpt ends here; writing this stdout to a file is not
# part of the visible lines.
stdout, stderr = mafft_cline()
示例11: test_Mafft_with_complex_command_line
def test_Mafft_with_complex_command_line(self):
    """Round-trip with complex command line.

    Sets a mix of switches and valued options (given both with and without
    the leading dashes), runs the command through
    ``Application.generic_run`` and checks the return code, the start of
    stdout, the content of stderr and the exact command line executed.
    """
    cmdline = MafftCommandline(mafft_exe)
    cmdline.set_parameter("input", self.infile1)
    cmdline.set_parameter("--localpair", True)
    cmdline.set_parameter("--weighti", 4.2)
    cmdline.set_parameter("retree", 5)
    cmdline.set_parameter("maxiterate", 200)
    cmdline.set_parameter("--nofft", True)
    cmdline.set_parameter("op", 2.04)
    cmdline.set_parameter("--ep", 0.51)
    cmdline.set_parameter("--lop", 0.233)
    cmdline.set_parameter("lep", 0.2)
    cmdline.set_parameter("--reorder", True)
    cmdline.set_parameter("--treeout", True)
    cmdline.set_parameter("nuc", True)
    # repr() of the wrapper must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    # assertTrue replaces the deprecated assert_ alias (removed in Python 3.12).
    self.assertTrue(
        stdout.read().startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in stderr.read())
    self.assertEqual(str(result._cl), mafft_exe
                     + " --localpair --weighti 4.2 --retree 5 "
                     + "--maxiterate 200 --nofft --op 2.04 --ep 0.51"
                     + " --lop 0.233 --lep 0.2 --reorder --treeout"
                     + " --nuc Fasta/f002")
示例12: main
#......... part of the code is omitted here .........
# NOTE(review): the function header and everything before this point are not
# shown, and the original indentation has been lost, so block nesting below
# cannot be read off the text.
else:
print("No taxids csv file found.\n")
# open species list file, get synonyms and any missing taxids
# NOTE(review): the file is opened in "rb" mode for csv.reader, which is the
# Python 2 convention; Python 3's csv.reader needs a text-mode file - confirm
# the target interpreter.
with open(args.species, "rb") as csvfile:
print("Checking list of species, getting missing taxids from NCBI...")
taxids_file = open("taxids.csv", "w")
namesreader = csv.reader(csvfile, delimiter=",")
i = 1
# Count the rows up front for the progress display.
# NOTE(review): this second open() of the species file is never closed explicitly.
num_lines = sum(1 for line in open(args.species))
for row in namesreader:
# update status
percent = str(round(100 * i / float(num_lines), 2))
sys.stdout.write("\r" + "Completed: " + str(i) + "/" + str(num_lines) + " (" + percent + "%)")
sys.stdout.flush()
i += 1
# check to see if we already have a taxid for this species
found = False
for taxon in taxa:
if taxon.binomial == row[0]:
found = True
taxids_file.write(taxon.binomial + "," + taxon.taxid + "\n")
# add synonyms
# Every column after the first is appended as a synonym.
for j in range(1, len(row)):
taxon.synonyms.append(row[j])
break
if not found:
# get the taxid from NCBI
taxon = Taxon(row[0])
taxon.get_taxid(email)
# dont overload genbank
time.sleep(0.1)
taxids_file.write(taxon.binomial + "," + taxon.taxid + "\n")
# add synonyms
for j in range(1, len(row)):
taxon.synonyms.append(row[j])
taxa.append(taxon)
taxids_file.close()
# NOTE(review): this message is printed after taxids.csv has been written and closed.
print("\nWriting all taxids to file taxids.csv...")
print("\nDownloading sequences from NCBI...")
for gene in genes:
print("\nSearching for gene: " + gene.name)
i = 1
for taxon in taxa:
# update status
# NOTE(review): percent is computed against len(taxa) but the "i/N" text still
# shows num_lines from the species file - confirm which total is intended.
percent = str(round(100 * i / float(len(taxa)), 2))
sys.stdout.write("\r" + "Completed: " + str(i) + "/" + str(num_lines) + " (" + percent + "%)")
sys.stdout.flush()
i += 1
# Taxa whose taxid is the "not found" placeholder are not queried.
if taxon.taxid != "not found":
taxon.get_sequences(email, gene)
# dont overload genbank
time.sleep(0.2)
print("\nGenerating unaligned FASTA file...")
# One unaligned FASTA file per gene, named "<gene name>.fasta".
unaligned_file = open(gene.name + ".fasta", "w")
for taxon in taxa:
record = taxon.get_longest_seq(gene.name, max_seq_length)
if record != None:
# output format: >binomial_accession_description
description = taxon.binomial + "_" + record.id + "_" + record.description
description = description.replace(" ", "_")
unaligned_file.write(">" + description + "\n")
unaligned_file.write(str(record.seq) + "\n\n")
unaligned_file.close()
print("Making alignment with MAFFT...")
# The import sits inside the try so that a failing import is handled by the
# except clause below as well.
try:
from Bio.Align.Applications import MafftCommandline
mafft_cline = MafftCommandline(input=gene.name + ".fasta")
mafft_cline.set_parameter("--auto", True)
mafft_cline.set_parameter("--adjustdirection", True)
print(str(mafft_cline))
stdout, stderr = mafft_cline()
print("Writing alignment to FASTA file...")
with open("aligned_" + gene.name + ".fasta", "w") as handle:
handle.write(stdout)
# NOTE(review): a bare except reports every failure (including a failed MAFFT
# run, a file-write error or Ctrl-C) as "Problem finding MAFFT".
except:
print("Problem finding MAFFT, alignment skipped.")
print("\nGenerating summary results spreadsheet...\n")
# result.csv: a header row "taxon,<gene>,...", then one row per taxon.
summary = open("result.csv", "w")
header = "taxon,"
for gene in genes:
header += gene.name + ","
summary.write(header + "\n")
for taxon in taxa:
accessions = taxon.binomial + ","
for gene in genes:
# each column will be the longest sequences accession
record = taxon.get_longest_seq(gene.name, max_seq_length)
if record != None:
accessions += record.id + ","
else:
# No record for this gene: leave the cell empty.
accessions += ","
summary.write(accessions + "\n")
summary.close()
print("Done!\n")
示例13: test_Mafft_with_complex_command_line
def test_Mafft_with_complex_command_line(self):
    """Round-trip with complex command line.

    Sets a mix of switches and valued options (given both with and without
    the leading dashes), checks the generated command line string, then runs
    it in a child process and checks the exit status, the start of stdout
    and the content of stderr.
    """
    cmdline = MafftCommandline(mafft_exe)
    cmdline.set_parameter("input", self.infile1)
    cmdline.set_parameter("--localpair", True)
    cmdline.set_parameter("--weighti", 4.2)
    cmdline.set_parameter("retree", 5)
    cmdline.set_parameter("maxiterate", 200)
    cmdline.set_parameter("--nofft", True)
    cmdline.set_parameter("op", 2.04)
    cmdline.set_parameter("--ep", 0.51)
    cmdline.set_parameter("--lop", 0.233)
    cmdline.set_parameter("lep", 0.2)
    cmdline.set_parameter("--reorder", True)
    cmdline.set_parameter("--treeout", True)
    cmdline.set_parameter("nuc", True)
    # repr() of the wrapper must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    self.assertEqual(str(cmdline), mafft_exe
                     + " --localpair --weighti 4.2 --retree 5 "
                     + "--maxiterate 200 --nofft --op 2.04 --ep 0.51"
                     + " --lop 0.233 --lep 0.2 --reorder --treeout"
                     + " --nuc Fasta/f002")
    # universal_newlines=True makes both pipes return text, so the str
    # comparisons below also work on Python 3 (pipes yield bytes otherwise).
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # communicate() drains both pipes while waiting for the child.  Calling
    # wait() first and read() afterwards can deadlock once the child fills
    # an OS pipe buffer.
    output, errors = child.communicate()
    self.assertEqual(child.returncode, 0)
    # assertTrue replaces the deprecated assert_ alias (removed in Python 3.12).
    self.assertTrue(output.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in errors)
    del child
示例14: test_Mafft_with_complex_command_line
def test_Mafft_with_complex_command_line(self):
    """Check string round-tripping and execution of a many-option command line."""
    cmdline = MafftCommandline(mafft_exe)
    # Option names are deliberately given both with and without the leading
    # dashes; they are applied in exactly this order.
    settings = (
        ("input", self.infile1),
        ("--localpair", True),
        ("--weighti", 4.2),
        ("retree", 5),
        ("maxiterate", 200),
        ("--nofft", True),
        ("op", 2.04),
        ("--ep", 0.51),
        ("--lop", 0.233),
        ("lep", 0.2),
        ("--reorder", True),
        ("--treeout", True),
        ("nuc", True),
    )
    for option, value in settings:
        cmdline.set_parameter(option, value)
    # repr() of the wrapper must evaluate back to an equivalent command line.
    rebuilt = eval(repr(cmdline))
    self.assertEqual(str(rebuilt), str(cmdline))
    # The generated string must match this exact option order.
    actual = str(cmdline)
    expected = (mafft_exe
                + " --localpair --weighti 4.2 --retree 5 "
                + "--maxiterate 200 --nofft --op 2.04 --ep 0.51"
                + " --lop 0.233 --lep 0.2 --reorder --treeout"
                + " --nuc Fasta/f002")
    self.assertEqual(actual, expected)
    stdoutdata, stderrdata = cmdline()
    # stdout is expected to begin with this FASTA header.
    self.assertTrue(stdoutdata.startswith(">gi|1348912|gb|G26680|G26680"))
    self.assertTrue("$#=0" not in stderrdata)
示例15: main
def main():
    """Download sequences for the taxa in ``taxa_file`` and align them.

    Steps, in order:

    1. For every non-blank line of ``taxa_file`` take the first
       whitespace-separated token as the name, look up its taxid with
       ``get_taxon_id`` and write "name<TAB>taxid" to taxids.txt.
    2. Collect the records returned by ``get_sequences`` for every taxid
       other than the "not found" placeholder.
    3. Write all records to output_unaligned_gb_format.fasta and, with a
       custom ">Organism_accession_description" header, to
       output_unaligned_custom_format.fasta.
    4. Align the custom-format file with MAFFT (--auto --adjustdirection)
       and write the result to output_aligned.fasta.  This step is best
       effort: on failure a message is printed and the alignment is skipped.

    Relies on the module-level names ``taxa_file``, ``get_taxon_id`` and
    ``get_sequences``.  Takes no arguments and returns None.
    """
    import time

    from Bio import SeqIO

    print("\n\nmatrix_maker.py\n\n")
    print("Getting all taxid...\n")
    print("Writing taxids to file taxids.txt...\n")
    taxids = []
    # Both files are closed even if a lookup raises; the name file was
    # previously never closed at all.
    with open("taxids.txt", "w") as taxids_file, open(taxa_file) as name_file:
        for line in name_file:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline at the end of the
                # file): nothing to look up.  Used to raise IndexError.
                continue
            name = fields[0]
            taxid = get_taxon_id(name)
            name_taxid_text = name + "\t" + taxid
            print(name_taxid_text)
            taxids_file.write(name_taxid_text + "\n")
            taxids.append(taxid)
            # dont overload genbank
            time.sleep(0.1)

    print("\nDownloading sequences for each taxid...\n")
    final_records = []
    for taxid in taxids:
        if taxid == "not found":
            continue
        # Keep all records for the taxon.  extend() grows the list in place
        # instead of copying it on every iteration.
        final_records.extend(get_sequences(taxid))
        # dont overload genbank
        time.sleep(0.2)

    print("\nGenerating unaligned FASTA file with GenBank formatted description...\n")
    SeqIO.write(final_records, "output_unaligned_gb_format.fasta", "fasta")

    print("Generating unaligned FASTA file with custom formatted description...\n")
    with open("output_unaligned_custom_format.fasta", "w") as unaligned_file:
        for record in final_records:
            organism = record.annotations["organism"]
            # remove the organism name from the description (replace() is a
            # no-op when the name is absent, so no prior find() is needed)
            description = record.description.replace(organism + " ", "")
            # custom format for Andrew: >Organism name_accession_description
            description = organism + "_" + record.id + "_" + description
            description = description.replace(" ", "_")
            unaligned_file.write(">" + description + "\n")
            unaligned_file.write(str(record.seq) + "\n")

    print("Making alignment with MAFFT...")
    try:
        # Imported here so that a failing import is treated like any other
        # alignment failure.
        from Bio.Align.Applications import MafftCommandline
        mafft_cline = MafftCommandline(input="output_unaligned_custom_format.fasta")
        mafft_cline.set_parameter("--auto", True)
        mafft_cline.set_parameter("--adjustdirection", True)
        print(str(mafft_cline))
        stdout, stderr = mafft_cline()
        print("Writing alignment to FASTA file...\n")
        with open("output_aligned.fasta", "w") as handle:
            handle.write(stdout)
    except Exception:
        # Still best effort, but KeyboardInterrupt and SystemExit are no
        # longer swallowed as they were by the bare "except:".
        print("Problem finding MAFFT, alignment skipped.")
    print("Done!\n")