本文整理汇总了Python中Bio.SeqIO.to_dict方法的典型用法代码示例。如果您正苦于以下问题:Python SeqIO.to_dict方法的具体用法?Python SeqIO.to_dict怎么用?Python SeqIO.to_dict使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Bio.SeqIO的用法示例。
在下文中一共展示了SeqIO.to_dict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_prepare_with_alignment_with_ref_name
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def test_prepare_with_alignment_with_ref_name(self, test_file, test_seqs, existing_with_ref, existing_aln, ref_seq, out_file):
    """Check that prepare() changes nothing when the reference is given by name.

    ``align.prepare`` is called with the test sequences, an existing
    alignment and the reference's id (no reference file). The returned
    alignment path must be the existing alignment itself, and both the
    alignment records and the test sequences must read back unchanged.
    """
    alignment_path, sequences_path, _ = align.prepare(
        [test_file], existing_with_ref, out_file, ref_seq.id, None
    )

    assert os.path.isfile(alignment_path), "Didn't write existing alignment where it said"
    assert alignment_path == existing_with_ref, "Rewrote the alignment file unexpectedly"

    # The alignment must still hold the reference and every original record.
    written_alignment = SeqIO.to_dict(SeqIO.parse(alignment_path, "fasta"))
    assert written_alignment[ref_seq.id].seq == ref_seq.seq, "Reference sequence dropped from alignment"
    for name in existing_aln:
        assert name in written_alignment, "Some existing alignment sequences dropped unexpectedly"
        assert written_alignment[name].seq == existing_aln[name].seq, "Some existing alignment sequences changed unexpectedly"

    # The test sequences must be written out exactly as they came in.
    assert os.path.isfile(sequences_path), "Didn't write test sequences where it said"
    written_sequences = SeqIO.to_dict(SeqIO.parse(sequences_path, "fasta"))
    for name in test_seqs:
        assert name in written_sequences, "Some test sequences unexpectedly dropped"
        assert written_sequences[name].seq == test_seqs[name].seq, "Some test sequences changed unexpectedly"
    # Same key set in both directions: nothing extra was written either.
    assert written_sequences.keys() == test_seqs.keys()
示例2: test_prepare_with_alignment_with_ref_seq
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def test_prepare_with_alignment_with_ref_seq(self, test_file, test_seqs, existing_file, existing_aln, ref_seq, ref_file, out_file):
    """Check that prepare() adds a reference supplied as a file to the alignment.

    ``align.prepare`` is called with the test sequences, an existing
    alignment file and a reference file (no reference name). The reference
    must be added to the written alignment, the returned name must be the
    reference id, and no other record may change.
    """
    aln_outfile, seqs_outfile, ref_name = align.prepare([test_file], existing_file, out_file, None, ref_file)
    assert ref_name == ref_seq.id, "Didn't return strain name from refrence file"
    assert os.path.isfile(aln_outfile), "Didn't write existing alignment where it said"
    # Compare against the input *path*. The original compared against
    # ``existing_aln`` (a mapping of records), which can never equal a path,
    # so the check could not fail.
    assert aln_outfile != existing_file, "Unexpectedly overwrote existing alignment"

    # Alignment file should have the reference added
    aln_output = SeqIO.to_dict(SeqIO.parse(aln_outfile, "fasta"))
    assert aln_output[ref_seq.id].seq == ref_seq.seq, "Reference sequence not added to alignment"
    for seq in existing_aln:
        assert seq in aln_output, "Some existing alignment sequences dropped unexpectedly"
        assert aln_output[seq].seq == existing_aln[seq].seq, "Some existing alignment sequences changed unexpectedly"

    # test sequences should be unchanged
    assert os.path.isfile(seqs_outfile), "Didn't write test sequences where it said"
    seq_output = SeqIO.to_dict(SeqIO.parse(seqs_outfile, "fasta"))
    for seq in test_seqs:
        assert seq in seq_output, "Some test sequences unexpectedly dropped"
        assert seq_output[seq].seq == test_seqs[seq].seq, "Some test sequences changed unexpectedly"
    assert seq_output.keys() == test_seqs.keys()
示例3: extract_paralogs
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def extract_paralogs(gene,prefix):
    """Write the putative paralog sequences for one gene/sample to a FASTA file.

    Reads ``<gene>/<prefix>/paralog_warning.txt`` (second whitespace-separated
    field of each line is a sequence id) and the first line of
    ``<gene>/<prefix>/exonerate_stats.csv`` (the id of the chosen paralog),
    looks the ids up in ``<gene>/<prefix>/exonerate_results.fasta`` and writes
    the records to ``<gene>/<prefix>/paralogs/<gene>_paralogs.fasta``.

    The chosen paralog is renamed ``<prefix>.main``; every other record is
    renamed ``<prefix>.<n>`` where ``n`` is its position in the sorted id list.

    Returns the number of sequences written, or 0 when
    ``exonerate_stats.csv`` cannot be opened.

    Raises IOError if ``paralog_warning.txt`` cannot be opened and KeyError
    if a listed id is missing from the exonerate results.
    """
    sample_dir = os.path.join(gene, prefix)

    # Context managers close the handles; the original left both files open.
    with open(os.path.join(sample_dir, "paralog_warning.txt")) as warning_file:
        # Blank lines are skipped instead of raising IndexError.
        unique_ids = {line.split()[1] for line in warning_file if line.strip()}
    # Sorting makes the "<prefix>.<n>" numbering reproducible; plain set
    # iteration order can differ from one run to the next.
    putative_paralog_ids = sorted(unique_ids)

    try:
        with open(os.path.join(sample_dir, "exonerate_stats.csv")) as stats_file:
            chosen_paralog = stats_file.readline().rstrip()
    except IOError:
        return 0

    exonerate_dict = SeqIO.to_dict(
        SeqIO.parse(os.path.join(sample_dir, "exonerate_results.fasta"), 'fasta')
    )

    paralog_dir = os.path.join(sample_dir, "paralogs")
    if not os.path.isdir(paralog_dir):
        os.mkdir(paralog_dir)

    seqs_to_write = [exonerate_dict[seq_id] for seq_id in putative_paralog_ids]
    for index, record in enumerate(seqs_to_write):
        if record.id == chosen_paralog:
            record.id = "{}.{}".format(prefix, "main")
        else:
            record.id = "{}.{}".format(prefix, index)

    SeqIO.write(seqs_to_write, os.path.join(paralog_dir, '{}_paralogs.fasta'.format(gene)), 'fasta')
    return len(seqs_to_write)
示例4: initial_exonerate
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def initial_exonerate(proteinfilename, assemblyfilename,prefix):
    """Run an exonerate protein2genome search and return the hits by id.

    The ``--ryo`` format puts the hit details into each FASTA header, and
    the shell redirects exonerate's output to
    ``<prefix>/exonerate_results.fasta``. That file is then parsed into a
    dictionary keyed by record id.

    A non-zero exit status from exonerate is logged as a warning on the
    "pipeline" logger; the output file is still parsed.

    Returns the dictionary produced by ``SeqIO.to_dict``.
    """
    logger = logging.getLogger("pipeline")
    outputfilename = "%s/exonerate_results.fasta" %prefix
    exonerate_ryo = '">%ti,%qi,%qab,%qae,%pi,(%tS),%tab,%tae\\n%tcs\\n"'
    exonerate_command = "exonerate -m protein2genome --showalignment no --showvulgar no -V 0 --ryo %s %s %s >%s" % (exonerate_ryo,proteinfilename,assemblyfilename,outputfilename)
    logger.debug(exonerate_command)

    # NOTE(review): the command goes through the shell with unquoted file
    # names, so paths containing spaces or shell metacharacters will break
    # (or be interpreted). Left as-is because the output redirection and
    # the quoting of the ryo string depend on the shell.
    return_code = subprocess.call(exonerate_command,shell=True)
    if return_code != 0:
        # Previously ignored; report it so an empty result can be explained.
        logger.warning("exonerate exited with status %s", return_code)

    return SeqIO.to_dict(SeqIO.parse(outputfilename,'fasta'))
示例5: make_intron_supercontig
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def make_intron_supercontig(contig_info,gene,prefix,add_N = False):
    """Concatenate contigs into one supercontig record and write it as FASTA.

    Contigs are read from ``../<gene>_contigs.fasta``. For each entry of
    ``contig_info``, element 0 is the contig id and element 5 is the strand
    marker: ``"(+)"`` appends the contig as is, ``"(-)"`` appends its reverse
    complement. Any other marker writes a message to stderr and exits with
    status 1.

    When ``add_N`` is true, ten ``N`` characters are inserted between
    consecutive contigs (not after the last one).

    The result gets the id ``<prefix>-<gene>`` and an empty description, and
    is written to ``sequences/intron/<gene>_supercontig.fasta``.
    """
    cap3contigs = SeqIO.to_dict(SeqIO.parse("../{}_contigs.fasta".format(gene),'fasta'))
    intron_supercontig = SeqRecord(Seq(''))

    last_index = len(contig_info) - 1
    for index, hit in enumerate(contig_info):
        strand = hit[5]
        if strand == "(+)":
            intron_supercontig += cap3contigs[hit[0]]
        elif strand == "(-)":
            intron_supercontig += cap3contigs[hit[0]].reverse_complement()
        else:
            sys.stderr.write("Strandedness not found!")
            sys.exit(1)
        # Compare by position, not by value: an earlier entry that happens to
        # equal the last entry must still be followed by a spacer.
        if add_N and index != last_index:
            intron_supercontig += "NNNNNNNNNN"

    intron_supercontig.id = '{}-{}'.format(prefix,gene)
    intron_supercontig.description = ''
    SeqIO.write(intron_supercontig,'sequences/intron/{}_supercontig.fasta'.format(gene),'fasta')
示例6: testResfinderBetaLactam2MutationsSuccess
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactam2MutationsSuccess(self):
    """Detect blaIMP-42 in the two-mutation input and verify the written hit.

    Runs AMR detection on ``beta-lactam-blaIMP-42-mut-2.fsa``, checks the
    single ResFinder result row (identity, overlap, lengths, phenotype) and
    checks that the hit FASTA in the output directory holds one record whose
    sequence equals the input sequence.
    """
    gene_id = 'blaIMP-42_1_AB753456'
    input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-mut-2.fsa")

    self.amr_detection.run_amr_detection([input_fasta], 99, 90, 90, 90, 0, 0, 0, 0, 0)

    all_rows = self.amr_detection.get_resfinder_results()
    self.assertEqual(len(all_rows.index), 1, 'Wrong number of rows in result')

    gene_rows = all_rows[all_rows['Gene'] == 'blaIMP-42']
    self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
    self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 99.73, places=2, msg='Wrong pid')
    self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 100.00, places=2, msg='Wrong overlap')
    self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '741/741', msg='Wrong lengths')
    self.assertEqual(
        gene_rows['Predicted Phenotype'].iloc[0],
        'ampicillin, amoxicillin/clavulanic acid, cefoxitin, ceftriaxone, meropenem',
        'Wrong phenotype',
    )

    # The hit sequence written to the output directory must match the input.
    hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-mut-2.fsa')
    hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
    self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

    input_records = SeqIO.to_dict(SeqIO.parse(input_fasta, 'fasta'))
    self.assertEqual(input_records[gene_id].seq, hit_records[gene_id].seq, "records don't match")
示例7: testResfinderBetaLactamDelStartSuccess
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactamDelStartSuccess(self):
    """Detect blaIMP-42 in the start-deletion input and verify the written hit.

    Runs AMR detection on ``beta-lactam-blaIMP-42-del-start.fsa``, checks the
    single ResFinder result row (identity, overlap, lengths) and checks that
    the hit FASTA in the output directory holds one record whose sequence
    equals the input sequence.
    """
    gene_id = 'blaIMP-42_1_AB753456'
    input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-del-start.fsa")

    self.amr_detection.run_amr_detection([input_fasta], 99, 91, 90, 90, 0, 0, 0, 0, 0)

    all_rows = self.amr_detection.get_resfinder_results()
    self.assertEqual(len(all_rows.index), 1, 'Wrong number of rows in result')

    gene_rows = all_rows[all_rows['Gene'] == 'blaIMP-42']
    self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
    self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 100.00, places=2, msg='Wrong pid')
    self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 91.90, places=2, msg='Wrong overlap')
    self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '681/741', msg='Wrong lengths')

    # The hit sequence written to the output directory must match the input.
    hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-del-start.fsa')
    hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
    self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

    input_records = SeqIO.to_dict(SeqIO.parse(input_fasta, 'fasta'))
    self.assertEqual(input_records[gene_id].seq, hit_records[gene_id].seq, "records don't match")
示例8: testResfinderBetaLactamInsStartSuccess
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactamInsStartSuccess(self):
    """Detect blaIMP-42 in the start-insertion input and verify the written hit.

    Runs AMR detection on ``beta-lactam-blaIMP-42-ins-start.fsa`` and checks
    the single ResFinder result row (identity, overlap, lengths). The hit
    FASTA in the output directory must hold one record, and its sequence is
    compared with the record in ``beta-lactam-blaIMP-42-mut-2.fsa`` from the
    test data directory.
    """
    gene_id = 'blaIMP-42_1_AB753456'
    input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-ins-start.fsa")

    self.amr_detection.run_amr_detection([input_fasta], 99, 91, 90, 90, 0, 0, 0, 0, 0)

    all_rows = self.amr_detection.get_resfinder_results()
    self.assertEqual(len(all_rows.index), 1, 'Wrong number of rows in result')

    gene_rows = all_rows[all_rows['Gene'] == 'blaIMP-42']
    self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
    self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 99.73, places=2, msg='Wrong pid')
    self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 100.00, places=2, msg='Wrong overlap')
    self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '741/741', msg='Wrong lengths')

    hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-ins-start.fsa')
    hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
    self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

    # Expected sequence comes from the mut-2 fixture, not from the input file.
    reference_path = path.join(self.test_data_dir, 'beta-lactam-blaIMP-42-mut-2.fsa')
    reference_records = SeqIO.to_dict(SeqIO.parse(reference_path, 'fasta'))

    expected_seq = reference_records[gene_id].seq
    logger.debug("expected_seq=%s", expected_seq)
    actual_seq = hit_records[gene_id].seq
    logger.debug("actual_seq=%s", actual_seq)
    self.assertEqual(expected_seq, actual_seq, "records don't match")
示例9: testResfinderBetaLactamDelMiddleSuccess
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactamDelMiddleSuccess(self):
    """Detect blaIMP-42 in the middle-deletion input and verify the written hit.

    Runs AMR detection on ``beta-lactam-blaIMP-42-del-middle.fsa`` and checks
    the single ResFinder result row (identity, overlap, lengths). The hit
    FASTA in the output directory must hold one record, and its sequence is
    compared with the record in
    ``resfinder_beta-lactam-blaIMP-42-del-middle.fsa`` from the test data
    directory.
    """
    gene_id = 'blaIMP-42_1_AB753456'
    input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-del-middle.fsa")

    self.amr_detection.run_amr_detection([input_fasta], 99, 91, 90, 90, 0, 0, 0, 0, 0)

    all_rows = self.amr_detection.get_resfinder_results()
    self.assertEqual(len(all_rows.index), 1, 'Wrong number of rows in result')

    gene_rows = all_rows[all_rows['Gene'] == 'blaIMP-42']
    self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
    self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 99.33, places=2, msg='Wrong pid')
    self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 100.00, places=2, msg='Wrong overlap')
    self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '741/741', msg='Wrong lengths')

    hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-del-middle.fsa')
    hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
    self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

    # Expected sequence is a stored fixture in the test data directory.
    reference_path = path.join(self.test_data_dir, 'resfinder_beta-lactam-blaIMP-42-del-middle.fsa')
    reference_records = SeqIO.to_dict(SeqIO.parse(reference_path, 'fasta'))

    expected_seq = reference_records[gene_id].seq
    logger.debug("expected_seq=%s", expected_seq)
    actual_seq = hit_records[gene_id].seq
    logger.debug("actual_seq=%s", actual_seq)
    self.assertEqual(expected_seq, actual_seq, "records don't match")
示例10: testResfinderBetaLactamInsMiddleSuccess
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderBetaLactamInsMiddleSuccess(self):
    """Detect blaIMP-42 in the middle-insertion input and verify the written hit.

    Runs AMR detection on ``beta-lactam-blaIMP-42-ins-middle.fsa`` and checks
    the single ResFinder result row (identity, overlap, lengths). The hit
    FASTA in the output directory must hold one record whose sequence equals
    the upper-cased input sequence.
    """
    gene_id = 'blaIMP-42_1_AB753456'
    input_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-ins-middle.fsa")

    self.amr_detection.run_amr_detection([input_fasta], 97, 99, 99, 90, 0, 0, 0, 0, 0)

    all_rows = self.amr_detection.get_resfinder_results()
    self.assertEqual(len(all_rows.index), 1, 'Wrong number of rows in result')

    gene_rows = all_rows[all_rows['Gene'] == 'blaIMP-42']
    self.assertEqual(len(gene_rows.index), 1, 'Wrong number of results detected')
    self.assertAlmostEqual(gene_rows['%Identity'].iloc[0], 98.14, places=2, msg='Wrong pid')
    self.assertAlmostEqual(gene_rows['%Overlap'].iloc[0], 101.62, places=2, msg='Wrong overlap')
    self.assertEqual(gene_rows['HSP Length/Total Length'].iloc[0], '753/741', msg='Wrong lengths')

    hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-ins-middle.fsa')
    hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
    self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

    reference_path = path.join(self.test_data_dir, 'beta-lactam-blaIMP-42-ins-middle.fsa')
    reference_records = SeqIO.to_dict(SeqIO.parse(reference_path, 'fasta'))

    expected_seq = reference_records[gene_id].seq
    logger.debug("expected_seq=%s", expected_seq)
    actual_seq = hit_records[gene_id].seq
    logger.debug("actual_seq=%s", actual_seq)
    # Only the expected side is upper-cased before comparing.
    self.assertEqual(expected_seq.upper(), actual_seq, "records don't match")
示例11: testResfinderExcludeNonMatches
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testResfinderExcludeNonMatches(self):
    """Check that a non-matching input is left out when negatives are excluded.

    Builds an ``AMRDetectionResistance`` with ``include_negative_results=False``
    and runs it on one matching and one non-matching FASTA file. The summary
    must contain a single row, and the hit FASTA for the matching file must
    hold one record whose sequence equals the input sequence.
    """
    gene_id = 'blaIMP-42_1_AB753456'

    detector = AMRDetectionResistance(
        self.resfinder_database,
        self.resfinder_drug_table,
        self.blast_handler,
        self.pointfinder_drug_table,
        self.pointfinder_database,
        include_negative_results=False,
        output_dir=self.outdir.name,
    )

    matching_fasta = path.join(self.test_data_dir, "beta-lactam-blaIMP-42-mut-2.fsa")
    non_matching_fasta = path.join(self.test_data_dir, "non-match.fsa")
    detector.run_amr_detection([matching_fasta, non_matching_fasta], 99, 90, 90, 90, 0, 0, 0, 0, 0)

    summary = detector.get_summary_results()
    self.assertEqual(len(summary.index), 1, 'Wrong number of rows in result')

    hit_path = path.join(self.outdir.name, 'resfinder_beta-lactam-blaIMP-42-mut-2.fsa')
    hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
    self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

    input_records = SeqIO.to_dict(SeqIO.parse(matching_fasta, 'fasta'))
    self.assertEqual(input_records[gene_id].seq, hit_records[gene_id].seq, "records don't match")
示例12: testPlasmidfinderNameSuccess
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def testPlasmidfinderNameSuccess(self):
    """Detect the IncW plasmid in the test input and verify the written hit.

    Runs AMR detection on ``test-plasmids-seq.fsa``, checks the single
    PlasmidFinder result row (identity, overlap, accession, lengths) and
    checks that the hit FASTA in the output directory holds one record whose
    sequence equals the input sequence.
    """
    plasmid_id = 'IncW_1__EF633507'
    input_fasta = path.join(self.test_data_dir, "test-plasmids-seq.fsa")

    self.amr_detection.run_amr_detection([input_fasta], 99, 90, 90, 90, 0, 0, 0, 0, 0)

    all_rows = self.amr_detection.get_plasmidfinder_results()
    self.assertEqual(len(all_rows.index), 1, 'Wrong number of rows in result')

    plasmid_rows = all_rows[all_rows['Plasmid'] == "IncW"]
    self.assertEqual(len(plasmid_rows.index), 1, 'Wrong number of results detected')
    self.assertAlmostEqual(plasmid_rows['%Identity'].iloc[0], 100.00, places=2, msg='Wrong pid')
    self.assertAlmostEqual(plasmid_rows['%Overlap'].iloc[0], 100.00, places=2, msg='Wrong overlap')
    self.assertEqual(plasmid_rows['Accession'].iloc[0], 'EF633507', msg='Wrong accession')
    self.assertEqual(plasmid_rows['HSP Length/Total Length'].iloc[0], '243/243', msg='Wrong lengths')

    # The hit sequence written to the output directory must match the input.
    hit_path = path.join(self.outdir.name, 'plasmidfinder_test-plasmids-seq.fsa')
    hit_records = SeqIO.to_dict(SeqIO.parse(hit_path, 'fasta'))
    self.assertEqual(len(hit_records), 1, 'Wrong number of hit records')

    input_records = SeqIO.to_dict(SeqIO.parse(input_fasta, 'fasta'))
    self.assertEqual(input_records[plasmid_id].seq, hit_records[plasmid_id].seq, "records don't match")
示例13: checkPsvs
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def checkPsvs(df):
    """Verify that each PSV's expected base is present in the assembled sequence.

    Sequences are read from the FASTA file named by the module-level
    ``args.check``. Rows of ``df`` are grouped by the ``ccid`` column; groups
    whose name has no FASTA record are skipped. For every remaining row the
    base at index ``qpos`` of the record (upper-cased) must equal ``truealt``.

    Raises AssertionError on the first mismatch.
    """
    print("Checking if PSVs appear in assemblies", file=sys.stderr)
    from Bio import SeqIO
    recs = SeqIO.to_dict(SeqIO.parse(args.check, "fasta"))

    for name, group in df.groupby(by="ccid"):
        # skip if we cannot find fasta entry
        if name not in recs:
            continue
        rec = recs[name]
        for _, row in group.iterrows():
            pos = row["qpos"]
            alt = row["truealt"]
            recalt = rec.seq[pos].upper()
            # Raised explicitly (same exception type and message as before)
            # so the check is not stripped when Python runs with -O.
            if not alt == recalt:
                raise AssertionError("PSV called inccorectly at {}:{}, {} instead of {}".format(name,pos, alt, recalt))
示例14: get_ctxnum
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def get_ctxnum(reffile):
    """
    Count CG / CHG / CHH sites in a reference genome FASTA file.

    Every sequence is upper-cased and scanned with two zero-width patterns
    per context, a look-ahead and a look-behind; the look-behind patterns
    appear to represent sites on the opposite strand. Zero-width matching
    means overlapping sites are all counted.

    Returns a tuple ``(num_cg, num_chg, num_chh)`` summed over all sequences.
    """
    with open(reffile) as handle:
        records = SeqIO.to_dict(SeqIO.parse(handle, 'fasta'))
    sequences = [str(record.seq).upper() for record in records.values()]

    # One (look-ahead, look-behind) pattern pair per context, in return order.
    context_patterns = (
        (r'(?=(CG))', r'(?<=(CG))'),
        (r'(?=(C[ACT]G))', r'(?<=(C[AGT]G))'),
        (r'(?=(C[ACT][ACT]))', r'(?<=([AGT][AGT]G))'),
    )

    def count_matches(pattern, text):
        # Only the number of matches matters, so no list is built.
        return sum(1 for _ in re.finditer(pattern, text))

    totals = [0, 0, 0]
    for text in sequences:
        for slot, pattern_pair in enumerate(context_patterns):
            for pattern in pattern_pair:
                totals[slot] += count_matches(pattern, text)

    num_cg, num_chg, num_chh = totals
    return num_cg, num_chg, num_chh
示例15: __init__
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import to_dict [as 别名]
def __init__(self, seq_table, records, max_dist, min_fold, threshold_pval, log=None):
    '''
    Store the inputs, rank sequences by abundance and start with no OTUs.

    seq_table: pandas.DataFrame
      Samples on the columns; sequences on the rows
    records: index of Bio.Seq
      Indexed, unaligned input sequences, e.g. the result of BioPython's
      SeqIO.to_dict or SeqIO.index.
    max_dist: float
      genetic distance cutoff above which a sequence will not be merged into an OTU
    min_fold: float
      Multiply the sequence's abundance by this fold to get the minimum abundance
      of an OTU for merging
    threshold_pval: float
      P-value below which a sequence will not be merged into an OTU
    log: filehandle
      Log file reporting the abundance, genetic, and distribution checks.

    Raises RuntimeError if any row label of seq_table is not found in records.
    '''
    self.seq_table = seq_table
    self.records = records
    self.max_dist = max_dist
    self.min_fold = min_fold
    self.threshold_pval = threshold_pval
    self.log = log

    # Total abundance of every sequence (row sums), most abundant first.
    row_totals = self.seq_table.sum(axis=1)
    self.seq_abunds = row_totals.sort_values(ascending=False)

    # Every sequence ID in the table must have a record in the fasta.
    missing_ids = [name for name in self.seq_abunds.index if name not in self.records]
    if missing_ids:
        raise RuntimeError("{} sequence IDs found in the sequence table but not in the fasta: {}".format(len(missing_ids), missing_ids))

    # No OTUs yet: empty membership mapping and empty OTU list.
    self.membership = {}
    self.otus = []