本文整理汇总了Python中skbio.core.alignment.SequenceCollection.from_fasta_records方法的典型用法代码示例。如果您正苦于以下问题：Python SequenceCollection.from_fasta_records方法的具体用法？Python SequenceCollection.from_fasta_records怎么用？Python SequenceCollection.from_fasta_records使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类skbio.core.alignment.SequenceCollection的用法示例。
在下文中一共展示了SequenceCollection.from_fasta_records方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_split_fasta_diff_num_seqs_per_file
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def test_split_fasta_diff_num_seqs_per_file(self):
    """split_fasta funcs as expected when diff num seqs go to each file

    A three-record input is split with a per-file count of 2 and two
    output paths are expected; the records read back from those files
    must form the same collection as the input.
    """
    fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                  prefix='split_fasta_tests',
                                  suffix='')
    close(fd)
    infile = ['>seq1', 'AACCTTAA', '>seq2', 'TTAACC', 'AATTAA',
              '>seq3', 'CCTT--AA']

    actual = split_fasta(infile, 2, filename_prefix)

    # Read every split file back; the context manager guarantees each
    # handle is closed before the files are deleted below.
    actual_seqs = []
    for fp in actual:
        with open(fp) as split_f:
            actual_seqs.extend(split_f)
    remove_files(actual)

    expected = ['%s.%d.fasta' % (filename_prefix, i) for i in range(2)]
    # list of file paths is as expected
    self.assertEqual(actual, expected)
    # building seq collections from infile and the split files result in
    # equivalent seq collections
    self.assertEqual(
        SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
        SequenceCollection.from_fasta_records(parse_fasta(actual_seqs), DNA))
示例2: test_split_fasta_diff_num_seqs_per_file_alt
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def test_split_fasta_diff_num_seqs_per_file_alt(self):
    """split_fasta funcs always catches all seqs

    For every per-file count tried, the records read back from the split
    files must form the same collection as the original input.
    """
    # start with 59 seqs (b/c it's prime, so should make more
    # confusing splits)
    in_seqs = SequenceCollection.from_fasta_records(
        [('seq%s' % k, 'AACCTTAA') for k in range(59)], DNA)
    infile = in_seqs.to_fasta().split('\n')

    # test seqs_per_file from 1 to 999
    for i in range(1, 1000):
        fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                      prefix='split_fasta_tests',
                                      suffix='')
        close(fd)

        actual = split_fasta(infile, i, filename_prefix)

        # Close each split file as soon as it has been read so the loop
        # does not accumulate open handles across iterations.
        actual_seqs = []
        for fp in actual:
            with open(fp) as split_f:
                actual_seqs.extend(split_f)
        # remove the files now, so if the test fails they still get
        # cleaned up
        remove_files(actual)

        # building seq collections from infile and the split files result
        # in equivalent seq collections
        self.assertEqual(
            SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
            SequenceCollection.from_fasta_records(parse_fasta(actual_seqs),
                                                  DNA))
示例3: test_degap
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def test_degap(self):
    """degap returns a collection equal to the gap-stripped input records
    """
    def strip_gaps(records):
        # Drop both gap characters ('.' first, then '-') from each sequence.
        return [(label, text.replace('.', '').replace('-', ''))
                for label, text in records]

    # DNA fixture
    dna_expected = SequenceCollection.from_fasta_records(
        strip_gaps(self.seqs1_t), DNASequence)
    self.assertEqual(self.a1.degap(), dna_expected)

    # RNA fixture
    rna_expected = SequenceCollection.from_fasta_records(
        strip_gaps(self.seqs2_t), RNASequence)
    self.assertEqual(self.a2.degap(), rna_expected)
示例4: filter_samples
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def filter_samples(prefs, data, dir_path='', filename=None):
    """Filter the OTU map and representative seq set and write the results.

    Up to three files are written, each at ``'%s/%s...' % (dir_path,
    filename)``:

    * ``<filename>_sfiltered_otus.txt`` -- the filtered OTU map, one
      tab-separated line per entry returned by ``filter_otus``
    * ``<filename>_sremoved.fasta`` -- sequences removed from the
      representative set
    * ``<filename>_sfiltered.fasta`` -- sequences that were kept

    Parameters
    ----------
    prefs
        Filtering preferences, passed unchanged to ``filter_otus`` and
        ``filter_aln_by_otus``.
    data : dict
        Must provide the keys ``'aln'`` and ``'otus'``.
    dir_path : str, optional
        Directory prefix for the output files.
    filename : str, optional
        Base name for the output files.

    Raises
    ------
    ValueError
        If no sequences were removed, or if no sequences remain. Files
        written before the failing check are left in place.
    """
    aln = data['aln']
    otus = data['otus']

    # filter the otus file based on which samples to remove
    new_otus_list = filter_otus(otus, prefs)

    # Write out a new otus file
    filtered_otus_output_filepath = '%s/%s_sfiltered_otus.txt' \
        % (dir_path, filename)
    with open(filtered_otus_output_filepath, 'w') as otus_out:
        for key in new_otus_list:
            otus_out.write(key[0])
            for j in key[1]:
                otus_out.write('\t' + str(j))
            otus_out.write('\n')

    # filter seq set
    filtered_seqs, removed_seqs = filter_aln_by_otus(aln, prefs)

    # write a fasta containing list of sequences removed from
    # representative set
    if len(removed_seqs) == 0:
        raise ValueError(
            'No sequences were removed.  Did you specify the correct Sample ID?')
    removed_seqs = SequenceCollection.from_fasta_records(
        [(e[0], str(e[1])) for e in removed_seqs], DNA)
    output_filepath2 = '%s/%s_sremoved.fasta' % (dir_path, filename)
    with open(output_filepath2, 'w') as removed_out:
        removed_out.write(removed_seqs.to_fasta())

    # write a fasta containing the filtered representative seqs
    if len(filtered_seqs) == 0:
        raise ValueError(
            'No sequences were remaining in the fasta file.  Did you remove all Sample ID\'s?')
    filtered_seqs = SequenceCollection.from_fasta_records(
        [(e[0], str(e[1])) for e in filtered_seqs], DNA)
    output_filepath = '%s/%s_sfiltered.fasta' % (dir_path, filename)
    with open(output_filepath, 'w') as filtered_out:
        filtered_out.write(filtered_seqs.to_fasta())
示例5: test_call_write_to_file
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def test_call_write_to_file(self):
    """ReferenceRepSetPicker.__call__ writes the expected rep set to file"""
    picker_params = {'Algorithm': 'first', 'ChoiceF': first_id}
    picker = ReferenceRepSetPicker(params=picker_params)
    picker(self.tmp_seq_filepath,
           self.tmp_otu_filepath,
           self.ref_seq_filepath,
           result_path=self.result_filepath)

    with open(self.result_filepath) as result_f:
        observed = SequenceCollection.from_fasta_records(
            parse_fasta(result_f), DNA)
        reference_lines = rep_seqs_reference_result_file_exp.split('\n')
        wanted = SequenceCollection.from_fasta_records(
            parse_fasta(reference_lines), DNA)
        # result order is irrelevant, so compare as sets
        self.assertEqual(set(observed), set(wanted))
示例6: setUp
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def setUp(self):
    """Create the temporary input/template/output files and the aligner
    and expected results used by the PyNAST aligner tests."""
    def fresh_path(suffix):
        # Reserve a named temp file and release its OS-level descriptor;
        # callers reopen the path by name.
        descriptor, path = mkstemp(prefix="PyNastAlignerTests_", suffix=suffix)
        close(descriptor)
        return path

    # candidate sequences
    self.pynast_test1_input_fp = fresh_path(".fasta")
    with open(self.pynast_test1_input_fp, "w") as out:
        out.write(pynast_test1_input_fasta)

    # template, unmodified
    self.pynast_test1_template_fp = fresh_path("template.fasta")
    with open(self.pynast_test1_template_fp, "w") as out:
        out.write(pynast_test1_template_fasta)

    # template with '-' replaced by '.'
    self.pynast_test_template_w_dots_fp = fresh_path("template.fasta")
    with open(self.pynast_test_template_w_dots_fp, "w") as out:
        out.write(pynast_test1_template_fasta.replace("-", "."))

    # template with 'T' replaced by 'U'
    self.pynast_test_template_w_u_fp = fresh_path("template.fasta")
    with open(self.pynast_test_template_w_u_fp, "w") as out:
        out.write(pynast_test1_template_fasta.replace("T", "U"))

    # template lower-cased
    self.pynast_test_template_w_lower_fp = fresh_path("template.fasta")
    with open(self.pynast_test_template_w_lower_fp, "w") as out:
        out.write(pynast_test1_template_fasta.lower())

    # output locations: create and truncate them up front so cleanup can
    # rely on their existence
    self.result_fp = fresh_path(".fasta")
    open(self.result_fp, "w").close()
    self.failure_fp = fresh_path(".fasta")
    open(self.failure_fp, "w").close()
    self.log_fp = fresh_path(".log")
    open(self.log_fp, "w").close()

    self._paths_to_clean_up = [
        self.pynast_test1_input_fp,
        self.result_fp,
        self.failure_fp,
        self.log_fp,
        self.pynast_test1_template_fp,
        self.pynast_test_template_w_dots_fp,
        self.pynast_test_template_w_u_fp,
        self.pynast_test_template_w_lower_fp,
    ]

    aligner_params = {
        "template_filepath": self.pynast_test1_template_fp,
        "min_len": 15,
    }
    self.pynast_test1_aligner = PyNastAligner(aligner_params)

    self.pynast_test1_expected_aln = Alignment.from_fasta_records(
        parse_fasta(pynast_test1_expected_alignment), DNA)
    self.pynast_test1_expected_fail = SequenceCollection.from_fasta_records(
        parse_fasta(pynast_test1_expected_failure), DNA)
示例7: test_split_fasta_equal_num_seqs_per_file
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def test_split_fasta_equal_num_seqs_per_file(self):
    """split_fasta funcs as expected when equal num seqs go to each file

    A three-record input is split with a per-file count of 1 and three
    output paths are expected; the records read back from those files
    must form the same collection as the input.
    """
    fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                  prefix='split_fasta_tests',
                                  suffix='')
    close(fd)
    infile = ['>seq1', 'AACCTTAA', '>seq2', 'TTAACC', 'AATTAA',
              '>seq3', 'CCTT--AA']

    actual = split_fasta(infile, 1, filename_prefix)

    # Read every split file back; the context manager guarantees each
    # handle is closed before the files are deleted below.
    actual_seqs = []
    for fp in actual:
        with open(fp) as split_f:
            actual_seqs.extend(split_f)
    remove_files(actual)

    expected = ['%s.%d.fasta' % (filename_prefix, i) for i in range(3)]
    # list of file paths is as expected
    self.assertEqual(actual, expected)
    # the split files hold the same records as the input
    self.assertEqual(
        SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
        SequenceCollection.from_fasta_records(parse_fasta(actual_seqs), DNA))
示例8: test_filter_aln_by_otus
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def test_filter_aln_by_otus(self):
    """filter_aln_by_otus: returns the two expected lists of
    (id, sequence) records for the fixture alignment and prefs"""
    self.sample_to_extract = "SampleA,SampleB"

    first_expected = [("SampleA", "AAAAAAAAAAAAAAA")]
    second_expected = [
        ("SampleB", "CCCCCCC"),
        ("SampleC", "GGGGGGGGGGGGGG"),
    ]

    collection = SequenceCollection.from_fasta_records(self.aln, DNA)
    first_observed, second_observed = filter_aln_by_otus(collection,
                                                         self.prefs)

    self.assertEqual(first_observed, first_expected)
    self.assertEqual(second_observed, second_expected)
示例9: main
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def main():
    """Load the inputs named on the command line and run sample filtering.

    Reads the input FASTA and the OTU map, derives the output base name
    from the FASTA path (text before the first '.' of its last
    '/'-separated component), makes sure the output directory exists and
    then calls ``filter_samples``.

    Raises
    ------
    ValueError
        If no samples to extract were supplied.
    OSError
        If the output directory does not exist and cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data = {}

    # load the input alignment; the handle is closed once the records
    # have been read
    with open(opts.input_fasta_fp) as fasta_f:
        data['aln'] = SequenceCollection.from_fasta_records(
            parse_fasta(fasta_f), DNA)

    # Load the otu file
    with open(opts.otu_map_fp, 'U') as otu_f:
        data['otus'] = fields_to_dict(otu_f)

    # Determine which samples to extract from representative seqs
    # and from otus file. Without them there is nothing to filter on, so
    # fail with a clear message instead of an unbound-variable error later.
    if not opts.samples_to_extract:
        raise ValueError('No samples to extract were specified.')
    prefs = process_extract_samples(opts.samples_to_extract)

    filename = opts.input_fasta_fp.strip().split('/')[-1].split('.')[0]

    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Tolerate the directory having appeared in the meantime;
                # any other failure would make every later write fail, so
                # surface it here.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    try:
        action = filter_samples
    except NameError:
        action = None
    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, dir_path, filename)
示例10: test_call_pynast_test1_file_output_alt_params
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def test_call_pynast_test1_file_output_alt_params(self):
    """PyNastAligner writes an empty result file and reports every
    sequence as failed when min_len is set to 1000
    """
    strict_params = {
        "template_filepath": self.pynast_test1_template_fp,
        "min_len": 1000,
    }
    strict_aligner = PyNastAligner(strict_params)

    outcome = strict_aligner(self.pynast_test1_input_fp,
                             result_path=self.result_fp,
                             log_path=self.log_fp,
                             failure_path=self.failure_fp)

    self.assertTrue(outcome is None,
                    "Result should be None when result path provided.")
    self.assertEqual(getsize(self.result_fp), 0,
                     "No alignable seqs should result in an empty file.")

    # every input sequence should show up in the failure file
    with open(self.failure_fp) as fail_handle:
        failed = SequenceCollection.from_fasta_records(
            parse_fasta(fail_handle), DNA)
        self.assertEqual(failed.sequence_count(), 3)
示例11: test_call_pynast_test1_file_output
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def test_call_pynast_test1_file_output(self):
    """PyNastAligner writes the expected alignment and failure files for
    the pynast_test1 seqs
    """
    # results are not collected in memory; the output files are inspected
    outcome = self.pynast_test1_aligner(self.pynast_test1_input_fp,
                                        result_path=self.result_fp,
                                        log_path=self.log_fp,
                                        failure_path=self.failure_fp)
    self.assertTrue(outcome is None,
                    "Result should be None when result path provided.")

    wanted_aln = self.pynast_test1_expected_aln
    with open(self.result_fp) as aligned_handle:
        observed_aln = Alignment.from_fasta_records(
            parse_fasta(aligned_handle), DNA)
        self.assertEqual(observed_aln, wanted_aln)

    with open(self.failure_fp) as fail_handle:
        observed_fail = SequenceCollection.from_fasta_records(
            parse_fasta(fail_handle), DNA)
        self.assertEqual(observed_fail.to_fasta(),
                         self.pynast_test1_expected_fail.to_fasta())
示例12: test_from_fasta_records
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def test_from_fasta_records(self):
    """Construction from (id, sequence) tuples succeeds for each fixture
    """
    build = SequenceCollection.from_fasta_records
    # Only successful construction is checked; results are discarded.
    build(self.seqs1_t, DNASequence)
    build(self.seqs2_t, RNASequence)
    build(self.seqs3_t, NucleotideSequence)
示例13: __call__
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def __call__(self, seq_path, result_path=None, log_path=None,
             failure_path=None, cmbuild_params=None, cmalign_params=None):
    """Align the sequences in ``seq_path`` against the template alignment.

    Parameters
    ----------
    seq_path : str
        Path to the FASTA file holding the candidate sequences.
    result_path : str, optional
        If given, the alignment is written there as FASTA and ``None`` is
        returned.
    log_path : str, optional
        If given, the cmbuild and cmalign parameters used are written
        there.
    failure_path : optional
        Accepted for interface compatibility; not used by this method.
    cmbuild_params : dict, optional
        Extra cmbuild parameters. A copy is taken, then ``'--gapthresh'``
        is forced to 1.0; the caller's dict is left untouched.
    cmalign_params : dict, optional
        Extra cmalign parameters. A copy is taken, then ``'--sub'`` is
        forced to True and ``'--gapthresh'`` to 1.0; the caller's dict is
        left untouched.

    Returns
    -------
    The ``Alignment`` built from the aligned candidate sequences, or
    ``None`` when ``result_path`` is given.

    Raises
    ------
    ValueError
        If parsing the template file raises ``RecordError``.
    """
    log_params = []

    # load candidate sequences
    with open(seq_path, 'U') as seq_f:
        candidate_sequences = dict(parse_fasta(seq_f))

    # load template sequences
    with open(self.Params['template_filepath'], 'U') as template_f:
        try:
            info, template_alignment, struct = list(MinimalRfamParser(
                template_f, seq_constructor=ChangedSequence))[0]
        except RecordError:
            raise ValueError(
                "Template alignment must be in Stockholm format with "
                "corresponding secondary structure annotation when using "
                "InfernalAligner.")

    moltype = self.Params['moltype']

    # Need to make separate mapping for unaligned sequences
    unaligned = SequenceCollection.from_fasta_records(
        candidate_sequences.iteritems(), DNASequence)
    mapped_seqs, new_to_old_ids = unaligned.int_map(prefix='unaligned_')
    mapped_seq_tuples = [(k, str(v)) for k, v in mapped_seqs.iteritems()]

    # Turn on --gapthresh option in cmbuild to force alignment to full
    # model. Work on a copy so the caller's dict is not modified.
    cmbuild_params = {} if cmbuild_params is None else dict(cmbuild_params)
    cmbuild_params.update({'--gapthresh': 1.0})

    # record cmbuild parameters
    log_params.append('cmbuild parameters:')
    log_params.append(str(cmbuild_params))

    # Turn on --sub option in Infernal, since we know the unaligned
    # sequences are fragments.
    # Also turn on --gapthresh to use same gapthresh as was used to build
    # model. Again, copy rather than mutate the caller's dict.
    cmalign_params = {} if cmalign_params is None else dict(cmalign_params)
    cmalign_params.update({'--sub': True, '--gapthresh': 1.0})

    # record cmalign parameters
    log_params.append('cmalign parameters:')
    log_params.append(str(cmalign_params))

    # Align sequences to alignment including alignment gaps.
    aligned, struct_string = cmalign_from_alignment(
        aln=template_alignment,
        structure_string=struct,
        seqs=mapped_seq_tuples,
        moltype=moltype,
        include_aln=True,
        params=cmalign_params,
        cmbuild_params=cmbuild_params)

    # Pull out original sequences from full alignment.
    # Get a dict of the identifiers to sequences (note that this is a
    # cogent alignment object, hence the call to NamedSeqs)
    aligned_dict = aligned.NamedSeqs
    infernal_aligned = [(old_id, aligned_dict[new_id])
                        for new_id, old_id in new_to_old_ids.iteritems()]

    # Create an Alignment object from alignment dict
    infernal_aligned = Alignment.from_fasta_records(infernal_aligned,
                                                    DNASequence)

    if log_path is not None:
        with open(log_path, 'w') as log_file:
            log_file.write('\n'.join(log_params))

    if result_path is not None:
        with open(result_path, 'w') as result_file:
            result_file.write(infernal_aligned.to_fasta())
        return None

    return infernal_aligned
示例14: setUp
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
def setUp(self):
    """Build the temp-file fixtures, the aligner and the expected results
    shared by the PyNAST aligner tests."""
    tmp_prefix = 'PyNastAlignerTests_'

    # (attribute name, file suffix, function producing the file content),
    # in the order the files are created
    written_fixtures = (
        ('pynast_test1_input_fp', '.fasta',
         lambda: pynast_test1_input_fasta),
        ('pynast_test1_template_fp', 'template.fasta',
         lambda: pynast_test1_template_fasta),
        ('pynast_test_template_w_dots_fp', 'template.fasta',
         lambda: pynast_test1_template_fasta.replace('-', '.')),
        ('pynast_test_template_w_u_fp', 'template.fasta',
         lambda: pynast_test1_template_fasta.replace('T', 'U')),
        ('pynast_test_template_w_lower_fp', 'template.fasta',
         lambda: pynast_test1_template_fasta.lower()),
    )
    for attr_name, suffix, make_text in written_fixtures:
        descriptor, path = mkstemp(prefix=tmp_prefix, suffix=suffix)
        setattr(self, attr_name, path)
        close(descriptor)
        with open(path, 'w') as out:
            out.write(make_text())

    # output locations: create and truncate them up front so cleanup can
    # rely on their existence
    touched_fixtures = (
        ('result_fp', '.fasta'),
        ('failure_fp', '.fasta'),
        ('log_fp', '.log'),
    )
    for attr_name, suffix in touched_fixtures:
        descriptor, path = mkstemp(prefix=tmp_prefix, suffix=suffix)
        setattr(self, attr_name, path)
        close(descriptor)
        open(path, 'w').close()

    self._paths_to_clean_up = [
        self.pynast_test1_input_fp,
        self.result_fp,
        self.failure_fp,
        self.log_fp,
        self.pynast_test1_template_fp,
        self.pynast_test_template_w_dots_fp,
        self.pynast_test_template_w_u_fp,
        self.pynast_test_template_w_lower_fp,
    ]

    aligner_params = {
        'template_filepath': self.pynast_test1_template_fp,
        'min_len': 15,
    }
    self.pynast_test1_aligner = PyNastAligner(aligner_params)

    expected_aln_records = parse_fasta(pynast_test1_expected_alignment)
    self.pynast_test1_expected_aln = Alignment.from_fasta_records(
        expected_aln_records, DNA)

    expected_fail_records = parse_fasta(pynast_test1_expected_failure)
    self.pynast_test1_expected_fail = SequenceCollection.from_fasta_records(
        expected_fail_records, DNA)
示例15:
# 需要导入模块: from skbio.core.alignment import SequenceCollection [as 别名]
# 或者: from skbio.core.alignment.SequenceCollection import from_fasta_records [as 别名]
tggctcagattgaacgctggcggcaggcctaacacatgcaagtcgagcggaaacgantnntntgaaccttcggggnacgatnacggcgtcgagcggcggacgggtgagtaatgcctgggaaattgccctgatgtgggggataactattggaaacgatagctaataccgcataatgtctacggaccaaagagggggaccttcgggcctctcgcttcaggatatgcccaggtgggattagctagttggtgaggtaatggctcaccaaggcgacgatccctagctggtctgagaggatgatcagccacactggaactgag
"""
# Test fixture: one record per line, each a sequence identifier followed by a
# tab and a semicolon-delimited lineage string. The identifiers match those
# used for blast_reference_seqs further down.
blast_id_to_taxonomy = \
"""AY800210\tArchaea;Euryarchaeota;Halobacteriales;uncultured
EU883771\tArchaea;Euryarchaeota;Methanomicrobiales;Methanomicrobium et rel.
EF503699\tArchaea;Crenarchaeota;uncultured;uncultured
DQ260310\tArchaea;Euryarchaeota;Methanobacteriales;Methanobacterium
EF503697\tArchaea;Crenarchaeota;uncultured;uncultured"""
blast_test_seqs = SequenceCollection.from_fasta_records([
('s1',
'TTCCGGTTGATCCTGCCGGACCCGACTGCTATCCGGATGCGACTAAGCCATGCTAGTCTAACGGATCTTCGGATCCGTGGCATACCGCTCTGTAACACGTAGATAACCTACCCTGAGGTCGGGGAAACTCCCGGGAAACTGGGCCTAATCCCCGATAGATAATTTGTACTGGAATGTCTTTTTATTGAAACCTCCGAGGCCTCAGGATGGGTCTGCGCCAGATTATGGTCGTAGGTGGGGTAACGGCCCACCTAGCCTTTGATCTGTACCGGACATGAGAGTGTGTGCCGGGAGATGGCCACTGAGACAAGGGGCCAGGCCCTACGGGGCGCAGCAGGCGCGAAAACTTCACAATGCCCGCAAGGGTGATGAGGGTATCCGAGTGCTACCTTAGCCGGTAGCTTTTATTCAGTGTAAATAGCTAGATGAATAAGGGGAGGGCAAGGCTGGTGCCAGCCGCCGCGGTAAAACCAGCTCCCGAGTGGTCGGGATTTTTATTGGGCCTAAAGCGTCCGTAGCCGGGCGTGCAAGTCATTGGTTAAATATCGGGTCTTAAGCCCGAACCTGCTAGTGATACTACACGCCTTGGGACCGGAAGAGGCAAATGGTACGTTGAGGGTAGGGGTGAAATCCTGTAATCCCCAACGGACCACCGGTGGCGAAGCTTGTTCAGTCATGAACAACTCTACACAAGGCGATTTGCTGGGACGGATCCGACGGTGAGGGACGAAACCCAGGGGAGCGAGCGGGATTAGATACCCCGGTAGTCCTGGGCGTAAACGATGCGAACTAGGTGTTGGCGGAGCCACGAGCTCTGTCGGTGCCGAAGCGAAGGCGTTAAGTTCGCCGCCAGGGGAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCAC'),
('s2',
'TGGCGTACGGCTCAGTAACACGTGGATAACTTACCCTTAGGACTGGGATAACTCTGGGAAACTGGGGATAATACTGGATATTAGGCTATGCCTGGAATGGTTTGCCTTTGAAATGTTTTTTTTCGCCTAAGGATAGGTCTGCGGCTGATTAGGTCGTTGGTGGGGTAATGGCCCACCAAGCCGATGATCGGTACGGGTTGTGAGAGCAAGGGCCCGGAGATGGAACCTGAGACAAGGTTCCAGACCCTACGGGGTGCAGCAGGCGCGAAACCTCCGCAATGTACGAAAGTGCGACGGGGGGATCCCAAGTGTTATGCTTTTTTGTATGACTTTTCATTAGTGTAAAAAGCTTTTAGAATAAGAGCTGGGCAAGACCGGTGCCAGCCGCCGCGGTAACACCGGCAGCTCGAGTGGTGACCACTTTTATTGGGCTTAAAGCGTTCGTAGCTTGATTTTTAAGTCTCTTGGGAAATCTCACGGCTTAACTGTGAGGCGTCTAAGAGATACTGGGAATCTAGGGACCGGGAGAGGTAAGAGGTACTTCAGGGGTAGAAGTGAAATTCTGTAATCCTTGAGGGACCACCGATGGCGAAGGCATCTTACCAGAACGGCTTCGACAGTGAGGAACGAAAGCTGGGGGAGCGAACGGGATTAGATACCCCGGTAGTCCCAGCCGTAAACTATGCGCGTTAGGTGTGCCTGTAACTACGAGTTACCGGGGTGCCGAAGTGAAAACGTGAAACGTGCCGCCTGGGAAGTACGGTCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAACGGGTGGAGCCTGCGGTTTAATTGGACTCAACGCCGGGCAGCTCACCGGATAGGACAGCGGAATGATAGCCGGGCTGAAGACCTTGCTTGACCAGCTGAGA'),
('s3',
'AAGAATGGGGATAGCATGCGAGTCACGCCGCAATGTGTGGCATACGGCTCAGTAACACGTAGTCAACATGCCCAGAGGACGTGGACACCTCGGGAAACTGAGGATAAACCGCGATAGGCCACTACTTCTGGAATGAGCCATGACCCAAATCTATATGGCCTTTGGATTGGACTGCGGCCGATCAGGCTGTTGGTGAGGTAATGGCCCACCAAACCTGTAACCGGTACGGGCTTTGAGAGAAGGAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTATGGGGCGCAGCAGGCACGAAACCTCTGCAATAGGCGAAAGCTTGACAGGGTTACTCTGAGTGATGCCCGCTAAGGGTATCTTTTGGCACCTCTAAAAATGGTGCAGAATAAGGGGTGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCACCCCGAGTTGTCGGGACGATTATTGGGCCTAAAGCATCCGTAGCCTGTTCTGCAAGTCCTCCGTTAAATCCACCCGCTTAACGGATGGGCTGCGGAGGATACTGCAGAGCTAGGAGGCGGGAGAGGCAAACGGTACTCAGTGGGTAGGGGTAAAATCCTTTGATCTACTGAAGACCACCAGTGGTGAAGGCGGTTCGCCAGAACGCGCTCGAACGGTGAGGATGAAAGCTGGGGGAGCAAACCGGAATAGATACCCGAGTAATCCCAACTGTAAACGATGGCAACTCGGGGATGGGTTGGCCTCCAACCAACCCCATGGCCGCAGGGAAGCCGTTTAGCTCTCCCGCCTGGGGAATACGGTCCGCAGAATTGAACCTTAAAGGAATTTGGCGGGGAACCCCCACAAGGGGGAAAACCGTGCGGTTCAATTGGAATCCACCCCCCGGAAACTTTACCCGGGCGCG'),
('s4',
'GATACCCCCGGAAACTGGGGATTATACCGGATATGTGGGGCTGCCTGGAATGGTACCTCATTGAAATGCTCCCGCGCCTAAAGATGGATCTGCCGCAGAATAAGTAGTTTGCGGGGTAAATGGCCACCCAGCCAGTAATCCGTACCGGTTGTGAAAACCAGAACCCCGAGATGGAAACTGAAACAAAGGTTCAAGGCCTACCGGGCACAACAAGCGCCAAAACTCCGCCATGCGAGCCATCGCGACGGGGGAAAACCAAGTACCACTCCTAACGGGGTGGTTTTTCCGAAGTGGAAAAAGCCTCCAGGAATAAGAACCTGGGCCAGAACCGTGGCCAGCCGCCGCCGTTACACCCGCCAGCTCGAGTTGTTGGCCGGTTTTATTGGGGCCTAAAGCCGGTCCGTAGCCCGTTTTGATAAGGTCTCTCTGGTGAAATTCTACAGCTTAACCTGTGGGAATTGCTGGAGGATACTATTCAAGCTTGAAGCCGGGAGAAGCCTGGAAGTACTCCCGGGGGTAAGGGGTGAAATTCTATTATCCCCGGAAGACCAACTGGTGCCGAAGCGGTCCAGCCTGGAACCGAACTTGACCGTGAGTTACGAAAAGCCAAGGGGCGCGGACCGGAATAAAATAACCAGGGTAGTCCTGGCCGTAAACGATGTGAACTTGGTGGTGGGAATGGCTTCGAACTGCCCAATTGCCGAAAGGAAGCTGTAAATTCACCCGCCTTGGAAGTACGGTCGCAAGACTGGAACCTAAAAGGAATTGGCGGGGGGACACCACAACGCGTGGAGCCTGGCGGTTTTATTGGGATTCCACGCAGACATCTCACTCAGGGGCGACAGCAGAAATGATGGGCAGGTTGATGACCTTGCTTGACAAGCTGAAAAGGAGGTGCAT'),
('s5',
'TAAAATGACTAGCCTGCGAGTCACGCCGTAAGGCGTGGCATACAGGCTCAGTAACACGTAGTCAACATGCCCAAAGGACGTGGATAACCTCGGGAAACTGAGGATAAACCGCGATAGGCCAAGGTTTCTGGAATGAGCTATGGCCGAAATCTATATGGCCTTTGGATTGGACTGCGGCCGATCAGGCTGTTGGTGAGGTAATGGCCCACCAAACCTGTAACCGGTACGGGCTTTGAGAGAAGTAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTATGGGGCGCAGCAGGCGCGAAACCTCTGCAATAGGCGAAAGCCTGACAGGGTTACTCTGAGTGATGCCCGCTAAGGGTATCTTTTGGCACCTCTAAAAATGGTGCAGAATAAGGGGTGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCACCCCGAGTTGTCGGGACGATTATTGGGCCTAAAGCATCCGTAGCCTGTTCTGCAAGTCCTCCGTTAAATCCACCTGCTCAACGGATGGGCTGCGGAGGATACCGCAGAGCTAGGAGGCGGGAGAGGCAAACGGTACTCAGTGGGTAGGGGTAAAATCCATTGATCTACTGAAGACCACCAGTGGCGAAGGCGGTTTGCCAGAACGCGCTCGACGGTGAGGGATGAAAGCTGGGGGAGCAAACCGGATTAGATACCCGGGGTAGTCCCAGCTGTAAACGGATGCAGACTCGGGTGATGGGGTTGGCTTCCGGCCCAACCCCAATTGCCCCCAGGCGAAGCCCGTTAAGATCTTGCCGCCCTGTCAGATGTCAGGGCCGCCAATACTCGAAACCTTAAAAGGAAATTGGGCGCGGGAAAAGTCACCAAAAGGGGGTTGAAACCCTGCGGGTTATATATTGTAAACC'),
('s6', 'ATAGTAGGTGATTGCGAAGACCGCGGAACCGGGACCTAGCACCCAGCCTGTACCGAGGGATGGGGAGCTGTGGCGGTCCACCGACGACCCTTTGTGACAGCCGATTCCTACAATCCCAGCAACTGCAATGATCCACTCTAGTCGGCATAACCGGGAATCGTTAACCTGGTAGGGTTCTCTACGTCTGAGTCTACAGCCCAGAGCAGTCAGGCTACTATACGGTTTGCTGCATTGCATAGGCATCGGTCGCGGGCACTCCTCGCGGTTTCAGCTAGGGTTTAAATGGAGGGTCGCTGCATGAGTATGCAAATAGTGCCACTGCTCTGATACAGAGAAGTGTTGATATGACACCTAAGACCTGGTCACAGTTTTAACCTGCCTACGCACACCAGTGTGCTATTGATTAACGATATCGGTAGACACGACCTTGGTAACCTGACTAACCTCATGGAAAGTGACTAGATAAATGGACCGGAGCCAACTTTCACCCGGAAAACGGACCGACGAATCGTCGTAGACTACCGATCTGACAAAATAAGCACGAGGGAGCATGTTTTGCGCAGGCTAGCCTATTCCCACCTCAAGCCTCGAGAACCAAGACGCCTGATCCGGTGCTGCACGAAGGGTCGCCTCTAGGTAAGGAGAGCTGGCATCTCCAGATCCGATATTTTACCCAACCTTTGCGCGCTCAGATTGTTATAGTGAAACGATTTAAGCCTGAACGGAGTTCCGCTCCATATGTGGGTTATATATGTGAGATGTATTAACTTCCGCAGTTGTCTCTTTCGGTGCAGTACGCTTGGTATGTGTCTCAAATAATCGGTATTATAGTGATCTGAGAGGTTTTAAG')], DNA)
blast_reference_seqs = SequenceCollection.from_fasta_records([
('AY800210',
'TTCCGGTTGATCCTGCCGGACCCGACTGCTATCCGGATGCGACTAAGCCATGCTAGTCTAACGGATCTTCGGATCCGTGGCATACCGCTCTGTAACACGTAGATAACCTACCCTGAGGTCGGGGAAACTCCCGGGAAACTGGGCCTAATCCCCGATAGATAATTTGTACTGGAATGTCTTTTTATTGAAACCTCCGAGGCCTCAGGATGGGTCTGCGCCAGATTATGGTCGTAGGTGGGGTAACGGCCCACCTAGCCTTTGATCTGTACCGGACATGAGAGTGTGTGCCGGGAGATGGCCACTGAGACAAGGGGCCAGGCCCTACGGGGCGCAGCAGGCGCGAAAACTTCACAATGCCCGCAAGGGTGATGAGGGTATCCGAGTGCTACCTTAGCCGGTAGCTTTTATTCAGTGTAAATAGCTAGATGAATAAGGGGAGGGCAAGGCTGGTGCCAGCCGCCGCGGTAAAACCAGCTCCCGAGTGGTCGGGATTTTTATTGGGCCTAAAGCGTCCGTAGCCGGGCGTGCAAGTCATTGGTTAAATATCGGGTCTTAAGCCCGAACCTGCTAGTGATACTACACGCCTTGGGACCGGAAGAGGCAAATGGTACGTTGAGGGTAGGGGTGAAATCCTGTAATCCCCAACGGACCACCGGTGGCGAAGCTTGTTCAGTCATGAACAACTCTACACAAGGCGATTTGCTGGGACGGATCCGACGGTGAGGGACGAAACCCAGGGGAGCGAGCGGGATTAGATACCCCGGTAGTCCTGGGCGTAAACGATGCGAACTAGGTGTTGGCGGAGCCACGAGCTCTGTCGGTGCCGAAGCGAAGGCGTTAAGTTCGCCGCCAGGGGAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCAC'),
('EU883771',
'TGGCGTACGGCTCAGTAACACGTGGATAACTTACCCTTAGGACTGGGATAACTCTGGGAAACTGGGGATAATACTGGATATTAGGCTATGCCTGGAATGGTTTGCCTTTGAAATGTTTTTTTTCGCCTAAGGATAGGTCTGCGGCTGATTAGGTCGTTGGTGGGGTAATGGCCCACCAAGCCGATGATCGGTACGGGTTGTGAGAGCAAGGGCCCGGAGATGGAACCTGAGACAAGGTTCCAGACCCTACGGGGTGCAGCAGGCGCGAAACCTCCGCAATGTACGAAAGTGCGACGGGGGGATCCCAAGTGTTATGCTTTTTTGTATGACTTTTCATTAGTGTAAAAAGCTTTTAGAATAAGAGCTGGGCAAGACCGGTGCCAGCCGCCGCGGTAACACCGGCAGCTCGAGTGGTGACCACTTTTATTGGGCTTAAAGCGTTCGTAGCTTGATTTTTAAGTCTCTTGGGAAATCTCACGGCTTAACTGTGAGGCGTCTAAGAGATACTGGGAATCTAGGGACCGGGAGAGGTAAGAGGTACTTCAGGGGTAGAAGTGAAATTCTGTAATCCTTGAGGGACCACCGATGGCGAAGGCATCTTACCAGAACGGCTTCGACAGTGAGGAACGAAAGCTGGGGGAGCGAACGGGATTAGATACCCCGGTAGTCCCAGCCGTAAACTATGCGCGTTAGGTGTGCCTGTAACTACGAGTTACCGGGGTGCCGAAGTGAAAACGTGAAACGTGCCGCCTGGGAAGTACGGTCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAACGGGTGGAGCCTGCGGTTTAATTGGACTCAACGCCGGGCAGCTCACCGGATAGGACAGCGGAATGATAGCCGGGCTGAAGACCTTGCTTGACCAGCTGAGA'),
('EF503699',
'AAGAATGGGGATAGCATGCGAGTCACGCCGCAATGTGTGGCATACGGCTCAGTAACACGTAGTCAACATGCCCAGAGGACGTGGACACCTCGGGAAACTGAGGATAAACCGCGATAGGCCACTACTTCTGGAATGAGCCATGACCCAAATCTATATGGCCTTTGGATTGGACTGCGGCCGATCAGGCTGTTGGTGAGGTAATGGCCCACCAAACCTGTAACCGGTACGGGCTTTGAGAGAAGGAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTATGGGGCGCAGCAGGCACGAAACCTCTGCAATAGGCGAAAGCTTGACAGGGTTACTCTGAGTGATGCCCGCTAAGGGTATCTTTTGGCACCTCTAAAAATGGTGCAGAATAAGGGGTGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCACCCCGAGTTGTCGGGACGATTATTGGGCCTAAAGCATCCGTAGCCTGTTCTGCAAGTCCTCCGTTAAATCCACCCGCTTAACGGATGGGCTGCGGAGGATACTGCAGAGCTAGGAGGCGGGAGAGGCAAACGGTACTCAGTGGGTAGGGGTAAAATCCTTTGATCTACTGAAGACCACCAGTGGTGAAGGCGGTTCGCCAGAACGCGCTCGAACGGTGAGGATGAAAGCTGGGGGAGCAAACCGGAATAGATACCCGAGTAATCCCAACTGTAAACGATGGCAACTCGGGGATGGGTTGGCCTCCAACCAACCCCATGGCCGCAGGGAAGCCGTTTAGCTCTCCCGCCTGGGGAATACGGTCCGCAGAATTGAACCTTAAAGGAATTTGGCGGGGAACCCCCACAAGGGGGAAAACCGTGCGGTTCAATTGGAATCCACCCCCCGGAAACTTTACCCGGGCGCG'),
('DQ260310',