本文整理汇总了Python中skbio.core.alignment.SequenceCollection类的典型用法代码示例。如果您正苦于以下问题：Python SequenceCollection类的具体用法？Python SequenceCollection怎么用？Python SequenceCollection使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SequenceCollection类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_split_fasta_diff_num_seqs_per_file
def test_split_fasta_diff_num_seqs_per_file(self):
    """split_fasta funcs as expected when diff num seqs go to each file

    Three records are split with two sequences per file, so two output
    files named ``<prefix>.0.fasta`` and ``<prefix>.1.fasta`` are expected.
    """
    # mkstemp is used only to obtain a unique path prefix in the directory
    # returned by get_qiime_temp_dir(); the descriptor is closed right away.
    fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                  prefix='split_fasta_tests',
                                  suffix='')
    close(fd)
    infile = ['>seq1', 'AACCTTAA', '>seq2', 'TTAACC', 'AATTAA',
              '>seq3', 'CCTT--AA']

    actual = split_fasta(infile, 2, filename_prefix)
    actual_seqs = []
    for fp in actual:
        # read through a context manager so every handle is closed
        with open(fp) as split_f:
            actual_seqs.extend(split_f)
    # clean up before asserting so a failing assertion leaves nothing
    # behind; the placeholder file created by mkstemp is removed as well
    remove_files(actual + [filename_prefix])

    # list of file paths is as expected
    expected = ['%s.%d.fasta' % (filename_prefix, i) for i in range(2)]
    self.assertEqual(actual, expected)
    # building seq collections from infile and the split files result in
    # equivalent seq collections
    self.assertEqual(
        SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
        SequenceCollection.from_fasta_records(parse_fasta(actual_seqs), DNA))
示例2: test_split_fasta_diff_num_seqs_per_file_alt
def test_split_fasta_diff_num_seqs_per_file_alt(self):
    """split_fasta funcs always catches all seqs

    Splits the same 59-sequence input with every seqs-per-file value in
    ``range(1, 1000)`` and checks that re-reading the split files gives a
    collection equal to the one built from the input.
    """
    # start with 59 seqs (b/c it's prime, so should make more
    # confusing splits)
    in_seqs = SequenceCollection.from_fasta_records(
        [('seq%s' % k, 'AACCTTAA') for k in range(59)], DNA)
    infile = in_seqs.to_fasta().split('\n')

    # test seqs_per_file from 1 to 999
    for i in range(1, 1000):
        # mkstemp only supplies a unique path prefix; close its descriptor
        fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                      prefix='split_fasta_tests',
                                      suffix='')
        close(fd)

        actual = split_fasta(infile, i, filename_prefix)
        actual_seqs = []
        for fp in actual:
            # close each handle promptly; this loop runs 999 times
            with open(fp) as split_f:
                actual_seqs.extend(split_f)
        # remove the files now, so if the test fails they still get
        # cleaned up; the mkstemp placeholder is removed too so the loop
        # does not leave one stray temp file per iteration
        remove_files(actual + [filename_prefix])

        # building seq collections from infile and the split files result
        # in equivalent seq collections
        self.assertEqual(
            SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
            SequenceCollection.from_fasta_records(
                parse_fasta(actual_seqs), DNA))
示例3: __call__
def __call__(self, seq_path, result_path=None, log_path=None,
             failure_path=None):
    """Align the sequences in seq_path against the configured template

    seq_path: path to the FASTA file of candidate sequences
    result_path: if provided, the aligned sequences are written to this
     path as FASTA and None is returned; otherwise the Alignment object
     is returned
    log_path: passed to NastLogger
    failure_path: if provided, the sequences pynast_seqs reports as
     failed are written to this path as FASTA

    The template path, pairwise alignment method, min_pct and min_len
    are read from self.Params.
    """
    # The candidate file has to stay open until pynast_seqs has returned:
    # parse_fasta is handed the open handle and its records may be read
    # lazily.
    # NOTE(review): the 'U' mode flag was removed in Python 3.11; it is
    # kept here to preserve behavior on the interpreter this code targets.
    with open(seq_path, 'U') as seq_file:
        # load candidate sequences
        candidate_sequences = parse_fasta(seq_file)

        # load template sequences
        template_alignment_fp = self.Params['template_filepath']
        with open(template_alignment_fp) as template_f:
            # replace '.' characters with '-' characters and upper-case
            template_records = [
                (seq_id, seq.replace('.', '-').upper())
                for seq_id, seq in parse_fasta(template_f)]
        template_alignment = Alignment.from_fasta_records(
            template_records, DNASequence, validate=True)

        # initialize_logger
        logger = NastLogger(log_path)

        # get function for pairwise alignment method
        pairwise_alignment_f = pairwise_alignment_methods[
            self.Params['pairwise_alignment_method']]

        pynast_aligned, pynast_failed = pynast_seqs(
            candidate_sequences,
            template_alignment,
            min_pct=self.Params['min_pct'],
            min_len=self.Params['min_len'],
            align_unaligned_seqs_f=pairwise_alignment_f,
            logger=logger,
            temp_dir=get_qiime_temp_dir())

    logger.record(str(self))

    # convert the pynast results into DNASequence objects
    pynast_failed = SequenceCollection(
        [DNASequence(str(seq), identifier=seq.Name)
         for seq in pynast_failed])
    pynast_aligned = Alignment(
        [DNASequence(str(seq), identifier=seq.Name)
         for seq in pynast_aligned])

    if failure_path is not None:
        with open(failure_path, 'w') as fail_file:
            fail_file.write(pynast_failed.to_fasta())

    if result_path is not None:
        with open(result_path, 'w') as result_file:
            result_file.write(pynast_aligned.to_fasta())
        return None
    return pynast_aligned
示例4: filter_samples
def filter_samples(prefs, data, dir_path='', filename=None):
    """processes the filtering of the otus file and representative seq set, then
    writes filtered otus and filtered representative seq set files

    prefs: passed to filter_otus and filter_aln_by_otus
    data: dict providing the sequences under 'aln' and the otu map
     under 'otus'
    dir_path: directory the output files are written to; an empty string
     means the current working directory
    filename: base name used as the prefix of the three output files

    Writes <filename>_sfiltered_otus.txt, <filename>_sremoved.fasta and
    <filename>_sfiltered.fasta, in that order.

    Raises ValueError if no sequences were removed, or if no sequences
    remain after filtering. Files written before the failing check are
    left in place.
    """
    # local import so this block does not depend on the module's imports
    from os.path import join

    aln = data['aln']
    otus = data['otus']

    # filter the otus file based on which samples to remove
    new_otus_list = filter_otus(otus, prefs)

    # Write out a new otus file. os.path.join is used instead of
    # '%s/%s' so that the default dir_path='' resolves to the current
    # directory rather than to the filesystem root.
    filtered_otus_output_filepath = join(
        dir_path, '%s_sfiltered_otus.txt' % filename)
    with open(filtered_otus_output_filepath, 'w') as otus_out:
        for key in new_otus_list:
            otus_out.write(key[0])
            for j in key[1]:
                otus_out.write('\t' + str(j))
            otus_out.write('\n')

    # filter seq set
    filtered_seqs, removed_seqs = filter_aln_by_otus(aln, prefs)

    # write a fasta containing list of sequences removed from
    # representative set
    if len(removed_seqs) == 0:
        raise ValueError(
            'No sequences were removed.  Did you specify the correct Sample ID?')
    removed_seqs = SequenceCollection.from_fasta_records(
        [(e[0], str(e[1])) for e in removed_seqs], DNA)
    output_filepath2 = join(dir_path, '%s_sremoved.fasta' % filename)
    with open(output_filepath2, 'w') as output_file2:
        output_file2.write(removed_seqs.to_fasta())

    # write a fasta containing the filtered representative seqs
    if len(filtered_seqs) == 0:
        raise ValueError(
            'No sequences were remaining in the fasta file.  Did you remove all Sample ID\'s?')
    filtered_seqs = SequenceCollection.from_fasta_records(
        [(e[0], str(e[1])) for e in filtered_seqs], DNA)
    output_filepath = join(dir_path, '%s_sfiltered.fasta' % filename)
    with open(output_filepath, 'w') as output_file:
        output_file.write(filtered_seqs.to_fasta())
示例5: test_degap
def test_degap(self):
    """degap output equals the input records with '.' and '-' removed
    """
    def strip_gaps(records):
        # drop both gap characters from every (identifier, sequence) pair
        return [(seq_id, seq.replace('.', '').replace('-', ''))
                for seq_id, seq in records]

    # DNA records
    dna_expected = SequenceCollection.from_fasta_records(
        strip_gaps(self.seqs1_t), DNASequence)
    dna_observed = self.a1.degap()
    self.assertEqual(dna_observed, dna_expected)

    # RNA records
    rna_expected = SequenceCollection.from_fasta_records(
        strip_gaps(self.seqs2_t), RNASequence)
    rna_observed = self.a2.degap()
    self.assertEqual(rna_observed, rna_expected)
示例6: test_call_write_to_file
def test_call_write_to_file(self):
    """ReferenceRepSetPicker.__call__ otu map correctly written to file"""
    picker_params = {'Algorithm': 'first', 'ChoiceF': first_id}
    picker = ReferenceRepSetPicker(params=picker_params)
    picker(self.tmp_seq_filepath,
           self.tmp_otu_filepath,
           self.ref_seq_filepath,
           result_path=self.result_filepath)

    # sequences the picker wrote to the result file
    with open(self.result_filepath) as result_f:
        observed = SequenceCollection.from_fasta_records(
            parse_fasta(result_f), DNA)

    # sequences listed in the expected-result fixture
    expected_lines = rep_seqs_reference_result_file_exp.split('\n')
    expected = SequenceCollection.from_fasta_records(
        parse_fasta(expected_lines), DNA)

    # we don't care about order in the results
    self.assertEqual(set(observed), set(expected))
示例7: test_distances
def test_distances(self):
    """distances builds a DistanceMatrix using the supplied function
    """
    collection = SequenceCollection([DNA("ACGT", "d1"), DNA("ACGG", "d2")])

    # hamming distance between the two sequences
    hamming_expected = DistanceMatrix([[0, 0.25],
                                       [0.25, 0]], ['d1', 'd2'])
    hamming_observed = collection.distances(hamming)
    self.assertEqual(hamming_observed, hamming_expected)

    # alt distance function provided
    def dumb_distance(seq_a, seq_b):
        return 42.

    dumb_expected = DistanceMatrix([[0, 42.],
                                    [42., 0]], ['d1', 'd2'])
    dumb_observed = collection.distances(dumb_distance)
    self.assertEqual(dumb_observed, dumb_expected)
示例8: setUp
def setUp(self):
    """Create the temp files, aligner and expected results used by the tests
    """
    def make_temp_file(suffix, contents=""):
        # create a uniquely named file, close the descriptor returned by
        # mkstemp, then (re)write the file through its path
        fd, path = mkstemp(prefix="PyNastAlignerTests_", suffix=suffix)
        close(fd)
        with open(path, "w") as f:
            f.write(contents)
        return path

    # candidate sequences
    self.pynast_test1_input_fp = make_temp_file(
        ".fasta", pynast_test1_input_fasta)

    # template alignment plus variants with '.' gaps, U for T, lower case
    self.pynast_test1_template_fp = make_temp_file(
        "template.fasta", pynast_test1_template_fasta)
    self.pynast_test_template_w_dots_fp = make_temp_file(
        "template.fasta", pynast_test1_template_fasta.replace("-", "."))
    self.pynast_test_template_w_u_fp = make_temp_file(
        "template.fasta", pynast_test1_template_fasta.replace("T", "U"))
    self.pynast_test_template_w_lower_fp = make_temp_file(
        "template.fasta", pynast_test1_template_fasta.lower())

    # create temp file names (and touch them so we can reliably
    # clean them up)
    self.result_fp = make_temp_file(".fasta")
    self.failure_fp = make_temp_file(".fasta")
    self.log_fp = make_temp_file(".log")

    self._paths_to_clean_up = [
        self.pynast_test1_input_fp,
        self.result_fp,
        self.failure_fp,
        self.log_fp,
        self.pynast_test1_template_fp,
        self.pynast_test_template_w_dots_fp,
        self.pynast_test_template_w_u_fp,
        self.pynast_test_template_w_lower_fp,
    ]

    aligner_params = {"template_filepath": self.pynast_test1_template_fp,
                      "min_len": 15}
    self.pynast_test1_aligner = PyNastAligner(aligner_params)

    self.pynast_test1_expected_aln = Alignment.from_fasta_records(
        parse_fasta(pynast_test1_expected_alignment), DNA)
    self.pynast_test1_expected_fail = SequenceCollection.from_fasta_records(
        parse_fasta(pynast_test1_expected_failure), DNA)
示例9: setUp
def setUp(self):
    """Build the records, sequences and collections shared by the tests
    """
    # (identifier, sequence) records
    self.seqs1_t = [('d1', 'GATTACA'), ('d2', 'TTG')]
    self.seqs2_t = [('r1', 'GAUUACA'), ('r2', 'UUG'),
                    ('r3', 'U-----UGCC--')]
    self.seqs3_t = self.seqs1_t + self.seqs2_t

    # DNA sequence objects, upper and lower case
    self.d1, self.d2 = [DNASequence(seq, identifier=seq_id)
                        for seq_id, seq in self.seqs1_t]
    self.d1_lower, self.d2_lower = [
        DNASequence(seq.lower(), identifier=seq_id)
        for seq_id, seq in self.seqs1_t]

    # RNA sequence objects
    self.r1, self.r2, self.r3 = [RNASequence(seq, identifier=seq_id)
                                 for seq_id, seq in self.seqs2_t]

    # sequence containing an 'X'; presumably not a valid DNA character,
    # given it backs the invalid_s1 fixture
    self.i1 = DNASequence('GATXACA', identifier="i1")

    # lists of sequence objects
    self.seqs1 = [self.d1, self.d2]
    self.seqs1_lower = [self.d1_lower, self.d2_lower]
    self.seqs2 = [self.r1, self.r2, self.r3]
    self.seqs3 = self.seqs1 + self.seqs2

    # collections built from those lists
    self.s1 = SequenceCollection(self.seqs1)
    self.s1_lower = SequenceCollection(self.seqs1_lower)
    self.s2 = SequenceCollection(self.seqs2)
    self.s3 = SequenceCollection(self.seqs3)
    self.empty = SequenceCollection([])
    self.invalid_s1 = SequenceCollection([self.i1])
示例10: test_split_fasta_equal_num_seqs_per_file
def test_split_fasta_equal_num_seqs_per_file(self):
    """split_fasta funcs as expected when equal num seqs go to each file

    Three records are split with one sequence per file, so three output
    files named ``<prefix>.0.fasta`` .. ``<prefix>.2.fasta`` are expected.
    """
    # mkstemp is used only to obtain a unique path prefix in the directory
    # returned by get_qiime_temp_dir(); the descriptor is closed right away.
    fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                  prefix='split_fasta_tests',
                                  suffix='')
    close(fd)
    infile = ['>seq1', 'AACCTTAA', '>seq2', 'TTAACC', 'AATTAA',
              '>seq3', 'CCTT--AA']

    actual = split_fasta(infile, 1, filename_prefix)
    actual_seqs = []
    for fp in actual:
        # read through a context manager so every handle is closed
        with open(fp) as split_f:
            actual_seqs.extend(split_f)
    # clean up before asserting so a failing assertion leaves nothing
    # behind; the placeholder file created by mkstemp is removed as well
    remove_files(actual + [filename_prefix])

    # list of file paths is as expected
    expected = ['%s.%d.fasta' % (filename_prefix, i) for i in range(3)]
    self.assertEqual(actual, expected)
    # collections built from the input and from the split files are equal
    self.assertEqual(
        SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
        SequenceCollection.from_fasta_records(parse_fasta(actual_seqs), DNA))
示例11: test_filter_aln_by_otus
def test_filter_aln_by_otus(self):
    """filter_aln_by_otus: determines which sequences to keep and which
    sequences to remove"""
    self.sample_to_extract = "SampleA,SampleB"

    # expected first and second return values, in that order
    expected_first = [("SampleA", "AAAAAAAAAAAAAAA")]
    expected_second = [("SampleB", "CCCCCCC"),
                       ("SampleC", "GGGGGGGGGGGGGG")]

    collection = SequenceCollection.from_fasta_records(self.aln, DNA)
    observed_first, observed_second = filter_aln_by_otus(
        collection, self.prefs)

    self.assertEqual(observed_first, expected_first)
    self.assertEqual(observed_second, expected_second)
示例12: main
def main():
    """opens files as necessary based on prefs

    Parses the command line, loads the input FASTA and the otu map,
    derives the output file prefix from the input FASTA file name and
    calls filter_samples.

    Raises ValueError if no samples to extract were given, and lets
    OSError propagate if the output directory cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data = {}

    # load the input alignment
    with open(opts.input_fasta_fp) as fasta_f:
        data['aln'] = SequenceCollection.from_fasta_records(
            parse_fasta(fasta_f), DNA)

    # Load the otu file
    # NOTE(review): the 'U' mode flag was removed in Python 3.11; it is
    # kept here to preserve behavior on the interpreter this code targets.
    with open(opts.otu_map_fp, 'U') as otu_f:
        data['otus'] = fields_to_dict(otu_f)

    # Determine which which samples to extract from representative seqs
    # and from otus file. Fail with a clear message instead of hitting an
    # unbound 'prefs' further down.
    if not opts.samples_to_extract:
        raise ValueError('No samples to extract were specified.')
    prefs = process_extract_samples(opts.samples_to_extract)

    # output file prefix: input file name up to its first '.'
    filename = opts.input_fasta_fp.strip().split('/')[-1]
    filename = filename.split('.')[0]

    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            # a failure here is reported as OSError rather than being
            # swallowed and leaving dir_path undefined
            os.mkdir(dir_path)
    else:
        dir_path = './'

    filter_samples(prefs, data, dir_path, filename)
示例13: test_call_pynast_test1_file_output_alt_params
def test_call_pynast_test1_file_output_alt_params(self):
    """PyNastAligner writes correct output files when no seqs align
    """
    # min_len of 1000 is used so that no input sequence aligns
    strict_params = {"template_filepath": self.pynast_test1_template_fp,
                     "min_len": 1000}
    strict_aligner = PyNastAligner(strict_params)

    result = strict_aligner(
        self.pynast_test1_input_fp,
        result_path=self.result_fp,
        log_path=self.log_fp,
        failure_path=self.failure_fp)

    self.assertTrue(result is None,
                    "Result should be None when result path provided.")
    self.assertEqual(getsize(self.result_fp), 0,
                     "No alignable seqs should result in an empty file.")

    # all seqs reported to fail
    with open(self.failure_fp) as failure_f:
        failed = SequenceCollection.from_fasta_records(
            parse_fasta(failure_f), DNA)
    self.assertEqual(failed.sequence_count(), 3)
示例14: test_call_pynast_test1_file_output
def test_call_pynast_test1_file_output(self):
    """PyNastAligner writes correct output files for pynast_test1 seqs
    """
    # do not collect results; check output files instead
    result = self.pynast_test1_aligner(
        self.pynast_test1_input_fp,
        result_path=self.result_fp,
        log_path=self.log_fp,
        failure_path=self.failure_fp)
    self.assertTrue(result is None,
                    "Result should be None when result path provided.")

    # the alignment written to disk matches the expected alignment
    with open(self.result_fp) as result_f:
        observed_aln = Alignment.from_fasta_records(
            parse_fasta(result_f), DNA)
    self.assertEqual(observed_aln, self.pynast_test1_expected_aln)

    # the failures written to disk match the expected failures; compared
    # through their FASTA text
    with open(self.failure_fp) as failure_f:
        observed_fail = SequenceCollection.from_fasta_records(
            parse_fasta(failure_f), DNA)
    self.assertEqual(observed_fail.to_fasta(),
                     self.pynast_test1_expected_fail.to_fasta())
示例15: test_from_fasta_records
def test_from_fasta_records(self):
    """from_fasta_records accepts each record list with its sequence type
    """
    # each case only has to construct without raising
    cases = (
        (self.seqs1_t, DNASequence),
        (self.seqs2_t, RNASequence),
        (self.seqs3_t, NucleotideSequence),
    )
    for records, seq_constructor in cases:
        SequenceCollection.from_fasta_records(records, seq_constructor)