本文整理匯總了Python中skbio.SequenceCollection類的典型用法代碼示例。如果您正苦於以下問題：Python SequenceCollection類的具體用法？Python SequenceCollection怎麼用？Python SequenceCollection使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了SequenceCollection類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_k_word_frequencies
def test_k_word_frequencies(self):
    """k_word_frequencies returns one frequency mapping per sequence."""
    # Single-character words.
    first_k1 = defaultdict(float, {'A': 3 / 7., 'C': 1 / 7.,
                                   'G': 1 / 7., 'T': 2 / 7.})
    second_k1 = defaultdict(float, {'G': 1 / 3., 'T': 2 / 3.})
    self.assertEqual(self.s1.k_word_frequencies(k=1),
                     [first_k1, second_k1])

    # Non-overlapping words of length three.
    first_k3 = defaultdict(float, {'GAT': 1 / 2., 'TAC': 1 / 2.})
    second_k3 = defaultdict(float, {'TTG': 1 / 1.})
    self.assertEqual(self.s1.k_word_frequencies(k=3, overlapping=False),
                     [first_k3, second_k3])

    # An empty collection yields an empty list.
    self.assertEqual(self.empty.k_word_frequencies(k=1), [])

    # Test to ensure floating point precision bug isn't present. See the
    # tests for BiologicalSequence.k_word_frequencies for more details.
    homopolymers = SequenceCollection([RNA('C' * 10, id='s1'),
                                       RNA('G' * 10, id='s2')])
    self.assertEqual(homopolymers.k_word_frequencies(1),
                     [defaultdict(float, {'C': 1.0}),
                      defaultdict(float, {'G': 1.0})])
示例2: extract_seq_ids
def extract_seq_ids(data, fmt='fasta', variant=None):
    """
    Parse sequence data held in a string and return only the
    sequence IDs.

    :param data: sequence records as a single string
    :param fmt: format name passed to ``SequenceCollection.read``
    :param variant: forwarded to the reader only when ``fmt`` is 'fastq'
    :return: frozenset holding the ``id`` of every parsed entry
    """
    reader_kwargs = {'format': fmt}
    if fmt == 'fastq':
        # Only the FASTQ branch passes a variant to the reader.
        reader_kwargs['variant'] = variant
    records = SequenceCollection.read(StringIO(data), **reader_kwargs)
    return frozenset(record.id for record in records)
示例3: test_degap
def test_degap(self):
    """degap output matches collections built from gap-free records
    """
    def without_gaps(records):
        # Drop both gap characters ('.' and '-') from every sequence string.
        return [(seq_id, seq.replace(".", "").replace("-", ""))
                for seq_id, seq in records]

    dna_expected = SequenceCollection.from_fasta_records(
        without_gaps(self.seqs1_t), DNASequence)
    self.assertEqual(self.a1.degap(), dna_expected)

    rna_expected = SequenceCollection.from_fasta_records(
        without_gaps(self.seqs2_t), RNASequence)
    self.assertEqual(self.a2.degap(), rna_expected)
示例4: test_distances
def test_distances(self):
    """distances returns a DistanceMatrix built with the given function."""
    collection = SequenceCollection([DNA("ACGT", "d1"), DNA("ACGG", "d2")])

    hamming_expected = DistanceMatrix([[0, 0.25],
                                       [0.25, 0]], ['d1', 'd2'])
    self.assertEqual(collection.distances(hamming), hamming_expected)

    # alt distance function provided
    def constant_distance(s1, s2):
        return 42.

    constant_expected = DistanceMatrix([[0, 42.],
                                        [42., 0]], ['d1', 'd2'])
    self.assertEqual(collection.distances(constant_distance),
                     constant_expected)
示例5: convert_phylip
def convert_phylip(infile, outfile, format):
    """Read PHYLIP sequences from ``infile`` and write them to ``outfile``.

    :param infile: input passed to ``SequenceCollection.read``
    :param outfile: destination passed to the collection's ``write``
    :param format: output format name passed to ``write``
    """
    # NOTE(review): phylip.relaxed_ids presumably relaxes the strict
    # PHYLIP ID parsing -- confirm against the phylip module.
    read_options = {'format': 'phylip', 'data_parser': phylip.relaxed_ids}
    collection = SequenceCollection.read(infile, **read_options)
    collection.write(outfile, format=format)
示例6: main
def main():
    """Download two MG-RAST stage files per metagenome and write the result.

    For each requested metagenome the "dereplication passed" and "screen
    passed" files are downloaded, the first is run through ``filter_seqs``
    with the sequence IDs of the second, and the outcome is written to
    ``<mg_id>_screen_failed.fastq`` inside ``args.out_dir``.
    """
    args = handle_program_options()

    if osp.isfile(args.out_dir):
        print("--out_dir (-o) option must be a valid directory and not a file",
              file=sys.stderr)
        sys.exit(1)

    # will fail gracefully if dir exists
    skbu.create_dir(args.out_dir)

    # A single ID takes precedence over a file listing IDs.
    if args.metagenome_id is not None:
        requested = [args.metagenome_id]
    elif args.metagenome_file is not None:
        requested = list(parse_metagenome_file(args.metagenome_file))
        # NOTE(review): the original indentation was lost; this message is
        # assumed to belong to the file branch because it reports the file.
        if args.verbose:
            msg = 'Processing requested for {} metagenome(s) found in: {}'
            print(msg.format(len(requested), args.metagenome_file))
    else:
        requested = []

    # MG-RAST stage.file ids for downloading
    stage_derep_passed = '150.1'
    stage_screen_passed = '299.1'

    def download(metagenome, stage_file):
        # Issue one MG-RAST 'download' request for the given stage file.
        return mgapi.mgrast_request('download', metagenome,
                                    {'file': stage_file},
                                    auth_key=args.auth_key)

    for mg_id in requested:
        if args.verbose:
            print('Processing metagenome: {}'.format(mg_id))
            print('\tDownloading: Dereplication Passed...', end='')
            sys.stdout.flush()
        derep_response = download(mg_id, stage_derep_passed)
        derep_seqs = SequenceCollection.read(StringIO(derep_response.text),
                                             format='fastq',
                                             variant='illumina1.8')

        if args.verbose:
            print('{} sequences'.format(len(derep_seqs)))
            print('\tDownloading: Screen Passed...', end='')
            sys.stdout.flush()
        screen_response = download(mg_id, stage_screen_passed)
        screen_ids = extract_seq_ids(screen_response.text, fmt='fastq',
                                     variant='illumina1.8')
        if args.verbose:
            print('{} sequences'.format(len(screen_ids)))

        # filter dereplication passed with IDs from screen passed
        remaining = filter_seqs(derep_seqs, screen_ids)
        if args.verbose:
            removed_count = len(screen_ids)
            print('\tRemoved {} sequences from Dereplication Passed'.format(
                removed_count))
            print('\tleaving {} sequences'.format(len(remaining)))

        out_fp = osp.join(args.out_dir, mg_id + '_screen_failed.fastq')
        remaining.write(out_fp, format='fastq', variant='illumina1.8')
        if args.verbose:
            print('Sequence data written to: ' + out_fp)
示例7: test_make_mini_otu_files
def test_make_mini_otu_files(self):
    """_make_mini_otu_files returns the expected FASTA text.

    A scratch ``tmp`` directory exists for the duration of the call and is
    removed afterwards, including when the call under test raises.
    """
    import shutil

    # NOTE(review): presumably _make_mini_otu_files writes into ./tmp --
    # confirm against its implementation.
    if not os.path.isdir("tmp"):
        os.mkdir("tmp")
    try:
        self.extension_seqs = SequenceCollection.read(self.extension_seqs)
        result = _make_mini_otu_files(self.key_node,
                                      self.extension_genus_dic_few,
                                      self.extension_seqs)
    finally:
        # Clean up without a shell so a failing call cannot leave tmp behind.
        shutil.rmtree("tmp", ignore_errors=True)
    self.assertEqual(result, """>P1\nTTAAAAAA\n""")
示例8: setUp
def setUp(self):
    """Create the DNA/RNA sequences and collections used by the tests."""
    def make(seq_type, residues, seq_id):
        # Every fixture sequence carries its ID in the metadata dict.
        return seq_type(residues, metadata={'id': seq_id})

    self.d1 = make(DNA, 'GATTACA', "d1")
    self.d2 = make(DNA, 'TTG', "d2")
    self.d3 = make(DNA, 'GTATACA', "d3")
    self.r1 = make(RNA, 'GAUUACA', "r1")
    self.r2 = make(RNA, 'UUG', "r2")
    self.r3 = make(RNA, 'U-----UGCC--', "r3")

    # Plain lists of sequences.
    self.seqs1 = [self.d1, self.d2]
    self.seqs2 = [self.r1, self.r2, self.r3]
    self.seqs3 = self.seqs1 + self.seqs2
    self.seqs4 = [self.d1, self.d3]

    # Collections wrapping those lists, plus an empty one.
    self.s1 = SequenceCollection(self.seqs1)
    self.s2 = SequenceCollection(self.seqs2)
    self.s3 = SequenceCollection(self.seqs3)
    self.s4 = SequenceCollection(self.seqs4)
    self.empty = SequenceCollection([])
示例9: test_distances
def test_distances(self):
    """distances returns a DistanceMatrix built with the given function."""
    members = [DNA("ACGT", metadata={'id': "d1"}),
               DNA("ACGG", metadata={'id': "d2"})]
    collection = SequenceCollection(members)

    def values_hamming(s1, s2):
        # hamming is applied to the sequences' ``values`` attribute.
        return hamming(s1.values, s2.values)

    hamming_expected = DistanceMatrix([[0, 0.25],
                                       [0.25, 0]], ['d1', 'd2'])
    self.assertEqual(collection.distances(values_hamming), hamming_expected)

    # alt distance function provided
    def constant_distance(s1, s2):
        return 42.

    constant_expected = DistanceMatrix([[0, 42.],
                                        [42., 0]], ['d1', 'd2'])
    self.assertEqual(collection.distances(constant_distance),
                     constant_expected)
示例10: setUp
def setUp(self):
    """Create sequences, (id, sequence) records, and collections for tests."""
    # (id, sequence) records; the sequence objects below are built from them.
    self.seqs1_t = [('d1', 'GATTACA'), ('d2', 'TTG')]
    self.seqs2_t = [('r1', 'GAUUACA'), ('r2', 'UUG'),
                    ('r3', 'U-----UGCC--')]
    self.seqs3_t = self.seqs1_t + self.seqs2_t

    self.d1, self.d2 = [DNASequence(seq, id=seq_id)
                        for seq_id, seq in self.seqs1_t]
    self.d3 = DNASequence('GTATACA', id="d3")
    # Lowercase twins that keep the same IDs.
    self.d1_lower, self.d2_lower = [DNASequence(seq.lower(), id=seq_id)
                                    for seq_id, seq in self.seqs1_t]
    self.d3_lower = DNASequence('gtataca', id="d3")
    self.r1, self.r2, self.r3 = [RNASequence(seq, id=seq_id)
                                 for seq_id, seq in self.seqs2_t]
    # Contains 'X'; used to build the ``invalid_s1`` collection.
    self.i1 = DNASequence('GATXACA', id="i1")

    self.seqs1 = [self.d1, self.d2]
    self.seqs1_lower = [self.d1_lower, self.d2_lower]
    self.seqs2 = [self.r1, self.r2, self.r3]
    self.seqs3 = self.seqs1 + self.seqs2
    self.seqs4 = [self.d1, self.d3]

    self.s1 = SequenceCollection(self.seqs1)
    self.s1_lower = SequenceCollection(self.seqs1_lower)
    self.s2 = SequenceCollection(self.seqs2)
    self.s3 = SequenceCollection(self.seqs3)
    self.s4 = SequenceCollection(self.seqs4)
    self.empty = SequenceCollection([])
    self.invalid_s1 = SequenceCollection([self.i1])
示例11: setUp
def setUp(self):
    """Prepare the sequences, records, and collections shared by the tests
    """
    # (id, sequence) records; the sequence objects below are built from them.
    self.seqs1_t = [("d1", "GATTACA"), ("d2", "TTG")]
    self.seqs2_t = [("r1", "GAUUACA"), ("r2", "UUG"), ("r3", "U-----UGCC--")]
    self.seqs3_t = self.seqs1_t + self.seqs2_t

    self.d1, self.d2 = [DNASequence(seq, id=seq_id)
                        for seq_id, seq in self.seqs1_t]
    # Lowercase twins that keep the same IDs.
    self.d1_lower, self.d2_lower = [DNASequence(seq.lower(), id=seq_id)
                                    for seq_id, seq in self.seqs1_t]
    self.r1, self.r2, self.r3 = [RNASequence(seq, id=seq_id)
                                 for seq_id, seq in self.seqs2_t]
    # Contains 'X'; used to build the ``invalid_s1`` collection.
    self.i1 = DNASequence("GATXACA", id="i1")

    self.seqs1 = [self.d1, self.d2]
    self.seqs1_lower = [self.d1_lower, self.d2_lower]
    self.seqs2 = [self.r1, self.r2, self.r3]
    self.seqs3 = self.seqs1 + self.seqs2

    self.s1 = SequenceCollection(self.seqs1)
    self.s1_lower = SequenceCollection(self.seqs1_lower)
    self.s2 = SequenceCollection(self.seqs2)
    self.s3 = SequenceCollection(self.seqs3)
    self.empty = SequenceCollection([])
    self.invalid_s1 = SequenceCollection([self.i1])
示例12: test_kmer_frequencies
def test_kmer_frequencies(self):
    """kmer_frequencies returns one count/frequency mapping per sequence."""
    # Absolute counts of non-overlapping 3-mers.
    counts = [Counter({'GAT': 1, 'TAC': 1}), Counter({'TTG': 1})]
    self.assertEqual(
        self.s1.kmer_frequencies(k=3, overlap=False, relative=False),
        counts)

    # Relative frequencies of single characters.
    first_k1 = defaultdict(float, {'A': 3 / 7., 'C': 1 / 7.,
                                   'G': 1 / 7., 'T': 2 / 7.})
    second_k1 = defaultdict(float, {'G': 1 / 3., 'T': 2 / 3.})
    self.assertEqual(self.s1.kmer_frequencies(k=1, relative=True),
                     [first_k1, second_k1])

    # Relative frequencies of non-overlapping 3-mers.
    first_k3 = defaultdict(float, {'GAT': 1 / 2., 'TAC': 1 / 2.})
    second_k3 = defaultdict(float, {'TTG': 1 / 1.})
    self.assertEqual(
        self.s1.kmer_frequencies(k=3, overlap=False, relative=True),
        [first_k3, second_k3])

    # An empty collection yields an empty list.
    self.assertEqual(self.empty.kmer_frequencies(k=1, relative=True), [])

    # Test to ensure floating point precision bug isn't present. See the
    # tests for Sequence.kmer_frequencies for more details.
    homopolymers = SequenceCollection([RNA('C' * 10, metadata={'id': 's1'}),
                                       RNA('G' * 10, metadata={'id': 's2'})])
    self.assertEqual(homopolymers.kmer_frequencies(1, relative=True),
                     [defaultdict(float, {'C': 1.0}),
                      defaultdict(float, {'G': 1.0})])
示例13: str
from itertools import islice

from qiime_default_reference import get_template_alignment, get_reference_sequences
from skbio import SequenceCollection

# Keep only the first 500 records of each file as (id, sequence string)
# pairs. islice stops after 500 items, so the remaining sequences are never
# converted to strings just to be thrown away by a slice.
gapped_sequences = [(s.id, str(s)) for s in
                    islice(SequenceCollection.read(get_template_alignment()), 500)]
sequences = [(s.id, str(s)) for s in
             islice(SequenceCollection.read(get_reference_sequences()), 500)]
motif_1 = "GGTGCAAGCCGGTGGAAACA"
示例14: test_from_fasta_records
def test_from_fasta_records(self):
    """Building a collection from (id, sequence) tuples does not raise
    """
    cases = (
        (self.seqs1_t, DNASequence),
        (self.seqs2_t, RNASequence),
        (self.seqs3_t, NucleotideSequence),
    )
    for records, seq_type in cases:
        SequenceCollection.from_fasta_records(records, seq_type)
示例15: SequenceCollectionTests
class SequenceCollectionTests(TestCase):
"""Tests of the SequenceCollection class """
def setUp(self):
    """Prepare the sequences, records, and collections shared by the tests
    """
    # (id, sequence) records; the sequence objects below are built from them.
    self.seqs1_t = [('d1', 'GATTACA'), ('d2', 'TTG')]
    self.seqs2_t = [('r1', 'GAUUACA'), ('r2', 'UUG'),
                    ('r3', 'U-----UGCC--')]
    self.seqs3_t = self.seqs1_t + self.seqs2_t

    self.d1, self.d2 = [DNASequence(seq, id=seq_id)
                        for seq_id, seq in self.seqs1_t]
    # Lowercase twins that keep the same IDs.
    self.d1_lower, self.d2_lower = [DNASequence(seq.lower(), id=seq_id)
                                    for seq_id, seq in self.seqs1_t]
    self.r1, self.r2, self.r3 = [RNASequence(seq, id=seq_id)
                                 for seq_id, seq in self.seqs2_t]
    # Contains 'X'; used to build the ``invalid_s1`` collection.
    self.i1 = DNASequence('GATXACA', id="i1")

    self.seqs1 = [self.d1, self.d2]
    self.seqs1_lower = [self.d1_lower, self.d2_lower]
    self.seqs2 = [self.r1, self.r2, self.r3]
    self.seqs3 = self.seqs1 + self.seqs2

    self.s1 = SequenceCollection(self.seqs1)
    self.s1_lower = SequenceCollection(self.seqs1_lower)
    self.s2 = SequenceCollection(self.seqs2)
    self.s3 = SequenceCollection(self.seqs3)
    self.empty = SequenceCollection([])
    self.invalid_s1 = SequenceCollection([self.i1])
def test_init(self):
    """Construction succeeds for each fixture list and for an empty list
    """
    for sequences in (self.seqs1, self.seqs2, self.seqs3, []):
        SequenceCollection(sequences)
def test_init_fail(self):
    """Construction raises when two sequences share an id
    """
    duplicated = [self.d1, self.d1]
    with self.assertRaises(SequenceCollectionError):
        SequenceCollection(duplicated)
def test_init_validate(self):
    """initialization with validation functions as expected
    """
    # seqs1 is expected to pass validation, i.e. construction must not raise
    SequenceCollection(self.seqs1, validate=True)
    # invalid_s1 is the collection whose validation is expected to fail
    self.assertRaises(SequenceCollectionError, SequenceCollection,
                      self.invalid_s1, validate=True)
def test_from_fasta_records(self):
    """Building a collection from (id, sequence) tuples does not raise
    """
    cases = (
        (self.seqs1_t, DNASequence),
        (self.seqs2_t, RNASequence),
        (self.seqs3_t, NucleotideSequence),
    )
    for records, seq_type in cases:
        SequenceCollection.from_fasta_records(records, seq_type)
def test_contains(self):
    """Membership tests by sequence id give the expected answers
    """
    self.assertIn('d1', self.s1)
    self.assertIn('r2', self.s2)
    self.assertNotIn('r2', self.s1)
def test_eq(self):
    """== gives the expected result for each case below
    """
    class FakeSequenceCollection(SequenceCollection):
        pass

    # a collection equals itself but not a different fixture collection
    self.assertTrue(self.s1 == self.s1)
    self.assertFalse(self.s1 == self.s2)

    # different objects can be equal
    rebuilt_right = SequenceCollection([self.d1, self.d2])
    self.assertTrue(self.s1 == rebuilt_right)
    rebuilt_left = SequenceCollection([self.d1, self.d2])
    self.assertTrue(rebuilt_left == self.s1)

    # SequenceCollections with different number of sequences are not equal
    shorter = SequenceCollection([self.d1])
    self.assertFalse(self.s1 == shorter)

    # SequenceCollections of different types are not equal
    subclass_instance = FakeSequenceCollection([self.d1, self.d2])
    self.assertFalse(self.s1 == subclass_instance)
    alignment = Alignment([self.d1, self.d2])
    self.assertFalse(self.s1 == alignment)

    # SequenceCollections with different sequences are not equal
    mixed = SequenceCollection([self.d1, self.r1])
    self.assertFalse(self.s1 == mixed)
def test_getitem(self):
"""getitem functions as expected
"""
self.assertEqual(self.s1[0], self.d1)
self.assertEqual(self.s1[1], self.d2)
self.assertEqual(self.s2[0], self.r1)
#.........這裏部分代碼省略.........