本文整理汇总了Python中sourmash._minhash.MinHash.get_mins方法的典型用法代码示例。如果您正苦于以下问题：Python MinHash.get_mins方法的具体用法？Python MinHash.get_mins怎么用？Python MinHash.get_mins使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sourmash._minhash.MinHash的用法示例。
在下文中一共展示了MinHash.get_mins方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_size_limit
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_size_limit(track_abundance):
    """Check get_mins() on a sketch created with a size limit of 3."""
    sketch = MinHash(3, 4, track_abundance=track_abundance)
    for value in (10, 20, 30):
        sketch.add_hash(value)
    assert sketch.get_mins() == [10, 20, 30]

    # Adding 5 should push 30 off the end of the sketch.
    sketch.add_hash(5)
    assert sketch.get_mins() == [5, 10, 20]
示例2: test_basic_dna_bad_force
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_basic_dna_bad_force(track_abundance):
    """Check forced addition of bad DNA: ambiguous k-mers are skipped.

    The sketch is created with 100 so that multiple hashes can get added.
    """
    sketch = MinHash(100, 4, track_abundance=track_abundance)
    assert len(sketch.get_mins()) == 0

    # (sequence to add, expected number of hashes afterwards), run in order.
    steps = [
        ('ATGN', 0),   # ambiguous k-mer skipped
        ('AATGN', 1),  # but good k-mers are still used
        ('AATG', 1),   # same k-mer again: a duplicate, so still only 1 hash
    ]
    for sequence, expected_count in steps:
        sketch.add_sequence(sequence, True)
        assert len(sketch.get_mins()) == expected_count
示例3: test_abundance_simple
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_abundance_simple():
    """Check that re-adding a sequence bumps its abundance, not the hash list."""
    expected_hash = 2110480117637990133
    sketch = MinHash(20, 5, False, track_abundance=True)

    # Each pass adds the same sequence once more; the hash list is unchanged
    # while the reported abundance follows the number of additions.
    for expected_abundance in (1, 2):
        sketch.add_sequence('AAAAA')
        assert sketch.get_mins() == [expected_hash]
        assert sketch.get_mins(with_abundance=True) == {
            expected_hash: expected_abundance,
        }
示例4: test_basic_dna_bad_force_2
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_basic_dna_bad_force_2(track_abundance):
    """Check forced addition of bad DNA containing several ambiguous k-mers."""
    sketch = MinHash(100, 4, track_abundance=track_abundance)
    assert len(sketch.get_mins()) == 0

    # Ambiguous k-mers are skipped.
    sketch.add_sequence('AAGNCGG', True)
    assert len(sketch.get_mins()) == 0

    # Ambiguous k-mers are skipped here too; two hashes are expected.
    sketch.add_sequence('AATGNGCGG', True)
    assert len(sketch.get_mins()) == 2

    # Check that the right k-mers were added: re-adding them must not
    # change the count (only 2 hashes should be there).
    for kmer in ('AATG', 'GCGG'):
        sketch.add_sequence(kmer, True)
    assert len(sketch.get_mins()) == 2
示例5: test_max_hash
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_max_hash(track_abundance):
    """Check get_mins() on a sketch created with max_hash=35."""
    sketch = MinHash(0, 4, track_abundance=track_abundance, max_hash=35)

    expected = [10, 20, 30]
    for value in expected:
        sketch.add_hash(value)
    assert sketch.get_mins() == expected

    # 40 and 36 are both above max_hash; neither should show up in the mins.
    for above_limit in (40, 36):
        sketch.add_hash(above_limit)
        assert sketch.get_mins() == expected
示例6: test_basic_dna
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_basic_dna(track_abundance):
    """Verify that sketches of size 1 stay size 1 and act as bottom sketches."""
    sketch = MinHash(1, 4, track_abundance=track_abundance)

    sketch.add_sequence('ATGC')
    mins_before = sketch.get_mins()

    # This will not get added; hash > ATGC.
    sketch.add_sequence('GCAT')
    mins_after = sketch.get_mins()

    print(mins_before, mins_after)
    assert mins_before == mins_after
    assert len(mins_after) == 1
示例7: test_minhash_abund_add
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_minhash_abund_add():
    """Regression test targeting part of bug #319.

    That bug was a segfault caused by invalidation of std::vector iterators
    upon vector resizing; in this case there was also a bug in inserting
    into the middle of mins when scaled was set.
    """
    sketch = MinHash(0, 10, track_abundance=True, max_hash=5000)

    # Hashes are added in descending order (10 down to 1); the number of
    # mins must grow by one with every addition.
    # NOTE(review): the per-iteration placement of the assert and print is
    # inferred -- confirm against the upstream test.
    for expected_size, value in enumerate(range(10, 0, -1), start=1):
        sketch.add_hash(value)
        assert len(sketch.get_mins()) == expected_size
        print(len(sketch.get_mins()))
示例8: test_bytes_dna
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_bytes_dna(track_abundance):
    """Check add_sequence() with plain, bytes and u-prefixed string inputs."""
    sketch = MinHash(1, 4, track_abundance=track_abundance)

    for sequence in ('ATGC', b'ATGC', u'ATGC'):
        sketch.add_sequence(sequence)
    mins_before = sketch.get_mins()

    # None of these will get added; hash > ATGC.
    for sequence in ('GCAT', b'GCAT', u'GCAT'):
        sketch.add_sequence(sequence)
    mins_after = sketch.get_mins()

    print(mins_before, mins_after)
    assert mins_before == mins_after
    assert len(mins_after) == 1
示例9: test_scaled
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_scaled(track_abundance):
    """Check get_mins() with scaled (the alternative to max_hash)."""
    scaled = get_scaled_for_max_hash(35)
    print('XX', scaled, get_max_hash_for_scaled(scaled))

    sketch = MinHash(0, 4, track_abundance=track_abundance, scaled=scaled)
    assert sketch.max_hash == 35

    expected = [10, 20, 30]
    for value in expected:
        sketch.add_hash(value)
    assert sketch.get_mins() == expected

    # 40 and 36 are both above max_hash; neither should show up in the mins.
    for above_limit in (40, 36):
        sketch.add_hash(above_limit)
        assert sketch.get_mins() == expected
示例10: test_mh_asymmetric_merge
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_mh_asymmetric_merge(track_abundance):
    """Check merging two asymmetric (different size) sketches."""
    larger = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 40, 2):
        larger.add_hash(value)

    # Different size: 10.
    smaller = MinHash(10, 10, track_abundance=track_abundance)
    for value in range(0, 80, 4):
        smaller.add_hash(value)

    larger_merged = larger.merge(smaller)
    smaller_merged = smaller.merge(larger)

    assert len(larger) == 20
    assert len(smaller) == 10
    assert len(larger_merged) == len(larger)
    assert len(smaller_merged) == len(smaller)

    # Can't compare different sizes without downsampling.
    with pytest.raises(TypeError):
        smaller_merged.compare(larger)

    larger_downsampled = larger.downsample_n(smaller_merged.num)
    print(larger_downsampled.get_mins())
    print(smaller_merged.get_mins())
    assert smaller_merged.compare(larger_downsampled) == 1.0

    merged_downsampled = larger_merged.downsample_n(smaller.num)
    assert merged_downsampled.compare(smaller) == 1.0
示例11: test_abundance_count_common
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_abundance_count_common():
    """Check count_common() between abundance-tracking and plain sketches."""
    shared_hash = 2110480117637990133
    other_hash = 10798773792509008305

    tracked = MinHash(20, 5, False, track_abundance=True)
    untracked = MinHash(20, 5, False, track_abundance=False)

    for _ in range(2):
        tracked.add_sequence('AAAAA')
    assert tracked.get_mins() == [shared_hash]
    assert tracked.get_mins(with_abundance=True) == {shared_hash: 2}

    for sequence in ('AAAAA', 'GGGGG'):
        untracked.add_sequence(sequence)

    # The count must be the same from either side.
    assert tracked.count_common(untracked) == 1
    assert tracked.count_common(untracked) == untracked.count_common(tracked)

    # The sketch without abundance tracking is expected to return a plain
    # list even when with_abundance=True is requested.
    assert untracked.get_mins(with_abundance=True) == [shared_hash, other_hash]
示例12: test_bytes_protein
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_bytes_protein(track_abundance):
    """Verify that protein/aa sequences can be hashed from str and bytes."""
    sketch = MinHash(10, 6, True, track_abundance=track_abundance)
    for peptide in ('AGYYG', u'AGYYG', b'AGYYG'):
        sketch.add_protein(peptide)
    assert len(sketch.get_mins()) == 4
示例13: test_add_many
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_add_many(track_abundance):
    """Check that add_many() matches adding the same hashes one at a time."""
    bulk = MinHash(0, 10, track_abundance=track_abundance, max_hash=5000)
    single = MinHash(0, 10, track_abundance=track_abundance, max_hash=5000)

    # Add the 50 even values below 100 twice over, in bulk.
    for _ in range(2):
        bulk.add_many(list(range(0, 100, 2)))

    assert len(bulk) == 50
    assert all(value % 2 == 0 for value in bulk.get_mins())

    # Mirror that with individual calls: each hash is added twice.
    for value in range(0, 100, 2):
        for _ in range(2):
            single.add_hash(value)

    assert len(single) == 50
    assert bulk == single
示例14: test_pickle_scaled
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_pickle_scaled(track_abundance):
    """Check that a scaled sketch survives a pickle round trip unchanged."""
    original = MinHash(
        0, 10, track_abundance=track_abundance, scaled=922337203685477632
    )
    for value in range(0, 40, 2):
        original.add_hash(value)

    # Round trip through pickle; the data being loaded was produced just
    # above by this test.
    restored = pickle.loads(pickle.dumps(original))

    assert original.ksize == restored.ksize
    assert restored.num == original.num
    assert restored.max_hash == original.max_hash
    assert restored.max_hash == 20
    assert not restored.is_protein
    assert restored.track_abundance == track_abundance
    assert restored.seed == original.seed
    assert len(restored.get_mins()) == len(original.get_mins())
    # Of the even values 0..38 that were added, the 11 values 0..20 are
    # expected to remain, given the max_hash of 20 asserted above.
    assert len(restored.get_mins()) == 11
    assert original.scaled == restored.scaled
    assert restored.scaled != 0
示例15: test_remove_many
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import get_mins [as 别名]
def test_remove_many(track_abundance):
    """Check that remove_many() drops hashes and changes the signature md5."""
    sketch = MinHash(0, 10, track_abundance=track_abundance, max_hash=5000)
    sketch.add_many(list(range(0, 100, 2)))

    sig_before = signature.SourmashSignature(sketch)
    md5_before = sig_before.md5sum()

    # Request removal of every multiple of 3 below 100.
    sketch.remove_many(list(range(0, 100, 3)))

    sig_after = signature.SourmashSignature(sketch)
    md5_after = sig_after.md5sum()

    assert md5_before == "f1cc295157374f5c07cfca5f867188a1"
    assert md5_after == "dd93fa319ef57f4a019c59ee1a8c73e2"
    assert md5_before != md5_after

    # 50 even values minus the 17 multiples of 6 (0, 6, ..., 96) leaves 33.
    assert len(sketch) == 33
    assert all(value % 6 != 0 for value in sketch.get_mins())