本文整理汇总了Python中sourmash._minhash.MinHash.add_hash方法的典型用法代码示例。如果您正苦于以下问题：Python MinHash.add_hash方法的具体用法？Python MinHash.add_hash怎么用？Python MinHash.add_hash使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sourmash._minhash.MinHash的用法示例。
在下文中一共展示了MinHash.add_hash方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_mh_inplace_concat_asymmetric
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_mh_inplace_concat_asymmetric(track_abundance):
    """Check in-place ``+=`` between MinHashes built with different sizes.

    Two sketches are created with first constructor argument 20 and 10
    (the test asserts their lengths are 20 and 10 respectively), each is
    copied and merged in place with the other, and the results are
    compared after downsampling to a common ``num``.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor
            (presumably supplied by a pytest fixture -- not visible here).
    """
    # test merging two asymmetric (different size) MHs
    a = MinHash(20, 10, track_abundance=track_abundance)
    for i in range(0, 40, 2):
        a.add_hash(i)

    # different size: 10
    b = MinHash(10, 10, track_abundance=track_abundance)
    for i in range(0, 80, 4):
        b.add_hash(i)

    c = a.__copy__()
    c += b

    d = b.__copy__()
    d += a

    assert len(a) == 20
    assert len(b) == 10
    assert len(c) == len(a)
    assert len(d) == len(b)

    # Comparing sketches of different sizes is expected to raise TypeError.
    # The ``else`` branch makes the test fail if no exception is raised;
    # without it the check would pass silently.
    try:
        d.compare(a)
    except TypeError as exc:
        assert 'must have same num' in str(exc)
    else:
        raise AssertionError(
            'compare() between MinHashes of different num did not raise '
            'TypeError')

    a = a.downsample_n(d.num)
    assert d.compare(a) == 1.0  # see: d += a, above.

    c = c.downsample_n(b.num)
    assert c.compare(b) == 0.5
示例2: test_mh_asymmetric_merge
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_mh_asymmetric_merge(track_abundance):
    """Check ``merge()`` between MinHashes built with different sizes.

    The merged sketch is asserted to keep the length of the sketch that
    ``merge()`` was called on; comparisons are done after downsampling
    to a shared ``num``.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor.
    """
    # Two sketches of different size (20 vs. 10).
    larger = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 40, 2):
        larger.add_hash(value)

    smaller = MinHash(10, 10, track_abundance=track_abundance)
    for value in range(0, 80, 4):
        smaller.add_hash(value)

    merged_into_larger = larger.merge(smaller)
    merged_into_smaller = smaller.merge(larger)

    assert len(larger) == 20
    assert len(smaller) == 10
    assert len(merged_into_larger) == len(larger)
    assert len(merged_into_smaller) == len(smaller)

    # Sketches of different sizes cannot be compared until downsampled.
    with pytest.raises(TypeError):
        merged_into_smaller.compare(larger)

    larger = larger.downsample_n(merged_into_smaller.num)
    print(larger.get_mins())
    print(merged_into_smaller.get_mins())
    assert merged_into_smaller.compare(larger) == 1.0

    merged_into_larger = merged_into_larger.downsample_n(smaller.num)
    assert merged_into_larger.compare(smaller) == 1.0
示例3: test_reviving_minhash
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_reviving_minhash():
    """Rebuild a MinHash from a fixed list of hash values.

    Simulates loading a sketch from disk: the hashes are inserted one by
    one into a fresh ``MinHash`` created with an explicit ``max_hash``
    and ``seed``. The test only checks that this completes without error.
    """
    stored_hashes = (
        28945103950853965, 74690756200987412, 82962372765557409,
        93503551367950366, 106923350319729608, 135116761470196737,
        160165359281648267, 162390811417732001, 177939655451276972,
    )

    sketch = MinHash(0, 21, max_hash=184467440737095520, seed=42,
                     track_abundance=False)
    for hash_value in stored_hashes:
        sketch.add_hash(hash_value)
示例4: test_mh_count_common
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_mh_count_common(track_abundance):
    """Check that ``count_common()`` gives 10 in both directions.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor.
    """
    evens = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 40, 2):
        evens.add_hash(value)

    multiples_of_four = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 80, 4):
        multiples_of_four.add_hash(value)

    # The count is asserted to be the same whichever sketch is the receiver.
    assert evens.count_common(multiples_of_four) == 10
    assert multiples_of_four.count_common(evens) == 10
示例5: test_mh_subtract
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_mh_subtract(track_abundance):
    """Check ``subtract_mins()`` on two identically configured MinHashes.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor.
    """
    minuend = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 40, 2):
        minuend.add_hash(value)

    subtrahend = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 80, 4):
        subtrahend.add_hash(value)

    remaining = minuend.subtract_mins(subtrahend)
    # Expected result: the even values below 40 that are not multiples of 4.
    assert remaining == set(range(2, 40, 4))
示例6: test_minhash_abund_capacity_increase
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_minhash_abund_capacity_increase():
    """Regression test for bug #319 (segfault on vector resize).

    Per the original notes, the crash came from invalidated std::vector
    iterators when the underlying vector was resized. The constructor is
    said to reserve capacity 1000 (see the KmerMinHash constructor's call
    to 'reserve'), so 1001 insertions are used to exceed it.
    """
    sketch = MinHash(0, 10, track_abundance=True, max_hash=5000)

    # 1001 depends on the value passed to reserve (currently 1000).
    # Hashes are inserted in descending order, from 1001 down to 1.
    for hash_value in range(1001, 0, -1):
        sketch.add_hash(hash_value)
示例7: test_mh_merge_check_length
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_mh_merge_check_length(track_abundance):
    """Check that merging two 20-hash sketches yields 20 retained hashes.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor.
    """
    left = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 40, 2):
        left.add_hash(value)

    right = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 80, 4):
        right.add_hash(value)

    merged = left.merge(right)
    retained = merged.get_mins()
    assert len(retained) == 20
示例8: test_minhash_abund_add
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_minhash_abund_add():
    """Regression test for part of bug #319 (insertion into the middle).

    Per the original notes, besides the std::vector iterator invalidation
    on resize there was a bug when inserting into the middle of the mins
    while scaled was set. Hashes are added in descending order and the
    number of retained hashes is checked after every insertion.
    """
    sketch = MinHash(0, 10, track_abundance=True, max_hash=5000)

    # ``expected_size`` counts insertions so far, starting at 1.
    for expected_size, hash_value in enumerate(range(10, 0, -1), 1):
        sketch.add_hash(hash_value)
        assert len(sketch.get_mins()) == expected_size
        print(len(sketch.get_mins()))
示例9: test_minhash_abund_merge_flat_2
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_minhash_abund_merge_flat_2():
    """Regression test: merge a sketch with abundance and one without.

    Per the original notes, merging a signature that tracks abundance
    with one that does not used to segfault. The test only checks that
    ``merge()`` completes; no result is asserted.
    """
    a = MinHash(0, 10, track_abundance=True, max_hash=5000)
    b = MinHash(0, 10, max_hash=5000)

    for i in range(0, 10, 2):
        a.add_hash(i)

    # Use the loop variable ``j`` here. The previous code added the stale
    # ``i`` from the loop above, so ``b`` only ever received the value 8.
    for j in range(0, 10, 3):
        b.add_hash(j)

    a.merge(b)
示例10: test_mh_copy_and_clear_with_max_hash
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_mh_copy_and_clear_with_max_hash(track_abundance):
    """Check ``copy_and_clear()`` on a MinHash created with ``max_hash``.

    The copy is asserted to carry the same configuration as the source
    sketch and to contain no hashes.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor.
    """
    source = MinHash(0, 10, track_abundance=track_abundance, max_hash=20)
    for value in range(0, 40, 2):
        source.add_hash(value)

    emptied = source.copy_and_clear()

    # Configuration is carried over to the copy.
    assert source.ksize == emptied.ksize
    assert emptied.num == source.num
    assert emptied.max_hash == 20
    assert not emptied.is_protein
    assert emptied.track_abundance == track_abundance
    assert emptied.seed == source.seed

    # The copy holds no hashes.
    assert len(emptied.get_mins()) == 0

    # ``scaled`` matches the source and is non-zero.
    assert source.scaled == emptied.scaled
    assert emptied.scaled != 0
示例11: test_add_many
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_add_many(track_abundance):
    """Check that ``add_many()`` matches repeated ``add_hash()`` calls.

    One sketch receives the even numbers below 100 twice via
    ``add_many()``; the other receives each of them twice via
    ``add_hash()``. Both are asserted to hold 50 hashes and to be equal.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor.
    """
    bulk = MinHash(0, 10, track_abundance=track_abundance, max_hash=5000)
    single = MinHash(0, 10, track_abundance=track_abundance, max_hash=5000)

    # Feed the same batch twice, building a fresh list for each call.
    for _ in range(2):
        bulk.add_many(list(range(0, 100, 2)))

    assert len(bulk) == 50
    assert not any(value % 2 for value in bulk.get_mins())

    # Add every hash twice, mirroring the two add_many() calls above.
    for value in range(0, 100, 2):
        single.add_hash(value)
        single.add_hash(value)

    assert len(single) == 50
    assert bulk == single
示例12: test_minhash_abund_merge_flat
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_minhash_abund_merge_flat():
    """Regression test: similarity of a sketch with abundance and one without.

    Per the original notes, computing similarity between a signature
    that tracks abundance and one that does not used to segfault. The
    correct behavior for now is to calculate simple Jaccard, i.e.
    'flatten' both of them.
    """
    a = MinHash(0, 10, track_abundance=True, max_hash=5000)
    b = MinHash(0, 10, max_hash=5000)

    for i in range(0, 10, 2):
        a.add_hash(i)

    # Use the loop variable ``j`` here. The previous code added the stale
    # ``i`` from the loop above, so ``b`` only ever received the value 8
    # and the old expectation of 0.2 (1/5) depended on that typo.
    for j in range(0, 10, 3):
        b.add_hash(j)

    # Flat Jaccard of {0, 2, 4, 6, 8} and {0, 3, 6, 9}:
    # intersection {0, 6} over union {0, 2, 3, 4, 6, 8, 9} = 2/7.
    expected = 2.0 / 7.0

    # these crashed, previously.
    assert abs(a.similarity(b) - expected) < 1e-9
    assert abs(b.similarity(a) - expected) < 1e-9
示例13: test_pickle_scaled
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_pickle_scaled(track_abundance):
    """Check that a MinHash created with ``scaled`` survives pickling.

    The sketch is pickled and unpickled in-process (trusted data only),
    and the restored object is asserted to match the original's
    configuration and number of retained hashes.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor.
    """
    original = MinHash(0, 10, track_abundance=track_abundance,
                       scaled=922337203685477632)
    for value in range(0, 40, 2):
        original.add_hash(value)

    payload = pickle.dumps(original)
    restored = pickle.loads(payload)

    # Configuration survives the round trip.
    assert original.ksize == restored.ksize
    assert restored.num == original.num
    assert restored.max_hash == original.max_hash
    assert restored.max_hash == 20
    assert not restored.is_protein
    assert restored.track_abundance == track_abundance
    assert restored.seed == original.seed

    # The same 11 hashes are retained on both sides.
    assert len(restored.get_mins()) == len(original.get_mins())
    assert len(restored.get_mins()) == 11

    assert original.scaled == restored.scaled
    assert restored.scaled != 0
示例14: test_mh_merge
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_mh_merge(track_abundance):
    """Check that ``merge()`` gives the same result in either direction.

    Two identically configured sketches are merged both ways; the results
    are asserted to have equal length, equal mins, and to compare as 1.0.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor.
    """
    first = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 40, 2):
        first.add_hash(value)

    second = MinHash(20, 10, track_abundance=track_abundance)
    for value in range(0, 80, 4):
        second.add_hash(value)

    first_then_second = first.merge(second)
    second_then_first = second.merge(first)

    assert len(first_then_second) == len(second_then_first)
    assert first_then_second.get_mins() == second_then_first.get_mins()
    assert first_then_second.compare(second_then_first) == 1.0
    assert second_then_first.compare(first_then_second) == 1.0
示例15: test_max_hash
# 需要导入模块: from sourmash._minhash import MinHash [as 别名]
# 或者: from sourmash._minhash.MinHash import add_hash [as 别名]
def test_max_hash(track_abundance):
    """Check which hashes are retained when ``max_hash`` is 35.

    Hashes 10, 20 and 30 are asserted to be kept; adding 40 and then 36
    is asserted to leave the retained hashes unchanged.

    Args:
        track_abundance: forwarded to the ``MinHash`` constructor.
    """
    sketch = MinHash(0, 4, track_abundance=track_abundance, max_hash=35)

    for accepted in (10, 20, 30):
        sketch.add_hash(accepted)
    assert sketch.get_mins() == [10, 20, 30]

    # Values above max_hash are expected not to change the retained hashes.
    for rejected in (40, 36):
        sketch.add_hash(rejected)
        assert sketch.get_mins() == [10, 20, 30]