本文整理汇总了Python中gensim.similarities.Similarity方法的典型用法代码示例。如果您正苦于以下问题:Python similarities.Similarity方法的具体用法?Python similarities.Similarity怎么用?Python similarities.Similarity使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.similarities
的用法示例。
在下文中一共展示了similarities.Similarity方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testChunking
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testChunking(self):
if self.cls == similarities.Similarity:
index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
else:
index = self.cls(corpus, num_features=len(dictionary))
query = corpus[:3]
sims = index[query]
expected = numpy.array([
[ 0.99999994, 0.23570226, 0.28867513, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[ 0.23570226, 1.0, 0.40824831, 0.33333334, 0.70710677, 0.0, 0.0, 0.0, 0.23570226 ],
[ 0.28867513, 0.40824831, 1.0, 0.61237246, 0.28867513, 0.0, 0.0, 0.0, 0.0 ]
], dtype=numpy.float32)
self.assertTrue(numpy.allclose(expected, sims))
# test the same thing but with num_best
index.num_best = 3
sims = index[query]
expected = [[(0, 0.99999994), (2, 0.28867513), (3, 0.23570226)],
[(1, 1.0), (4, 0.70710677), (2, 0.40824831)],
[(2, 1.0), (3, 0.61237246), (1, 0.40824831)]]
self.assertTrue(numpy.allclose(expected, sims))
if self.cls == similarities.Similarity:
index.destroy()
示例2: testIter
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testIter(self):
if self.cls == similarities.Similarity:
index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
else:
index = self.cls(corpus, num_features=len(dictionary))
sims = [sim for sim in index]
expected = numpy.array([
[ 0.99999994, 0.23570226, 0.28867513, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[ 0.23570226, 1.0, 0.40824831, 0.33333334, 0.70710677, 0.0, 0.0, 0.0, 0.23570226 ],
[ 0.28867513, 0.40824831, 1.0, 0.61237246, 0.28867513, 0.0, 0.0, 0.0, 0.0 ],
[ 0.23570226, 0.33333334, 0.61237246, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[ 0.0, 0.70710677, 0.28867513, 0.0, 0.99999994, 0.0, 0.0, 0.0, 0.0 ],
[ 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.70710677, 0.57735026, 0.0 ],
[ 0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.99999994, 0.81649655, 0.40824828 ],
[ 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.81649655, 0.99999994, 0.66666663 ],
[ 0.0, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.40824828, 0.66666663, 0.99999994 ]
], dtype=numpy.float32)
self.assertTrue(numpy.allclose(expected, sims))
if self.cls == similarities.Similarity:
index.destroy()
示例3: testPersistency
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testPersistency(self):
fname = testfile()
if self.cls == similarities.Similarity:
index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
else:
index = self.cls(corpus, num_features=len(dictionary))
index.save(fname)
index2 = self.cls.load(fname)
if self.cls == similarities.Similarity:
# for Similarity, only do a basic check
self.assertTrue(len(index.shards) == len(index2.shards))
index.destroy()
else:
if isinstance(index, similarities.SparseMatrixSimilarity):
# hack SparseMatrixSim indexes so they're easy to compare
index.index = index.index.todense()
index2.index = index2.index.todense()
self.assertTrue(numpy.allclose(index.index, index2.index))
self.assertEqual(index.num_best, index2.num_best)
示例4: testLarge
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testLarge(self):
fname = testfile()
if self.cls == similarities.Similarity:
index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
else:
index = self.cls(corpus, num_features=len(dictionary))
# store all arrays separately
index.save(fname, sep_limit=0)
index2 = self.cls.load(fname)
if self.cls == similarities.Similarity:
# for Similarity, only do a basic check
self.assertTrue(len(index.shards) == len(index2.shards))
index.destroy()
else:
if isinstance(index, similarities.SparseMatrixSimilarity):
# hack SparseMatrixSim indexes so they're easy to compare
index.index = index.index.todense()
index2.index = index2.index.todense()
self.assertTrue(numpy.allclose(index.index, index2.index))
self.assertEqual(index.num_best, index2.num_best)
示例5: testMmap
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testMmap(self):
fname = testfile()
if self.cls == similarities.Similarity:
index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
else:
index = self.cls(corpus, num_features=len(dictionary))
# store all arrays separately
index.save(fname, sep_limit=0)
# same thing, but use mmap to load arrays
index2 = self.cls.load(fname, mmap='r')
if self.cls == similarities.Similarity:
# for Similarity, only do a basic check
self.assertTrue(len(index.shards) == len(index2.shards))
index.destroy()
else:
if isinstance(index, similarities.SparseMatrixSimilarity):
# hack SparseMatrixSim indexes so they're easy to compare
index.index = index.index.todense()
index2.index = index2.index.todense()
self.assertTrue(numpy.allclose(index.index, index2.index))
self.assertEqual(index.num_best, index2.num_best)
示例6: main
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def main():
corpora_documents = []
for item_text in raw_documents:
item_str = list(jieba.cut(item_text))
corpora_documents.append(item_str)
dictionary = corpora.Dictionary(corpora_documents)
corpus = [dictionary.doc2bow(text) for text in corpora_documents]
similarity =similarities.Similarity('-Similarity-index', corpus, num_features=400)
test_data_1 = '你好,我想问一下我想离婚他不想离,孩子他说不要,是六个月就自动生效离婚'
test_cut_raw_1 = jieba.cut(test_data_1)
test_corpus_1 = dictionary.doc2bow(test_cut_raw_1)
similarity.num_best = 5
# 返回最相似的样本材料,(index_of_document, similarity) tuples
print(similarity[test_corpus_1])
示例7: testFull
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testFull(self, num_best=None, shardsize=100):
if self.cls == similarities.Similarity:
index = self.cls(None, corpus, num_features=len(dictionary), shardsize=shardsize)
else:
index = self.cls(corpus, num_features=len(dictionary))
if isinstance(index, similarities.MatrixSimilarity):
expected = numpy.array([
[ 0.57735026, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[ 0.40824831, 0.0, 0.0, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.0, 0.0, 0.0, 0.0 ],
[ 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0 ],
[ 0.0, 0.40824831, 0.0, 0.0, 0.0, 0.81649661, 0.0, 0.0, 0.40824831, 0.0, 0.0, 0.0 ],
[ 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0 ],
[ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 ],
[ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.70710677, 0.0 ],
[ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026, 0.57735026 ],
[ 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026 ]
], dtype=numpy.float32)
self.assertTrue(numpy.allclose(expected, index.index))
index.num_best = num_best
query = corpus[0]
sims = index[query]
expected = [(0, 0.99999994), (2, 0.28867513), (3, 0.23570226), (1, 0.23570226)][ : num_best]
# convert sims to full numpy arrays, so we can use allclose() and ignore
# ordering of items with the same similarity value
expected = matutils.sparse2full(expected, len(index))
if num_best is not None: # when num_best is None, sims is already a numpy array
sims = matutils.sparse2full(sims, len(index))
self.assertTrue(numpy.allclose(expected, sims))
if self.cls == similarities.Similarity:
index.destroy()
示例8: testReopen
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testReopen(self):
"""test re-opening partially full shards"""
index = similarities.Similarity(None, corpus[:5], num_features=len(dictionary), shardsize=9)
_ = index[corpus[0]] # forces shard close
index.add_documents(corpus[5:])
query = corpus[0]
sims = index[query]
expected = [(0, 0.99999994), (2, 0.28867513), (3, 0.23570226), (1, 0.23570226)]
expected = matutils.sparse2full(expected, len(index))
self.assertTrue(numpy.allclose(expected, sims))
index.destroy()