当前位置: 首页>>代码示例>>Python>>正文


Python similarities.Similarity方法代码示例

本文整理汇总了Python中gensim.similarities.Similarity方法的典型用法代码示例。如果您正苦于以下问题：Python similarities.Similarity方法的具体用法？Python similarities.Similarity怎么用？Python similarities.Similarity使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.similarities的用法示例。


在下文中一共展示了similarities.Similarity方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: testChunking

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testChunking(self):
        """Query the index with several documents at once and check every result row.

        The first three corpus documents are submitted as one chunk. The dense
        result is compared against a fixed similarity matrix, then the same
        query is repeated with ``num_best = 3`` and compared against the
        expected ``(document_index, similarity)`` pairs.
        """
        is_sharded = self.cls == similarities.Similarity
        if is_sharded:
            index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
        else:
            index = self.cls(corpus, num_features=len(dictionary))
        try:
            query = corpus[:3]
            sims = index[query]
            expected = numpy.array([
                [ 0.99999994, 0.23570226, 0.28867513, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.0 ],
                [ 0.23570226, 1.0, 0.40824831, 0.33333334, 0.70710677, 0.0, 0.0, 0.0, 0.23570226 ],
                [ 0.28867513, 0.40824831, 1.0, 0.61237246, 0.28867513, 0.0, 0.0, 0.0, 0.0 ]
                ], dtype=numpy.float32)
            self.assertTrue(numpy.allclose(expected, sims))

            # test the same thing but with num_best
            index.num_best = 3
            sims = index[query]
            expected = [[(0, 0.99999994), (2, 0.28867513), (3, 0.23570226)],
                        [(1, 1.0), (4, 0.70710677), (2, 0.40824831)],
                        [(2, 1.0), (3, 0.61237246), (1, 0.40824831)]]
            self.assertTrue(numpy.allclose(expected, sims))
        finally:
            # Only Similarity indexes are destroyed (presumably to remove their
            # on-disk shards); do it even when an assertion above fails.
            if is_sharded:
                index.destroy()
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:25,代码来源:test_similarities.py

示例2: testIter

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testIter(self):
        """Iterate over the index and check the full document-vs-document matrix.

        Every item yielded by iterating the index is collected and the result
        is compared against a fixed 9x9 similarity matrix.
        """
        is_sharded = self.cls == similarities.Similarity
        if is_sharded:
            index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
        else:
            index = self.cls(corpus, num_features=len(dictionary))
        try:
            sims = list(index)
            expected = numpy.array([
                [ 0.99999994, 0.23570226, 0.28867513, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.0 ],
                [ 0.23570226, 1.0, 0.40824831, 0.33333334, 0.70710677, 0.0, 0.0, 0.0, 0.23570226 ],
                [ 0.28867513, 0.40824831, 1.0, 0.61237246, 0.28867513, 0.0, 0.0, 0.0, 0.0 ],
                [ 0.23570226, 0.33333334, 0.61237246, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
                [ 0.0, 0.70710677, 0.28867513, 0.0, 0.99999994, 0.0, 0.0, 0.0, 0.0 ],
                [ 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.70710677, 0.57735026, 0.0 ],
                [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.99999994, 0.81649655, 0.40824828 ],
                [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.81649655, 0.99999994, 0.66666663 ],
                [ 0.0, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.40824828, 0.66666663, 0.99999994 ]
                ], dtype=numpy.float32)
            self.assertTrue(numpy.allclose(expected, sims))
        finally:
            # Only Similarity indexes are destroyed (presumably to remove their
            # on-disk shards); do it even when the assertion above fails.
            if is_sharded:
                index.destroy()
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:22,代码来源:test_similarities.py

示例3: testPersistency

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testPersistency(self):
        """Save the index, load it back, and check the copy matches the original.

        For ``Similarity`` only the number of shards is compared. For the other
        index classes the underlying ``index`` matrices and ``num_best`` are
        compared.
        """
        fname = testfile()
        is_sharded = self.cls == similarities.Similarity
        if is_sharded:
            index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
        else:
            index = self.cls(corpus, num_features=len(dictionary))
        try:
            index.save(fname)
            index2 = self.cls.load(fname)
            if is_sharded:
                # for Similarity, only do a basic check
                self.assertEqual(len(index.shards), len(index2.shards))
            else:
                if isinstance(index, similarities.SparseMatrixSimilarity):
                    # hack SparseMatrixSim indexes so they're easy to compare
                    index.index = index.index.todense()
                    index2.index = index2.index.todense()
                self.assertTrue(numpy.allclose(index.index, index2.index))
                self.assertEqual(index.num_best, index2.num_best)
        finally:
            # Only Similarity indexes are destroyed (presumably to remove their
            # on-disk shards); do it even if save/load or an assertion fails.
            if is_sharded:
                index.destroy()
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:21,代码来源:test_similarities.py

示例4: testLarge

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testLarge(self):
        """Round-trip the index through ``save(..., sep_limit=0)`` and ``load``.

        ``sep_limit=0`` is used so that all arrays are stored separately. For
        ``Similarity`` only the number of shards is compared. For the other
        index classes the underlying ``index`` matrices and ``num_best`` are
        compared.
        """
        fname = testfile()
        is_sharded = self.cls == similarities.Similarity
        if is_sharded:
            index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
        else:
            index = self.cls(corpus, num_features=len(dictionary))
        try:
            # store all arrays separately
            index.save(fname, sep_limit=0)

            index2 = self.cls.load(fname)
            if is_sharded:
                # for Similarity, only do a basic check
                self.assertEqual(len(index.shards), len(index2.shards))
            else:
                if isinstance(index, similarities.SparseMatrixSimilarity):
                    # hack SparseMatrixSim indexes so they're easy to compare
                    index.index = index.index.todense()
                    index2.index = index2.index.todense()
                self.assertTrue(numpy.allclose(index.index, index2.index))
                self.assertEqual(index.num_best, index2.num_best)
        finally:
            # Only Similarity indexes are destroyed (presumably to remove their
            # on-disk shards); do it even if save/load or an assertion fails.
            if is_sharded:
                index.destroy()
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:23,代码来源:test_similarities.py

示例5: testMmap

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testMmap(self):
        """Round-trip the index through ``save`` and a memory-mapped ``load``.

        The index is saved with ``sep_limit=0`` so that all arrays are stored
        separately, then loaded with ``mmap='r'``. For ``Similarity`` only the
        number of shards is compared. For the other index classes the
        underlying ``index`` matrices and ``num_best`` are compared.
        """
        fname = testfile()
        is_sharded = self.cls == similarities.Similarity
        if is_sharded:
            index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
        else:
            index = self.cls(corpus, num_features=len(dictionary))
        try:
            # store all arrays separately
            index.save(fname, sep_limit=0)

            # same thing, but use mmap to load arrays
            index2 = self.cls.load(fname, mmap='r')
            if is_sharded:
                # for Similarity, only do a basic check
                self.assertEqual(len(index.shards), len(index2.shards))
            else:
                if isinstance(index, similarities.SparseMatrixSimilarity):
                    # hack SparseMatrixSim indexes so they're easy to compare
                    index.index = index.index.todense()
                    index2.index = index2.index.todense()
                self.assertTrue(numpy.allclose(index.index, index2.index))
                self.assertEqual(index.num_best, index2.num_best)
        finally:
            # Only Similarity indexes are destroyed (presumably to remove their
            # on-disk shards); do it even if save/load or an assertion fails.
            if is_sharded:
                index.destroy()
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:24,代码来源:test_similarities.py

示例6: main

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def main():
    """Index ``raw_documents`` by bag-of-words similarity and query it once.

    Each raw document is tokenized with ``jieba.cut``, a dictionary and a
    bag-of-words corpus are built from the tokens, and a ``Similarity`` index
    is created under the ``'-Similarity-index'`` prefix. A sample sentence is
    then tokenized the same way and the query result (with ``num_best = 5``)
    is printed.
    """
    # Tokenize every document; list() materializes jieba's token iterator.
    corpora_documents = [list(jieba.cut(item_text)) for item_text in raw_documents]

    dictionary = corpora.Dictionary(corpora_documents)
    corpus = [dictionary.doc2bow(text) for text in corpora_documents]

    # num_features must cover every token id in the dictionary; a fixed value
    # (previously 400) is wrong as soon as the vocabulary grows past it.
    similarity = similarities.Similarity(
        '-Similarity-index', corpus, num_features=len(dictionary))

    test_data_1 = '你好,我想问一下我想离婚他不想离,孩子他说不要,是六个月就自动生效离婚'
    test_cut_raw_1 = jieba.cut(test_data_1)
    test_corpus_1 = dictionary.doc2bow(test_cut_raw_1)
    similarity.num_best = 5
    # Prints the most similar documents as (index_of_document, similarity) tuples.
    print(similarity[test_corpus_1])
开发者ID:jarvisqi,项目名称:nlp_learning,代码行数:19,代码来源:text.py

示例7: testFull

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testFull(self, num_best=None, shardsize=100):
        """Build an index over the whole corpus and query it with the first document.

        Args:
            num_best: value assigned to ``index.num_best`` before querying; the
                expected result list is truncated to this many entries.
            shardsize: passed to the constructor when the class under test is
                ``similarities.Similarity``; unused for the other classes.

        For ``MatrixSimilarity`` instances the stored ``index`` matrix is also
        compared against a fixed expected matrix.
        """
        is_sharded = self.cls == similarities.Similarity
        if is_sharded:
            index = self.cls(None, corpus, num_features=len(dictionary), shardsize=shardsize)
        else:
            index = self.cls(corpus, num_features=len(dictionary))
        try:
            if isinstance(index, similarities.MatrixSimilarity):
                expected = numpy.array([
                    [ 0.57735026, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
                    [ 0.40824831, 0.0, 0.0, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.0, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.40824831, 0.0, 0.0, 0.0, 0.81649661, 0.0, 0.0, 0.40824831, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.70710677, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026, 0.57735026 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026 ]
                    ], dtype=numpy.float32)
                self.assertTrue(numpy.allclose(expected, index.index))
            index.num_best = num_best
            query = corpus[0]
            sims = index[query]
            expected = [(0, 0.99999994), (2, 0.28867513), (3, 0.23570226), (1, 0.23570226)][:num_best]

            # convert sims to full numpy arrays, so we can use allclose() and ignore
            # ordering of items with the same similarity value
            expected = matutils.sparse2full(expected, len(index))
            if num_best is not None: # when num_best is None, sims is already a numpy array
                sims = matutils.sparse2full(sims, len(index))
            self.assertTrue(numpy.allclose(expected, sims))
        finally:
            # Only Similarity indexes are destroyed (presumably to remove their
            # on-disk shards); do it even when an assertion above fails.
            if is_sharded:
                index.destroy()
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:33,代码来源:test_similarities.py

示例8: testReopen

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import Similarity [as 别名]
def testReopen(self):
        """test re-opening partially full shards

        The index is built from the first five documents with ``shardsize=9``,
        queried once, extended with the remaining documents, and then queried
        again; the second result is compared against the expected similarities
        for the first corpus document.
        """
        index = similarities.Similarity(None, corpus[:5], num_features=len(dictionary), shardsize=9)
        try:
            _ = index[corpus[0]] # forces shard close
            index.add_documents(corpus[5:])
            query = corpus[0]
            sims = index[query]
            expected = [(0, 0.99999994), (2, 0.28867513), (3, 0.23570226), (1, 0.23570226)]
            expected = matutils.sparse2full(expected, len(index))
            self.assertTrue(numpy.allclose(expected, sims))
        finally:
            # Run the cleanup even when a query or the assertion above fails.
            index.destroy()
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:13,代码来源:test_similarities.py


注:本文中的gensim.similarities.Similarity方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。