当前位置: 首页>>代码示例>>Python>>正文


Python similarities.MatrixSimilarity 类代码示例

本文整理汇总了 Python 中 gensim.similarities.MatrixSimilarity 类的典型用法代码示例。如果您正苦于以下问题：Python similarities.MatrixSimilarity 类的具体用法？Python similarities.MatrixSimilarity 怎么用？Python similarities.MatrixSimilarity 使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 gensim.similarities 的用法示例。


在下文中一共展示了 similarities.MatrixSimilarity 类的 11 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: load

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def load(cls, save_dir='./'):
        """
        Restore a saved SimSearch together with the KeySearch it wraps.

        `save_dir` is used as a plain string prefix in front of the file
        names 'index.mm' and 'lsi.model' (no path separator is inserted), and
        is also handed unchanged to KeySearch.load.

        Returns the tuple (ksearch, ssearch).
        """
        # The KeySearch comes first because the SimSearch is built around it.
        ksearch = KeySearch.load(save_dir)
        ssearch = SimSearch(ksearch)

        # Similarity index over the LSI vectors.
        index_path = save_dir + 'index.mm'
        ssearch.index = similarities.MatrixSimilarity.load(index_path)

        # The LSI model itself.
        model_path = save_dir + 'lsi.model'
        ssearch.lsi = LsiModel.load(model_path)

        return ksearch, ssearch
开发者ID:chrisjmccormick,项目名称:wiki-sim-search,代码行数:21,代码来源:simsearch.py

示例2: trainLSI

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def trainLSI(self, num_topics=100):
        """
        Fit an LSI model on the KeySearch tf-idf corpus and index the result.

        Stores the requested topic count in `self.num_topics`, the fitted
        model in `self.lsi`, and a MatrixSimilarity index over the
        LSI-projected corpus in `self.index`.
        """
        self.num_topics = num_topics

        # Fit LSI on the tf-idf corpus, using the KeySearch dictionary as id2word.
        tfidf_corpus = self.ksearch.corpus_tfidf
        self.lsi = LsiModel(tfidf_corpus, num_topics=self.num_topics, id2word=self.ksearch.dictionary)

        # Project the corpus into LSI space; the index width is the topic count.
        lsi_corpus = self.lsi[self.ksearch.corpus_tfidf]
        self.index = similarities.MatrixSimilarity(lsi_corpus, num_features=num_topics)
开发者ID:chrisjmccormick,项目名称:wiki-sim-search,代码行数:16,代码来源:simsearch.py

示例3: __init__

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def __init__(self, corpus_file, word2id):
        """
        Load a corpus file and build a TF-IDF similarity index over it.

        :param corpus_file: passed through to load_corpus_file; also shown in
            the timing log message.
        :param word2id: passed through to load_corpus_file (presumably a
            token-to-id mapping -- confirm against that helper).
        """
        time_s = time.time()
        self.contexts, self.responses = load_corpus_file(corpus_file, word2id, size=50000)

        # NOTE(review): self.tfidf_model and self.corpus are read below but not
        # assigned here; _train_model() is presumably what sets them -- confirm.
        self._train_model()
        self.corpus_mm = self.tfidf_model[self.corpus]
        self.index = similarities.MatrixSimilarity(self.corpus_mm)

        # "%.2f" prints two decimals. The previous "%2.f" meant width 2 with
        # precision 0, which rounded the elapsed time to whole seconds.
        elapsed = time.time() - time_s
        logger.debug("Time to build tfidf model by %s: %.2f seconds." % (corpus_file, elapsed))
开发者ID:shibing624,项目名称:dialogbot,代码行数:10,代码来源:tfidfmodel.py

示例4: testFull

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def testFull(self, num_best=None, shardsize=100):
        """
        Build an index of type `self.cls` over the module-level `corpus` and
        verify both its stored matrix and the result of a query.

        :param num_best: assigned to `index.num_best`; also truncates the
            expected result list. When None, the query result is compared
            as-is (it is already a numpy array).
        :param shardsize: forwarded to the constructor only when `self.cls`
            is `similarities.Similarity`.
        """
        uses_shards = self.cls == similarities.Similarity
        if uses_shards:
            index = self.cls(None, corpus, num_features=len(dictionary), shardsize=shardsize)
        else:
            index = self.cls(corpus, num_features=len(dictionary))

        # Everything after construction runs under try/finally so that a
        # failing assertion can no longer skip destroy() on a Similarity index.
        try:
            if isinstance(index, similarities.MatrixSimilarity):
                # The dense index is expected to hold one row per document.
                expected = numpy.array([
                    [ 0.57735026, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
                    [ 0.40824831, 0.0, 0.0, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.0, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.40824831, 0.0, 0.0, 0.0, 0.81649661, 0.0, 0.0, 0.40824831, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.70710677, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026, 0.57735026 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026 ]
                    ], dtype=numpy.float32)
                self.assertTrue(numpy.allclose(expected, index.index))

            index.num_best = num_best
            query = corpus[0]
            sims = index[query]
            expected = [(0, 0.99999994), (2, 0.28867513), (3, 0.23570226), (1, 0.23570226)][ : num_best]

            # Convert both sides to full numpy arrays so allclose() can be
            # used and the ordering of equally-similar items does not matter.
            expected = matutils.sparse2full(expected, len(index))
            if num_best is not None: # when num_best is None, sims is already a numpy array
                sims = matutils.sparse2full(sims, len(index))
            self.assertTrue(numpy.allclose(expected, sims))
        finally:
            if uses_shards:
                index.destroy()
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:33,代码来源:test_similarities.py

示例5: check_similary

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def check_similary(self, doc, lsi):
        """
        Return the similarities between `doc` and every document of `corpus`.

        `doc` is turned into a bag-of-words vector with `self.dictionary`,
        projected through `lsi`, and looked up in a MatrixSimilarity index
        built from `lsi[corpus]`. The index is rebuilt on every call.
        """
        # Query: bag-of-words first, then into LSI space.
        query_lsi = lsi[self.dictionary.doc2bow(doc)]
        # NOTE(review): `corpus` is not a parameter or attribute here; it is
        # resolved from the enclosing scope.
        corpus_index = similarities.MatrixSimilarity(lsi[corpus])
        return corpus_index[query_lsi]
开发者ID:christabor,项目名称:MoAL,代码行数:9,代码来源:tm_gensim.py

示例6: TfidfModel

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def TfidfModel(self):
        """
        Fit a TF-IDF model on `self.corpus_simple` and index the result.

        Sets `self.model` (the fitted model), `self.corpus` (the transformed
        corpus) and `self.index` (a MatrixSimilarity over that corpus).
        """
        # simple_model() runs first; presumably it prepares
        # self.corpus_simple -- confirm against its definition.
        self.simple_model()

        # Fit the transformation and apply it to the same corpus.
        simple_corpus = self.corpus_simple
        self.model = models.TfidfModel(simple_corpus)
        self.corpus = self.model[self.corpus_simple]

        # Build the similarity matrix.
        self.index = similarities.MatrixSimilarity(self.corpus)

    # lsi模型 
开发者ID:WenRichard,项目名称:Customer-Chatbot,代码行数:13,代码来源:sentenceSimilarity.py

示例7: LsiModel

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def LsiModel(self):
        """
        Fit an LSI model on `self.corpus_simple` and index the result.

        Sets `self.model` (the fitted model), `self.corpus` (the transformed
        corpus) and `self.index` (a MatrixSimilarity over that corpus). The
        model is created with the library's default settings; only the
        corpus is passed.
        """
        # simple_model() runs first; presumably it prepares
        # self.corpus_simple -- confirm against its definition.
        self.simple_model()

        # Fit the transformation and apply it to the same corpus.
        simple_corpus = self.corpus_simple
        self.model = models.LsiModel(simple_corpus)
        self.corpus = self.model[self.corpus_simple]

        # Build the similarity matrix.
        self.index = similarities.MatrixSimilarity(self.corpus)

    # lda模型 
开发者ID:WenRichard,项目名称:Customer-Chatbot,代码行数:13,代码来源:sentenceSimilarity.py

示例8: LdaModel

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def LdaModel(self):
        """
        Fit an LDA model on `self.corpus_simple` and index the result.

        Sets `self.model` (the fitted model), `self.corpus` (the transformed
        corpus) and `self.index` (a MatrixSimilarity over that corpus). The
        model is created with the library's default settings; only the
        corpus is passed.
        """
        # simple_model() runs first; presumably it prepares
        # self.corpus_simple -- confirm against its definition.
        self.simple_model()

        # Fit the transformation and apply it to the same corpus.
        simple_corpus = self.corpus_simple
        self.model = models.LdaModel(simple_corpus)
        self.corpus = self.model[self.corpus_simple]

        # Build the similarity matrix.
        self.index = similarities.MatrixSimilarity(self.corpus)

    # 对新输入的句子(比较的句子)进行预处理 
开发者ID:WenRichard,项目名称:Customer-Chatbot,代码行数:13,代码来源:sentenceSimilarity.py

示例9: build_lda_model

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def build_lda_model(self, data, docs, n_topics=5):
        """
        Tokenise `data`, train an LDA model on it and persist the results.

        Each item of `data` is lower-cased, split on runs of word characters
        and passed through `self.remove_stopwords`. A dictionary and a
        bag-of-words corpus are built from the resulting token lists, an LDA
        model with `n_topics` topics is trained, and a MatrixSimilarity index
        is created. The model, corpus, dictionary and index go to
        `self.save_lda_model`; the index and `docs` go to
        `self.save_similarities`.

        Returns the tuple (dictionary, texts, lda_model).
        """
        tokenizer = RegexpTokenizer(r'\w+')

        # No stemming is applied: the stop-word-filtered tokens are used as-is.
        texts = [
            self.remove_stopwords(tokenizer.tokenize(d.lower()))
            for d in tqdm(data)
        ]

        dictionary = corpora.Dictionary(texts)

        corpus = []
        for text in texts:
            corpus.append(dictionary.doc2bow(text))

        lda_model = models.ldamodel.LdaModel(corpus=corpus,
                                             id2word=dictionary,
                                             num_topics=n_topics)

        # NOTE(review): the index is built from the raw bag-of-words corpus,
        # not from lda_model[corpus] -- confirm that this is intended.
        index = similarities.MatrixSimilarity(corpus)

        self.save_lda_model(lda_model, corpus, dictionary, index)
        self.save_similarities(index, docs)

        return dictionary, texts, lda_model
开发者ID:practical-recommender-systems,项目名称:moviegeek,代码行数:32,代码来源:lda_model_calculator.py

示例10: create_lsi_model

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def create_lsi_model(num_topics, dictionary, corpus):
    """
    Build a TF-IDF model, an LSI model on top of it, and a similarity index.

    The LSI model is trained on the TF-IDF-weighted corpus with `dictionary`
    as id2word and `num_topics` topics; the index covers the corpus after
    projection into LSI space.

    Returns the tuple (tfidf_model, lsi_model, corpus_simi_matrix).
    """
    print("create lsi model ...")

    tfidf_model = models.TfidfModel(corpus)
    weighted_corpus = tfidf_model[corpus]

    lsi_model = models.LsiModel(weighted_corpus, id2word=dictionary, num_topics=num_topics)

    # Index the LSI projection of the weighted corpus.
    corpus_simi_matrix = similarities.MatrixSimilarity(lsi_model[weighted_corpus])

    return tfidf_model, lsi_model, corpus_simi_matrix
开发者ID:geekinglcq,项目名称:aca,代码行数:14,代码来源:lsi_model.py

示例11: create_lsi_model

# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def create_lsi_model(num_topics, dictionary, corpus):
    """
    Chain TF-IDF -> LSI -> MatrixSimilarity over `corpus`.

    `dictionary` is given to the LSI model as id2word and `num_topics` sets
    its number of topics.

    Returns the tuple (tfidf_model, lsi_model, corpus_simi_matrix).
    """
    print("create lsi model ...")

    # Step 1: TF-IDF weighting of the input corpus.
    tfidf_model = models.TfidfModel(corpus)
    tfidf_vectors = tfidf_model[corpus]

    # Step 2: LSI trained on, and then applied to, the weighted vectors.
    lsi_model = models.LsiModel(tfidf_vectors, id2word=dictionary, num_topics=num_topics)
    lsi_vectors = lsi_model[tfidf_vectors]

    # Step 3: similarity index over the LSI vectors.
    corpus_simi_matrix = similarities.MatrixSimilarity(lsi_vectors)

    return tfidf_model, lsi_model, corpus_simi_matrix
开发者ID:geekinglcq,项目名称:aca,代码行数:11,代码来源:lsi_author.py


注：本文中的 gensim.similarities.MatrixSimilarity 类示例由纯净天空整理自 GitHub/MSDocs 等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。