當前位置: 首頁>>代碼示例>>Python>>正文


Python similarities.MatrixSimilarity方法代碼示例

本文整理匯總了Python中gensim.similarities.MatrixSimilarity方法的典型用法代碼示例。如果您正苦於以下問題:Python similarities.MatrixSimilarity方法的具體用法?Python similarities.MatrixSimilarity怎麽用?Python similarities.MatrixSimilarity使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在gensim.similarities的用法示例。


在下文中一共展示了similarities.MatrixSimilarity方法的11個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: load

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def load(cls, save_dir='./'):
        """
        Restore a SimSearch object and its underlying KeySearch from disk.

        `save_dir` is used as a plain string prefix for the file names
        'index.mm' and 'lsi.model', so it has to end with a path separator
        already (the default './' does).

        Returns the tuple (ksearch, ssearch).
        """
        # The KeySearch comes first because the SimSearch is built around it.
        key_search = KeySearch.load(save_dir)
        sim_search = SimSearch(key_search)

        # Restore the similarity index saved next to the KeySearch data.
        index_path = save_dir + 'index.mm'
        sim_search.index = similarities.MatrixSimilarity.load(index_path)

        # Restore the LSI model.
        model_path = save_dir + 'lsi.model'
        sim_search.lsi = LsiModel.load(model_path)

        return key_search, sim_search
開發者ID:chrisjmccormick,項目名稱:wiki-sim-search,代碼行數:21,代碼來源:simsearch.py

示例2: trainLSI

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def trainLSI(self, num_topics=100):
        """
        Fit the LSI model and build the similarity index on top of it.

        Stores `num_topics` on the instance, trains an LsiModel on
        `self.ksearch.corpus_tfidf` (with `self.ksearch.dictionary` as
        id2word) and assigns it to `self.lsi`, then assigns to `self.index`
        a MatrixSimilarity over that same corpus projected through the model.
        """
        self.num_topics = num_topics

        ksearch = self.ksearch
        self.lsi = LsiModel(
            ksearch.corpus_tfidf,
            num_topics=self.num_topics,
            id2word=ksearch.dictionary,
        )

        # Project the tf-idf corpus into LSI space; the index gets one
        # feature per requested topic.
        lsi_corpus = self.lsi[ksearch.corpus_tfidf]
        self.index = similarities.MatrixSimilarity(lsi_corpus, num_features=num_topics)
開發者ID:chrisjmccormick,項目名稱:wiki-sim-search,代碼行數:16,代碼來源:simsearch.py

示例3: __init__

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def __init__(self, corpus_file, word2id):
        """
        Load the corpus and build a similarity index over its tf-idf vectors.

        Args:
            corpus_file: Passed through to `load_corpus_file`; also named in
                the timing log message.
            word2id: Passed through to `load_corpus_file`.
        """
        time_s = time.time()
        # NOTE(review): size=50000 presumably caps how many entries are
        # loaded -- confirm against load_corpus_file.
        self.contexts, self.responses = load_corpus_file(corpus_file, word2id, size=50000)

        # _train_model() is defined elsewhere; it is expected to provide
        # self.tfidf_model and self.corpus, both of which are read below.
        self._train_model()
        self.corpus_mm = self.tfidf_model[self.corpus]
        self.index = similarities.MatrixSimilarity(self.corpus_mm)
        # "%.2f" reports the elapsed time with two decimals. The previous
        # "%2.f" meant width 2 / precision 0 and rounded to whole seconds.
        logger.debug(
            "Time to build tfidf model by %s: %.2f seconds.",
            corpus_file,
            time.time() - time_s,
        )
開發者ID:shibing624,項目名稱:dialogbot,代碼行數:10,代碼來源:tfidfmodel.py

示例4: testFull

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def testFull(self, num_best=None, shardsize=100):
        """
        Check the index built from the module-level `corpus`.

        For a MatrixSimilarity index the stored matrix is compared against
        the expected unit-length document vectors. For every index class the
        similarities of the first corpus document are then compared against
        the expected values, truncated to `num_best` when it is given.

        Args:
            num_best: Assigned to `index.num_best`; also limits how many of
                the expected (doc, similarity) pairs are compared.
            shardsize: Forwarded to the constructor when the class under test
                is `similarities.Similarity`.
        """
        is_sharded = self.cls == similarities.Similarity
        if is_sharded:
            index = self.cls(None, corpus, num_features=len(dictionary), shardsize=shardsize)
        else:
            index = self.cls(corpus, num_features=len(dictionary))

        # try/finally so that a failing assertion no longer skips destroy()
        # for Similarity indexes.
        try:
            if isinstance(index, similarities.MatrixSimilarity):
                expected = numpy.array([
                    [ 0.57735026, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
                    [ 0.40824831, 0.0, 0.0, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.0, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.40824831, 0.0, 0.0, 0.0, 0.81649661, 0.0, 0.0, 0.40824831, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.70710677, 0.0 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026, 0.57735026 ],
                    [ 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026 ]
                    ], dtype=numpy.float32)
                self.assertTrue(numpy.allclose(expected, index.index))

            index.num_best = num_best
            query = corpus[0]
            sims = index[query]
            expected = [(0, 0.99999994), (2, 0.28867513), (3, 0.23570226), (1, 0.23570226)][ : num_best]

            # convert sims to full numpy arrays, so we can use allclose() and ignore
            # ordering of items with the same similarity value
            expected = matutils.sparse2full(expected, len(index))
            if num_best is not None: # when num_best is None, sims is already a numpy array
                sims = matutils.sparse2full(sims, len(index))
            self.assertTrue(numpy.allclose(expected, sims))
        finally:
            if is_sharded:
                index.destroy()
開發者ID:largelymfs,項目名稱:topical_word_embeddings,代碼行數:33,代碼來源:test_similarities.py

示例5: check_similary

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def check_similary(self, doc, lsi):
        """
        Return the similarities of `doc` against the module-level `corpus`.

        `doc` is turned into a bag-of-words vector with `self.dictionary`,
        projected through `lsi`, and looked up in a MatrixSimilarity index
        that is built from `lsi[corpus]` on every call.

        NOTE(review): `corpus` is not a parameter or attribute here; it is
        resolved from the enclosing module scope.
        """
        bow = self.dictionary.doc2bow(doc)
        # Project the query into the same space as the indexed documents.
        query = lsi[bow]
        doc_index = similarities.MatrixSimilarity(lsi[corpus])
        return doc_index[query]
開發者ID:christabor,項目名稱:MoAL,代碼行數:9,代碼來源:tm_gensim.py

示例6: TfidfModel

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def TfidfModel(self):
        """
        Build a tf-idf model and a similarity index over `self.corpus_simple`.

        Calls `self.simple_model()` first, then sets `self.model` (the
        tf-idf model), `self.corpus` (the corpus transformed by that model)
        and `self.index` (a MatrixSimilarity over the transformed corpus).
        """
        self.simple_model()

        # Transformation model
        simple_corpus = self.corpus_simple
        self.model = models.TfidfModel(simple_corpus)
        self.corpus = self.model[simple_corpus]

        # Similarity matrix
        self.index = similarities.MatrixSimilarity(self.corpus)

    # LSI model
開發者ID:WenRichard,項目名稱:Customer-Chatbot,代碼行數:13,代碼來源:sentenceSimilarity.py

示例7: LsiModel

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def LsiModel(self):
        """
        Build an LSI model and a similarity index over `self.corpus_simple`.

        Calls `self.simple_model()` first, then sets `self.model` (the LSI
        model), `self.corpus` (the corpus transformed by that model) and
        `self.index` (a MatrixSimilarity over the transformed corpus).
        """
        self.simple_model()

        # Transformation model
        self.model = models.LsiModel(self.corpus_simple)
        self.corpus = self.model[self.corpus_simple]

        # Similarity matrix. The index width is given explicitly as the
        # model's topic count; without it gensim has to iterate the lazily
        # transformed corpus an extra time just to find the highest topic id.
        self.index = similarities.MatrixSimilarity(
            self.corpus, num_features=self.model.num_topics
        )

    # LDA model
開發者ID:WenRichard,項目名稱:Customer-Chatbot,代碼行數:13,代碼來源:sentenceSimilarity.py

示例8: LdaModel

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def LdaModel(self):
        """
        Build an LDA model and a similarity index over `self.corpus_simple`.

        Calls `self.simple_model()` first, then sets `self.model` (the LDA
        model), `self.corpus` (the corpus transformed by that model) and
        `self.index` (a MatrixSimilarity over the transformed corpus).
        """
        self.simple_model()

        # Transformation model
        self.model = models.LdaModel(self.corpus_simple)
        self.corpus = self.model[self.corpus_simple]

        # Similarity matrix. The index width is pinned to the model's topic
        # count: when it is left out, the width is inferred from the highest
        # topic id that occurs in the transformed corpus, and a later query
        # carrying a higher topic id would not fit into the index. It also
        # saves an extra inference pass over the corpus.
        self.index = similarities.MatrixSimilarity(
            self.corpus, num_features=self.model.num_topics
        )

    # Preprocess a newly entered sentence (the sentence to compare)
開發者ID:WenRichard,項目名稱:Customer-Chatbot,代碼行數:13,代碼來源:sentenceSimilarity.py

示例9: build_lda_model

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def build_lda_model(self, data, docs, n_topics=5):
        """
        Train an LDA model on `data` and persist it with a similarity index.

        Each item of `data` is lower-cased, split into `\\w+` tokens and
        passed through `self.remove_stopwords`. A dictionary and a
        bag-of-words corpus are built from the resulting token lists, an LDA
        model with `n_topics` topics is trained on that corpus, and a
        MatrixSimilarity index is built over the bag-of-words corpus. The
        results are handed to `self.save_lda_model` and
        `self.save_similarities` (the latter also receives `docs`).

        Returns the tuple (dictionary, texts, lda_model).
        """
        word_tokenizer = RegexpTokenizer(r'\w+')

        # No stemming is applied: the stop-word-filtered tokens are used as is.
        texts = [
            self.remove_stopwords(word_tokenizer.tokenize(d.lower()))
            for d in tqdm(data)
        ]

        dictionary = corpora.Dictionary(texts)
        corpus = [dictionary.doc2bow(tokens) for tokens in texts]

        lda_model = models.ldamodel.LdaModel(
            corpus=corpus,
            id2word=dictionary,
            num_topics=n_topics,
        )

        # The index is built on the bag-of-words vectors, not on LDA output.
        index = similarities.MatrixSimilarity(corpus)

        self.save_lda_model(lda_model, corpus, dictionary, index)
        self.save_similarities(index, docs)

        return dictionary, texts, lda_model
開發者ID:practical-recommender-systems,項目名稱:moviegeek,代碼行數:32,代碼來源:lda_model_calculator.py

示例10: create_lsi_model

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def create_lsi_model(num_topics,dictionary,corpus):
    """
    Build a tf-idf model, an LSI model on top of it, and a similarity index.

    Args:
        num_topics: Number of LSI topics requested from `models.LsiModel`.
        dictionary: Passed to `models.LsiModel` as `id2word`.
        corpus: Corpus the tf-idf model is trained on and applied to.

    Returns:
        The tuple (tfidf_model, lsi_model, corpus_simi_matrix), where
        `corpus_simi_matrix` is a MatrixSimilarity over the corpus after the
        tf-idf and LSI transformations.
    """
    print("create lsi model ...")
    tfidf_model = models.TfidfModel(corpus)
    corpus_tfidf = tfidf_model[corpus]
    lsi_model = models.LsiModel(corpus_tfidf, id2word=dictionary, num_topics=num_topics)
    corpus_lsi = lsi_model[corpus_tfidf]
    # The index width is given explicitly as the model's topic count;
    # without it gensim has to iterate the lazily transformed corpus an
    # extra time just to find the highest topic id.
    corpus_simi_matrix = similarities.MatrixSimilarity(
        corpus_lsi, num_features=lsi_model.num_topics
    )
    return (tfidf_model, lsi_model, corpus_simi_matrix)
開發者ID:geekinglcq,項目名稱:aca,代碼行數:14,代碼來源:lsi_model.py

示例11: create_lsi_model

# 需要導入模塊: from gensim import similarities [as 別名]
# 或者: from gensim.similarities import MatrixSimilarity [as 別名]
def create_lsi_model(num_topics,dictionary,corpus):
    """
    Chain tf-idf and LSI over `corpus` and index the result for similarity.

    Args:
        num_topics: Number of LSI topics requested from `models.LsiModel`.
        dictionary: Passed to `models.LsiModel` as `id2word`.
        corpus: Corpus the tf-idf model is trained on and applied to.

    Returns:
        The tuple (tfidf_model, lsi_model, corpus_simi_matrix), where
        `corpus_simi_matrix` is a MatrixSimilarity over the corpus after the
        tf-idf and LSI transformations.
    """
    print("create lsi model ...")

    tfidf_model = models.TfidfModel(corpus)
    corpus_tfidf = tfidf_model[corpus]
    lsi_model = models.LsiModel(corpus_tfidf, id2word=dictionary, num_topics=num_topics)
    corpus_lsi = lsi_model[corpus_tfidf]
    # Passing the topic count as num_features spares gensim an extra pass
    # over the lazily transformed corpus to discover the highest topic id.
    corpus_simi_matrix = similarities.MatrixSimilarity(
        corpus_lsi, num_features=lsi_model.num_topics
    )
    return (tfidf_model, lsi_model, corpus_simi_matrix)
開發者ID:geekinglcq,項目名稱:aca,代碼行數:11,代碼來源:lsi_author.py


注:本文中的gensim.similarities.MatrixSimilarity方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。