本文整理汇总了Python中gensim.similarities.MatrixSimilarity类的典型用法代码示例。如果您正苦于以下问题：Python similarities.MatrixSimilarity类的具体用法？Python similarities.MatrixSimilarity怎么用？Python similarities.MatrixSimilarity使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块gensim.similarities的用法示例。
在下文中一共展示了similarities.MatrixSimilarity类的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def load(cls, save_dir='./'):
    """
    Load a SimSearch object and its underlying KeySearch from a directory.

    Args:
        save_dir: Directory that is passed to ``KeySearch.load`` and that
            contains the 'index.mm' and 'lsi.model' files. A trailing path
            separator is optional.

    Returns:
        A ``(ksearch, ssearch)`` tuple: the loaded KeySearch and the
        SimSearch object wrapping it.
    """
    # Imported locally so this block does not depend on a module-level import.
    import os

    # First create and load the underlying KeySearch.
    ksearch = KeySearch.load(save_dir)

    # Create a SimSearch object around it.
    ssearch = SimSearch(ksearch)

    # Build the file paths with os.path.join instead of string concatenation,
    # so a directory given without a trailing separator (e.g. 'models') no
    # longer resolves to 'modelsindex.mm'.
    ssearch.index = similarities.MatrixSimilarity.load(os.path.join(save_dir, 'index.mm'))
    ssearch.lsi = LsiModel.load(os.path.join(save_dir, 'lsi.model'))

    return (ksearch, ssearch)
示例2: trainLSI
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def trainLSI(self, num_topics=100):
    """
    Fit a Latent Semantic Indexing model and index the corpus in LSI space.

    The topic count is stored in ``self.num_topics``, the fitted model in
    ``self.lsi`` and the similarity index in ``self.index``.

    Args:
        num_topics: Number of LSI topics; also used as the feature count of
            the similarity index.
    """
    self.num_topics = num_topics

    # Fit LSI on the TF-IDF corpus held by the underlying KeySearch.
    ks = self.ksearch
    self.lsi = LsiModel(ks.corpus_tfidf, num_topics=self.num_topics, id2word=ks.dictionary)

    # Project the same corpus into LSI space, then index the projection.
    lsi_corpus = self.lsi[ks.corpus_tfidf]
    self.index = similarities.MatrixSimilarity(lsi_corpus, num_features=num_topics)
示例3: __init__
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def __init__(self, corpus_file, word2id):
    """
    Load a corpus, train the TF-IDF model and build the similarity index.

    Args:
        corpus_file: Corpus location handed to ``load_corpus_file``.
        word2id: Vocabulary mapping handed to ``load_corpus_file``.
    """
    time_s = time.time()

    # At most 50000 entries are requested from the corpus file.
    self.contexts, self.responses = load_corpus_file(corpus_file, word2id, size=50000)

    # NOTE(review): presumably sets self.tfidf_model and self.corpus, which
    # are read right below — confirm against _train_model.
    self._train_model()

    self.corpus_mm = self.tfidf_model[self.corpus]
    self.index = similarities.MatrixSimilarity(self.corpus_mm)

    # '%.2f' prints the elapsed time with two decimals; the previous '%2.f'
    # meant "width 2, precision 0" and dropped the fractional part.
    logger.debug("Time to build tfidf model by %s: %.2f seconds." % (corpus_file, time.time() - time_s))
示例4: testFull
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def testFull(self, num_best=None, shardsize=100):
    """
    Check the index contents and the similarities of the first document.

    Args:
        num_best: Value assigned to ``index.num_best``; also truncates the
            list of expected (document, similarity) pairs.
        shardsize: Shard size, used only when the class under test is
            ``similarities.Similarity``.
    """
    sharded = self.cls == similarities.Similarity
    n_features = len(dictionary)

    # The sharded Similarity class takes an extra leading argument.
    if sharded:
        index = self.cls(None, corpus, num_features=n_features, shardsize=shardsize)
    else:
        index = self.cls(corpus, num_features=n_features)

    # Only the dense MatrixSimilarity index is compared against the full
    # expected matrix.
    if isinstance(index, similarities.MatrixSimilarity):
        expected_matrix = numpy.array(
            [
                [0.57735026, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [0.40824831, 0.0, 0.0, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.40824831, 0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0],
                [0.0, 0.40824831, 0.0, 0.0, 0.0, 0.81649661, 0.0, 0.0, 0.40824831, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.57735026, 0.57735026, 0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.70710677, 0.0],
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026, 0.57735026],
                [0.0, 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026],
            ],
            dtype=numpy.float32,
        )
        self.assertTrue(numpy.allclose(expected_matrix, index.index))

    # Query the index with the first document of the corpus.
    index.num_best = num_best
    sims = index[corpus[0]]

    top_pairs = [(0, 0.99999994), (2, 0.28867513), (3, 0.23570226), (1, 0.23570226)]
    expected_sims = top_pairs[:num_best]

    # Compare as full numpy arrays so allclose() can be used and the order
    # of items with the same similarity value does not matter.
    expected_sims = matutils.sparse2full(expected_sims, len(index))
    if num_best is not None:
        # When num_best is None, sims is already a numpy array.
        sims = matutils.sparse2full(sims, len(index))
    self.assertTrue(numpy.allclose(expected_sims, sims))

    if sharded:
        index.destroy()
示例5: check_similary
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def check_similary(self, doc, lsi):
    """
    Return the similarities between ``doc`` and the LSI-projected corpus.

    Args:
        doc: Document passed to ``self.dictionary.doc2bow``.
        lsi: Model used to project both the query and the corpus.

    Returns:
        The result of querying the similarity index with the projected
        document.
    """
    # Bag-of-words first, then the projection into LSI space.
    bow = self.dictionary.doc2bow(doc)
    query_lsi = lsi[bow]

    # NOTE(review): `corpus` is neither a parameter nor an attribute here;
    # presumably a module-level name — confirm.
    lsi_index = similarities.MatrixSimilarity(lsi[corpus])
    return lsi_index[query_lsi]
示例6: TfidfModel
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def TfidfModel(self):
    """
    Build a TF-IDF model and a similarity index over the transformed corpus.

    Sets ``self.model``, ``self.corpus`` and ``self.index``.
    """
    self.simple_model()

    # Fit the transformation on the simple corpus and apply it.
    tfidf = models.TfidfModel(self.corpus_simple)
    self.model = tfidf
    self.corpus = tfidf[self.corpus_simple]

    # Build the similarity matrix.
    self.index = similarities.MatrixSimilarity(self.corpus)
# LSI model
示例7: LsiModel
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def LsiModel(self):
    """
    Build an LSI model and a similarity index over the transformed corpus.

    Sets ``self.model``, ``self.corpus`` and ``self.index``.
    """
    self.simple_model()

    # Fit the transformation on the simple corpus and apply it.
    lsi = models.LsiModel(self.corpus_simple)
    self.model = lsi
    self.corpus = lsi[self.corpus_simple]

    # Build the similarity matrix.
    self.index = similarities.MatrixSimilarity(self.corpus)
# LDA model
示例8: LdaModel
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def LdaModel(self):
    """
    Build an LDA model and a similarity index over the transformed corpus.

    Sets ``self.model``, ``self.corpus`` and ``self.index``.
    """
    self.simple_model()

    # Fit the transformation on the simple corpus and apply it.
    lda = models.LdaModel(self.corpus_simple)
    self.model = lda
    self.corpus = lda[self.corpus_simple]

    # Build the similarity matrix.
    self.index = similarities.MatrixSimilarity(self.corpus)
# Preprocess the newly entered sentence (the sentence to compare)
示例9: build_lda_model
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def build_lda_model(self, data, docs, n_topics=5):
    """
    Tokenize ``data``, train an LDA model and persist it with its index.

    Args:
        data: Iterable of raw text strings.
        docs: Passed through to ``self.save_similarities`` with the index.
        n_topics: Number of LDA topics.

    Returns:
        A ``(dictionary, texts, lda_model)`` tuple.
    """
    tokenizer = RegexpTokenizer(r'\w+')

    # Lower-case, tokenize and drop stop words; tokens are not stemmed.
    texts = [
        self.remove_stopwords(tokenizer.tokenize(item.lower()))
        for item in tqdm(data)
    ]

    dictionary = corpora.Dictionary(texts)
    corpus = list(map(dictionary.doc2bow, texts))

    lda_model = models.ldamodel.LdaModel(
        corpus=corpus,
        id2word=dictionary,
        num_topics=n_topics,
    )

    # The similarity index is built on the bag-of-words corpus itself.
    index = similarities.MatrixSimilarity(corpus)

    self.save_lda_model(lda_model, corpus, dictionary, index)
    self.save_similarities(index, docs)
    return dictionary, texts, lda_model
示例10: create_lsi_model
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def create_lsi_model(num_topics,dictionary,corpus):
    """
    Train TF-IDF and LSI models and index the corpus in LSI space.

    Args:
        num_topics: Number of LSI topics.
        dictionary: Passed to the LSI model as ``id2word``.
        corpus: Corpus the TF-IDF model is fitted on.

    Returns:
        A ``(tfidf_model, lsi_model, corpus_simi_matrix)`` tuple.
    """
    print ("create lsi model ...")

    # TF-IDF weighting comes first; LSI is trained on the weighted corpus.
    tfidf_model = models.TfidfModel(corpus)
    weighted = tfidf_model[corpus]
    lsi_model = models.LsiModel(weighted, id2word=dictionary, num_topics=num_topics)

    # The index is built over the LSI projection of the weighted corpus.
    projected = lsi_model[weighted]
    simi_matrix = similarities.MatrixSimilarity(projected)
    return (tfidf_model, lsi_model, simi_matrix)
示例11: create_lsi_model
# 需要导入模块: from gensim import similarities [as 别名]
# 或者: from gensim.similarities import MatrixSimilarity [as 别名]
def create_lsi_model(num_topics,dictionary,corpus):
    """
    Build a TF-IDF model, an LSI model and an LSI-space similarity index.

    Args:
        num_topics: Number of LSI topics.
        dictionary: Passed to the LSI model as ``id2word``.
        corpus: Corpus the TF-IDF model is fitted on.

    Returns:
        A ``(tfidf_model, lsi_model, corpus_simi_matrix)`` tuple.
    """
    print ("create lsi model ...")

    tfidf_model = models.TfidfModel(corpus)
    tfidf_corpus = tfidf_model[corpus]

    # LSI is trained on, and then applied to, the TF-IDF weighted corpus.
    lsi_model = models.LsiModel(tfidf_corpus, id2word=dictionary, num_topics=num_topics)
    sim_index = similarities.MatrixSimilarity(lsi_model[tfidf_corpus])

    return tfidf_model, lsi_model, sim_index