本文整理汇总了 Python 中 gensim.models.LdaModel.get_document_topics 方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.get_document_topics 方法的具体用法?Python LdaModel.get_document_topics 怎么用?Python LdaModel.get_document_topics 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 gensim.models.LdaModel 的用法示例。
在下文中一共展示了 LdaModel.get_document_topics 方法的 2 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: W2V_cpp2
# Required import: from gensim.models import LdaModel [as alias]
# get_document_topics is a method called on an LdaModel instance; it cannot be imported on its own.
class W2V_cpp2(W2V_base):
    """Fit a gensim LDA model on product texts and export per-product topics.

    The constructor rebases the product and user id maps (presumably
    populated by ``W2V_base.__init__`` -- confirm against the base class) so
    that each id space is shifted down by a fixed offset. ``train`` fits the
    model and writes its results as plain-text files under ``lda/``.
    """

    def __init__(self, n_topic, path, folder):
        """Initialise the base class and rebase product/user ids.

        Args:
            n_topic: Number of topics handed to ``LdaModel`` in ``train``.
            path: Forwarded unchanged to ``W2V_base.__init__``.
            folder: Forwarded unchanged to ``W2V_base.__init__``.
        """
        self.n_topic = n_topic
        W2V_base.__init__(self, path, folder)

        # Product ids are shifted down by the vocabulary size plus one.
        prod_offset = len(self.word_count) + 1
        self._rebase_ids(self.idx2prod, self.prod2idx, prod_offset)

        # User ids are additionally shifted by the number of products.
        user_offset = len(self.word_count) + len(self.prod2idx) + 1
        self._rebase_ids(self.idx2user, self.user2idx, user_offset)

    @staticmethod
    def _rebase_ids(idx2name, name2idx, offset):
        """Subtract ``offset`` from every id, updating both maps in place.

        The shifted mapping is built separately and swapped in afterwards:
        changing the keys of a dict while iterating over it raises
        ``RuntimeError`` on Python 3, and a shifted id could otherwise
        overwrite an entry that has not been visited yet.
        """
        rebased = {}
        for old_id, name in idx2name.items():
            new_id = old_id - offset
            rebased[new_id] = name
            name2idx[name] = new_id
        # Mutate the original dict object so any other reference to it
        # sees the rebased ids as well.
        idx2name.clear()
        idx2name.update(rebased)

    def train(self):
        """Fit the LDA model and write one topic distribution per product.

        Each item of ``self.data`` contributes one training document built
        from ``obj["text_data"]`` (an iterable of sentences, each an iterable
        of pairs whose first element is a token id; negative ids are
        skipped). Documents are bag-of-words lists of ``(token_id, count)``,
        the format gensim expects.

        Output files (the ``lda/`` directory must already exist):
            lda/prod.txt   one product per line, in first-seen order.
            lda/model.txt  header ``"<n_products> <n_topics>"``, then one
                           line per product: the product followed by its
                           topic probabilities, space separated.
            lda/model_200  the fitted model, saved via ``LdaModel.save``.

        A product's distribution is inferred from the combined token counts
        of all items in ``self.data`` that belong to that product.
        """
        from collections import Counter

        corpus = []
        entity2id = {}
        id2entity = []
        entity_counts = []  # entity id -> Counter of token ids over all its items

        for obj in self.data:
            entity = obj["prod"]
            if entity not in entity2id:
                entity2id[entity] = len(entity2id)
                id2entity.append(entity)
                entity_counts.append(Counter())

            counts = Counter()
            for obj_sent in obj["text_data"]:
                for pair in obj_sent:
                    if pair[0] >= 0:
                        counts[pair[0]] += 1

            # gensim reads the second tuple element as the term frequency,
            # so it must be a real count rather than a document id.
            corpus.append(sorted(counts.items()))
            entity_counts[entity2id[entity]].update(counts)

        self.ldamodel = LdaModel(corpus=corpus, id2word=self.idx2word, num_topics=self.n_topic)

        # Context managers guarantee both files are flushed and closed,
        # even if inference fails part-way through.
        with open("lda/prod.txt", "w") as f_entity, open("lda/model.txt", "w") as f_model:
            f_model.write(str(len(entity2id)) + " " + str(self.n_topic) + "\n")
            for entity_id, entity in enumerate(id2entity):
                f_entity.write(entity + "\n")

                # Infer on this product's own document; minimum_probability=0
                # asks gensim to report every topic, not only the dominant ones.
                entity_bow = sorted(entity_counts[entity_id].items())
                distr = self.ldamodel.get_document_topics(
                    entity_bow, minimum_phi_value=0, minimum_probability=0
                )
                fields = [entity] + [str(pair[1]) for pair in distr]
                # Each value is followed by a space, so lines end in " \n".
                f_model.write(" ".join(fields) + " \n")

        self.ldamodel.save("lda/model_200")
示例2: defaultdict
# Required import: from gensim.models import LdaModel [as alias]
# get_document_topics is a method called on an LdaModel instance; it cannot be imported on its own.
from collections import defaultdict
from pprint import pprint  # pretty-printer


def _tokenize(document):
    """Lowercase a document, split on whitespace and drop stop words."""
    return [word for word in document.lower().split() if word not in stoplist]


texts = [_tokenize(document) for document in documents]

# Count how often each token occurs across the whole collection.
frequency = defaultdict(int)
for text in texts:
    for token in text:
        frequency[token] += 1

# Keep tokens seen at least once. NOTE: every counted token satisfies this,
# so as written the filter retains all tokens.
filtered_texts = []
for text in texts:
    filtered_texts.append([token for token in text if frequency[token] >= 1])
texts = filtered_texts

# Map tokens to integer ids and turn each text into a bag-of-words vector.
# (The dictionary and corpus are kept in memory only; nothing is persisted.)
dictionary = corpora.Dictionary(texts)
corpus = list(map(dictionary.doc2bow, texts))

lda = LdaModel(corpus, num_topics=2)

# Infer topics for a new, unseen document.
new_doc = "pretty obvious that when i write my tellall memoir someday there will be four to six"
new_tokens = new_doc.lower().split()
new_vec = dictionary.doc2bow(new_tokens)

first_topic = lda.print_topic(0)
print(first_topic)
second_topic = lda.show_topic(1)
print(second_topic)
new_doc_topics = lda.get_document_topics(new_vec)
print(new_doc_topics)