This article collects typical usage examples of the Python method gensim.models.LdaModel.__getitem__. If you have been wondering what LdaModel.__getitem__ does, how to call it, or what a realistic use looks like, the curated code example below may help. You can also explore further usage examples of the containing class, gensim.models.LdaModel.
One code example of LdaModel.__getitem__ is shown below. Examples are sorted by popularity by default; you can upvote the ones you like or find useful, and your ratings help the system recommend better Python code examples.
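Before the full example, here is a minimal sketch of what __getitem__ does: indexing a trained model with a bag-of-words document (lda[bow], which calls lda.__getitem__(bow)) returns that document's topic distribution as a list of (topic_id, probability) tuples. The toy corpus, the query tokens, and parameter values such as num_topics=2 are illustrative assumptions, not part of the example below.

from gensim.corpora import Dictionary
from gensim.models import LdaModel

# Toy corpus (illustrative assumption): a few tokenized "documents".
texts = [
    ["human", "computer", "interaction"],
    ["graph", "trees", "minors"],
    ["computer", "graph", "survey"],
]

dictionary = Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

# Train a tiny model; num_topics=2 is an arbitrary choice for this sketch.
lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2, passes=10)

# lda[bow] is syntactic sugar for lda.__getitem__(bow): it returns the
# inferred topic distribution as a list of (topic_id, probability) tuples.
bow = dictionary.doc2bow(["computer", "graph"])
print(lda[bow])              # e.g. [(0, 0.7...), (1, 0.2...)]
print(lda.__getitem__(bow))  # equivalent explicit call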
Example 1: upload_file
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import __getitem__ [as alias]
#......... part of the code is omitted here .........
    # corpus = glob.glob("swcorpus/*")

    if not os.path.exists("out"):
        os.makedirs("out")
    # if not os.path.exists(os.path.join(os.path.join(os.getcwd(),
    # 'out'), foldername)): os.makedirs(os.path.join
    # (os.path.join(os.getcwd(), 'out'), foldername))

    MmCorpus.serialize(
        os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
            ['corpus.mm'])), corpus)
    mm = MmCorpus('out/corpus.mm')
    print(mm)
    # doc_labels = glob.glob("corpus/*")

    print("fitting the model ...\n")

    model = LdaModel(
        corpus=mm, id2word=dictionary, num_topics=no_of_topics,
        passes=no_of_passes, eval_every=eval, chunksize=chunk,
        alpha=alpha, eta=eta)
    # model = LdaMulticore(corpus=corpus, id2word=dictionary,
    # num_topics=no_of_topics, passes=no_of_passes,
    # eval_every=eval, chunksize=chunk, alpha=alpha, eta=eta)

    print(model, "\n")

    topics = model.show_topics(num_topics=no_of_topics)
    for item, i in zip(topics, enumerate(topics)):
        print("topic #"+str(i[0])+": "+str(item)+"\n")

    print("saving ...\n")

    if not os.path.exists("out"):
        os.makedirs("out")
    # if not os.path.exists(os.path.join(os.path.join(os.getcwd(),
    # 'out'), foldername)):
    # os.makedirs(os.path.join(os.path.join(os.getcwd(), 'out'),
    # foldername))

    with open(
            os.path.join(os.path.join(os.getcwd(), "out"), ''.join(
                ["corpus_doclabels.txt"])), "w", encoding="utf-8") as f:
        for item in doc_labels:
            f.write(item + "\n")

    with open(
            os.path.join(os.path.join(os.getcwd(), "out"), ''.join(
                ["corpus_topics.txt"])), "w", encoding="utf-8") as f:
        for item, i in zip(topics, enumerate(topics)):
            f.write(
                "".join(["topic #", str(i[0]), ": ", str(item), "\n"]))

    dictionary.save(
        os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
            ['corpus', 'dict'])))
    # MmCorpus.serialize(
    # os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
    # [foldername, 'mm'])), corpus)
    model.save(
        os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
            ['corpus', 'lda'])))

    print("\n ta-daaaa ...\n")

    # VISUALIZATION
    no_of_topics = model.num_topics
    no_of_docs = len(doc_labels)
    doc_topic = np.zeros((no_of_docs, no_of_topics))

    for doc, i in zip(corpus, range(no_of_docs)):
        # topic_dist is a list of tuples (topic_id, topic_prob)
        topic_dist = model.__getitem__(doc)
        for topic in topic_dist:
            doc_topic[i][topic[0]] = topic[1]

    # get plot labels
    topic_labels = []
    for i in range(no_of_topics):
        # show_topic() returns tuples (word_prob, word)
        topic_terms = [x[0] for x in model.show_topic(i, topn=3)]
        topic_labels.append(" ".join(topic_terms))

    # cf. https://de.dariah.eu/tatom/topic_model_visualization.html
    if no_of_docs > 20 or no_of_topics > 20:
        plt.figure(figsize=(20, 20))  # if many items, enlarge figure
    plt.pcolor(doc_topic, norm=None, cmap='Reds')
    plt.yticks(np.arange(doc_topic.shape[0])+1.0, doc_labels)
    plt.xticks(
        np.arange(doc_topic.shape[1])+0.5, topic_labels, rotation='90')
    plt.gca().invert_yaxis()
    plt.colorbar(cmap='Reds')
    plt.tight_layout()
    plt.savefig("./static/corpus_heatmap.svg")

    return render_template('success.html')
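As a follow-up, here is a hedged sketch of how the artifacts saved by the example above might be reloaded and queried through __getitem__ for new documents. The file paths follow the out/corpus.* names used in the example; the tokenized query document is an illustrative assumption.

from gensim.corpora import Dictionary, MmCorpus
from gensim.models import LdaModel

# Reload the artifacts written by the example above ("out/corpus.*").
dictionary = Dictionary.load("out/corpus.dict")
corpus = MmCorpus("out/corpus.mm")
model = LdaModel.load("out/corpus.lda")

# Query a stored document: model[doc] calls __getitem__ and returns
# its topic distribution as (topic_id, probability) tuples.
for doc in corpus:
    print(model[doc])
    break  # only the first document, for the sketch

# Query an unseen document (tokens are an illustrative assumption).
new_bow = dictionary.doc2bow(["topic", "model", "visualization"])
print(model[new_bow])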