本文整理汇总了Python中gensim.models.LdaModel.show_topics方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.show_topics方法的具体用法?Python LdaModel.show_topics怎么用?Python LdaModel.show_topics使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.LdaModel
的用法示例。
在下文中一共展示了LdaModel.show_topics方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: lda
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topics [as 别名]
def lda(docs, k):
    """Latent Dirichlet allocation topic model.

    Uses Gensim's LdaModel after tokenizing using scikit-learn's
    TfidfVectorizer.

    Parameters
    ----------
    docs : iterable
        Document identifiers understood by ``fetch``.
    k : integer
        Number of topics.

    Returns
    -------
    list of list of (str, float)
        For each topic, its (word, weight) pairs.
    """
    from gensim.matutils import Sparse2Corpus
    from gensim.models import LdaModel

    # Use a scikit-learn vectorizer rather than Gensim's equivalent
    # for speed and consistency with LSA and k-means.
    vect = _vectorizer()
    docterm = vect.fit_transform(fetch(d) for d in docs)
    # scikit-learn returns a (documents x terms) matrix, but
    # Sparse2Corpus defaults to documents_columns=True (documents in
    # columns); without documents_columns=False the corpus is the
    # transpose and topics are fit over documents, not terms.
    corpus = Sparse2Corpus(docterm, documents_columns=False)
    model = LdaModel(corpus=corpus, num_topics=k)
    topics = model.show_topics(formatted=False)
    vocab = vect.get_feature_names()
    return [[(vocab[int(idx)], w) for w, idx in topic] for topic in topics]
示例2: upload_file
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topics [as 别名]
#.........这里部分代码省略.........
# Build the corpus from the streaming MyCorpus iterator (defined in
# omitted code above this excerpt).
corpus = MyCorpus()
# corpus = glob.glob("swcorpus/*")
# Ensure the output directory exists before serializing.
if not os.path.exists("out"):
    os.makedirs("out")
# if not os.path.exists(os.path.join(os.path.join(os.getcwd(),
# 'out'), foldername)): os.makedirs(os.path.join
# (os.path.join(os.getcwd(), 'out'), foldername))
# Serialize the corpus in Matrix Market format, then reload it so the
# model trains from the on-disk copy.
MmCorpus.serialize(
    os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
        ['corpus.mm'])), corpus)
mm = MmCorpus('out/corpus.mm')
print(mm)
# doc_labels = glob.glob("corpus/*")
print("fitting the model ...\n")
# NOTE(review): `dictionary`, `no_of_topics`, `no_of_passes`, `eval`,
# `chunk`, `alpha` and `eta` come from the omitted enclosing scope;
# `eval` here shadows the builtin of that name.
model = LdaModel(
    corpus=mm, id2word=dictionary, num_topics=no_of_topics,
    passes=no_of_passes, eval_every=eval, chunksize=chunk,
    alpha=alpha, eta=eta)
# model = LdaMulticore(corpus=corpus, id2word=dictionary,
# num_topics=no_of_topics, passes=no_of_passes,
# eval_every=eval, chunksize=chunk, alpha=alpha, eta=eta)
print(model, "\n")
topics = model.show_topics(num_topics=no_of_topics)
# zip(topics, enumerate(topics)) pairs each topic with its
# (index, topic) tuple — effectively enumerate with the roles swapped.
for item, i in zip(topics, enumerate(topics)):
    print("topic #"+str(i[0])+": "+str(item)+"\n")
print("saving ...\n")
if not os.path.exists("out"):
    os.makedirs("out")
# if not os.path.exists(os.path.join(os.path.join(os.getcwd(),
# 'out'), foldername)):
# os.makedirs(os.path.join(os.path.join(os.getcwd(), 'out'),
# foldername))
# Persist document labels, one per line, for later inspection.
with open(
    os.path.join(os.path.join(os.getcwd(), "out"), ''.join(
        ["corpus_doclabels.txt"])), "w", encoding="utf-8") as f:
    for item in doc_labels:
        f.write(item + "\n")
# Persist the human-readable topic listing.
with open(
    os.path.join(os.path.join(os.getcwd(), "out"), ''.join(
        ["corpus_topics.txt"])), "w", encoding="utf-8") as f:
    for item, i in zip(topics, enumerate(topics)):
        f.write(
            "".join(["topic #", str(i[0]), ": ", str(item), "\n"]))
# Save the dictionary so the model can later be reused on the same
# vocabulary.
dictionary.save(
    os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
        ['corpus', 'dict'])))
# MmCorpus.serialize(
# os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
# [foldername, 'mm'])), corpus)
示例3: print
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topics [as 别名]
# Use Latent Dirichlet Allocation to try to categorize the abstracts.
# The first run is slow because it trains the model; later runs load
# the model saved on disk.
print("lda")
lda_filename = 'model.lda'
if not os.path.isfile(lda_filename):
    lda = LdaModel(corpus, num_topics=5,
                   id2word=dictionary,
                   update_every=5,
                   chunksize=10000,
                   passes=100)
    # Save under the same path the cache check above tests. The
    # original saved/loaded '/tmp/model.lda' while checking
    # 'model.lda', so the cached model was never found and the model
    # was retrained on every run.
    lda.save(lda_filename)
else:
    lda = LdaModel.load(lda_filename)
lda.show_topics()
# formatted=False yields, per topic, the raw (word, weight) pairs
# instead of a single formatted string.
topics_matrix = lda.show_topics(formatted=False, num_words=7)
print(topics_matrix)
print(len(topics_matrix))
for topic in topics_matrix:
    i = topic[1]
    print([str(word) for word in i])
示例4: MmCorpus
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topics [as 别名]
# Serialize the corpus to Matrix Market format, then reload the
# on-disk copy so training streams from it.
MmCorpus.serialize(corpusPath, corpus)
mm = MmCorpus(corpusPath)
doc_labels = makeDocLabels(path)
log.info('fitting the model ...')
# fitting the model
# NOTE(review): `dictionary`, `no_of_topics`, `no_of_passes`, `eval`,
# `chunk`, `alpha`, `eta`, `path` and `foldername` come from
# surrounding (unseen) code; `eval` shadows the builtin of that name.
model = LdaModel(corpus=mm, id2word=dictionary, num_topics=no_of_topics, passes=no_of_passes,
                 eval_every=eval, chunksize=chunk, alpha=alpha, eta=eta)
log.info('generated topics...')
# print topics
topics = model.show_topics(num_topics=no_of_topics)
# zip(topics, enumerate(topics)) pairs each topic with its
# (index, topic) tuple — effectively enumerate with the roles swapped.
for item, i in zip(topics, enumerate(topics)):
    log.info('topic #%s: %s', i[0], item)
log.info('saving results...')
# create output folder
if not os.path.exists("out"): os.makedirs("out")
# save doc_labels for further use
with open(os.path.join(os.path.join(os.getcwd(), "out"),''.join([foldername, "_doclabels.txt"])), "w", encoding="utf-8") as f:
    for item in doc_labels: f.write(item+"\n")
# save topics for further use
示例5: create_lda_model
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topics [as 别名]
def create_lda_model():
    """Build and persist an LDA topic model from preprocessed document chunks.

    Pipeline: assemble all docs, filter tokens against a curated English
    word list, build a gensim dictionary and bag-of-words corpus, fit an
    LdaModel, then save the topics and derived word sets under
    ./resources/. Intermediate artifacts are pickled and reloaded at each
    stage so later stages can be re-run independently.
    """
    logging.info('about to create all docs from chunks')
    start_time = datetime.datetime.now()
    create_all_docs()
    end_time = datetime.datetime.now()
    logging.info('total time is: %s', end_time - start_time)
    logging.info('about to load all docs')
    with open('./resources/LDA_processing/all_docs.pkl', mode='rb') as f:
        all_docs = pickle.load(f)
    logging.info('about to load english words')
    with open('./resources/LDA_input/english_full_list.txt') as f:
        english_words = f.read().splitlines()
    # Keep a mid-frequency band of the word list: drop the first 75
    # entries (presumably the most common, stop-word-like terms) and
    # everything past rank 21000 — TODO confirm the list is
    # frequency-ordered.
    good_english_words = set(english_words[75:21000])
    del english_words
    logging.info('about to remove all stop-words and unknown words')
    texts = []
    for i, doc in enumerate(all_docs):
        # Set membership keeps this filter O(len(doc)) per document.
        filtered_doc = [word for word in doc if word in good_english_words]
        texts.append(filtered_doc)
        if i % 5000 == 0:
            logging.info('Finished doc: %s', i)
    # Free the large intermediates before the next big allocations.
    logging.info('about to release memory of all_docs and english_words')
    del all_docs
    del good_english_words
    logging.info('about to save texts')
    with open('./resources/LDA_processing/texts.pkl', mode='wb') as f:
        pickle.dump(texts, f)
    logging.info('about to load texts')
    with open('./resources/LDA_processing/texts.pkl', mode='rb') as f:
        texts = pickle.load(f)
    logging.info('about to create dictionary')
    dictionary = corpora.Dictionary(texts)
    keys = dictionary.keys()
    logging.info('dict size before filter: %s', len(keys))
    # Two-stage vocabulary pruning: first cap the size, then drop terms
    # in fewer than 150 docs or in more than 5% of docs.
    dictionary.filter_extremes(keep_n=150000)
    dictionary.filter_extremes(no_below=150, no_above=0.05)
    keys = dictionary.keys()
    logging.info('dict size after filter: %s', len(keys))
    dictionary.save('./resources/LDA_processing/lda.dict')
    dictionary.save_as_text('./resources/LDA_processing/lda_dict.txt')
    logging.info('about to create corpus')
    corpus = [dictionary.doc2bow(text) for text in texts]
    logging.info('about to save corpus as mm file')
    corpora.MmCorpus.serialize('./resources/LDA_processing/corpus.mm', corpus)
    # Round-trip through disk so the model trains from the serialized
    # artifacts rather than the in-memory objects.
    logging.info('about to load dictionary file')
    dictionary = corpora.Dictionary.load('./resources/LDA_processing/lda.dict')
    logging.info('about to load corpus as mm file')
    corpus = corpora.MmCorpus('./resources/LDA_processing/corpus.mm')
    logging.info('about to start LDA model')
    # num_topics is defined outside this function (module level).
    lda = LdaModel(corpus, id2word=dictionary, num_topics=num_topics)
    logging.info('finished LDA model')
    logging.info('about to save ldaModel')
    lda.save('./resources/LDA_processing/LdaModel')
    logging.info('about to load ldaModel')
    lda = LdaModel.load('./resources/LDA_processing/LdaModel')
    logging.info('about to find topics')
    # formatted=False returns raw (word, weight) data per topic.
    topics = lda.show_topics(num_topics=num_topics, num_words=10000, log=True, formatted=False)
    logging.info('about to save topics')
    with open('./resources/LDA_processing/topics.pkl', mode='wb') as f:
        pickle.dump(topics, f)
    # Derive per-topic word sets and a flat word list via sibling helpers.
    dict_word_sets = find_words_from_lda_model()
    with open('./resources/LDA_processing/dict_word_sets.pkl', mode='wb') as f:
        pickle.dump(dict_word_sets, f)
    topics_words = extract_words_from_word_sets()
    with open('./resources/LDA_result/topic_words', mode='wt', encoding='utf-8') as f:
        f.write('\n'.join(topics_words))
示例6: UnlabeledCorpus
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topics [as 别名]
# Load the shared vocabulary and wrap the train/validation CSVs as
# corpora; keep the tail of the validation set for perplexity checks.
vocab = Dictionary.load_from_text('./vocab.txt')
corpus = UnlabeledCorpus('./rumor_train.csv', vocab)
valid_corpus = UnlabeledCorpus('./rumor_valid.csv', vocab)
valid_sentences = list(valid_corpus)[5000:]

# NOTE: an earlier experiment swept num_topics over {2, 4, 8, 16, 32,
# 64}, scoring each by the best log-perplexity on valid_sentences over
# five runs; 8 topics was the value kept for the final model.

# Fit the final model and persist it for reuse.
model = LdaModel(corpus=corpus, id2word=vocab, num_topics=8, passes=2)
model.save('./lda_model.txt')

# Write every topic's top words to a UTF-8 text file, one topic per
# block, separated by blank lines.
topics = model.show_topics(num_topics=100, num_words=50)
with codecs.open('./topics.txt', 'w', 'utf-8') as out_f:
    for entry in topics:
        out_f.write('%d:\n%s\n' % (entry[0], entry[1]))
        out_f.write('\n')