本文整理汇总了Python中gensim.models.LdaModel.show_topic方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.show_topic方法的具体用法?Python LdaModel.show_topic怎么用?Python LdaModel.show_topic使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.LdaModel
的用法示例。
在下文中一共展示了LdaModel.show_topic方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topic [as 别名]
class CorpusLdaModelWrapper:
    """Bundle a gensim LdaModel with its corpus, dictionary and similarity index.

    Train with :meth:`train`, then query with :meth:`queryDoc` or inspect
    topics with :meth:`show_topic`.
    """

    def __init__(self, corpus, dictionary, doc_labels, preprocessing_pipeline, numtopics):
        self.corpus = corpus
        self.dictionary = dictionary
        self.doc_labels = doc_labels          # optional labels; None -> integer indices
        self.pipeline = preprocessing_pipeline
        self.numtopics = numtopics
        self.trained = False                  # flipped to True by train()

    def train(self):
        """Fit the LDA model on the corpus and build the similarity index."""
        self.model = LdaModel(self.corpus, id2word=self.dictionary, num_topics=self.numtopics)
        self.index = MatrixSimilarity(self.model[self.corpus])
        self.trained = True

    def convertTextToReducedVector(self, text):
        """Preprocess *text* and return its LDA topic vector.

        Raises exceptions.ModelNotTrainedException if train() was not called.
        """
        if not self.trained:
            raise exceptions.ModelNotTrainedException()
        tokens = word_tokenize(prep.preprocess_text(text, self.pipeline))
        # Keep only in-vocabulary tokens. Membership test via `in` replaces
        # dict.has_key, which was removed in Python 3.
        tokens = [token for token in tokens if token in self.dictionary.token2id]
        bow = self.dictionary.doc2bow(tokens)
        return self.model[bow]

    def queryDoc(self, text):
        """Return (label, similarity) pairs for *text*, most similar first."""
        reducedVec = self.convertTextToReducedVector(text)
        sims = self.index[reducedVec]
        # `is None` instead of `== None` — identity check for the None singleton.
        simtuples = zip(range(len(sims)), sims) if self.doc_labels is None else zip(self.doc_labels, sims)
        simtuples = sorted(simtuples, key=lambda item: item[1], reverse=True)
        return simtuples

    def show_topic(self, id):
        """Delegate to the underlying model's show_topic for topic *id*."""
        return self.model.show_topic(id)
示例2: topicsLDA
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topic [as 别名]
def topicsLDA(self, num_topics=10, num_iterations=10000, num_words=10):
    """Train an LdaModel on self.corpus and aggregate topic weights.

    Returns a dict mapping topic id -> {'weight': summed probability over
    all documents, 'words': top `num_words` terms}, or None on failure.
    """
    # LdaModel(corpus=None, num_topics=100, id2word=None, distributed=False, chunksize=2000, passes=1, update_every=1, alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10, iterations=50, gamma_threshold=0.001)
    try:
        lda = LdaModel(corpus=self.corpus, num_topics=num_topics,
                       id2word=self.id2word, iterations=num_iterations)
        result = {}
        tpd = lda[self.corpus]  # topic probability distribution per document
        for topics in tpd:
            for topic_id, prob in topics:
                if topic_id not in result:
                    # First occurrence: fetch the topic's top words once;
                    # the weight keeps accumulating across documents.
                    words = lda.show_topic(topic_id, topn=num_words)
                    result[topic_id] = {'weight': prob, 'words': words}
                else:
                    result[topic_id]['weight'] += prob
        return result
    except Exception as e:
        # Broad best-effort catch kept from the original design;
        # print() (function form) is required on Python 3.
        print(e)
        return None
示例3: upload_file
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topic [as 别名]
#.........这里部分代码省略.........
# NOTE(review): this is the interior of a longer function whose header is
# elided above; `MyCorpus`, `dictionary`, `doc_labels`, `no_of_topics`,
# `no_of_passes`, `eval`, `chunk`, `alpha` and `eta` are presumably bound
# in the omitted part — confirm against the full file.
corpus = MyCorpus()
# corpus = glob.glob("swcorpus/*")
if not os.path.exists("out"):
    os.makedirs("out")
# if not os.path.exists(os.path.join(os.path.join(os.getcwd(),
# 'out'), foldername)): os.makedirs(os.path.join
# (os.path.join(os.getcwd(), 'out'), foldername))
# Serialize the streamed corpus to Matrix Market format, then reload it
# from disk so training reads the on-disk representation.
MmCorpus.serialize(
    os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
        ['corpus.mm'])), corpus)
mm = MmCorpus('out/corpus.mm')
print(mm)
# doc_labels = glob.glob("corpus/*")
print("fitting the model ...\n")
# Train the LDA model. `eval` shadows the builtin here; its value comes
# from the elided function header.
model = LdaModel(
    corpus=mm, id2word=dictionary, num_topics=no_of_topics,
    passes=no_of_passes, eval_every=eval, chunksize=chunk,
    alpha=alpha, eta=eta)
# model = LdaMulticore(corpus=corpus, id2word=dictionary,
# num_topics=no_of_topics, passes=no_of_passes,
# eval_every=eval, chunksize=chunk, alpha=alpha, eta=eta)
print(model, "\n")
# Print each topic prefixed with its index.
topics = model.show_topics(num_topics=no_of_topics)
for item, i in zip(topics, enumerate(topics)):
    print("topic #"+str(i[0])+": "+str(item)+"\n")
print("saving ...\n")
if not os.path.exists("out"):
    os.makedirs("out")
# if not os.path.exists(os.path.join(os.path.join(os.getcwd(),
# 'out'), foldername)):
# os.makedirs(os.path.join(os.path.join(os.getcwd(), 'out'),
# foldername))
# Persist document labels, the rendered topics, and the dictionary.
with open(
        os.path.join(os.path.join(os.getcwd(), "out"), ''.join(
            ["corpus_doclabels.txt"])), "w", encoding="utf-8") as f:
    for item in doc_labels:
        f.write(item + "\n")
with open(
        os.path.join(os.path.join(os.getcwd(), "out"), ''.join(
            ["corpus_topics.txt"])), "w", encoding="utf-8") as f:
    for item, i in zip(topics, enumerate(topics)):
        f.write(
            "".join(["topic #", str(i[0]), ": ", str(item), "\n"]))
dictionary.save(
    os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
        ['corpus', 'dict'])))
# MmCorpus.serialize(
# os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
# [foldername, 'mm'])), corpus)
示例4: corpus
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topic [as 别名]
# Build a bag-of-words corpus, train LDA on a 90% split, and report
# held-out perplexity plus the top terms per topic.
# NOTE(review): `dictionary`, `texts`, `BOW`, `NSFLDA`, `Num_Topics`,
# `time`, `LdaModel` and `MmCorpus` are assumed to be defined earlier
# in the file — confirm against the full source.
bow_corpus = [dictionary.doc2bow(t) for t in texts]
print('Serializing corpus (%s) ...' % BOW)
MmCorpus.serialize(BOW, bow_corpus)
# Hold out the last 10% of documents. Floor division (//) keeps `size`
# an int usable as a slice index on Python 3; the original 9/10 would
# produce a float there and raise TypeError.
size = len(bow_corpus) * 9 // 10
training = bow_corpus[:size]
testing = bow_corpus[size:]
t0 = time()
print('Training LDA w/ %d topics on first %d texts ...' % (Num_Topics, len(training)))
lda = LdaModel(training, id2word=dictionary, num_topics=Num_Topics, passes=5)
print("done in %0.3fs." % (time() - t0))
print('Saving LDA model (%s) ...' % NSFLDA)
lda.save(NSFLDA)
print('Random subset of topics:')
print('\n'.join(lda.print_topics()))
print('Computing perplexity on %d held-out documents ...' % len(testing))
# log_perplexity returns a per-word bound in log2 space; 2**-bound is
# the conventional perplexity.
perplexity = 2 ** -(lda.log_perplexity(testing))
print('Perplexity: %.2f' % perplexity)
for i in range(0, Num_Topics):
    temp = lda.show_topic(i, 10)
    # NOTE(review): term[1] assumes (weight, word) tuples from an older
    # gensim; recent versions return (word, probability) — verify the
    # installed gensim version.
    terms = [term[1] for term in temp]
    print("Top 10 terms for topic #" + str(i) + ": " + ", ".join(terms))
示例5: defaultdict
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import show_topic [as 别名]
# Tokenize each document in lowercase, dropping stopwords.
texts = []
for document in documents:
    doc_tokens = [word for word in document.lower().split() if word not in stoplist]
    texts.append(doc_tokens)

# Count how often each token occurs across the whole collection.
from collections import defaultdict
frequency = defaultdict(int)
for doc_tokens in texts:
    for token in doc_tokens:
        frequency[token] += 1

# Keep tokens whose corpus frequency is at least 1.
# NOTE(review): every observed token has frequency >= 1, so this filter
# is a no-op as written; the usual tutorial threshold is > 1 — confirm
# whether that was intended.
texts = [[token for token in doc_tokens if frequency[token] >= 1]
         for doc_tokens in texts]

from pprint import pprint  # pretty-printer
dictionary = corpora.Dictionary(texts)
# dictionary.save('/tmp/deerwester.dict')  # store the dictionary, for future reference
# print(dictionary)
corpus = [dictionary.doc2bow(doc_tokens) for doc_tokens in texts]
# corpora.MmCorpus.serialize('/tmp/deerwester.mm', corpus)

# Train a two-topic LDA model on the bag-of-words corpus.
lda = LdaModel(corpus, num_topics=2)

# on a new document:
new_doc = "pretty obvious that when i write my tellall memoir someday there will be four to six"
new_vec = dictionary.doc2bow(new_doc.lower().split())
print(lda.print_topic(0))
print(lda.show_topic(1))
print(lda.get_document_topics(new_vec))