本文整理汇总了Python中gensim.models.LdaModel.print_topic方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.print_topic方法的具体用法?Python LdaModel.print_topic怎么用?Python LdaModel.print_topic使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 gensim.models.LdaModel 的用法示例。
在下文中一共展示了LdaModel.print_topic方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: lda
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import print_topic [as 别名]
def lda():
    """Train a 100-topic LDA model on a pre-segmented Chinese corpus.

    Reads a stop-word list and a whitespace-segmented corpus from disk,
    builds a gensim dictionary and bag-of-words corpus, trains an LDA
    model, prints topics, and saves the model. Returns nothing; output
    goes to stdout and disk.
    """
    # Load stop words (one per line); `with` guarantees the handle is
    # closed, and a set gives O(1) membership tests in the filter below.
    with codecs.open('../conf/stop_words_ch.txt', mode='r', encoding='utf8') as sw:
        stopwords = set(w.strip() for w in sw)
    # Raw strings make the Windows paths explicit; the original
    # 'D:\\nlp\corpora\segs.txt' only worked because '\c' and '\s' happen
    # not to be recognized escape sequences. Same string value either way.
    train = []
    with codecs.open(r'D:\nlp\corpora\segs.txt', mode='r', encoding='utf8') as fp:
        for line in fp:
            words = line.split()
            train.append([w for w in words if w not in stopwords])
    dictionary = corpora.Dictionary(train)
    corpus = [dictionary.doc2bow(text) for text in train]
    lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=100)
    lda.print_topics(30)
    # print topic id=20
    lda.print_topic(20)
    # save model for a later LdaModel.load()
    lda.save(r'D:\nlp\corpora\news.model')
示例2: extract_topics
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import print_topic [as 别名]
def extract_topics(words):
    """Extract 15 LDA topics from a single list of tokens.

    :param words: list of token strings forming one document
    :return: list of 15 topics, each a list of (token, score-string) tuples
    """
    word_id_map = Dictionary([words])
    # Drop tokens whose document frequency is exactly 2.
    # `.items()` replaces the Python-2-only `.iteritems()`, which raises
    # AttributeError on Python 3; `.items()` behaves the same on both.
    word_id_map.filter_tokens([token_id for token_id, occurrence
                               in word_id_map.dfs.items() if occurrence == 2])
    word_id_map.compactify()  # reassign ids to close the gaps left by filtering
    deals_corpus = [word_id_map.doc2bow(words)]
    lda = LdaModel(corpus=deals_corpus, id2word=word_id_map, num_topics=15,
                   update_every=1, chunksize=1000, passes=1)
    topics = []
    for i in range(15):
        # print_topic() returns a string like "0.10*foo + 0.05*bar";
        # parse it back into (token, score) pairs.
        tokens = lda.print_topic(i).split('+')
        topic_scores = []
        for token in tokens:
            score, token_val = token.split('*')
            topic_scores.append((token_val, score))
        topics.append(topic_scores)
    return topics
示例3: get_topics_lda
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import print_topic [as 别名]
def get_topics_lda(tokens, n_topics=10):
    """
    Using the `gensim` package for LDA.

    LDA is a little better than LSA as it provides a reasonable mixture of
    topics (Wikipedia). `gensim` is a package for topic modeling only, so for
    a particular topic-modeling task it is a lighter option to install and
    run. It can also be run distributed and updated over an existing model.

    :param tokens: preprocessed token lists (one list per document) for
        faster dictionary building
    :param n_topics: number of topics to decompose the data into
    :return: list of topics, each a list of (word, score-string) tuples
    """
    dict_file = 'resources/deals.dict'
    if not os.path.isfile(dict_file):
        # First run: build the dictionary and cache it to disk.
        # Single-argument print(...) is valid on both Python 2 and 3,
        # unlike the original `print "..."` statement.
        print("Dictionary file does not exist. Creating one")
        dictionary = Dictionary(tokens)
        # Remove hapaxes (tokens that occur in exactly one document).
        # `.items()` replaces the Python-2-only `.iteritems()`.
        once_ids = [token_id for token_id, freq in dictionary.dfs.items() if freq == 1]
        dictionary.filter_tokens(once_ids)
        dictionary.compactify()
        dictionary.save(dict_file)
    dictionary = Dictionary.load(dict_file)
    corpus_file = 'resources/deals.mm'
    if not os.path.isfile(corpus_file):
        # First run: serialize the bag-of-words corpus in Matrix Market format.
        print("Corpus file does not exist. Creating one")
        corpus = [dictionary.doc2bow(token) for token in tokens]
        MmCorpus.serialize(corpus_file, corpus)
    mm = MmCorpus(corpus_file)
    # tfidf = TfidfModel(mm)
    # corpus_tfidf = tfidf[mm]
    lda = LdaModel(corpus=mm, id2word=dictionary, num_topics=n_topics,
                   update_every=1, chunksize=1000, passes=1)
    topics = []
    for i in range(n_topics):
        # Parse "score*word + score*word + ..." back into (word, score) pairs.
        words = lda.print_topic(i).split('+')
        topic = []
        for word in words:
            score, w = word.split('*')
            topic.append((w, score))
        topics.append(topic)
    return topics
示例4: len
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import print_topic [as 别名]
# --- Example 4: script fragment (preserved verbatim) ---
# NOTE(review): the page extraction stripped all indentation from this
# snippet and cut off the final CSV-writing loop, so the code is kept
# byte-identical rather than rewritten. `dictionary`, `texts` and
# `numTopics` are defined earlier in the original script.
# Convert each tokenized document to a bag-of-words vector.
corpus = [dictionary.doc2bow(text) for text in texts]
corpora.MmCorpus.serialize('robCSVcorpus.mm', corpus) # store to disk, for later use
#import numpy as np
#corpusnp=np.array(corpus)
#print len(corpusnp), len(np.delete(corpusnp, 1, axis=0))
#Initialize the transformation
#term freq inverse doc freq
#trying to ind the frequency on that page versus overall frequency
lda = LdaModel(corpus, id2word=dictionary,num_topics=numTopics)
ii=0
# Python 2 print statements: show the top-20 terms of every topic.
print 'These are the topics'
for i in range(0, lda.num_topics):
print lda.print_topic(i,topn=20)  # NOTE(review): loop body; its indent was stripped by extraction
#sys.exit()
# Per-document topic distributions (lda[bow] returns (topic_id, prob) pairs).
doc_lda = []
for i in range(len(corpus)):
doc_lda.append(lda[corpus[i]])  # NOTE(review): loop body; its indent was stripped by extraction
#print(doc_lda)
'''
This will simply put the tuples in a csv file, poor format
'''
# NOTE(review): the excerpt is truncated below — the body of the final
# loop (the csv-writing code) is missing from this page.
with open('CorpusTopicsOld.csv', 'wb') as csvfile:
spamwriter = csv.writer(csvfile)
for i in range(len(corpus)):
示例5: defaultdict
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import print_topic [as 别名]
# --- Example 5: minimal end-to-end LDA on an in-memory corpus ---
# Tokenize, lowercase, and strip stop words from each document.
# (`documents` and `stoplist` are defined earlier in the original script.)
texts = [[word for word in document.lower().split() if word not in stoplist]
         for document in documents]

from collections import defaultdict

# Count corpus-wide token frequencies.
frequency = defaultdict(int)
for text in texts:
    for token in text:
        frequency[token] += 1

# NOTE(review): `>= 1` keeps every token (every counted token has frequency
# at least 1), so this filtering pass is a no-op; the canonical gensim
# tutorial uses `> 1` to drop hapaxes. Left unchanged to preserve the
# example's behavior.
texts = [[token for token in text if frequency[token] >= 1]
         for text in texts]

from pprint import pprint  # pretty-printer

dictionary = corpora.Dictionary(texts)
# dictionary.save('/tmp/deerwester.dict')  # store the dictionary, for future reference
# print(dictionary)
corpus = [dictionary.doc2bow(text) for text in texts]
# corpora.MmCorpus.serialize('/tmp/deerwester.mm', corpus)

# Train a tiny 2-topic model (id2word omitted; gensim builds it lazily
# from the corpus when needed).
lda = LdaModel(corpus, num_topics=2)

# Score a new, unseen document against the trained model.
new_doc = "pretty obvious that when i write my tellall memoir someday there will be four to six"
new_vec = dictionary.doc2bow(new_doc.lower().split())
print(lda.print_topic(0))
print(lda.show_topic(1))
print(lda.get_document_topics(new_vec))