本文整理汇总了Python中gensim.models.ldamodel.LdaModel.show_topics方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.show_topics方法的具体用法?Python LdaModel.show_topics怎么用?Python LdaModel.show_topics使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 gensim.models.ldamodel.LdaModel 的用法示例。
在下文中一共展示了LdaModel.show_topics方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
def main():
collection_name = "nips"
years = xrange(2010, 2015) # 10 ~ 14
n_topics = 10
corpus_paths = map(lambda y:
"data/{}-{}.dat".format(collection_name, y),
years)
all_corpus = []
year2corpus = {}
for year, path in zip(years, corpus_paths):
corpus = list(load_line_corpus(path))
all_corpus.append(proc_corpus(corpus))
year2corpus[year] = corpus
all_corpus = list(itertools.chain.from_iterable(all_corpus))
dictionary = Dictionary(all_corpus)
all_corpus = [dictionary.doc2bow(doc)
for doc in all_corpus]
import pdb
pdb.set_trace()
# print all_corpus
model = LdaModel(all_corpus, num_topics=n_topics,
id2word=dictionary,
eval_every=10, passes=100)
print model.show_topics()
示例2: gensim_lda
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
def gensim_lda(d):
from gensim import corpora, models
from gensim.models.ldamodel import LdaModel
list_doc = []
for i in range(0,len(d)):
list_doc = list_doc + d[i]
dictionary = corpora.Dictionary(list_doc)
model = LdaModel(num_topics = 20, id2word = dictionary)
for i in range(0, len(d)):
print 'Generating corpus and updating model ', i
corpus = [dictionary.doc2bow(doc) for doc in d[i]]
model.update(corpus)
model.save('model_20')
print model.show_topics(num_topics = 20, num_words = 10)
示例3: train_model
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
def train_model(texts, **kwargs):
# parse args
filter_stopwords = kwargs.get('filter_stopwords', True)
normalizer = kwargs.get('normalizer', 'porter')
tfidf = kwargs.get('tfidf', True)
num_topics = kwargs.get('num_topics', 20)
min_freq = kwargs.get('min_freq', 2)
use_pickle = kwargs.get('use_pickle', True)
update_pickle = kwargs.get('update_pickle', True)
report = kwargs.get('report', True)
distributed = kwargs.get('distributed', False)
# build corpus or read it in from pickle
if use_pickle:
print "INFO: loading pickled corpus and word hash"
corpus = pickle.load( open( "pickles/corpus.p", "rb" ) )
id2word = pickle.load( open( "pickles/id2word.p", "rb" ) )
else:
print "INFO: processing text and building corpus..."
corpus, id2word = process_texts(
texts = texts,
filter_stopwords = filter_stopwords,
normalizer = normalizer,
min_freq = min_freq
)
if update_pickle:
# pickle files
print "INFO: updating pickled coprus and word hash"
pickle.dump(corpus, open( "pickles/corpus.p", "wb" ) )
pickle.dump(id2word, open( "pickles/id2word.p", "wb" ) )
# optional tfidf transformation
if tfidf:
print "INFO: applying tfidf transformation..."
tfidf = TfidfModel(corpus)
corpus = tfidf[corpus]
# fit model
print "INFO: fitting model..."
lda = LdaModel(
corpus = corpus,
id2word = id2word,
num_topics = num_topics,
distributed = distributed
)
# report
if report:
perplexity = lda.bound(corpus)
print "RESULTS:"
print "\nperplexity: ", perplexity, "\n"
topics = lda.show_topics(num_topics)
for i, t in enumerate(topics):
print "topic %d:" % i
print t
return lda, corpus, id2word
示例4: main
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
def main(argv):
if len(argv) < 4:
print 'python train_lda.py group_id num_topics passes'
sys.exit(1)
group_id = argv[1]
num_topics = int(argv[2])
passes = int(argv[3])
log.info('Prepare corpus for group: %s' % group_id)
base_path = 'tables/' + group_id + '/'
model_base_path = 'ldamodels/' + group_id + '/'
# buid dict and corpus
#now = datetime.now()
indicator = 'title-comment'
source_path = base_path + 'corpus-topic-comment'
corpus_path = model_base_path + 'corpus-'+ indicator + '-' + group_id + '.mm'
dict_path = model_base_path + 'dict-' + indicator + '-' + group_id + '.dict'
log.info('Building the dict...')
build_dict_corpus(source_path, corpus_path, dict_path)
log.info('Loading dict from pre-saved file...')
dictionary = corpora.Dictionary.load(dict_path)
log.info('Done')
#dictionary.save_as_text(base_path + 'text-dict.txt')
log.info('Build a lda model...')
log.info('Loading corpus from pre-saved .mm file...')
mmcorpus = corpora.MmCorpus(corpus_path)
log.info('Done')
log.info('Training lda model...')
model = LdaModel(mmcorpus, num_topics=num_topics, id2word = dictionary, passes = passes)
model_path = model_base_path + indicator + '-' + group_id + '.ldamodel'
model.save(model_path)
log.info('Done.')
model = LdaModel.load(model_path)
model.show_topics(topics=num_topics, topn=10, log=True)
示例5: build_word_id_map
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger('LDA_model_builder')
################################################################################################################################################
# Build the vocabulary (word <-> id map) over both the training and test post
# files, then train one LDA model on the combined corpus.
logger.info('building word_id_map...')
word_id_map = build_word_id_map([trainPosts_loc, testPosts_loc])
# NOTE(review): `pickle` here appears to be a project helper taking (obj, name),
# not the stdlib module -- confirm against its definition elsewhere in the file.
pickle(word_id_map, 'word_id_map')
normalize_content_stats()
train_and_test_corpus = MyCorpus([trainPosts_loc, testPosts_loc], word_id_map)
logger.info('training LDA model...')
# id2word is a mapping from word ids (integers) to words (strings). It is used
# to determine the vocabulary size, as well as for debugging and topic printing.
lda = LdaModel(train_and_test_corpus, id2word=word_id_map, num_topics=topic_count, update_every=1, chunksize=10000, passes=1)
pickle(lda, 'lda')
# Print the 'topn' most probable words for (randomly selected) 'topics' number
# of topics. Set topics=-1 to print all topics.
lda.show_topics(topics=topic_count, topn=10)
################################################################################################################################################
# blog_topic_distribution_map: key = blog + '_' + post_id,
# value = a list of (topic_id, topic_probability) 2-tuples
blog_topic_distribution_map = {}
# train_user_likes_map: key = uid (user id),
# value = list of (blog, post_id) tuples
train_user_likes_map = defaultdict(list)
# test_blog_post_map: key = blog, value = list of post_ids
test_blog_post_map = defaultdict(list)
logger.info('starting LDA prediction for training data...')
# NOTE: the body of this loop continues beyond this excerpt.
for blog, post_id, likes, blog_content_as_list_of_words in MyFilesIterator([trainPosts_loc]).iterate_fields():
示例6: len
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
if len(sys.argv) != 2:
print 'Usage: {0} rcv1_data_dir'.format(sys.argv[0])
raise SystemExit(1)
data_dir = sys.argv[1]
mapping_file = data_dir+'/token_id_idf'
dictionary_file = data_dir+'/id_token_df'
token_file = data_dir+'/tokens'
lda_file = data_dir+'/lda_model'
print 'creating dictionary...'
N = 23307 # supplied idfs from rcv1/lyrl2004 were based on 23307 training docs
create_dictionary_file(mapping_file,dictionary_file,23307)
dictionary = Dictionary.load_from_text(dictionary_file)
print 'creating corpus...'
corpus = SimpleLowCorpus(token_file,dictionary)
print 'training model...'
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)
lda = LdaModel(corpus,id2word=dictionary,num_topics=200)
print 'done!'
print '\n'*3
print '======final topics======'
topics = lda.show_topics(topics=-1,topn=4)
for i,topic in enumerate(topics):
print i,topic
print 'saving model...'
lda.save(lda_file)
示例7: range
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
term_lists = []
for i in range(len(df)):
df['msg'][i] = df['msg'][i].lower()
j = df['msg'][i].find('req')
if j > -1:
df['msg'][i] = df['msg'][i][j:]
idx.append(i)
terms = df['msg'][i].split()
terms = terms[5:]
filtered_terms = [t for t in terms if len(t) > 0]
term_lists.append(filtered_terms)
# Merge term lists into the main dataframe
d = {'terms':term_lists}
term_df = DataFrame(data=d,columns=['terms'],index=df.index[idx])
df = df.join(term_df)
# Create corpus for topic modeling
corpora_dict = Dictionary(term_lists)
corpus = [corpora_dict.doc2bow(msg) for msg in term_lists]
# Perform topic modeling
lda = LdaModel(corpus=corpus,id2word=corpora_dict,num_topics=5)
# Print out top terms for each topic
topics = lda.show_topics()
i = 0
for topic in topics:
i += 1
print "Topic %d: %s" % (i,str(topic))
示例8: LdaModel
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
# Train an (unsupervised) LDA model with hyperparameters configured earlier in
# the file (num_topics, alpha/alpha_frac, beta, num_iterations, corpus,
# dictionary, output_file_template, test_corpus), save it, optionally print the
# topics, and report held-out perplexity.
run_id = "ldaU_K{K}_a{alpha_frac}-K_b{beta}_iter{iter}.gensim".format(K=num_topics, alpha_frac=alpha_frac, beta=beta, iter=num_iterations)
print run_id
output_file = output_file_template.format(run_id=run_id)
# Train and save
print 'Training...'
model = LdaModel(corpus,
                 alpha=alpha, eta=beta,
                 id2word=dictionary, num_topics=num_topics, iterations=num_iterations
                 )
# Kept as a reference: the multicore variant of the same training call.
# model = LdaMulticore(corpus,
#                      alpha=alpha, eta=beta,
#                      id2word=dictionary, num_topics=num_topics, iterations=num_iterations, workers=2
#                      )
print 'Done training.'
model.save(output_file)
# Print top 10 words in topics, if desired
if print_topics:
    # formatted=False yields (topic_id, [(word, prob), ...]) pairs
    topics = model.show_topics(num_topics=100, formatted=False)
    for topic in topics:
        for tup in topic[1]:
            print tup[0] + ": " + str(tup[1])
        # blank separator after each topic -- presumably inside the outer loop;
        # original indentation was lost in this excerpt, confirm against source
        print '\n'
# Evaluate perplexity on the held-out corpus: log_perplexity returns a
# per-word likelihood bound, so perplexity = 2^(-bound)
ll = model.log_perplexity(test_corpus)
print "LL: "+str(ll)
print "Perp: "+str(np.exp2(-ll))