当前位置: 首页>>代码示例>>Python>>正文


Python LdaModel.show_topics方法代码示例

本文整理汇总了Python中gensim.models.ldamodel.LdaModel.show_topics方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.show_topics方法的具体用法?Python LdaModel.show_topics怎么用?Python LdaModel.show_topics使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.models.ldamodel.LdaModel的用法示例。


在下文中一共展示了LdaModel.show_topics方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
def main():
    """Pool the NIPS line corpora for 2010-2014, train a single LDA model
    over the combined documents, and print the discovered topics.

    Relies on helpers defined elsewhere in the file: ``load_line_corpus``,
    ``proc_corpus``, and gensim's ``Dictionary``/``LdaModel``.
    """
    collection_name = "nips"
    years = range(2010, 2015)  # 2010 ~ 2014
    n_topics = 10

    corpus_paths = ["data/{}-{}.dat".format(collection_name, y)
                    for y in years]
    all_corpus = []
    year2corpus = {}
    for year, path in zip(years, corpus_paths):
        corpus = list(load_line_corpus(path))
        all_corpus.append(proc_corpus(corpus))
        year2corpus[year] = corpus

    # Flatten the per-year document lists into one corpus.
    all_corpus = list(itertools.chain.from_iterable(all_corpus))

    dictionary = Dictionary(all_corpus)
    all_corpus = [dictionary.doc2bow(doc)
                  for doc in all_corpus]

    # NOTE(review): a leftover `pdb.set_trace()` debugger breakpoint was
    # removed here -- it halted every run of this function.
    model = LdaModel(all_corpus, num_topics=n_topics,
                     id2word=dictionary,
                     eval_every=10, passes=100)
    print(model.show_topics())
开发者ID:xiaohan2012,项目名称:temporal-topic-mining,代码行数:31,代码来源:gensim_lda.py

示例2: gensim_lda

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
def gensim_lda(d):
    """Incrementally train a 20-topic LDA model over a list of document
    collections and save it to the file ``model_20``.

    Args:
        d: list of collections; each collection is a list of tokenized
           documents (each document a list of token strings).
    """
    from gensim import corpora
    from gensim.models.ldamodel import LdaModel

    # Pool every collection into one flat document list. `extend` is O(n)
    # overall; the original `list_doc = list_doc + d[i]` rebuilt the list
    # each iteration (quadratic).
    list_doc = []
    for docs in d:
        list_doc.extend(docs)

    dictionary = corpora.Dictionary(list_doc)
    model = LdaModel(num_topics=20, id2word=dictionary)
    # Update the model one collection at a time (online training).
    for i, docs in enumerate(d):
        print('Generating corpus and updating model ', i)
        corpus = [dictionary.doc2bow(doc) for doc in docs]
        model.update(corpus)

    model.save('model_20')
    print(model.show_topics(num_topics=20, num_words=10))
开发者ID:JT17,项目名称:445Project,代码行数:18,代码来源:lda_training_data.py

示例3: train_model

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
def train_model(texts, **kwargs):
  """Build (or load) a bag-of-words corpus from *texts* and fit a gensim
  LDA model over it.

  Keyword Args:
    filter_stopwords (bool): drop stopwords during preprocessing (True).
    normalizer (str): stemmer/normalizer name ('porter').
    tfidf (bool): apply a tf-idf transform before fitting (True).
    num_topics (int): number of LDA topics (20).
    min_freq (int): minimum term frequency to keep (2).
    use_pickle (bool): load corpus/id2word from pickle files (True).
    update_pickle (bool): re-pickle a freshly built corpus/id2word (True).
    report (bool): print the variational bound and topics (True).
    distributed (bool): train gensim LDA in distributed mode (False).

  Returns:
    (lda, corpus, id2word) tuple.
  """
  # parse args
  filter_stopwords = kwargs.get('filter_stopwords', True)
  normalizer = kwargs.get('normalizer', 'porter')
  use_tfidf = kwargs.get('tfidf', True)
  num_topics = kwargs.get('num_topics', 20)
  min_freq = kwargs.get('min_freq', 2)
  use_pickle = kwargs.get('use_pickle', True)
  update_pickle = kwargs.get('update_pickle', True)
  report = kwargs.get('report', True)
  distributed = kwargs.get('distributed', False)

  # build corpus or read it in from pickle
  if use_pickle:
    print("INFO: loading pickled corpus and word hash")
    # `with` guarantees the file handles are closed; the original
    # `pickle.load(open(...))` leaked them.
    with open("pickles/corpus.p", "rb") as f:
      corpus = pickle.load(f)
    with open("pickles/id2word.p", "rb") as f:
      id2word = pickle.load(f)

  else:
    print("INFO: processing text and building corpus...")
    corpus, id2word = process_texts(
      texts = texts,
      filter_stopwords = filter_stopwords,
      normalizer = normalizer,
      min_freq = min_freq
    )

    if update_pickle:
      # pickle files
      print("INFO: updating pickled corpus and word hash")
      with open("pickles/corpus.p", "wb") as f:
        pickle.dump(corpus, f)
      with open("pickles/id2word.p", "wb") as f:
        pickle.dump(id2word, f)

  # optional tfidf transformation (a distinct name avoids clobbering the
  # boolean flag with the TfidfModel instance, as the original did)
  if use_tfidf:
    print("INFO: applying tfidf transformation...")
    tfidf_model = TfidfModel(corpus)
    corpus = tfidf_model[corpus]

  # fit model
  print("INFO: fitting model...")
  lda = LdaModel(
    corpus = corpus,
    id2word = id2word,
    num_topics = num_topics,
    distributed = distributed
  )

  # report
  if report:
    # NOTE(review): LdaModel.bound() is the variational log-likelihood
    # bound, not perplexity proper -- confirm before relying on the label.
    perplexity = lda.bound(corpus)
    print("RESULTS:")
    print("\nperplexity: ", perplexity, "\n")
    topics = lda.show_topics(num_topics)
    for i, t in enumerate(topics):
      print("topic %d:" % i)
      print(t)

  return lda, corpus, id2word
开发者ID:LauraCarter,项目名称:amnesty,代码行数:62,代码来源:topic_modeling.py

示例4: main

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
def main(argv):
    """Build the dictionary and MM corpus for a Douban-style group, train an
    LDA model over it, save the model, and log its topics.

    Args:
        argv: command-line vector ``[script, group_id, num_topics, passes]``.

    Relies on module-level names defined elsewhere in the file:
    ``build_dict_corpus``, ``corpora`` (gensim), and the ``log`` logger.
    """
    if len(argv) < 4:
        print('python train_lda.py group_id num_topics passes')
        sys.exit(1)

    group_id = argv[1]
    num_topics = int(argv[2])
    passes = int(argv[3])
    log.info('Prepare corpus for group: %s' % group_id)

    base_path = 'tables/' + group_id + '/'
    model_base_path = 'ldamodels/' + group_id + '/'

    # build dict and corpus
    indicator = 'title-comment'
    source_path = base_path + 'corpus-topic-comment'

    corpus_path = model_base_path + 'corpus-' + indicator + '-' + group_id + '.mm'
    dict_path = model_base_path + 'dict-' + indicator + '-' + group_id + '.dict'

    log.info('Building the dict...')
    build_dict_corpus(source_path, corpus_path, dict_path)

    log.info('Loading dict from pre-saved file...')
    dictionary = corpora.Dictionary.load(dict_path)
    log.info('Done')

    log.info('Build a lda model...')
    log.info('Loading corpus from pre-saved .mm file...')
    mmcorpus = corpora.MmCorpus(corpus_path)
    log.info('Done')

    log.info('Training lda model...')
    model = LdaModel(mmcorpus, num_topics=num_topics, id2word=dictionary, passes=passes)
    model_path = model_base_path + indicator + '-' + group_id + '.ldamodel'
    model.save(model_path)
    log.info('Done.')

    # Reload the persisted model as a round-trip sanity check, then log topics.
    # NOTE(review): `topics=`/`topn=` is the pre-1.0 gensim show_topics
    # signature; newer releases use `num_topics=`/`num_words=` -- confirm
    # the installed gensim version.
    model = LdaModel.load(model_path)
    model.show_topics(topics=num_topics, topn=10, log=True)
开发者ID:hitalex,项目名称:crawler,代码行数:45,代码来源:train_lda.py

示例5: build_word_id_map

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
# Script-level driver: build the vocabulary over train+test blog posts, train
# an LDA topic model, persist both, then walk the training posts for
# prediction.  Relies on project names defined elsewhere in the file:
# build_word_id_map, pickle (a project save-helper that shadows the stdlib
# module name -- presumably dumps to a file named by its second arg; confirm),
# normalize_content_stats, MyCorpus, MyFilesIterator, and the globals
# trainPosts_loc, testPosts_loc, topic_count.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger('LDA_model_builder')
################################################################################################################################################
logger.info('building word_id_map...')
word_id_map = build_word_id_map([trainPosts_loc, testPosts_loc])
pickle(word_id_map, 'word_id_map')
normalize_content_stats()

train_and_test_corpus = MyCorpus([trainPosts_loc, testPosts_loc], word_id_map)
logger.info('training LDA model...')
#id2word is a mapping from word ids (integers) to words (strings). It is used to determine the vocabulary size, as well as for debugging and topic printing.
lda = LdaModel(train_and_test_corpus, id2word=word_id_map, num_topics=topic_count, update_every=1, chunksize=10000, passes=1)
pickle(lda, 'lda')

#Print the 'topn' most probable words for (randomly selected) 'topics' number of topics. Set topics=-1 to print all topics.
# NOTE(review): `topics=`/`topn=` is the pre-1.0 gensim show_topics signature;
# newer releases use `num_topics=`/`num_words=`.
lda.show_topics(topics=topic_count, topn=10)
################################################################################################################################################
#key = blog + '_' + post_id
#value = a list of (topic_id, topic_probability) 2-tuples
blog_topic_distribution_map = {}

#key = uid (user id)
#value = list of (blog, post_id) tuples
train_user_likes_map = defaultdict(list)

#key = blog
#value = list of post_ids
test_blog_post_map = defaultdict(list)

logger.info('starting LDA prediction for training data...')
# NOTE(review): the body of this loop is truncated in this excerpt; it
# continues past the visible lines.
for blog, post_id, likes, blog_content_as_list_of_words in MyFilesIterator([trainPosts_loc]).iterate_fields():
开发者ID:harixxy,项目名称:solutions,代码行数:33,代码来源:LDA_model_builder.py

示例6: len

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
# CLI driver: build a gensim dictionary from RCV1 token/idf files, train a
# 200-topic LDA model over the token corpus, print every topic, and save the
# model.  Relies on names defined elsewhere in the file:
# create_dictionary_file, SimpleLowCorpus, Dictionary, LdaModel.
if len(sys.argv) != 2:
    print('Usage: {0} rcv1_data_dir'.format(sys.argv[0]))
    raise SystemExit(1)

data_dir = sys.argv[1]
mapping_file = data_dir + '/token_id_idf'
dictionary_file = data_dir + '/id_token_df'
token_file = data_dir + '/tokens'
lda_file = data_dir + '/lda_model'

print('creating dictionary...')
# Supplied idfs from rcv1/lyrl2004 were based on 23307 training docs.
N = 23307
# Pass the named constant; the original re-hardcoded the literal 23307 here,
# leaving N dead and the two values free to drift apart.
create_dictionary_file(mapping_file, dictionary_file, N)
dictionary = Dictionary.load_from_text(dictionary_file)

print('creating corpus...')
corpus = SimpleLowCorpus(token_file, dictionary)

print('training model...')
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)
lda = LdaModel(corpus, id2word=dictionary, num_topics=200)
print('done!')
print('\n' * 3)
print('======final topics======')
# NOTE(review): `topics=-1`/`topn=` is the pre-1.0 gensim show_topics
# signature; newer releases use `num_topics=`/`num_words=`.
topics = lda.show_topics(topics=-1, topn=4)
for i, topic in enumerate(topics):
    print(i, topic)

print('saving model...')
lda.save(lda_file)
开发者ID:biddyweb,项目名称:news-1,代码行数:32,代码来源:train.py

示例7: range

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
# Extract request messages from df['msg'], tokenize them, attach the token
# lists to the dataframe, and run a 5-topic LDA over them.
# NOTE(review): `df` and `idx` are defined earlier in the file; `idx` collects
# positional indices of rows whose message contains 'req'.
term_lists = []
for i in range(len(df)):
    # NOTE(review): chained indexing `df['msg'][i]` assumes a 0..n-1 integer
    # index and can trigger pandas' SettingWithCopy behavior -- kept as-is to
    # preserve the original semantics.
    df['msg'][i] = df['msg'][i].lower()
    j = df['msg'][i].find('req')
    if j > -1:
        # Keep only the text from 'req' onward.
        df['msg'][i] = df['msg'][i][j:]
        idx.append(i)
        terms = df['msg'][i].split()
        # Drop the first five tokens (leading header fields).
        terms = terms[5:]
        # Truthiness filter; split() already drops empty strings, so this is
        # a defensive no-op kept from the original (`len(t) > 0`).
        filtered_terms = [t for t in terms if t]
        term_lists.append(filtered_terms)

# Merge term lists into the main dataframe
term_df = DataFrame(data={'terms': term_lists}, columns=['terms'],
                    index=df.index[idx])
df = df.join(term_df)

# Create corpus for topic modeling
corpora_dict = Dictionary(term_lists)
corpus = [corpora_dict.doc2bow(msg) for msg in term_lists]

# Perform topic modeling
lda = LdaModel(corpus=corpus, id2word=corpora_dict, num_topics=5)

# Print out top terms for each topic (1-based numbering, as before, but via
# enumerate instead of a hand-rolled counter)
topics = lda.show_topics()
for i, topic in enumerate(topics, start=1):
    print("Topic %d: %s" % (i, str(topic)))
开发者ID:DGaffney,项目名称:Datadives,代码行数:32,代码来源:lda_topic_modeling_of_vhd_msgs.py

示例8: LdaModel

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import show_topics [as 别名]
			# Inner body of a hyperparameter sweep: train one unsupervised LDA
			# configuration, save it, optionally print its topics, and report
			# held-out perplexity.  All of num_topics, alpha_frac, alpha, beta,
			# num_iterations, corpus, test_corpus, dictionary,
			# output_file_template and print_topics are bound by enclosing
			# loops/setup outside this excerpt.
			run_id = "ldaU_K{K}_a{alpha_frac}-K_b{beta}_iter{iter}.gensim".format(K=num_topics, alpha_frac=alpha_frac, beta=beta, iter=num_iterations)
			print run_id

			output_file = output_file_template.format(run_id=run_id)

			# Train and save
			print 'Training...'
			model = LdaModel(corpus, 
				alpha=alpha, eta=beta,
				id2word=dictionary, num_topics=num_topics, iterations=num_iterations
			)
			# Alternative multicore trainer, kept for reference:
			# model = LdaMulticore(corpus, 
			# 	alpha=alpha, eta=beta,
			# 	id2word=dictionary, num_topics=num_topics, iterations=num_iterations, workers=2
			# )
			print 'Done training.'
			model.save(output_file)

			# Print top 10 words in topics, if desired
			if print_topics:
				# formatted=False yields (topic_id, [(word, prob), ...]) tuples.
				topics = model.show_topics(num_topics=100, formatted=False)
				for topic in topics:
					for tup in topic[1]:
						print tup[0] + ": " + str(tup[1])
					print '\n'

			# Evaluate perplexity: log_perplexity returns a per-word bound,
			# so perplexity = 2^(-bound).
			ll = model.log_perplexity(test_corpus)
			print "LL:   "+str(ll)
			print "Perp: "+str(np.exp2(-ll))
开发者ID:EliasJonsson,项目名称:PGM-Project,代码行数:32,代码来源:train_lda_u.py


注:本文中的gensim.models.ldamodel.LdaModel.show_topics方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。