本文整理汇总了Python中gensim.models.LdaModel.log_perplexity方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.log_perplexity方法的具体用法?Python LdaModel.log_perplexity怎么用?Python LdaModel.log_perplexity使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.LdaModel的用法示例。
在下文中一共展示了LdaModel.log_perplexity方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_evaluation_perplexity
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import log_perplexity [as 别名]
def create_evaluation_perplexity(config, Kind):
    """Evaluate LDA perplexity on a random ~10% held-out split of the corpus.

    Loads the Mallet corpus and dictionary for *Kind*, holds out roughly 10%
    of its documents, trains an LdaModel on the remainder, and appends a row
    ``[model_fname, log_perplexity]`` to ``evaluate-perplexity-results.csv``
    under ``config.path``.

    :param config: project configuration providing ``model_fname`` /
        ``corpus_fname`` templates, ``alpha``, ``passes``, ``num_topics``,
        and the output ``path``.
    :param Kind: class whose ``__name__`` selects the corpus/model files.
    """
    model_fname = config.model_fname % Kind.__name__
    corpus_fname = config.corpus_fname % Kind.__name__
    try:
        id2word = Dictionary.load(corpus_fname + '.dict')
        corpus = MalletCorpus(corpus_fname, id2word=id2word)
    except Exception:
        # Narrowed from a bare except; any load failure means the corpora
        # were never built.  Return explicitly: if error() does not raise,
        # falling through would hit an undefined `corpus` (NameError).
        error('Corpora not built yet -- cannot evaluate')
        return

    held_out = list()
    training = list()
    target_len = int(0.1 * len(corpus))
    # Lazy %-args instead of eager string formatting for the logger call.
    logger.info('Calculating perplexity with held-out %d of %d documents',
                target_len, len(corpus))

    # Sample distinct in-range document ids.  randrange(len(corpus)) fixes
    # the original randint(0, len(corpus)), whose inclusive upper bound could
    # produce len(corpus) -- an id matching no document, silently shrinking
    # the held-out set below target_len.
    ids = set()
    while len(ids) < target_len:
        ids.add(random.randrange(len(corpus)))

    for doc_id, doc in enumerate(corpus):
        if doc_id in ids:
            held_out.append(doc)
        else:
            training.append(doc)

    model = LdaModel(training,
                     id2word=corpus.id2word,
                     alpha=config.alpha,
                     passes=config.passes,
                     num_topics=config.num_topics)

    # Per-word likelihood bound on the held-out documents (higher is better).
    pwb = model.log_perplexity(held_out)

    # Append so repeated evaluation runs accumulate in one CSV.
    with open(config.path + 'evaluate-perplexity-results.csv', 'a') as f:
        w = csv.writer(f)
        w.writerow([model_fname, pwb])
示例2: dictionary
# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import log_perplexity [as 别名]
# Persist the vocabulary mapping so the corpus/model can be reloaded later.
# NOTE(review): `dictionary`, `texts`, DICT, BOW, Num_Topics, NSFLDA, MmCorpus
# and LdaModel are defined earlier in the full script (not visible here).
print 'Saving dictionary (%s)...' % DICT
dictionary.save(DICT)
# Convert each tokenized text into a (token_id, count) bag-of-words vector.
print 'Building bag-of-words corpus ...'
bow_corpus = [ dictionary.doc2bow(t) for t in texts ]
# Serialize the corpus in Matrix Market format for later streaming.
print 'Serializing corpus (%s) ...' % BOW
MmCorpus.serialize(BOW, bow_corpus)
# 80/20 train/test split; Python 2 `/` on ints is floor division here.
size = len(bow_corpus) * 4 / 5
training = bow_corpus[:size]
testing = bow_corpus[size:]
print 'Training LDA w/ %d topics on first %d texts ...' % (Num_Topics, len(training))
lda = LdaModel(training, id2word=dictionary, num_topics=Num_Topics, passes=5, iterations = 1000)
print 'Saving LDA model (%s) ...' % NSFLDA
lda.save(NSFLDA)
print 'Random subset of topics:'
print '\n'.join(lda.print_topics())
print 'Computing perplexity on %d held-out documents ...' % len(testing)
# log_perplexity returns a per-word log-2 likelihood bound, so
# perplexity = 2 ** (-bound); lower perplexity is better.
perplexity = 2 ** -(lda.log_perplexity(testing))
print 'Perplexity: %.2f' % perplexity