当前位置: 首页>>代码示例>>Python>>正文


Python LdaModel.print_topics方法代码示例

本文整理汇总了Python中gensim.models.LdaModel.print_topics方法的典型用法代码示例。如果您正苦于以下问题：Python LdaModel.print_topics方法的具体用法？Python LdaModel.print_topics怎么用？Python LdaModel.print_topics使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.LdaModel的用法示例。


在下文中一共展示了LdaModel.print_topics方法的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: lda

# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import print_topics [as 别名]
def lda():
    """Train a 100-topic LDA model on a pre-segmented corpus and save it.

    Reads a stop-word list (one word per line) and a whitespace-tokenised
    corpus (one document per line), drops the stop words, builds a gensim
    dictionary and bag-of-words corpus, trains the model and saves it.

    Takes no arguments and returns nothing; all inputs and outputs are the
    hard-coded file paths below.
    """
    # A set gives O(1) membership tests; the filter below runs once per token.
    with codecs.open('../conf/stop_words_ch.txt', mode='r', encoding='utf8') as stop_fp:
        stopwords = {w.strip() for w in stop_fp}

    # Raw strings keep every Windows backslash literal (no accidental escapes).
    with codecs.open(r'D:\nlp\corpora\segs.txt', mode='r', encoding='utf8') as fp:
        train = [[w for w in line.split() if w not in stopwords] for line in fp]

    dictionary = corpora.Dictionary(train)
    corpus = [dictionary.doc2bow(text) for text in train]
    lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=100)

    # NOTE(review): both return values are discarded, so anything visible
    # depends on gensim's own logging -- confirm logging is configured, or
    # print the results explicitly.
    lda.print_topics(30)
    # topic id=20
    lda.print_topic(20)

    # save/load model
    # The original literal ended in '\news.model', where '\n' is a newline
    # escape; the raw string restores the intended file name.
    lda.save(r'D:\nlp\corpora\news.model')
开发者ID:xialei,项目名称:poc,代码行数:23,代码来源:news.py

示例2: dictionary

# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import print_topics [as 别名]
# Script fragment: persist the dictionary and bag-of-words corpus, train LDA
# on the first 80% of the documents and report perplexity on the remaining
# 20%. DICT, BOW, NSFLDA, Num_Topics, texts and dictionary are defined
# earlier in the file (not visible here).
#
# Every print call passes a single pre-formatted string, so the output is the
# same whether print is the Python 2 statement or the Python 3 function.
print('Saving dictionary (%s)...' % DICT)
dictionary.save(DICT)

print('Building bag-of-words corpus ...')
bow_corpus = [dictionary.doc2bow(t) for t in texts]

print('Serializing corpus (%s) ...' % BOW)
MmCorpus.serialize(BOW, bow_corpus)

# Floor division keeps the split index an int; plain `/` yields a float under
# true division, which is not a valid slice index.
size = len(bow_corpus) * 4 // 5
training = bow_corpus[:size]
testing = bow_corpus[size:]

print('Training LDA w/ %d topics on first %d texts ...' % (Num_Topics, len(training)))
lda = LdaModel(training, id2word=dictionary, num_topics=Num_Topics, passes=5, iterations=1000)

print('Saving LDA model (%s) ...' % NSFLDA)
lda.save(NSFLDA)

print('Random subset of topics:')
# NOTE(review): this join requires print_topics() to return a list of
# strings -- confirm against the installed gensim version.
print('\n'.join(lda.print_topics()))

print('Computing perplexity on %d held-out documents ...' % len(testing))
# Perplexity is computed as 2 ** (-bound), where bound is whatever
# log_perplexity() returns for the held-out documents.
perplexity = 2 ** -(lda.log_perplexity(testing))
print('Perplexity: %.2f' % perplexity)




开发者ID:voronoi,项目名称:TopicModelling,代码行数:27,代码来源:LDA_v4.py

示例3: unpickle

# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import print_topics [as 别名]
        #
        # Disabled alternative: merge the wiki dictionary into the report
        # dictionary before combining the corpora.
        # logging.info('combine report and wiki dictionary...')
        # wiki_to_report = report_dict.merge_with(wiki_dict)
        # merged_dict = report_dict
        #
        # logging.info('combine report and wiki corpus...')
        # merged_corpus = wiki_to_report[wiki_corpus].corpus + report_corpus
        logging.info('generate wiki corpus...')
        # presumably a list of token lists loaded from the pickle -- confirm
        # against the `unpickle` helper defined elsewhere.
        wiki_txt = unpickle('data/txt/processed_wiki.pkl')
        # Wiki texts are converted with the report dictionary, not a wiki one.
        wiki_corpus = [report_dict.doc2bow(wiki) for wiki in wiki_txt]

        logging.info('combine report and wiki corpus...')
        # Plain list concatenation: wiki documents first, then the reports.
        merged_corpus = wiki_corpus + report_corpus

    # compute TFIDF
    # logging.info('compute TFIDF...')
    # tfidf = TfidfModel(dictionary=report_dict, id2word=report_dict)

    # perform LDA
    logging.info('perform LDA...')
    # Both branches use identical hyperparameters; they differ only in the
    # training corpus and in the path the model is saved to.
    if use_wiki is True:
        lda = LdaModel(corpus=merged_corpus, id2word=report_dict, num_topics=num_topics, passes=passes,
                       iterations=iterations, alpha='auto', chunksize=chunksize)
        lda.save('result/model_wiki.lda')
        # NOTE(review): confirm the `topics=` / `topn=` keyword names match
        # the installed gensim version's print_topics signature.
        lda.print_topics(topics=num_topics, topn=10)
    else:
        lda = LdaModel(corpus=report_corpus, id2word=report_dict, num_topics=num_topics, passes=passes,
                       iterations=iterations, alpha='auto', chunksize=chunksize)
        lda.save('result/model.lda')
        # NOTE(review): same keyword-name caveat as in the branch above.
        lda.print_topics(topics=num_topics, topn=10)
开发者ID:andresportocarrero,项目名称:FinancialReportMining,代码行数:32,代码来源:LDA.py

示例4: print

# 需要导入模块: from gensim.models import LdaModel [as 别名]
# 或者: from gensim.models.LdaModel import print_topics [as 别名]
    # stemming process
    # Debug output of values produced earlier in the function (not visible
    # in this fragment).
    print(count)
    # print(List)
    # counts = Counter(List)
    # print(counts)
    print(documentInfo)
    # Alias, not a copy: train_set and documentInfo are the same object.
    train_set = documentInfo

    # construct training corpus
    # presumably train_set is a list of token lists -- confirm against the
    # code that builds documentInfo.
    dictionary = Dictionary(train_set)
    corpus = [dictionary.doc2bow(text) for text in train_set]
    print(corpus)
    print(dictionary)
    # train lda model
    lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=30)
    print(lda)
    # Prints whatever print_topics(5) returns for the 30-topic model.
    print(lda.print_topics(5))


    # Disabled earlier variant of the steps above, wrapped in a helper that
    # used 50 topics and returned the topics instead of printing them.
    #
    # def lda_test(train_set):
    #     # train corpus
    #     dictionary = Dictionary(train_set)
    #     corpus = [dictionary.doc2bow(text) for text in train_set]
    #     print(corpus)
    #     print(dictionary)
    #     # lda model training
    #     lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=50)
    #     print(lda)
    #     return (lda.print_topics(50))
开发者ID:outerforce,项目名称:crawler,代码行数:32,代码来源:preprocess.py


注:本文中的gensim.models.LdaModel.print_topics方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。