

Python LdaModel.show_topics Method Code Examples

This article collects typical usage examples of the Python method gensim.models.LdaModel.show_topics, gathered from open-source projects. If you are wondering exactly what LdaModel.show_topics does, how to call it, or how others have used it in practice, the curated examples below should help. You can also explore other usage examples of the containing class, gensim.models.LdaModel.


The following presents 6 code examples of the LdaModel.show_topics method, ordered by popularity.
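Before the examples, here is a minimal, self-contained sketch of what show_topics returns (this assumes a recent gensim release; the return format of formatted=False changed around gensim 1.0):

from gensim.corpora import Dictionary
from gensim.models import LdaModel

docs = [["cat", "dog", "pet"], ["python", "code", "bug"], ["dog", "bone", "pet"]]
dictionary = Dictionary(docs)
corpus = [dictionary.doc2bow(doc) for doc in docs]
model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2, passes=10)

# formatted=True (default): list of (topic_id, '0.25*"dog" + ...') strings
for topic_id, topic_str in model.show_topics(num_topics=2, num_words=3):
    print(topic_id, topic_str)

# formatted=False: list of (topic_id, [(word, probability), ...]) pairs
for topic_id, word_probs in model.show_topics(num_topics=2, num_words=3, formatted=False):
    print(topic_id, word_probs)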

Example 1: lda

# Required module import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import show_topics [as alias]
def lda(docs, k):
    """Latent Dirichlet allocation topic model.

    Uses Gensim's LdaModel after tokenizing using scikit-learn's
    TfidfVectorizer.

    Parameters
    ----------
    docs : iterable
        Document references, resolved to raw text by ``fetch``.
    k : integer
        Number of topics.
    """
    from gensim.matutils import Sparse2Corpus
    from gensim.models import LdaModel

    # Use a scikit-learn vectorizer rather than Gensim's equivalent
    # for speed and consistency with LSA and k-means.
    vect = _vectorizer()
    corpus = vect.fit_transform(fetch(d) for d in docs)
    # Sparse2Corpus treats columns as documents by default, while
    # scikit-learn produces a documents-by-terms matrix, so
    # documents_columns=False is needed here.
    corpus = Sparse2Corpus(corpus, documents_columns=False)

    model = LdaModel(corpus=corpus, num_topics=k)

    # Pre-1.0 gensim: with formatted=False each topic is a list of
    # (weight, word_id) pairs, hence the int(idx) lookup below.
    topics = model.show_topics(formatted=False)
    vocab = vect.get_feature_names()  # get_feature_names_out() in scikit-learn >= 1.2
    return [[(vocab[int(idx)], w) for w, idx in topic] for topic in topics]
Developer ID: fanfannothing, Project: xtas, Lines of code: 28, Source file: cluster.py
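For comparison, here is a self-contained sketch of the same idea ported to current gensim and scikit-learn APIs (show_topics(formatted=False) now yields (topic_id, [(word, probability), ...]) pairs, and TfidfVectorizer exposes get_feature_names_out()); this is an illustration, not the xtas implementation:

from gensim.matutils import Sparse2Corpus
from gensim.models import LdaModel
from sklearn.feature_extraction.text import TfidfVectorizer

def lda_modern(docs, k):
    vect = TfidfVectorizer(stop_words="english")
    tfidf = vect.fit_transform(docs)
    # scikit-learn puts documents in rows, so documents_columns=False
    corpus = Sparse2Corpus(tfidf, documents_columns=False)
    id2word = dict(enumerate(vect.get_feature_names_out()))
    model = LdaModel(corpus=corpus, num_topics=k, id2word=id2word)
    # each topic is already a list of (word, probability) pairs
    return [words for _, words in model.show_topics(formatted=False)]

print(lda_modern(["the cat sat on the mat",
                  "dogs and cats make friendly pets",
                  "python code can have bugs"], k=2))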

Example 2: upload_file

# Required module import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import show_topics [as alias]

#......... part of the code omitted .........

    corpus = MyCorpus()

    # corpus = glob.glob("swcorpus/*")

    if not os.path.exists("out"):
        os.makedirs("out")
    # if not os.path.exists(os.path.join(os.path.join(os.getcwd(),
    # 'out'), foldername)): os.makedirs(os.path.join
    # (os.path.join(os.getcwd(), 'out'), foldername))

    MmCorpus.serialize(
        os.path.join(os.getcwd(), "out", "corpus.mm"), corpus)
    mm = MmCorpus('out/corpus.mm')

    print(mm)

    # doc_labels = glob.glob("corpus/*")

    print("fitting the model ...\n")

    model = LdaModel(
        corpus=mm, id2word=dictionary, num_topics=no_of_topics,
        passes=no_of_passes, eval_every=eval, chunksize=chunk,
        alpha=alpha, eta=eta)

    # model = LdaMulticore(corpus=corpus, id2word=dictionary,
    # num_topics=no_of_topics, passes=no_of_passes,
    # eval_every=eval, chunksize=chunk, alpha=alpha, eta=eta)

    print(model, "\n")

    topics = model.show_topics(num_topics=no_of_topics)

    for i, item in enumerate(topics):
        print("topic #" + str(i) + ": " + str(item) + "\n")

    print("saving ...\n")

    if not os.path.exists("out"):
        os.makedirs("out")
    # if not os.path.exists(os.path.join(os.path.join(os.getcwd(),
    # 'out'), foldername)):
    # os.makedirs(os.path.join(os.path.join(os.getcwd(), 'out'),
    # foldername))

    with open(
            os.path.join(os.getcwd(), "out", "corpus_doclabels.txt"),
            "w", encoding="utf-8") as f:
        for item in doc_labels:
            f.write(item + "\n")

    with open(
            os.path.join(os.getcwd(), "out", "corpus_topics.txt"),
            "w", encoding="utf-8") as f:
        for i, item in enumerate(topics):
            f.write("topic #%s: %s\n" % (i, item))

    dictionary.save(
        os.path.join(os.getcwd(), "out", "corpus.dict"))
    # MmCorpus.serialize(
    # os.path.join(os.path.join(os.getcwd(), "out"), '.'.join(
    # [foldername, 'mm'])), corpus)
Developer ID: pielstroem, Project: Topics, Lines of code: 70, Source file: demo.py
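The omitted code above defines MyCorpus as well as dictionary and doc_labels. A typical gensim streaming corpus for this pattern might look like the sketch below; doc_labels and dictionary are assumed from the surrounding snippet, and the whitespace tokenizer is a placeholder (the real demo.py likely tokenizes differently):

class MyCorpus(object):
    """Stream one bag-of-words vector per document, so the corpus
    never has to fit in memory."""
    def __iter__(self):
        for path in doc_labels:  # assumed: doc_labels holds file paths
            with open(path, encoding="utf-8") as f:
                tokens = f.read().lower().split()  # placeholder tokenizer
            yield dictionary.doc2bow(tokens)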

Example 3: print

# Required module import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import show_topics [as alias]

# We'll use Latent Dirichlet Allocation to try to categorize the abstracts.
# This is slow the first time it runs, since it has to train the model.
print("lda")
# Cache the trained model on disk: train once, then reuse the saved file
# (the same path must be used for the check, the save, and the load).
lda_filename = '/tmp/model.lda'
if not os.path.isfile(lda_filename):
    lda = LdaModel(corpus, num_topics=5,
                   id2word=dictionary,
                   update_every=5,
                   chunksize=10000,
                   passes=100)
    lda.save(lda_filename)
else:
    lda = LdaModel.load(lda_filename)
topics_matrix = lda.show_topics(formatted=False, num_words=7)

print(topics_matrix)
print(len(topics_matrix))

for topic in topics_matrix:
    word_probs = topic[1]  # list of (word, probability) pairs
    print([str(word) for word in word_probs])
#
# topics_matrix = np.array(topics_matrix)
#
# topic_words = topics_matrix[:, :, 1]
# for i in topic_words:
#     print([str(word) for word in i])
Developer ID: rafunchik, Project: shrimps, Lines of code: 31, Source file: docs.py
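With formatted=False, current gensim returns each topic as (topic_id, [(word, probability), ...]), so str(word) in the loop above prints whole tuples. A short sketch that pulls out just the words:

for topic_id, word_probs in topics_matrix:
    words = [word for word, prob in word_probs]
    print("topic #%d: %s" % (topic_id, ", ".join(words)))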

Example 4: MmCorpus

# Required module import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import show_topics [as alias]
MmCorpus.serialize(corpusPath, corpus)

mm = MmCorpus(corpusPath)

doc_labels = makeDocLabels(path)

log.info('fitting the model ...')

# fitting the model
model = LdaModel(corpus=mm, id2word=dictionary, num_topics=no_of_topics, passes=no_of_passes,
                 eval_every=eval, chunksize=chunk, alpha=alpha, eta=eta)

log.info('generated topics...')

# print topics
topics = model.show_topics(num_topics=no_of_topics)

for i, item in enumerate(topics):
    log.info('topic #%s: %s', i, item)


log.info('saving results...')

# create output folder
if not os.path.exists("out"):
    os.makedirs("out")

# save doc_labels for further use
with open(os.path.join(os.getcwd(), "out", foldername + "_doclabels.txt"),
          "w", encoding="utf-8") as f:
    for item in doc_labels:
        f.write(item + "\n")

# save topics for further use
Developer ID: pielstroem, Project: Topics, Lines of code: 33, Source file: lda2.py
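The excerpt cuts off before the topics file is written. A plausible completion, mirroring the doclabels block above (hypothetical; the real lda2.py may differ):

with open(os.path.join(os.getcwd(), "out", foldername + "_topics.txt"),
          "w", encoding="utf-8") as f:
    for i, item in enumerate(topics):
        f.write("topic #%s: %s\n" % (i, item))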

Example 5: create_lda_model

# Required module import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import show_topics [as alias]
def create_lda_model():
    logging.info('about to create all docs from chunks')
    start_time = datetime.datetime.now()
    create_all_docs()
    end_time = datetime.datetime.now()
    logging.info('total time is: %s', end_time - start_time)

    logging.info('about to load all docs')
    with open('./resources/LDA_processing/all_docs.pkl', mode='rb') as f:
        all_docs = pickle.load(f)

    logging.info('about to load english words')
    with open('./resources/LDA_input/english_full_list.txt') as f:
        english_words = f.read().splitlines()

    good_english_words = set(english_words[75:21000])
    del english_words
    logging.info('about to remove all stop-words and unknown words')
    texts = []
    for i, doc in enumerate(all_docs):
        filtered_doc = [word for word in doc if word in good_english_words]
        texts.append(filtered_doc)
        if i % 5000 == 0:
            logging.info('Finished doc: %s', i)

    logging.info('about to release memory of all_docs and english_words')
    del all_docs
    del good_english_words

    logging.info('about to save texts')
    with open('./resources/LDA_processing/texts.pkl', mode='wb') as f:
        pickle.dump(texts, f)

    logging.info('about to load texts')
    with open('./resources/LDA_processing/texts.pkl', mode='rb') as f:
        texts = pickle.load(f)

    logging.info('about to create dictionary')
    dictionary = corpora.Dictionary(texts)
    keys = dictionary.keys()
    logging.info('dict size before filter: %s', len(keys))
    dictionary.filter_extremes(keep_n=150000)
    dictionary.filter_extremes(no_below=150, no_above=0.05)
    keys = dictionary.keys()
    logging.info('dict size after filter: %s', len(keys))
    dictionary.save('./resources/LDA_processing/lda.dict')
    dictionary.save_as_text('./resources/LDA_processing/lda_dict.txt')

    logging.info('about to create corpus')
    corpus = [dictionary.doc2bow(text) for text in texts]

    logging.info('about to save corpus as mm file')
    corpora.MmCorpus.serialize('./resources/LDA_processing/corpus.mm', corpus)

    logging.info('about to load dictionary file')
    dictionary = corpora.Dictionary.load('./resources/LDA_processing/lda.dict')

    logging.info('about to load corpus as mm file')
    corpus = corpora.MmCorpus('./resources/LDA_processing/corpus.mm')

    logging.info('about to start LDA model')
    lda = LdaModel(corpus, id2word=dictionary, num_topics=num_topics)
    logging.info('finished LDA model')

    logging.info('about to save ldaModel')
    lda.save('./resources/LDA_processing/LdaModel')

    logging.info('about to load ldaModel')
    lda = LdaModel.load('./resources/LDA_processing/LdaModel')

    logging.info('about to find topics')
    topics = lda.show_topics(num_topics=num_topics, num_words=10000, log=True, formatted=False)

    logging.info('about to save topics')
    with open('./resources/LDA_processing/topics.pkl', mode='wb') as f:
        pickle.dump(topics, f)

    dict_word_sets = find_words_from_lda_model()
    with open('./resources/LDA_processing/dict_word_sets.pkl', mode='wb') as f:
        pickle.dump(dict_word_sets, f)

    topics_words = extract_words_from_word_sets()
    with open('./resources/LDA_result/topic_words', mode='wt', encoding='utf-8') as f:
        f.write('\n'.join(topics_words))
Developer ID: uriklarman, Project: TreasureHunter, Lines of code: 86, Source file: process_documents.py
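show_topics(num_words=10000) above pulls a near-complete word distribution for every topic at once. For one topic at a time, gensim's get_topic_terms returns (word_id, probability) pairs instead; a small sketch reusing the model path from the example:

from gensim.models import LdaModel

lda = LdaModel.load('./resources/LDA_processing/LdaModel')
for topic_id in range(lda.num_topics):
    top = lda.get_topic_terms(topic_id, topn=10)  # [(word_id, probability), ...]
    words = [lda.id2word[word_id] for word_id, prob in top]
    print(topic_id, words)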

Example 6: UnlabeledCorpus

# Required module import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import show_topics [as alias]
    vocab = Dictionary.load_from_text('./vocab.txt')
    corpus = UnlabeledCorpus('./rumor_train.csv', vocab)
    valid_corpus = UnlabeledCorpus('./rumor_valid.csv', vocab)
    valid_sentences = [doc for doc in valid_corpus][5000:]

    # varing number of topics
    # result = {}
    # for num_topics in [2, 4, 8, 16, 32, 64]:
    #     best_value = -100
    #     for i in range(5):
    #         model = LdaModel(corpus=corpus, id2word=vocab, num_topics=num_topics)
    #         likelihood = model.log_perplexity(valid_sentences)
    #         best_value = max(best_value, likelihood)
    #     result[num_topics]= best_value
    #
    # for num_topics, likelihood in result.iteritems():
    #     print 'num_topics: %d, best word_likelihood: %f' % (num_topics, likelihood)

    model = LdaModel(corpus=corpus, id2word=vocab, num_topics=8, passes=2)
    model.save('./lda_model.txt')
    # print topics to a file
    topics = model.show_topics(num_topics=100, num_words=50)  # gensim caps this at the model's 8 topics
    with codecs.open('./topics.txt', 'w', 'utf-8') as out_f:
        for topic in topics:
            topic_id, topic_str = topic[0], topic[1]
            out_f.write('%d:\n%s\n' % (topic_id, topic_str))
        out_f.write('\n')



Developer ID: zhuwenya, Project: Rumor_Detection, Lines of code: 29, Source file: run.py
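For reference, the commented-out topic-count search above uses Python 2 syntax; a Python 3 version of the same idea (reusing corpus, vocab, and valid_sentences from the snippet) might read:

result = {}
for num_topics in [2, 4, 8, 16, 32, 64]:
    best_value = -100
    for _ in range(5):
        model = LdaModel(corpus=corpus, id2word=vocab, num_topics=num_topics)
        # log_perplexity returns a per-word likelihood bound; higher is better
        best_value = max(best_value, model.log_perplexity(valid_sentences))
    result[num_topics] = best_value

for num_topics, likelihood in result.items():
    print('num_topics: %d, best word_likelihood: %f' % (num_topics, likelihood))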


Note: The gensim.models.LdaModel.show_topics examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. For redistribution and use, please consult the corresponding project's License. Do not reproduce this article without permission.