

Python LdaModel.get_document_topics Method Code Examples

This article collects typical usage examples of the Python method gensim.models.LdaModel.get_document_topics. If you are unsure what LdaModel.get_document_topics does, how to call it, or what it looks like in practice, the curated code examples below should help. You can also explore further usage examples of gensim.models.LdaModel.


Two code examples of the LdaModel.get_document_topics method are shown below, sorted by popularity by default.
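
Before the full examples, here is a quick, self-contained sketch (not taken from the examples below) of what a basic get_document_topics call looks like; the toy texts, the two-topic setting, and the query document are made up purely for illustration.

from gensim import corpora
from gensim.models import LdaModel

# Tiny illustrative corpus: three already-tokenized "documents".
texts = [["human", "computer", "interface"],
         ["graph", "trees", "minors"],
         ["computer", "graph", "survey"]]
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2)

# get_document_topics takes a bag-of-words vector and returns
# (topic_id, probability) pairs above minimum_probability.
bow = dictionary.doc2bow(["computer", "graph"])
print(lda.get_document_topics(bow, minimum_probability=0))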

Example 1: W2V_cpp2

# Required import: from gensim.models import LdaModel
# Or: from gensim.models.LdaModel import get_document_topics
from gensim.models import LdaModel

# W2V_base is defined elsewhere in the entity2vector project.
class W2V_cpp2(W2V_base):
    def __init__(self,n_topic, path, folder):
        self.n_topic = n_topic
        W2V_base.__init__(self, path, folder)

        # Remap product ids so they come right after the word vocabulary;
        # copy the keys with list() because the dict is modified inside the loop.
        for prod_id in list(self.idx2prod.keys()):
            prod = self.idx2prod[prod_id]
            n_prod_id = prod_id - len(self.word_count) - 1
            del self.idx2prod[prod_id]
            self.idx2prod[n_prod_id] = prod
            self.prod2idx[prod] = n_prod_id

        # Remap user ids so they follow the product ids.
        for user_id in list(self.idx2user.keys()):
            user = self.idx2user[user_id]
            n_user_id = user_id - len(self.word_count) - len(self.prod2idx) - 1
            del self.idx2user[user_id]
            self.idx2user[n_user_id] = user
            self.user2idx[user] = n_user_id

    def train(self):
        data = []
        entity2id = {}
        id2entity = []

        for obj in self.data:
            doc = []
            obj_sents = obj["text_data"]
            entity = obj["prod"]
            if entity not in entity2id:
                entity2id[entity] = len(entity2id)
                id2entity.append(entity)
            doc_id = entity2id[entity]

            for obj_sent in obj_sents:
                for pair in obj_sent:
                    if pair[0] >= 0:
                        doc.append((pair[0], doc_id))
            data.append(doc)

        # Train LDA over the per-entity documents.
        self.ldamodel = LdaModel(corpus=data, id2word=self.idx2word, num_topics=self.n_topic)

        f_entity = open("lda/prod.txt", "w")
        f_model = open("lda/model.txt", "w")
        f_model.write(str(len(entity2id)))
        f_model.write(" ")
        f_model.write(str(self.n_topic))
        f_model.write("\n")

        for entity in id2entity:
            f_entity.write(entity)
            f_entity.write("\n")

            f_model.write(entity)
            f_model.write(" ")

            # Full topic distribution for this entity's document; minimum_probability=0
            # and minimum_phi_value=0 keep every topic so each row has n_topic values.
            distr = self.ldamodel.get_document_topics(data[entity2id[entity]], minimum_phi_value=0, minimum_probability=0)
            distr = [pair[1] for pair in distr]

            for prod in distr:
                f_model.write(str(prod))
                f_model.write(" ")

            f_model.write("\n")

        f_entity.close()
        f_model.close()
        self.ldamodel.save("lda/model_200")
Author: Sanqiang, Project: entity2vector, Lines: 70, Source file: lda.py
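
Example 1 relies on passing minimum_probability=0 (and minimum_phi_value=0) so that get_document_topics returns a probability for essentially every topic, which is what lets each row in lda/model.txt carry a fixed number of n_topic values. The helper below is a minimal sketch of that idea in isolation; dense_topic_vector, lda, and bow are illustrative names, not part of the original project.

# Sketch only (not from the original project): turn get_document_topics output
# into a dense, fixed-length topic vector.
def dense_topic_vector(lda, bow, num_topics):
    # With the default minimum_probability, near-zero topics are dropped and the
    # returned list can be shorter than num_topics; keying by topic id and
    # filling 0.0 for anything missing keeps the vector length fixed.
    distr = dict(lda.get_document_topics(bow, minimum_probability=0, minimum_phi_value=0))
    return [distr.get(topic_id, 0.0) for topic_id in range(num_topics)]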

Example 2: defaultdict

# Required import: from gensim.models import LdaModel
# Or: from gensim.models.LdaModel import get_document_topics
from gensim import corpora
from gensim.models import LdaModel

# `documents` (a list of raw text strings) and `stoplist` (a collection of stop
# words) are assumed to be defined earlier in the original script.
texts = [[word for word in document.lower().split() if word not in stoplist]
         for document in documents]

from collections import defaultdict

frequency = defaultdict(int)
for text in texts:
    for token in text:
        frequency[token] += 1

texts = [[token for token in text if frequency[token] >= 1]  # >= 1 keeps every token; use > 1 to drop one-off words
         for text in texts]

from pprint import pprint  # pretty-printer

dictionary = corpora.Dictionary(texts)
# dictionary.save('/tmp/deerwester.dict') # store the dictionary, for future reference
# print(dictionary)
corpus = [dictionary.doc2bow(text) for text in texts]
# corpora.MmCorpus.serialize('/tmp/deerwester.mm', corpus)

lda = LdaModel(corpus, id2word=dictionary, num_topics=2)  # id2word lets print_topic/show_topic display words instead of raw ids

# on a new document:
new_doc = "pretty obvious that when i write my tellall memoir someday there will be four to six"
new_vec = dictionary.doc2bow(new_doc.lower().split())

print(lda.print_topic(0))
print(lda.show_topic(1))
print(lda.get_document_topics(new_vec))
Author: manassharma, Project: Geolocation-Of-Microbloggers, Lines: 32, Source file: lda_for_dummies.py
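
As a follow-up to example 2, the short sketch below assumes the lda and new_vec objects from that example are still in scope and shows one common next step: picking the dominant topic of the new document from the (topic_id, probability) pairs returned by get_document_topics.

# Assumes `lda` and `new_vec` from example 2; not part of the original script.
doc_topics = lda.get_document_topics(new_vec, minimum_probability=0)
dominant_topic, dominant_prob = max(doc_topics, key=lambda pair: pair[1])
print("dominant topic:", dominant_topic, "with probability", dominant_prob)
print(lda.show_topic(dominant_topic, topn=5))  # top five words of that topic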


Note: The gensim.models.LdaModel.get_document_topics examples in this article were compiled by vimsky (纯净天空) from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from community-contributed open-source projects; copyright remains with the original authors, and any redistribution or reuse should follow the corresponding project's license. Do not reproduce without permission.