当前位置: 首页>>代码示例>>Python>>正文


Python ldamodel.LdaModel方法代码示例

本文整理汇总了Python中gensim.models.ldamodel.LdaModel方法的典型用法代码示例。如果您正苦于以下问题:Python ldamodel.LdaModel方法的具体用法?Python ldamodel.LdaModel怎么用?Python ldamodel.LdaModel使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.models.ldamodel的用法示例。


在下文中一共展示了ldamodel.LdaModel方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: fit_model

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def fit_model(self, data, params, return_data=False):
        """
        Fit a gensim LDA model to `data` using the parameter set `params`.

        :param data: one of
                     - an object exposing `dtype`, `shape` and `transpose`; it is used as the
                       document-term matrix and converted with `dtm_to_gensim_corpus`
                     - a 2-tuple `(dictionary, corpus)`; this dictionary takes precedence over
                       a `'dictionary'` entry in `params`
                     - anything else, which is used as the corpus directly
        :param params: keyword arguments for `LdaModel`; an optional `'dictionary'` entry is
                       taken out and passed as `id2word`. The passed mapping is left unmodified.
        :param return_data: if True, also return the corpus and the document-term matrix
        :return: the fitted model, or `(model, (corpus, dtm))` if `return_data` is True
        """
        from gensim.models.ldamodel import LdaModel

        # pop from a shallow copy: removing 'dictionary' from the caller's dict would make a
        # later fit with the same parameter set silently run without the dictionary
        params = dict(params)
        dictionary = params.pop('dictionary', None)

        if hasattr(data, 'dtype') and hasattr(data, 'shape') and hasattr(data, 'transpose'):
            # matrix-like input: keep it as the DTM and derive the corpus from it
            corpus = dtm_to_gensim_corpus(data)
            dtm = data
        else:
            if isinstance(data, tuple) and len(data) == 2:
                dictionary, corpus = data
            else:
                corpus = data
            dtm = gensim_corpus_to_dtm(corpus)

        model = LdaModel(corpus, id2word=dictionary, **params)

        if return_data:
            return model, (corpus, dtm)
        return model
开发者ID:WZBSocialScienceCenter,项目名称:tmtoolkit,代码行数:26,代码来源:tm_gensim.py

示例2: testTransform

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def testTransform(self):
        """
        Check the topic weights a 2-topic LDA model assigns to the first corpus document.

        The weights are compared to the expected ones regardless of topic order. Because
        training can get stuck at a local minimum, the model is re-trained from scratch
        (up to five attempts) until the comparison succeeds.
        """
        expected = [0.13, 0.87]
        converged = False
        for attempt in range(5):  # restart at most 5 times
            # train a fresh transformation model
            lda = ldamodel.LdaModel(id2word=dictionary, num_topics=2, passes=100)
            lda.update(corpus)

            # transform the first document and densify it for easier comparison
            first_doc = list(corpus)[0]
            dense = matutils.sparse2full(lda[first_doc], 2)

            # sorting both sides makes the check independent of topic ordering
            converged = numpy.allclose(sorted(dense), sorted(expected), atol=1e-2)
            if converged:
                break
            logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" %
                            (attempt, sorted(dense), sorted(expected)))
        self.assertTrue(converged)
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:24,代码来源:test_models.py

示例3: testLargeMmap

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def testLargeMmap(self):
        """
        Save a model with `sep_limit=0` and verify it survives a plain load and an mmap load.

        For each way of loading, the number of topics, `expElogbeta` and the projection of
        an empty vector must match the model that was saved.
        """
        original = ldamodel.LdaModel(self.corpus, num_topics=2)

        # simulate storing large arrays separately
        original.save(testfile(), sep_limit=0)

        # first a regular load, then loading the large model arrays with mmap
        for load_kwargs in ({}, {'mmap': 'r'}):
            restored = ldamodel.LdaModel.load(testfile(), **load_kwargs)
            self.assertEqual(original.num_topics, restored.num_topics)
            self.assertTrue(numpy.allclose(original.expElogbeta, restored.expElogbeta))
            empty_doc = []
            # try projecting an empty vector
            self.assertTrue(numpy.allclose(original[empty_doc], restored[empty_doc]))
#endclass TestLdaModel 
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:21,代码来源:test_models.py

示例4: update_reviews_with_topics

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def update_reviews_with_topics(topic_model, corpus_list, reviews):
    """
    Stores the topics of every review under ``Constants.TOPICS_FIELD``, adding
    a zero-probability entry for each topic the model did not report.

    The reviews are modified in place and nothing is returned. Entries added
    for missing topics are lists of the form ``[topic_index, 0.0]`` and are
    inserted at position ``topic_index``.

    :type topic_model: LdaModel
    :param topic_model: the model queried through ``get_document_topics``
    :type corpus_list: list
    :param corpus_list: one corpus entry per review, paired with ``reviews``
    by position
    :param reviews: the reviews to update; each one must support item
    assignment
    """
    for review, review_corpus in zip(reviews, corpus_list):
        review[Constants.TOPICS_FIELD] = \
            topic_model.get_document_topics(review_corpus)
        topics = review[Constants.TOPICS_FIELD]

        # topic ids reported by the model, captured before anything is inserted
        reported_ids = {entry[0] for entry in topics}

        for topic_index in range(Constants.TOPIC_MODEL_NUM_TOPICS):
            if topic_index in reported_ids:
                continue
            topics.insert(topic_index, [topic_index, 0.0])
开发者ID:melqkiades,项目名称:yelp,代码行数:24,代码来源:lda_context_utils.py

示例5: __init__

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def __init__(self, cleanup_urls=True, nltk_tokenizer=False, confidence_threshold=0.8):
        """
        Collect the preprocessed text of all bugs, shuffle them and train an LDA model.

        The three arguments are forwarded unchanged to the parent initializer. Afterwards
        `self.corpus` holds the preprocessed bug texts and `self.bug_ids` the matching bug
        ids, both in the same shuffled order; `self.dictionary` is built from the corpus
        and `self.model` is trained on its bag-of-words representation.
        """
        super().__init__(
            cleanup_urls=cleanup_urls,
            nltk_tokenizer=nltk_tokenizer,
            confidence_threshold=confidence_threshold,
        )

        self.corpus = []
        self.bug_ids = []
        for bug in bugzilla.get_bugs():
            processed_text = self.text_preprocess(self.get_text(bug))
            self.corpus.append(processed_text)
            self.bug_ids.append(bug["id"])

        # shuffle positions once and apply the same order to both lists so that
        # texts and ids stay aligned
        order = list(range(len(self.corpus)))
        random.shuffle(order)
        self.corpus = [self.corpus[position] for position in order]
        self.bug_ids = [self.bug_ids[position] for position in order]

        self.dictionary = Dictionary(self.corpus)

        bags_of_words = [self.dictionary.doc2bow(text) for text in self.corpus]
        self.model = LdaModel(bags_of_words)
开发者ID:mozilla,项目名称:bugbug,代码行数:24,代码来源:similarity.py

示例6: initialize

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def initialize(self, myid, dispatcher, **model_params):
        """
        Set up this worker's bookkeeping state and create its LDA model.

        `myid` and `dispatcher` are stored on the worker as given; `model_params` are
        forwarded as keyword arguments to `ldamodel.LdaModel`.
        """
        self.lock_update = threading.Lock()
        # number of jobs this worker has completed so far
        self.jobsdone = 0
        # id of this worker in the dispatcher; only a convenience for access/logging
        # (TODO remove?)
        self.myid = myid
        self.dispatcher = dispatcher
        self.finished = False

        start_message = "initializing worker #%s" % myid
        logger.info(start_message)

        self.model = ldamodel.LdaModel(**model_params)
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:10,代码来源:lda_worker.py

示例7: testTopicSeeding

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def testTopicSeeding(self):
        """
        Check that seeding the word 'system' into one topic via `eta` steers that topic.

        The test is run once per topic (seeding it both ways round). Each run re-trains
        the model up to five times and passes as soon as the word weights of the seeded
        and the unseeded topic match the expected values.
        """
        # neither the id of the seeded word nor the expected weights depend on the
        # attempt, so they are set up once instead of on every retry
        system = dictionary.token2id[u'system']
        expected = [[0.385, 0.022],
                    [0.025, 0.157]]

        for topic in range(2):
            # try seeding it both ways round, check you get the same
            # topics out but with which way round they are depending
            # on the way round they're seeded
            passed = False
            for i in range(5): # restart at most 5 times

                eta = numpy.ones((2, len(dictionary))) * 0.5

                # aggressively seed the word 'system', in one of the
                # two topics, 10 times higher than the other words
                eta[topic, system] *= 10

                model = ldamodel.LdaModel(id2word=dictionary, num_topics=2, passes=200, eta=eta)
                model.update(corpus)

                # word -> weight mapping per topic
                # NOTE(review): the unpacking order relies on show_topic yielding
                # (weight, word) pairs -- confirm for the gensim version in use
                topics = [dict((word, p) for p, word in model.show_topic(j)) for j in range(2)]
                seeded = topics[topic]
                unseeded = topics[1 - topic]

                # check that the word system in the topic we seeded, got a high weight,
                # and the word 'trees' (the main word in the other topic) a low weight --
                # and vice versa for the other topic (which we didn't seed with 'system')
                result = [[seeded.get(u'system', 0), seeded.get(u'trees', 0)],
                          [unseeded.get(u'system', 0), unseeded.get(u'trees', 0)]]
                passed = numpy.allclose(result, expected, atol=1e-2)
                if passed:
                    break
                logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" %
                                (i, result, expected))
            self.assertTrue(passed)
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:36,代码来源:test_models.py

示例8: testPersistence

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def testPersistence(self):
        """
        Save a 2-topic model, load it back and verify that both models agree.

        Compared are the number of topics, `expElogbeta` and the projection of an
        empty vector.
        """
        original = ldamodel.LdaModel(self.corpus, num_topics=2)
        original.save(testfile())
        restored = ldamodel.LdaModel.load(testfile())

        self.assertEqual(original.num_topics, restored.num_topics)

        same_beta = numpy.allclose(original.expElogbeta, restored.expElogbeta)
        self.assertTrue(same_beta)

        # try projecting an empty vector
        empty_doc = []
        same_projection = numpy.allclose(original[empty_doc], restored[empty_doc])
        self.assertTrue(same_projection)
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:10,代码来源:test_models.py

示例9: get_topic

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def get_topic(self, doc_set):
		"""
		Fit a single-topic LDA model on `doc_set` and return its three top words.

		The documents are tokenized, stripped of stop words and turned into a
		document-term representation before training. The trained model is kept
		in `self.topics`.

		:param doc_set: the sample documents to extract the topic from
		:return: list of the topic's words, without quotes or surrounding whitespace
		"""
		# compile sample documents into a list
		o1 = self.__tokenize(doc_set)
		o2 = self.__remove_stop_words(o1)
		#o3 = self.__lemmatizer(o2)
		# o4[0] is used as the corpus, o4[1] as the id -> word mapping
		o4 = self.__dt_matrix(o2)

		self.topics = LdaModel(o4[0], num_topics=1, id2word=o4[1], passes=50)
		output = self.topics.show_topics(num_topics=1, num_words=3, log=False, formatted=True)

		# output[0][1] is the formatted topic string, a '+'-joined sequence of
		# weight*"word" terms
		formatted_topic = output[0][1]
		# the terms are separated by " + ", so the spaces around each '+' must be
		# stripped or the words come back with stray whitespace
		return [term.split("*")[1].replace('"', '').strip() for term in formatted_topic.split("+")]
开发者ID:skashyap7,项目名称:TBBTCorpus,代码行数:12,代码来源:topic_extractor.py

示例10: find_lda_context

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def find_lda_context(train_records, test_records):
    """
    Uses the training records to create a topic model and then updates both
    the training and testing records with a vector of probabilities for each
    topic from the recently created topic model

    Every record receives two new entries: ``'lda_context'``, a list with one
    probability per topic (0.0 for topics the model did not report for that
    record), and ``Constants.CONTEXT_TOPICS_FIELD``, a dictionary mapping
    ``'topic<i>'`` to the same probabilities.

    NOTE(review): ``num_topics`` is not defined inside this function; it is
    presumably a module-level name -- confirm.

    :param train_records: the records used to train the topic model
    :param test_records: additional records that only get annotated
    """

    dictionary = preprocess_records(train_records, test_records)
    corpus = [record[Constants.CORPUS_FIELD] for record in train_records]
    print(corpus)
    topic_model = ldamodel.LdaModel(
        corpus, id2word=dictionary,
        num_topics=num_topics,
        passes=Constants.TOPIC_MODEL_PASSES,
        iterations=Constants.TOPIC_MODEL_ITERATIONS)

    print(corpus)
    for i in range(num_topics):
        print(topic_model.show_topic(i, topn=2))

    records = train_records + test_records

    for record in records:
        document_topics =\
            topic_model.get_document_topics(record[Constants.CORPUS_FIELD])

        # The model may leave out topics with a very low probability, so the
        # result cannot be indexed by topic position. Build a dense vector
        # addressed by topic id instead; omitted topics stay at 0.0.
        lda_context = [0.0] * num_topics
        for topic_index, probability in document_topics:
            lda_context[topic_index] = probability
        record['lda_context'] = lda_context

        context_topics = {}
        for i in range(num_topics):
            topic_id = 'topic' + str(i)
            context_topics[topic_id] = lda_context[i]

        record[Constants.CONTEXT_TOPICS_FIELD] = context_topics
开发者ID:melqkiades,项目名称:yelp,代码行数:36,代码来源:fastfm_recommender.py

示例11: train_topic_model

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def train_topic_model(records):
    """
    Trains the topic model selected by ``Constants.TOPIC_MODEL_TYPE`` unless
    its output file already exists.

    For ``'lda'`` an LdaModel is trained on the corpus of the records and
    pickled to the topic model cache file. For ``'ensemble'`` the records are
    exported to text and the topic ensemble steps are run. In both cases the
    function returns early, after printing a message, when the result file is
    already present.

    :param records: the records whose ``Constants.CORPUS_FIELD`` entries form
    the training corpus
    :raise ValueError: if the configured topic model type is neither
    ``'lda'`` nor ``'ensemble'``
    """
    print('%s: train topic model' % time.strftime("%Y/%m/%d-%H:%M:%S"))

    model_type = Constants.TOPIC_MODEL_TYPE

    if model_type == 'lda':
        model_path = Constants.generate_file_name(
            'topic_model', 'pkl', Constants.CACHE_FOLDER, None, None, True)
        if os.path.exists(model_path):
            print('WARNING: Topic model already exists')
            return

        corpus = [record[Constants.CORPUS_FIELD] for record in records]
        dictionary = corpora.Dictionary.load(Constants.DICTIONARY_FILE)
        lda = ldamodel.LdaModel(
            corpus, id2word=dictionary,
            num_topics=Constants.TOPIC_MODEL_NUM_TOPICS,
            passes=Constants.TOPIC_MODEL_PASSES,
            iterations=Constants.TOPIC_MODEL_ITERATIONS)

        with open(model_path, 'wb') as write_file:
            pickle.dump(lda, write_file, pickle.HIGHEST_PROTOCOL)
        return

    if model_type == 'ensemble':
        factors_file_name = \
            "factors_final_k%02d.pkl" % Constants.TOPIC_MODEL_NUM_TOPICS
        factors_path = Constants.ENSEMBLED_RESULTS_FOLDER + factors_file_name

        if os.path.exists(factors_path):
            print('Ensemble topic model already exists')
            return

        export_to_text(records)
        topic_ensemble_caller.run_local_parse_directory()
        topic_ensemble_caller.run_generate_kfold()
        topic_ensemble_caller.run_combine_nmf()
        return

    raise ValueError('Unrecognized topic modeling algorithm: \'%s\'' %
                     Constants.TOPIC_MODEL_TYPE)
开发者ID:melqkiades,项目名称:yelp,代码行数:42,代码来源:topic_model_creator.py

示例12: build_topic_model_from_corpus

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def build_topic_model_from_corpus(corpus, dictionary):
    """
    Builds a topic model with the given corpus and dictionary.
    The model is built using Latent Dirichlet Allocation, either with
    LdaMulticore (when ``Constants.LDA_MULTICORE`` is set) or with the
    single-process LdaModel.

    :type corpus: list
    :param corpus: a list of bag of words, each bag of words represents a
    document
    :type dictionary: gensim.corpora.Dictionary
    :param dictionary: a Dictionary object that contains the words that are
    permitted to belong to the document, words that are not in this dictionary
    will be ignored
    :rtype: gensim.models.ldamodel.LdaModel
    :return: an LdaModel built using the given corpus
    """

    # numpy.random.seed(0)
    if Constants.LDA_MULTICORE:
        print('%s: lda multicore' % time.strftime("%Y/%m/%d-%H:%M:%S"))
        # One core is left free, but at least one worker is always requested:
        # with NUM_CORES == 1 the plain subtraction would ask for zero workers.
        num_workers = max(1, Constants.NUM_CORES - 1)
        topic_model = LdaMulticore(
            corpus, id2word=dictionary,
            num_topics=Constants.TOPIC_MODEL_NUM_TOPICS,
            passes=Constants.TOPIC_MODEL_PASSES,
            iterations=Constants.TOPIC_MODEL_ITERATIONS,
            workers=num_workers)
    else:
        print('%s: lda monocore' % time.strftime("%Y/%m/%d-%H:%M:%S"))
        topic_model = ldamodel.LdaModel(
            corpus, id2word=dictionary,
            num_topics=Constants.TOPIC_MODEL_NUM_TOPICS,
            passes=Constants.TOPIC_MODEL_PASSES,
            iterations=Constants.TOPIC_MODEL_ITERATIONS)

    return topic_model
开发者ID:melqkiades,项目名称:yelp,代码行数:37,代码来源:lda_context_utils.py

示例13: get_topic_distribution

# 需要导入模块: from gensim.models import ldamodel [as 别名]
# 或者: from gensim.models.ldamodel import LdaModel [as 别名]
def get_topic_distribution(record, lda_model, dictionary, minimum_probability,
                           sampling_method=None, max_words=None):
    """
    Returns the topic probabilities of a record as a dense vector with one
    position per topic of the model. Topics that the model does not report
    for the record keep a value of zero.

    :type record: dict
    :param record: the record whose ``Constants.CORPUS_FIELD`` entry is passed
    to the model
    :type lda_model: LdaModel
    :param lda_model: the model queried through ``get_document_topics``
    :param dictionary: not used by the current implementation
    :type minimum_probability: float
    :param minimum_probability: forwarded to ``get_document_topics``
    :param sampling_method: not used by the current implementation
    :param max_words: not used by the current implementation
    :return: a numpy array of length ``lda_model.num_topics``
    """
    document_topics = lda_model.get_document_topics(
        record[Constants.CORPUS_FIELD],
        minimum_probability=minimum_probability)

    distribution = numpy.zeros(lda_model.num_topics)
    for topic_index, probability in document_topics:
        distribution[topic_index] = probability

    return distribution
开发者ID:melqkiades,项目名称:yelp,代码行数:31,代码来源:lda_context_utils.py


注:本文中的gensim.models.ldamodel.LdaModel方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。