當前位置: 首頁>>代碼示例>>Python>>正文


Python ldamodel.LdaModel方法代碼示例

本文整理匯總了Python中gensim.models.ldamodel.LdaModel方法的典型用法代碼示例。如果您正苦於以下問題:Python ldamodel.LdaModel方法的具體用法?Python ldamodel.LdaModel怎麼用?Python ldamodel.LdaModel使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊gensim.models.ldamodel的用法示例。


在下文中一共展示了ldamodel.LdaModel方法的13個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: fit_model

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def fit_model(self, data, params, return_data=False):
        """
        Fit a gensim LDA model to `data` using the parameter set `params`.

        :param data: one of: an array-like document-term matrix (recognized by having `dtype`,
                     `shape` and `transpose` attributes), a 2-tuple `(dictionary, corpus)`, or
                     a corpus that can be passed to `LdaModel` directly
        :param params: dict of keyword arguments for `LdaModel`; an optional `'dictionary'` entry
                       is removed from the keyword arguments and passed as `id2word` instead.
                       The passed dict itself is left unmodified.
        :param return_data: if True, additionally return the corpus and the document-term matrix
        :return: the fitted model, or `(model, (corpus, dtm))` if `return_data` is True
        """
        from gensim.models.ldamodel import LdaModel

        # work on a shallow copy so that removing 'dictionary' does not alter the caller's dict
        # (the same parameter set may be reused for another fit)
        params = dict(params)
        dictionary = params.pop('dictionary', None)

        if hasattr(data, 'dtype') and hasattr(data, 'shape') and hasattr(data, 'transpose'):
            # array-like input: treat it as a document-term matrix
            corpus = dtm_to_gensim_corpus(data)
            dtm = data
        else:
            if isinstance(data, tuple) and len(data) == 2:
                # a dictionary supplied together with the corpus overrides the one from `params`
                dictionary, corpus = data
            else:
                corpus = data
            dtm = gensim_corpus_to_dtm(corpus)

        model = LdaModel(corpus, id2word=dictionary, **params)

        if return_data:
            return model, (corpus, dtm)
        else:
            return model
開發者ID:WZBSocialScienceCenter,項目名稱:tmtoolkit,代碼行數:26,代碼來源:tm_gensim.py

示例2: testTransform

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def testTransform(self):
        """Train a 2-topic LDA model and check the topic mixture of the first document.

        Training is repeated from scratch (at most 5 times) until the sorted
        topic weights match the expected values within an absolute tolerance
        of 1e-2; the test fails if no attempt matches.
        """
        expected = [0.13, 0.87]
        passed = False
        # LDA training sometimes gets stuck at a local minimum; a fresh model
        # gets a new random initialization and may do better
        for attempt in range(5):
            model = ldamodel.LdaModel(id2word=dictionary, num_topics=2, passes=100)
            model.update(corpus)

            # project the first document of the corpus into topic space and
            # densify the result for an easier comparison
            first_doc = list(corpus)[0]
            dense = matutils.sparse2full(model[first_doc], 2)

            # compare sorted values: the same weights must appear, in any order
            passed = numpy.allclose(sorted(dense), sorted(expected), atol=1e-2)
            if passed:
                break
            logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" %
                            (attempt, sorted(dense), sorted(expected)))
        self.assertTrue(passed)
開發者ID:largelymfs,項目名稱:topical_word_embeddings,代碼行數:24,代碼來源:test_models.py

示例3: testLargeMmap

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def testLargeMmap(self):
        """Check that a saved model can be restored both normally and with mmap.

        The model is saved with ``sep_limit=0`` and then loaded twice, once
        without extra arguments and once with ``mmap='r'``. Each restored
        model must agree with the trained one on ``num_topics``,
        ``expElogbeta`` and the projection of an empty vector.
        """
        trained = ldamodel.LdaModel(self.corpus, num_topics=2)

        # simulate storing large arrays separately
        trained.save(testfile(), sep_limit=0)

        # first a plain load, then a load of the large model arrays with mmap
        for load_kwargs in ({}, {'mmap': 'r'}):
            restored = ldamodel.LdaModel.load(testfile(), **load_kwargs)
            self.assertEqual(trained.num_topics, restored.num_topics)
            self.assertTrue(numpy.allclose(trained.expElogbeta, restored.expElogbeta))
            # try projecting an empty vector
            empty_vec = []
            self.assertTrue(numpy.allclose(trained[empty_vec], restored[empty_vec]))
#endclass TestLdaModel 
開發者ID:largelymfs,項目名稱:topical_word_embeddings,代碼行數:21,代碼來源:test_models.py

示例4: update_reviews_with_topics

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def update_reviews_with_topics(topic_model, corpus_list, reviews):
    """
    Stores a dense list of topic probabilities in every review.

    For each review, the result of ``topic_model.get_document_topics`` for
    the matching corpus entry is stored under ``Constants.TOPICS_FIELD``.
    Every topic index in ``range(Constants.TOPIC_MODEL_NUM_TOPICS)`` that is
    absent from that result is then inserted at its own position as
    ``[topic_index, 0.0]``. The reviews are modified in place and nothing is
    returned.

    :type topic_model: LdaModel
    :param topic_model: the model used to obtain the topics of each document
    :type corpus_list: list
    :param corpus_list: the corpus entries, paired with ``reviews`` by
    position (extra entries on either side are ignored by ``zip``)
    :param reviews: the records to update; each one must support item
    assignment with ``Constants.TOPICS_FIELD`` as key
    """
    for review, corpus in zip(reviews, corpus_list):
        review[Constants.TOPICS_FIELD] =\
            topic_model.get_document_topics(corpus)

        # a set gives constant-time membership tests in the loop below
        non_zero_topics = {
            topic[0] for topic in review[Constants.TOPICS_FIELD]}

        # walking the indices in ascending order and inserting each missing
        # topic at its own index fills the gaps from left to right
        for topic_index in range(Constants.TOPIC_MODEL_NUM_TOPICS):
            if topic_index not in non_zero_topics:
                review[Constants.TOPICS_FIELD].insert(
                    topic_index, [topic_index, 0.0])
開發者ID:melqkiades,項目名稱:yelp,代碼行數:24,代碼來源:lda_context_utils.py

示例5: __init__

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def __init__(
        self, cleanup_urls=True, nltk_tokenizer=False, confidence_threshold=0.8
    ):
        """Collect the bug texts, shuffle them and train an LDA model on them.

        The three keyword arguments are forwarded unchanged to the parent
        initializer. Afterwards the preprocessed text and the id of every
        bug returned by ``bugzilla.get_bugs()`` are gathered, both lists are
        reordered with one common random permutation, a ``Dictionary`` is
        built from the texts and an ``LdaModel`` is trained on their
        bag-of-words representation.
        """
        super().__init__(
            cleanup_urls=cleanup_urls,
            nltk_tokenizer=nltk_tokenizer,
            confidence_threshold=confidence_threshold,
        )
        self.corpus = []
        self.bug_ids = []
        for bug in bugzilla.get_bugs():
            tokens = self.text_preprocess(self.get_text(bug))
            self.corpus.append(tokens)
            self.bug_ids.append(bug["id"])

        # shuffle positions rather than the lists themselves so that texts
        # and ids are reordered by the same permutation
        order = list(range(len(self.corpus)))
        random.shuffle(order)
        self.corpus, self.bug_ids = (
            [self.corpus[pos] for pos in order],
            [self.bug_ids[pos] for pos in order],
        )

        self.dictionary = Dictionary(self.corpus)

        bow_corpus = [self.dictionary.doc2bow(text) for text in self.corpus]
        self.model = LdaModel(bow_corpus)
開發者ID:mozilla,項目名稱:bugbug,代碼行數:24,代碼來源:similarity.py

示例6: initialize

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def initialize(self, myid, dispatcher, **model_params):
        """Reset the worker's bookkeeping and create its LDA model.

        `myid` is the id of this worker in the dispatcher, `dispatcher` is
        stored for later use, and `model_params` are passed on unchanged to
        `ldamodel.LdaModel`.
        """
        # identity of the worker; `myid` is kept only for easy access/logging
        self.myid = myid
        self.dispatcher = dispatcher

        # progress bookkeeping
        self.jobsdone = 0  # number of jobs this worker has completed
        self.finished = False
        self.lock_update = threading.Lock()

        logger.info("initializing worker #%s" % myid)
        self.model = ldamodel.LdaModel(**model_params)
開發者ID:largelymfs,項目名稱:topical_word_embeddings,代碼行數:10,代碼來源:lda_worker.py

示例7: testTopicSeeding

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def testTopicSeeding(self):
        """Check that seeding a word into a topic through `eta` steers training.

        The word 'system' is seeded into topic 0 and then into topic 1. In
        both cases the seeded topic must give 'system' a high and 'trees' a
        low weight, and the other topic the reverse. Training is restarted
        at most 5 times per seeded topic before the test fails.
        """
        expected = [[0.385, 0.022],
                    [0.025, 0.157]]
        passed = False
        for topic in range(2):
            # seed it both ways round: the same topics must come out, with
            # their order following the way round they were seeded
            other = 1 - topic
            for attempt in range(5):
                eta = numpy.ones((2, len(dictionary))) * 0.5
                system = dictionary.token2id[u'system']
                # NOTE(review): `trees` is never used below; the lookup is
                # kept so that a missing token still raises as before
                trees = dictionary.token2id[u'trees']

                # aggressively seed the word 'system' in one of the two
                # topics, 10 times higher than the other words
                eta[topic, system] *= 10

                model = ldamodel.LdaModel(id2word=dictionary, num_topics=2, passes=200, eta=eta)
                model.update(corpus)

                # one word -> weight mapping per topic
                topics = []
                for j in range(2):
                    topics.append({word: p for p, word in model.show_topic(j)})

                # first row: the seeded topic, second row: the other topic;
                # each row holds the weights of 'system' and 'trees'
                result = [[topics[t].get(u'system', 0), topics[t].get(u'trees', 0)]
                          for t in (topic, other)]
                passed = numpy.allclose(result, expected, atol=1e-2)
                if passed:
                    break
                logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" %
                                (attempt, result, expected))
            self.assertTrue(passed)
開發者ID:largelymfs,項目名稱:topical_word_embeddings,代碼行數:36,代碼來源:test_models.py

示例8: testPersistence

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def testPersistence(self):
        """Check that a model survives a save/load round trip.

        The restored model must agree with the trained one on `num_topics`,
        `expElogbeta` and the projection of an empty vector.
        """
        trained = ldamodel.LdaModel(self.corpus, num_topics=2)
        trained.save(testfile())
        restored = ldamodel.LdaModel.load(testfile())

        self.assertEqual(trained.num_topics, restored.num_topics)
        self.assertTrue(numpy.allclose(trained.expElogbeta, restored.expElogbeta))

        # try projecting an empty vector
        empty_vec = []
        self.assertTrue(numpy.allclose(trained[empty_vec], restored[empty_vec]))
開發者ID:largelymfs,項目名稱:topical_word_embeddings,代碼行數:10,代碼來源:test_models.py

示例9: get_topic

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def get_topic(self, doc_set):
		"""Fit a single-topic LDA model on `doc_set` and return its top three words.

		The documents are passed through the private tokenizing, stop-word
		removal and document-term-matrix helpers. The matrix helper's result
		is indexed as `[0]` for the corpus and `[1]` for the `id2word`
		mapping. The fitted model is stored in `self.topics`.

		:param doc_set: the sample documents to extract a topic from
		:return: list of the three words of the topic, without quotes or
			surrounding whitespace
		"""
		# compile sample documents into a list
		o1 = self.__tokenize(doc_set)
		o2 = self.__remove_stop_words(o1)
		# lemmatization is currently disabled: o3 = self.__lemmatizer(o2)
		o4 = self.__dt_matrix(o2)

		self.topics = LdaModel(o4[0], num_topics=1, id2word=o4[1], passes=50)
		output = self.topics.show_topics(num_topics=1, num_words=3, log=False, formatted=True)

		# the formatted topic looks like: 0.050*"foo" + 0.040*"bar" + ...
		# splitting on "+" leaves blanks around each term, so strip them after
		# dropping the weight and the quotes
		topic_string = output[0][1]
		return [term.split("*")[1].replace('"', '').strip() for term in topic_string.split("+")]
開發者ID:skashyap7,項目名稱:TBBTCorpus,代碼行數:12,代碼來源:topic_extractor.py

示例10: find_lda_context

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def find_lda_context(train_records, test_records):
    """
    Uses the training records to create a topic model and then updates both
    the training and testing records with a vector of probabilities for each
    topic from the recently created topic model.

    Each record receives two entries: ``'lda_context'``, a list with one
    probability per topic ordered by topic index, and
    ``Constants.CONTEXT_TOPICS_FIELD``, a dictionary that maps ``'topic<i>'``
    to the probability of topic ``i``. Topics that the model does not report
    for a document get probability 0.0.

    NOTE(review): ``num_topics`` is not defined inside this function; it must
    be available in the enclosing module.

    :type train_records: list
    :param train_records: the records used to train the topic model
    :type test_records: list
    :param test_records: additional records that are only annotated
    """

    dictionary = preprocess_records(train_records, test_records)
    corpus = [record[Constants.CORPUS_FIELD] for record in train_records]
    print(corpus)
    topic_model = ldamodel.LdaModel(
        corpus, id2word=dictionary,
        num_topics=num_topics,
        passes=Constants.TOPIC_MODEL_PASSES,
        iterations=Constants.TOPIC_MODEL_ITERATIONS)

    print(corpus)
    for i in range(num_topics):
        print(topic_model.show_topic(i, topn=2))

    records = train_records + test_records

    for record in records:
        document_topics =\
            topic_model.get_document_topics(record[Constants.CORPUS_FIELD])

        # get_document_topics() leaves out topics whose probability is below
        # the model's threshold, so the position in the returned list is not
        # the topic index; look the probabilities up by topic id instead
        topic_probabilities = dict(document_topics)
        lda_context = [
            topic_probabilities.get(i, 0.0) for i in range(num_topics)]
        record['lda_context'] = lda_context

        context_topics = {}
        for i in range(num_topics):
            topic_id = 'topic' + str(i)
            context_topics[topic_id] = lda_context[i]

        record[Constants.CONTEXT_TOPICS_FIELD] = context_topics
開發者ID:melqkiades,項目名稱:yelp,代碼行數:36,代碼來源:fastfm_recommender.py

示例11: train_topic_model

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def train_topic_model(records):
    """
    Trains the topic model selected by ``Constants.TOPIC_MODEL_TYPE`` unless
    its output file already exists.

    With ``'lda'`` an LdaModel is trained on the corpus field of the records
    and pickled to the generated cache file name. With ``'ensemble'`` the
    records are exported to text and the topic ensemble steps are run. In
    both cases the function returns early, after printing a message, when
    the corresponding result file is already present.

    :param records: the records that provide the training corpus
    :raises ValueError: if the configured topic model type is neither
    ``'lda'`` nor ``'ensemble'``
    """
    print('%s: train topic model' % time.strftime("%Y/%m/%d-%H:%M:%S"))

    model_type = Constants.TOPIC_MODEL_TYPE

    if model_type == 'lda':
        model_path = Constants.generate_file_name(
            'topic_model', 'pkl', Constants.CACHE_FOLDER, None, None, True)
        if os.path.exists(model_path):
            print('WARNING: Topic model already exists')
            return

        corpus = []
        for record in records:
            corpus.append(record[Constants.CORPUS_FIELD])
        dictionary = corpora.Dictionary.load(Constants.DICTIONARY_FILE)
        lda_settings = {
            'id2word': dictionary,
            'num_topics': Constants.TOPIC_MODEL_NUM_TOPICS,
            'passes': Constants.TOPIC_MODEL_PASSES,
            'iterations': Constants.TOPIC_MODEL_ITERATIONS,
        }
        topic_model = ldamodel.LdaModel(corpus, **lda_settings)

        with open(model_path, 'wb') as write_file:
            pickle.dump(topic_model, write_file, pickle.HIGHEST_PROTOCOL)
        return

    if model_type == 'ensemble':
        factors_file_name = \
            "factors_final_k%02d.pkl" % Constants.TOPIC_MODEL_NUM_TOPICS
        factors_path = Constants.ENSEMBLED_RESULTS_FOLDER + factors_file_name
        if os.path.exists(factors_path):
            print('Ensemble topic model already exists')
            return

        export_to_text(records)
        topic_ensemble_caller.run_local_parse_directory()
        topic_ensemble_caller.run_generate_kfold()
        topic_ensemble_caller.run_combine_nmf()
        return

    raise ValueError('Unrecognized topic modeling algorithm: \'%s\'' %
                     model_type)
開發者ID:melqkiades,項目名稱:yelp,代碼行數:42,代碼來源:topic_model_creator.py

示例12: build_topic_model_from_corpus

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def build_topic_model_from_corpus(corpus, dictionary):
    """
    Builds a Latent Dirichlet Allocation topic model from the given corpus
    and dictionary.

    When ``Constants.LDA_MULTICORE`` is set the model is an ``LdaMulticore``
    with ``Constants.NUM_CORES - 1`` workers, otherwise it is a plain
    ``LdaModel``. The number of topics, passes and iterations are read from
    ``Constants`` in both cases.

    :type corpus: list
    :param corpus: a list of bags of words, each bag of words represents a
    document
    :type dictionary: gensim.corpora.Dictionary
    :param dictionary: the Dictionary object passed to the model as
    ``id2word``
    :return: the trained topic model
    """
    if Constants.LDA_MULTICORE:
        print('%s: lda multicore' % time.strftime("%Y/%m/%d-%H:%M:%S"))
        model_class = LdaMulticore
        extra_settings = {'workers': Constants.NUM_CORES - 1}
    else:
        print('%s: lda monocore' % time.strftime("%Y/%m/%d-%H:%M:%S"))
        model_class = ldamodel.LdaModel
        extra_settings = {}

    # both model classes take the same basic training settings
    return model_class(
        corpus, id2word=dictionary,
        num_topics=Constants.TOPIC_MODEL_NUM_TOPICS,
        passes=Constants.TOPIC_MODEL_PASSES,
        iterations=Constants.TOPIC_MODEL_ITERATIONS,
        **extra_settings)
開發者ID:melqkiades,項目名稱:yelp,代碼行數:37,代碼來源:lda_context_utils.py

示例13: get_topic_distribution

# 需要導入模塊: from gensim.models import ldamodel [as 別名]
# 或者: from gensim.models.ldamodel import LdaModel [as 別名]
def get_topic_distribution(record, lda_model, dictionary, minimum_probability,
                           sampling_method=None, max_words=None):
    """
    Returns the topic distribution of a record as a dense vector.

    The corpus field of the record is passed to
    ``lda_model.get_document_topics`` and the returned probabilities are
    written into a zero vector of length ``lda_model.num_topics`` at the
    position of their topic; topics that are not returned keep the value 0.

    The ``dictionary``, ``sampling_method`` and ``max_words`` arguments are
    accepted but not used by the current implementation.

    :type record: dict
    :type lda_model: LdaModel
    :type minimum_probability: float
    :param minimum_probability: forwarded to ``get_document_topics``
    :return: a numpy array with one probability per topic
    """
    bag_of_words = record[Constants.CORPUS_FIELD]
    document_topics = lda_model.get_document_topics(
        bag_of_words, minimum_probability=minimum_probability)

    distribution = numpy.zeros(lda_model.num_topics)
    for topic_id, probability in document_topics:
        distribution[topic_id] = probability

    return distribution
開發者ID:melqkiades,項目名稱:yelp,代碼行數:31,代碼來源:lda_context_utils.py


注:本文中的gensim.models.ldamodel.LdaModel方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。