本文整理汇总了Python中gensim.models.LdaMulticore方法的典型用法代码示例。如果您正苦于以下问题:Python models.LdaMulticore方法的具体用法?Python models.LdaMulticore怎么用?Python models.LdaMulticore使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块gensim.models的用法示例。
在下文中一共展示了models.LdaMulticore方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_lda_model
# 需要导入模块: from gensim import models [as 别名]
# 或者: from gensim.models import LdaMulticore [as 别名]
def create_lda_model(self, **kwargs):
    """Train an LDA model on the TF-IDF corpus and save it to disk.

    The corpus comes from ``self.load_tfidf_corpus()`` and the number of
    topics from ``self._dimensions``. Any extra keyword arguments are
    forwarded unchanged to ``models.LdaMulticore``.
    """
    tfidf_corpus = self.load_tfidf_corpus()

    # The dictionary may not have been loaded yet; fetch it on demand.
    if self._dictionary is None:
        self.load_dictionary()

    topic_count = self._dimensions
    topic_model = models.LdaMulticore(
        tfidf_corpus,
        id2word=self._dictionary,
        num_topics=topic_count,
        **kwargs
    )

    if self._verbose:
        topic_model.print_topics(num_topics=topic_count)

    # Persist the trained model under a file name derived from the topic count.
    target_path = self._LDA_MODEL_FILE.format(topic_count)
    topic_model.save(target_path)
示例2: build_topic_model_from_corpus
# 需要导入模块: from gensim import models [as 别名]
# 或者: from gensim.models import LdaMulticore [as 别名]
def build_topic_model_from_corpus(corpus, dictionary):
    """
    Builds a topic model with the given corpus and dictionary.
    The model is built using Latent Dirichlet Allocation.

    When ``Constants.LDA_MULTICORE`` is true the model is trained with
    ``LdaMulticore``, otherwise with ``ldamodel.LdaModel``. Both variants
    receive the same topic, pass and iteration settings from ``Constants``.

    :type corpus: list
    :parameter corpus: a list of bag of words, each bag of words represents a
    document
    :type dictionary: gensim.corpora.Dictionary
    :parameter dictionary: a Dictionary object that contains the words that are
    permitted to belong to the document, words that are not in this dictionary
    will be ignored
    :rtype: gensim.models.ldamodel.LdaModel
    :return: the topic model trained on ``corpus``
    """
    timestamp = time.strftime("%Y/%m/%d-%H:%M:%S")

    # Settings shared by the multicore and the single-process trainer.
    shared_settings = dict(
        id2word=dictionary,
        num_topics=Constants.TOPIC_MODEL_NUM_TOPICS,
        passes=Constants.TOPIC_MODEL_PASSES,
        iterations=Constants.TOPIC_MODEL_ITERATIONS,
    )

    if Constants.LDA_MULTICORE:
        print('%s: lda multicore' % timestamp)
        # One core is left out of the worker count, but never request fewer
        # than one worker: with NUM_CORES == 1 the plain subtraction yields 0,
        # and a worker pool cannot be created with zero processes.
        worker_count = max(1, Constants.NUM_CORES - 1)
        return LdaMulticore(corpus, workers=worker_count, **shared_settings)

    print('%s: lda monocore' % timestamp)
    return ldamodel.LdaModel(corpus, **shared_settings)
示例3: load_lda_model
# 需要导入模块: from gensim import models [as 别名]
# 或者: from gensim.models import LdaMulticore [as 别名]
def load_lda_model(self):
    """Restore the saved LDA model for the current number of dimensions.

    The loaded model is stored in ``self._model`` and ``self._model_name``
    is set to ``"lda"``.
    """
    load = models.LdaMulticore.load
    model_path = self._LDA_MODEL_FILE.format(self._dimensions)
    # mmap="r" is handed straight to load(); presumably this memory-maps the
    # stored arrays read-only instead of reading them fully into memory.
    self._model = load(model_path, mmap="r")
    self._model_name = "lda"
示例4: topic_analysis
# 需要导入模块: from gensim import models [as 别名]
# 或者: from gensim.models import LdaMulticore [as 别名]
def _train_and_save(build_model, save_path, label):
    """Train one model, save it, release it and print the elapsed time."""
    started = time()
    model = build_model()
    model.save(save_path)
    # Release the model before the clock stops, as the original stanzas did,
    # so it is never kept alive while the next model is being trained.
    del model
    elapsed = time() - started
    print("[BLOCK] Training time for %s: %s" % (label, round(elapsed, 2)))
    sys.stdout.flush()


def topic_analysis(corpus, dictionary, models_path, technique):
    """Train the requested topic model(s) on ``corpus`` and save them.

    Every model is saved as ``<models_path>/<prefix>_<run id>``, where the run
    id is a fresh random UUID shared by all models trained in this call. The
    training time of each model is printed to stdout.

    :param corpus: corpus passed as first argument to each model constructor
    :param dictionary: passed to each model constructor as ``id2word``
    :param models_path: directory prefix used to build the save paths
    :param technique: ``"hdp"``, ``"ldap"``, ``"lsa"``, ``"ldao"``, ``"lda"``
        or ``"all"`` to train every model in that order; any other value
        trains nothing
    :return: None
    """
    import uuid

    # Keep the id under its own name so the ``uuid`` module is not shadowed.
    run_id = str(uuid.uuid4())
    print("[BLOCK] Starting models for context")
    sys.stdout.flush()

    # (technique key, file prefix, label for the timing message, builder).
    # Builders are lambdas so that a model class is only looked up and
    # instantiated when its technique is actually selected.
    jobs = (
        ("hdp", "hdp", "HDP model",
         lambda: HdpModel(corpus, id2word=dictionary)),
        # NOTE(review): workers=23 is hard-coded; presumably tuned for one
        # specific machine - confirm before running elsewhere.
        ("ldap", "lda_parallel", "LDA multicore",
         lambda: LdaMulticore(corpus, id2word=dictionary, num_topics=100,
                              workers=23, passes=20)),
        ("lsa", "lsa", "LSA",
         lambda: LsiModel(corpus, id2word=dictionary, num_topics=400)),
        ("ldao", "lda_online", "LDA online",
         lambda: LdaModel(corpus, id2word=dictionary, num_topics=100,
                          update_every=1, chunksize=10000, passes=5)),
        ("lda", "lda_offline", "LDA offline",
         lambda: LdaModel(corpus, id2word=dictionary, num_topics=100,
                          update_every=0, passes=20)),
    )

    for key, file_prefix, label, build_model in jobs:
        if technique == "all" or technique == key:
            save_path = "%s/%s_%s" % (models_path, file_prefix, run_id)
            _train_and_save(build_model, save_path, label)