本文整理汇总了Python中gensim.models.ldamodel.LdaModel.load方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.load方法的具体用法?Python LdaModel.load怎么用?Python LdaModel.load使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.ldamodel.LdaModel的用法示例。
在下文中一共展示了LdaModel.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def __init__(self, topics=10,
             worker=3,
             pretrained_model=None,
             dictionary=None):
    """Initialise the LDA trainer, optionally resuming from a saved model.

    Args:
        topics: Number of topics.
        worker: Parallelism setting, usually the number of cores minus one.
        pretrained_model: Path of a previously trained model. Online
            updating is supported, so the model from the last training
            run can be loaded and updated further.
        dictionary: Path of the word-to-ID dictionary that belongs to the
            model (words are converted to IDs for training).

    Example:
        >>> lda = LDA(topics=20, worker=2,
        ...           pretrained_model=model_file,
        ...           dictionary=dictionary_file)
        >>> corpus = read_file(corpus_file)  # [['word1', 'word2'], ['word3', 'word4']]
        >>> lda.update(corpus)
        >>> lda.save(model_file, dictionary_file)
        >>> topics = lda.inference(['word5', 'word6'])
    """
    self._topics = topics
    self._workers = worker
    self._model = None
    self._common_dictionary = None
    # Both files are needed to resume. The condition must test the
    # `dictionary` parameter: testing the undefined name
    # `common_dictionary` raised NameError as soon as a pretrained model
    # was supplied.
    if pretrained_model and dictionary:
        self._model = LdaModel.load(pretrained_model)
        self._common_dictionary = Dictionary.load(dictionary)
示例2: run
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def run(self):
    """Classify every corpus with each of its LDA models and pickle the results.

    For each language and each topic count found under
    ``self.input()['lda']['langs']`` the saved model is loaded, the topic
    distribution of every document in that language's corpus is collected,
    and both the per-document distributions and the model's topic listing
    are pickled to the matching targets of ``self.output()``.
    """
    # Any unrecognised cleaning level falls back to 'stopwords'.
    known_levels = ('raw', 'clean', 'stopwords')
    kind = self.clean_level if self.clean_level in known_levels else 'stopwords'

    if not os.path.exists(self.res_dir):
        print('Creando carpeta para resultados...')
        os.mkdir(self.res_dir)

    # Apply every model
    for idioma, modelos in self.input()['lda']['langs'].iteritems():
        corpus = corpora.MmCorpus(self.input()['corp']['langs'][idioma].path)
        for n_topics, modelo in modelos.iteritems():
            model = LdaModel.load(modelo.path)
            classification = [model.get_document_topics(doc) for doc in corpus]

            print('--------------------------------------')
            print('USER INFO: Clasificando textos en %s con nivel de limpieza "%s" con %d tópicos' % (idioma, kind, n_topics))
            model.print_topics(len(corpus), 5)

            with self.output()['langs'][idioma][n_topics]['doc_topics'].open('w') as doc_topics_file:
                pickle.dump(classification, doc_topics_file)
            with self.output()['langs'][idioma][n_topics]['topics'].open('w') as topics_file:
                # The 5 is an editable parameter: number of words shown per topic.
                pickle.dump(model.print_topics(n_topics, 5), topics_file)
示例3: make_clouds
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def make_clouds(files, n_words=20):
    """Render one word-cloud PNG per topic of a saved LDA model.

    For every topic, words are scored by the square root of the product of
    two normalisations of ``model.expElogbeta`` (along axis 0 and along
    axis 1); the ``n_words`` best-scoring words are drawn and written to
    ``../browser/clouds/<model name>/<topic index>.png``.

    Args:
        files: Object exposing ``model`` (path of the saved LdaModel) and
            ``replacements`` (path of a JSON file mapping bug -> id).
        n_words: Number of top-scoring words drawn per cloud.
    """
    # set locations
    base_model_name = os.path.splitext(os.path.basename(files.model))[0]
    output_d = os.path.join('../browser/clouds', base_model_name)
    if not os.path.exists(output_d):
        os.makedirs(output_d)

    # create wordcloud generator
    wc = WordCloud(width=1000, height=500, background_color='white')

    print('Loading model')
    model = LdaModel.load(files.model)
    beta = model.expElogbeta

    print('Normalizing by topics, and by words')
    pTW = normalize(beta, axis=0)
    pWT = normalize(beta, axis=1)

    # load bug<->id map, then invert to id<->bug; the context manager
    # closes the file, which the bare open().read() never did
    with open(files.replacements) as replacements_file:
        bug_to_id = json.load(replacements_file)
    id_to_bug = {v: k for k, v in bug_to_id.items() if "." not in k}

    for i, (p_tw, p_wt) in enumerate(zip(pTW, pWT)):
        # compute RAR
        t_rar = np.sqrt(p_tw * p_wt)
        # indices of the n_words largest scores, best first
        top_word_ids = t_rar.argsort()[:-1 - n_words:-1]
        # Index the mapping itself rather than its `id2token` attribute:
        # a gensim Dictionary only fills `id2token` on first item access.
        top_words = [model.id2word[wordid] for wordid in top_word_ids]
        top_words = [id_to_bug.get(word, word) for word in top_words]
        # NOTE(review): the accepted argument type of fit_words depends on
        # the installed wordcloud version -- confirm before upgrading.
        wc.fit_words(zip(top_words, t_rar[top_word_ids]))
        wc.to_file(os.path.join(output_d, str(i) + '.png'))
示例4: __init__
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def __init__(self, fnames, model=None, corpus=None, dictionary=None):
    """Load reviews, topic model and corpus, then precompute topic data.

    Args:
        fnames: Array of files for [lda_model, distribution]; only
            ``fnames[0]`` is read here, and only when ``model`` is None.
        model: Already-loaded LDA model to use instead of loading one.
        corpus: Already-loaded corpus; when given, ``dictionary`` is taken
            from the argument as well instead of being loaded from disk.
        dictionary: Dictionary that accompanies ``corpus``.
    """
    # Context manager so the file handle is closed after reading.
    with open('data/electronics_topics_in.txt') as reviews_file:
        self.reviews = reviews_file.readlines()

    print("Loding topic model...")
    if model is not None:
        print("Using argument model")
        self.lda = model
    else:
        self.lda = LdaModel.load(fnames[0])

    if corpus is not None:
        print("Using argument corpus and dictionary")
        self.corpus = corpus
        self.dictionary = dictionary
    else:
        print("Loading corpus and dictionary from file")
        self.corpus = load("data/models/electronics_tfidf_corpus.pkl")
        self.dictionary = load("data/models/electronics_dict.pkl")

    print("Loading review-topic distribution...")
    # Each review's (topic, weight) pairs, heaviest topic first. Built as
    # real lists so they can be indexed and re-read (a bare map() is a
    # one-shot iterator on Python 3).
    self.review_dist = [
        sorted(dist, key=lambda pair: pair[1], reverse=True)
        for dist in self.lda[self.corpus]
    ]

    print("processing topics")
    # Strip the "<weight>*" prefixes, then turn each whitespace-plus-'+'
    # separator into a comma. Raw strings keep the patterns identical
    # while avoiding invalid escape sequences.
    without_weights = [
        re.sub(r"(\d*\.\d*\*)", "", topic)
        for topic in self.lda.show_topics(-1)
    ]
    self.topics = [re.sub(r"\s\+", ",", topic) for topic in without_weights]
示例5: __init__
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def __init__(self, ac):
    """Load the persisted topic-model artefacts from ``../TextMining/Topic``.

    Args:
        ac: Stored unchanged as ``self.ac_terms``.
    """
    with open('../TextMining/Topic/data.loc', 'rb') as f:
        # The first object stored in the file is read and discarded; only
        # the second one is kept.
        # NOTE(review): presumably a header record -- confirm with the writer.
        load(f)
        self.data = load(f)
    with open('../TextMining/Topic/translator.loc', 'rb') as f:
        self.translator = load(f)
    self.index = similarities.MatrixSimilarity.load('../TextMining/Topic/index.loc')
    self.lda = LdaModel.load('../TextMining/Topic/lda.loc')
    # `load` is called on the class, like the other loaders here; there is
    # no need to build an empty Dictionary first.
    self.dictionary = Dictionary.load("../TextMining/Topic/dic.loc")
    self.ac_terms = ac
示例6: __init__
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def __init__(self, jobdesc_fname, jobtitle_fname):
    """Connect to Elasticsearch and load the LDA model and job labels.

    The labels file named by ``app.config['RCMDR_JOB_LABELS']`` holds one
    ``<integer id>=<label>`` entry per line.

    Args:
        jobdesc_fname: Stored unchanged as ``self.jobdesc_fname``.
        jobtitle_fname: Stored unchanged as ``self.jobtitle_fname``.
    """
    self.es = Elasticsearch([{'host': app.config['ES_HOST'], 'port': 9200, 'timeout': 120}])
    self.model = LdaModel.load(app.config['RCMDR_LDA_MODEL'])

    # Read through a context manager so the file handle is closed.
    with open(app.config['RCMDR_JOB_LABELS']) as labels_file:
        label_lines = labels_file.read().strip().split('\n')
    # Split on the first '=' only, so a label may itself contain '='.
    # Empty lines (e.g. an empty file) are skipped instead of crashing.
    self.job_labels = {
        int(k): v
        for k, v in (line.split("=", 1) for line in label_lines if line)
    }

    self.jobdesc_fname = jobdesc_fname
    self.jobtitle_fname = jobtitle_fname
示例7: analyze
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def analyze(self, docs):
    """Return the topic distribution of ``docs`` under the saved LDA model.

    The dictionary and the model are loaded again on every call, from the
    paths returned by ``getModelFilePath``, and stored on the instance.

    Args:
        docs: Documents, each in the form accepted by ``Dictionary.doc2bow``.

    Returns:
        Whatever ``self.getDocumentTopics`` returns for the converted documents.
    """
    # Load the dictionary first, then the model.
    dictionary_path = self.getModelFilePath("common.dictionary.file")
    self.dictionary = Dictionary.load(dictionary_path)
    model_path = self.getModelFilePath("common.model.file")
    self.ldaModel = LdaModel.load(model_path)

    # Turn the documents into a document-term matrix (one bag-of-words each).
    to_bow = self.dictionary.doc2bow
    bags_of_words = [to_bow(document) for document in docs]
    return self.getDocumentTopics(bags_of_words)
示例8: __init__
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def __init__(self):
    """Load every recommender artefact named in ``app.config``.

    Loads the dictionary, corpus, TF-IDF / LDA / LSI models, both
    similarity indexes and the job labels. The labels file holds one
    ``<integer id>=<label>`` entry per line.
    """
    self.dictionary = Dictionary.load(app.config["RCMDR_DICT"])
    self.corpus = corpora.MmCorpus(app.config["RCMDR_CORPUS"])
    self.tfidf = TfidfModel.load(app.config["RCMDR_TFIDF_MODEL"])
    self.lda_model = LdaModel.load(app.config["RCMDR_LDA_MODEL"])
    self.lsi_model = LsiModel.load(app.config["RCMDR_LSI_MODEL"])
    self.lda_index = Similarity.load(app.config["RCMDR_LDA_INDEX"])
    self.lsi_index = Similarity.load(app.config["RCMDR_LSI_INDEX"])

    # Read through a context manager so the file handle is closed.
    with open(app.config["RCMDR_JOB_LABELS"]) as labels_file:
        label_lines = labels_file.read().strip().split("\n")
    # Split on the first '=' only, so a label may itself contain '='.
    # Empty lines (e.g. an empty file) are skipped instead of crashing.
    self.job_labels = {
        int(k): v
        for k, v in (line.split("=", 1) for line in label_lines if line)
    }
示例9: AuthorTopicStd
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def AuthorTopicStd():
    """Build a calculator for the spread of an author's topic distribution.

    Loads the topic dump, the LDA model and the keyword dictionary once,
    then returns ``calculator(aid, pid)``. The calculator tokenises the
    titles of all papers of author ``aid``, drops stop words, infers the
    LDA topic distribution of the remaining words and returns its standard
    deviation, or ``np.nan`` when no words remain. ``pid`` is not used.
    """
    import nltk
    from gensim import corpora
    from gensim import matutils
    from gensim.models.ldamodel import LdaModel
    from nltk.corpus import stopwords
    from unidecode import unidecode

    TOPIC_FILE = './lda_topic.dump'
    LDA_FILE = './result.lda'
    DICTIONARY_FILE = './keywords.dict'

    with open(TOPIC_FILE, 'rb') as f:
        num_topics, topic_result = serializer.load(f)
    lda = LdaModel.load(LDA_FILE)
    dictionary = corpora.Dictionary.load(DICTIONARY_FILE)
    tokenizer = nltk.tokenize.RegexpTokenizer(r'[\w]{2,}')
    stopwords_set = set(stopwords.words())

    # Single-entry cache: [author id, topic distribution] of the most
    # recently computed author.
    last_seen = [None, None]

    def calculator(aid, pid):
        if last_seen[0] != aid:
            title_words = []
            for ipid, iaid in paper_authors.get_by_aid(aid):
                paper = papers.get(ipid)
                if paper is None:
                    continue
                title = unidecode(paper[Papers.IDX_TITLE]).lower()
                title_words.extend(
                    word for word in tokenizer.tokenize(title)
                    if word not in stopwords_set
                )
            if not title_words:
                # Nothing to infer from; the cache is left untouched.
                return np.nan
            last_seen[:] = [aid, lda[dictionary.doc2bow(title_words)]]
        dense_topic = matutils.sparse2full(last_seen[1], num_topics)
        return np.std(dense_topic)

    return calculator
示例10: getLdaModel
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def getLdaModel(bow_corpus, dictionary, useSavedTill):
    """Return an LDA model, either loaded from disk or trained and saved.

    Args:
        bow_corpus: Bag-of-words corpus used for training; its length
            determines the number of topics.
        dictionary: Passed to the model as ``id2word`` when training.
        useSavedTill: When it is at least ``USESAVED.lda_model`` the model
            saved at ``file_lda_model`` is loaded instead of training.

    Returns:
        The loaded or newly trained LdaModel.
    """
    if useSavedTill >= USESAVED.lda_model:
        common_logger.info("loading LDA model from file")
        return LdaModel.load(file_lda_model)

    common_logger.info("Training LDA model")
    # Assumption: the topic count grows with the natural log of the corpus size.
    topic_count = int(math.log(len(bow_corpus)) + 1)
    trained_model = LdaModel(
        bow_corpus,
        num_topics=topic_count,
        id2word=dictionary,
        passes=numPasses,
    )
    common_logger.info("Saving LDA model")
    trained_model.save(file_lda_model)
    common_logger.info("Done creating LDA model")
    return trained_model
示例11: update
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def update(self, docs):
    """Update the saved LDA model with ``docs`` and return their topics.

    The dictionary and model are loaded from the paths returned by
    ``getModelFilePath``, the model is updated with the new documents, and
    the documents' topic distribution is returned. The updated model is
    kept on the instance; this method does not save it.

    Args:
        docs: Documents, each in the form accepted by ``Dictionary.doc2bow``.

    Returns:
        Whatever ``self.getDocumentTopics`` returns for the converted documents.
    """
    # load dictionary and model
    self.dictionary = Dictionary.load(self.getModelFilePath("common.dictionary.file"))
    self.ldaModel = LdaModel.load(self.getModelFilePath("common.model.file"))

    # Converting list of documents (corpus) into Document Term Matrix using dictionary prepared above.
    docTermMatrix = [self.dictionary.doc2bow(doc) for doc in docs]

    # The configured pass count must be the value handed to update(): it
    # used to be stored as `numPass` while the undefined (or unrelated
    # global) name `numPasses` was passed instead.
    numPasses = self.config.getIntConfig("train.num.pass")[0]
    self.ldaModel.update(docTermMatrix, passes=numPasses)

    return self.getDocumentTopics(docTermMatrix)
示例12: fetch_model
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def fetch_model(dictionary):
    """Return the LDA model saved at 'Topic/lda.tm', training it if loading fails.

    Args:
        dictionary: Passed as ``id2word`` when a new model has to be trained.

    Returns:
        The loaded or newly trained LdaModel.
    """
    # Python 2 print statement; the trailing comma suppresses the newline so
    # the status message printed next continues on the same line.
    print "Fetching LDA Model... ",
    try:
        lda = LdaModel.load('Topic/lda.tm')
        print "LDA Model loaded!"
    except IOError:
        # The saved model could not be read: train one on MyCorpus and save
        # it to the same path for the next call.
        print "Model not found, building LDA..."
        corpus=MyCorpus()
        #lda = LdaModel(corpus,num_topics=50,update_every=1,chunksize=1000,passes=15)
        lda = LdaModel(corpus,num_topics=50,id2word=dictionary,update_every=1,chunksize=1000,passes=50)
        print "LDA Built!"
        lda.save('Topic/lda.tm')
    return lda
示例13: main
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def main():
    """Search Elasticsearch for each LDA topic and dump the hits to CSV.

    Loads the model and vocabulary, queries the ``wiki4`` index with the
    words of every topic yielded by ``get_doc_topics``, enriches the
    results via ``add_keywords`` and writes them to
    ``nowiki_4_with_kera_250_topics.csv``.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(name)-12s: %(message)s',
    )
    kera = NOB_kera()
    es = Elasticsearch(port=9201)
    mod = LdaModel.load(modelfile)
    vocab = Dictionary.load(vocabulary)
    tfidf = TfidfModel(dictionary=vocab)

    topic_stream = get_doc_topics(mod, mod.num_topics, num_words_from_topic, vocab, tfidf)
    # One record per topic: the topic words, the raw search response and the topic id.
    records = [
        {
            'topics': topics,
            'result': es.search(
                index='wiki4',
                body={"query": {"match": {"_all": topics}}},
                size=num_results_from_es,
            ),
            'topicid': topicid,
        }
        for (topics, topicid) in topic_stream
    ]
    records = add_keywords(records, kera)
    pd.DataFrame(records).to_csv('nowiki_4_with_kera_250_topics.csv', encoding='utf-8')
示例14: SNAP_ldaTopicsForTopic
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def SNAP_ldaTopicsForTopic(self, topic, numTopics = 10):
    """Return the printed topics of the saved SNAP LDA model for ``topic``.

    The model file is ``snap_data/gensim_snap_lda_<topic>_<numTopics>``
    next to this source file.

    Args:
        topic: Topic name used in the model file name.
        numTopics: Topic count of the saved model; must be 5, 10, 20 or 30.

    Returns:
        The result of ``print_topics(numTopics)``, or None (after printing
        an error message) when ``numTopics`` is not supported.
    """
    supported_counts = (5, 10, 20, 30)
    if numTopics not in supported_counts:
        print("[ERROR] Invalid numTopics")
        return None

    module_dir = os.path.dirname(os.path.abspath(__file__))
    model_file = "gensim_snap_lda_%s_%d" % (topic, numTopics)
    lda = LdaModel.load(os.path.join(module_dir, 'snap_data', model_file))
    return lda.print_topics(numTopics)
##################
#
##################
示例15: get_lda_model
# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def get_lda_model(num_topics):
    """Load the saved LDA model that was trained with ``num_topics`` topics.

    Args:
        num_topics: One of 10, 30, 60 or 120.

    Returns:
        The LdaModel loaded from the matching ``LDA_FILE_*`` path.

    Raises:
        ValueError: If ``num_topics`` is not one of the supported counts.
    """
    if num_topics not in (10, 30, 60, 120):
        raise ValueError("bad number of topics")

    # Topic count -> path of the corresponding saved model.
    model_files = {
        10: LDA_FILE_10,
        30: LDA_FILE_30,
        60: LDA_FILE_60,
        120: LDA_FILE_120,
    }
    return LdaModel.load(model_files[num_topics])