This article collects typical usage examples of the Python method gensim.models.LdaModel.load. If you are wondering what exactly LdaModel.load does, how to use it, or where to find real-world examples of it, the curated code samples below may help. You can also explore further usage examples of the containing class, gensim.models.LdaModel.
The following presents 15 code examples of LdaModel.load, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
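Before the collected examples, here is a minimal sketch of the save/load round trip that all of them rely on; the documents, dictionary, and file name are placeholders invented for illustration:

from gensim.corpora import Dictionary
from gensim.models import LdaModel

# Placeholder corpus: two tiny tokenized documents.
docs = [['topic', 'model', 'example'], ['load', 'save', 'model']]
id2word = Dictionary(docs)
bow_corpus = [id2word.doc2bow(doc) for doc in docs]

# Train, persist, and restore the model.
model = LdaModel(corpus=bow_corpus, id2word=id2word, num_topics=2)
model.save('example.lda')             # may also write side files for large arrays
model = LdaModel.load('example.lda')  # mmap='r' memory-maps those arrays read-only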
Example 1: create_lda_model
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def create_lda_model(project, corpus, id2word, name, use_level=True, force=False):
    model_fname = project.full_path + name + str(project.num_topics)
    if use_level:
        model_fname += project.level

    model_fname += '.lda.gz'

    if not os.path.exists(model_fname) or force:
        if corpus:
            update_every = None  # run in batch if we have a pre-supplied corpus
        else:
            update_every = 1

        model = LdaModel(corpus=corpus,
                         id2word=id2word,
                         alpha=project.alpha,
                         eta=project.eta,
                         passes=project.passes,
                         num_topics=project.num_topics,
                         iterations=project.iterations,
                         eval_every=None,  # disable perplexity tests for speed
                         update_every=update_every,
                         )

        if corpus:
            model.save(model_fname)
    else:
        model = LdaModel.load(model_fname)

    return model, model_fname
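As a usage illustration only: the Project class below and its attribute values are invented for this sketch, and corpus and id2word are assumed to come from an earlier preprocessing step.

# Hypothetical stand-in for the 'project' object this function expects.
class Project:
    full_path = '/tmp/lda/'
    num_topics = 50
    level = 'file'
    alpha = 'symmetric'
    eta = None
    passes = 10
    iterations = 1000

model, fname = create_lda_model(Project(), corpus, id2word, 'example', force=True)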
Example 2: write_topics
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def write_topics(model_path, csv_name, k):
    model = LdaModel.load(model_path)
    topics = []
    for topic_id in range(model.num_topics):
        topics.append(model.return_topic(topicid=topic_id))

    dictionary = Dictionary.load('data/dictionary/tweets.dict')
    word_indices = dictionary.id2token

    writer = csv.writer(open(csv_name, 'w'))

    output = [[0 for i in range(model.num_topics)] for j in range(k)]
    for topic_id, topic in enumerate(topics):
        for rank, index in enumerate(topic.argsort()[::-1]):
            output[rank][topic_id] = {}
            output[rank][topic_id]['word'] = word_indices[index]
            output[rank][topic_id]['p'] = topic[index]
            rank += 1
            if rank >= k:
                break

    for topic_id in range(model.num_topics):
        row = ['z = ' + str(topic_id)]
        for rank in range(k):
            row.append(output[rank][topic_id]['word'] + ':' + str(output[rank][topic_id]['p']))
        writer.writerow(row)
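Note that return_topic is not part of the stock gensim LdaModel API, so this snippet presumably relies on a project-specific subclass. A rough equivalent with the standard API, assuming a gensim version that provides get_topics(), might look like:

model = LdaModel.load(model_path)
topics = model.get_topics()             # (num_topics, vocab_size) word probabilities
top_k = topics[0].argsort()[::-1][:k]   # ids of topic 0's k most probable words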
Example 3: load
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def load(self):
    '''Load the lda model and the dic dictionary.
    '''
    lda_file = config.get('dmp', 'lda_file')
    dic_file = config.get('dmp', 'dic_file')
    self.lda = LdaModel.load(lda_file)
    self.dic = Dictionary.load(dic_file)
Example 4: __init__
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def __init__(self, destination, fileName, modelName='', ldaPasses='', topicNum=''):
    '''
    Constructor
    '''
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

    self.__destination = destination
    self.__fileName = fileName
    self.__modelName = modelName
    self.__ldaPasses = ldaPasses
    self.__topicNum = topicNum

    #=======================================================================
    # STOP WORDS AND CHARACTERS
    #=======================================================================
    self.__stopwords = stopwords.words('english')  # + string.punctuation
    self.__chars_to_remove = [u'[', u']', u'(', u')', u'*', u'%', u'{', u'}', u'\n', u'\n\n', u'\t', u';', u'/', u'^', u'--', u'\\', u'+', u'-', u'.', u'?', u'&', u'#', u'', u'']
    self.__stopwords.extend(self.__chars_to_remove)
    self.__stopwords.extend([item for item in string.punctuation])

    #=======================================================================
    # DATABASE
    #=======================================================================
    self.__db = connectMySQL(db='xpath', port=3366)
    self.__queryResults = None
    self.__cleanedCorpus = []

    if modelName != '' and os.path.exists(self.__destination + modelName + '.lda'):
        self.__ldaModel = LdaModel.load(self.__destination + modelName + '.lda', mmap='r')

    if fileName != '' and os.path.exists(self.__destination + fileName + '.dict'):
        self.__modelDict = corpora.Dictionary.load(self.__destination + fileName + '.dict')
Example 5: create_evaluation_distinctiveness
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def create_evaluation_distinctiveness(config, Kind):
    model_fname = config.model_fname % Kind.__name__

    try:
        model = LdaModel.load(model_fname)
        logger.info('Opened previously created model at file %s' % model_fname)
    except Exception:
        error('Cannot evaluate LDA models not built yet!')

    scores = utils.score(model, utils.kullback_leibler_divergence)
    total = sum([x[1] for x in scores])

    logger.info("%s model KL: %f" % (model_fname, total))
    with open(config.path + 'evaluate-results.csv', 'a') as f:
        w = csv.writer(f)
        w.writerow([model_fname, total])

    etas = list()
    for topic in model.state.get_lambda():
        topic_eta = list()
        for p_w in topic:
            topic_eta.append(p_w * numpy.log2(p_w))
        etas.append(-sum(topic_eta))

    entropy = sum(etas) / len(etas)

    logger.info("%s model entropy mean: %f" % (model_fname, entropy))
    with open(config.path + 'evaluate-entropy-results.csv', 'a') as f:
        w = csv.writer(f)
        w.writerow([model_fname, entropy])
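The inner loop computes the Shannon entropy H(t) = -sum_w p_w * log2(p_w) of each topic row. Note that state.get_lambda() returns unnormalized pseudo-counts; a vectorized sketch that normalizes the rows first (an assumption about the intended semantics, since the loop above uses the raw values) could be:

lam = model.state.get_lambda()              # shape: (num_topics, vocab_size)
p = lam / lam.sum(axis=1, keepdims=True)    # rows become probability distributions
etas = -(p * numpy.log2(p)).sum(axis=1)     # one entropy value per topic
entropy = etas.mean()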
Example 6: calculateLDADistance
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def calculateLDADistance(self, modelName='', topNSimilar='', topicList=''):
    if modelName == '':
        modelName = self.__fileName
    if topNSimilar == '':
        topNSimilar = 5

    write2file = self.__destination + modelName + "_results_LDA_similarTopics.csv"
    resultsCSV = open(write2file, "w")

    print('Reading model data')
    gensimDict = corpora.Dictionary.load(self.__destination + self.__fileName + '.dict')
    ldaModel = LdaModel.load(self.__destination + modelName + '.lda', mmap=None)
    topics = ldaModel.show_topics(num_topics=ldaModel.num_topics, num_words=len(gensimDict), formatted=False)
    #=======================================================================
    # num_topics=ldaModel.num_topics
    # num_words=len(gensimDict)
    #=======================================================================

    #=======================================================================
    # GET SIMILARITY VECTORS
    #=======================================================================
    print('Extracting vectors')
    topicsSorted = [sorted(x, key=lambda x: x[1]) for x in topics]
    vectors = []
    for topic in topicsSorted:
        vector = [item[0] for item in topic]
        vectors.append(vector)

    #=======================================================================
    # CALCULATE SIMILARITIES BETWEEN TOPICS
    #=======================================================================
    print('Calculating distances between LDA topics\n')
    results = []
    for topicListItem in topicList:
        distances = []
        for j in range(0, len(vectors)):
            dist = euclidean(vectors[topicListItem], vectors[j])
            #===============================================================
            # print(topicListItem, j, dist)
            #===============================================================
            distances.append(dist)
        results.append(distances)

    #=======================================================================
    # EXPORT TOP N SIMILAR TOPICS AND PRINT OUT QUERY TERMS
    #=======================================================================
    print('Writing found similar topics to file\n')
    for resultItem in range(0, len(results)):
        similarLDATopics = np.argsort(results[resultItem])[::-1]

        for similarItem in similarLDATopics[:topNSimilar]:
            #===============================================================
            # print(topicList[resultItem], similarItem)
            #===============================================================
            resultsCSV.write(str(topicList[resultItem]) + '; ' + str(similarItem) + '; ' + ', '.join(x[1].lstrip().rstrip() for x in topics[similarItem][:100]) + '\n\n')

        resultsCSV.write('*******************************************\n\n')
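This example ranks topics by Euclidean distance between their alphabetically aligned probability vectors; note that argsort()[::-1] puts the largest distances, i.e. the least similar topics, first. For comparing topic-word distributions, gensim also ships a Hellinger distance. A minimal sketch, assuming a loaded model and a gensim version that provides get_topics(), sorting ascending so the most similar topics come first:

from gensim.matutils import hellinger

topic_word = ldaModel.get_topics()             # (num_topics, vocab_size), rows normalized
dists = [hellinger(topic_word[0], row) for row in topic_word]
mostSimilar = np.argsort(dists)[:topNSimilar]  # ascending: smallest distance first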
Example 7: evaluate_log
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def evaluate_log(context, config):
    logger.info('Evaluating models for: %s' % config.project.name)

    model_fname = config.model_fname % ChangesetCorpus.__name__
    changeset_fname = config.corpus_fname % ChangesetCorpus.__name__
    commit_fname = config.corpus_fname % CommitLogCorpus.__name__

    try:
        commit_id2word = Dictionary.load(commit_fname + '.dict')
        commit_corpus = MalletCorpus(commit_fname,
                                     id2word=commit_id2word)
        changeset_id2word = Dictionary.load(changeset_fname + '.dict')
        changeset_corpus = MalletCorpus(changeset_fname,
                                        id2word=changeset_id2word)
    except Exception:
        error('Corpora not built yet -- cannot evaluate')

    try:
        model = LdaModel.load(model_fname)
        logger.info('Opened previously created model at file %s' % model_fname)
    except Exception:
        error('Cannot evaluate LDA models not built yet!')

    changeset_doc_topic = get_doc_topic(changeset_corpus, model)
    commit_doc_topic = get_doc_topic(commit_corpus, model)

    first_shared = dict()
    for id_ in commit_doc_topic:
        i = 0
        commit_topics = [topic[0] for topic in commit_doc_topic[id_]]
        try:
            changeset_topics = [topic[0] for topic in changeset_doc_topic[id_]]
        except KeyError:
            continue

        maximum = 101  # sentinel: larger than any possible topic rank
        minimum = maximum

        for i, topic in enumerate(commit_topics):
            if topic in changeset_topics:
                j = changeset_topics.index(topic)
                minimum = min(minimum, max(i, j))

        for i, topic in enumerate(changeset_topics):
            if topic in commit_topics:
                j = commit_topics.index(topic)
                minimum = min(minimum, max(i, j))

        first_shared[id_] = minimum

        if minimum == maximum:
            logger.info('No common topics found for %s' % str(id_))
            del first_shared[id_]

    mean = sum(first_shared.values()) / len(first_shared)

    with open('data/evaluate-log-results.csv', 'a') as f:
        w = csv.writer(f)
        w.writerow([model_fname, mean] + list(first_shared.values()))
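The helper get_doc_topic is not shown in this snippet. A hypothetical reconstruction, assuming it maps each document to its topic mixture sorted most-probable first, and approximating document ids with corpus positions:

def get_doc_topic(corpus, model):
    # Hypothetical helper: document position -> [(topic_id, prob), ...],
    # sorted by decreasing probability.
    doc_topic = dict()
    for doc_id, bow in enumerate(corpus):
        topics = model.get_document_topics(bow)
        doc_topic[doc_id] = sorted(topics, key=lambda t: t[1], reverse=True)
    return doc_topic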
Example 8: load_lda_model
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def load_lda_model(lda_model_name=None, mallet=False):
    if os.path.isfile(lda_model_name):
        if mallet:
            lda_model = LdaMallet.load(lda_model_name)
        else:
            lda_model = LdaModel.load(lda_model_name)
        return lda_model
    return None
Example 9: __init__
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def __init__(self):
    cwd = os.path.dirname(__file__)
    dictionary_path = os.path.abspath(os.path.join(cwd, 'models/dictionary.dict'))
    lda_model_path = os.path.abspath(os.path.join(cwd, 'models/lda_model_10_topics.lda'))
    self.dictionary = corpora.Dictionary.load(dictionary_path)
    self.lda = LdaModel.load(lda_model_path)
Example 10: analyzeLDA
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def analyzeLDA(self, modelName='', numberOfTerms=''):
    '''
    modelName -> name of model to read in to memory without the extension
    '''
    if modelName == '':
        modelName = self.__fileName
    if numberOfTerms == '':
        numberOfTerms = 100

    write2file = self.__destination + modelName + "_results_%s_SW.csv" % (numberOfTerms)
    #=======================================================================
    # allTopicsFile = self.__destination+modelName+"_results_AllTopics.csv"
    #=======================================================================
    resultsCSV = open(write2file, "w")

    model = LdaModel.load(self.__destination + modelName + '.lda', mmap=None)

    # and another way, only prints top words
    for t in range(0, model.num_topics):
        #===================================================================
        # print('topic {}: '.format(t) + ', '.join([v[1] for v in model.show_topic(t, numberOfTerms)]))
        #===================================================================
        topicSet = [v[1].lstrip().rstrip() for v in model.show_topic(t, numberOfTerms) if v[1] not in self.__stopwords]
        listSet = set(topicSet)

        for key in self.__queryWords:
            difference = set(topicSet).intersection(self.__queryWords[key])
            if len(difference) > 0:
                self.__overlapingTopics[key][t] = topicSet

    try:
        for key in self.__overlapingTopics:
            if self.__overlapingTopics[key]:
                for topicKey in self.__overlapingTopics[key]:
                    topicTerms = [w.lstrip().rstrip() for w in self.__overlapingTopics[key][topicKey] if w not in self.__stopwords][:100]
                    #=======================================================
                    # topicTerms = [w.translate(None, ''.join(self.__chars_to_remove)) for w in topicTerms if w != '']
                    #=======================================================
                    resultsCSV.write(key + ';' + str(topicKey) + ';' + ', '.join(topicTerms) + '\n\n')
                    print(key, '\t', topicKey, '\t', topicTerms)
                resultsCSV.write('***************************************\n')
                print('*************************\n')

        write2fileJSON = self.__destination + modelName + "_results_%s_SW.json" % (numberOfTerms)
        with open(write2fileJSON, 'w') as fp:
            json.dump(self.__overlapingTopics, fp)
    except KeyError as e:
        print(e)
        pass

    resultsCSV.close()
Example 11: analyzeUniqueLDA
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def analyzeUniqueLDA(self, modelName='', numberOfTerms=''):
    '''
    modelName -> name of model to read in to memory without the extension
    '''
    if modelName == '':
        modelName = self.__fileName
    if numberOfTerms == '':
        numberOfTerms = 100

    write2File = self.__destination + modelName + "_results_unique_%sTerms.csv" % (numberOfTerms)
    resultsCSV = open(write2File, "w")

    model = LdaModel.load(self.__destination + modelName + '.lda', mmap=None)

    # and another way, only prints top words
    for t in range(0, model.num_topics):
        #===================================================================
        # print('topic {}: '.format(t) + ', '.join([v[1] for v in model.show_topic(t, 500)]))
        #===================================================================
        # raw_input('prompt')
        topicSet = [v[1].lstrip().rstrip() for v in model.show_topic(t, numberOfTerms) if v[1] not in self.__stopwords]
        #===================================================================
        # print(type(topicSet), topicSet)
        #===================================================================
        listSet = set(topicSet)
        # print(listSet)
        # print(type(topicSet), topicSet)

        for key in self.__queryWords:
            # print(self.__queryWords[key])
            difference = set(topicSet).intersection(self.__queryWords[key])
            if len(difference) > 0:
                self.__overlapingTopics[key][t] = topicSet

    try:
        for key in self.__overlapingTopics:
            uniqueQueryTerms = []
            if self.__overlapingTopics[key]:
                for topicKey in self.__overlapingTopics[key]:
                    topicTerms = [w for w in self.__overlapingTopics[key][topicKey] if w not in self.__stopwords]
                    uniqueQueryTerms.extend(topicTerms)

                uniqueQueryTerms = [x for x in set(uniqueQueryTerms)]
                resultsCSV.write(key + ';' + str(topicKey) + ';' + ', '.join(uniqueQueryTerms) + '\n\n')
                resultsCSV.write('***************************************\n')
                print(key, uniqueQueryTerms)
                print('*************************\n')
    except KeyError as e:
        print(e)
        pass

    resultsCSV.close()
Example 12: get_keywords
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def get_keywords(threshold=0.01, model_path='result/model.lda'):
    lda_model = LdaModel.load(model_path)
    topic_num = lda_model.num_topics
    keywords = set()
    for topic_id in range(topic_num):
        topic = lda_model.state.get_lambda()[topic_id]
        topic = topic / topic.sum()  # normalize to probability dist
        signif_word_ids = np.where(topic > threshold)[0]
        keywords = keywords.union([lda_model.id2word[word_id] for word_id in signif_word_ids])
    return keywords
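In gensim versions that provide it, model.get_topics() returns the same matrix with rows already normalized, so the loop can collapse into one vectorized expression (a sketch under that assumption):

def get_keywords_vectorized(threshold=0.01, model_path='result/model.lda'):
    lda_model = LdaModel.load(model_path)
    topics = lda_model.get_topics()             # rows already sum to 1
    _, word_ids = np.where(topics > threshold)  # significant (topic, word) pairs
    return {lda_model.id2word[w] for w in set(word_ids)}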
Example 13: __init__
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def __init__(self):
    # current_working_dir = '/home/etu/eason/nodejs/Semantic_Aware_RecSys'
    current_working_dir = '.'
    os.chdir(current_working_dir)
    lda_model_path = "./LDAmodel/final_ldamodel"
    self.lda = LdaModel.load(lda_model_path)
    self.no_of_recommendation = 10
    self.omit_topic_below_this_fraction = 0.1
    self.mapping = self.__init_mapping()
    self.linkMapping = self.__init_Link_mapping()
    self.doc_topic_matrix = loadPickleFile('doc_topic_matrix')
Example 14: getAllTopics
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def getAllTopics(self, modelName='', numberOfTerms=100):
    '''
    modelName -> name of model to read in to memory without the extension
    '''
    returningData = {}
    if modelName == '':
        modelName = self.__fileName

    model = LdaModel.load(self.__destination + modelName + '.lda', mmap=None)
    return model.show_topics(num_topics=model.num_topics, num_words=numberOfTerms, formatted=False)
Example 15: __init__
# Required import: from gensim.models import LdaModel [as alias]
# Or: from gensim.models.LdaModel import load [as alias]
def __init__(self, categ, lda_num_topics):
    """
    Initialize Predict class
    """
    collection_name = '%s_corpus' % categ
    dictionary_path = os.path.join(src_dir, 'models/dictionary_' + categ + '.dict')
    lda_model_path = os.path.join(dst_dir, 'models/lda_model_' + str(lda_num_topics) + '_topics_' + categ + '.lda')
    self.categ = categ
    self.collection_name = collection_name
    self.lda_num_topics = lda_num_topics
    self.dictionary = corpora.Dictionary.load(dictionary_path)
    self.lda = LdaModel.load(lda_model_path)
    self.stopwords = stopwords.words('english')
    self.lem = WordNetLemmatizer()
    self.tokenizer = regexp.RegexpTokenizer(r"[\w’]+", flags=re.UNICODE)