This article collects typical usage examples of the Python method gensim.models.ldamodel.LdaModel.inference. If you have been struggling with questions such as: what exactly does Python's LdaModel.inference do? How is LdaModel.inference used? What do real examples of LdaModel.inference look like? Then the hand-picked code examples here may help. You can also explore further usage examples of the class this method belongs to, gensim.models.ldamodel.LdaModel.
The following presents 3 code examples of the LdaModel.inference method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
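Before the examples, here is a minimal, self-contained sketch of what LdaModel.inference does: it takes a list (chunk) of bag-of-words documents and returns a pair (gamma, sstats), where each row of gamma holds the variational Dirichlet parameters for one document and can be normalized into a topic distribution. The toy corpus and topic count below are illustrative assumptions and do not come from the examples on this page.

# A minimal sketch of calling LdaModel.inference directly (toy data, 2 topics).
from gensim.corpora import Dictionary
from gensim.models.ldamodel import LdaModel

docs = [["human", "machine", "interface"],
        ["graph", "trees", "minors"],
        ["graph", "minors", "survey"]]
dictionary = Dictionary(docs)
corpus = [dictionary.doc2bow(doc) for doc in docs]
lda = LdaModel(corpus, id2word=dictionary, num_topics=2)

# inference() expects a list of bag-of-words documents and returns
# (gamma, sstats); normalizing a gamma row gives the document's topic mixture.
bow = dictionary.doc2bow(["graph", "survey"])
gamma, _sstats = lda.inference([bow])
topic_dist = gamma[0] / gamma[0].sum()
print(topic_dist)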
Example 1: __init__
# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or equivalently: from gensim.models.ldamodel.LdaModel import inference [as alias]
#.........part of the code is omitted here.........
            topic_dist = topic_dist / topic_dist.sum()

            #=====[ Step 2: fill topic_dist_dict with strings appropriately ]=====
            topic_dist_dict = {self.lda_model.id2word[i]: topic_dist[i] for i in range(len(topic_dist))}

            #=====[ Step 3: add to list of dicts ]=====
            topic_dists.append(topic_dist_dict)

        return topic_dists

    def train_lda(self, corpus, dictionary):
        """
            PRIVATE: train_lda
            ------------------
            given a corpus and a dictionary, this fits parameters for
            self.lda_model and fills self.lda_model_topics with the
            per-topic word distributions
        """
        self.lda_model = LdaModel(corpus, id2word=dictionary, num_topics=self.num_topics_lda)
        self.lda_model_topics = self.find_per_topic_word_distributions()

    def get_lda_vec(self, word_list):
        """
            PRIVATE: get_lda_vec
            --------------------
            given a list of words, returns an lda vector characterizing it
        """
        #=====[ Step 1: convert to gensim bag of words ]=====
        gensim_bow = self.lda_model.id2word.doc2bow(word_list)

        #=====[ Step 2: get and return lda vector ]=====
        gamma, sstats = self.lda_model.inference([gensim_bow])
        normalized_gamma = gamma[0] / sum(gamma[0])
        return normalized_gamma

    def apply_lda(self, df, target_col):
        """
            PUBLIC: apply_lda
            -----------------
            given a dataframe and a target column, this will run LDA
            on it, add a column to df, and return it.
        """
        colname_lda = self.get_colname_lda(target_col)
        df[colname_lda] = df[target_col].apply(self.get_lda_vec)
        return df

    def print_lda_topics(self, words_per_topic=30):
        """
            PUBLIC: print_lda_topics
            ------------------------
            prints out self.lda_model_topics in an intuitive fashion
        """
        #=====[ Step 1: ensure necessary conditions ]=====
        if not self.lda_model_topics:
            print_error("print_lda_topics", "you have not found lda topics yet")

        #=====[ Step 2: iterate through topics, print constituent words ]=====
        for index, topic in enumerate(self.lda_model_topics):
            print_header("TOPIC: #" + str(index))
            sorted_words = sorted(topic.items(), reverse=True, key=lambda x: x[1])
            for word, weight in sorted_words[:words_per_topic]:
#.........the rest of this example is omitted here.........
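In this example, get_lda_vec normalizes the gamma row returned by inference into a topic distribution by hand. For reference, gensim's LdaModel also exposes the higher-level get_document_topics method, which runs essentially the same inference and returns (topic_id, probability) pairs. The sketch below is an illustration, not part of the original example; it reuses the toy model lda and the bag-of-words document bow from the sketch near the top of this page.

# Hedged comparison: normalized gamma vs. the higher-level get_document_topics.
gamma, _ = lda.inference([bow])
manual = gamma[0] / gamma[0].sum()

# get_document_topics returns [(topic_id, probability), ...]; with
# minimum_probability=0 no topics are filtered out, so the probabilities
# line up (approximately) with the normalized gamma row.
for topic_id, prob in lda.get_document_topics(bow, minimum_probability=0):
    print(topic_id, prob, manual[topic_id])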
Example 2: CaptionCorpus
# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or equivalently: from gensim.models.ldamodel.LdaModel import inference [as alias]
# Imports this class relies on (numpy, nltk stopwords, and gensim).
import numpy as np
from nltk.corpus import stopwords
from gensim import corpora
from gensim.models import Word2Vec
from gensim.models.ldamodel import LdaModel


class CaptionCorpus(object):
    """ Class to manipulate the corpus of captions """

    def __init__(self, captions_dict, stop="None"):
        self.captions = captions_dict

        # flatten all captions into a single list of documents
        self.documents = []
        for captions in self.captions.values():
            self.documents += captions

        # optionally strip English stopwords and basic punctuation
        self.stop_words = []
        if stop == "English":
            self.stop_words = stopwords.words('english') + [".", ","]
            self.documents = [self._stop_document(document)
                              for document in self.documents]

        self.dictionary = corpora.Dictionary(self.documents)

    def _stop_document(self, document):
        return [word for word in document if word not in self.stop_words]

    def bow_corpus(self):
        """ Return the bag of words corpus """
        return [self.dictionary.doc2bow(document) for document in self.documents]

    def ldafy(self, num_topics):
        """ Fit an LDA model on the bag-of-words corpus """
        self.lda = LdaModel(self.bow_corpus(), num_topics=num_topics,
                            id2word=self.dictionary)

    def word2vecfy(self, size, min_count):
        """ Train a word2vec model on the documents """
        self.w2v = Word2Vec(self.documents, size=size, min_count=min_count)

    def _lda_vector(self, document):
        """ Infer the (unnormalized) LDA topic vector of a single document """
        document = self._stop_document(document)
        document = self.dictionary.doc2bow(document)
        vector = self.lda.inference([document])[0][0]
        return vector

    def lda_corpus(self):
        """ Map each image name to the mean LDA vector of its captions """
        lda_dict = {}
        for name in self.captions:
            captions = self.captions[name]
            vector = []
            for caption in captions:
                lda_caption = self._lda_vector(caption)
                vector.append(lda_caption)
            lda_dict[name] = np.mean(vector, axis=0)
        return lda_dict

    def w2v_pretrained(self, path_to_model):
        """ Load a pretrained word2vec model in the binary word2vec format """
        self.w2v = Word2Vec.load_word2vec_format(path_to_model, binary=True)

    def w2v_corpus(self):
        """ Map each image name to the summed word2vec vector of its captions """
        w2v_dict = {}
        for name in self.captions:
            captions = self.captions[name]
            for caption in captions:
                w2v_caption = self._w2v_document(caption)
                if name not in w2v_dict:
                    w2v_dict[name] = w2v_caption
                else:
                    w2v_dict[name] = w2v_dict[name] + w2v_caption
        return w2v_dict

    def lda_distance(self, document1, document2):
        """ Euclidean distance between the normalized LDA vectors of two documents """
        vector1 = self._lda_vector(document1)
        vector2 = self._lda_vector(document2)
        vector1 = vector1 / np.linalg.norm(vector1)
        vector2 = vector2 / np.linalg.norm(vector2)
        return np.linalg.norm(vector1 - vector2)

    def _w2v_document(self, document):
        """ Average the word2vec vectors of the words in a document """
        document = self._stop_document(document)
        vectors = []
        for word in document:
            try:
                vectors.append(self.w2v[word])
            except KeyError:
                # skip words that are not in the word2vec vocabulary
                pass
        return np.mean(vectors, 0)

    def w2v_distance(self, document1, document2):
        """ Euclidean distance between the mean word2vec vectors of two documents """
        vector1 = self._w2v_document(document1)
        vector2 = self._w2v_document(document2)
        return np.linalg.norm(vector1 - vector2)

    def image_features(self, method="lda"):
        """ Map each image name to a mean LDA or word2vec feature vector """
        img_features = {}
        for name in self.captions:
            captions = self.captions[name]
            if method == "lda":
                vectors = [self._lda_vector(doc) for doc in captions]
            else:
                vectors = [self._w2v_document(doc) for doc in captions]
            img_features[name] = np.mean(vectors, 0)
        return img_features

#.........the rest of the code is omitted here.........
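A minimal usage sketch of the CaptionCorpus class above, assuming a small hypothetical captions dictionary mapping image names to lists of tokenized captions; the data and the image names are illustrative, not part of the original example, and the nltk English stopword list must be available for stop="English".

# Hypothetical captions: image name -> list of tokenized captions.
captions = {
    "img_001": [["a", "dog", "runs", "on", "the", "beach"],
                ["dog", "playing", "in", "the", "sand"]],
    "img_002": [["a", "red", "car", "parked", "outside"]],
}

corpus = CaptionCorpus(captions, stop="English")
corpus.ldafy(num_topics=2)          # fits self.lda via LdaModel

features = corpus.lda_corpus()      # image name -> mean LDA vector of its captions
distance = corpus.lda_distance(["dog", "on", "the", "beach"],
                               ["a", "red", "car"])
print(features["img_001"], distance)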
Example 3: __init__
# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or equivalently: from gensim.models.ldamodel.LdaModel import inference [as alias]
#.........part of the code is omitted here.........
    ####################################################################################################
    ######################[ --- USING TFIDF --- ]#######################################################
    ####################################################################################################

    def add_tfidf_column(self, df):
        """
            PRIVATE: add_tfidf_column
            -------------------------
            params: df - dataframe containing activities
            returns: df containing a 'tfidf_col' column
        """
        def get_tfidf(word_list):
            return self.tfidf_model[self.dictionary.doc2bow(word_list)]

        df['tfidf_col'] = df['lda_doc'].apply(get_tfidf)
        return df

    ####################################################################################################
    ######################[ --- USING LDA --- ]#########################################################
    ####################################################################################################

    def get_lda_vec(self, word_list):
        """
            PRIVATE: get_lda_vec
            --------------------
            given a list of words, returns an lda vector characterizing it
        """
        #=====[ Step 1: convert to gensim bag of words ]=====
        gensim_bow = self.lda_model.id2word.doc2bow(word_list)

        #=====[ Step 2: get and return lda vector ]=====
        gamma, sstats = self.lda_model.inference([gensim_bow])
        normalized_gamma = gamma[0] / sum(gamma[0])
        return normalized_gamma

    def add_lda_doc_column(self, df):
        """
            PRIVATE: add_lda_doc_column
            ---------------------------
            adds a column to df, 'lda_doc', that contains
            the document to be used for the given row
        """
        # repeat the name tokens five times to weight them more heavily than the description words
        df['lda_doc'] = df['name'] * 5 + df['words']
        return df

    def add_lda_vec_column(self, df):
        """
            PUBLIC: add_lda_vec_column
            --------------------------
            given a dataframe, this will add an lda column
        """
        #=====[ Step 1: get the documents ]=====
        df = self.add_lda_doc_column(df)

        #=====[ Step 2: apply LDA to each ]=====
        df['lda_vec'] = df['lda_doc'].apply(self.get_lda_vec)
        return df

    def get_user_lda_doc(self, user_df):
        """
            PUBLIC: get_user_doc
#.........the rest of the code is omitted here.........