当前位置: 首页>>代码示例>>Python>>正文


Python LdaModel.inference方法代码示例

本文整理汇总了Python中gensim.models.ldamodel.LdaModel.inference方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.inference方法的具体用法?Python LdaModel.inference怎么用?Python LdaModel.inference使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.models.ldamodel.LdaModel的用法示例。


在下文中一共展示了LdaModel.inference方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import inference [as 别名]

#.........这里部分代码省略.........
			topic_dist = topic_dist / topic_dist.sum()

			#=====[ Step 2: fill topic_dist_dict with strings appropriately	]=====
			topic_dist_dict = {self.lda_model.id2word[i]:topic_dist[i] for i in range(len(topic_dist))}

			#=====[ Step 3: add to list of dicts	]=====
			topic_dists.append(topic_dist_dict) 

		return topic_dists


	def train_lda (self, corpus, dictionary):
		"""
			PRIVATE: train_lda
			------------------
			Fits an LDA model over `corpus`, using `dictionary` as the
			id->word mapping, and stores it on self.lda_model; then caches
			the per-topic word distributions in self.lda_model_topics.
		"""
		#=====[ Step 1: fit the LDA model	]=====
		self.lda_model = LdaModel(
			corpus, id2word=dictionary, num_topics=self.num_topics_lda)

		#=====[ Step 2: cache per-topic word distributions	]=====
		self.lda_model_topics = self.find_per_topic_word_distributions()


	def get_lda_vec (self, word_list):
		"""
			PRIVATE: get_lda_vec
			--------------------
			Maps a list of word tokens to the model's topic space:
			returns the inferred gamma vector, normalized to sum to 1.
		"""
		#=====[ Step 1: tokens -> gensim bag-of-words	]=====
		bow = self.lda_model.id2word.doc2bow(word_list)

		#=====[ Step 2: infer topic weights, normalize	]=====
		gamma, _ = self.lda_model.inference([bow])
		return gamma[0] / sum(gamma[0])


	def apply_lda (self, df, target_col):
		"""
			PUBLIC: apply_lda
			-----------------
			Runs LDA over df[target_col], storing the resulting vectors in
			a new column (named by get_colname_lda), and returns the
			mutated dataframe.
		"""
		new_col = self.get_colname_lda(target_col)
		df[new_col] = df[target_col].apply(self.get_lda_vec)
		return df


	def print_lda_topics (self, words_per_topic=30):
		"""
			PUBLIC: print_lda_topics
			------------------------
			prints out self.lda_model_topics in an intuitive fashion
		"""
		#=====[ Step 1: ensure necessary conditions	]=====
		if not self.lda_model_topics:
			print_error ("print_lda_topics", "you have not found lda topics yet")

		#=====[ Step 2: iterate through topics, print constituent words	]=====
		for index, topic in enumerate(self.lda_model_topics):			
			print_header ("TOPIC: #" + str(index))

			sorted_words = sorted(topic.items(), reverse=True, key=lambda x: x[1])
			for word, weight in sorted_words[:words_per_topic]:
开发者ID:jayhack,项目名称:SpotOn,代码行数:70,代码来源:SemanticAnalysis.py

示例2: CaptionCorpus

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import inference [as 别名]
class CaptionCorpus(object): 
	""" Class to manipulate the corpus of captions """

	def __init__(self, captions_dict, stop="None"):
		"""
		Build the corpus: flatten all caption lists into self.documents,
		optionally strip English stop words (when stop == "English"),
		then construct the gensim dictionary over the documents.
		"""
		self.captions = captions_dict
		# Flatten every image's caption list into one document list.
		self.documents = [doc for docs in self.captions.values() for doc in docs]
		self.stop_words = []
		if stop == "English":
			# Treat "." and "," as stop words too.
			self.stop_words = stopwords.words('english') + [".", ","]
			self.documents = [self._stop_document(doc) for doc in self.documents]
		self.dictionary = corpora.Dictionary(self.documents)

	def _stop_document(self, document):
		return [word for word in document if word not in \
			self.stop_words]

	def bow_corpus(self):
		""" Return the bag of words corpus """
		return [self.dictionary.doc2bow(document) for \
			document in self.documents]

	def ldafy(self, num_topics):
		""" Fit an LDA model with `num_topics` topics over the bow corpus """
		corpus = self.bow_corpus()
		self.lda = LdaModel(corpus, num_topics=num_topics,
			id2word=self.dictionary)

	def word2vecfy(self, size, min_count):
		""" Train a Word2Vec model of dimension `size` over the documents """
		self.w2v = Word2Vec(self.documents,
			size=size, min_count=min_count)

	def _lda_vector(self, document):
		document = self._stop_document(document)
		document = self.dictionary.doc2bow(document)
		vector = self.lda.inference([document])[0][0]
		return vector

	def lda_corpus(self):
		lda_dict = {}
		for name in self.captions:
			captions = self.captions[name]
			vector = []
			for caption in captions:
				lda_caption = self._lda_vector(caption)
				vector.append(lda_caption)
			lda_dict[name] = np.mean(vector, axis=0)
		return lda_dict

	def w2v_pretrained(self, path_to_model):
		""" Load a pretrained binary word2vec model from disk into self.w2v """
		self.w2v = Word2Vec.load_word2vec_format(path_to_model, binary=True)

	def w2v_corpus(self):
		w2v_dict = {}
		for name in self.captions:
			captions = self.captions[name]
			for caption in captions:
				w2v_caption = self._w2v_document(caption)
				if not name in w2v_dict:
					w2v_dict[name] = w2v_caption
				else:
					w2v_dict[name] = w2v_dict[name] + w2v_caption
		return w2v_dict

	def lda_distance(self, document1, document2):
		vector1 = self._lda_vector(document1)
		vector2 = self._lda_vector(document2)
		vector1 = vector1/np.linalg.norm(vector1)
		vector2 = vector2/np.linalg.norm(vector2)
		return np.linalg.norm(vector1-vector2)

	def _w2v_document(self, document):
		document = self._stop_document(document)
		vectors = []
		for word in document:
			try:
				vectors.append(self.w2v[word])
			except:
				pass
		return np.mean(vectors, 0)

	def w2v_distance(self, document1, document2):
		vector1 = self._w2v_document(document1)
		vector2 = self._w2v_document(document2)
		return np.linalg.norm(vector1-vector2)

	def image_features(self, method="lda"): 
		img_features = {}
		for name in self.captions: 
			captions = self.captions[name]
			if method=="lda":
				vectors = [self._lda_vector(doc) \
							for doc in captions]
			else:
				vectors = [self._w2v_document(doc) \
							for doc in captions]
			img_features[name] = np.mean(vectors, 0)
		return img_features

#.........这里部分代码省略.........
开发者ID:eqperes,项目名称:mvaproject,代码行数:103,代码来源:gensim_corpus.py

示例3: __init__

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import inference [as 别名]

#.........这里部分代码省略.........

	####################################################################################################
	######################[ --- USING TFIDF --- ]#######################################################
	####################################################################################################

	def add_tfidf_column (self, df):
		"""
			PRIVATE: add_tfidf_column
			-------------------------
			params: df - dataframe containing activities
			returns: df with a 'tfidf_col' column holding the tf-idf
			representation of each row's 'lda_doc' document
		"""
		# Bind once so the per-row lambda avoids repeated attribute lookups.
		dictionary, model = self.dictionary, self.tfidf_model
		df['tfidf_col'] = df['lda_doc'].apply(
			lambda words: model[dictionary.doc2bow(words)])
		return df


	####################################################################################################
	######################[ --- USING LDA --- ]#########################################################
	####################################################################################################

	def get_lda_vec (self, word_list):
		"""
			PRIVATE: get_lda_vec
			--------------------
			Maps a list of word tokens to the model's topic space:
			returns the inferred gamma vector, normalized to sum to 1.
		"""
		#=====[ Step 1: tokens -> gensim bag-of-words	]=====
		bow = self.lda_model.id2word.doc2bow(word_list)

		#=====[ Step 2: infer topic weights, normalize	]=====
		gamma, _ = self.lda_model.inference([bow])
		return gamma[0] / sum(gamma[0])


	def add_lda_doc_column (self, df):
		"""
			PRIVATE: add_lda_doc_column
			---------------------------
			Adds an 'lda_doc' column: each row's document is its 'name'
			list repeated 5 times (upweighting the name) concatenated
			with its 'words' list.
		"""
		# 'name' and 'words' hold lists, so * and + are repeat/concat.
		weighted_names = df['name'] * 5
		df['lda_doc'] = weighted_names + df['words']
		return df


	def add_lda_vec_column (self, df):
		"""
			PUBLIC: add_lda_vec_column
			--------------------------
			Adds an 'lda_vec' column containing the LDA vector of each
			row's 'lda_doc' document; returns the dataframe.
		"""
		#=====[ Step 1: construct the documents, then vectorize each	]=====
		with_docs = self.add_lda_doc_column(df)
		with_docs['lda_vec'] = with_docs['lda_doc'].apply(self.get_lda_vec)
		return with_docs


	def get_user_lda_doc (self, user_df):
		"""
			PUBLIC: get_user_doc
开发者ID:205Consulting,项目名称:SpotOn,代码行数:70,代码来源:SemanticAnalysis.py


注:本文中的gensim.models.ldamodel.LdaModel.inference方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。