本文整理汇总了Python中gensim.matutils.corpus2dense方法的典型用法代码示例。如果您正苦于以下问题:Python matutils.corpus2dense方法的具体用法?Python matutils.corpus2dense怎么用?Python matutils.corpus2dense使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.matutils
的用法示例。
在下文中一共展示了matutils.corpus2dense方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testTransform
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import corpus2dense [as 别名]
def testTransform(self):
"""Test lsi[vector] transformation."""
# create the transformation model
model = lsimodel.LsiModel(self.corpus, num_topics=2)
# make sure the decomposition is enough accurate
u, s, vt = scipy.linalg.svd(matutils.corpus2dense(self.corpus, self.corpus.num_terms), full_matrices=False)
self.assertTrue(numpy.allclose(s[:2], model.projection.s)) # singular values must match
# transform one document
doc = list(self.corpus)[0]
transformed = model[doc]
vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests
expected = numpy.array([-0.6594664, 0.142115444]) # scaled LSI version
# expected = numpy.array([-0.1973928, 0.05591352]) # non-scaled LSI version
self.assertTrue(numpy.allclose(abs(vec), abs(expected))) # transformed entries must be equal up to sign
示例2: to_features
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import corpus2dense [as 别名]
def to_features(dictionary, words):
tmp = dictionary.doc2bow(words)
dense = list(matutils.corpus2dense([tmp], num_terms=len(dictionary)).T[0])
return dense
示例3: __getitem__
# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import corpus2dense [as 别名]
def __getitem__(self, bow, scaled=False, chunksize=512):
"""
Return latent representation, as a list of (topic_id, topic_value) 2-tuples.
This is done by folding input document into the latent topic space.
"""
assert self.projection.u is not None, "decomposition not initialized yet"
# if the input vector is in fact a corpus, return a transformed corpus as a result
is_corpus, bow = utils.is_corpus(bow)
if is_corpus and chunksize:
# by default, transform `chunksize` documents at once, when called as `lsi[corpus]`.
# this chunking is completely transparent to the user, but it speeds
# up internal computations (one mat * mat multiplication, instead of
# `chunksize` smaller mat * vec multiplications).
return self._apply(bow, chunksize=chunksize)
if not is_corpus:
bow = [bow]
# convert input to scipy.sparse CSC, then do "sparse * dense = dense" multiplication
vec = matutils.corpus2csc(bow, num_terms=self.num_terms, dtype=self.projection.u.dtype)
topic_dist = (vec.T * self.projection.u[:, :self.num_topics]).T # (x^T * u).T = u^-1 * x
# # convert input to dense, then do dense * dense multiplication
# # ± same performance as above (BLAS dense * dense is better optimized than scipy.sparse), but consumes more memory
# vec = matutils.corpus2dense(bow, num_terms=self.num_terms, num_docs=len(bow))
# topic_dist = numpy.dot(self.projection.u[:, :self.num_topics].T, vec)
# # use numpy's advanced indexing to simulate sparse * dense
# # ± same speed again
# u = self.projection.u[:, :self.num_topics]
# topic_dist = numpy.empty((u.shape[1], len(bow)), dtype=u.dtype)
# for vecno, vec in enumerate(bow):
# indices, data = zip(*vec) if vec else ([], [])
# topic_dist[:, vecno] = numpy.dot(u.take(indices, axis=0).T, numpy.array(data, dtype=u.dtype))
if scaled:
topic_dist = (1.0 / self.projection.s[:self.num_topics]) * topic_dist # s^-1 * u^-1 * x
# convert a numpy array to gensim sparse vector = tuples of (feature_id, feature_weight),
# with no zero weights.
if not is_corpus:
# lsi[single_document]
result = matutils.full2sparse(topic_dist.flat)
else:
# lsi[chunk of documents]
result = matutils.Dense2Corpus(topic_dist)
return result