本文整理汇总了Python中sparsesvd.sparsesvd函数的典型用法代码示例。如果您正苦于以下问题：Python sparsesvd函数的具体用法？Python sparsesvd怎么用？Python sparsesvd使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了sparsesvd函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: pseudoinverse
def pseudoinverse(Mat, precision):
    """
    Approximate the pseudoinverse of a sparse matrix with a truncated SVD.

    Objective:
    ----------
    Compute a pseudoinverse through a Singular Value Decomposition
    obtained from ``sparsesvd``.

    Process:
    --------
    The decomposition is run on whichever orientation has the fewer rows:
    the matrix itself when ``rows <= columns``, its transpose otherwise.
    ``precision`` is a percentage that fixes the number of requested
    factors:

        k = int(precision * rows_of_decomposed_matrix / 100)

    The inverse singular values are placed on a diagonal and the three
    factors are multiplied back together with ``spmatrixmul``.

    Parameters:
    -----------
    Mat       : sparse matrix exposing ``tocsc()``.
    precision : percentage used to derive ``k`` (see above).

    Returns:
    --------
    The product of the three factors, converted with ``tocsr()``.
    """
    matrix = Mat.tocsc()
    n_rows, n_cols = matrix.shape
    decompose_as_is = n_rows <= n_cols
    target = matrix if decompose_as_is else matrix.transpose()

    rank = int((precision * target.shape[0]) / 100)
    ut, s, vt = sparsesvd(target.tocsc(), rank)

    left_t = ss.csr_matrix(ut)
    inv_sigma = ss.csr_matrix(np.diag(1 / s))
    right_t = ss.csr_matrix(vt)

    if decompose_as_is:
        # Mat = U S V^T  ->  pinv(Mat) = V S^-1 U^T
        outer, inner = right_t, left_t
    else:
        # Mat^T = U S V^T  ->  pinv(Mat) = U S^-1 V^T
        outer, inner = left_t, right_t

    scaled = spmatrixmul(outer.transpose(), inv_sigma)
    pinv_matrix = spmatrixmul(scaled, inner)
    return pinv_matrix.tocsr()
示例2: tune
def tune(my_corpus, dictionary, min_topics=2, max_topics=50, step=2):
    """
    Sweep LDA topic counts and collect model-selection statistics.

    For every topic count in ``range(min_topics, max_topics, step)``
    (``max_topics`` itself is excluded) an LDA model is trained on the
    corpus returned by ``Index.get_corpus('train features')`` and then
    evaluated against ``my_corpus``.

    Returns four parallel lists: the topic counts, the mean per-document
    entropy of the topic distribution, ``lda.log_perplexity(my_corpus)``,
    and the symmetric KL divergence between the singular values of the
    topic-word matrix and the length-weighted document-topic totals.
    A summary line is printed for each topic count.
    """
    def sym_kl(p, q):
        # KL(p || q) + KL(q || p)
        forward = scipy.stats.entropy(p, q)
        backward = scipy.stats.entropy(q, p)
        return np.sum([forward, backward])

    kl = []
    Hbar = []
    perplexity = []
    n_topics = []

    # Total token count of each document in my_corpus.
    doc_lengths = np.array([sum(cnt for _, cnt in doc) for doc in my_corpus])
    corpus = Index.get_corpus('train features')

    for topic_count in range(min_topics, max_topics, step):
        n_topics.append(topic_count)
        lda = gensim.models.ldamodel.LdaModel(
            corpus=corpus, id2word=dictionary,
            num_topics=topic_count, alpha='auto')

        # Singular values of the topic-word matrix.
        topic_word = scipy.sparse.csc_matrix(lda.expElogbeta)
        _, cm1, _ = sparsesvd(topic_word, topic_word.shape[0])

        # Document-topic matrix, weighted by document length.
        lda_topics = lda[my_corpus]
        doc_topic = gensim.matutils.corpus2dense(
            lda_topics, lda.num_topics).transpose()
        cm2 = doc_lengths.dot(doc_topic)
        cm2 = cm2 + 0.0001
        cm2 = cm2 / np.linalg.norm(doc_lengths)
        kl.append(sym_kl(cm1, cm2))

        entropy_list = []
        for doc in my_corpus:
            weights = [pair[1] for pair in lda[doc]]
            entropy_list.append(scipy.stats.entropy(weights))
        Hbar.append(np.mean(entropy_list))

        perplexity.append(lda.log_perplexity(my_corpus))
        summary = (topic_count, Hbar[-1], perplexity[-1],
                   np.exp2(-perplexity[-1]), kl[-1])
        print("NumTopics: %s | Unscaled Entropy: %s | Per-word-bound: %s | Per-word-perplexity: %s | Arun measure %s" % summary)
    return n_topics, Hbar, perplexity, kl
示例3: lsa
def lsa( ):
    """
    Run LSA on a pickled term-document matrix and pickle the factors.

    Reads ``tdm.p`` (entries of the form ``((row, col), value)``),
    ``id_file.p`` and ``word_id.p`` from the module-level ``outfile``
    directory, builds a ``(len(id_file), len(word_id))`` float32 CSC
    matrix, asks ``sparsesvd`` for 300 factors and writes

    * ``u_sigma.p`` -- U scaled column-wise by the singular values
    * ``v.p``       -- the transposed right factor

    back into the same directory. Returns ``None``.

    Relies on module-level ``outfile``, ``pickle`` and ``np``.
    """
    from sparsesvd import sparsesvd
    from numpy import array
    import scipy.sparse as sp

    def _load(name):
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # this on files produced by a trusted earlier pipeline stage.
        with open(outfile + name, "rb") as handle:
            return pickle.load(handle)

    def _dump(obj, name):
        with open(outfile + name, "wb") as handle:
            pickle.dump(obj, handle)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("######## READING TERM DOC MATRIX #########")
    termDocEntries = _load("/tdm.p")
    id2title = _load("/id_file.p")
    word2id = _load("/word_id.p")
    fileCount = len(id2title)
    vocab_size = len(word2id)
    print("######## READING COMPLETE #########")

    rows = array([i for ((i, j), v) in termDocEntries])
    cols = array([j for ((i, j), v) in termDocEntries])
    values = array([v for ((i, j), v) in termDocEntries])
    shape = (fileCount, vocab_size)
    # Two spaces after the colon reproduce the output of the original
    # two-argument print statement.
    print("Dimension of TDM is :  %s" % (shape,))

    print("######## STARTING LSA #########")
    termDocMatrix = sp.csc_matrix((values, (rows, cols)), shape=shape,
                                  dtype=np.float32)
    UT, S, VT = sparsesvd(termDocMatrix, 300)

    # Scale column j of U by S[j]. This is the same product as multiplying
    # by a float32 diagonal matrix, without building the dense k x k matrix.
    sigma = np.asarray(S, dtype=np.float32)
    US = UT.T * sigma
    (m1, m2) = UT.T.shape
    print("%s %s" % (m1, m2))

    _dump(US, "/u_sigma.p")
    _dump(VT.T, "/v.p")
    print("######## LSA COMPLETE #########")
示例4: matrixsvd
def matrixsvd(self):
    """
    Decompose ``self.projection_matrix`` with the backend named by ``self.svd``.

    Backends:
      * ``'scipy'``     -- ``ssl.svds`` (truncated)
      * ``'sparsesvd'`` -- ``sparsesvd`` (truncated, result returned untouched)
      * ``'fast'``      -- ``fast_svd`` (truncated)
      * anything else   -- ``np.linalg.svd`` on the densified matrix (full)

    For the truncated backends the number of factors is
    ``int(rows * self.precision) // 100``, i.e. ``self.precision`` is a
    percentage of the row count. ``self.precision`` is only read for those
    backends.

    Returns ``(UT, S, VT)``. For every backend except ``'sparsesvd'`` the
    left factor is transposed and all three parts pass through
    ``np.nan_to_num``.
    """
    svd_matrix = self.projection_matrix.tocsc()
    backend = self.svd

    # Backend names are compared with ==; identity (`is`) on strings only
    # works by accident of interning.
    if backend in ('scipy', 'sparsesvd', 'fast'):
        # Floor division keeps the rank an int on Python 3 as well.
        k = int(svd_matrix.shape[0] * self.precision) // 100

    if backend == 'sparsesvd':
        (UT, S, VT) = sparsesvd(svd_matrix, k)
        return UT, S, VT

    if backend == 'scipy':
        Utemp, Stemp, VTtemp = ssl.svds(svd_matrix, k=k)
    elif backend == 'fast':
        Utemp, Stemp, VTtemp = fast_svd(svd_matrix, k)
    else:
        Utemp, Stemp, VTtemp = np.linalg.svd(svd_matrix.todense())

    UT = np.nan_to_num(Utemp.transpose())
    S = np.nan_to_num(Stemp)
    VT = np.nan_to_num(VTtemp)
    return UT, S, VT
示例5: generate_archetypes
def generate_archetypes(singer_resumes, archetype_count_k=20, cache_file=CACHE):
    """
    Build an archetype matrix for a population and write it to ``cache_file``.

    Each entry of ``singer_resumes`` is an iterable of characters. A
    singers-by-characters incidence matrix is assembled, decomposed with
    ``sparsesvd`` into ``archetype_count_k`` factors, and the third
    returned factor is stored with ``np.savez`` together with the
    character-to-column lookup, under the keys ``ARCHETYPES`` and
    ``CHARACTERS``. Returns ``None``.
    """
    # Distinct characters across all resumes; list() fixes the column order.
    seen = set()
    for resume in singer_resumes:
        seen.update(resume)
    column_order = list(seen)

    # character -> column index
    character_positions = {
        character: column for column, character in enumerate(column_order)
    }

    # One row per singer, one column per character.
    incidence = scipy.sparse.lil_matrix((len(singer_resumes), len(column_order)))
    for row, resume in enumerate(singer_resumes):
        for character in resume:
            incidence[row, character_positions[character]] = True

    # sparsesvd is fed a CSC matrix; keep only its third output.
    _, _, archetypes = sparsesvd(scipy.sparse.csc_matrix(incidence),
                                 archetype_count_k)

    # Cache the data for later use.
    np.savez(cache_file, **{CHARACTERS: character_positions,
                            ARCHETYPES: archetypes})
示例6: __init__
def __init__(self, m, k, docs=None, use_svdlibc=False, power_iters=P2_EXTRA_ITERS, extra_dims=P2_EXTRA_DIMS):
    """
    Construct the (U, S) projection from a corpus `docs`. The projection can
    be later updated by merging it with another Projection via `self.merge()`.

    This is the class taking care of the 'core math'; interfacing with corpora,
    splitting large corpora into chunks and merging them etc. is done through
    the higher-level `LsiModel` class.

    `m` and `k` are stored as `self.m` / `self.k`; `m` is forwarded to
    `stochastic_svd` as `num_terms` and `k` is the number of requested
    factors. With `docs=None` an empty projection (`u = s = None`) is
    created. `use_svdlibc` selects the `sparsesvd` backend instead of
    `stochastic_svd`; `power_iters` and `extra_dims` are only forwarded to
    `stochastic_svd`.

    Raises `ImportError` when `use_svdlibc` is set but `sparsesvd` cannot
    be imported.
    """
    self.m, self.k = m, k
    self.power_iters = power_iters
    self.extra_dims = extra_dims

    if docs is None:
        self.u, self.s = None, None
        return

    # base case decomposition: given a job `docs`, compute its decomposition,
    # *in-core*.
    if not use_svdlibc:
        u, s = stochastic_svd(docs, k, chunksize=sys.maxsize,
                              num_terms=m, power_iters=self.power_iters,
                              extra_dims=self.extra_dims)
    else:
        try:
            import sparsesvd
        except ImportError:
            raise ImportError("`sparsesvd` module requested but not found; run `easy_install sparsesvd`")
        # Convert before logging: a corpus that is not yet a sparse matrix
        # has no `.shape`, so logging first would raise AttributeError.
        if not scipy.sparse.issparse(docs):
            docs = matutils.corpus2csc(docs)
        logger.info("computing sparse SVD of %s matrix", str(docs.shape))
        # ask for extra factors, because for some reason SVDLIBC sometimes
        # returns fewer factors than requested
        ut, s, vt = sparsesvd.sparsesvd(docs, k + 30)
        u = ut.T
        del ut, vt
        k = clip_spectrum(s**2, self.k)

    self.u = u[:, :k].copy()
    self.s = s[:k].copy()
示例7: __init__
def __init__(self, m, k, docs = None):
    """
    Hold the (U, S) projection. This class does the 'core math' only;
    talking to corpora and training is the job of class LsiModel.

    `docs` is either a sparse matrix or a corpus which, once converted to
    a sparse matrix, must fit comfortably into main memory. With
    `docs=None` both `self.u` and `self.s` are set to None.

    Raises ImportError when the sparse path is taken and `sparsesvd` is
    not installed.
    """
    self.m, self.k = m, k
    if docs is None:
        self.u, self.s = None, None
        return

    # base case decomposition: given a job `docs`, compute its decomposition
    # in core, algorithm 1
    if utils.isCorpus(docs):
        docs = matutils.corpus2csc(m, docs)

    use_dense = m * k < 10000
    if use_dense:
        # SVDLIBC gives spurious results for small matrices, so small
        # problems go through the full LAPACK svd instead.
        docs = docs.todense()
        logger.info("computing dense SVD of %s matrix" % str(docs.shape))
        u, s, vt = numpy.linalg.svd(docs, full_matrices = False)
    else:
        try:
            import sparsesvd
        except ImportError:
            raise ImportError("for LSA, the `sparsesvd` module is needed but not found; run `easy_install sparsesvd`")
        logger.info("computing sparse SVD of %s matrix" % str(docs.shape))
        # ask for extra factors, because for some reason SVDLIBC sometimes
        # returns fewer factors than requested
        ut, s, vt = sparsesvd.sparsesvd(docs, k + 30)
        u = ut.T
        del ut
    del vt

    # Both paths are clipped to the rank chosen by clipSpectrum.
    rank = clipSpectrum(s, self.k)
    self.u, self.s = u[:, :rank], s[:rank]
示例8: LSA
def LSA(M,k): ##will return top k sentences
    """
    Score the columns of ``M`` with an SVD and return the ``k`` best.

    ``sparsesvd`` is asked for ``k + 10`` factors of ``M``. Each column
    ``i`` gets the score ``numpy.dot(s, vt)[i]`` and is represented as a
    ``(score, column_index)`` tuple. When there are at most ``k`` columns
    all tuples are returned in column order; otherwise the first ``k``
    slots are maintained with the external ``heapify`` helper and
    ``L[:k]`` is returned (heap order, not sorted).

    Side effect: entries of ``M`` below their row average are set to 0 in
    place. This happens after the SVD has been computed, so it does not
    influence the returned scores.

    NOTE(review): ``heapify(L, index, size)`` is defined elsewhere;
    presumably it sifts ``index`` down within the first ``size`` entries of
    a min-heap and returns the list -- confirm against its definition.
    """
    csc_input = scipy.sparse.csc_matrix(M)  # sparsesvd is fed CSC input
    u, s, vt = sparsesvd(csc_input, k+10)

    # Cross-method thresholding, applied row by row on views of M.
    m, n = M.shape
    row_means = numpy.average(M, 1)
    for i in range(m):
        row = M[i]
        cutoff = row_means[i]
        for j in range(n):
            if row[j] < cutoff:
                row[j] = 0

    scores = numpy.dot(s, vt)
    L = [(scores[col], col) for col in range(n)]
    if k >= len(L):
        return L

    # Build a heap over the first k entries, bottom-up.
    for start in range(int(k/2-1), -1, -1):
        L = heapify(L, start, k)

    # Any later entry that beats the heap root replaces it.
    for idx in range(k, len(L)):
        if L[0][0] < L[idx][0]:
            L[0] = L[idx]
            L = heapify(L, 0, k)
    return L[:k]
示例9: applySvd
def applySvd(self):
    """
    Rebuild a rank-limited version of the stored matrix via ``sparsesvd``.

    The COO-style triplets ``self.array_data`` / ``self.array_row`` /
    ``self.array_col`` are assembled into a CSC matrix whose shape is the
    largest row/column index plus one. It is decomposed into
    ``self.svd_dimension`` factors, and the product ``U * diag(Sigma) * Vt``
    is stored in ``self.svd_matrix``. Progress messages and the factors
    themselves are printed. Returns ``None``.

    Raises ``ValueError`` (from ``max``) when the index arrays are empty.
    """
    len_row = max(self.array_row) + 1
    len_col = max(self.array_col) + 1
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Applying SVD with ROW: " + str(len_row) + " and COL: " + str(len_col))
    sparse_matrix = scipy.sparse.csc_matrix(
        (self.array_data, (self.array_row, self.array_col)), shape=(len_row, len_col)
    )
    print("sparsed matrix")
    Ut, Sigma, Vt = sparsesvd(sparse_matrix, self.svd_dimension)
    print("U Sigma Vt done!")
    # Drop the input before the dense reconstruction to lower peak memory.
    del sparse_matrix
    print("Mounting Matrix SVD")
    # Scaling row j of Vt by Sigma[j] equals diag(Sigma).dot(Vt) without
    # building the dense diagonal matrix.
    self.svd_matrix = numpy.dot(Ut.T, Sigma[:, numpy.newaxis] * Vt)
    print("Done!")
    for part in (Ut.T, Sigma, Vt, self.svd_matrix.T):
        print(part)
        print("\n")
示例10: main
def main():
    """
    Build a word co-occurrence matrix and print SVD word vectors.

    Loads frequency and pair-PMI tables into an ``ENVector``, fills a
    ``limit x limit`` sparse matrix with ``pair_count`` values for the
    first ``limit`` words, asks ``sparsesvd`` for 10 factors and prints,
    for each of the ``limit`` columns, the space-separated values
    ``ut[j][i] * s[j]`` on one line.

    Relies on module-level ``limit``, ``sp``, ``ENVector`` and ``sparsesvd``.
    """
    en_vector = ENVector()
    en_vector.read_freq("results/freq_en_fixed_pmi.txt")
    en_vector.read_pair_pmi("results/pair_en_fixed_pmi.txt")
    en_vector.sort_by_freq()
    en_vector.generate_label()
    en_vector.generate_matrix_label()
    en_vector.calculate_size()

    matrix = sp.lil_matrix((limit, limit))
    for i in range(min(limit, len(en_vector.matrix_label))):
        for neighbour in en_vector.matrix_label[i]:
            # Labels at or beyond `limit` fall outside the matrix.
            if neighbour >= limit:
                continue
            word1 = en_vector.word_list[i]
            word2 = en_vector.word_list[neighbour]
            matrix[i, neighbour] = en_vector.pair_count[(word1, word2)]

    smat = sp.csc_matrix(matrix)
    ut, s, vt = sparsesvd(smat, 10)

    # sparsesvd may return fewer factors than requested; iterate over what
    # actually came back instead of a hard-coded 10 to avoid an IndexError.
    factors = range(len(s))
    for i in range(limit):
        print(" ".join(str(ut[j][i] * s[j]) for j in factors))
示例11: arun
def arun(corpus, dictionary, min_topics=10, max_topics=21, step=5):
    """
    Compute a symmetric-KL score for a range of LDA topic counts.

    For each ``i`` in ``range(min_topics, max_topics, step)`` an LDA model
    is obtained from ``LDA(dictionary, corpus, i, "lda20/lda_training_<i>")``.
    The singular values of its topic-word matrix are compared, through
    ``sym_kl``, with the normalised length-weighted document-topic totals.

    Returns a list of ``(topic_count, kl)`` tuples; progress is printed.

    NOTE(review): ``my_corpus``, ``l`` and ``sym_kl`` are not parameters or
    locals, so they must exist at module level. Presumably ``l`` holds the
    per-document lengths of ``my_corpus`` -- confirm against the caller.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Arun runing")
    output = []
    for i in range(min_topics, max_topics, step):
        lda = LDA(dictionary, corpus, i, "lda20/lda_training_" + str(i))
        print("Модель построена/загружена")
        m1 = lda.expElogbeta
        smat = scipy.sparse.csc_matrix(m1)  # convert to sparse CSC format
        # Request i + 30 factors; only the singular values are used.
        U, cm1, V = sparsesvd(smat, i + 30)
        print("sparsesvd сделано")
        # Document-topic matrix
        lda_topics = lda[my_corpus]
        m2 = matutils.corpus2dense(lda_topics, lda.num_topics).transpose()
        cm2 = l.dot(m2)
        cm2 = cm2 + 0.0001
        print("cm2norm begin")
        cm2norm = np.linalg.norm(l)
        print("cm2norm end")
        cm2 = cm2/cm2norm
        print("%s %s" % (len(cm1), len(cm2)))
        kl = sym_kl(cm1, cm2)
        output.append((i, kl))
        print("%s %s" % (i, kl))
    print(output)
    return output
示例12: learn
def learn(mat):
    """
    Decompose ``mat`` with ``sparsesvd`` and report how long it took.

    Returns the tuple ``(user_mat, axis_weights, movie_mat)`` exactly as
    produced by ``sparsesvd(mat, NUM_COMPONENTS)``.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Starting learning process...")
    start_time = time.time()
    user_mat, axis_weights, movie_mat = sparsesvd(mat, NUM_COMPONENTS)
    elapsed = time.time() - start_time
    print("Matrix decomposition complete (elapsed time: %f s)." % elapsed)
    print("Learning process complete.")
    return (user_mat, axis_weights, movie_mat)
示例13: test_svd_matrix
def test_svd_matrix(W, WT, D, DT):
    """
    Scan SVD ranks 270..279 and rank them by reconstruction residual.

    ``A = pinv(W) * D * pinv(W^T)`` is decomposed with ``sparsesvd`` for
    each requested rank ``k``. For every distinct number of factors ``z``
    actually returned, ``L = (W * U) * (S * V * WT^T)`` is compared with
    ``DT`` through ``fnorm``. The scan stops as soon as ``z`` stops
    growing.

    Returns an ``OrderedDict`` mapping ``z`` to its residual, ordered by
    ascending residual. It is empty when the very first decomposition
    yields no factors.

    NOTE(review): the second pseudoinverse is taken of ``W.transpose()``,
    not of the ``WT`` argument -- confirm that this is intended.
    """
    Winv = ss.csr_matrix(np.linalg.pinv(W.todense()))
    WTinv = ss.csr_matrix(np.linalg.pinv(W.transpose().todense()))
    A = ((Winv * D) * WTinv).tocsc()

    res_dict = {}
    old_z = 0
    for k in range(270, 280):
        (ut, s, vt) = sparsesvd(A, k)
        U = ss.csr_matrix(ut.T)
        z = U.shape[1]
        if z == old_z:
            # No additional factor was produced; larger k will not help.
            break
        # Reconstruct only after the break check, so the sparse products
        # are not computed for an iteration that is discarded.
        S = ss.csr_matrix(np.diag(s))
        V = ss.csr_matrix(vt)
        L = (W * U) * (S * V * WT.transpose())
        res_dict[z] = fnorm(L, DT)
        old_z = z

    # Sort once after the scan. Doing it here also keeps the result defined
    # when the loop breaks on its first pass.
    Result = OrderedDict(sorted(res_dict.items(),
                                key=lambda t: np.float64(t[1])))
    return Result
示例14: generate_model
def generate_model(in_path, title_limit, user_limit, features, out_path):
    """
    Factorise a score matrix loaded from a database and save the model.

    Steps: load scores from the database at ``in_path``; filter out
    titles/users below ``title_limit`` / ``user_limit`` and then any empty
    rows/columns; compute a per-row non-zero average; decompose the matrix
    into ``features`` factors with ``sparsesvd``; write ``Terms``,
    ``Documents``, ``Averages`` and the two id mappings to ``out_path``
    with ``savemat``. Returns ``None``.

    NOTE(review): ``Documents`` is built from ``ut`` (not ``vt``), i.e.
    ``S^-1/2 * U^T`` -- presumably a fold-in projection; confirm.
    """
    # connect to db; close it even when loading fails
    db = pg.connect(in_path)
    try:
        scores = load_scores(db)
    finally:
        db.close()
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Loaded scores")

    # filter insignificant titles/users, second filtering to remove empty cols/rows
    (mat, old_ids_1) = filter_too_small(scores, title_limit, user_limit)
    (mat, old_ids_2) = filter_too_small(mat.tocsc(), 1, 1)
    print("Filtered insignificant titles and users")

    # matrix is in csr format, calc row nnz averages and convert to csc.
    # A list (not a lazy map object) so savemat receives concrete values
    # on Python 3 as well.
    averages = [row_nnz_average(mat, row) for row in range(mat.shape[0])]
    mat = mat.tocsc()

    # build compact titleid translation tables
    old_ids = join_old_id_dicts(old_ids_1, old_ids_2)
    (title_to_document, document_to_tile) = build_title_mapping(old_ids, mat.shape[0])
    print("Built additional data")

    # run svd (mat is already CSC at this point)
    (ut, s, vt) = sparsesvd(mat, features)
    print("Factorization finished")
    s_sqrt = numpy.diag(numpy.sqrt(s))
    s_inv = numpy.diag(numpy.power(s, -1))
    terms = ut.transpose().dot(s_sqrt)
    documents = s_sqrt.dot(s_inv).dot(ut)

    # dump results
    savemat(out_path, {"Terms": terms, "Documents": documents, "Averages": averages, "TitleMapping": title_to_document, "DocumentMapping" : document_to_tile}, oned_as='row')
    print("Saved generated results")
示例15: load
def load(self, dirname, svd_k = 0):
    """
    Read the embedding stored under ``dirname`` and optionally reduce it.

    The svmlight file ``dirname + EMBEDDING_FILENAME`` is loaded into
    ``self.emb.x`` / ``self.emb.y``. When ``svd_k`` is non-zero, the
    transposed feature matrix is decomposed with ``sparsesvd`` into
    ``svd_k`` factors and ``self.emb.x`` is replaced by ``Vt.T`` in CSR
    form. If ``sparsesvd`` is not installed, a warning is printed and the
    features are left as loaded.

    Returns ``self.emb``, or ``None`` when the embedding file cannot be
    read (``ValueError`` / ``IOError``).
    """
    self.dirname = dirname
    embedding_path = dirname + EMBEDDING_FILENAME
    try:
        self.emb.x, self.emb.y = load_svmlight_file(embedding_path)
    except (ValueError, IOError):
        return None

    wants_svd = svd_k != 0
    if wants_svd:
        try:
            import sparsesvd
            import scipy.sparse
            transposed = scipy.sparse.csc_matrix(self.emb.x.T)
            _, _, right_t = sparsesvd.sparsesvd(transposed, svd_k)
            self.emb.x = scipy.sparse.csr_matrix(right_t.T)
        except ImportError:
            print('Warning: Cannot perform SVD without sparsesvd module')

    self._loadFeatureTable()
    self._loadTOC()
    return self.emb