当前位置: 首页>>代码示例>>Python>>正文


Python sparsesvd.sparsesvd函数代码示例

本文整理汇总了Python中sparsesvd.sparsesvd函数的典型用法代码示例。如果您正苦于以下问题：Python sparsesvd函数的具体用法？Python sparsesvd怎么用？Python sparsesvd使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了sparsesvd函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: pseudoinverse

def pseudoinverse(Mat, precision):
    """
    Truncated pseudoinverse computation.

    Objective:
    ----------
    To compute a pseudoinverse using Singular Value Decomposition.

    Reason:
    -------
    SVD using Scipy is slow and consumes a lot of memory, similarly
    pysparse matrix consumes a lot of memory. This is a better
    alternative to a direct computation of inverse.

    Process:
    --------
    The function uses sparsesvd to compute the SVD of a sparse matrix.
    `precision` is a percentage that controls the truncation (the k) of
    the SVD:

        k = (precision / 100) * min(rows, columns) of the matrix.

    The decomposition is always run on the orientation that has no more
    rows than columns; when the input has more rows than columns its
    transpose is decomposed and the factors are swapped back.

    Singular values that are exactly zero contribute nothing to the
    result (their reciprocal is taken as zero) instead of producing
    infinite entries.

    Parameters: `Mat` is a scipy sparse matrix, `precision` a percentage.
    Returns: the pseudoinverse as a CSR sparse matrix.
    """
    matrix = Mat.tocsc()
    transposed = matrix.shape[0] > matrix.shape[1]
    target = matrix.transpose().tocsc() if transposed else matrix

    k = int((precision * target.shape[0]) / 100)
    ut, s, vt = sparsesvd(target, k)

    # Reciprocal of the singular values, leaving exact zeros at zero so the
    # result never contains inf/nan.
    s = np.asarray(s, dtype=float)
    inv_s = np.zeros(len(s), dtype=float)
    nonzero = s != 0
    inv_s[nonzero] = 1 / s[nonzero]

    UT = ss.csr_matrix(ut)
    SI = ss.csr_matrix(np.diag(inv_s))
    VT = ss.csr_matrix(vt)

    # target = UT' * S * VT, hence pinv(target) = VT' * SI * UT.
    # If the transpose was decomposed, pinv(Mat) = UT' * SI * VT.
    if transposed:
        left, right = UT, VT
    else:
        left, right = VT, UT

    pinv_matrix = spmatrixmul(spmatrixmul(left.transpose(), SI), right)
    return pinv_matrix.tocsr()
开发者ID:f00barin,项目名称:distrib,代码行数:53,代码来源:dst.py

示例2: tune

	def tune(my_corpus, dictionary, min_topics=2,max_topics=50,step=2):
		"""
		Sweep LDA topic counts and collect diagnostics for each model.

		For every topic count in range(min_topics, max_topics, step) an LDA
		model is trained on the corpus returned by
		Index.get_corpus('train features') and then evaluated on `my_corpus`.
		One summary line is printed per model.

		Returns four parallel lists: (n_topics, Hbar, perplexity, kl).
		"""
		def sym_kl(p, q):
			# Symmetrised KL divergence: KL(p||q) + KL(q||p).
			forward = scipy.stats.entropy(p, q)
			backward = scipy.stats.entropy(q, p)
			return np.sum([forward, backward])

		n_topics, Hbar, perplexity, kl = [], [], [], []
		# Total token count of every document in my_corpus.
		doc_lengths = np.array([sum(cnt for _, cnt in doc) for doc in my_corpus])
		corpus = Index.get_corpus('train features')
		for num_topics in range(min_topics, max_topics, step):
			n_topics.append(num_topics)
			lda = gensim.models.ldamodel.LdaModel(
				corpus=corpus, id2word=dictionary, num_topics=num_topics, alpha='auto')

			# Singular values of the topic-word matrix.
			topic_word = scipy.sparse.csc_matrix(lda.expElogbeta)
			_, cm1, _ = sparsesvd(topic_word, topic_word.shape[0])

			# Document-topic matrix, weighted by document length and normalised.
			doc_topic = gensim.matutils.corpus2dense(lda[my_corpus], lda.num_topics).transpose()
			cm2 = (doc_lengths.dot(doc_topic) + 0.0001) / np.linalg.norm(doc_lengths)

			divergence = sym_kl(cm1, cm2)
			kl.append(divergence)

			mean_entropy = np.mean(
				[scipy.stats.entropy([weight for _, weight in lda[doc]]) for doc in my_corpus])
			Hbar.append(mean_entropy)

			bound = lda.log_perplexity(my_corpus)
			perplexity.append(bound)

			summary = (num_topics, mean_entropy, bound, np.exp2(-bound), divergence)
			print("NumTopics: %s | Unscaled Entropy: %s | Per-word-bound: %s | Per-word-perplexity: %s | Arun measure %s" % summary)
		return n_topics, Hbar, perplexity, kl
开发者ID:redmelonnette,项目名称:reddit_authorship,代码行数:29,代码来源:setup_experiments.py

示例3: lsa

def lsa(num_factors=300):
    """
    Run LSA on the pickled term-document entries found under `outfile`.

    Reads `tdm.p` (an iterable of ((row, col), value) entries),
    `id_file.p` and `word_id.p` from the module-level `outfile` directory,
    builds a (file count x vocabulary size) sparse matrix, computes a
    truncated SVD and pickles U*Sigma to `u_sigma.p` and V to `v.p`.

    `num_factors` is the number of singular triplets requested from
    sparsesvd (default 300, the value previously hard-coded).
    """
    from sparsesvd import sparsesvd
    import scipy.sparse as sp

    def _load_pickle(name):
        # NOTE(review): pickle executes code embedded in the file; only
        # load files produced by a trusted run of this pipeline.
        with open(outfile + name, "rb") as handle:
            return pickle.load(handle)

    def _dump_pickle(obj, name):
        with open(outfile + name, "wb") as handle:
            pickle.dump(obj, handle)

    # calculate svd and perform lsa
    print("########     READING TERM DOC MATRIX #########")
    termDocEntries = _load_pickle("/tdm.p")
    id2title = _load_pickle("/id_file.p")
    word2id = _load_pickle("/word_id.p")
    fileCount = len(id2title)
    vocab_size = len(word2id)
    print("########     READING COMPLETE        #########")

    rows = np.array([i for ((i, j), v) in termDocEntries])
    cols = np.array([j for ((i, j), v) in termDocEntries])
    values = np.array([v for ((i, j), v) in termDocEntries])
    shape = (fileCount, vocab_size)
    # Two spaces after the colon reproduce the original
    # `print "... : ", shape` output.
    print("Dimension of TDM is :  %s" % (shape,))
    print("########     STARTING LSA            #########")
    termDocMatrix = sp.csc_matrix((values, (rows, cols)), shape=shape, dtype=np.float32)

    UT, S, VT = sparsesvd(termDocMatrix, num_factors)
    (m1, m2) = UT.T.shape

    # Scale the left singular vectors by the singular values.
    US = np.dot(UT.T, np.diag(S).astype(np.float32))
    print("%s %s" % (m1, m2))

    _dump_pickle(US, "/u_sigma.p")
    _dump_pickle(VT.T, "/v.p")
    print("########     LSA COMPLETE        #########")
开发者ID:debjyoti385,项目名称:WikiSearch,代码行数:34,代码来源:term_document_cluster.py

示例4: matrixsvd

    def matrixsvd(self):
        """
        Decompose `self.projection_matrix` with the backend named by `self.svd`.

        Backends:
          'scipy'     -> ssl.svds
          'sparsesvd' -> sparsesvd
          'fast'      -> fast_svd
          anything else -> dense np.linalg.svd on the full matrix

        For the three truncated backends the number of factors is
        `self.precision` percent of the matrix row count.

        Returns (UT, S, VT). Except for the 'sparsesvd' backend, whose
        output is returned untouched, U is transposed and NaN/inf values
        are replaced via np.nan_to_num.
        """
        svd_matrix = self.projection_matrix.tocsc()
        method = self.svd

        # Compare by value: `is` on strings only works by interning accident.
        if method in ('scipy', 'sparsesvd', 'fast'):
            # Floor division keeps k an integer on Python 3 as well.
            k = int(svd_matrix.shape[0] * self.precision) // 100

        if method == 'sparsesvd':
            (UT, S, VT) = sparsesvd(svd_matrix, k)
            return UT, S, VT

        if method == 'scipy':
            Utemp, Stemp, VTtemp = ssl.svds(svd_matrix, k=k)
        elif method == 'fast':
            Utemp, Stemp, VTtemp = fast_svd(svd_matrix, k)
        else:
            Utemp, Stemp, VTtemp = np.linalg.svd(svd_matrix.todense())

        UT = np.nan_to_num(Utemp.transpose())
        S = np.nan_to_num(Stemp)
        VT = np.nan_to_num(VTtemp)
        return UT, S, VT
开发者ID:f00barin,项目名称:distrib,代码行数:29,代码来源:dstns.py

示例5: generate_archetypes

def generate_archetypes(singer_resumes, archetype_count_k=20, cache_file=CACHE):
    """Build an archetype matrix for a population and save it to `cache_file`.

    Each resume is an iterable of characters. A singers-by-characters
    incidence matrix is decomposed with sparsesvd, and the third returned
    factor is stored together with the character -> column index lookup.
    """
    # Every distinct character that appears in any resume
    characters = set()
    characters.update(*singer_resumes)
    characters = list(characters)

    # Column index of each character
    character_positions = {character: i for i, character in enumerate(characters)}

    # One row per singer, one column per character
    singer_matrix = scipy.sparse.lil_matrix((len(singer_resumes), len(characters)))
    for row, singer_resume in enumerate(singer_resumes):
        for character in singer_resume:
            singer_matrix[row, character_positions[character]] = True

    # sparsesvd wants CSC input; only the third factor is kept
    _, _, archetypes = sparsesvd(scipy.sparse.csc_matrix(singer_matrix), archetype_count_k)

    # Cache the data for later use
    np.savez(cache_file, **{CHARACTERS: character_positions, ARCHETYPES: archetypes})
开发者ID:jonafato,项目名称:clusterfach-py,代码行数:35,代码来源:clusterfach.py

示例6: __init__

    def __init__(self, m, k, docs=None, use_svdlibc=False, power_iters=P2_EXTRA_ITERS, extra_dims=P2_EXTRA_DIMS):
        """
        Build the (U, S) projection from the job `docs`, entirely in-core.

        With `docs=None` an empty projection (u and s both None) is created.
        Otherwise the decomposition comes from `stochastic_svd`, or from
        the `sparsesvd` package when `use_svdlibc` is true. The result is
        truncated and copied into `self.u` / `self.s`.

        Higher-level concerns (corpus handling, chunking, merging) are not
        handled here.
        """
        self.m, self.k = m, k
        self.power_iters = power_iters
        self.extra_dims = extra_dims

        if docs is None:
            self.u, self.s = None, None
            return

        if use_svdlibc:
            try:
                import sparsesvd
            except ImportError:
                raise ImportError("`sparsesvd` module requested but not found; run `easy_install sparsesvd`")
            logger.info("computing sparse SVD of %s matrix" % str(docs.shape))
            if not scipy.sparse.issparse(docs):
                docs = matutils.corpus2csc(docs)
            # Request 30 extra factors: SVDLIBC sometimes returns fewer
            # factors than asked for.
            ut, s, vt = sparsesvd.sparsesvd(docs, k + 30)
            u = ut.T
            del ut, vt
            k = clip_spectrum(s**2, self.k)
        else:
            u, s = stochastic_svd(docs, k, chunksize=sys.maxsize,
                num_terms=m, power_iters=self.power_iters,
                extra_dims=self.extra_dims)

        self.u = u[:, :k].copy()
        self.s = s[:k].copy()
开发者ID:AmitShah,项目名称:gensim,代码行数:35,代码来源:lsimodel.py

示例7: __init__

    def __init__(self, m, k, docs = None):
        """
        Hold the (U, S) projection.

        `docs` is either a sparse matrix or a corpus; a corpus is converted
        to a sparse matrix first, so it must fit comfortably into main
        memory. With `docs=None` both `self.u` and `self.s` are None.

        When m * k is below 10000 a dense numpy SVD is used, otherwise the
        `sparsesvd` package. The spectrum is then clipped with
        `clipSpectrum`.
        """
        self.m, self.k = m, k

        if docs is None:
            self.u, self.s = None, None
            return

        # base case decomposition: compute the decomposition of `docs` in core
        if utils.isCorpus(docs):
            docs = matutils.corpus2csc(m, docs)

        if m * k >= 10000:
            try:
                import sparsesvd
            except ImportError:
                raise ImportError("for LSA, the `sparsesvd` module is needed but not found; run `easy_install sparsesvd`")
            logger.info("computing sparse SVD of %s matrix" % str(docs.shape))
            # Request 30 extra factors: SVDLIBC sometimes returns fewer
            # factors than asked for.
            ut, s, vt = sparsesvd.sparsesvd(docs, k + 30)
            u = ut.T
            del ut
        else:
            # SVDLIBC gives spurious results for small matrices, so run the
            # full LAPACK SVD on them instead.
            docs = docs.todense()
            logger.info("computing dense SVD of %s matrix" % str(docs.shape))
            u, s, vt = numpy.linalg.svd(docs, full_matrices = False)

        del vt
        k = clipSpectrum(s, self.k)
        self.u, self.s = u[:, :k], s[:k]
开发者ID:beibeiyang,项目名称:Latent-Dirichlet-Allocation,代码行数:35,代码来源:lsimodel.py

示例8: LSA

def LSA(M,k):  ##will return top k sentences
    """
    Score the columns of `M` through a truncated SVD and return the top k.

    Returns a list of (score, column index) tuples. If `k` is at least
    the number of columns, every column is returned unsorted; otherwise
    k entries selected through the external `heapify` helper are returned.

    Side effect: entries of `M` that are below their row average are set
    to zero in place. This happens after the SVD has been computed, so it
    does not influence the scores returned by this call.
    """
    sparse_M = scipy.sparse.csc_matrix(M)  # sparsesvd needs CSC input
    u, s, vt = sparsesvd(sparse_M, k+10)

    m, n = M.shape

    # Zero out everything below the row average (mutates M in place)
    Avg = numpy.average(M, 1)
    for row, threshold in zip(M, Avg):
        row[row < threshold] = 0

    # One score per column: singular-value weighted sum over the rows of vt
    Length = numpy.dot(s, vt)
    L = [(Length[col], col) for col in range(n)]

    if k >= len(L):
        return L

    # Build a min-heap over the first k entries
    for start in range(int(k/2-1), -1, -1):
        L = heapify(L, start, k)

    # Replace the heap minimum whenever a larger score turns up
    for i in range(k, len(L)):
        if L[0][0] < L[i][0]:
            L[0] = L[i]
            L = heapify(L, 0, k)
    return L[:k]
开发者ID:shanky-259,项目名称:Automatic_Text_Summarizer,代码行数:34,代码来源:LSA_summary.py

示例9: applySvd

 def applySvd(self):
     """
     Rebuild a dense low-rank matrix from a truncated SVD of the triplets.

     The sparse matrix is assembled from `self.array_data`,
     `self.array_row` and `self.array_col`, decomposed with
     `self.svd_dimension` factors, and the product Ut.T * diag(Sigma) * Vt
     is stored in `self.svd_matrix`. The factors and the transposed
     result are printed.
     """
     n_rows = max(self.array_row) + 1
     n_cols = max(self.array_col) + 1
     print("Applying SVD with ROW: " + str(n_rows) + " and COL: " + str(n_cols))

     triplets = (self.array_data, (self.array_row, self.array_col))
     sparse_matrix = scipy.sparse.csc_matrix(triplets, shape=(n_rows, n_cols))
     print("sparsed matrix")

     Ut, Sigma, Vt = sparsesvd(sparse_matrix, self.svd_dimension)
     print("U Sigma Vt done!")

     # Drop the reference to the sparse input before the dense product.
     sparse_matrix = array(0)
     print("Mounting Matrix SVD")
     scaled_Vt = numpy.dot(numpy.diag(Sigma), Vt)
     self.svd_matrix = numpy.dot(Ut.T, scaled_Vt)
     print("Done!")

     for item in (Ut.T, Sigma, Vt, self.svd_matrix.T):
         print(item)
         print("\n")
开发者ID:rogergranada,项目名称:ATCP,代码行数:25,代码来源:Matrix.py

示例10: main

def main(num_factors=10):
    """
    Build a word co-occurrence matrix from the PMI result files and print
    one SVD-based vector per word.

    The matrix is `limit` x `limit` (`limit` is a module-level value).
    Entry (i, j) is the pair count of word i and its labelled neighbour j.
    For each of the `limit` columns one line is printed, containing the
    column's coordinates in `ut` scaled by the singular values and
    separated by single spaces.

    `num_factors` is the number of factors requested from sparsesvd
    (default 10, the value previously hard-coded). The printed width
    follows the number of factors actually returned, because sparsesvd
    may return fewer than requested.
    """
    en_vector = ENVector()
    en_vector.read_freq("results/freq_en_fixed_pmi.txt")
    en_vector.read_pair_pmi("results/pair_en_fixed_pmi.txt")
    en_vector.sort_by_freq()
    en_vector.generate_label()
    en_vector.generate_matrix_label()
    en_vector.calculate_size()

    matrix = sp.lil_matrix((limit, limit))
    for i in range(min(limit, len(en_vector.matrix_label))):
        for col in en_vector.matrix_label[i]:
            # Neighbours outside the limit x limit window are ignored.
            if col >= limit:
                continue
            word1 = en_vector.word_list[i]
            word2 = en_vector.word_list[col]
            matrix[i, col] = en_vector.pair_count[(word1, word2)]

    smat = sp.csc_matrix(matrix)
    ut, s, vt = sparsesvd(smat, num_factors)

    # Size the output from what was returned, not from what was requested.
    n_factors = len(s)
    for i in range(limit):
        print(" ".join(str(ut[j][i] * s[j]) for j in range(n_factors)))
开发者ID:aripras,项目名称:nlp_experiment,代码行数:29,代码来源:svd.py

示例11: arun

def arun(corpus, dictionary, min_topics=10, max_topics=21, step=5):
    """
    Compute a symmetric-KL score for a range of LDA topic counts.

    For each topic count in range(min_topics, max_topics, step) a model is
    obtained through `LDA(...)`. The singular values of its topic-word
    matrix are then compared, via `sym_kl`, with a length-weighted
    document-topic distribution.

    Returns (and prints) a list of (topic count, divergence) tuples.

    NOTE(review): `my_corpus`, `l` and `sym_kl` are not defined in this
    function and are read from the enclosing scope; `corpus` is only
    passed to `LDA`. Confirm that this is intended.
    """
    print("Arun runing")
    output = []
    for num_topics in range(min_topics, max_topics, step):
        lda = LDA(dictionary, corpus, num_topics, "lda20/lda_training_" + str(num_topics))
        print("Модель построена/загружена")

        # Singular values of the topic-word matrix; 30 factors more than
        # the topic count are requested from sparsesvd.
        smat = scipy.sparse.csc_matrix(lda.expElogbeta)
        U, cm1, V = sparsesvd(smat, num_topics + 30)
        print("sparsesvd сделано")

        # Document-topic matrix
        lda_topics = lda[my_corpus]
        m2 = matutils.corpus2dense(lda_topics, lda.num_topics).transpose()
        cm2 = l.dot(m2) + 0.0001
        print("cm2norm begin")
        cm2norm = np.linalg.norm(l)
        print("cm2norm end")
        cm2 = cm2 / cm2norm

        print("%s %s" % (len(cm1), len(cm2)))
        kl = sym_kl(cm1, cm2)
        output.append((num_topics, kl))
        print("%s %s" % (num_topics, kl))
    print(output)
    return output
开发者ID:i-Hun,项目名称:thesis-code,代码行数:26,代码来源:lda_arun.py

示例12: learn

def learn(mat):
    """
    Decompose `mat` with sparsesvd using NUM_COMPONENTS factors.

    Prints progress messages including the elapsed wall-clock time and
    returns the three factors as (user_mat, axis_weights, movie_mat).
    """
    print("Starting learning process...")
    started = time.time()
    factors = sparsesvd(mat, NUM_COMPONENTS)
    user_mat, axis_weights, movie_mat = factors
    elapsed = time.time() - started
    print("Matrix decomposition complete (elapsed time: %f s)." % elapsed)
    print("Learning process complete.")
    return (user_mat, axis_weights, movie_mat)
开发者ID:dychen,项目名称:cs156b,代码行数:7,代码来源:svd2.py

示例13: test_svd_matrix

def test_svd_matrix(W, WT, D, DT, k_values=None):
    """
    Measure the reconstruction error for a range of SVD truncation levels.

    A = pinv(W) * D * pinv(W') is decomposed with sparsesvd for each k in
    `k_values` (default: 270..279, the range previously hard-coded). For
    every distinct number of returned factors z, the reconstruction
    (W * U) * (S * V * WT') is compared with `DT` through `fnorm`. The
    sweep stops as soon as a larger k no longer yields more factors.

    Returns an OrderedDict mapping z -> fnorm result, ordered by
    increasing error. The dict is empty when no decomposition yields any
    factor (this used to raise UnboundLocalError).
    """
    if k_values is None:
        k_values = range(270, 280)

    Winv = ss.csr_matrix(np.linalg.pinv(W.todense()))
    WTinv = ss.csr_matrix(np.linalg.pinv(W.transpose().todense()))
    A = ((Winv * D) * WTinv).tocsc()

    res_dict = {}
    old_z = 0
    for k in k_values:
        (ut, s, vt) = sparsesvd(A, k)
        U = ss.csr_matrix(ut.T)
        z = U.shape[1]

        # No additional factor was returned: the spectrum is exhausted.
        if z == old_z:
            break

        S = ss.csr_matrix(np.diag(s))
        V = ss.csr_matrix(vt)
        L = (W * U) * (S * V * WT.transpose())
        res_dict[z] = fnorm(L, DT)
        old_z = z

    # Sort once, after the sweep, by the numeric value of the error.
    return OrderedDict(sorted(res_dict.items(),
        key=lambda t: np.float64(t[1])))
开发者ID:f00barin,项目名称:distrib,代码行数:28,代码来源:distrib_test.py

示例14: generate_model

def generate_model(in_path, title_limit, user_limit, features, out_path):
    """
    Build an SVD model from the scores in a database and save it.

    Steps: load the scores from the database at `in_path`, filter the
    score matrix twice (first with `title_limit` / `user_limit`, then with
    1 / 1 to drop empty rows and columns), compute per-row non-zero
    averages, factorise with `features` factors and write the result with
    `savemat` to `out_path`.

    The database connection is closed even when loading the scores fails.
    """
    # connect to db and load scores
    db = pg.connect(in_path)
    try:
        scores = load_scores(db)
    finally:
        db.close()
    print("Loaded scores")

    # filter insignificant titles/users, second filtering to remove empty cols/rows
    (mat, old_ids_1) = filter_too_small(scores, title_limit, user_limit)
    (mat, old_ids_2) = filter_too_small(mat.tocsc(), 1, 1)
    print("Filtered insignificant titles and users")

    # matrix is in csr format, calc row nnz averages and convert to csc.
    # A real list (not a lazy `map` object) is needed for savemat on Python 3.
    averages = [row_nnz_average(mat, row) for row in range(mat.shape[0])]
    mat = mat.tocsc()

    # build compact titleid translation tables
    old_ids = join_old_id_dicts(old_ids_1, old_ids_2)
    (title_to_document, document_to_tile) = build_title_mapping(old_ids, mat.shape[0])
    print("Built additional data")

    # run svd
    (ut, s, vt) = sparsesvd(mat, features)
    print("Factorization finished")
    s_sqrt = numpy.diag(numpy.sqrt(s))
    s_inv = numpy.diag(numpy.power(s, -1))
    terms = ut.transpose().dot(s_sqrt)
    # NOTE(review): `documents` is derived from `ut`, and `vt` is never
    # used. Confirm this is the intended projection.
    documents = s_sqrt.dot(s_inv).dot(ut)

    # dump results
    savemat(out_path, {"Terms": terms, "Documents": documents, "Averages": averages, "TitleMapping": title_to_document, "DocumentMapping" : document_to_tile}, oned_as='row')
    print("Saved generated results")
开发者ID:vosen,项目名称:Madarame,代码行数:28,代码来源:model_generator.py

示例15: load

    def load(self, dirname, svd_k = 0):
        """
        Read the embedding stored under `dirname`, optionally reducing it
        with an SVD.

        A non-zero `svd_k` replaces the feature matrix with the transposed
        third factor of a `svd_k`-factor sparse SVD of its transpose. If
        the `sparsesvd` module cannot be imported, a warning is printed
        and the features stay as loaded.

        Returns the embedding, or None when the embedding file cannot be
        read or parsed.
        """
        self.dirname = dirname

        try:
            features, labels = load_svmlight_file(dirname + EMBEDDING_FILENAME)
            self.emb.x, self.emb.y = features, labels
        except (ValueError, IOError):
            return None

        if svd_k != 0:
            try:
                import sparsesvd
                import scipy.sparse

                # sparsesvd expects CSC input
                transposed = scipy.sparse.csc_matrix(self.emb.x.T)
                _, _, Vt = sparsesvd.sparsesvd(transposed, svd_k)
                self.emb.x = scipy.sparse.csr_matrix(Vt.T)
            except ImportError:
                print('Warning: Cannot perform SVD without sparsesvd module')

        self._loadFeatureTable()
        self._loadTOC()
        return self.emb
开发者ID:chubbymaggie,项目名称:octopus-mlutils,代码行数:29,代码来源:EmbeddingLoader.py


注:本文中的sparsesvd.sparsesvd函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。