当前位置: 首页>>代码示例>>Python>>正文


Python Sampling.randCrossValidation方法代码示例

本文整理汇总了Python中sandbox.util.Sampling.Sampling.randCrossValidation方法的典型用法代码示例。如果您正苦于以下问题:Python Sampling.randCrossValidation方法的具体用法?Python Sampling.randCrossValidation怎么用?Python Sampling.randCrossValidation使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sandbox.util.Sampling.Sampling的用法示例。


在下文中一共展示了Sampling.randCrossValidation方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: profileModelSelect

# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
 def profileModelSelect(self):
     """
     Profile IterativeSoftImpute.modelSelect on self.X, using 5 random cross
     validation folds generated over the nnz entries of self.X.
     """
     # NOTE: softImpute, lmbdas and cvInds are referenced by name inside the
     # profiled statement and are handed over through locals(), so these
     # local names must not be changed.
     softImpute = IterativeSoftImpute(k=500)
     lmbdas = numpy.linspace(1.0, 0.01, 5)
     cvInds = Sampling.randCrossValidation(5, self.X.nnz)

     statement = 'softImpute.modelSelect(self.X, lmbdas, cvInds)'
     ProfileUtils.profile(statement, globals(), locals())
开发者ID:charanpald,项目名称:sandbox,代码行数:9,代码来源:IterativeSoftImputeProfile.py

示例2: testRandCrossValidation

# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
 def testRandCrossValidation(self): 
     """
     Check that, for each fold, the union of the two index arrays returned
     by Sampling.randCrossValidation equals the full range of example indices.
     """
     folds = 3
     numExamples = 10
     allIndices = numpy.arange(numExamples)

     indices = Sampling.randCrossValidation(folds, numExamples)

     for foldInd in range(folds):
         firstInds, secondInds = indices[foldInd][0], indices[foldInd][1]
         covered = numpy.union1d(firstInds, secondInds)
         self.assertTrue((covered == allIndices).all())
开发者ID:charanpald,项目名称:sandbox,代码行数:11,代码来源:SamplingTest.py

示例3: modelSelect

# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
    def modelSelect(self, X): 
        """
        Perform model selection on X over the grid self.ks x self.lmbdas using
        cross validation, and store the best pair in self.k and self.lmbda.

        :param X: the data matrix; its nnz entries are split into self.folds
        random train/test folds (presumably a sparse matrix, only X.nnz and
        SparseUtils.submatrix are applied to it here).

        :return: a tuple (meanLocalAucs, stdLocalAucs), the mean and standard
        deviation over the folds, each of shape (len(ks), len(lmbdas)).
        """
        cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
        numKs = self.ks.shape[0]
        localAucs = numpy.zeros((numKs, self.lmbdas.shape[0], len(cvInds)))
        
        logging.debug("Performing model selection")
        paramList = []        
        
        #One task per (fold, k) pair, appended fold by fold 
        for icv, (trainInds, testInds) in enumerate(cvInds):
            Util.printIteration(icv, 1, self.folds, "Fold: ")

            trainX = SparseUtils.submatrix(X, trainInds)
            testX = SparseUtils.submatrix(X, testInds)
            
            testOmegaList = SparseUtils.getOmegaList(testX)
            
            for k in self.ks: 
                maxLocalAuc = self.copy()
                maxLocalAuc.k = k
                paramList.append((trainX, testX, testOmegaList, maxLocalAuc))
                    
        pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=100)
        
        try: 
            #imap yields results in the order of paramList, so they are read 
            #back in the same (fold, k) order in which they were appended 
            resultsIterator = pool.imap(localAucsLmbdas, paramList, self.chunkSize)
            
            for icv in range(len(cvInds)):        
                for i in range(numKs): 
                    #The builtin next() works on both Python 2.6+ and Python 3 
                    localAucs[i, :, icv] = next(resultsIterator)
        finally: 
            #Always release the worker processes, even if a task failed 
            pool.terminate()
        
        meanLocalAucs = numpy.mean(localAucs, 2)
        stdLocalAucs = numpy.std(localAucs, 2)
        
        logging.debug(meanLocalAucs)
        
        #Locate the best (k, lmbda) grid cell once 
        bestKInd, bestLmbdaInd = numpy.unravel_index(numpy.argmax(meanLocalAucs), meanLocalAucs.shape)
        k = self.ks[bestKInd]
        lmbda = self.lmbdas[bestLmbdaInd]
        
        logging.debug("Model parameters: k=" + str(k) + " lmbda=" + str(lmbda))
        
        self.k = k 
        self.lmbda = lmbda 
        
        return meanLocalAucs, stdLocalAucs
开发者ID:charanpald,项目名称:sandbox,代码行数:52,代码来源:WarpMf.py

示例4: testParallelModelSelect

# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
 def testParallelModelSelect(self): 
     """
     Smoke test: run NimfaFactorise.parallelModelSelect with the "lsnmf"
     method on a random 10x10 sparse matrix. No assertion is made on the
     returned learner or errors.
     """
     X = scipy.sparse.rand(10, 10, 0.5).tocsr()

     # Fold indices are generated over the stored (nonzero) entries of X
     idx = Sampling.randCrossValidation(3, X.getnnz())
     paramDict = {"setRank": numpy.array([5, 10, 20])}

     nimfaFactorise = NimfaFactorise("lsnmf")
     learner, meanErrors = nimfaFactorise.parallelModelSelect(X, idx, paramDict)
开发者ID:charanpald,项目名称:sandbox,代码行数:16,代码来源:NimfaFactoriseTest.py

示例5: modelSelect

# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
    def modelSelect(self, X): 
        """
        Perform model selection on X over the values in self.ks using cross
        validation, and store the best value in self.k.

        :param X: the data matrix; its nnz entries are split into self.folds
        random train/test folds (presumably a sparse matrix, only X.nnz and
        SparseUtils.submatrix are applied to it here).

        :return: a tuple (meanPrecisions, stdPrecisions), the mean and
        standard deviation over the folds, each with one entry per k.
        """
        cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
        numKs = self.ks.shape[0]
        precisions = numpy.zeros((numKs, len(cvInds)))
        
        logging.debug("Performing model selection")
        paramList = []        
        
        #One task per (fold, k) pair, appended fold by fold 
        for icv, (trainInds, testInds) in enumerate(cvInds):
            Util.printIteration(icv, 1, self.folds, "Fold: ")

            trainX = SparseUtils.submatrix(X, trainInds)
            testX = SparseUtils.submatrix(X, testInds)
            
            testOmegaList = SparseUtils.getOmegaList(testX)
            
            for k in self.ks: 
                learner = self.copy()
                learner.k = k
                paramList.append((trainX, testX, testOmegaList, learner))
                    
        #The tasks are evaluated serially and lazily. A generator expression 
        #is used in place of itertools.imap, which does not exist in Python 3. 
        #A multiprocessing.Pool with pool.imap could be substituted here to 
        #run the tasks in parallel. 
        resultsIterator = (computePrecision(params) for params in paramList)
        
        #Results are read back in the same (fold, k) order as paramList 
        for icv in range(len(cvInds)):        
            for i in range(numKs): 
                #The builtin next() works on both Python 2.6+ and Python 3 
                precisions[i, icv] = next(resultsIterator)
        
        meanPrecisions = numpy.mean(precisions, 1)
        stdPrecisions = numpy.std(precisions, 1)
        
        logging.debug(meanPrecisions)
        
        k = self.ks[numpy.argmax(meanPrecisions)]

        
        logging.debug("Model parameters: k=" + str(k)) 
        
        self.k = k 
        
        return meanPrecisions, stdPrecisions
开发者ID:charanpald,项目名称:sandbox,代码行数:51,代码来源:KNNRecommender.py

示例6: testModelSelect

# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
    def testModelSelect(self):
        """
        Check that the mean test errors returned by
        IterativeSoftImpute.modelSelect agree (to 1 decimal place) with those
        of a manual cross validation loop that fits SoftImpute on each fold.
        """
        lmbda = 0.1
        shape = (20, 20) 
        r = 20 
        numInds = 100
        noise = 0.2
        X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)

        k = 15

        iterativeSoftImpute = IterativeSoftImpute(lmbda, k=None, svdAlg="propack", updateAlg="zero")
        iterativeSoftImpute.numProcesses = 1
        rhos = numpy.linspace(0.5, 0.001, 20)
        #The builtin int replaces the numpy.int alias, removed in NumPy 1.24 
        ks = numpy.array([k], int)
        folds = 3
        cvInds = Sampling.randCrossValidation(folds, X.nnz)
        meanTestErrors, meanTrainErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)

        #Now do model selection manually 
        (rowInds, colInds) = X.nonzero()
        trainErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
        testErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
        
        for i, rho in enumerate(rhos): 
            for j, (trainInds, testInds) in enumerate(cvInds): 
                trainX = scipy.sparse.csc_matrix(X.shape)
                testX = scipy.sparse.csc_matrix(X.shape)
                
                #Copy the nonzero entries of X selected by this fold 
                for p in trainInds: 
                    trainX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]
                    
                for p in testInds: 
                    testX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]
                                 
                softImpute = SoftImpute(numpy.array([rho]), k=ks[0]) 
                ZList = [softImpute.learnModel(trainX, fullMatrices=False)]
                
                predTrainX = softImpute.predict(ZList, trainX.nonzero())[0]
                predX = softImpute.predict(ZList, testX.nonzero())[0]

                testErrors[i, j] = MCEvaluator.rootMeanSqError(testX, predX)
                trainErrors[i, j] = MCEvaluator.rootMeanSqError(trainX, predTrainX)
        
        meanTestErrors2 = testErrors.mean(1)   
        meanTrainErrors2 = trainErrors.mean(1)  
        
        #Only the test errors are compared; the train errors are not asserted 
        nptst.assert_array_almost_equal(meanTestErrors.ravel(), meanTestErrors2, 1) 
开发者ID:charanpald,项目名称:sandbox,代码行数:51,代码来源:IterativeSoftImputeTest.py

示例7: not

# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
        
    if not (fileLock.isLocked() or fileLock.fileExists()) or overwrite: 
        fileLock.lock()       
        
        logging.debug(learner)      
    
        try: 
            #Do some recommendation 
            if type(learner) == IterativeSoftImpute:  
                trainX = X.toScipyCsc()
                trainIterator = iter([trainX])
                         
                if modelSelect: 
                    modelSelectX, userInds = Sampling.sampleUsers2(X, modelSelectSamples)
                    modelSelectX = modelSelectX.toScipyCsc()                            
                    cvInds = Sampling.randCrossValidation(folds, modelSelectX.nnz)
                    meanMetrics, stdMetrics = learner.modelSelect2(modelSelectX, rhosSi, ks, cvInds)
                
                ZList = learner.learnModel(trainIterator)    
                U, s, V = ZList.next()
                U = U*s
            elif type(learner) == WeightedMf:  
                trainX = X.toScipyCsr()

                if modelSelect:                     
                    modelSelectX, userInds = Sampling.sampleUsers2(X, modelSelectSamples)
                    modelSelectX = modelSelectX.toScipyCsc()  
                    meanMetrics, stdMetrics = learner.modelSelect(modelSelectX)                          
                
                learner.learnModel(trainX)
                U = learner.U 
开发者ID:charanpald,项目名称:wallhack,代码行数:33,代码来源:ContactsRecommenderExp.py

示例8: modelSelect

# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
    def modelSelect(self, X, ks, lmbdas, gammas, nFolds, maxNTry=5):
        """
        Choose parameters based on a single matrix X. We do cross validation
        within, and set parameters according to the mean squared error.
        The selected values are stored in self.baseLearner (attributes k,
        lmbda and gamma). Return nothing.

        :param X: the data matrix, converted to COO format with X.tocoo().

        :param ks: the candidate values for k.

        :param lmbdas: the candidate values for lmbda.

        :param gammas: the candidate values for gamma.

        :param nFolds: the number of cross validation folds.

        :param maxNTry: passed unchanged to self.learnPredict for each setting.
        """
        logging.debug("Performing model selection")

        # useful
        X = X.tocoo()
        gc.collect()
        nK = len(ks) 
        nLmbda = len(lmbdas) 
        nGamma = len(gammas) 
        nLG = nLmbda * nGamma
        # numpy is used directly: the scipy.zeros/scipy.array/scipy.floor
        # aliases are deprecated and absent from recent SciPy releases
        errors = numpy.zeros((nK, nLmbda, nGamma, nFolds))
       
        # generate cross validation sets
        cvInds = Sampling.randCrossValidation(nFolds, X.nnz)
        
        # compute error for each fold / setting
        for icv, (trainInds, testInds) in enumerate(cvInds):
            Util.printIteration(icv, 1, nFolds, "Fold: ")

            trainX = SparseUtils.submatrix(X, trainInds)
            testX = SparseUtils.submatrix(X, testInds)

            # sanity checks: the two folds partition the entries of X
            assert trainX.nnz == trainInds.shape[0]
            assert testX.nnz == testInds.shape[0]
            nptst.assert_array_almost_equal((testX+trainX).data, X.data)

            # settings are listed with k varying slowest and gamma fastest,
            # which matches the (nK, nLmbda, nGamma) reshape below
            paramList = []
        
            for k in ks:
                for lmbda in lmbdas:
                    for gamma in gammas:
                        paramList.append((trainX, testX, k, lmbda, gamma, maxNTry)) 
            
            # ! Remark !
            # we can parallelize the run of parameters easily (e.g. with
            # pool.imap of a multiprocessing.Pool over paramList, untested).
            # parallelizing the run of cv-folds is not done as it is much
            # more memory-consuming 

            # non-parallel version 
            results = numpy.array([self.learnPredict(*params) for params in paramList])

            errors[:, :, :, icv] = results.reshape((nK, nLmbda, nGamma))
        
        # compute cross validation error for each setting
        # infinite and NaN errors are replaced by the largest remaining error
        errors[errors == float("inf")] = errors[errors != float("inf")].max()
        errors[numpy.isnan(errors)] = numpy.max(errors[numpy.logical_not(numpy.isnan(errors))])
        meanErrors = errors.mean(3)
        stdErrors = errors.std(3)
        logging.debug("Mean errors given (k, lambda, gamma):")
        logging.debug(meanErrors)
        logging.debug("... with standard deviation:")
        logging.debug(stdErrors)

        # keep the best
        # argmin returns a flat (row-major) index into meanErrors; split it
        # into the (k, lambda, gamma) indices with exact integer arithmetic
        iMin = int(meanErrors.argmin())
        ikMin, remainder = divmod(iMin, nLG)
        ilmbdaMin, igammaMin = divmod(remainder, nGamma)
        kMin = ks[ikMin]
        lmbdaMin = lmbdas[ilmbdaMin]
        gammaMin = gammas[igammaMin]
        logging.debug("argmin: (k, lambda, gamma) = (" + str(kMin) + ", " + str(lmbdaMin) + ", " + str(gammaMin) + ")")
        logging.debug("min = " + str(meanErrors[ikMin, ilmbdaMin, igammaMin]))
        
        self.baseLearner.k = kMin
        self.baseLearner.lmbda = lmbdaMin
        self.baseLearner.gamma = gammaMin
        
        return
开发者ID:charanpald,项目名称:sandbox,代码行数:78,代码来源:IterativeSGDNorm2Reg.py

示例9: runExperiment

# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
    def runExperiment(self, X):
        """
        Run the selected ranking experiments and save results
        """
        logging.debug("Splitting into train and test sets")
        #Make sure different runs get the same train/test split
        numpy.random.seed(21)
        m, n = X.shape
        #colProbs = (X.sum(0)+1)/float(m+1)
        #colProbs = colProbs**-self.algoArgs.itemExp
        #colProbs = numpy.ones(n)/float(n)
        trainTestXs = Sampling.shuffleSplitRows(X, 1, self.algoArgs.testSize)
        trainX, testX = trainTestXs[0]
        logging.debug("Train X shape and nnz: " + str(trainX.shape) + " " + str(trainX.nnz))
        logging.debug("Test X shape and nnz: " + str(testX.shape) + " " + str(testX.nnz))

        #Have scipy versions of each array
        trainXScipy = trainX.toScipyCsc()
        testXScipy = testX.toScipyCsc()

        if self.algoArgs.runSoftImpute:
            logging.debug("Running soft impute")
            resultsFileName = self.resultsDir + "ResultsSoftImpute.npz"

            fileLock = FileLock(resultsFileName)

            if not (fileLock.isLocked() or fileLock.fileExists()) or self.algoArgs.overwrite:
                fileLock.lock()
                logging.debug("Performing model selection, taking sample size " + str(self.algoArgs.modelSelectSamples))
                modelSelectX, userInds = Sampling.sampleUsers2(trainXScipy, self.algoArgs.modelSelectSamples, prune=True)

                try:
                    learner = IterativeSoftImpute(self.algoArgs.rhoSi, eps=self.algoArgs.epsSi, k=self.algoArgs.k, svdAlg=self.algoArgs.svdAlg, postProcess=self.algoArgs.postProcess, p=self.algoArgs.pSi, q=self.algoArgs.qSi)
                    learner.folds = self.algoArgs.folds
                    learner.metric = self.algoArgs.metric
                    learner.numProcesses = self.algoArgs.processes
                    learner.recommendSize = self.algoArgs.recommendSize
                    learner.validationSize = self.algoArgs.validationSize

                    if self.algoArgs.modelSelect:
                        cvInds = Sampling.randCrossValidation(self.algoArgs.folds, modelSelectX.nnz)
                        meanErrors, stdErrors = learner.modelSelect2(modelSelectX, self.algoArgs.rhosSi, self.algoArgs.ks, cvInds)

                        modelSelectFileName = resultsFileName.replace("Results", "ModelSelect")
                        numpy.savez(modelSelectFileName, meanErrors, stdErrors)
                        logging.debug("Saved model selection grid as " + modelSelectFileName)

                    logging.debug(learner)

                    self.recordResults(X, trainXScipy, testXScipy, learner, resultsFileName)
                finally:
                    fileLock.unlock()
            else:
                logging.debug("File is locked or already computed: " + resultsFileName)

        if self.algoArgs.runMaxLocalAuc:
            logging.debug("Running max local AUC")

            if self.algoArgs.loss != "tanh":
                resultsFileName = self.resultsDir + "ResultsMaxLocalAUC_loss=" + self.algoArgs.loss + ".npz"
            else:
                resultsFileName = self.resultsDir + "ResultsMaxLocalAUC_loss=" + self.algoArgs.loss + "_rho=" + str(self.algoArgs.rhoMlauc) + ".npz"

            fileLock = FileLock(resultsFileName)

            if not (fileLock.isLocked() or fileLock.fileExists()) or self.algoArgs.overwrite:
                fileLock.lock()

                try:
                    learner = MaxLocalAUC(self.algoArgs.k, 1-self.algoArgs.u, lmbdaU=self.algoArgs.lmbdaUMlauc, lmbdaV=self.algoArgs.lmbdaVMlauc, eps=self.algoArgs.epsMlauc, stochastic=not self.algoArgs.fullGradient)

                    learner.alpha = self.algoArgs.alpha
                    learner.alphas = self.algoArgs.alphas
                    learner.eta = self.algoArgs.eta
                    learner.folds = self.algoArgs.folds
                    learner.initialAlg = self.algoArgs.initialAlg
                    learner.itemExpP = self.algoArgs.itemExpP
                    learner.itemExpQ = self.algoArgs.itemExpQ
                    learner.ks = self.algoArgs.ks
                    learner.lmbdas = self.algoArgs.lmbdasMlauc
                    learner.loss = self.algoArgs.loss
                    learner.maxIterations = self.algoArgs.maxIterations
                    learner.maxNorms = self.algoArgs.maxNorms
                    learner.maxNormU = self.algoArgs.maxNorm
                    learner.maxNormV = self.algoArgs.maxNorm
                    learner.metric = self.algoArgs.metric
                    learner.normalise = self.algoArgs.normalise
                    learner.numAucSamples = self.algoArgs.numAucSamples
                    learner.numProcesses = self.algoArgs.processes
                    learner.numRowSamples = self.algoArgs.numRowSamples
                    learner.rate = self.algoArgs.rate
                    learner.recommendSize = self.algoArgs.recommendSize
                    learner.recordStep = self.algoArgs.recordStep
                    learner.rho = self.algoArgs.rhoMlauc
                    learner.rhos = self.algoArgs.rhosMlauc
                    learner.startAverage = self.algoArgs.startAverage
                    learner.t0 = self.algoArgs.t0
                    learner.t0s = self.algoArgs.t0s
                    learner.validationSize = self.algoArgs.validationSize
                    learner.validationUsers = self.algoArgs.validationUsers
#.........这里部分代码省略.........
开发者ID:charanpald,项目名称:wallhack,代码行数:103,代码来源:RankingExpHelper.py


注:本文中的sandbox.util.Sampling.Sampling.randCrossValidation方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。