当前位置: 首页>>代码示例>>Python>>正文


Python Sampling.Sampling类代码示例

本文整理汇总了Python中sandbox.util.Sampling.Sampling的典型用法代码示例。如果您正苦于以下问题:Python Sampling类的具体用法?Python Sampling怎么用?Python Sampling使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Sampling类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: testSampleUsers

    def testSampleUsers(self): 
        m = 10
        n = 15
        r = 5 
        u = 0.3
        w = 1-u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)

        k = 50
        X2, userInds = Sampling.sampleUsers(X, k)

        nptst.assert_array_equal(X.toarray(), X2.toarray())
        
        numRuns = 50
        for i in range(numRuns): 
            m = numpy.random.randint(10, 100)
            n = numpy.random.randint(10, 100)
            k = numpy.random.randint(10, 100)

            X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)

            X2, userInds = Sampling.sampleUsers(X, k)
            
            self.assertEquals(X2.shape[0], min(k, m))
            self.assertTrue((X.dot(X.T)!=numpy.zeros((m, m)).all()))
            self.assertTrue((X2.toarray() == X.toarray()[userInds, :]).all())
            self.assertEquals(X.toarray()[userInds, :].nonzero()[0].shape[0], X2.nnz)
开发者ID:charanpald,项目名称:sandbox,代码行数:27,代码来源:SamplingTest.py

示例2: testCrossValidation

    def testCrossValidation(self):
        numExamples = 10
        folds = 2

        indices = Sampling.crossValidation(folds, numExamples)

        self.assertEquals((list(indices[0][0]), list(indices[0][1])), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4]))
        self.assertEquals((list(indices[1][0]), list(indices[1][1])), ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]))

        indices = Sampling.crossValidation(3, numExamples)

        self.assertEquals((list(indices[0][0]), list(indices[0][1])), ([3, 4, 5, 6, 7, 8, 9], [0, 1, 2]))
        self.assertEquals((list(indices[1][0]), list(indices[1][1])), ([0, 1, 2, 6, 7, 8, 9], [3, 4, 5]))
        self.assertEquals((list(indices[2][0]), list(indices[2][1])), ([0, 1, 2, 3, 4, 5], [6, 7, 8, 9]))

        indices = Sampling.crossValidation(4, numExamples)

        self.assertEquals((list(indices[0][0]), list(indices[0][1])), ([2, 3, 4, 5, 6, 7, 8, 9], [0, 1]))
        self.assertEquals((list(indices[1][0]), list(indices[1][1])), ([0, 1, 5, 6, 7, 8, 9], [2, 3, 4]))
        self.assertEquals((list(indices[2][0]), list(indices[2][1])), ([0, 1, 2, 3, 4, 7, 8, 9], [5, 6]))
        self.assertEquals((list(indices[3][0]), list(indices[3][1])), ([0, 1, 2, 3, 4, 5, 6], [7, 8, 9]))

        indices = Sampling.crossValidation(numExamples, numExamples)
        self.assertEquals((list(indices[0][0]), list(indices[0][1])), ([1, 2, 3, 4, 5, 6, 7, 8, 9], [0]))
        self.assertEquals((list(indices[1][0]), list(indices[1][1])), ([0, 2, 3, 4, 5, 6, 7, 8, 9], [1]))
        self.assertEquals((list(indices[2][0]), list(indices[2][1])), ([0, 1, 3, 4, 5, 6, 7, 8, 9], [2]))
        self.assertEquals((list(indices[3][0]), list(indices[3][1])), ([0, 1, 2, 4, 5, 6, 7, 8, 9], [3]))
        self.assertEquals((list(indices[4][0]), list(indices[4][1])), ([0, 1, 2, 3, 5, 6, 7, 8, 9], [4]))

        self.assertRaises(ValueError, Sampling.crossValidation, numExamples+1, numExamples)
        self.assertRaises(ValueError, Sampling.crossValidation, 0, numExamples)
        self.assertRaises(ValueError, Sampling.crossValidation, -1, numExamples)
        self.assertRaises(ValueError, Sampling.crossValidation, folds, 1)
开发者ID:charanpald,项目名称:sandbox,代码行数:33,代码来源:SamplingTest.py

示例3: testRepCrossValidation

    def testRepCrossValidation(self): 
        numExamples = 10
        folds = 3
        repetitions = 1

        indices = Sampling.repCrossValidation(folds, numExamples, repetitions)
        
        for i in range(folds):
            self.assertTrue((numpy.union1d(indices[i][0], indices[i][1]) == numpy.arange(numExamples)).all())
        
        repetitions = 2
        indices = Sampling.repCrossValidation(folds, numExamples, repetitions)
        
        for i in range(folds):
            self.assertTrue((numpy.union1d(indices[i][0], indices[i][1]) == numpy.arange(numExamples)).all())
开发者ID:charanpald,项目名称:sandbox,代码行数:15,代码来源:SamplingTest.py

示例4: profileModelSelect

 def profileModelSelect(self):
     lmbdas = numpy.linspace(1.0, 0.01, 5)
     softImpute = IterativeSoftImpute(k=500)
     
     folds = 5
     cvInds = Sampling.randCrossValidation(folds, self.X.nnz)
     ProfileUtils.profile('softImpute.modelSelect(self.X, lmbdas, cvInds)', globals(), locals())
开发者ID:charanpald,项目名称:sandbox,代码行数:7,代码来源:IterativeSoftImputeProfile.py

示例5: testAverageRocCurve

    def testAverageRocCurve(self):
        m = 50
        n = 20
        k = 8
        u = 20.0 / m
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
            (m, n), k, w, csarray=True, verbose=True, indsPerRow=200
        )

        fpr, tpr = MCEvaluator.averageRocCurve(X, U, V)

        import matplotlib

        matplotlib.use("GTK3Agg")
        import matplotlib.pyplot as plt

        # plt.plot(fpr, tpr)
        # plt.show()

        # Now try case where we have a training set
        folds = 1
        testSize = 5
        trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)
        trainX, testX = trainTestXs[0]

        fpr, tpr = MCEvaluator.averageRocCurve(testX, U, V, trainX=trainX)
开发者ID:kentwang,项目名称:sandbox,代码行数:27,代码来源:MCEvaluatorTest.py

示例6: testParallelPen

 def testParallelPen(self): 
     #Check if penalisation == inf when treeSize < gamma 
     numExamples = 100
     X, y = data.make_regression(numExamples) 
     learner = DecisionTreeLearner(pruneType="CART", maxDepth=10, minSplit=2)
     
     paramDict = {} 
     paramDict["setGamma"] = numpy.array(numpy.round(2**numpy.arange(1, 10, 0.5)-1), dtype=numpy.int)
     
     folds = 3
     alpha = 1.0
     Cvs = numpy.array([(folds-1)*alpha])
     
     idx = Sampling.crossValidation(folds, X.shape[0])
     
     resultsList = learner.parallelPen(X, y, idx, paramDict, Cvs)
     
     learner, trainErrors, currentPenalties = resultsList[0]
     
     learner.setGamma(2**10)
     treeSize = 0
     #Let's work out the size of the unpruned tree 
     for trainInds, testInds in idx: 
         trainX = X[trainInds, :]
         trainY = y[trainInds]
         
         learner.learnModel(trainX, trainY)
         treeSize += learner.tree.size 
     
     treeSize /= float(folds)         
     
     self.assertTrue(numpy.isinf(currentPenalties[paramDict["setGamma"]>treeSize]).all())      
     self.assertTrue(not numpy.isinf(currentPenalties[paramDict["setGamma"]<treeSize]).all())
开发者ID:charanpald,项目名称:sandbox,代码行数:33,代码来源:DecisionTreeLearnerTest.py

示例7: testShuffleSplit

    def testShuffleSplit(self):
        numExamples = 10
        folds = 5

        indices = Sampling.shuffleSplit(folds, numExamples)
        
        for i in range(folds):
            self.assertTrue((numpy.union1d(indices[i][0], indices[i][1]) == numpy.arange(numExamples)).all())
        
        indices = Sampling.shuffleSplit(folds, numExamples, 0.5)
        trainSize = numExamples*0.5

        for i in range(folds):
            self.assertTrue((numpy.union1d(indices[i][0], indices[i][1]) == numpy.arange(numExamples)).all())
            self.assertTrue(indices[i][0].shape[0] == trainSize)

        indices = Sampling.shuffleSplit(folds, numExamples, 0.55)
开发者ID:charanpald,项目名称:sandbox,代码行数:17,代码来源:SamplingTest.py

示例8: testSampleUsers2

    def testSampleUsers2(self): 
        m = 10
        n = 15
        r = 5 
        u = 0.3
        w = 1-u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)

        k = X.nnz+100
        X2, userInds = Sampling.sampleUsers2(X, k)

        nptst.assert_array_equal(X.toarray(), X2.toarray())
        
        #Test pruning of cols 
        k = 500
        m = 100
        n = 500
        u = 0.1
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)
        numpy.random.seed(21)
        X2, userInds = Sampling.sampleUsers2(X, k, prune=True)
        nnz1 = X2.nnz
        self.assertTrue((X2.sum(0)!=0).all())

        numpy.random.seed(21)
        X2, userInds = Sampling.sampleUsers2(X, k, prune=False)
        nnz2 = X2.nnz
        self.assertEquals(nnz1, nnz2)

        numRuns = 50
        for i in range(numRuns): 
            m = numpy.random.randint(10, 100)
            n = numpy.random.randint(10, 100)
            k = 500

            X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)

            X2, userInds = Sampling.sampleUsers2(X, k)
            

            self.assertTrue((X.dot(X.T)!=numpy.zeros((m, m)).all()))
            self.assertTrue((X2.toarray() == X.toarray()[userInds, :]).all())
            self.assertEquals(X.toarray()[userInds, :].nonzero()[0].shape[0], X2.nnz)
开发者ID:charanpald,项目名称:sandbox,代码行数:44,代码来源:SamplingTest.py

示例9: cvPrune

 def cvPrune(self, validX, validY): 
     """
     We do something like reduced error pruning but we use cross validation 
     to decide which nodes to prune. 
     """
     
     #First set the value of the vertices using the training set. 
     #Reset all alphas to zero 
     inds = Sampling.crossValidation(self.folds, validX.shape[0])
     
     for i in self.tree.getAllVertexIds(): 
         self.tree.getVertex(i).setAlpha(0.0)
         self.tree.getVertex(i).setTestError(0.0)
     
     for trainInds, testInds in inds:             
         rootId = (0,)
         root = self.tree.getVertex(rootId)
         root.setTrainInds(trainInds)
         root.setTestInds(testInds)
         root.tempValue = numpy.mean(validY[trainInds])
         
         nodeStack = [(rootId, root.tempValue)]
         
         while len(nodeStack) != 0: 
             (nodeId, value) = nodeStack.pop()
             node = self.tree.getVertex(nodeId)
             tempTrainInds = node.getTrainInds()
             tempTestInds = node.getTestInds()
             node.setTestError(numpy.sum((validY[tempTestInds] - node.tempValue)**2) + node.getTestError())
             childIds = [self.getLeftChildId(nodeId), self.getRightChildId(nodeId)]
             
             for childId in childIds:                 
                 if self.tree.vertexExists(childId): 
                     child = self.tree.getVertex(childId)
                     
                     if childId[-1] == 0: 
                         childInds = validX[tempTrainInds, node.getFeatureInd()] < node.getThreshold()
                     else: 
                         childInds = validX[tempTrainInds, node.getFeatureInd()] >= node.getThreshold()
                     
                     if childInds.sum() !=0:   
                         value = numpy.mean(validY[tempTrainInds[childInds]])
                         
                     child.tempValue = value 
                     child.setTrainInds(tempTrainInds[childInds])
                     nodeStack.append((childId, value))
                     
                     if childId[-1] == 0: 
                         childInds = validX[tempTestInds, node.getFeatureInd()] < node.getThreshold() 
                     else: 
                         childInds = validX[tempTestInds, node.getFeatureInd()] >= node.getThreshold()  
                      
                     child.setTestInds(tempTestInds[childInds])
     
     self.computeAlphas()
     self.prune()
开发者ID:charanpald,项目名称:sandbox,代码行数:56,代码来源:DecisionTreeLearner.py

示例10: cvModelSelection

    def cvModelSelection(self, graph, paramList, paramFunc, folds, errorFunc):
        """
        ParamList is a list of lists of parameters and paramFunc
        is a list of the corresponding functions to call with the parameters
        as arguments. Note that a parameter can also be a tuple which is expanded
        out before the function is called. 

        e.g.
        paramList = [[1, 2], [2, 1], [12, 1]]
        paramFunc = [predictor.setC, predictor.setD]
        """

        inds = Sampling.crossValidation(folds, graph.getNumEdges())
        errors = numpy.zeros((len(paramList), folds))
        allEdges = graph.getAllEdges()

        for i in range(len(paramList)):
            paramSet = paramList[i]
            logging.debug("Using paramSet=" + str(paramSet))

            for j in range(len(paramSet)):
                if type(paramSet[j]) == tuple:
                    paramFunc[j](*paramSet[j])
                else: 
                    paramFunc[j](paramSet[j])

            predY = numpy.zeros(0)
            y = numpy.zeros(0)
            j = 0 

            for (trainInds, testInds) in inds:
                trainEdges = allEdges[trainInds, :]
                testEdges = allEdges[testInds, :]

                trainGraph = SparseGraph(graph.getVertexList(), graph.isUndirected())
                trainGraph.addEdges(trainEdges, graph.getEdgeValues(trainEdges))

                testGraph = SparseGraph(graph.getVertexList(), graph.isUndirected())
                testGraph.addEdges(testEdges, graph.getEdgeValues(testEdges))

                self.learnModel(trainGraph)

                predY = self.predictEdges(testGraph, testGraph.getAllEdges())
                y = testGraph.getEdgeValues(testGraph.getAllEdges())
                #Note that the order the edges is different in testGraphs as
                #opposed to graph when calling getAllEdges()

                errors[i, j] = errorFunc(y, predY)
                j = j+1 

            logging.info("Error of current fold: " + str(numpy.mean(errors[i, :])))

        meanErrors = numpy.mean(errors, 1)
        strErrors = numpy.std(errors, 1)

        return meanErrors, strErrors
开发者ID:charanpald,项目名称:sandbox,代码行数:56,代码来源:AbstractEdgeLabelPredictor.py

示例11: modelSelect

    def modelSelect(self, X, colProbs=None): 
        """
        Perform model selection on X and return the best parameters. 
        """
        m, n = X.shape
        #cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
        trainTestXs = Sampling.shuffleSplitRows(X, self.folds, self.validationSize, colProbs=colProbs)
        testMetrics = numpy.zeros((self.ks.shape[0], self.lmbdas.shape[0], len(trainTestXs)))
        
        if self.metric == "mrr":
            evaluationMethod = computeTestMRR
        elif self.metric == "f1": 
            evaluationMethod = computeTestF1
        else: 
            raise ValueError("Invalid metric: " + self.metric)        
        
        logging.debug("Performing model selection")
        paramList = []        
        
        for i, k in enumerate(self.ks): 
            for j, lmbda in enumerate(self.lmbdas): 
                for icv, (trainX, testX) in enumerate(trainTestXs):                
                    learner = self.copy()
                    learner.k = k
                    learner.lmbda = lmbda 
                
                    paramList.append((trainX.toScipyCsr(), testX.toScipyCsr(), learner))
            
        if self.numProcesses != 1: 
            pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=100)
            resultsIterator = pool.imap(evaluationMethod, paramList, self.chunkSize)
        else: 
            import itertools
            resultsIterator = itertools.imap(evaluationMethod, paramList)
        
        for i, k in enumerate(self.ks):
            for j, lmbda in enumerate(self.lmbdas):
                for icv in range(len(trainTestXs)):             
                    testMetrics[i, j, icv] = resultsIterator.next()
        
        if self.numProcesses != 1: 
            pool.terminate()
            
        meanTestMetrics= numpy.mean(testMetrics, 2)
        stdTestMetrics = numpy.std(testMetrics, 2)
        
        logging.debug("ks=" + str(self.ks)) 
        logging.debug("lmbdas=" + str(self.lmbdas)) 
        logging.debug("Mean metrics=" + str(meanTestMetrics))
        
        self.k = self.ks[numpy.unravel_index(numpy.argmax(meanTestMetrics), meanTestMetrics.shape)[0]]
        self.lmbda = self.lmbdas[numpy.unravel_index(numpy.argmax(meanTestMetrics), meanTestMetrics.shape)[1]]

        logging.debug("Model parameters: k=" + str(self.k) + " lmbda=" + str(self.lmbda))
         
        return meanTestMetrics, stdTestMetrics
开发者ID:charanpald,项目名称:sandbox,代码行数:56,代码来源:WeightedMf.py

示例12: testBootstrap2

    def testBootstrap2(self):
        numExamples = 10
        folds = 2

        indices = Sampling.bootstrap2(folds, numExamples)

        for i in range(folds):
            self.assertEquals(indices[i][0].shape[0], numExamples)
            self.assertTrue(indices[i][1].shape[0] < numExamples)
            self.assertTrue((numpy.union1d(indices[0][0], indices[0][1]) == numpy.arange(numExamples)).all())
开发者ID:charanpald,项目名称:sandbox,代码行数:10,代码来源:SamplingTest.py

示例13: testParallelPen

    def testParallelPen(self): 
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")

        paramDict = {} 
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()            
        
        resultsList = svm.parallelPen(self.X, self.y, idx, paramDict, Cv)
        
        tol = 10**-6
        bestError = 1
        trainErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        penalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

                penalty = penalty*Cv[0]/len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                trainErrors2[i, j] = Evaluator.binaryError(predY, self.y)
                penalties2[i, j] = penalty
                meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty

                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]

        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
        self.assertTrue(numpy.linalg.norm(trainErrors2.T - trainErrors) < tol)
        self.assertTrue(numpy.linalg.norm(penalties2.T - currentPenalties) < tol)
开发者ID:charanpald,项目名称:sandbox,代码行数:55,代码来源:LibSVMTest.py

示例14: generateLearner

    def generateLearner(self, X, y):
        """
        Train using the given examples and labels, and use model selection to
        find the best parameters.
        """
        if numpy.unique(y).shape[0] != 2:
            print(y)
            raise ValueError("Can only operate on binary data")

        #Do model selection first 
        if self.sampleSize == None: 
            idx = Sampling.crossValidation(self.folds, X.shape[0])
            learner, meanErrors = self.parallelModelSelect(X, y, idx, self.paramDict)
        else: 
            idx = Sampling.crossValidation(self.folds, self.sampleSize)
            inds = numpy.random.permutation(X.shape[0])[0:self.sampleSize]
            learner, meanErrors = self.parallelModelSelect(X[inds, :], y[inds], idx, self.paramDict)
            learner = self.getBestLearner(meanErrors, self.paramDict, X, y)
        
        return learner
开发者ID:charanpald,项目名称:sandbox,代码行数:20,代码来源:SVMLeafRank.py

示例15: evaluateCv

    def evaluateCv(self, X, y, folds, metricMethod=Evaluator.binaryError):
        """
        Compute the cross validation according to a given metric. 
        """
        Parameter.checkInt(folds, 2, float('inf'))
        idx = Sampling.crossValidation(folds, y.shape[0])
        metrics = AbstractPredictor.evaluateLearn(X, y, idx, self.learnModel, self.predict, metricMethod)

        mean = numpy.mean(metrics, 0)
        var = numpy.var(metrics, 0)

        return (mean, var)
开发者ID:charanpald,项目名称:sandbox,代码行数:12,代码来源:AbstractPredictor.py


注:本文中的sandbox.util.Sampling.Sampling类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。