本文整理汇总了Python中sandbox.util.Sampling.Sampling.crossValidation方法的典型用法代码示例。如果您正苦于以下问题:Python Sampling.crossValidation方法的具体用法?Python Sampling.crossValidation怎么用?Python Sampling.crossValidation使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sandbox.util.Sampling.Sampling的用法示例。
在下文中一共展示了Sampling.crossValidation方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testCrossValidation
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def testCrossValidation(self):
    """
    Check the (train, test) index pairs returned by Sampling.crossValidation
    for several fold counts, and that invalid arguments raise ValueError.
    """
    numExamples = 10
    folds = 2

    def assertFold(indices, fold, trainInds, testInds):
        # Compare one fold as a (train, test) pair of plain lists.
        # assertEqual is used because the assertEquals alias is deprecated
        # and no longer exists in recent Python versions.
        self.assertEqual((list(indices[fold][0]), list(indices[fold][1])), (trainInds, testInds))

    indices = Sampling.crossValidation(folds, numExamples)
    assertFold(indices, 0, [5, 6, 7, 8, 9], [0, 1, 2, 3, 4])
    assertFold(indices, 1, [0, 1, 2, 3, 4], [5, 6, 7, 8, 9])

    indices = Sampling.crossValidation(3, numExamples)
    assertFold(indices, 0, [3, 4, 5, 6, 7, 8, 9], [0, 1, 2])
    assertFold(indices, 1, [0, 1, 2, 6, 7, 8, 9], [3, 4, 5])
    assertFold(indices, 2, [0, 1, 2, 3, 4, 5], [6, 7, 8, 9])

    indices = Sampling.crossValidation(4, numExamples)
    assertFold(indices, 0, [2, 3, 4, 5, 6, 7, 8, 9], [0, 1])
    assertFold(indices, 1, [0, 1, 5, 6, 7, 8, 9], [2, 3, 4])
    assertFold(indices, 2, [0, 1, 2, 3, 4, 7, 8, 9], [5, 6])
    assertFold(indices, 3, [0, 1, 2, 3, 4, 5, 6], [7, 8, 9])

    # Leave-one-out: as many folds as examples
    indices = Sampling.crossValidation(numExamples, numExamples)
    assertFold(indices, 0, [1, 2, 3, 4, 5, 6, 7, 8, 9], [0])
    assertFold(indices, 1, [0, 2, 3, 4, 5, 6, 7, 8, 9], [1])
    assertFold(indices, 2, [0, 1, 3, 4, 5, 6, 7, 8, 9], [2])
    assertFold(indices, 3, [0, 1, 2, 4, 5, 6, 7, 8, 9], [3])
    assertFold(indices, 4, [0, 1, 2, 3, 5, 6, 7, 8, 9], [4])

    # Invalid fold counts / example counts must be rejected
    self.assertRaises(ValueError, Sampling.crossValidation, numExamples+1, numExamples)
    self.assertRaises(ValueError, Sampling.crossValidation, 0, numExamples)
    self.assertRaises(ValueError, Sampling.crossValidation, -1, numExamples)
    self.assertRaises(ValueError, Sampling.crossValidation, folds, 1)
示例2: testParallelPen
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def testParallelPen(self):
    """
    Check the penalties returned by DecisionTreeLearner.parallelPen against
    the average size of the trees learnt on each training fold.
    """
    #Check if penalisation == inf when treeSize < gamma
    numExamples = 100
    X, y = data.make_regression(numExamples)
    learner = DecisionTreeLearner(pruneType="CART", maxDepth=10, minSplit=2)

    paramDict = {}
    # The numpy.int alias has been removed from NumPy; the builtin int is
    # the equivalent dtype.
    paramDict["setGamma"] = numpy.array(numpy.round(2**numpy.arange(1, 10, 0.5)-1), dtype=int)

    folds = 3
    alpha = 1.0
    Cvs = numpy.array([(folds-1)*alpha])

    idx = Sampling.crossValidation(folds, X.shape[0])

    resultsList = learner.parallelPen(X, y, idx, paramDict, Cvs)
    learner, trainErrors, currentPenalties = resultsList[0]

    learner.setGamma(2**10)
    treeSize = 0

    #Let's work out the size of the unpruned tree
    for trainInds, testInds in idx:
        trainX = X[trainInds, :]
        trainY = y[trainInds]

        learner.learnModel(trainX, trainY)
        treeSize += learner.tree.size

    # Average tree size over the folds
    treeSize /= float(folds)

    # All penalties for gamma above the mean tree size must be infinite ...
    self.assertTrue(numpy.isinf(currentPenalties[paramDict["setGamma"]>treeSize]).all())
    # ... and those for gamma below it must not all be infinite
    self.assertTrue(not numpy.isinf(currentPenalties[paramDict["setGamma"]<treeSize]).all())
示例3: cvPrune
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def cvPrune(self, validX, validY):
    """
    We do something like reduced error pruning but we use cross validation
    to decide which nodes to prune.

    For every (train, test) split from Sampling.crossValidation the tree is
    walked from the root. Each visited vertex accumulates the squared error
    of its temporary value on the test indices that reach it, and its
    existing children are given their own train/test index subsets by
    applying the vertex's feature/threshold split. Finally computeAlphas()
    and prune() are called.

    :param validX: examples, indexed as validX[rowIndices, featureInd]
    :param validY: labels, indexed with the same row indices as validX
    """
    #First set the value of the vertices using the training set.
    #Reset all alphas to zero
    inds = Sampling.crossValidation(self.folds, validX.shape[0])

    for i in self.tree.getAllVertexIds():
        self.tree.getVertex(i).setAlpha(0.0)
        self.tree.getVertex(i).setTestError(0.0)

    for trainInds, testInds in inds:
        rootId = (0,)
        root = self.tree.getVertex(rootId)
        root.setTrainInds(trainInds)
        root.setTestInds(testInds)
        root.tempValue = numpy.mean(validY[trainInds])

        # Depth-first walk; each stack entry carries the value of the vertex
        nodeStack = [(rootId, root.tempValue)]

        while nodeStack:
            (nodeId, value) = nodeStack.pop()
            node = self.tree.getVertex(nodeId)
            tempTrainInds = node.getTrainInds()
            tempTestInds = node.getTestInds()
            # Test errors are summed over the folds
            node.setTestError(numpy.sum((validY[tempTestInds] - node.tempValue)**2) + node.getTestError())
            childIds = [self.getLeftChildId(nodeId), self.getRightChildId(nodeId)]

            for childId in childIds:
                if self.tree.vertexExists(childId):
                    child = self.tree.getVertex(childId)

                    # A child id ending in 0 takes the "< threshold" side
                    if childId[-1] == 0:
                        childInds = validX[tempTrainInds, node.getFeatureInd()] < node.getThreshold()
                    else:
                        childInds = validX[tempTrainInds, node.getFeatureInd()] >= node.getThreshold()

                    # NOTE(review): nesting reconstructed - it is assumed that
                    # only the mean is conditional, so a child that receives no
                    # training examples keeps the value popped from the stack
                    # (or the value computed for its sibling). Please confirm.
                    if childInds.sum() != 0:
                        value = numpy.mean(validY[tempTrainInds[childInds]])

                    child.tempValue = value
                    child.setTrainInds(tempTrainInds[childInds])
                    nodeStack.append((childId, value))

                    # Same split applied to the test indices
                    if childId[-1] == 0:
                        childInds = validX[tempTestInds, node.getFeatureInd()] < node.getThreshold()
                    else:
                        childInds = validX[tempTestInds, node.getFeatureInd()] >= node.getThreshold()

                    child.setTestInds(tempTestInds[childInds])

    self.computeAlphas()
    self.prune()
示例4: cvModelSelection
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def cvModelSelection(self, graph, paramList, paramFunc, folds, errorFunc):
    """
    ParamList is a list of lists of parameters and paramFunc
    is a list of the corresponding functions to call with the parameters
    as arguments. Note that a parameter can also be a tuple which is expanded
    out before the function is called.

    e.g.
    paramList = [[1, 2], [2, 1], [12, 1]]
    paramFunc = [predictor.setC, predictor.setD]

    The edges of graph are split into folds with Sampling.crossValidation;
    for each parameter set a model is learnt on the training edges of each
    fold and errorFunc(y, predY) is evaluated on the test edges.

    :param graph: the graph whose edges are cross validated
    :param paramList: list of parameter sets to try
    :param paramFunc: setter functions, one per parameter in a set
    :param folds: number of cross validation folds
    :param errorFunc: called as errorFunc(trueValues, predictedValues)
    :return: (mean, standard deviation) of the fold errors, one entry per
        parameter set
    """
    inds = Sampling.crossValidation(folds, graph.getNumEdges())
    errors = numpy.zeros((len(paramList), folds))
    allEdges = graph.getAllEdges()

    for i, paramSet in enumerate(paramList):
        logging.debug("Using paramSet=" + str(paramSet))

        # Apply every parameter through its setter, expanding tuples
        for j, param in enumerate(paramSet):
            if isinstance(param, tuple):
                paramFunc[j](*param)
            else:
                paramFunc[j](param)

        for j, (trainInds, testInds) in enumerate(inds):
            trainEdges = allEdges[trainInds, :]
            testEdges = allEdges[testInds, :]

            trainGraph = SparseGraph(graph.getVertexList(), graph.isUndirected())
            trainGraph.addEdges(trainEdges, graph.getEdgeValues(trainEdges))

            testGraph = SparseGraph(graph.getVertexList(), graph.isUndirected())
            testGraph.addEdges(testEdges, graph.getEdgeValues(testEdges))

            self.learnModel(trainGraph)

            predY = self.predictEdges(testGraph, testGraph.getAllEdges())
            y = testGraph.getEdgeValues(testGraph.getAllEdges())
            #Note that the order the edges is different in testGraphs as
            #opposed to graph when calling getAllEdges()

            errors[i, j] = errorFunc(y, predY)

        # The logged value is the mean over all folds for this parameter set
        logging.info("Error of current fold: " + str(numpy.mean(errors[i, :])))

    meanErrors = numpy.mean(errors, 1)
    strErrors = numpy.std(errors, 1)

    return meanErrors, strErrors
示例5: testParallelPen
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def testParallelPen(self):
    """
    Compare the first result of svm.parallelPen with a penalised error grid
    computed directly in this test over svm.Cs and svm.gammas.
    """
    folds = 3
    Cv = numpy.array([4.0])
    idx = Sampling.crossValidation(folds, self.X.shape[0])
    svm = self.svm
    svm.setKernel("gaussian")

    paramDict = {}
    paramDict["setC"] = svm.getCs()
    paramDict["setGamma"] = svm.getGammas()

    resultsList = svm.parallelPen(self.X, self.y, idx, paramDict, Cv)

    tol = 10**-6
    # Start from infinity so the best parameters are always assigned, even
    # when every penalised error is >= 1
    bestError = float("inf")
    bestC = None
    bestGamma = None
    trainErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
    penalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
    meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

    for i in range(svm.Cs.shape[0]):
        C = svm.Cs[i]
        for j in range(svm.gammas.shape[0]):
            gamma = svm.gammas[j]
            penalty = 0

            # Penalty: error on all data minus error on the training fold
            for trainInds, testInds in idx:
                trainX = self.X[trainInds, :]
                trainY = self.y[trainInds]

                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

            penalty = penalty*Cv[0]/len(idx)

            # Training error of the model learnt on all examples
            svm.learnModel(self.X, self.y)
            predY = svm.predict(self.X)
            trainErrors2[i, j] = Evaluator.binaryError(predY, self.y)
            penalties2[i, j] = penalty
            meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty

            if meanErrors2[i, j] < bestError:
                bestC = C
                bestGamma = gamma
                bestError = meanErrors2[i, j]

    bestSVM, trainErrors, currentPenalties = resultsList[0]
    meanErrors = trainErrors + currentPenalties

    self.assertEqual(bestC, bestSVM.getC())
    self.assertEqual(bestGamma, bestSVM.getGamma())
    # The grids built here are indexed [C, gamma], hence the transposes
    self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
    self.assertTrue(numpy.linalg.norm(trainErrors2.T - trainErrors) < tol)
    self.assertTrue(numpy.linalg.norm(penalties2.T - currentPenalties) < tol)
示例6: generateLearner
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def generateLearner(self, X, y):
    """
    Train using the given examples and labels, and use model selection to
    find the best parameters.

    :param X: example matrix, one row per example
    :param y: labels; must contain exactly two distinct values
    :return: the learner chosen by model selection
    :raises ValueError: if y does not contain exactly two distinct values
    """
    if numpy.unique(y).shape[0] != 2:
        print(y)
        raise ValueError("Can only operate on binary data")

    #Do model selection first
    if self.sampleSize is None:
        idx = Sampling.crossValidation(self.folds, X.shape[0])
        learner, meanErrors = self.parallelModelSelect(X, y, idx, self.paramDict)
    else:
        # Model selection on a random subsample of sampleSize examples
        idx = Sampling.crossValidation(self.folds, self.sampleSize)
        inds = numpy.random.permutation(X.shape[0])[0:self.sampleSize]
        learner, meanErrors = self.parallelModelSelect(X[inds, :], y[inds], idx, self.paramDict)
        # NOTE(review): assumed to belong to this branch, so that the
        # selected parameters are applied using all of X, y - please confirm
        learner = self.getBestLearner(meanErrors, self.paramDict, X, y)

    return learner
示例7: evaluateCv
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def evaluateCv(self, X, y, folds, metricMethod=Evaluator.binaryError):
    """
    Compute the cross validation according to a given metric.

    :param X: the examples
    :param y: the labels; y.shape[0] is used as the number of examples
    :param folds: number of folds, checked to be an integer of at least 2
    :param metricMethod: metric passed on to AbstractPredictor.evaluateLearn
    :return: (mean, variance) of the metrics, both taken along axis 0
    """
    Parameter.checkInt(folds, 2, float('inf'))
    idx = Sampling.crossValidation(folds, y.shape[0])
    metrics = AbstractPredictor.evaluateLearn(X, y, idx, self.learnModel, self.predict, metricMethod)

    mean = numpy.mean(metrics, 0)
    var = numpy.var(metrics, 0)

    return (mean, var)
示例8: testParallelVfPenRbf2
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def testParallelVfPenRbf2(self):
    """
    Compare the first result of svm.parallelVfPenRbf for Epsilon_SVR with a
    penalised error grid computed directly in this test.
    """
    #Test support vector regression
    folds = 3
    Cv = numpy.array([4.0])
    idx = Sampling.crossValidation(folds, self.X.shape[0])
    svm = self.svm
    svm.setKernel("gaussian")
    svm.setSvmType("Epsilon_SVR")
    resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv, type="Epsilon_SVR")

    tol = 10**-6
    # Start from infinity so the best parameters are always assigned
    bestError = float("inf")
    bestC = None
    bestGamma = None
    bestEpsilon = None
    meanErrors2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

    for i in range(svm.Cs.shape[0]):
        C = svm.Cs[i]
        for j in range(svm.gammas.shape[0]):
            gamma = svm.gammas[j]
            for k in range(svm.epsilons.shape[0]):
                epsilon = svm.epsilons[k]
                penalty = 0

                # Penalty: error on all data minus error on the training fold
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty += svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)

                penalty = penalty*Cv[0]/len(idx)

                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                # Grid is indexed [gamma, epsilon, C]
                meanErrors2[j, k, i] = svm.getMetricMethod()(predY, self.y) + penalty

                if meanErrors2[j, k, i] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestEpsilon = epsilon
                    bestError = meanErrors2[j, k, i]

    bestSVM, trainErrors, currentPenalties = resultsList[0]
    meanErrors = trainErrors + currentPenalties

    self.assertEqual(bestC, bestSVM.getC())
    self.assertEqual(bestGamma, bestSVM.getGamma())
    self.assertEqual(bestEpsilon, bestSVM.getEpsilon())
    self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)
示例9: testParallelPenaltyGrid
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def testParallelPenaltyGrid(self):
    """
    Call RandomForest.parallelPenaltyGrid on the first 40 examples; the
    result is not checked, so this only verifies that the call completes.
    """
    folds = 3
    # idx is not used further in this test
    idx = Sampling.crossValidation(folds, self.X.shape[0])
    randomForest = RandomForest()

    trainX = self.X[0:40, :]
    trainY = self.y[0:40]

    paramDict = {}
    paramDict["setMinSplit"] = randomForest.getMinSplits()
    paramDict["setMaxDepth"] = randomForest.getMaxDepths()

    idealPenalties = randomForest.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)
示例10: testParallelPenaltyGrid
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def testParallelPenaltyGrid(self):
    """
    Call DecisionTree.parallelVfcv and then parallelPenaltyGrid on the first
    40 examples; the results are not checked, so this only verifies that the
    calls complete.
    """
    folds = 3
    idx = Sampling.crossValidation(folds, self.X.shape[0])
    decisionTree = DecisionTree()
    bestLearner, meanErrors = decisionTree.parallelVfcv(self.X, self.y, idx)

    trainX = self.X[0:40, :]
    trainY = self.y[0:40]

    paramDict = {}
    paramDict["setMinSplit"] = decisionTree.getMinSplits()
    paramDict["setMaxDepth"] = decisionTree.getMaxDepths()

    idealPenalties = decisionTree.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)
示例11: testParallelVfcvRbf2
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def testParallelVfcvRbf2(self):
    """
    Compare svm.parallelVfcvRbf for Epsilon_SVR with a cross validation
    error grid computed directly in this test.
    """
    #In this test we try SVM regression
    folds = 3
    idx = Sampling.crossValidation(folds, self.X.shape[0])
    svm = self.svm
    svm.setKernel("gaussian")
    svm.setSvmType("Epsilon_SVR")
    bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx, type="Epsilon_SVR")

    tol = 10**-6
    # Start from infinity so the best parameters are always assigned
    bestError = float("inf")
    bestC = None
    bestGamma = None
    bestEpsilon = None
    meanErrors2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

    for i in range(svm.Cs.shape[0]):
        C = svm.Cs[i]
        for j in range(svm.gammas.shape[0]):
            gamma = svm.gammas[j]
            for k in range(svm.epsilons.shape[0]):
                epsilon = svm.epsilons[k]
                error = 0

                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += svm.getMetricMethod()(predY, testY)

                # Grid is indexed [gamma, epsilon, C]
                meanErrors2[j, k, i] = error/len(idx)

                # error is the sum over folds; its minimiser is the same as
                # that of the mean
                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error
                    bestEpsilon = epsilon

    self.assertEqual(bestC, bestSVM.getC())
    self.assertEqual(bestGamma, bestSVM.getGamma())
    self.assertEqual(bestEpsilon, bestSVM.getEpsilon())
    self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)
示例12: testParallelModelSelect
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def testParallelModelSelect(self):
    """
    Compare svm.parallelModelSelect with a cross validation error grid over
    svm.Cs and svm.gammas computed directly in this test.
    """
    folds = 3
    idx = Sampling.crossValidation(folds, self.X.shape[0])
    svm = self.svm
    svm.setKernel("gaussian")

    paramDict = {}
    paramDict["setC"] = svm.getCs()
    paramDict["setGamma"] = svm.getGammas()

    bestSVM, meanErrors = svm.parallelModelSelect(self.X, self.y, idx, paramDict)

    tol = 10**-6
    # error below is a sum over folds and can reach 1 or more, so start from
    # infinity to make sure the best parameters are always assigned
    bestError = float("inf")
    bestC = None
    bestGamma = None
    meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
    print("Computing real grid")

    for i in range(svm.Cs.shape[0]):
        C = svm.Cs[i]
        for j in range(svm.gammas.shape[0]):
            gamma = svm.gammas[j]
            error = 0

            for trainInds, testInds in idx:
                trainX = self.X[trainInds, :]
                trainY = self.y[trainInds]
                testX = self.X[testInds, :]
                testY = self.y[testInds]

                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(testX)
                error += Evaluator.binaryError(predY, testY)

            meanErrors2[i, j] = error/len(idx)

            if error < bestError:
                bestC = C
                bestGamma = gamma
                bestError = error

    self.assertEqual(bestC, bestSVM.getC())
    self.assertEqual(bestGamma, bestSVM.getGamma())
    # The grid built here is indexed [C, gamma], hence the transpose
    self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
示例13: testGetBestLearner
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def testGetBestLearner(self):
    """
    With svm.normModelSelect enabled, check that the C of the learner
    returned by getBestLearner is the one whose weight norm on all the data
    is closest to the mean weight norm over the training folds.
    """
    svm = self.svm
    paramDict = {}
    paramDict["setC"] = svm.getCs()
    paramDict["setGamma"] = svm.getGammas()

    # Random error grid with one entry per (C, gamma) pair
    errors = numpy.random.rand(svm.getCs().shape[0], svm.getGammas().shape[0])

    folds = 5
    idx = Sampling.crossValidation(folds, self.X.shape[0])

    svm.normModelSelect = True
    svm.setKernel("gaussian")
    learner = svm.getBestLearner(errors, paramDict, self.X, self.y, idx)

    bestC = learner.getC()

    #Find the best norm
    bestInds = numpy.unravel_index(numpy.argmin(errors), errors.shape)
    learner.setC(svm.getCs()[bestInds[0]])
    learner.setGamma(svm.getGammas()[bestInds[1]])

    norms = []
    for trainInds, testInds in idx:
        validX = self.X[trainInds, :]
        validY = self.y[trainInds]
        learner.learnModel(validX, validY)

        norms.append(learner.weightNorm())

    bestNorm = numpy.array(norms).mean()

    # Weight norm on all the data for every candidate C
    norms = numpy.zeros(paramDict["setC"].shape[0])
    for i, C in enumerate(paramDict["setC"]):
        learner.setC(C)
        learner.learnModel(self.X, self.y)
        norms[i] = learner.weightNorm()

    bestC2 = paramDict["setC"][numpy.abs(norms-bestNorm).argmin()]

    self.assertEqual(bestC, bestC2)
示例14: run
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
def run():
    """
    Run learner.parallelPen twice, drawing new cross validation indices with
    Sampling.crossValidation before each call.

    self, numExamples, learner, X, Y and Cvs are not defined here and are
    taken from the enclosing scope.
    """
    for i in range(2):
        print("Iteration " + str(i))
        idx = Sampling.crossValidation(self.folds, numExamples)
        learner.parallelPen(X, Y, idx, self.paramDict, Cvs)
示例15: sampleMethod
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import crossValidation [as 别名]
# Run parallelPen on the validation data and print, for each returned result,
# the mean of its training errors and the mean of its penalties.
idx = sampleMethod(folds, validY.shape[0])
svmGridResults = learner.parallelPen(validX, validY, idx, paramDict, Cvs)

for result in svmGridResults:
    # Note that learner is rebound to the learner stored in each result
    learner, trainErrors, currentPenalties = result
    print(numpy.mean(trainErrors), numpy.mean(currentPenalties))
"""
#Figure out why the penalty is increasing
X = trainX
y = trainY
for i in range(foldsSet.shape[0]):
folds = foldsSet[i]
idx = Sampling.crossValidation(folds, validX.shape[0])
penalty = 0
fullError = 0
trainError = 0
learner.learnModel(validX, validY)
predY = learner.predict(X)
predValidY = learner.predict(validX)
idealPenalty = Evaluator.rootMeanSqError(predY, y) - Evaluator.rootMeanSqError(predValidY, validY)
for trainInds, testInds in idx:
trainX = validX[trainInds, :]
trainY = validY[trainInds]
#learner.setGamma(gamma)