本文整理汇总了Python中sandbox.util.Sampling.Sampling.randCrossValidation方法的典型用法代码示例。如果您正苦于以下问题：Python Sampling.randCrossValidation方法的具体用法？Python Sampling.randCrossValidation怎么用？Python Sampling.randCrossValidation使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sandbox.util.Sampling.Sampling的用法示例。
在下文中一共展示了Sampling.randCrossValidation方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: profileModelSelect
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
def profileModelSelect(self):
    """
    Profile a call to IterativeSoftImpute.modelSelect on self.X, using a
    grid of 5 lambda values and 5 random cross validation folds over the
    non-zero entries of self.X.
    """
    # NOTE: the profiled statement below is a string evaluated against
    # locals(), so the names softImpute, lmbdas and cvInds must not change.
    lmbdas = numpy.linspace(1.0, 0.01, 5)
    softImpute = IterativeSoftImpute(k=500)

    numFolds = 5
    cvInds = Sampling.randCrossValidation(numFolds, self.X.nnz)

    ProfileUtils.profile(
        'softImpute.modelSelect(self.X, lmbdas, cvInds)',
        globals(),
        locals(),
    )
示例2: testRandCrossValidation
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
def testRandCrossValidation(self):
    """
    Check that, for each of the folds returned by
    Sampling.randCrossValidation, the union of the train and test indices
    is exactly 0, 1, ..., numExamples-1.
    """
    numExamples = 10
    folds = 3
    indices = Sampling.randCrossValidation(folds, numExamples)
    expectedInds = numpy.arange(numExamples)

    for i in range(folds):
        trainInds, testInds = indices[i]
        # array_equal returns False on a shape mismatch, whereas
        # (a == b).all() would error out instead of failing the assertion.
        self.assertTrue(numpy.array_equal(numpy.union1d(trainInds, testInds), expectedInds))
示例3: modelSelect
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
def modelSelect(self, X):
    """
    Perform model selection on X and return the best parameters.

    For every cross validation fold and every k in self.ks, a copy of this
    learner with that k is evaluated by localAucsLmbdas in a worker pool.
    The (k, lmbda) pair with the largest mean result over the folds is
    stored in self.k and self.lmbda.

    :param X: a sparse matrix; its non-zero entries are split into folds.

    :return: a tuple (meanLocalAucs, stdLocalAucs) holding the mean and the
        standard deviation over the folds, each indexed by (k, lmbda).
    """
    cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
    numKs = self.ks.shape[0]
    localAucs = numpy.zeros((numKs, self.lmbdas.shape[0], len(cvInds)))

    logging.debug("Performing model selection")
    paramList = []

    # One task per (fold, k) pair, in fold-major order
    for icv, (trainInds, testInds) in enumerate(cvInds):
        Util.printIteration(icv, 1, self.folds, "Fold: ")

        trainX = SparseUtils.submatrix(X, trainInds)
        testX = SparseUtils.submatrix(X, testInds)
        testOmegaList = SparseUtils.getOmegaList(testX)

        for k in self.ks:
            maxLocalAuc = self.copy()
            maxLocalAuc.k = k
            paramList.append((trainX, testX, testOmegaList, maxLocalAuc))

    pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=100)
    try:
        resultsIterator = pool.imap(localAucsLmbdas, paramList, self.chunkSize)

        # imap yields results in submission order, i.e. the same fold-major
        # order in which paramList was built.
        for icv in range(len(cvInds)):
            for i in range(numKs):
                # presumably one value per lmbda - verify against localAucsLmbdas
                localAucs[i, :, icv] = next(resultsIterator)
    finally:
        # Always release the worker processes, even if a task raised
        pool.terminate()

    meanLocalAucs = numpy.mean(localAucs, 2)
    stdLocalAucs = numpy.std(localAucs, 2)

    logging.debug(meanLocalAucs)

    bestKInd, bestLmbdaInd = numpy.unravel_index(numpy.argmax(meanLocalAucs), meanLocalAucs.shape)
    k = self.ks[bestKInd]
    lmbda = self.lmbdas[bestLmbdaInd]

    logging.debug("Model parameters: k=" + str(k) + " lmbda=" + str(lmbda))

    self.k = k
    self.lmbda = lmbda

    return meanLocalAucs, stdLocalAucs
示例4: testParallelModelSelect
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
def testParallelModelSelect(self):
    """
    Run NimfaFactorise.parallelModelSelect with the "lsnmf" method on a
    random 10 x 10 sparse matrix, 3 cross validation folds and a small grid
    of "setRank" values. No assertion is made on the result.
    """
    X = scipy.sparse.rand(10, 10, 0.5).tocsr()

    paramDict = {"setRank": numpy.array([5, 10, 20])}

    # The folds are drawn over the non-zero entries of X
    idx = Sampling.randCrossValidation(3, X.getnnz())

    nimfaFactorise = NimfaFactorise("lsnmf")
    learner, meanErrors = nimfaFactorise.parallelModelSelect(X, idx, paramDict)
示例5: modelSelect
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
def modelSelect(self, X):
    """
    Perform model selection on X and return the best parameters.

    For every cross validation fold and every k in self.ks, a copy of this
    learner with that k is evaluated by computePrecision. The k with the
    largest mean result over the folds is stored in self.k.

    :param X: a sparse matrix; its non-zero entries are split into folds.

    :return: a tuple (meanPrecisions, stdPrecisions) holding the mean and
        the standard deviation over the folds, each indexed by k.
    """
    cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
    numKs = self.ks.shape[0]
    precisions = numpy.zeros((numKs, len(cvInds)))

    logging.debug("Performing model selection")
    paramList = []

    # One task per (fold, k) pair, in fold-major order
    for icv, (trainInds, testInds) in enumerate(cvInds):
        Util.printIteration(icv, 1, self.folds, "Fold: ")

        trainX = SparseUtils.submatrix(X, trainInds)
        testX = SparseUtils.submatrix(X, testInds)
        testOmegaList = SparseUtils.getOmegaList(testX)

        for k in self.ks:
            learner = self.copy()
            learner.k = k
            paramList.append((trainX, testX, testOmegaList, learner))

    # NOTE: the tasks are evaluated serially. A multiprocessing.Pool variant
    # (pool.imap over paramList) was disabled in the original code.
    # A generator expression is lazy on both Python 2 and Python 3, unlike
    # itertools.imap which only exists on Python 2.
    resultsIterator = (computePrecision(params) for params in paramList)

    for icv in range(len(cvInds)):
        for i in range(numKs):
            precisions[i, icv] = next(resultsIterator)

    meanPrecisions = numpy.mean(precisions, 1)
    stdPrecisions = numpy.std(precisions, 1)

    logging.debug(meanPrecisions)

    k = self.ks[numpy.argmax(meanPrecisions)]

    logging.debug("Model parameters: k=" + str(k))

    self.k = k

    return meanPrecisions, stdPrecisions
示例6: testModelSelect
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
def testModelSelect(self):
    """
    Compare the mean test errors returned by IterativeSoftImpute.modelSelect
    with the ones obtained by running SoftImpute manually on every
    (rho, fold) pair, using the same cross validation folds.
    """
    lmbda = 0.1
    shape = (20, 20)
    r = 20
    numInds = 100
    noise = 0.2
    X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)

    k = 15

    iterativeSoftImpute = IterativeSoftImpute(lmbda, k=None, svdAlg="propack", updateAlg="zero")
    iterativeSoftImpute.numProcesses = 1
    rhos = numpy.linspace(0.5, 0.001, 20)
    # numpy.int was an alias of the builtin int and has been removed from
    # recent NumPy versions; the builtin gives the same dtype.
    ks = numpy.array([k], int)
    folds = 3
    cvInds = Sampling.randCrossValidation(folds, X.nnz)
    meanTestErrors, meanTrainErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)

    #Now do model selection manually
    (rowInds, colInds) = X.nonzero()
    trainErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
    testErrors = numpy.zeros((rhos.shape[0], len(cvInds)))

    for i, rho in enumerate(rhos):
        for j, (trainInds, testInds) in enumerate(cvInds):
            trainX = scipy.sparse.csc_matrix(X.shape)
            testX = scipy.sparse.csc_matrix(X.shape)

            # Copy the non-zero entries selected by the fold into the train
            # and test matrices respectively
            for p in trainInds:
                trainX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]

            for p in testInds:
                testX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]

            softImpute = SoftImpute(numpy.array([rho]), k=ks[0])
            ZList = [softImpute.learnModel(trainX, fullMatrices=False)]

            predTrainX = softImpute.predict(ZList, trainX.nonzero())[0]
            predX = softImpute.predict(ZList, testX.nonzero())[0]

            testErrors[i, j] = MCEvaluator.rootMeanSqError(testX, predX)
            trainErrors[i, j] = MCEvaluator.rootMeanSqError(trainX, predTrainX)

    meanTestErrors2 = testErrors.mean(1)
    # The train errors are computed but, as in the original test, only the
    # test errors are compared below.
    meanTrainErrors2 = trainErrors.mean(1)

    nptst.assert_array_almost_equal(meanTestErrors.ravel(), meanTestErrors2, 1)
示例7: not
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
if not (fileLock.isLocked() or fileLock.fileExists()) or overwrite:
fileLock.lock()
logging.debug(learner)
try:
#Do some recommendation
if type(learner) == IterativeSoftImpute:
trainX = X.toScipyCsc()
trainIterator = iter([trainX])
if modelSelect:
modelSelectX, userInds = Sampling.sampleUsers2(X, modelSelectSamples)
modelSelectX = modelSelectX.toScipyCsc()
cvInds = Sampling.randCrossValidation(folds, modelSelectX.nnz)
meanMetrics, stdMetrics = learner.modelSelect2(modelSelectX, rhosSi, ks, cvInds)
ZList = learner.learnModel(trainIterator)
U, s, V = ZList.next()
U = U*s
elif type(learner) == WeightedMf:
trainX = X.toScipyCsr()
if modelSelect:
modelSelectX, userInds = Sampling.sampleUsers2(X, modelSelectSamples)
modelSelectX = modelSelectX.toScipyCsc()
meanMetrics, stdMetrics = learner.modelSelect(modelSelectX)
learner.learnModel(trainX)
U = learner.U
示例8: modelSelect
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
def modelSelect(self, X, ks, lmbdas, gammas, nFolds, maxNTry=5):
    """
    Choose parameters based on a single matrix X. We do cross validation
    within, and set parameters according to the mean of the errors returned
    by self.learnPredict over the folds.

    The selected values are stored in self.baseLearner.k,
    self.baseLearner.lmbda and self.baseLearner.gamma. Return nothing.

    :param X: a sparse matrix; its non-zero entries are split into folds.
    :param ks: candidate values for k.
    :param lmbdas: candidate values for lmbda.
    :param gammas: candidate values for gamma.
    :param nFolds: number of cross validation folds.
    :param maxNTry: passed through unchanged to self.learnPredict.

    :raises ValueError: if every computed error is infinite or NaN.
    """
    logging.debug("Performing model selection")

    # useful
    X = X.tocoo()
    gc.collect()
    nK = len(ks)
    nLmbda = len(lmbdas)
    nGamma = len(gammas)
    errors = numpy.zeros((nK, nLmbda, nGamma, nFolds))

    # generate cross validation sets
    cvInds = Sampling.randCrossValidation(nFolds, X.nnz)

    # compute error for each fold / setting
    for icv, (trainInds, testInds) in enumerate(cvInds):
        Util.printIteration(icv, 1, nFolds, "Fold: ")

        trainX = SparseUtils.submatrix(X, trainInds)
        testX = SparseUtils.submatrix(X, testInds)

        # sanity checks on the train/test split
        assert trainX.nnz == trainInds.shape[0]
        assert testX.nnz == testInds.shape[0]
        nptst.assert_array_almost_equal((testX+trainX).data, X.data)

        # one setting per (k, lmbda, gamma), with gamma varying fastest so
        # that the results reshape directly to (nK, nLmbda, nGamma)
        paramList = [
            (trainX, testX, k, lmbda, gamma, maxNTry)
            for k in ks
            for lmbda in lmbdas
            for gamma in gammas
        ]

        # ! Remark !
        # We could easily parallelize the run over the parameters (e.g. with
        # a multiprocessing.Pool, as was sketched but never tested in the
        # original code). Parallelizing over the cv-folds is not done as it
        # is much more memory-consuming.
        results = numpy.array([self.learnPredict(*params) for params in paramList])
        errors[:, :, :, icv] = results.reshape((nK, nLmbda, nGamma))

    # compute cross validation error for each setting: infinite and NaN
    # errors are replaced by the largest remaining error
    invalid = numpy.isnan(errors) | (errors == float("inf"))
    if invalid.all():
        raise ValueError("All cross validation errors are infinite or NaN")
    errors[invalid] = errors[numpy.logical_not(invalid)].max()

    meanErrors = errors.mean(3)
    stdErrors = errors.std(3)
    logging.debug("Mean errors given (k, lambda, gamma):")
    logging.debug(meanErrors)
    logging.debug("... with standard deviation:")
    logging.debug(stdErrors)

    # keep the best
    iK, iLmbda, iGamma = [int(ind) for ind in numpy.unravel_index(meanErrors.argmin(), meanErrors.shape)]
    kMin = ks[iK]
    lmbdaMin = lmbdas[iLmbda]
    gammaMin = gammas[iGamma]
    logging.debug("argmin: (k, lambda, gamma) = (" + str(kMin) + ", " + str(lmbdaMin) + ", " + str(gammaMin) + ")")
    logging.debug("min = " + str(meanErrors[iK, iLmbda, iGamma]))

    self.baseLearner.k = kMin
    self.baseLearner.lmbda = lmbdaMin
    self.baseLearner.gamma = gammaMin

    return
示例9: runExperiment
# 需要导入模块: from sandbox.util.Sampling import Sampling [as 别名]
# 或者: from sandbox.util.Sampling.Sampling import randCrossValidation [as 别名]
def runExperiment(self, X):
"""
Run the selected ranking experiments and save results
"""
logging.debug("Splitting into train and test sets")
#Make sure different runs get the same train/test split
numpy.random.seed(21)
m, n = X.shape
#colProbs = (X.sum(0)+1)/float(m+1)
#colProbs = colProbs**-self.algoArgs.itemExp
#colProbs = numpy.ones(n)/float(n)
trainTestXs = Sampling.shuffleSplitRows(X, 1, self.algoArgs.testSize)
trainX, testX = trainTestXs[0]
logging.debug("Train X shape and nnz: " + str(trainX.shape) + " " + str(trainX.nnz))
logging.debug("Test X shape and nnz: " + str(testX.shape) + " " + str(testX.nnz))
#Have scipy versions of each array
trainXScipy = trainX.toScipyCsc()
testXScipy = testX.toScipyCsc()
if self.algoArgs.runSoftImpute:
logging.debug("Running soft impute")
resultsFileName = self.resultsDir + "ResultsSoftImpute.npz"
fileLock = FileLock(resultsFileName)
if not (fileLock.isLocked() or fileLock.fileExists()) or self.algoArgs.overwrite:
fileLock.lock()
logging.debug("Performing model selection, taking sample size " + str(self.algoArgs.modelSelectSamples))
modelSelectX, userInds = Sampling.sampleUsers2(trainXScipy, self.algoArgs.modelSelectSamples, prune=True)
try:
learner = IterativeSoftImpute(self.algoArgs.rhoSi, eps=self.algoArgs.epsSi, k=self.algoArgs.k, svdAlg=self.algoArgs.svdAlg, postProcess=self.algoArgs.postProcess, p=self.algoArgs.pSi, q=self.algoArgs.qSi)
learner.folds = self.algoArgs.folds
learner.metric = self.algoArgs.metric
learner.numProcesses = self.algoArgs.processes
learner.recommendSize = self.algoArgs.recommendSize
learner.validationSize = self.algoArgs.validationSize
if self.algoArgs.modelSelect:
cvInds = Sampling.randCrossValidation(self.algoArgs.folds, modelSelectX.nnz)
meanErrors, stdErrors = learner.modelSelect2(modelSelectX, self.algoArgs.rhosSi, self.algoArgs.ks, cvInds)
modelSelectFileName = resultsFileName.replace("Results", "ModelSelect")
numpy.savez(modelSelectFileName, meanErrors, stdErrors)
logging.debug("Saved model selection grid as " + modelSelectFileName)
logging.debug(learner)
self.recordResults(X, trainXScipy, testXScipy, learner, resultsFileName)
finally:
fileLock.unlock()
else:
logging.debug("File is locked or already computed: " + resultsFileName)
if self.algoArgs.runMaxLocalAuc:
logging.debug("Running max local AUC")
if self.algoArgs.loss != "tanh":
resultsFileName = self.resultsDir + "ResultsMaxLocalAUC_loss=" + self.algoArgs.loss + ".npz"
else:
resultsFileName = self.resultsDir + "ResultsMaxLocalAUC_loss=" + self.algoArgs.loss + "_rho=" + str(self.algoArgs.rhoMlauc) + ".npz"
fileLock = FileLock(resultsFileName)
if not (fileLock.isLocked() or fileLock.fileExists()) or self.algoArgs.overwrite:
fileLock.lock()
try:
learner = MaxLocalAUC(self.algoArgs.k, 1-self.algoArgs.u, lmbdaU=self.algoArgs.lmbdaUMlauc, lmbdaV=self.algoArgs.lmbdaVMlauc, eps=self.algoArgs.epsMlauc, stochastic=not self.algoArgs.fullGradient)
learner.alpha = self.algoArgs.alpha
learner.alphas = self.algoArgs.alphas
learner.eta = self.algoArgs.eta
learner.folds = self.algoArgs.folds
learner.initialAlg = self.algoArgs.initialAlg
learner.itemExpP = self.algoArgs.itemExpP
learner.itemExpQ = self.algoArgs.itemExpQ
learner.ks = self.algoArgs.ks
learner.lmbdas = self.algoArgs.lmbdasMlauc
learner.loss = self.algoArgs.loss
learner.maxIterations = self.algoArgs.maxIterations
learner.maxNorms = self.algoArgs.maxNorms
learner.maxNormU = self.algoArgs.maxNorm
learner.maxNormV = self.algoArgs.maxNorm
learner.metric = self.algoArgs.metric
learner.normalise = self.algoArgs.normalise
learner.numAucSamples = self.algoArgs.numAucSamples
learner.numProcesses = self.algoArgs.processes
learner.numRowSamples = self.algoArgs.numRowSamples
learner.rate = self.algoArgs.rate
learner.recommendSize = self.algoArgs.recommendSize
learner.recordStep = self.algoArgs.recordStep
learner.rho = self.algoArgs.rhoMlauc
learner.rhos = self.algoArgs.rhosMlauc
learner.startAverage = self.algoArgs.startAverage
learner.t0 = self.algoArgs.t0
learner.t0s = self.algoArgs.t0s
learner.validationSize = self.algoArgs.validationSize
learner.validationUsers = self.algoArgs.validationUsers
#.........这里部分代码省略.........