This page collects typical usage examples of the Python method sandbox.util.Sampling.Sampling.sampleUsers2. If you are unsure what Sampling.sampleUsers2 does or how to call it, the curated code samples below may help. You can also look further into the containing class, sandbox.util.Sampling.Sampling, for more context.
The following 7 code examples of Sampling.sampleUsers2 are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
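Before the examples, here is a minimal usage sketch for orientation. It is an illustration only: the signature sampleUsers2(X, k, prune=False), the (sampledX, userInds) return value, and the SparseUtils import path are inferred from the calls in the examples below, not taken from the library's documentation.

import numpy
from sandbox.util.Sampling import Sampling
from sandbox.util.SparseUtils import SparseUtils

# Build a small sparse binary user-item matrix (mirroring the test in Example 1)
m, n, r = 50, 80, 5
u = 0.3
X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 1-u, csarray=True, verbose=True, indsPerRow=200)

# Sample users until about 1000 non-zeros are kept; prune=True drops all-zero columns
numpy.random.seed(21)
X2, userInds = Sampling.sampleUsers2(X, 1000, prune=True)

print(X.shape, X.nnz)    # full matrix
print(X2.shape, X2.nnz)  # sampled submatrix, i.e. X[userInds, :] with empty columns removed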
Example 1: testSampleUsers2
# Required imports: from sandbox.util.Sampling import Sampling [as alias]
# Or: from sandbox.util.Sampling.Sampling import sampleUsers2 [as alias]
def testSampleUsers2(self):
    # Also uses numpy, numpy.testing (as nptst) and SparseUtils
    # (presumably from sandbox.util.SparseUtils in the same codebase).
    m = 10
    n = 15
    r = 5
    u = 0.3
    w = 1 - u
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, w, csarray=True, verbose=True, indsPerRow=200)

    # Asking for more non-zeros than X contains returns the full matrix
    k = X.nnz + 100
    X2, userInds = Sampling.sampleUsers2(X, k)
    nptst.assert_array_equal(X.toarray(), X2.toarray())

    # Test pruning of cols
    k = 500
    m = 100
    n = 500
    u = 0.1
    w = 1 - u
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, w, csarray=True, verbose=True, indsPerRow=200)

    numpy.random.seed(21)
    X2, userInds = Sampling.sampleUsers2(X, k, prune=True)
    nnz1 = X2.nnz
    # With prune=True no all-zero columns remain
    self.assertTrue((X2.sum(0) != 0).all())

    numpy.random.seed(21)
    X2, userInds = Sampling.sampleUsers2(X, k, prune=False)
    nnz2 = X2.nnz
    # Pruning only drops empty columns, so the non-zero count is unchanged
    self.assertEqual(nnz1, nnz2)

    numRuns = 50
    for i in range(numRuns):
        m = numpy.random.randint(10, 100)
        n = numpy.random.randint(10, 100)
        k = 500
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, w, csarray=True, verbose=True, indsPerRow=200)
        X2, userInds = Sampling.sampleUsers2(X, k)
        # Parentheses fixed so .all() applies to the elementwise comparison
        self.assertTrue((X.dot(X.T) != numpy.zeros((m, m))).all())
        # The sample is exactly the selected rows of X
        self.assertTrue((X2.toarray() == X.toarray()[userInds, :]).all())
        self.assertEqual(X.toarray()[userInds, :].nonzero()[0].shape[0], X2.nnz)
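Taken together, these assertions pin down the behaviour relied on in the remaining examples: the sample equals X[userInds, :] exactly, asking for more non-zeros than X contains returns the whole matrix, and prune=True only drops all-zero columns, leaving the non-zero count unchanged.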
Example 2: getDataset
# Required imports: from sandbox.util.Sampling import Sampling [as alias]
# Or: from sandbox.util.Sampling.Sampling import sampleUsers2 [as alias]
def getDataset(dataset, nnz=20000):
    """
    Return a dataset by name
    """
    if dataset == "synthetic":
        X, U, V = DatasetUtils.syntheticDataset1()
    elif dataset == "synthetic2":
        X = DatasetUtils.syntheticDataset2()
    elif dataset == "movielens":
        X = DatasetUtils.movieLens()
    elif dataset == "epinions":
        X = DatasetUtils.epinions()
        # The larger datasets are subsampled down to about nnz non-zeros
        X, userInds = Sampling.sampleUsers2(X, nnz, prune=True)
    elif dataset == "flixster":
        X = DatasetUtils.flixster()
        X, userInds = Sampling.sampleUsers2(X, nnz, prune=True)
    else:
        raise ValueError("Unknown dataset: " + dataset)

    return X
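A hypothetical call for illustration, using one of the dataset names handled above:

X = getDataset("epinions", nnz=50000)  # Epinions sample keeping roughly 50000 non-zeros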
Example 3: FileLock
# Required imports: from sandbox.util.Sampling import Sampling [as alias]
# Or: from sandbox.util.Sampling.Sampling import sampleUsers2 [as alias]
similaritiesFileName = resultsDir + "Recommendations_" + learnerName + "_" + dataset + ".csv"

fileLock = FileLock(outputFilename)

if not (fileLock.isLocked() or fileLock.fileExists()) or overwrite:
    fileLock.lock()

    logging.debug(learner)

    try:
        # Do some recommendation
        if type(learner) == IterativeSoftImpute:
            trainX = X.toScipyCsc()
            trainIterator = iter([trainX])

            if modelSelect:
                # Model selection runs on a user sample rather than the full matrix
                modelSelectX, userInds = Sampling.sampleUsers2(X, modelSelectSamples)
                modelSelectX = modelSelectX.toScipyCsc()
                cvInds = Sampling.randCrossValidation(folds, modelSelectX.nnz)
                meanMetrics, stdMetrics = learner.modelSelect2(modelSelectX, rhosSi, ks, cvInds)

            ZList = learner.learnModel(trainIterator)
            U, s, V = next(ZList)
            U = U * s
        elif type(learner) == WeightedMf:
            trainX = X.toScipyCsr()

            if modelSelect:
                modelSelectX, userInds = Sampling.sampleUsers2(X, modelSelectSamples)
                modelSelectX = modelSelectX.toScipyCsc()
                meanMetrics, stdMetrics = learner.modelSelect(modelSelectX)
        # ... (snippet truncated; the try block continues in the original source)
Example 4:
# Required imports: from sandbox.util.Sampling import Sampling [as alias]
# Or: from sandbox.util.Sampling.Sampling import sampleUsers2 [as alias]
    # Tail of a helper (presumably computeTestAuc, used with pool.imap below)
    fprTrain, tprTrain = MCEvaluator.averageRocCurve(trainX, U, V)
    fprTest, tprTest = MCEvaluator.averageRocCurve(testX, U, V)

    return fprTrain, tprTrain, fprTest, tprTest

if saveResults:
    paramList = []
    chunkSize = 1

    U, V = maxLocalAuc.initUV(X)

    for loss in losses:
        for nnz in nnzs:
            for trainX, testX in trainTestXs:
                numpy.random.seed(21)
                modelSelectX, userInds = Sampling.sampleUsers2(trainX, nnz*trainX.nnz)
                maxLocalAuc.loss = loss
                paramList.append((modelSelectX, trainX, testX, maxLocalAuc.copy(), U.copy(), V.copy()))

    pool = multiprocessing.Pool(maxtasksperchild=100, processes=multiprocessing.cpu_count())
    resultsIterator = pool.imap(computeTestAuc, paramList, chunkSize)
    #import itertools
    #resultsIterator = itertools.imap(computeTestAuc, paramList)

    meanFprTrains = []
    meanTprTrains = []
    meanFprTests = []
    meanTprTests = []

    for loss in losses:
        # ... (snippet truncated in the original source)
Example 5: print
# Required imports: from sandbox.util.Sampling import Sampling [as alias]
# Or: from sandbox.util.Sampling.Sampling import sampleUsers2 [as alias]
maxLocalAuc.numAucSamples = 10
maxLocalAuc.numProcesses = multiprocessing.cpu_count()
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 15
maxLocalAuc.rate = "constant"
maxLocalAuc.recordStep = 10
maxLocalAuc.rho = 1.0
maxLocalAuc.t0 = 1.0
maxLocalAuc.t0s = 2.0**-numpy.arange(7, 12, 1)
maxLocalAuc.validationSize = 3
maxLocalAuc.validationUsers = 0

if saveResults:
    X = DatasetUtils.getDataset(dataset, nnz=1000000)
    # Compare pruned and unpruned user samples of decreasing size
    X2, userInds = Sampling.sampleUsers2(X, 500000, prune=True)
    X3, userInds = Sampling.sampleUsers2(X, 200000, prune=True)
    X4, userInds = Sampling.sampleUsers2(X, 100000, prune=True)
    X5, userInds = Sampling.sampleUsers2(X, 500000, prune=False)
    X6, userInds = Sampling.sampleUsers2(X, 200000, prune=False)
    X7, userInds = Sampling.sampleUsers2(X, 100000, prune=False)

    print(X.shape, X.nnz)
    print(X2.shape, X2.nnz)
    print(X3.shape, X3.nnz)
    print(X4.shape, X4.nnz)
    print(X5.shape, X5.nnz)
    print(X6.shape, X6.nnz)
    # ... (snippet truncated in the original source)
Example 6: Keyword
# Required imports: from sandbox.util.Sampling import Sampling [as alias]
# Or: from sandbox.util.Sampling.Sampling import sampleUsers2 [as alias]
dataParser.add_argument("--dataset", type=str, help="The dataset to use: either Doc or Keyword (default: %(default)s)", default=dataParser.dataset)
devNull, remainingArgs = dataParser.parse_known_args(namespace=dataArgs)

if dataArgs.help:
    helpParser = argparse.ArgumentParser(description="", add_help=False, parents=[dataParser, RankingExpHelper.newAlgoParser(defaultAlgoArgs)])
    helpParser.print_help()
    exit()

# print args #
logging.info("Data params:")
keys = list(vars(dataArgs).keys())
keys.sort()
for key in keys:
    logging.info("  " + str(key) + ": " + str(dataArgs.__getattribute__(key)))

logging.info("Creating the exp-runner")

# Load/create the dataset - sample at most a million nnzs
X = DatasetUtils.mendeley(dataset=dataArgs.dataset)
numpy.random.seed(21)
X, userInds = Sampling.sampleUsers2(X, 10**6, prune=True)
m, n = X.shape

dataArgs.extendedDirName = ""
dataArgs.extendedDirName += "MendeleyCoauthors" + dataParser.dataset

rankingExpHelper = RankingExpHelper(remainingArgs, defaultAlgoArgs, dataArgs.extendedDirName)
rankingExpHelper.printAlgoArgs()
rankingExpHelper.runExperiment(X)
Example 7: runExperiment
# Required imports: from sandbox.util.Sampling import Sampling [as alias]
# Or: from sandbox.util.Sampling.Sampling import sampleUsers2 [as alias]
def runExperiment(self, X):
    """
    Run the selected ranking experiments and save results
    """
    logging.debug("Splitting into train and test sets")
    # Make sure different runs get the same train/test split
    numpy.random.seed(21)
    m, n = X.shape
    #colProbs = (X.sum(0)+1)/float(m+1)
    #colProbs = colProbs**-self.algoArgs.itemExp
    #colProbs = numpy.ones(n)/float(n)
    trainTestXs = Sampling.shuffleSplitRows(X, 1, self.algoArgs.testSize)
    trainX, testX = trainTestXs[0]
    logging.debug("Train X shape and nnz: " + str(trainX.shape) + " " + str(trainX.nnz))
    logging.debug("Test X shape and nnz: " + str(testX.shape) + " " + str(testX.nnz))

    # Have scipy versions of each array
    trainXScipy = trainX.toScipyCsc()
    testXScipy = testX.toScipyCsc()

    if self.algoArgs.runSoftImpute:
        logging.debug("Running soft impute")
        resultsFileName = self.resultsDir + "ResultsSoftImpute.npz"

        fileLock = FileLock(resultsFileName)

        if not (fileLock.isLocked() or fileLock.fileExists()) or self.algoArgs.overwrite:
            fileLock.lock()

            logging.debug("Performing model selection, taking sample size " + str(self.algoArgs.modelSelectSamples))
            modelSelectX, userInds = Sampling.sampleUsers2(trainXScipy, self.algoArgs.modelSelectSamples, prune=True)

            try:
                learner = IterativeSoftImpute(self.algoArgs.rhoSi, eps=self.algoArgs.epsSi, k=self.algoArgs.k, svdAlg=self.algoArgs.svdAlg, postProcess=self.algoArgs.postProcess, p=self.algoArgs.pSi, q=self.algoArgs.qSi)
                learner.folds = self.algoArgs.folds
                learner.metric = self.algoArgs.metric
                learner.numProcesses = self.algoArgs.processes
                learner.recommendSize = self.algoArgs.recommendSize
                learner.validationSize = self.algoArgs.validationSize

                if self.algoArgs.modelSelect:
                    cvInds = Sampling.randCrossValidation(self.algoArgs.folds, modelSelectX.nnz)
                    meanErrors, stdErrors = learner.modelSelect2(modelSelectX, self.algoArgs.rhosSi, self.algoArgs.ks, cvInds)

                    modelSelectFileName = resultsFileName.replace("Results", "ModelSelect")
                    numpy.savez(modelSelectFileName, meanErrors, stdErrors)
                    logging.debug("Saved model selection grid as " + modelSelectFileName)

                logging.debug(learner)

                self.recordResults(X, trainXScipy, testXScipy, learner, resultsFileName)
            finally:
                fileLock.unlock()
        else:
            logging.debug("File is locked or already computed: " + resultsFileName)

    if self.algoArgs.runMaxLocalAuc:
        logging.debug("Running max local AUC")

        if self.algoArgs.loss != "tanh":
            resultsFileName = self.resultsDir + "ResultsMaxLocalAUC_loss=" + self.algoArgs.loss + ".npz"
        else:
            resultsFileName = self.resultsDir + "ResultsMaxLocalAUC_loss=" + self.algoArgs.loss + "_rho=" + str(self.algoArgs.rhoMlauc) + ".npz"

        fileLock = FileLock(resultsFileName)

        if not (fileLock.isLocked() or fileLock.fileExists()) or self.algoArgs.overwrite:
            fileLock.lock()

            try:
                learner = MaxLocalAUC(self.algoArgs.k, 1-self.algoArgs.u, lmbdaU=self.algoArgs.lmbdaUMlauc, lmbdaV=self.algoArgs.lmbdaVMlauc, eps=self.algoArgs.epsMlauc, stochastic=not self.algoArgs.fullGradient)

                learner.alpha = self.algoArgs.alpha
                learner.alphas = self.algoArgs.alphas
                learner.eta = self.algoArgs.eta
                learner.folds = self.algoArgs.folds
                learner.initialAlg = self.algoArgs.initialAlg
                learner.itemExpP = self.algoArgs.itemExpP
                learner.itemExpQ = self.algoArgs.itemExpQ
                learner.ks = self.algoArgs.ks
                learner.lmbdas = self.algoArgs.lmbdasMlauc
                learner.loss = self.algoArgs.loss
                learner.maxIterations = self.algoArgs.maxIterations
                learner.maxNorms = self.algoArgs.maxNorms
                learner.maxNormU = self.algoArgs.maxNorm
                learner.maxNormV = self.algoArgs.maxNorm
                learner.metric = self.algoArgs.metric
                learner.normalise = self.algoArgs.normalise
                learner.numAucSamples = self.algoArgs.numAucSamples
                learner.numProcesses = self.algoArgs.processes
                learner.numRowSamples = self.algoArgs.numRowSamples
                learner.rate = self.algoArgs.rate
                learner.recommendSize = self.algoArgs.recommendSize
                learner.recordStep = self.algoArgs.recordStep
                learner.rho = self.algoArgs.rhoMlauc
                learner.rhos = self.algoArgs.rhosMlauc
                learner.startAverage = self.algoArgs.startAverage
                learner.t0 = self.algoArgs.t0
                learner.t0s = self.algoArgs.t0s
                learner.validationSize = self.algoArgs.validationSize
                learner.validationUsers = self.algoArgs.validationUsers
                #......... some code omitted here .........