本文整理汇总了Python中sandbox.util.SparseUtils.SparseUtils.pruneMatrixRowAndCols方法的典型用法代码示例。如果您正苦于以下问题:Python SparseUtils.pruneMatrixRowAndCols方法的具体用法?Python SparseUtils.pruneMatrixRowAndCols怎么用?Python SparseUtils.pruneMatrixRowAndCols使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sandbox.util.SparseUtils.SparseUtils的用法示例。
在下文中一共展示了SparseUtils.pruneMatrixRowAndCols方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: flixster
# 需要导入模块: from sandbox.util.SparseUtils import SparseUtils [as 别名]
# 或者: from sandbox.util.SparseUtils.SparseUtils import pruneMatrixRowAndCols [as 别名]
def flixster(minNnzRows=10, minNnzCols=2, quantile=90):
    """
    Load the Flixster ratings file as a binary user-by-movie sparse matrix.

    The first line of the file is skipped as a header. Every following line
    is split on whitespace into a user id, a movie id and a rating. An entry
    of the matrix is set to 1 when the rating is greater than 3 and to 0
    otherwise; the matrix is then pruned and passed through
    SparseUtils.pruneMatrixRowAndCols.

    :param minNnzRows: forwarded to SparseUtils.pruneMatrixRowAndCols
        (presumably the minimum number of non-zeros a row must have).
    :param minNnzCols: forwarded to SparseUtils.pruneMatrixRowAndCols
        (presumably the minimum number of non-zeros a column must have).
    :param quantile: unused here; kept so the signature matches the other
        dataset loaders.
    :return: the matrix returned by SparseUtils.pruneMatrixRowAndCols.
    """
    matrixFileName = PathDefaults.getDataDir() + "flixster/Ratings.timed.txt"

    # The context manager guarantees the handle is closed even when a line
    # fails to parse; the original left the file open.
    with open(matrixFileName) as matrixFile:
        matrixFile.readline()  # skip the header line
        userIndexer = IdIndexer("i")
        movieIndexer = IdIndexer("i")

        ratings = array.array("f")
        logging.debug("Loading ratings from " + matrixFileName)

        for i, line in enumerate(matrixFile):
            if i % 1000000 == 0:
                logging.debug("Iteration: " + str(i))
            vals = line.split()

            userIndexer.append(vals[0])
            movieIndexer.append(vals[1])
            ratings.append(float(vals[2]))

    rowInds = userIndexer.getArray()
    colInds = movieIndexer.getArray()
    ratings = numpy.array(ratings)

    # numpy.int was only an alias of the builtin int and has been removed
    # from NumPy, so the builtin is used directly.
    X = sppy.csarray((len(userIndexer.getIdDict()), len(movieIndexer.getIdDict())), storagetype="row", dtype=int)
    X.put(numpy.array(ratings > 3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
    X.prune()

    X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

    logging.debug("Read file: " + matrixFileName)
    logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

    #X = Sampling.sampleUsers(X, 1000)

    return X
示例2: epinions
# 需要导入模块: from sandbox.util.SparseUtils import SparseUtils [as 别名]
# 或者: from sandbox.util.SparseUtils.SparseUtils import pruneMatrixRowAndCols [as 别名]
def epinions(minNnzRows=10, minNnzCols=3, quantile=90):
    """
    Load the Epinions ratings as a binary user-by-item sparse matrix.

    The "rating" variable of the .mat file is read as a table in which
    column 0 holds the user id, column 1 the item id and column 3 the rating.
    An entry of the matrix is set to 1 when the rating is greater than 3 and
    to 0 otherwise; the matrix is then pruned and passed through
    SparseUtils.pruneMatrixRowAndCols.

    :param minNnzRows: forwarded to SparseUtils.pruneMatrixRowAndCols
        (presumably the minimum number of non-zeros a row must have).
    :param minNnzCols: forwarded to SparseUtils.pruneMatrixRowAndCols
        (presumably the minimum number of non-zeros a column must have).
    :param quantile: unused here; kept so the signature matches the other
        dataset loaders.
    :return: the matrix returned by SparseUtils.pruneMatrixRowAndCols.
    """
    matrixFileName = PathDefaults.getDataDir() + "epinions/rating.mat"
    A = scipy.io.loadmat(matrixFileName)["rating"]

    userIndexer = IdIndexer("i")
    itemIndexer = IdIndexer("i")

    for i in range(A.shape[0]):
        userIndexer.append(A[i, 0])
        itemIndexer.append(A[i, 1])

    rowInds = userIndexer.getArray()
    colInds = itemIndexer.getArray()
    ratings = A[:, 3]

    # numpy.int was only an alias of the builtin int and has been removed
    # from NumPy, so the builtin is used directly.
    X = sppy.csarray((len(userIndexer.getIdDict()), len(itemIndexer.getIdDict())), storagetype="row", dtype=int)
    X.put(numpy.array(ratings > 3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
    X.prune()

    X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

    logging.debug("Read file: " + matrixFileName)
    logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

    return X
示例3: mendeley2
# 需要导入模块: from sandbox.util.SparseUtils import SparseUtils [as 别名]
# 或者: from sandbox.util.SparseUtils.SparseUtils import pruneMatrixRowAndCols [as 别名]
def mendeley2(minNnzRows=10, minNnzCols=2, quantile=90, dataset="Document"):
    """
    Read an author matrix in Matrix Market format and prune it.

    The file name is built from the data directory and ``dataset``
    ("reference/author<dataset>Matrix.mtx"). The matrix is read with row
    storage and passed through SparseUtils.pruneMatrixRowAndCols; the number
    of non-zeros and the shape are logged before and after pruning.

    :param minNnzRows: forwarded to SparseUtils.pruneMatrixRowAndCols.
    :param minNnzCols: forwarded to SparseUtils.pruneMatrixRowAndCols.
    :param quantile: not used by this function.
    :param dataset: name fragment inserted into the file name.
    :return: the matrix returned by SparseUtils.pruneMatrixRowAndCols.
    """
    dataDir = PathDefaults.getDataDir()
    mtxPath = dataDir + "reference/author" + dataset + "Matrix.mtx"
    logging.debug("Reading file: " + mtxPath)

    rawMatrix = sppy.io.mmread(mtxPath, storagetype="row")
    rawSummary = "Raw non-zero elements: " + str(rawMatrix.nnz) + " shape: " + str(rawMatrix.shape)
    logging.debug(rawSummary)

    prunedMatrix = SparseUtils.pruneMatrixRowAndCols(rawMatrix, minNnzRows, minNnzCols)
    logging.debug("Read file: " + mtxPath)
    prunedSummary = "Non-zero elements: " + str(prunedMatrix.nnz) + " shape: " + str(prunedMatrix.shape)
    logging.debug(prunedSummary)

    return prunedMatrix
示例4: movieLens
# 需要导入模块: from sandbox.util.SparseUtils import SparseUtils [as 别名]
# 或者: from sandbox.util.SparseUtils.SparseUtils import pruneMatrixRowAndCols [as 别名]
def movieLens(minNnzRows=10, minNnzCols=2, quantile=90):
    """
    Load the MovieLens 100k ratings as a binary user-by-movie sparse matrix.

    The data file is read with numpy.loadtxt; column 0 is used as the user
    id, column 1 as the movie id and column 2 as the rating. Ids are shifted
    down by one to obtain zero-based indices, and the matrix dimensions are
    the largest user id and the largest movie id. An entry is set to 1 when
    the rating is greater than 3 and to 0 otherwise; the matrix is then
    pruned and passed through SparseUtils.pruneMatrixRowAndCols.

    :param minNnzRows: forwarded to SparseUtils.pruneMatrixRowAndCols
        (presumably the minimum number of non-zeros a row must have).
    :param minNnzCols: forwarded to SparseUtils.pruneMatrixRowAndCols
        (presumably the minimum number of non-zeros a column must have).
    :param quantile: only referenced by the disabled column pruning below.
    :return: the matrix returned by SparseUtils.pruneMatrixRowAndCols.
    """
    matrixFileName = PathDefaults.getDataDir() + "movielens/ml-100k/u.data"
    data = numpy.loadtxt(matrixFileName)

    # loadtxt yields floats, so the maxima must be converted before they can
    # serve as matrix dimensions.
    numUsers = int(numpy.max(data[:, 0]))
    numMovies = int(numpy.max(data[:, 1]))

    # numpy.int was only an alias of the builtin int and has been removed
    # from NumPy, so the builtin is used directly.
    X = sppy.csarray((numUsers, numMovies), storagetype="row", dtype=int)
    X.put(numpy.array(data[:, 2] > 3, int), numpy.array(data[:, 0] - 1, numpy.int32), numpy.array(data[:, 1] - 1, numpy.int32), init=True)
    #X = SparseUtilsCython.centerRowsCsarray(X)
    #X[X.nonzero()] = X.values()>0
    X.prune()

    #maxNnz = numpy.percentile(X.sum(0), quantile)
    #X = SparseUtils.pruneMatrixCols(X, minNnz=minNnzCols, maxNnz=maxNnz)
    X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

    logging.debug("Read file: " + matrixFileName)
    logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

    return X
示例5: mendeley
# 需要导入模块: from sandbox.util.SparseUtils import SparseUtils [as 别名]
# 或者: from sandbox.util.SparseUtils.SparseUtils import pruneMatrixRowAndCols [as 别名]
def mendeley(minNnzRows=10, minNnzCols=2, quantile=90, dataset="Doc", sigma=0.05, indicator=True):
    """
    Read an author-author matrix in Matrix Market format and prune it.

    The file name is built from the data directory, ``dataset`` and ``sigma``
    ("reference/authorAuthor<dataset>Matrix_sigma=<sigma>.mtx"). When
    ``indicator`` is true every non-zero entry is overwritten with 1. The
    matrix is then passed through SparseUtils.pruneMatrixRowAndCols; the
    number of non-zeros and the shape are logged before and after.

    :param minNnzRows: forwarded to SparseUtils.pruneMatrixRowAndCols.
    :param minNnzCols: forwarded to SparseUtils.pruneMatrixRowAndCols.
    :param quantile: not used by this function.
    :param dataset: name fragment inserted into the file name.
    :param sigma: value inserted into the file name via str().
    :param indicator: if true, replace all non-zero values by 1.
    :return: the matrix returned by SparseUtils.pruneMatrixRowAndCols.
    """
    dataDir = PathDefaults.getDataDir()
    mtxPath = dataDir + "reference/authorAuthor" + dataset + "Matrix_sigma=" + str(sigma) + ".mtx"
    logging.debug("Reading file: " + mtxPath)

    matrix = sppy.io.mmread(mtxPath, storagetype="row")

    if indicator:
        nonZeroInds = matrix.nonzero()
        matrix[nonZeroInds] = 1

    # NOTE(review): the prune call is assumed to run regardless of
    # `indicator`, as in the other loaders -- confirm against the upstream
    # source.
    matrix.prune()

    rawSummary = "Raw non-zero elements: " + str(matrix.nnz) + " shape: " + str(matrix.shape)
    logging.debug(rawSummary)

    prunedMatrix = SparseUtils.pruneMatrixRowAndCols(matrix, minNnzRows, minNnzCols)
    logging.debug("Read file: " + mtxPath)
    prunedSummary = "Non-zero elements: " + str(prunedMatrix.nnz) + " shape: " + str(prunedMatrix.shape)
    logging.debug(prunedSummary)

    return prunedMatrix
示例6: bookCrossing
# 需要导入模块: from sandbox.util.SparseUtils import SparseUtils [as 别名]
# 或者: from sandbox.util.SparseUtils.SparseUtils import pruneMatrixRowAndCols [as 别名]
def bookCrossing(minNnzRows=10, minNnzCols=3, quantile=90):
    """
    Load the Book-Crossing ratings as a binary user-by-item sparse matrix.

    The first line of the file is skipped as a header. Every following line
    is split on ";" into a user id, an item id and an integer rating, with
    surrounding double quotes stripped. An entry of the matrix is set to 1
    when the rating is greater than 4 or equal to 0, and to 0 otherwise; the
    matrix is then pruned and passed through
    SparseUtils.pruneMatrixRowAndCols.

    :param minNnzRows: forwarded to SparseUtils.pruneMatrixRowAndCols
        (presumably the minimum number of non-zeros a row must have).
    :param minNnzCols: forwarded to SparseUtils.pruneMatrixRowAndCols
        (presumably the minimum number of non-zeros a column must have).
    :param quantile: unused here; kept so the signature matches the other
        dataset loaders.
    :return: the matrix returned by SparseUtils.pruneMatrixRowAndCols.
    """
    matrixFileName = PathDefaults.getDataDir() + "book-crossing/BX-Book-Ratings.csv"

    # The context manager guarantees the handle is closed even when a line
    # fails to parse; the original left the file open.
    with open(matrixFileName) as matrixFile:
        matrixFile.readline()  # skip the header line
        userIndexer = IdIndexer("i")
        itemIndexer = IdIndexer("i")

        ratings = array.array("f")
        logging.debug("Loading ratings from " + matrixFileName)

        for i, line in enumerate(matrixFile):
            if i % 1000000 == 0:
                logging.debug("Iteration: " + str(i))
            vals = line.split(";")

            field1 = vals[0].strip("\"")
            field2 = vals[1].strip("\"")
            # The last field also carries the line terminator.
            field3 = int(vals[2].strip("\"\n\r"))

            userIndexer.append(field1)
            itemIndexer.append(field2)
            ratings.append(field3)

    rowInds = userIndexer.getArray()
    colInds = itemIndexer.getArray()
    ratings = numpy.array(ratings)

    # numpy.int was only an alias of the builtin int and has been removed
    # from NumPy, so the builtin is used directly.
    X = sppy.csarray((len(userIndexer.getIdDict()), len(itemIndexer.getIdDict())), storagetype="row", dtype=int)
    X.put(numpy.array(numpy.logical_or(ratings > 4, ratings == 0), int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
    X.prune()

    X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

    logging.debug("Read file: " + matrixFileName)
    logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

    return X