本文整理汇总了Python中sandbox.util.PathDefaults.PathDefaults.getDataDir方法的典型用法代码示例。如果您正苦于以下问题:Python PathDefaults.getDataDir方法的具体用法?Python PathDefaults.getDataDir怎么用?Python PathDefaults.getDataDir使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sandbox.util.PathDefaults.PathDefaults
的用法示例。
在下文中一共展示了PathDefaults.getDataDir方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def __init__(self, field):
    """
    Set up file locations and parameters for processing the DBLP XML dump
    and matching experts for the given *field*, then run the cleaning and
    matching steps.

    :param field: name of the research field; used to pick the results
        subdirectory under ``reputation/``.
    """
    numpy.random.seed(21)  # fixed seed so expert matching is reproducible
    dataDir = PathDefaults.getDataDir() + "dblp/"
    self.xmlFileName = dataDir + "dblp.xml"
    self.xmlCleanFilename = dataDir + "dblpClean.xml"

    resultsDir = PathDefaults.getDataDir() + "reputation/" + field + "/"
    self.expertsFileName = resultsDir + "experts.txt"
    self.expertMatchesFilename = resultsDir + "experts_matches.csv"
    self.trainExpertMatchesFilename = resultsDir + "experts_train_matches.csv"
    self.testExpertMatchesFilename = resultsDir + "experts_test_matches.csv"
    self.coauthorsFilename = resultsDir + "coauthors.csv"
    self.publicationsFilename = resultsDir + "publications.csv"

    self.stepSize = 100000          # lines per progress/processing step
    self.numLines = 33532888        # total line count of the DBLP dump
    self.publicationTypes = set(["article", "inproceedings", "proceedings", "book", "incollection", "phdthesis", "mastersthesis", "www"])
    self.p = 0.5                    # train/test split proportion — presumably; confirm against callers
    self.matchCutoff = 0.95         # minimum similarity for an expert name match

    self.cleanXML()
    self.matchExperts()
    # Fixed typo in the original message ("ready done" -> "already done")
    logging.warning("Now you must disambiguate the matched experts if not already done")
示例2: processSimpleDataset
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def processSimpleDataset(name, numRealisations, split, ext=".csv", delimiter=",", usecols=None, skiprows=1, converters=None):
    """
    Load a regression dataset stored as a delimited text file (last column is
    the target), generate shuffle-split realisations, and save the
    preprocessed result.

    :param name: dataset name, used for both the input file and output dir.
    :param numRealisations: number of random train/test realisations.
    :param split: proportion used by the shuffle split.
    Remaining parameters are forwarded to ``numpy.loadtxt``.
    """
    numpy.random.seed(21)
    baseDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    fileName = baseDir + name + ext
    print("Loading data from file " + fileName)

    outputDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "/"

    XY = numpy.loadtxt(fileName, delimiter=delimiter, skiprows=skiprows, usecols=usecols, converters=converters)
    X, y = XY[:, :-1], XY[:, -1]  # features are all but the last column

    idx = Sampling.shuffleSplit(numRealisations, X.shape[0], split)
    preprocessSave(X, y, outputDir, idx)
示例3: testGenerateRandomGraph
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testGenerateRandomGraph(self):
    """
    Exercise generateRandomGraph on a small-world graph built over the
    ego/alter CSV data.
    """
    egoFileName = PathDefaults.getDataDir() + "infoDiffusion/EgoData.csv"
    alterFileName = PathDefaults.getDataDir() + "infoDiffusion/AlterData.csv"

    vertexCount = 1000
    infoProb = 0.1
    rewireProb = 0.1
    numNeighbours = 10

    # Build a small-world graph and hand it to the simulator
    generator = SmallWorldGenerator(rewireProb, numNeighbours)
    graph = generator.generate(SparseGraph(VertexList(vertexCount, 0)))

    self.svmEgoSimulator.generateRandomGraph(egoFileName, alterFileName, infoProb, graph)
示例4: flixster
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def flixster(minNnzRows=10, minNnzCols=2, quantile=90):
    """
    Load the Flixster ratings file and return a binarised (rating > 3)
    sparse user x movie matrix, pruned to rows/columns with enough nonzeros.

    :param minNnzRows: minimum nonzeros per row kept after pruning.
    :param minNnzCols: minimum nonzeros per column kept after pruning.
    :param quantile: unused here; kept for interface compatibility.
    :return: an sppy.csarray of 0/1 entries.
    """
    matrixFileName = PathDefaults.getDataDir() + "flixster/Ratings.timed.txt"

    userIndexer = IdIndexer("i")
    movieIndexer = IdIndexer("i")
    ratings = array.array("f")
    logging.debug("Loading ratings from " + matrixFileName)

    # Context manager closes the file even on error (the original leaked the handle)
    with open(matrixFileName) as matrixFile:
        matrixFile.readline()  # skip header line
        for i, line in enumerate(matrixFile):
            if i % 1000000 == 0:
                logging.debug("Iteration: " + str(i))
            vals = line.split()
            userIndexer.append(vals[0])
            movieIndexer.append(vals[1])
            ratings.append(float(vals[2]))

    rowInds = userIndexer.getArray()
    colInds = movieIndexer.getArray()
    ratings = numpy.array(ratings)

    # numpy.int was a deprecated alias of the builtin int and is removed in
    # numpy >= 1.24; using int keeps identical behaviour.
    X = sppy.csarray((len(userIndexer.getIdDict()), len(movieIndexer.getIdDict())), storagetype="row", dtype=int)
    X.put(numpy.array(ratings > 3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
    X.prune()

    X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

    logging.debug("Read file: " + matrixFileName)
    logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

    #X = Sampling.sampleUsers(X, 1000)

    return X
示例5: main
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def main(argv=None):
    """
    Command-line entry point: parse options and generate the Bemol data
    files.

    Options: -h/--help, -d/--dir <data dir>, -n/--nb_user <int>, -D/--debug.
    :return: 0 on help, 2 on usage error, None on success.
    """
    if argv is None:
        argv = sys.argv
    try:
        # read options
        try:
            opts, args = getopt.getopt(argv[1:], "hd:n:D", ["help", "dir=", "nb_user=", "debug"])
        except getopt.error as msg:
            raise RGUsage(msg)
        # apply options (defaults first); renamed from `dir` so the builtin
        # dir() is not shadowed
        dataDir = PathDefaults.getDataDir() + "cluster/"
        nb_user = None
        log_level = logging.INFO
        for o, a in opts:
            if o in ("-h", "--help"):
                print(__doc__)
                return 0
            elif o in ("-d", "--dir"):
                dataDir = a
            elif o in ("-n", "--nb_user"):
                nb_user = int(a)
            elif o in ("-D", "--debug"):
                log_level = logging.DEBUG
        logging.basicConfig(stream=sys.stdout, level=log_level, format='%(levelname)s (%(asctime)s):%(message)s')
        # process: generate data files
        BemolData.generate_data_file(dataDir, nb_user)
    except RGUsage as err:
        logging.error(err.msg)
        logging.error("for help use --help")
        return 2
示例6: epinions
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def epinions(minNnzRows=10, minNnzCols=3, quantile=90):
    """
    Load the Epinions rating matrix from a .mat file and return a binarised
    (rating > 3) sparse user x item matrix, pruned to rows/columns with
    enough nonzeros.

    :param minNnzRows: minimum nonzeros per row kept after pruning.
    :param minNnzCols: minimum nonzeros per column kept after pruning.
    :param quantile: unused here; kept for interface compatibility.
    :return: an sppy.csarray of 0/1 entries.
    """
    matrixFileName = PathDefaults.getDataDir() + "epinions/rating.mat"
    A = scipy.io.loadmat(matrixFileName)["rating"]

    userIndexer = IdIndexer("i")
    itemIndexer = IdIndexer("i")

    # Column 0 is the user id, column 1 the item id
    for i in range(A.shape[0]):
        userIndexer.append(A[i, 0])
        itemIndexer.append(A[i, 1])

    rowInds = userIndexer.getArray()
    colInds = itemIndexer.getArray()
    ratings = A[:, 3]  # rating value column — presumably; confirm against the .mat schema

    # numpy.int was a deprecated alias of the builtin int and is removed in
    # numpy >= 1.24; using int keeps identical behaviour.
    X = sppy.csarray((len(userIndexer.getIdDict()), len(itemIndexer.getIdDict())), storagetype="row", dtype=int)
    X.put(numpy.array(ratings > 3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
    X.prune()

    X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

    logging.debug("Read file: " + matrixFileName)
    logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

    return X
示例7: testEdgeFile
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testEdgeFile(self):
    """
    Check the Cit-HepTh edge file contains the edge/vertex counts reported
    in the paper (352807 edges, 27770 vertices).
    """
    dataDir = PathDefaults.getDataDir() + "cluster/"
    edgesFilename = dataDir + "Cit-HepTh.txt"

    edges = {}
    vertices = {}

    # `with` guarantees the file is closed (the original leaked the handle);
    # also avoid shadowing the builtin `file` name.
    with open(edgesFilename, 'r') as edgeFile:
        # Skip the 4 header lines
        for _ in range(4):
            edgeFile.readline()

        for line in edgeFile:
            (vertex1, sep, vertex2) = line.partition("\t")
            vertex1 = vertex1.strip()
            vertex2 = vertex2.strip()
            edges[(vertex1, vertex2)] = 0
            vertices[vertex1] = 0
            vertices[vertex2] = 0

    # It says there are 352807 edges in paper and 27770 vertices;
    # assertEquals is a deprecated alias of assertEqual.
    self.assertEqual(len(edges), 352807)
    self.assertEqual(len(vertices), 27770)
示例8: __init__
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def __init__(self):
    """
    Set up label names, the metabolomic data directory and the per-hormone
    discretisation boundaries.
    """
    self.labelNames = ["Cortisol.val", "Testosterone.val", "IGF1.val"]
    self.dataDir = PathDefaults.getDataDir() + "metabolomic/"

    # Bin edges used to discretise each hormone level
    self.boundsDict = {
        "Cortisol": numpy.array([0, 89, 225, 573]),
        "Testosterone": numpy.array([0, 3, 9, 13]),
        "IGF1": numpy.array([0, 200, 441, 782]),
    }
示例9: __init__
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def __init__(self, maxIter=None, iterStartTimeStamp=None):
    """
    Prepare output/input file locations for the Erasm group-membership data,
    then process, split and load the ratings.

    :param maxIter: maximum number of iterations, or None for no limit.
    :param iterStartTimeStamp: starting timestamp of the iterator; defaults
        to 1286229600 when not given.
    """
    outputDir = PathDefaults.getOutputDir() + "recommend/erasm/"

    if not os.path.exists(outputDir):
        os.mkdir(outputDir)

    # iterStartDate is the starting date of the iterator;
    # `is not None` is the correct identity test (was `!= None`).
    if iterStartTimeStamp is not None:
        self.iterStartTimeStamp = iterStartTimeStamp
    else:
        self.iterStartTimeStamp = 1286229600

    self.timeStep = timedelta(30).total_seconds()  # 30-day iteration step

    self.ratingFileName = outputDir + "data.npz"
    self.userDictFileName = outputDir + "userIdDict.pkl"
    self.groupDictFileName = outputDir + "groupIdDict.pkl"
    self.isTrainRatingsFileName = outputDir + "is_train.npz"

    self.dataDir = PathDefaults.getDataDir() + "erasm/"
    self.dataFileName = self.dataDir + "groupMembers-29-11-12"

    self.maxIter = maxIter
    self.trainSplit = 4.0 / 5

    self.processRatings()
    self.splitDataset()
    self.loadProcessedData()
示例10: testToyData
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testToyData(self):
    """
    Numerically integrate the toy densities over the grid (trapezoid-style
    cell averages) and check that p(x) and the joint sum integrate to 1.
    """
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    data = numpy.load(dataDir + "toyData.npz")
    gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

    pxSum = 0
    pY1XSum = 0
    pYminus1XSum = 0
    px2Sum = 0

    # Area of one grid cell (uniform spacing assumed — TODO confirm)
    squareArea = (gridPoints[1] - gridPoints[0])**2

    # Summation loop kept in the original order so the float results match
    for i in range(gridPoints.shape[0] - 1):
        for j in range(gridPoints.shape[0] - 1):
            # Average of the 4 cell corners approximates the density in the cell
            px = (pdfX[i, j] + pdfX[i+1, j] + pdfX[i, j+1] + pdfX[i+1, j+1]) / 4
            pxSum += px * squareArea

            pY1X = (pdfY1X[i, j] + pdfY1X[i+1, j] + pdfY1X[i, j+1] + pdfY1X[i+1, j+1]) / 4
            pY1XSum += pY1X * squareArea

            pYminus1X = (pdfYminus1X[i, j] + pdfYminus1X[i+1, j] + pdfYminus1X[i, j+1] + pdfYminus1X[i+1, j+1]) / 4
            pYminus1XSum += pYminus1X * squareArea

            px2Sum += px * pY1X * squareArea + px * pYminus1X * squareArea

    # assertAlmostEquals is a deprecated alias of assertAlmostEqual
    self.assertAlmostEqual(pxSum, 1)
    print(pY1XSum)
    print(pYminus1XSum)

    self.assertAlmostEqual(px2Sum, 1)
示例11: testComputeIdealPenalty
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testComputeIdealPenalty(self):
    """
    Compare the ideal penalty computed from the known densities against the
    penalty estimated from held-out data; they should agree to 2 places.
    """
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    data = numpy.load(dataDir + "toyData.npz")
    gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

    sampleSize = 100
    trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
    testX, testY = X[sampleSize:, :], y[sampleSize:]

    # We form a test set from the grid points: all (gridPoints[j], gridPoints[m]) pairs
    fullX = numpy.zeros((gridPoints.shape[0]**2, 2))
    for m in range(gridPoints.shape[0]):
        fullX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
        fullX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

    C = 1.0
    gamma = 1.0
    args = (trainX, trainY, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X)
    penalty = computeIdealPenalty(args)

    # Now compute penalty using data
    args = (trainX, trainY, testX, testY, C, gamma)
    penalty2 = computeIdealPenalty2(args)

    # assertAlmostEquals is a deprecated alias of assertAlmostEqual
    self.assertAlmostEqual(penalty2, penalty, 2)
示例12: testPredict2
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testPredict2(self):
    """
    Test TreeRank predictions on the Gauss2D dataset across increasing max
    depths, comparing train/test AUCs against reference values.
    """
    # Test on Gauss2D dataset
    dataDir = PathDefaults.getDataDir()

    fileName = dataDir + "Gauss2D_learn.csv"
    XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1, 2, 3), delimiter=",")
    X = XY[:, 0:2]
    y = XY[:, 2]

    fileName = dataDir + "Gauss2D_test.csv"
    testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1, 2, 3), delimiter=",")
    testX = testXY[:, 0:2]
    testY = testXY[:, 2]

    X = Standardiser().standardiseArray(X)
    testX = Standardiser().standardiseArray(testX)

    maxDepths = range(3, 10)
    # NOTE(review): 8 reference values but only 7 depths — the last entry of
    # each array is never compared (same as the original behaviour).
    trainAucs = numpy.array(
        [0.7194734, 0.7284824, 0.7332185, 0.7348198, 0.7366152, 0.7367508, 0.7367508, 0.7367508]
    )
    testAucs = numpy.array([0.6789078, 0.6844632, 0.6867918, 0.6873420, 0.6874820, 0.6874400, 0.6874400, 0.6874400])

    # The results are approximately the same, but not exactly.
    # zip replaces the original manual index counter; assertAlmostEquals is a
    # deprecated alias of assertAlmostEqual.
    for maxDepth, trainAuc, testAuc in zip(maxDepths, trainAucs, testAucs):
        treeRank = TreeRank(self.leafRanklearner)
        treeRank.setMaxDepth(maxDepth)
        treeRank.learnModel(X, y)

        trainScores = treeRank.predict(X)
        testScores = treeRank.predict(testX)

        self.assertAlmostEqual(Evaluator.auc(trainScores, y), trainAuc, 2)
        self.assertAlmostEqual(Evaluator.auc(testScores, testY), testAuc, 1)
示例13: profileClusterFromIterator
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def profileClusterFromIterator(self):
    """
    Profile clusterFromIterator over an increasing sequence of subgraphs.
    """
    subgraphIterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
    dataDir = PathDefaults.getDataDir() + "cluster/"
    #iterator = getBemolGraphIterator(dataDir)

    def run():
        # Cluster every subgraph and show the cumulative timings
        clusterList, timeList, boundList = self.clusterer.clusterFromIterator(subgraphIterator, verbose=True)
        print(timeList.cumsum(0))

    ProfileUtils.profile('run()', globals(), locals())
示例14: syntheticDataset2
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def syntheticDataset2():
    """
    Create a simple synthetic dataset using a power law distribution on
    users and items, read back from the pre-generated Matrix Market file.

    :return: a row-major sppy sparse matrix.
    """
    matrixFileName = PathDefaults.getDataDir() + "syntheticRanking/" + "dataset1.mtx"
    return sppy.io.mmread(matrixFileName, storagetype="row")
示例15: getIterator
# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def getIterator():
    """
    Return an iterator over a slice of the Bemol purchase graph sequence.
    """
    dataDir = PathDefaults.getDataDir() + "cluster/"

    nbUser = 10000              # set to 'None' to have all users
    nbPurchasesPerIt = 500      # set to 'None' to take all the purchases per date
    startingIteration = 300
    endingIteration = 600       # set to 'None' to have all iterations
    stepSize = 1

    graphIterator = BemolData.getGraphIterator(dataDir, nbUser, nbPurchasesPerIt)
    return itertools.islice(graphIterator, startingIteration, endingIteration, stepSize)