当前位置: 首页>>代码示例>>Python>>正文


Python PathDefaults.getDataDir方法代码示例

本文整理汇总了Python中sandbox.util.PathDefaults.PathDefaults.getDataDir方法的典型用法代码示例。如果您正苦于以下问题:Python PathDefaults.getDataDir方法的具体用法?Python PathDefaults.getDataDir怎么用?Python PathDefaults.getDataDir使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sandbox.util.PathDefaults.PathDefaults的用法示例。


在下文中一共展示了PathDefaults.getDataDir方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
    def __init__(self, field):
        """
        Configure file locations and parameters for the DBLP dataset of one
        research field, then clean the XML dump and match the experts.

        :param field: name of the field; it is used as the sub-directory of
            "reputation/" (inside the data directory) holding the expert files.
        """
        # Fixed seed so that any random step downstream is repeatable
        numpy.random.seed(21)

        # Raw and cleaned DBLP XML dumps
        dblpDir = PathDefaults.getDataDir() + "dblp/"
        self.xmlFileName = dblpDir + "dblp.xml"
        self.xmlCleanFilename = dblpDir + "dblpClean.xml"

        # Per-field expert, coauthor and publication files
        fieldDir = PathDefaults.getDataDir() + "reputation/" + field + "/"
        self.expertsFileName = fieldDir + "experts.txt"
        self.expertMatchesFilename = fieldDir + "experts_matches.csv"
        self.trainExpertMatchesFilename = fieldDir + "experts_train_matches.csv"
        self.testExpertMatchesFilename = fieldDir + "experts_test_matches.csv"
        self.coauthorsFilename = fieldDir + "coauthors.csv"
        self.publicationsFilename = fieldDir + "publications.csv"

        # Processing parameters
        # NOTE(review): numLines is presumably the line count of dblp.xml - confirm
        self.stepSize = 100000
        self.numLines = 33532888
        self.publicationTypes = {
            "article", "inproceedings", "proceedings", "book",
            "incollection", "phdthesis", "mastersthesis", "www",
        }
        self.p = 0.5
        self.matchCutoff = 0.95

        # Run the preprocessing pipeline straight away
        self.cleanXML()
        self.matchExperts()
        logging.warning("Now you must disambiguate the matched experts if not ready done")
开发者ID:charanpald,项目名称:wallhack,代码行数:27,代码来源:DBLPDataset.py

示例2: processSimpleDataset

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def processSimpleDataset(name, numRealisations, split, ext=".csv", delimiter=",", usecols=None, skiprows=1, converters=None):
    """
    Load a delimited text dataset from the regression benchmark directory,
    split it into examples and labels, and hand it to preprocessSave together
    with shuffled train/test indices.

    :param name: base name of the data file; also names the output sub-directory.
    :param numRealisations: passed to Sampling.shuffleSplit as its first argument.
    :param split: passed to Sampling.shuffleSplit as its third argument.
    :param ext: file extension appended to name.
    :param delimiter: column delimiter given to numpy.loadtxt.
    :param usecols: columns to read, given to numpy.loadtxt.
    :param skiprows: number of leading rows to skip, given to numpy.loadtxt.
    :param converters: column converters given to numpy.loadtxt.
    """
    numpy.random.seed(21)
    regressionDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    fileName = regressionDir + name + ext

    print("Loading data from file " + fileName)
    outputDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "/"

    loadOptions = dict(delimiter=delimiter, skiprows=skiprows, usecols=usecols, converters=converters)
    table = numpy.loadtxt(fileName, **loadOptions)

    # The last column holds the labels, everything before it the features
    X, y = table[:, :-1], table[:, -1]
    numExamples = X.shape[0]
    idx = Sampling.shuffleSplit(numRealisations, numExamples, split)
    preprocessSave(X, y, outputDir, idx)
开发者ID:charanpald,项目名称:wallhack,代码行数:15,代码来源:CreateRegressionBenchmarks.py

示例3: testGenerateRandomGraph

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
    def testGenerateRandomGraph(self):
        """
        Run generateRandomGraph of the ego simulator on a small-world graph of
        1000 vertices; the test only checks that the call completes.
        """
        infoDiffusionEgo = PathDefaults.getDataDir() + "infoDiffusion/EgoData.csv"
        infoDiffusionAlter = PathDefaults.getDataDir()  + "infoDiffusion/AlterData.csv"
        vertexCount = 1000
        informationProb = 0.1

        # Small-world generator parameters
        rewireProb = 0.1
        numNeighbours = 10
        smallWorld = SmallWorldGenerator(rewireProb, numNeighbours)
        emptyGraph = SparseGraph(VertexList(vertexCount, 0))
        randomGraph = smallWorld.generate(emptyGraph)

        self.svmEgoSimulator.generateRandomGraph(infoDiffusionEgo, infoDiffusionAlter, informationProb, randomGraph)
开发者ID:charanpald,项目名称:wallhack,代码行数:16,代码来源:SvmEgoSimulatorTest.py

示例4: flixster

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
 def flixster(minNnzRows=10, minNnzCols=2, quantile=90):
     """
     Load the Flixster ratings file and return it as a binary sparse matrix.

     Each line after the first is split on whitespace; the first three
     fields are read as user id, movie id and rating. An entry of the
     matrix is set to 1 when the rating is greater than 3 and 0 otherwise.

     :param minNnzRows: passed to SparseUtils.pruneMatrixRowAndCols.
     :param minNnzCols: passed to SparseUtils.pruneMatrixRowAndCols.
     :param quantile: unused; kept so that existing callers keep working.
     :return: the pruned sppy.csarray of users (rows) by movies (columns).
     """
     matrixFileName = PathDefaults.getDataDir() + "flixster/Ratings.timed.txt"
     userIndexer = IdIndexer("i")
     movieIndexer = IdIndexer("i")

     ratings = array.array("f")
     logging.debug("Loading ratings from " + matrixFileName)

     # The context manager guarantees the file is closed even if parsing fails
     with open(matrixFileName) as matrixFile:
         # The first line is discarded (presumably a header row - confirm)
         matrixFile.readline()

         for i, line in enumerate(matrixFile):
             if i % 1000000 == 0:
                 logging.debug("Iteration: " + str(i))
             vals = line.split()

             userIndexer.append(vals[0])
             movieIndexer.append(vals[1])
             ratings.append(float(vals[2]))

     rowInds = userIndexer.getArray()
     colInds = movieIndexer.getArray()
     ratings = numpy.array(ratings)

     # numpy.int was only an alias of the builtin int and no longer exists in
     # recent NumPy releases, so the builtin is used directly
     X = sppy.csarray((len(userIndexer.getIdDict()), len(movieIndexer.getIdDict())), storagetype="row", dtype=int)
     X.put(numpy.array(ratings>3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
     X.prune()

     X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

     logging.debug("Read file: " + matrixFileName)
     logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

     return X
开发者ID:charanpald,项目名称:wallhack,代码行数:37,代码来源:DatasetUtils.py

示例5: main

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
 def main(argv=None):
     """
     Command-line entry point: parse the options and generate the data files.

     :param argv: full argument list including the program name; defaults to
         sys.argv when None.
     :return: 0 after printing the help text, 2 on a usage error, and None
         when the data files were generated.
     """
     argv = sys.argv if argv is None else argv
     try:
         # Parse the command line; any getopt failure becomes a usage error
         try:
             opts, args = getopt.getopt(argv[1:], "hd:n:D", ["help", "dir=", "nb_user=", "debug"])
         except getopt.error as msg:
             raise RGUsage(msg)

         # Defaults, overridden below by the options that were given
         targetDir = PathDefaults.getDataDir() + "cluster/"
         numUsers = None
         logLevel = logging.INFO
         for option, value in opts:
             if option in ("-h", "--help"):
                 print(__doc__)
                 return 0
             if option in ("-d", "--dir"):
                 targetDir = value
             elif option in ("-n", "--nb_user"):
                 numUsers = int(value)
             elif option in ("-D", "--debug"):
                 logLevel = logging.DEBUG
         logging.basicConfig(stream=sys.stdout, level=logLevel, format='%(levelname)s (%(asctime)s):%(message)s')

         # Generate the data files
         BemolData.generate_data_file(targetDir, numUsers)
     except RGUsage as err:
         logging.error(err.msg)
         logging.error("for help use --help")
         return 2
开发者ID:charanpald,项目名称:wallhack,代码行数:32,代码来源:BemolData.py

示例6: epinions

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
    def epinions(minNnzRows=10, minNnzCols=3, quantile=90):
        """
        Load the Epinions ratings from a MATLAB file and return them as a
        binary sparse matrix.

        The "rating" variable of the .mat file is read as a table in which
        column 0 is indexed as the user, column 1 as the item and column 3
        is used as the rating. An entry of the matrix is set to 1 when the
        rating is greater than 3 and 0 otherwise.

        :param minNnzRows: passed to SparseUtils.pruneMatrixRowAndCols.
        :param minNnzCols: passed to SparseUtils.pruneMatrixRowAndCols.
        :param quantile: unused; kept so that existing callers keep working.
        :return: the pruned sppy.csarray of users (rows) by items (columns).
        """
        matrixFileName = PathDefaults.getDataDir() + "epinions/rating.mat"
        A = scipy.io.loadmat(matrixFileName)["rating"]

        userIndexer = IdIndexer("i")
        itemIndexer = IdIndexer("i")

        for i in range(A.shape[0]):
            userIndexer.append(A[i, 0])
            itemIndexer.append(A[i, 1])

        rowInds = userIndexer.getArray()
        colInds = itemIndexer.getArray()
        ratings = A[:, 3]

        # numpy.int was only an alias of the builtin int and no longer exists
        # in recent NumPy releases, so the builtin is used directly
        X = sppy.csarray((len(userIndexer.getIdDict()), len(itemIndexer.getIdDict())), storagetype="row", dtype=int)
        X.put(numpy.array(ratings>3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
        X.prune()

        X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

        logging.debug("Read file: " + matrixFileName)
        logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

        return X
开发者ID:charanpald,项目名称:wallhack,代码行数:28,代码来源:DatasetUtils.py

示例7: testEdgeFile

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
    def testEdgeFile(self):
        """
        Figure out the problem with the edge file.

        Reads Cit-HepTh.txt, skips its first four lines, and counts the
        distinct tab-separated (vertex1, vertex2) pairs and the distinct
        vertices, comparing both counts with the figures quoted in the paper.
        """
        dataDir = PathDefaults.getDataDir() + "cluster/"
        edgesFilename = dataDir + "Cit-HepTh.txt"

        # Only distinct edges/vertices are counted, so sets are sufficient
        edges = set()
        vertices = set()

        # The context manager closes the file even when an assertion or a
        # parsing step fails
        with open(edgesFilename, 'r') as edgesFile:
            # Discard the first four lines (presumably a comment header - confirm)
            for _ in range(4):
                edgesFile.readline()

            for line in edgesFile:
                (vertex1, sep, vertex2) = line.partition("\t")
                vertex1 = vertex1.strip()
                vertex2 = vertex2.strip()
                edges.add((vertex1, vertex2))
                vertices.add(vertex1)
                vertices.add(vertex2)

        #It says there are 352807 edges in paper and 27770 vertices
        self.assertEqual(len(edges), 352807)
        self.assertEqual(len(vertices), 27770)
开发者ID:charanpald,项目名称:wallhack,代码行数:29,代码来源:CitationIterGeneratorTest.py

示例8: __init__

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
 def __init__(self):
     """
     Store the label column names, the metabolomic data directory and the
     per-label bound arrays.
     """
     self.dataDir = PathDefaults.getDataDir() +  "metabolomic/"
     self.labelNames = ["Cortisol.val", "Testosterone.val", "IGF1.val"]
     # One array of four increasing bounds per label
     # NOTE(review): presumably concentration category boundaries - confirm
     self.boundsDict = {
         "Cortisol": numpy.array([0, 89, 225, 573]),
         "Testosterone": numpy.array([0, 3, 9, 13]),
         "IGF1": numpy.array([0, 200, 441, 782]),
     }
开发者ID:charanpald,项目名称:wallhack,代码行数:9,代码来源:MetabolomicsUtils.py

示例9: __init__

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
    def __init__(self, maxIter=None, iterStartTimeStamp=None):
        """
        Configure file locations and parameters, then process the ratings,
        split the dataset and load the processed data.

        :param maxIter: stored as self.maxIter; None by default.
        :param iterStartTimeStamp: starting timestamp of the iterator; when
            None the default 1286229600 is used.
        """
        outputDir = PathDefaults.getOutputDir() + "recommend/erasm/"

        # makedirs also creates the intermediate "recommend/" directory, which
        # a plain mkdir would fail on when it does not exist yet
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        #iterStartDate is the starting date of the iterator
        if iterStartTimeStamp is not None:
            self.iterStartTimeStamp = iterStartTimeStamp
        else:
            # NOTE(review): presumably a Unix timestamp in seconds - confirm
            self.iterStartTimeStamp = 1286229600

        # Step of 30 days expressed in seconds
        self.timeStep = timedelta(30).total_seconds()

        # Files written to / read from the output directory
        self.ratingFileName = outputDir + "data.npz"
        self.userDictFileName = outputDir + "userIdDict.pkl"
        self.groupDictFileName = outputDir + "groupIdDict.pkl"
        self.isTrainRatingsFileName = outputDir + "is_train.npz"

        # Raw input data
        self.dataDir = PathDefaults.getDataDir() + "erasm/"
        self.dataFileName = self.dataDir + "groupMembers-29-11-12"

        self.maxIter = maxIter
        self.trainSplit = 4.0/5

        self.processRatings()
        self.splitDataset()
        self.loadProcessedData()
开发者ID:charanpald,项目名称:wallhack,代码行数:30,代码来源:MendeleyGroupsDataset.py

示例10: testToyData

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
    def testToyData(self):
        """
        Check that the densities stored in toyData.npz integrate to one.

        Each density is integrated numerically over the square grid defined
        by gridPoints: the value on a grid cell is taken as the mean of its
        four corner values and multiplied by the cell area. The test asserts
        that the integral of pdfX, and the integral of
        pdfX*(pdfY1X + pdfYminus1X), are both approximately 1.
        """
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_3"], data["arr_4"], data["arr_5"]

        def cellAverage(pdf, i, j):
            # Mean of the four corner values of grid cell (i, j)
            return (pdf[i,j]+pdf[i+1,j]+pdf[i, j+1]+pdf[i+1, j+1])/4

        pxSum = 0
        pY1XSum = 0
        pYminus1XSum = 0

        px2Sum = 0
        # The cell area is derived from the first two grid points only
        # (assumes the grid is evenly spaced - TODO confirm)
        squareArea = (gridPoints[1]-gridPoints[0])**2
        numCells = gridPoints.shape[0]-1

        for i in range(numCells):
            for j in range(numCells):
                px = cellAverage(pdfX, i, j)
                pxSum += px*squareArea

                pY1X = cellAverage(pdfY1X, i, j)
                pY1XSum += pY1X*squareArea

                pYminus1X = cellAverage(pdfYminus1X, i, j)
                pYminus1XSum += pYminus1X*squareArea

                px2Sum += px*pY1X*squareArea + px*pYminus1X*squareArea

        # assertAlmostEquals is a deprecated alias that newer Python versions
        # no longer provide
        self.assertAlmostEqual(pxSum, 1)
        print(pY1XSum)
        print(pYminus1XSum)

        self.assertAlmostEqual(px2Sum, 1)
开发者ID:charanpald,项目名称:wallhack,代码行数:33,代码来源:GenerateToyDataTest.py

示例11: testComputeIdealPenalty

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
    def testComputeIdealPenalty(self):
        """
        Compare the penalty computed from the known densities on the grid
        (computeIdealPenalty) with the penalty computed from held-out data
        (computeIdealPenalty2); they must agree to 2 decimal places.
        """
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

        # First sampleSize examples are used for training, the rest for testing
        sampleSize = 100
        trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
        testX, testY = X[sampleSize:, :], y[sampleSize:]

        #We form a test set from the grid points
        numPoints = gridPoints.shape[0]
        fullX = numpy.zeros((numPoints**2, 2))
        for m in range(numPoints):
            # Block m holds every grid point in column 0 paired with the
            # m-th grid point in column 1
            fullX[m*numPoints:(m+1)*numPoints, 0] = gridPoints
            fullX[m*numPoints:(m+1)*numPoints, 1] = gridPoints[m]

        C = 1.0
        gamma = 1.0
        args = (trainX, trainY, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X)
        penalty = computeIdealPenalty(args)

        #Now compute penalty using data
        args = (trainX, trainY, testX, testY, C, gamma)
        penalty2 = computeIdealPenalty2(args)

        # assertAlmostEquals is a deprecated alias that newer Python versions
        # no longer provide
        self.assertAlmostEqual(penalty2, penalty, 2)
开发者ID:charanpald,项目名称:wallhack,代码行数:28,代码来源:ModelSelectUtilsTest.py

示例12: testPredict2

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
    def testPredict2(self):
        """
        Train TreeRank on the Gauss2D dataset for maximum depths 3 to 9 and
        compare the train and test AUC at each depth with stored reference
        values (to 2 and 1 decimal places respectively).
        """
        # Test on Gauss2D dataset
        dataDir = PathDefaults.getDataDir()

        # Columns 1 and 2 are the features, column 3 the label
        fileName = dataDir + "Gauss2D_learn.csv"
        XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1, 2, 3), delimiter=",")
        X = XY[:, 0:2]
        y = XY[:, 2]

        fileName = dataDir + "Gauss2D_test.csv"
        testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1, 2, 3), delimiter=",")
        testX = testXY[:, 0:2]
        testY = testXY[:, 2]

        # Train and test sets are standardised independently of each other
        X = Standardiser().standardiseArray(X)
        testX = Standardiser().standardiseArray(testX)

        maxDepths = range(3, 10)
        # Reference AUCs; each array has one more entry than there are depths,
        # the last entry is never compared
        trainAucs = numpy.array(
            [0.7194734, 0.7284824, 0.7332185, 0.7348198, 0.7366152, 0.7367508, 0.7367508, 0.7367508]
        )
        testAucs = numpy.array([0.6789078, 0.6844632, 0.6867918, 0.6873420, 0.6874820, 0.6874400, 0.6874400, 0.6874400])

        # The results are approximately the same, but not exactly
        for i, maxDepth in enumerate(maxDepths):
            treeRank = TreeRank(self.leafRanklearner)
            treeRank.setMaxDepth(maxDepth)
            treeRank.learnModel(X, y)
            trainScores = treeRank.predict(X)
            testScores = treeRank.predict(testX)

            # assertAlmostEquals is a deprecated alias that newer Python
            # versions no longer provide
            self.assertAlmostEqual(Evaluator.auc(trainScores, y), trainAucs[i], 2)
            self.assertAlmostEqual(Evaluator.auc(testScores, testY), testAucs[i], 1)
开发者ID:kentwang,项目名称:sandbox,代码行数:37,代码来源:TreeRankTest.py

示例13: profileClusterFromIterator

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
 def profileClusterFromIterator(self):
     """
     Profile self.clusterer.clusterFromIterator on an iterator built from
     self.graph and self.subgraphIndicesList.
     """
     iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
     # dataDir is only needed by the alternative iterator commented out below
     dataDir = PathDefaults.getDataDir() + "cluster/"
     #iterator = getBemolGraphIterator(dataDir)
     
     def run(): 
         # Cluster the whole sequence and print the cumulative sum of timeList
         clusterList, timeList, boundList = self.clusterer.clusterFromIterator(iterator, verbose=True)
         print(timeList.cumsum(0))
         
     # The statement is passed as a string together with locals(), so the
     # nested function must keep the name "run"
     ProfileUtils.profile('run()', globals(), locals())
开发者ID:charanpald,项目名称:sandbox,代码行数:12,代码来源:IterativeSpectralClusteringProfile.py

示例14: syntheticDataset2

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
 def syntheticDataset2():
     """
     Read the synthetic ranking dataset stored in
     "syntheticRanking/dataset1.mtx" under the data directory and return it
     as a row-storage sparse matrix. The original note describes it as a
     simple synthetic dataset using a power law distribution on users and
     items.
     """
     syntheticDir = PathDefaults.getDataDir() + "syntheticRanking/"
     return sppy.io.mmread(syntheticDir + "dataset1.mtx", storagetype="row")
开发者ID:charanpald,项目名称:wallhack,代码行数:12,代码来源:DatasetUtils.py

示例15: getIterator

# 需要导入模块: from sandbox.util.PathDefaults import PathDefaults [as 别名]
# 或者: from sandbox.util.PathDefaults.PathDefaults import getDataDir [as 别名]
 def getIterator():
     """
     Return a slice (iterations 300 to 599, step 1) of the Bemol graph
     iterator built from the "cluster/" data directory.
     """
     clusterDir = PathDefaults.getDataDir() + "cluster/"

     # Graph iterator settings
     numUsers = 10000  # set to 'None' to have all users
     purchasesPerIteration = 500  # set to 'None' to take all the purchases per date

     # Slice settings
     firstIteration = 300
     lastIteration = 600  # set to 'None' to have all iterations
     step = 1

     graphIterator = BemolData.getGraphIterator(clusterDir, numUsers, purchasesPerIteration)
     return itertools.islice(graphIterator, firstIteration, lastIteration, step)
开发者ID:charanpald,项目名称:wallhack,代码行数:12,代码来源:DatasetStats.py


注:本文中的sandbox.util.PathDefaults.PathDefaults.getDataDir方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。