Python PathDefaults.getOutputDir方法代码示例

本文整理汇总了Python中apgl.util.PathDefaults.PathDefaults.getOutputDir方法的典型用法代码示例。


示例1: saveRatingMatrix

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
def saveRatingMatrix():
    """
    Take the coauthor graph above and make vertices indexed from 0 then save
    as matrix market format.

    The comma-separated edge list is read from "erasm/edges2.txt" under the
    output directory. Every edge increments both (u, v) and (v, u) of a square
    sparse matrix, rows/columns with fewer than ``minCoauthors`` nonzero
    entries are dropped, and the result is written with ``scipy.io.mmwrite``
    to "erasm/R" under the output directory.
    """
    edgeFileName = PathDefaults.getOutputDir() + "erasm/edges2.txt"
    logging.debug("Reading edge list")
    # The builtin int replaces the removed numpy.int alias; ndmin=2 keeps a
    # file holding a single edge as a (1, 2) array rather than a flat vector.
    edges = numpy.loadtxt(edgeFileName, delimiter=",", dtype=int, ndmin=2)
    logging.debug("Total number of edges: " + str(edges.shape[0]))

    # Give each distinct vertex id one contiguous index, in order of first
    # appearance. Membership is tested against the dict itself so that an id
    # seen before keeps its index and the indices stay within [0, n).
    vertexIdDict = {}
    for edge in edges:
        for vertexId in (edge[0], edge[1]):
            if vertexId not in vertexIdDict:
                vertexIdDict[vertexId] = len(vertexIdDict)

    n = len(vertexIdDict)
    R = scipy.sparse.lil_matrix((n, n))
    logging.debug("Creating sparse matrix")
    for edge in edges:
        u = vertexIdDict[edge[0]]
        v = vertexIdDict[edge[1]]
        R[u, v] += 1
        R[v, u] += 1
    logging.debug("Created matrix " + str(R.shape) + " with " + str(R.getnnz()) + " non zeros")

    R = R.tocsr()
    minCoauthors = 20
    logging.debug("Removing vertices with <" + str(minCoauthors) + " coauthors")
    # Count the nonzero entries of every row; minlength makes the count vector
    # line up with the row indices even if trailing rows were empty.
    rowInds = R.nonzero()[0]
    coauthorCounts = numpy.bincount(rowInds, minlength=R.shape[0])
    inds = numpy.arange(R.shape[0])[coauthorCounts >= minCoauthors]
    R = R[inds, :][:, inds]
    logging.debug("Matrix has shape " + str(R.shape) + " with " + str(R.getnnz()) + " non zeros")
    matrixFileName = PathDefaults.getOutputDir() + "erasm/R"
    scipy.io.mmwrite(matrixFileName, R)
    logging.debug("Wrote matrix to file " + matrixFileName)

示例2: __init__

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def __init__(self, YList, X, featuresName, ages, args):
        """
        Record the input data, fix the experiment settings and assemble the
        lists of (generator, name) pairs used by the experiments.

        :param YList: the list of concentrations
        :param X: stored unchanged as ``self.X``
        :param featuresName: stored unchanged as ``self.featuresName``
        :param ages: stored unchanged as ``self.ages``
        :param args: forwarded to the parent constructor and kept as ``self.args``
        """
        super(MetabolomicsExpRunner, self).__init__(args=args)

        # Inputs supplied by the caller
        self.X = X
        self.YList = YList
        self.featuresName = featuresName
        self.args = args
        self.ages = ages

        # Fixed experiment settings
        self.maxDepth = 10
        self.numTrees = 10
        self.sampleSize = 1.0
        self.sampleReplace = True
        self.folds = 5
        self.resultsDir = PathDefaults.getOutputDir() + "metabolomics/"

        # Pairs of (generated object, display name); generate() is called here
        self.leafRankGenerators = [
            (LinearSvmGS.generate(), "SVM"),
            (SvcGS.generate(), "RBF-SVM"),
            (DecisionTree.generate(), "CART"),
        ]

        self.pcaLeafRankGenerators = [(LinearSvmPca.generate(), "LinearSVM-PCA")]

        # Pairs of (generate callable, display name); generate is NOT called here
        self.funcLeafRankGenerators = [
            (LinearSvmFGs.generate, "SVMF"),
            (SvcFGs.generate, "RBF-SVMF"),
            (DecisionTreeF.generate, "CARTF"),
        ]

        # Store all the label vectors and their missing values
        igf1Inds, cortisolInds, testoInds = MetabolomicsUtils.createIndicatorLabels(YList)
        self.hormoneInds = [igf1Inds, cortisolInds, testoInds]
        self.hormoneNames = MetabolomicsUtils.getLabelNames()

示例3: __init__

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def __init__(self, maxIter=None, iterStartTimeStamp=None):
        """
        Return a training and test set for movielens based on the time each
        rating was made.

        NOTE(review): the description above mentions movielens, but the paths
        below point at "recommend/erasm/" -- confirm which dataset is meant.

        :param maxIter: stored as ``self.maxIter``; logged when not None
        :param iterStartTimeStamp: starting time stamp of the iterator; when
            None the default 789652009 is used
        """
        self.timeStep = timedelta(30).total_seconds()

        # iterStartDate is the starting date of the iterator
        if iterStartTimeStamp is not None:
            self.iterStartTimeStamp = iterStartTimeStamp
        else:
            # Only fall back to the default when the caller gave no value
            self.iterStartTimeStamp = 789652009

        outputDir = PathDefaults.getOutputDir() + "recommend/erasm/"

        self.numRatings = 402872
        self.minContacts = 10

        # Make sure the (nested) output directory exists before it is used
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        self.ratingFileName = outputDir + "data.npz"
        self.userDictFileName = outputDir + "userIdDict.pkl"
        self.isTrainRatingsFileName = outputDir + "is_train.npz"

        self.maxIter = maxIter
        self.trainSplit = 4.0 / 5

        if self.maxIter is not None:
            logging.debug("Maximum number of iterations: " + str(self.maxIter))

示例4: testWriteToFile3

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def testWriteToFile3(self):
        """
        We will test out writing out some random graphs to Pajek
        """
        numVertices = 20
        numFeatures = 0
        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList)

        p = 0.1
        generator = ErdosRenyiGenerator(p)
        graph = generator.generate(graph)

        pw = PajekWriter()
        directory = PathDefaults.getOutputDir() + "test/"
        pw.writeToFile(directory + "erdosRenyi20", graph)

        #Now write a small world graph
        p = 0.2
        k = 3

        # The small world generator is applied to the graph returned by the
        # Erdos-Renyi generator above, not to a fresh graph.
        generator = SmallWorldGenerator(p, k)
        graph = generator.generate(graph)

        pw.writeToFile(directory + "smallWorld20", graph)

示例5: __init__

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def __init__(self, maxIter=None, iterStartTimeStamp=None):
        """
        Set up the file names and iteration settings kept on the instance.

        :param maxIter: stored as ``self.maxIter``
        :param iterStartTimeStamp: starting time stamp of the iterator; when
            None the default 1286229600 is used
        """
        outputDir = PathDefaults.getOutputDir() + "recommend/erasm/"

        # Make sure the (nested) output directory exists before it is used
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        #iterStartDate is the starting date of the iterator
        if iterStartTimeStamp is not None:
            self.iterStartTimeStamp = iterStartTimeStamp
        else:
            # Only fall back to the default when the caller gave no value
            self.iterStartTimeStamp = 1286229600

        self.timeStep = timedelta(30).total_seconds()
        self.ratingFileName = outputDir + "data.npz"
        self.userDictFileName = outputDir + "userIdDict.pkl"
        self.groupDictFileName = outputDir + "groupIdDict.pkl"
        self.isTrainRatingsFileName = outputDir + "is_train.npz"
        self.dataDir = PathDefaults.getDataDir() + "erasm/"
        self.dataFileName = self.dataDir + "groupMembers-29-11-12"
        self.maxIter = maxIter
        self.trainSplit = 4.0/5

示例6: __init__

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def __init__(self, trainXIteratorFunc, testXIteratorFunc, cmdLine=None, defaultAlgoArgs = None, dirName=""):
        """ priority for default args
         - best priority: command-line value
         - middle priority: set-by-function value
         - lower priority: class value

        :param trainXIteratorFunc: function to return iterators to the training matrices
        :param testXIteratorFunc: function to return iterators to the test matrices
        :param cmdLine: not used in the visible part of this constructor
        :param defaultAlgoArgs: passed to ``RecommendExpHelper.newAlgoParams``
        :param dirName: sub-directory of "recommend/" used for the results
        """
        # Parameters to choose which methods to run
        # Obtained merging default parameters from the class with those from the user
        self.algoArgs = RecommendExpHelper.newAlgoParams(defaultAlgoArgs)
        #Function to return iterators to the training and test matrices
        self.trainXIteratorFunc = trainXIteratorFunc
        self.testXIteratorFunc = testXIteratorFunc
        #How often to print output
        self.logStep = 10
        #The max number of observations to use for model selection
        self.sampleSize = 5*10**6

        # basic resultsDir
        self.resultsDir = PathDefaults.getOutputDir() + "recommend/" + dirName + "/"

        # update algoParams from command line
        # NOTE(review): the command-line update step is not present in this
        # excerpt -- confirm against the full source.

示例7: getOutputFileName

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def getOutputFileName(graphType, p, k, infoProb):
        """
        Build the output file name for a given graph type and parameters.

        :param graphType: either "SmallWorld" or "ErdosRenyi"
        :param p: value written after "_p=" in the file name
        :param k: value written after "_k=" (used for "SmallWorld" only)
        :param infoProb: value written after "_q=" in the file name
        :return: the output directory concatenated with the generated name
        :raises ValueError: if ``graphType`` is not one of the two known types
        """
        outputDirectory = PathDefaults.getOutputDir()

        if graphType == "SmallWorld":
            outputFileName = outputDirectory + "SvmEgoOutput_type=" + graphType + "_p=" + str(p) + "_k=" + str(k) + "_q=" + str(infoProb)
        elif graphType == "ErdosRenyi":
            outputFileName = outputDirectory + "SvmEgoOutput_type=" + graphType + "_p=" + str(p) + "_q=" + str(infoProb)
        else:
            # Reject only unknown types; the two known types return a name
            raise ValueError("Invalid graph type: " + graphType)

        return outputFileName

示例8: __init__

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def __init__(self, df, X, featuresName, ages, args):
        """
        Keep the input data on the instance and derive the label names, label
        list and bounds through ``MetabolomicsUtils``.

        :param df: stored as ``self.df`` and passed to ``createLabelList``
        :param X: stored unchanged as ``self.X``
        :param featuresName: stored unchanged as ``self.featuresName``
        :param ages: stored unchanged as ``self.ages``
        :param args: forwarded to the parent constructor and kept as ``self.args``
        """
        super(MetabolomicsRegExpRunner, self).__init__(args=args)

        # Inputs supplied by the caller
        self.df, self.X = df, X
        self.featuresName, self.args = featuresName, args
        self.ages = ages

        # Labels derived from the data frame
        labelNames = MetabolomicsUtils.getLabelNames()
        self.labelNames = labelNames
        self.YList = MetabolomicsUtils.createLabelList(df, self.labelNames)
        self.boundsList = MetabolomicsUtils.getBounds()

        # Where results are written
        outputRoot = PathDefaults.getOutputDir()
        self.resultsDir = outputRoot + "metabolomics/"

示例9: loadParams

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
def loadParams(ind):
    """
    Load the directories and simulation/ABC parameters for one theta index.

    The module-level name ``processReal`` selects between the real and the
    toy configuration (it is not defined inside this function).

    :param ind: index used for the real results directory and passed to the
        real-data helpers; ignored by the toy configuration
    :return: the tuple (N, resultsDir, outputDir, recordStep, startDate,
        endDate, prefix, targetGraph, breakSize, numEpsilons, M, matchAlpha,
        matchAlg, numInds)
    """
    if processReal:
        resultsDir = PathDefaults.getOutputDir() + "viroscopy/real/theta" + str(ind) + "/"
        outputDir = resultsDir + "stats/"
        N, matchAlpha, breakScale, numEpsilons, epsilon, minEpsilon, matchAlg, abcMaxRuns, batchSize, pertScale = HIVModelUtils.realABCParams(True)
        startDate, endDate, recordStep, M, targetGraph, numInds = HIVModelUtils.realSimulationParams(test=True, ind=ind)
        realTheta, sigmaTheta, pertTheta = HIVModelUtils.estimatedRealTheta(ind)
        prefix = "Real"
    else:
        # Toy configuration: used only when processReal is false, so it no
        # longer overwrites the real parameters loaded above.
        resultsDir = PathDefaults.getOutputDir() + "viroscopy/toy/theta/"
        outputDir = resultsDir + "stats/"
        N, matchAlpha, breakScale, numEpsilons, epsilon, minEpsilon, matchAlg, abcMaxRuns, batchSize, pertScale = HIVModelUtils.toyABCParams()
        startDate, endDate, recordStep, M, targetGraph = HIVModelUtils.toySimulationParams(test=True)
        realTheta, sigmaTheta, pertTheta = HIVModelUtils.toyTheta()
        prefix = "Toy"
        numInds = 1

    # Difference in size of the removed-vertex subgraphs between the end and
    # start dates, scaled by breakScale
    breakSize = (targetGraph.subgraph(targetGraph.removedIndsAt(endDate)).size - targetGraph.subgraph(targetGraph.removedIndsAt(startDate)).size)  * breakScale
    return N, resultsDir, outputDir, recordStep, startDate, endDate, prefix, targetGraph, breakSize, numEpsilons, M, matchAlpha, matchAlg, numInds

示例10: __init__

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def __init__(self, iteratorFunc, cmdLine=None, defaultAlgoArgs = None, dirName=""):
        """
        Store the algorithm parameters, the dataset iterator function, the
        logging step and the results directory.

        :param iteratorFunc: kept as ``self.getIteratorFunc``
        :param cmdLine: not used in the visible part of this constructor
        :param defaultAlgoArgs: passed to ``ClusterExpHelper.newAlgoParams``
        :param dirName: sub-directory of "cluster/" used for the results
        """
        # Algorithm parameters: class defaults merged with the user's values
        mergedArgs = ClusterExpHelper.newAlgoParams(defaultAlgoArgs)
        self.algoArgs = mergedArgs

        # Dataset access
        self.getIteratorFunc = iteratorFunc

        # Output is printed every logStep steps
        self.logStep = 10

        # Base directory for results
        outputRoot = PathDefaults.getOutputDir()
        self.resultsDir = outputRoot + "cluster/" + dirName + "/"

        # update algoParams from command line

示例11: testWriteToFile

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def testWriteToFile(self):
        """
        Fill a DictGraph with random vertex values and write it out with
        CsvVertexWriter to "test/vertices" under the output directory.
        """
        numVertices, numFeatures = 5, 3
        V = numpy.random.rand(numVertices, numFeatures)

        graph = DictGraph()
        # One vertex per row of V, indexed from 0
        for vertexIndex in range(numVertices):
            graph.setVertex(vertexIndex, V[vertexIndex, :])

        fileName = PathDefaults.getOutputDir() + "test/vertices"
        CsvVertexWriter().writeToFile(fileName, graph)



示例12: __init__

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def __init__(self, maxIter=None, iterStartTimeStamp=None):
        """
        Return a training and test set for netflix based on the time each
        rating was made. There are 62 iterations.

        :param maxIter: stored as ``self.maxIter``; logged when not None
        :param iterStartTimeStamp: starting time stamp of the iterator; when
            None the time stamp of datetime(2001,1,1) in local time is used
        """
        self.timeStep = timedelta(30).total_seconds()
        #startDate is used to convert dates into ints
        #self.startDate = datetime(1998,1,1)
        #self.endDate = datetime(2005,12,31)
        #iterStartDate is the starting date of the iterator
        if iterStartTimeStamp is not None:
            self.iterStartTimeStamp = iterStartTimeStamp
        else:
            # Only fall back to the default when the caller gave no value
            self.iterStartTimeStamp = time.mktime(datetime(2001,1,1).timetuple())

        self.startMovieID = 1
        self.endMovieID = 17770
        self.numMovies = 17770
        self.numRatings = 100480507
        self.numProbeMovies = 16938
        self.numProbeRatings = 1408395
        self.numCustomers = 480189
        outputDir = PathDefaults.getOutputDir() + "recommend/netflix/"

        # Make sure the (nested) output directory exists before it is used
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        self.ratingFileName = outputDir + "data.npz"
        self.custDictFileName = outputDir + "custIdDict.pkl"
        self.probeFileName = PathDefaults.getDataDir() + "netflix/probe.txt"
        self.testRatingsFileName = outputDir + "test_data.npz"
        self.isTrainRatingsFileName = outputDir + "is_train.npz"
        self.maxIter = maxIter
        self.trainSplit = 4.0/5

        if self.maxIter is not None:
            logging.debug("Maximum number of iterations: " + str(self.maxIter))

示例13: recommend

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
def recommend(learner):
    """
    Take a list of coauthors and read in the complete graph into a sparse
    matrix X such that X_ij = k means author i has worked with j, k times. Then
    do matrix factorisation on the resulting methods.

    The matrix is read from "erasm/Toy" under the output directory, cut down
    to its first ``numExamples`` rows and passed through ``preprocess``. Its
    nonzero entries are split into ``numFolds`` cross validation folds, each
    fold is evaluated with ``computeTestError`` in a process pool, and the
    mean of the returned values is saved with ``numpy.savez``.

    :param learner: passed to ``computeTestError`` with each train/test pair;
        its ``name()`` is used in the results file name
    """
    outputDir = PathDefaults.getOutputDir() + "erasm/"
    matrixFileName = outputDir + "Toy"
    numExamples = 50
    numFolds = 5
    X = scipy.io.mmread(matrixFileName)
    X = scipy.sparse.csr_matrix(X)
    logging.debug("Loaded matrix " + str(X.shape) + " with " + str(X.getnnz()) + " non zeros")
    X = X[0:numExamples, :]
    X, _ = preprocess(X)

    #Take out some ratings to form a training set
    rowInds, colInds = X.nonzero()
    randInds = numpy.random.permutation(rowInds.shape[0])
    indexList = Sampling.crossValidation(numFolds, rowInds.shape[0])
    paramList = []
    for trnIdx, tstIdx in indexList:
        trainInds = randInds[trnIdx]
        testInds = randInds[tstIdx]
        trainX = SparseUtils.selectMatrix(X, rowInds[trainInds], colInds[trainInds]).tocsr()
        testX = SparseUtils.selectMatrix(X, rowInds[testInds], colInds[testInds]).tocsr()
        paramList.append((trainX, testX, learner))

    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    try:
        results = pool.map(computeTestError, paramList)
    finally:
        # Always release the worker processes, even if a fold fails
        pool.close()
        pool.join()

    testErrors = numpy.array(results)
    meanTestErrors = testErrors.mean()
    logging.debug("Test errors = " + str(meanTestErrors))
    errorFileName = outputDir + "results_" + learner.name()
    numpy.savez(errorFileName, meanTestErrors)
    logging.debug("Saved results as " + errorFileName)

示例14: computeLearningRates

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
def computeLearningRates(datasetNames, numProcesses, fileNameSuffix, learnerName, sampleSizes, foldsSet):
    """
    Compute learning rates for every dataset, realisation and sample size and
    save them, one ".npz" file per dataset.

    A dataset is skipped when its output file is locked or already exists.

    :param datasetNames: sequence of entries whose item 0 is the dataset name
        and item 1 the number of realisations
    :param numProcesses: passed to ``getSetup``
    :param fileNameSuffix: appended to the dataset name to form the file name
    :param learnerName: passed to ``getSetup`` to choose the learner
    :param sampleSizes: array of training sample sizes (indexed via ``shape[0]``)
    :param foldsSet: passed to ``learner.learningRate``
    """
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/"
    outputDir = PathDefaults.getOutputDir() + "modelPenalisation/"

    learner, loadMethod, dataDir, outputDir, paramDict = getSetup(learnerName, dataDir, outputDir, numProcesses)
    numSampleSizes = sampleSizes.shape[0]

    for dataset in datasetNames:
        datasetName = dataset[0]
        logging.debug("Learning using dataset " + datasetName)
        outfileName = outputDir + datasetName + fileNameSuffix

        # NOTE(review): the lock is only queried here, never acquired or
        # released -- confirm against the full source.
        fileLock = FileLock(outfileName + ".npz")
        if not fileLock.isLocked() and not fileLock.fileExists():
            numRealisations = dataset[1]
            # The trailing shape of the results is not known up front, so the
            # grid is allocated from the first result. A fixed 2-D grid cannot
            # be indexed as [j, k, :].
            betaGrids = None

            for k in range(numSampleSizes):
                sampleSize = sampleSizes[k]
                logging.debug("Using sample size " + str(sampleSize))
                for j in range(numRealisations):
                    Util.printIteration(j, 1, numRealisations, "Realisation: ")
                    trainX, trainY, testX, testY = loadMethod(dataDir, datasetName, j)
                    # Random subsample of the training examples
                    trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
                    validX = trainX[trainInds,:]
                    validY = trainY[trainInds]
                    betas = numpy.asarray(learner.learningRate(validX, validY, foldsSet, paramDict))
                    if betaGrids is None:
                        betaGrids = numpy.zeros((numRealisations, numSampleSizes) + betas.shape)
                    betaGrids[j, k, ...] = betas

            if betaGrids is None:
                # Nothing was computed (no realisations or no sample sizes)
                betaGrids = numpy.zeros((numRealisations, numSampleSizes))

            numpy.savez(outfileName, betaGrids)
            logging.debug("Saved results as file " + outfileName + ".npz")

示例15: testWriteToFile

# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getOutputDir [as 别名]
    def testWriteToFile(self):
        """
        Write each of the six fixture graphs to its own file under "test/" in
        the output directory using PajekWriter.
        """
        writer = PajekWriter()
        outDir = PathDefaults.getOutputDir() + "test/"
        #Have to check the files

        # Dense graphs
        writer.writeToFile(outDir + "denseTestUndirected", self.dGraph1)
        writer.writeToFile(outDir + "denseTestDirected", self.dGraph2)

        # Sparse graphs
        writer.writeToFile(outDir + "sparseTestUndirected", self.sGraph1)
        writer.writeToFile(outDir + "sparseTestDirected", self.sGraph2)

        # Dictionary graphs
        writer.writeToFile(outDir + "dictTestUndirected", self.dctGraph1)
        writer.writeToFile(outDir + "dictTestDirected", self.dctGraph2)

