本文整理汇总了Python中apgl.util.PathDefaults.PathDefaults.getDataDir方法的典型用法代码示例。如果您正苦于以下问题:Python PathDefaults.getDataDir方法的具体用法?Python PathDefaults.getDataDir怎么用?Python PathDefaults.getDataDir使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类apgl.util.PathDefaults.PathDefaults
的用法示例。
在下文中一共展示了PathDefaults.getDataDir方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def __init__(self, field):
    """
    Set up file locations and matching parameters for extracting expert
    reputations in the given DBLP field, then run the cleaning and
    matching pipeline.

    :param field: name of the research field, used to locate the results
        directory containing the experts list and match output files.
    """
    numpy.random.seed(21)
    dataDir = PathDefaults.getDataDir() + "dblp/"
    self.xmlFileName = dataDir + "dblp.xml"
    self.xmlCleanFilename = dataDir + "dblpClean.xml"

    resultsDir = PathDefaults.getDataDir() + "reputation/" + field + "/"
    self.expertsFileName = resultsDir + "experts.txt"
    self.expertMatchesFilename = resultsDir + "experts_matches.csv"
    self.trainExpertMatchesFilename = resultsDir + "experts_train_matches.csv"
    self.testExpertMatchesFilename = resultsDir + "experts_test_matches.csv"
    self.coauthorsFilename = resultsDir + "coauthors.csv"
    self.publicationsFilename = resultsDir + "publications.csv"

    # Parameters for streaming through the (very large) XML file
    self.stepSize = 100000
    self.numLines = 33532888
    self.publicationTypes = set(["article", "inproceedings", "proceedings", "book", "incollection", "phdthesis", "mastersthesis", "www"])
    # Threshold parameters for the expert-matching step
    self.p = 0.5
    self.matchCutoff = 0.95

    self.cleanXML()
    self.matchExperts()
    # Fixed grammar in the log message ("if not ready done" -> "if not already done")
    logging.warning("Now you must disambiguate the matched experts if not already done")
示例2: processSimpleDataset
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def processSimpleDataset(name, numRealisations, split, ext=".csv", delimiter=",", usecols=None, skiprows=1, converters=None):
    """
    Load a regression dataset from a delimited text file, take the last
    column as the target, and save shuffled train/test realisations via
    preprocessSave.
    """
    numpy.random.seed(21)

    baseDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    fileName = baseDir + name + ext
    print("Loading data from file " + fileName)
    outputDir = baseDir + name + "/"

    data = numpy.loadtxt(fileName, delimiter=delimiter, skiprows=skiprows, usecols=usecols, converters=converters)
    X, y = data[:, :-1], data[:, -1]

    idx = Sampling.shuffleSplit(numRealisations, X.shape[0], split)
    preprocessSave(X, y, outputDir, idx)
示例3: testGenerateRandomGraph
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testGenerateRandomGraph(self):
    """Exercise generateRandomGraph with a freshly built small-world graph."""
    dataDir = PathDefaults.getDataDir() + "infoDiffusion/"
    egoFileName = dataDir + "EgoData.csv"
    alterFileName = dataDir + "AlterData.csv"
    infoProb = 0.1

    # Build a 1000-vertex small-world graph to feed into the simulator
    numVertices = 1000
    rewireProb = 0.1
    neighbours = 10
    generator = SmallWorldGenerator(rewireProb, neighbours)
    graph = generator.generate(SparseGraph(VertexList(numVertices, 0)))

    self.svmEgoSimulator.generateRandomGraph(egoFileName, alterFileName, infoProb, graph)
示例4: __init__
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def __init__(self, maxIter=None, iterStartTimeStamp=None):
    """
    Prepare file locations for the Erasm group-membership dataset and run
    the processing pipeline (process ratings, split, load).

    :param maxIter: maximum number of iterations to yield, or None for all.
    :param iterStartTimeStamp: unix timestamp at which iteration starts;
        defaults to 1286229600 when None.
    """
    outputDir = PathDefaults.getOutputDir() + "recommend/erasm/"
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)

    # iterStartTimeStamp is the starting date of the iterator.
    # Use an identity test against None rather than "!=" (PEP 8); a
    # timestamp of 0 would otherwise be a valid explicit argument anyway.
    if iterStartTimeStamp is not None:
        self.iterStartTimeStamp = iterStartTimeStamp
    else:
        self.iterStartTimeStamp = 1286229600

    # One iteration step is 30 days, expressed in seconds
    self.timeStep = timedelta(30).total_seconds()

    self.ratingFileName = outputDir + "data.npz"
    self.userDictFileName = outputDir + "userIdDict.pkl"
    self.groupDictFileName = outputDir + "groupIdDict.pkl"
    self.isTrainRatingsFileName = outputDir + "is_train.npz"

    self.dataDir = PathDefaults.getDataDir() + "erasm/"
    self.dataFileName = self.dataDir + "groupMembers-29-11-12"

    self.maxIter = maxIter
    self.trainSplit = 4.0/5

    self.processRatings()
    self.splitDataset()
    self.loadProcessedData()
示例5: __init__
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def __init__(self):
    """Store the hormone label names, the metabolomic data directory and
    the category boundaries used to discretise each hormone level."""
    self.labelNames = ["Cortisol.val", "Testosterone.val", "IGF1.val"]
    self.dataDir = PathDefaults.getDataDir() + "metabolomic/"
    self.boundsDict = {
        "Cortisol": numpy.array([0, 89, 225, 573]),
        "Testosterone": numpy.array([0, 3, 9, 13]),
        "IGF1": numpy.array([0, 200, 441, 782]),
    }
示例6: testComputeIdealPenalty
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testComputeIdealPenalty(self):
    """
    Compare the ideal penalty computed from the known density grids with
    the penalty estimated from a held-out test set; they should agree to
    about 2 decimal places.
    """
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    data = numpy.load(dataDir + "toyData.npz")
    gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

    sampleSize = 100
    trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
    testX, testY = X[sampleSize:, :], y[sampleSize:]

    # We form a test set from the grid points: fullX is the cartesian
    # product gridPoints x gridPoints, laid out in column-major blocks
    fullX = numpy.zeros((gridPoints.shape[0]**2, 2))
    for m in range(gridPoints.shape[0]):
        fullX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
        fullX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

    C = 1.0
    gamma = 1.0
    args = (trainX, trainY, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X)
    penalty = computeIdealPenalty(args)

    # Now compute penalty using data
    args = (trainX, trainY, testX, testY, C, gamma)
    penalty2 = computeIdealPenalty2(args)

    # assertAlmostEquals is a deprecated alias; use assertAlmostEqual
    self.assertAlmostEqual(penalty2, penalty, 2)
示例7: testEdgeFile
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testEdgeFile(self):
    """
    Figure out the problem with the edge file: count the distinct edges
    and vertices of the Cit-HepTh citation graph and compare against the
    figures published in the paper.
    """
    dataDir = PathDefaults.getDataDir() + "cluster/"
    edgesFilename = dataDir + "Cit-HepTh.txt"

    edges = {}
    vertices = {}

    # Context manager guarantees the file is closed even if an assertion
    # fails; also avoid shadowing the builtin name "file"
    with open(edgesFilename, 'r') as edgeFile:
        # Skip the 4 header lines
        for _ in range(4):
            edgeFile.readline()

        for line in edgeFile:
            (vertex1, sep, vertex2) = line.partition("\t")
            vertex1 = vertex1.strip()
            vertex2 = vertex2.strip()

            edges[(vertex1, vertex2)] = 0
            vertices[vertex1] = 0
            vertices[vertex2] = 0

    # It says there are 352807 edges in paper and 27770 vertices
    # (assertEquals is a deprecated alias; use assertEqual)
    self.assertEqual(len(edges), 352807)
    self.assertEqual(len(vertices), 27770)
示例8: main
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def main(argv=None):
    """
    Command-line entry point: parse options and generate the Bemol data
    files.

    :param argv: argument list, defaulting to sys.argv when None.
    :return: 0 after printing help, 2 on a usage error, None otherwise.
    """
    if argv is None:
        argv = sys.argv
    try:
        # read options
        try:
            opts, args = getopt.getopt(argv[1:], "hd:n:D", ["help", "dir=", "nb_user=", "debug"])
        except getopt.error as msg:
            raise RGUsage(msg)
        # apply options; renamed local so it no longer shadows builtin dir()
        dataDir = PathDefaults.getDataDir() + "cluster/"
        nb_user = None
        log_level = logging.INFO
        for o, a in opts:
            if o in ("-h", "--help"):
                print(__doc__)
                return 0
            elif o in ("-d", "--dir"):
                dataDir = a
            elif o in ("-n", "--nb_user"):
                nb_user = int(a)
            elif o in ("-D", "--debug"):
                log_level = logging.DEBUG
        logging.basicConfig(stream=sys.stdout, level=log_level, format='%(levelname)s (%(asctime)s):%(message)s')
        # process: generate data files
        BemolData.generate_data_file(dataDir, nb_user)
    except RGUsage as err:
        logging.error(err.msg)
        logging.error("for help use --help")
        return 2
示例9: testToyData
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testToyData(self):
    """
    Numerically integrate the toy densities over the grid (averaging the
    4 corners of each cell) and check that p(x) and the class-weighted
    mixture both integrate to 1.
    """
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    data = numpy.load(dataDir + "toyData.npz")
    gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

    pxSum = 0
    pY1XSum = 0
    pYminus1XSum = 0
    px2Sum = 0

    # Area of one grid cell (the grid is uniformly spaced)
    squareArea = (gridPoints[1]-gridPoints[0])**2

    for i in range(gridPoints.shape[0]-1):
        for j in range(gridPoints.shape[0]-1):
            # Average each density over the 4 corners of cell (i, j)
            px = (pdfX[i,j]+pdfX[i+1,j]+pdfX[i, j+1]+pdfX[i+1, j+1])/4
            pxSum += px*squareArea

            pY1X = (pdfY1X[i,j]+pdfY1X[i+1,j]+pdfY1X[i, j+1]+pdfY1X[i+1, j+1])/4
            pY1XSum += pY1X*squareArea

            pYminus1X = (pdfYminus1X[i,j]+pdfYminus1X[i+1,j]+pdfYminus1X[i, j+1]+pdfYminus1X[i+1, j+1])/4
            pYminus1XSum += pYminus1X*squareArea

            px2Sum += px*pY1X*squareArea + px*pYminus1X*squareArea

    # assertAlmostEquals is a deprecated alias; use assertAlmostEqual
    self.assertAlmostEqual(pxSum, 1)
    print(pY1XSum)
    print(pYminus1XSum)
    self.assertAlmostEqual(px2Sum, 1)
示例10: testPredict2
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testPredict2(self):
    """
    Test TreeRank on the Gauss2D dataset: train at a range of maximum
    depths and compare train/test AUCs against previously recorded values.
    """
    #Test on Gauss2D dataset
    dataDir = PathDefaults.getDataDir()

    fileName = dataDir + "Gauss2D_learn.csv"
    XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
    X = XY[:, 0:2]
    y = XY[:, 2]

    fileName = dataDir + "Gauss2D_test.csv"
    testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
    testX = testXY[:, 0:2]
    testY = testXY[:, 2]

    X = Standardiser().standardiseArray(X)
    testX = Standardiser().standardiseArray(testX)

    maxDepths = range(3, 10)
    # NOTE(review): 8 recorded AUC values but range(3, 10) yields only 7
    # depths, so the last entry is never compared -- confirm intended
    trainAucs = numpy.array([0.7194734, 0.7284824, 0.7332185, 0.7348198, 0.7366152, 0.7367508, 0.7367508, 0.7367508])
    testAucs = numpy.array([0.6789078, 0.6844632, 0.6867918, 0.6873420, 0.6874820, 0.6874400, 0.6874400, 0.6874400])
    i = 0

    #The results are approximately the same, but not exactly
    for maxDepth in maxDepths:
        treeRank = TreeRank(self.leafRanklearner)
        treeRank.setMaxDepth(maxDepth)
        treeRank.learnModel(X, y)

        trainScores = treeRank.predict(X)
        testScores = treeRank.predict(testX)

        # assertAlmostEquals is a deprecated alias; use assertAlmostEqual
        self.assertAlmostEqual(Evaluator.auc(trainScores, y), trainAucs[i], 2)
        self.assertAlmostEqual(Evaluator.auc(testScores, testY), testAucs[i], 1)

        i += 1
示例11: testGraphFromMatFile
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def testGraphFromMatFile(self):
    """
    Check that the graph built from the .mat file matches the examples
    list: edge/vertex counts, the information flag on even vertices, and
    the feature vectors of the first few ego/alter pairs.
    """
    matFileName = PathDefaults.getDataDir() + "infoDiffusion/EgoAlterTransmissions1000.mat"
    sGraph = EgoUtils.graphFromMatFile(matFileName)
    examplesList = ExamplesList.readFromMatFile(matFileName)
    numFeatures = examplesList.getDataFieldSize("X", 1)

    self.assertEqual(examplesList.getNumExamples(), sGraph.getNumEdges())
    self.assertEqual(examplesList.getNumExamples()*2, sGraph.getNumVertices())
    # Use floor division: in Python 3 "/" yields a float, which breaks
    # both this comparison and the slice indices below
    self.assertEqual(numFeatures//2 + 1, sGraph.getVertexList().getNumFeatures())

    # Every even vertex has information, odd does not
    numVertexFeatures = sGraph.getVertexList().getNumFeatures()
    for i in range(0, sGraph.getNumVertices()):
        vertex = sGraph.getVertex(i)
        if i % 2 == 0:
            self.assertEqual(vertex[numVertexFeatures-1], 1)
        else:
            self.assertEqual(vertex[numVertexFeatures-1], 0)

    # Test the first few vertices are the same
    half = numFeatures//2
    for i in range(0, 10):
        vertex1 = sGraph.getVertex(i*2)[0:half]
        vertex2 = sGraph.getVertex(i*2+1)[0:half]
        vertexEx1 = examplesList.getSubDataField("X", numpy.array([i])).ravel()[0:half]
        vertexEx2 = examplesList.getSubDataField("X", numpy.array([i])).ravel()[half:numFeatures]
        self.assertTrue((vertex1 == vertexEx1).all())
        self.assertTrue((vertex2 == vertexEx2).all())
示例12: processRatings
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def processRatings(self):
    """
    Convert the dataset into a matrix and save the results for faster
    access.

    Reads every per-movie ratings file in the Netflix training set,
    assigns each customer a dense index, and saves (movieIds, custIds,
    ratings, dates) as compressed numpy arrays plus the customer-id dict
    as a pickle.  Skips all work if both output files already exist.
    """
    if not os.path.exists(self.ratingFileName) or not os.path.exists(self.custDictFileName):
        dataDir = PathDefaults.getDataDir() + "netflix/training_set/"
        logging.debug("Processing ratings given in " + dataDir)

        custIdDict = {}
        custIdSet = set([])

        # Compact typed arrays keep memory down while accumulating
        movieIds = array.array("I")
        custIds = array.array("I")
        ratings = array.array("B")
        dates = array.array("L")
        j = 0

        for i in range(self.startMovieID, self.endMovieID+1):
            Util.printIteration(i-1, 1, self.endMovieID-1)
            # Context manager closes each per-movie file (the original
            # leaked one open file handle per movie)
            with open(dataDir + "mv_" + str(i).zfill(7) + ".txt") as ratingsFile:
                # First line is the movie-id header; skip it
                ratingsFile.readline()

                for line in ratingsFile:
                    vals = line.split(",")
                    custId = int(vals[0])

                    # Assign a dense index to each newly seen customer
                    if custId not in custIdSet:
                        custIdSet.add(custId)
                        custIdDict[custId] = j
                        custInd = j
                        j += 1
                    else:
                        custInd = custIdDict[custId]

                    rating = int(vals[1])
                    t = datetime.strptime(vals[2].strip(), "%Y-%m-%d")

                    movieIds.append(i-1)
                    custIds.append(custInd)
                    ratings.append(rating)
                    dates.append(int(time.mktime(t.timetuple())))

        movieIds = numpy.array(movieIds, numpy.uint32)
        custIds = numpy.array(custIds, numpy.uint32)
        ratings = numpy.array(ratings, numpy.uint8)
        dates = numpy.array(dates, numpy.uint32)

        assert ratings.shape[0] == self.numRatings

        numpy.savez(self.ratingFileName, movieIds, custIds, ratings, dates)
        logging.debug("Saved ratings file as " + self.ratingFileName)

        # Close the pickle file explicitly as well
        with open(self.custDictFileName, 'wb') as custDictFile:
            pickle.dump(custIdDict, custDictFile)
        logging.debug("Saved custIdDict as " + self.custDictFileName)
    else:
        logging.debug("Ratings file " + str(self.ratingFileName) + " already processed")
示例13: loadData
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def loadData():
    """
    Return the raw spectra and the MDS transformed data as well as the DataFrame
    for the MDS data.
    """
    utilsLib = importr('utils')

    dataDir = PathDefaults.getDataDir() + "metabolomic/"
    fileName = dataDir + "data.RMN.total.6.txt"
    # Read the NMR table via R's read.table (rpy2); columns 1..950 hold
    # the spectrum, transposed so rows become samples
    df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
    maxNMRIndex = 951
    X = df.rx(robjects.IntVector(range(1, maxNMRIndex)))
    X = numpy.array(X).T

    # Load age and normalise (missing values are assigned the mean)
    ages = numpy.array(df.rx(robjects.StrVector(["Age"]))).ravel()
    meanAge = numpy.mean(ages[numpy.logical_not(numpy.isnan(ages))])
    ages[numpy.isnan(ages)] = meanAge
    ages = Standardiser().standardiseArray(ages)

    # Standardised copy of the raw spectra
    Xs = X.copy()
    standardiser = Standardiser()
    Xs = standardiser.standardiseArray(X)

    # Second (sportsmen) data set, columns 1..418
    fileName = dataDir + "data.sportsmen.log.AP.1.txt"
    df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
    maxNMRIndex = 419
    X2 = df.rx(robjects.IntVector(range(1, maxNMRIndex)))
    X2 = numpy.array(X2).T

    # Load the OPLS corrected files (each overwrites df; the index ranges
    # differ per file)
    fileName = dataDir + "IGF1.log.OSC.1.txt"
    df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
    minNMRIndex = 22
    maxNMRIndex = 441
    Xopls1 = df.rx(robjects.IntVector(range(minNMRIndex, maxNMRIndex)))
    Xopls1 = numpy.array(Xopls1).T

    fileName = dataDir + "cort.log.OSC.1.txt"
    df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
    minNMRIndex = 20
    maxNMRIndex = 439
    Xopls2 = df.rx(robjects.IntVector(range(minNMRIndex, maxNMRIndex)))
    Xopls2 = numpy.array(Xopls2).T

    fileName = dataDir + "testo.log.OSC.1.txt"
    df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
    minNMRIndex = 22
    maxNMRIndex = 441
    Xopls3 = df.rx(robjects.IntVector(range(minNMRIndex, maxNMRIndex)))
    Xopls3 = numpy.array(Xopls3).T

    # Let's load all the label data here.  NOTE(review): labels are built
    # from the LAST df read (testo.log.OSC.1.txt) — confirm intended
    labelNames = MetabolomicsUtils.getLabelNames()
    YList = MetabolomicsUtils.createLabelList(df, labelNames)

    return X, X2, Xs, (Xopls1, Xopls2, Xopls3), YList, ages, df
示例14: profileClusterFromIterator
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def profileClusterFromIterator(self):
    # Profile clusterFromIterator over an iterator of growing subgraphs
    # and print cumulative per-iteration timings.
    iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
    dataDir = PathDefaults.getDataDir() + "cluster/"
    #iterator = getBemolGraphIterator(dataDir)

    def run():
        # verbose=True makes clusterFromIterator also return timing and bound lists
        clusterList, timeList, boundList = self.clusterer.clusterFromIterator(iterator, verbose=True)
        print(timeList.cumsum(0))

    # The string 'run()' is evaluated by the profiler in the given scopes,
    # so the nested function name must stay "run"
    ProfileUtils.profile('run()', globals(), locals())
示例15: getIterator
# 需要导入模块: from apgl.util.PathDefaults import PathDefaults [as 别名]
# 或者: from apgl.util.PathDefaults.PathDefaults import getDataDir [as 别名]
def getIterator():
    """Return a Bemol graph iterator over the cluster data set, restricted
    to a fixed window of iterations."""
    dataDir = PathDefaults.getDataDir() + "cluster/"
    nbUser = 10000  # set to 'None' to have all users
    nbPurchasesPerIt = 500  # set to 'None' to take all the purchases per date
    startingIteration = 300
    endingIteration = 600  # set to 'None' to have all iterations
    stepSize = 1

    graphIterator = BemolData.getGraphIterator(dataDir, nbUser, nbPurchasesPerIt)
    return itertools.islice(graphIterator, startingIteration, endingIteration, stepSize)