本文整理汇总了Python中sandbox.util.Util.Util.savePickle方法的典型用法代码示例。如果您正苦于以下问题:Python Util.savePickle方法的具体用法?Python Util.savePickle怎么用?Python Util.savePickle使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sandbox.util.Util.Util的用法示例。
在下文中一共展示了Util.savePickle方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: computeLDA
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def computeLDA(self):
    """
    Fit a gensim LDA model on the saved document-term matrix and pickle it
    together with a similarity index built over the LDA-transformed corpus.

    Nothing is recomputed when self.modelFilename already exists, unless
    self.overwriteModel is set.
    """
    modelExists = os.path.exists(self.modelFilename)
    if modelExists and not self.overwriteModel:
        logging.debug("File already exists: " + self.modelFilename)
        return

    self.vectoriseDocuments()
    self.loadVectoriser()

    corpus = gensim.corpora.mmcorpus.MmCorpus(self.docTermMatrixFilename + ".mtx")

    # Map each feature column index to its term
    featureNames = self.vectoriser.get_feature_names()
    id2WordDict = dict(enumerate(featureNames))

    logging.getLogger('gensim').setLevel(logging.INFO)
    lda = LdaModel(
        corpus,
        num_topics=self.k,
        id2word=id2WordDict,
        chunksize=self.chunksize,
        distributed=False,
    )
    index = gensim.similarities.docsim.Similarity(self.indexFilename, lda[corpus], num_features=self.k)

    Util.savePickle([lda, index], self.modelFilename, debug=True)
    gc.collect()
示例2: computeConfigScalarStats
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def computeConfigScalarStats():
    """
    Compute scalar statistics for each saved configuration model graph and
    pickle them, one results file per graph.

    Graphs whose results file already exists are skipped, so an interrupted
    run can be resumed.
    """
    logging.info("Computing configuration model scalar stats")

    graphPrefix = resultsDir + "ConfigInfectGraph"
    statsPrefix = resultsDir + "ConfigInfectGraphScalarStats"

    for graphInd in range(numConfigGraphs):
        suffix = str(graphInd)
        statsPath = statsPrefix + suffix

        if os.path.isfile(statsPath):
            continue

        configGraph = SparseGraph.load(graphPrefix + suffix)
        statsArray = graphStats.sequenceScalarStats(configGraph, subgraphIndicesList, slowStats, treeStats=True)
        Util.savePickle(statsArray, statsPath, True)
        gc.collect()

    logging.info("All done")
示例3: computeConfigVectorStats
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def computeConfigVectorStats():
    """
    Compute vector statistics for each saved configuration model graph and
    pickle them, one results file per graph.

    Graphs whose results file already exists are skipped.
    """
    #Note: We can make this multithreaded
    logging.info("Computing configuration model vector stats")

    graphPrefix = resultsDir + "ConfigInfectGraph"
    statsPrefix = resultsDir + "ConfigInfectGraphVectorStats"

    for graphInd in range(numConfigGraphs):
        suffix = str(graphInd)
        statsPath = statsPrefix + suffix

        if os.path.isfile(statsPath):
            continue

        configGraph = SparseGraph.load(graphPrefix + suffix)
        statsDictList = graphStats.sequenceVectorStats(configGraph, subgraphIndicesList2, eigenStats=False, treeStats=True)
        Util.savePickle(statsDictList, statsPath, False)
        gc.collect()

    logging.info("All done")
示例4: coauthorsGraph
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def coauthorsGraph(self, field, relevantExperts):
    """
    Build the coauthor graph around the relevant experts, or load it from the
    pickle written by a previous call.

    :param field: the field used to resolve the cache filename (and passed on
        to the graph builder when self.knownAuthors is set)
    :param relevantExperts: an iterable of experts; duplicates are removed
        before the graph is built
    :return: the pair (graph, authorIndexer)
    """
    coauthorsFilename = self.getCoauthorsFilename(field)

    if os.path.exists(coauthorsFilename) and not self.overwriteGraph:
        logging.debug("Files already generated: " + coauthorsFilename)
        graph, authorIndexer = Util.loadPickle(coauthorsFilename)
        return graph, authorIndexer

    logging.debug("Finding coauthors of relevant experts")

    if self.knownAuthors:
        graph, authorIndexer = self.coauthorsGraphFromAuthors2(set(relevantExperts), field)
    else:
        graph, authorIndexer = self.coauthorsGraphFromAuthors(set(relevantExperts))

    logging.debug(graph.summary())
    Util.savePickle([graph, authorIndexer], coauthorsFilename, debug=True)

    return graph, authorIndexer
示例5: plotOtherStats
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def plotOtherStats():
    """
    Plot mean harmonic geodesic distances for subgroups of the contact graph.

    The subgroup statistics are recomputed and pickled when saveResults is
    set, and loaded from the pickle otherwise. Two figures are written to
    figureDir and the module-level plotInd counter is advanced once per figure.
    """
    global plotInd

    #Let's look at geodesic distances in subgraphs and communities
    logging.info("Computing other stats")

    otherStatsFileName = resultsDir + "ContactGrowthOtherStats.pkl"
    hivGraphStats = HIVGraphStatistics(fInds)

    if not saveResults:
        statsArray = Util.loadPickle(otherStatsFileName)
    else:
        statsArray = hivGraphStats.sequenceScalarStats(sGraph, subgraphIndicesList)
        #statsArray["dayList"] = absDayList
        Util.savePickle(statsArray, otherStatsFileName, True)

    #Just load the harmonic geodesic distances of the full graph
    fullStatsFileName = resultsDir + "ContactGrowthScalarStats.pkl"
    statsArray2 = Util.loadPickle(fullStatsFileName)

    # Negative and infinite distances are plotted as zero.
    # NOTE(review): if statsArray is a NumPy array this slice is a view, so
    # the clamping below also modifies statsArray in place - confirm intended.
    msmGeodesic = statsArray[:, hivGraphStats.msmGeodesicIndex]
    msmGeodesic[msmGeodesic < 0] = 0
    msmGeodesic[msmGeodesic == float('inf')] = 0

    #Output all the results into plots
    topDegreeGeodesic = statsArray[:, hivGraphStats.mostConnectedGeodesicIndex]
    plt.figure(plotInd)
    plt.plot(absDayList, msmGeodesic, 'k-', absDayList, topDegreeGeodesic, 'k--')
    plt.xticks(locs, labels)
    #plt.ylim([0, 0.1])
    plt.xlabel("Year")
    plt.ylabel("Mean harmonic geodesic distance")
    plt.legend(("MSM individuals", "Top 10% degree"), loc="upper right")
    plt.savefig(figureDir + "MSM10Geodesic" + ".eps")
    plotInd += 1

    allGeodesic = statsArray2[:, graphStats.harmonicGeoDistanceIndex]
    menGeodesic = statsArray[:, hivGraphStats.menSubgraphGeodesicIndex]
    plt.figure(plotInd)
    plt.plot(absDayList, allGeodesic, 'k-', absDayList, menGeodesic, 'k--')
    plt.xticks(locs, labels)
    plt.ylim([0, 200.0])
    plt.xlabel("Year")
    plt.ylabel("Mean harmonic geodesic distance")
    plt.legend(("All individuals", "Men subgraph"), loc="upper right")
    plt.savefig(figureDir + "MenSubgraphGeodesic" + ".eps")
    plotInd += 1
示例6: saveStats
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def saveStats(args):
    """
    Simulate the HIV model for one parameter vector and pickle its statistics.

    The results file is guarded by a FileLock so that concurrent workers do
    not compute the same index twice. The lock is released even when the
    simulation or the pickling fails, so a failed index can be retried.

    :param args: a tuple (i, theta). i selects the row of thetaArray and
        names the output file "SimStats<i>.pkl" inside outputDir.
    """
    # theta is unpacked to match the (i, theta) tuples handed to the pool;
    # the model itself reads the parameters from thetaArray[i].
    i, theta = args

    resultsFileName = outputDir + "SimStats" + str(i) + ".pkl"
    lock = FileLock(resultsFileName)

    if lock.fileExists() or lock.isLocked():
        logging.debug("Results already computed: " + str(resultsFileName))
        return

    lock.lock()
    try:
        model = HIVModelUtils.createModel(targetGraph, startDate, endDate, recordStep, M, matchAlpha, breakSize, matchAlg, theta=thetaArray[i])
        times, infectedIndices, removedIndices, graph, compTimes, graphMetrics = HIVModelUtils.simulate(model)

        # Statistics are regenerated on a regular time grid, replacing the
        # times and index lists returned by the simulation itself.
        times = numpy.arange(startDate, endDate+1, recordStep)
        vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees = HIVModelUtils.generateStatistics(graph, times)

        stats = times, vertexArray, infectedIndices, removedGraphStats, finalRemovedDegrees, graphMetrics.objectives, compTimes
        Util.savePickle(stats, resultsFileName)
    finally:
        # Without this a failure would leave the lock in place
        lock.unlock()
示例7: computeLSI
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def computeLSI(self):
    """
    Compute using the LSI version in gensim.

    The fitted model and a similarity index over the LSI-transformed corpus
    are pickled to self.modelFilename. Nothing is recomputed when that file
    already exists, unless self.overwriteModel is set.
    """
    modelExists = os.path.exists(self.modelFilename)
    if modelExists and not self.overwriteModel:
        logging.debug("File already exists: " + self.modelFilename)
        return

    self.vectoriseDocuments()
    self.loadVectoriser()

    corpus = gensim.corpora.mmcorpus.MmCorpus(self.docTermMatrixFilename + ".mtx")

    # Map each feature column index to its term
    featureNames = self.vectoriser.get_feature_names()
    id2WordDict = dict(enumerate(featureNames))

    logging.getLogger('gensim').setLevel(logging.INFO)
    lsi = LsiModel(
        corpus,
        num_topics=self.k,
        id2word=id2WordDict,
        chunksize=self.chunksize,
        distributed=False,
    )
    index = gensim.similarities.docsim.Similarity(self.indexFilename, lsi[corpus], num_features=self.k)

    Util.savePickle([lsi, index], self.modelFilename, debug=True)
    gc.collect()
示例8: vectoriseDocuments
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def vectoriseDocuments(self):
    """
    We want to go through the dataset and vectorise all the title+abstracts.
    The results are saved in TF-IDF format in a matrix X.

    Besides the matrix, the author list, the citation list and the fitted
    vectoriser are pickled. Nothing is recomputed when the matrix, author
    list and vectoriser files all exist, unless self.overwriteVectoriser is
    set.
    """
    outputsExist = (
        os.path.exists(self.docTermMatrixFilename + ".mtx")
        and os.path.exists(self.authorListFilename)
        and os.path.exists(self.vectoriserFilename)
    )

    if outputsExist and not self.overwriteVectoriser:
        logging.debug("Author list, document-term matrix and vectoriser already generated: ")
        return

    logging.debug("Vectorising documents")

    authorList, documentList, citationList = self.readAuthorsAndDocuments()
    Util.savePickle(authorList, self.authorListFilename, debug=True)
    Util.savePickle(citationList, self.citationListFilename, debug=True)

    logging.debug("Generating TFIDF features")
    # numpy.float was an alias of the builtin float (float64 as a dtype) and
    # no longer exists in NumPy >= 1.24, so the dtype is spelled explicitly.
    vectoriser = text.TfidfVectorizer(
        min_df=self.minDf,
        ngram_range=(1, self.ngram),
        binary=self.binary,
        sublinear_tf=self.sublinearTf,
        norm="l2",
        max_df=0.95,
        stop_words="english",
        tokenizer=PorterTokeniser(),
        max_features=self.numFeatures,
        dtype=numpy.float64,
    )

    X = vectoriser.fit_transform(documentList)
    # Large intermediates are released as soon as they are no longer needed
    del documentList

    scipy.io.mmwrite(self.docTermMatrixFilename, X)
    logging.debug("Wrote X with shape " + str(X.shape) + " and " + str(X.nnz) + " nonzeros to file " + self.docTermMatrixFilename + ".mtx")
    del X

    #Save vectoriser - note that we can't pickle the tokeniser so it needs to be reset when loaded
    vectoriser.tokenizer = None
    Util.savePickle(vectoriser, self.vectoriserFilename, debug=True)

    del vectoriser
    gc.collect()
示例9: range
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
# NOTE(review): this is an excerpt of a larger script. The `if` that the
# `else:` below belongs to is outside the excerpt and the original
# indentation has been lost, so the nesting shown here is not reliable.
# Build one (index, parameter row) task per row of thetaArray.
paramList = []
for i in range(thetaArray.shape[0]):
paramList.append((i, thetaArray[i, :]))
# Run saveStats over all tasks with one worker process per CPU; pool.map
# blocks until every task has finished.
pool = multiprocessing.Pool(multiprocessing.cpu_count())
resultIterator = pool.map(saveStats, paramList)
#resultIterator = map(saveStats, paramList)
pool.terminate()
#Now save the statistics on the target graph
times = numpy.arange(startDate, endDate+1, recordStep)
vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees = HIVModelUtils.generateStatistics(targetGraph, times)
stats = vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees
resultsFileName = outputDir + "IdealStats.pkl"
Util.savePickle(stats, resultsFileName)
else:
# The matplotlib backend must be selected before pyplot is imported.
import matplotlib
matplotlib.use("GTK3Agg")
import matplotlib.pyplot as plt
plotStyles = ['k-', 'kx-', 'k+-', 'k.-', 'k*-']
N, resultsDir, outputDir, recordStep, startDate, endDate, prefix, targetGraph, breakSize, numEpsilons, M, matchAlpha, matchAlg, numInds = loadParams(0)
inds = range(numInds)
# Number of recorded time points between startDate and endDate inclusive
numRecordSteps = int((endDate-startDate)/recordStep)+1
#We store: number of detections, CT detections, rand detections, infectives, max component size, num components, edges, objectives
numMeasures = 12
numTimings = 2
示例10: plotVectorStats
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def plotVectorStats():
    """
    Plot degree, tree-size and tree-depth distributions of the infection
    graph for each day in dayList2, then print LaTeX rows for the tree
    distributions and for the vertex/edge counts.

    The vector statistics are recomputed and pickled when saveResults is set,
    and loaded from the pickle otherwise. Every date is drawn onto the same
    three figures, which are saved to figureDir as .eps files.
    """
    global plotInd

    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    resultsFileName = resultsDir + "InfectGrowthVectorStats.pkl"

    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2, True)
        Util.savePickle(statsDictList, resultsFileName, True)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

    numDays = len(dayList2)
    treeSizesDistArray = numpy.zeros((numDays, 3000))
    treeDepthsDistArray = numpy.zeros((numDays, 100))

    # numpy.int was an alias of the builtin int and no longer exists in
    # NumPy >= 1.24, so the builtin is used directly.
    numVerticesEdgesArray = numpy.zeros((numDays, 2), int)
    numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
    numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

    for j in range(numDays):
        dateStr = str(DateUtils.getDateStrFromDay(dayList2[j], startYear))
        logging.info(dateStr)
        statsDict = statsDictList[j]

        degreeDist = statsDict["outDegreeDist"]
        degreeDist = degreeDist/float(numpy.sum(degreeDist))

        # Computed but not plotted in this function
        maxEigVector = statsDict["maxEigVector"]
        maxEigVector = numpy.flipud(numpy.sort(numpy.abs(maxEigVector)))
        maxEigVector = numpy.log(maxEigVector[maxEigVector>0])

        treeSizesDist = statsDict["treeSizesDist"]
        treeSizesDist = numpy.array(treeSizesDist, numpy.float64)/numpy.sum(treeSizesDist)
        treeSizesDistArray[j, 0:treeSizesDist.shape[0]] = treeSizesDist

        # Tree depths are kept as stored, without normalising
        treeDepthsDist = statsDict["treeDepthsDist"]
        treeDepthsDist = numpy.array(treeDepthsDist, numpy.float64)
        treeDepthsDistArray[j, 0:treeDepthsDist.shape[0]] = treeDepthsDist

        # Restart from the same figure number for every date so that all
        # dates share the same three figures.
        plotInd2 = plotInd

        plt.figure(plotInd2)
        plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, label=dateStr)
        plt.xlabel("Degree")
        plt.ylabel("Probability")
        plt.ylim((0, 0.8))
        plt.legend()
        plt.savefig(figureDir + "DegreeDist" + ".eps")
        plotInd2 += 1

        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeSizesDist.shape[0])[treeSizesDist!=0], numpy.log(treeSizesDist[treeSizesDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Size")
        plt.ylabel("log(probability)")
        plt.xlim((0, 125))
        plt.legend()
        plt.savefig(figureDir + "TreeSizeDist" + ".eps")
        plotInd2 += 1

        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeDepthsDist.shape[0])[treeDepthsDist!=0], numpy.log(treeDepthsDist[treeDepthsDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Depth")
        plt.ylabel("log(probability)")
        plt.xlim((0, 15))
        plt.legend()
        plt.savefig(figureDir + "TreeDepthDist" + ".eps")
        plotInd2 += 1

    dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]
    precision = 4

    # The tables are trimmed to the length of the distribution of the last
    # date processed in the loop above.
    treeSizesDistArray = treeSizesDistArray[:, 0:treeSizesDist.shape[0]]
    nonZeroCols = numpy.sum(treeSizesDistArray, 0)!=0
    print((Latex.array1DToRow(numpy.arange(treeSizesDistArray.shape[1])[nonZeroCols])))
    print((Latex.array2DToRows(treeSizesDistArray[:, nonZeroCols])))

    print("Tree depths")
    treeDepthsDistArray = treeDepthsDistArray[:, 0:treeDepthsDist.shape[0]]
    nonZeroCols = numpy.sum(treeDepthsDistArray, 0)!=0
    print((Latex.array1DToRow(numpy.arange(treeDepthsDistArray.shape[1])[nonZeroCols])))
    print((Latex.array2DToRows(treeDepthsDistArray[:, nonZeroCols])))
    print(numpy.sum(treeDepthsDistArray[:, 0:3], 1))

    print("Edges and verticies")
    print(Latex.listToRow(dateStrList))
    print(Latex.array2DToRows(numVerticesEdgesArray.T, precision))
示例11: plotScalarStats
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def plotScalarStats():
    """
    Plot scalar statistics of the infection graph against the mean of the
    configuration model (CM) graphs, with error bars of one standard
    deviation on the CM curves.

    The real-graph statistics are recomputed and pickled when saveResults is
    set, and loaded from the pickle otherwise. The CM statistics are always
    loaded from their per-graph pickle files. Figures are saved to figureDir
    and the module-level plotInd counter is advanced once per figure.
    """
    global plotInd

    logging.info("Computing scalar stats")
    resultsFileName = resultsDir + "InfectGrowthScalarStats.pkl"

    if saveResults:
        statsArray = graphStats.sequenceScalarStats(sGraph, subgraphIndicesList, treeStats=True)
        Util.savePickle(statsArray, resultsFileName, True)
    else:
        statsArray = Util.loadPickle(resultsFileName)

    #Output all the results into plots
    #Take the mean of the results over the configuration model graphs
    resultsFileNameBase = resultsDir + "ConfigInfectGraphScalarStats"
    numGraphs = len(subgraphIndicesList)
    configStatsArrays = numpy.zeros((numGraphs, graphStats.getNumStats(), numConfigGraphs))

    for j in range(numConfigGraphs):
        configStatsArrays[:, :, j] = Util.loadPickle(resultsFileNameBase + str(j))

    configStatsArray = numpy.mean(configStatsArrays, 2)
    configStatsStd = numpy.std(configStatsArrays, 2)

    #Make sure we don't include 0 in the array
    vertexIndex = numpy.argmax(statsArray[:, graphStats.numVerticesIndex] > 0)
    edgeIndex = numpy.argmax(statsArray[:, graphStats.numEdgesIndex] > 0)
    minIndex = numpy.maximum(vertexIndex, edgeIndex)

    def plotRealConfigError(index, styleReal, styleConfig, realLabel, configLabel):
        """
        Draw column `index` of the real and CM statistics on the current
        figure and clamp both axes to start at zero.
        """
        # plt.hold(True) used to be called here. It was removed in
        # Matplotlib 3.0, and successive plot calls add to the current axes
        # by default, so the call is dropped.
        plt.plot(absDayList, statsArray[:, index], styleReal, label=realLabel)
        errors = numpy.c_[configStatsStd[:, index], configStatsStd[:, index]].T
        plt.plot(absDayList, configStatsArray[:, index], styleConfig, label=configLabel)
        plt.errorbar(absDayList, configStatsArray[:, index], errors, linewidth=0, elinewidth=0, label="_nolegend_", ecolor=styleConfig[0])

        _, xmax = plt.xlim()
        plt.xlim((0, xmax))
        _, ymax = plt.ylim()
        plt.ylim((0, ymax))

    plt.figure(plotInd)
    plt.plot(numpy.log(statsArray[minIndex:, graphStats.numVerticesIndex]), numpy.log(statsArray[minIndex:, graphStats.numEdgesIndex]))
    plt.xlabel("log(|V|)")
    plt.ylabel("log(|E|)")
    plt.savefig(figureDir + "LogVerticesEdgesGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    # NOTE(review): both series below use style index 0, whereas the later
    # figures use indices 0 and 1 for their two series - confirm intended.
    plotRealConfigError(graphStats.numTreesIndex, plotStyles3[0], plotStyles5[0], "Trees size >= 1", "CM trees size >= 1")
    plotRealConfigError(graphStats.numNonSingletonTreesIndex, plotStyles3[0], plotStyles5[0], "Trees size >= 2", "CM trees size >= 2")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("No. trees")
    plt.legend(loc="upper left")
    plt.savefig(figureDir + "NumTreesGrowth.eps")
    plotInd += 1

    for k in range(len(dayList)):
        day = dayList[k]
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(statsArray[k, graphStats.numTreesIndex]))
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(configStatsArray[k, graphStats.numTreesIndex]))

    #Load stats from a file to get the max tree from its root
    resultsFilename = resultsDir + "treeSizesDepths.npz"
    # numpy.load opens the path in binary mode itself. The previous
    # text-mode open() cannot be read as an .npz on Python 3 and was never
    # closed. The with-block closes the archive once the columns are copied.
    with numpy.load(resultsFilename) as arrayDict:
        statsArray[:, graphStats.maxTreeDepthIndex] = arrayDict["arr_0"]
        statsArray[:, graphStats.maxTreeSizeIndex] = arrayDict["arr_1"]
        statsArray[:, graphStats.secondTreeDepthIndex] = arrayDict["arr_2"]
        statsArray[:, graphStats.secondTreeSizeIndex] = arrayDict["arr_3"]

    plt.figure(plotInd)
    plotRealConfigError(graphStats.maxTreeSizeIndex, plotStyles3[0], plotStyles5[0], "Max tree", "CM max tree")
    plotRealConfigError(graphStats.secondTreeSizeIndex, plotStyles3[1], plotStyles5[1], "2nd tree", "CM 2nd tree")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Size")
    plt.legend(loc="upper left")
    plt.savefig(figureDir + "MaxTreeGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.maxTreeDepthIndex, plotStyles3[0], plotStyles5[0], "Max tree", "CM max tree")
    plotRealConfigError(graphStats.secondTreeDepthIndex, plotStyles3[1], plotStyles5[1], "2nd tree", "CM 2nd tree")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Depth")
    plt.legend(loc="lower right")
    plt.savefig(figureDir + "MaxTreeDepthGrowth.eps")
    plotInd += 1
示例12: plotTreeStats
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def plotTreeStats():
    """
    Plot histograms of per-tree location entropy, orientation entropy and
    detection range for each day in dayList2.

    The per-tree statistics are recomputed and pickled when saveResults is
    set, and loaded from the pickle otherwise. Only trees with more than one
    vertex are measured. Three figures are saved to figureDir as .eps files,
    numbered from the module-level plotInd (which is not advanced here).
    """
    logging.info("Computing tree stats")
    resultsFileName = resultsDir + "InfectGrowthTreeStats.pkl"

    if saveResults:
        statsDictList = []
        numSubgraphs = len(subgraphIndicesList2)

        for j, subgraphIndices in enumerate(subgraphIndicesList2):
            Util.printIteration(j, 1, numSubgraphs)
            subgraph = sGraph.subgraph(subgraphIndices)
            logging.info("Finding trees")
            trees = subgraph.findTrees()
            logging.info("Computing tree statistics")

            locationEntropy = []
            orientEntropy = []
            detectionRanges = []

            for tree in trees:
                if len(tree) <= 1:
                    continue

                treeGraph = subgraph.subgraph(tree)
                vertexArray = treeGraph.getVertexList().getVertices(list(range(treeGraph.getNumVertices())))

                locationEntropy.append(Util.entropy(vertexArray[:, locationIndex]))
                orientEntropy.append(Util.entropy(vertexArray[:, orientationIndex]))

                detections = vertexArray[:, detectionIndex]
                detectionRanges.append(numpy.max(detections) - numpy.min(detections))

            statsDictList.append({
                "locationEnt": numpy.array(locationEntropy),
                "orientEnt": numpy.array(orientEntropy),
                "detectRanges": numpy.array(detectionRanges),
            })

        Util.savePickle(statsDictList, resultsFileName, True)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

    locBins = numpy.arange(0, 2.4, 0.2)
    detectBins = numpy.arange(0, 6500, 500)
    locationEntDists = []
    orientEntDists = []
    detectionDists = []

    for j in range(len(dayList2)):
        dateStr = str(DateUtils.getDateStrFromDay(dayList2[j], startYear))
        logging.info(dateStr)
        statsDict = statsDictList[j]

        locationEntDists.append(statsDict["locationEnt"])
        orientEntDists.append(statsDict["orientEnt"])
        detectionDists.append(statsDict["detectRanges"])

    # Bound outside the loop so it is defined even when dayList2 is empty
    plotInd2 = plotInd

    # `density=True` replaces the `normed=True` keyword, which was removed
    # from Matplotlib's hist in 3.1.
    plt.figure(plotInd2)
    plt.hist(locationEntDists, locBins, density=True)
    plt.xlabel("Location Entropy")
    plt.ylabel("Probability Density")
    plt.savefig(figureDir + "LocationEnt" + ".eps")
    plotInd2 += 1

    plt.figure(plotInd2)
    plt.hist(orientEntDists, density=True)
    plt.xlabel("Orientation Entropy")
    plt.ylabel("Probability Density")
    plt.savefig(figureDir + "OrientEnt" + ".eps")
    plotInd2 += 1

    plt.figure(plotInd2)
    plt.hist(detectionDists, detectBins, density=True)
    plt.xlabel("Detection Range (days)")
    plt.ylabel("Probability Density")
    plt.savefig(figureDir + "DetectionRanges" + ".eps")
    plotInd2 += 1
示例13: len
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
# NOTE(review): this is an excerpt from the middle of a larger function.
# The original indentation has been lost, so the nesting shown here is not
# reliable.
#for line in outputLists:
#fich.write(line[i])
#Add the expertise score
#outputLists.append(expertAuthorsInds)
itemList = RankAggregator.generateItemList(outputLists)
methodNames = graphRanker.getNames()
# Results are stored per field, in a file named after the topic model used.
# NOTE(review): the file has an .npz extension although it is written with
# Util.savePickle - confirm that readers expect a pickle.
if runLSI:
outputFilename = dataset.getOutputFieldDir(field) + "outputListsLSI.npz"
else:
outputFilename = dataset.getOutputFieldDir(field) + "outputListsLDA.npz"
Util.savePickle([outputLists, trainExpertMatchesInds, testExpertMatchesInds], outputFilename, debug=True)
# precisions has one row per cutoff in ns and one column per ranking method
numMethods = len(outputLists)
precisions = numpy.zeros((len(ns), numMethods))
averagePrecisions = numpy.zeros(numMethods)
for i, n in enumerate(ns):
for j in range(len(outputLists)):
precisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, outputLists[j][0:n])
for j in range(len(outputLists)):
averagePrecisions[j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, outputLists[j][0:averagePrecisionN], averagePrecisionN)
# Prepend the cutoffs as the first column of the precision table
precisions2 = numpy.c_[numpy.array(ns), precisions]
logging.debug(Latex.listToRow(methodNames))
示例14: plotVectorStats
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def plotVectorStats():
# Loads (or recomputes and pickles, when saveResults is set) the vector
# statistics of the contact graph, averages the matching statistics over the
# configuration model graphs, and fills per-date arrays.
# NOTE(review): the function is truncated in this excerpt and the original
# indentation has been lost, so the nesting shown here is not reliable.
#Finally, compute some vector stats at various points in the graph
logging.info("Computing vector stats")
global plotInd
resultsFileName = resultsDir + "ContactGrowthVectorStats.pkl"
if saveResults:
statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2)
Util.savePickle(statsDictList, resultsFileName, False)
else:
statsDictList = Util.loadPickle(resultsFileName)
#Load up configuration model results
configStatsDictList = []
resultsFileNameBase = resultsDir + "ConfigGraphVectorStats"
for j in range(numConfigGraphs):
resultsFileName = resultsFileNameBase + str(j)
configStatsDictList.append(Util.loadPickle(resultsFileName))
#Now need to take mean of 1st element of list
# The running sum is accumulated in place in configStatsDictList[0]; when two
# arrays differ in length the shorter one is zero-padded before adding.
meanConfigStatsDictList = configStatsDictList[0]
for i in range(len(configStatsDictList[0])):
for k in range(1, numConfigGraphs):
for key in configStatsDictList[k][i].keys():
if configStatsDictList[k][i][key].shape[0] > meanConfigStatsDictList[i][key].shape[0]:
meanConfigStatsDictList[i][key] = numpy.r_[meanConfigStatsDictList[i][key], numpy.zeros(configStatsDictList[k][i][key].shape[0] - meanConfigStatsDictList[i][key].shape[0])]
elif configStatsDictList[k][i][key].shape[0] < meanConfigStatsDictList[i][key].shape[0]:
configStatsDictList[k][i][key] = numpy.r_[configStatsDictList[k][i][key], numpy.zeros(meanConfigStatsDictList[i][key].shape[0] - configStatsDictList[k][i][key].shape[0])]
meanConfigStatsDictList[i][key] += configStatsDictList[k][i][key]
for key in configStatsDictList[0][i].keys():
meanConfigStatsDictList[i][key] = meanConfigStatsDictList[i][key]/numConfigGraphs
# Per-date result arrays, one row per entry of dayList2
triangleDistArray = numpy.zeros((len(dayList2), 100))
configTriangleDistArray = numpy.zeros((len(dayList2), 100))
hopPlotArray = numpy.zeros((len(dayList2), 27))
configHopPlotArray = numpy.zeros((len(dayList2), 30))
componentsDistArray = numpy.zeros((len(dayList2), 3000))
configComponentsDistArray = numpy.zeros((len(dayList2), 3000))
# NOTE(review): the numpy.int alias was removed in NumPy 1.24; this line and
# the eigVectorDists line below fail there - consider the builtin int.
numVerticesEdgesArray = numpy.zeros((len(dayList2), 2), numpy.int)
numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]
binWidths = numpy.arange(0, 0.50, 0.05)
eigVectorDists = numpy.zeros((len(dayList2), binWidths.shape[0]-1), numpy.int)
femaleSums = numpy.zeros(len(dayList2))
maleSums = numpy.zeros(len(dayList2))
heteroSums = numpy.zeros(len(dayList2))
biSums = numpy.zeros(len(dayList2))
contactSums = numpy.zeros(len(dayList2))
nonContactSums = numpy.zeros(len(dayList2))
donorSums = numpy.zeros(len(dayList2))
randomTestSums = numpy.zeros(len(dayList2))
stdSums = numpy.zeros(len(dayList2))
prisonerSums = numpy.zeros(len(dayList2))
recommendSums = numpy.zeros(len(dayList2))
meanAges = numpy.zeros(len(dayList2))
degrees = numpy.zeros((len(dayList2), 20))
provinces = numpy.zeros((len(dayList2), 15))
havanaSums = numpy.zeros(len(dayList2))
villaClaraSums = numpy.zeros(len(dayList2))
pinarSums = numpy.zeros(len(dayList2))
holguinSums = numpy.zeros(len(dayList2))
habanaSums = numpy.zeros(len(dayList2))
sanctiSums = numpy.zeros(len(dayList2))
meanDegrees = numpy.zeros(len(dayList2))
stdDegrees = numpy.zeros(len(dayList2))
#Note that death has a lot of missing values
for j in range(len(dayList2)):
dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
logging.info(dateStr)
statsDict = statsDictList[j]
configStatsDict = meanConfigStatsDictList[j]
degreeDist = statsDict["outDegreeDist"]
degreeDist = degreeDist/float(numpy.sum(degreeDist))
#Note that degree distribution for configuration graph will be identical
# Only eigenvalues of at least 0.1 are kept, on a log scale
eigenDist = statsDict["eigenDist"]
eigenDist = numpy.log(eigenDist[eigenDist>=10**-1])
#configEigenDist = configStatsDict["eigenDist"]
#configEigenDist = numpy.log(configEigenDist[configEigenDist>=10**-1])
hopCount = statsDict["hopCount"]
hopCount = numpy.log10(hopCount)
hopPlotArray[j, 0:hopCount.shape[0]] = hopCount
configHopCount = configStatsDict["hopCount"]
configHopCount = numpy.log10(configHopCount)
#configHopPlotArray[j, 0:configHopCount.shape[0]] = configHopCount
#.........part of the code is omitted here.........
示例15: plotScalarStats
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import savePickle [as 别名]
def plotScalarStats():
# Plots scalar statistics of the contact graph against the mean of the
# configuration model (CM) graphs, saving one .eps figure per statistic and
# advancing the module-level plotInd counter after each figure.
# NOTE(review): the function is truncated in this excerpt and the original
# indentation has been lost, so the nesting shown here is not reliable.
logging.info("Computing scalar stats")
resultsFileName = resultsDir + "ContactGrowthScalarStats.pkl"
if saveResults:
statsArray = graphStats.sequenceScalarStats(sGraph, subgraphIndicesList, slowStats)
Util.savePickle(statsArray, resultsFileName, True)
#Now compute statistics on the configuration graphs
else:
statsArray = Util.loadPickle(resultsFileName)
#Take the mean of the results over the configuration model graphs
resultsFileNameBase = resultsDir + "ConfigGraphScalarStats"
numGraphs = len(subgraphIndicesList)
#configStatsArrays = numpy.zeros((numGraphs, graphStats.getNumStats(), numConfigGraphs))
# NOTE(review): the CM arrays hold two fewer statistics than getNumStats()
# reports - the reason is not visible in this excerpt.
configStatsArrays = numpy.zeros((numGraphs, graphStats.getNumStats()-2, numConfigGraphs))
for j in range(numConfigGraphs):
resultsFileName = resultsFileNameBase + str(j)
configStatsArrays[:, :, j] = Util.loadPickle(resultsFileName)
configStatsArray = numpy.mean(configStatsArrays, 2)
configStatsStd = numpy.std(configStatsArrays, 2)
global plotInd
# Helper: draws column `index` of the real and CM statistics on the current
# figure, with error bars of one standard deviation on the CM curve, and
# clamps both axes to start at zero.
def plotRealConfigError(index, styleReal, styleConfig, realLabel, configLabel):
# NOTE(review): pyplot.hold was removed in Matplotlib 3.0; this call and the
# one further down fail there.
plt.hold(True)
plt.plot(absDayList, statsArray[:, index], styleReal, label=realLabel)
#errors = numpy.c_[configStatsArray[:, index]-configStatsMinArray[:, index] , configStatsMaxArray[:, index]-configStatsArray[:, index]].T
errors = numpy.c_[configStatsStd[:, index], configStatsStd[:, index]].T
plt.plot(absDayList, configStatsArray[:, index], styleConfig, label=configLabel)
plt.errorbar(absDayList, configStatsArray[:, index], errors, linewidth=0, elinewidth=1, label="_nolegend_", ecolor="red")
xmin, xmax = plt.xlim()
plt.xlim((0, xmax))
ymin, ymax = plt.ylim()
plt.ylim((0, ymax))
#Output all the results into plots
plt.figure(plotInd)
plt.hold(True)
plotRealConfigError(graphStats.maxComponentSizeIndex, plotStyleBW[0], plotStyles4[0], "Max comp. vertices", "CM max comp. vertices")
plotRealConfigError(graphStats.maxComponentEdgesIndex, plotStyleBW[1], plotStyles4[1], "Max comp. edges", "CM max comp. edges")
plt.xticks(locs, labels)
plt.xlabel("Year")
plt.ylabel("No. vertices/edges")
plt.legend(loc="upper left")
plt.savefig(figureDir + "MaxComponentSizeGrowth.eps")
plotInd += 1
# Print the edge count of the largest component for each date
for k in range(len(dayList)):
day = dayList[k]
print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(statsArray[k, graphStats.maxComponentEdgesIndex]))
#print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(configStatsArray[k, graphStats.numComponentsIndex]))
plt.figure(plotInd)
plotRealConfigError(graphStats.numComponentsIndex, plotStyleBW[0], plotStyles4[0], "Size >= 1", "CM size >= 1")
plotRealConfigError(graphStats.numNonSingletonComponentsIndex, plotStyleBW[1], plotStyles4[1], "Size >= 2", "CM size >= 2")
plotRealConfigError(graphStats.numTriOrMoreComponentsIndex, plotStyleBW[2], plotStyles4[2], "Size >= 3", "CM size >= 3")
plt.xticks(locs, labels)
plt.xlabel("Year")
plt.ylabel("No. components")
plt.legend(loc="upper left")
plt.savefig(figureDir + "NumComponentsGrowth.eps")
plotInd += 1
plt.figure(plotInd)
plotRealConfigError(graphStats.meanComponentSizeIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
plt.xticks(locs, labels)
plt.xlabel("Year")
plt.ylabel("Mean component size")
plt.legend(loc="lower right")
plt.savefig(figureDir + "MeanComponentSizeGrowth.eps")
plotInd += 1
plt.figure(plotInd)
plotRealConfigError(graphStats.diameterIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
plt.xticks(locs, labels)
plt.xlabel("Year")
plt.ylabel("Max component diameter")
plt.legend(loc="lower right")
plt.savefig(figureDir + "MaxComponentDiameterGrowth.eps")
plotInd += 1
plt.figure(plotInd)
plotRealConfigError(graphStats.effectiveDiameterIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
plt.xticks(locs, labels)
plt.xlabel("Year")
plt.ylabel("Effective diameter")
plt.legend(loc="lower right")
plt.savefig(figureDir + "MaxComponentEffDiameterGrowth.eps")
plotInd += 1
plt.figure(plotInd)
plotRealConfigError(graphStats.meanDegreeIndex, plotStyleBW[0], plotStyles4[0], "All vertices", "CM all vertices")
plotRealConfigError(graphStats.maxCompMeanDegreeIndex, plotStyleBW[1], plotStyles4[1], "Max component", "CM max component")
#.........part of the code is omitted here.........