This article collects typical usage examples of the Python method library.file_io.FileIO.iterateJsonFromFile. If you are unsure how to use FileIO.iterateJsonFromFile, or want to see what it looks like in practice, the curated examples below may help. You can also look further into the class library.file_io.FileIO, to which this method belongs, for related usage examples.
The following 15 code examples of FileIO.iterateJsonFromFile are shown, sorted by popularity by default.
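Before the individual examples, a minimal usage sketch may help. Its shape is inferred from the examples below, so treat it as an assumption rather than documented API: the method takes the path of a file containing JSON records and yields one decoded object (a dict) per record, and the optional second positional flag / remove_params_dict keyword appears to skip a leading parameters record. The file name and the 'utm_id' key here are hypothetical placeholders.
# Required module: from library.file_io import FileIO [as alias]
from library.file_io import FileIO

input_file = 'data_by_utm_id.json'  # hypothetical path, for illustration only
for json_object in FileIO.iterateJsonFromFile(input_file, remove_params_dict=True):
    # Each iteration yields one JSON record decoded into a Python dict.
    print json_object.get('utm_id')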
Example 1: generate_data_for_significant_nei_utm_ids
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def generate_data_for_significant_nei_utm_ids():
output_file = GeneralMethods.get_method_id()+'.json'
so_hashtags, mf_utm_id_to_valid_nei_utm_ids = set(), {}
for utm_object in \
FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, True):
for hashtag, count in utm_object['mf_hashtag_to_count'].iteritems():
if hashtag!='total_num_of_occurrences': so_hashtags.add(hashtag)
mf_utm_id_to_valid_nei_utm_ids[utm_object['utm_id']] =\
utm_object['mf_nei_utm_id_to_common_h_count'].keys()
hashtags = sorted(list(so_hashtags))
mf_utm_id_to_vector = {}
for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, True):
# print i, utm_object['utm_id']
utm_id_vector = map(lambda hashtag: utm_object['mf_hashtag_to_count'].get(hashtag, 0.0),
hashtags)
mf_utm_id_to_vector[utm_object['utm_id']] = robjects.FloatVector(utm_id_vector)
for i, (utm_id, vector) in enumerate(mf_utm_id_to_vector.iteritems()):
print '%s of %s'%(i+1, len(mf_utm_id_to_vector))
ltuo_utm_id_and_vector = [(utm_id, vector)]
for valid_nei_utm_id in mf_utm_id_to_valid_nei_utm_ids[utm_id]:
if valid_nei_utm_id in mf_utm_id_to_vector and valid_nei_utm_id!=utm_id:
ltuo_utm_id_and_vector.append((valid_nei_utm_id, mf_utm_id_to_vector[valid_nei_utm_id]))
od = rlc.OrdDict(sorted(ltuo_utm_id_and_vector, key=itemgetter(0)))
df_utm_vectors = robjects.DataFrame(od)
df_utm_vectors_json = R_Helper.get_json_for_data_frame(df_utm_vectors)
dfm_dict = cjson.decode(df_utm_vectors_json)
mf_utm_ids_to_utm_colnames = dict(zip(zip(*ltuo_utm_id_and_vector)[0], df_utm_vectors.colnames))
utm_id_colname = mf_utm_ids_to_utm_colnames[utm_id]
dfm_dict['prediction_variable'] = utm_id_colname
dfm_dict['predictor_variables'] = filter(lambda colname: colname!=utm_id_colname,
df_utm_vectors.colnames)
dfm_dict['mf_utm_colnames_to_utm_ids'] = dict(zip(df_utm_vectors.colnames, zip(*ltuo_utm_id_and_vector)[0]))
FileIO.writeToFileAsJson(dfm_dict, output_file)
Example 2: plotQualityWithKMeansAndSSA
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def plotQualityWithKMeansAndSSA():
del plotSettings["ssa_mr"]
speedStats = dict([(k, {"f1": [], "nmi": [], "purity": []}) for k in plotSettings])
for data in FileIO.iterateJsonFromFile(TweetsFile.stats_file):
for k in speedStats:
for metric in speedStats["ssa"]:
speedStats[k][metric].append(data[k][metric])
for k in speedStats:
del speedStats[k]["f1"]
speedStats.update(dict([(k, {"f1": [], "nmi": [], "purity": []}) for k in kMeansPlotSettings]))
k = "k_means"
for data in FileIO.iterateJsonFromFile(TweetsFile.combined_stats_file):
for metric in speedStats["k_means"]:
speedStats[k][metric].append(data[k][metric])
for k in speedStats:
if "f1" in speedStats[k]:
del speedStats[k]["f1"]
dataForPlot = dict([(k, []) for k in speedStats])
for k in speedStats:
for k1 in speedStats[k]:
dataForPlot[k] += [np.mean(speedStats[k][k1])]
# del dataForPlot['k_means']
print dataForPlot
ind, width = np.arange(2), 0.1
rects, i = [], 1
plotSettings.update(kMeansPlotSettings)
for k in dataForPlot:
rects.append(plt.bar(ind + i * width, dataForPlot[k], width, color=plotSettings[k]["color"]))
i += 1
plt.ylabel(getLatexForString("Score"))
plt.title(getLatexForString("Clustering quality comparison for Streaming LSH with SSA"))
plt.xticks(ind + 2 * width, ("$Purity$", "$NMI$"))
plt.legend([r[0] for r in rects], [plotSettings[k]["label"] for k in plotSettings], loc=4)
# plt.show()
plt.savefig("qualityComparisonAll.pdf")
Example 3: build
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def build(numberOfTimeUnits=24):
validLattices = set()
for data in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%('world','%s_%s'%(2,11))): validLattices.add(data['id'])
documents, lattices = [], set()
for h in FileIO.iterateJsonFromFile(hashtagsFile%('training_world','%s_%s'%(2,11))):
hashtag, document = Hashtag(h), []
if hashtag.isValidObject():
for timeUnit, occs in enumerate(hashtag.getOccrancesEveryTimeWindowIterator(HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)):
occs = filter(lambda t: t[0] in validLattices, occs)
occs = sorted(occs, key=itemgetter(0))
if occs:
for lattice in zip(*occs)[0]: lattices.add(lattice)
document.append([timeUnit, [(k, len(list(i))) for k, i in groupby(occs, key=itemgetter(0))]])
if document: documents.append(document)
lattices = sorted(list(lattices))
print len(lattices)
documents = [(d, TargetSelectionRegressionClassifier.getPercentageDistributionInLattice(d)) for d in documents]
documents = documents[:int(len(documents)*0.80)]
for decisionTimeUnit in range(1, numberOfTimeUnits+1):
for latticeCount, predictingLattice in enumerate(lattices):
print decisionTimeUnit, latticeCount,
inputVectors, outputValues = [], []
for rawDocument, processedDocument in documents:
documentForTimeUnit = TargetSelectionRegressionClassifier.getPercentageDistributionInLattice(rawDocument[:decisionTimeUnit])
if documentForTimeUnit and processedDocument:
vector = [documentForTimeUnit.get(l, 0) for l in lattices]
inputVectors.append(vector), outputValues.append(float(processedDocument.get(predictingLattice, 0)))
# TargetSelectionRegressionClassifier(decisionTimeUnit=decisionTimeUnit, predictingLattice=predictingLattice).build(zip(inputVectors, outputValues))
TargetSelectionRegressionSVMRBFClassifier(decisionTimeUnit=decisionTimeUnit, predictingLattice=predictingLattice).build(zip(inputVectors, outputValues))
Example 4: analyzeJustifyExponentialDecay
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def analyzeJustifyExponentialDecay(self):
global evaluation
experimentsData = {JustifyExponentialDecay.with_decay: {}, JustifyExponentialDecay.without_decay: {}}
for data in FileIO.iterateJsonFromFile(JustifyExponentialDecay.stats_file): experimentsData[data['iteration_parameters']['type']][getDateTimeObjectFromTweetTimestamp(data['iteration_parameters']['current_time'])]=data['clusters']
qualityData = []
for k1, k2 in zip(sorted(experimentsData[JustifyExponentialDecay.with_decay]), sorted(experimentsData[JustifyExponentialDecay.without_decay])):
qualityData.append((k1, evaluation.getEvaluationMetrics(experimentsData[JustifyExponentialDecay.with_decay][k1], None, None)['purity']-evaluation.getEvaluationMetrics(experimentsData[JustifyExponentialDecay.without_decay][k1], None, None)['purity']))
keyTime = sorted(qualityData, key=itemgetter(1))[-1][0]
clusterWithDecay = [i for i in experimentsData[JustifyExponentialDecay.with_decay][keyTime] if len(i)>=3]
clusterWithOutDecay = [i for i in experimentsData[JustifyExponentialDecay.without_decay][keyTime] if len(i)>=3]
# for c in clusterWithDecay:
# print c, [evaluation.expertsToClassMap[i.lower()] for i in c]
interestedCluster = set(['Zap2it', 'ESPNAndyKatz', 'comingsoonnet', '950KJR', 'ginasmith888', 'UKCoachCalipari', 'SportsFanz', 'David_Henrie'])
for c in clusterWithOutDecay:
if len(set(c).intersection(interestedCluster))>0:
# print c, [evaluation.expertsToClassMap[i.lower()] for i in c]
setString = ', '.join(['%s (%s)'%(i, evaluation.expertsToClassMap[i.lower()]) for i in sorted(c)]).replace(' ', '\\ ').replace('_', '\\_')
print keyTime, '&', setString, '\\\\'
clustersDiscoveredEarlierByDecay = {}
for kt in sorted(experimentsData[JustifyExponentialDecay.with_decay]):
for c in experimentsData[JustifyExponentialDecay.with_decay][kt]:
c=sorted(c)
if len(set(c).intersection(interestedCluster))>0:
classes = [evaluation.expertsToClassMap[i.lower()] for i in c if i.lower() in evaluation.expertsToClassMap]
if sorted([(k, len(list(g))/float(len(classes))) for k,g in groupby(sorted(classes))], key=itemgetter(1))[-1][1]>0.7:
if kt>datetime(2011,3,19) and kt<=keyTime: clustersDiscoveredEarlierByDecay[kt]=c
observedStrings = set()
for k in sorted(clustersDiscoveredEarlierByDecay):
setString = ', '.join(['%s (%s)'%(i, evaluation.expertsToClassMap[i.lower()]) for i in sorted(clustersDiscoveredEarlierByDecay[k])]).replace(' ', '\\ ').replace('_', '\\_')
if setString not in observedStrings: print k, '&', setString, '\\\\'; observedStrings.add(setString)
Example 5: probabilisticCoverageModelExample
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def probabilisticCoverageModelExample(hashtag, type):
MINUTES, timeUnit = 5, 1
print len(CoverageBasedLatticeSelectionModel.lattices)
for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
if hashtagObject['h']==hashtag:
occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
occsInTimeunit = zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
if type=='5m': probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(occsInTimeunit)
else:
print getRadius(allOccurances)
probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(allOccurances)
latticeScores = CoverageBasedLatticeSelectionModel.spreadProbability(CoverageBasedLatticeSelectionModel.lattices, probabilityDistributionForObservedLattices)
points, colors = zip(*map(lambda t: (getLocationFromLid(t[0].replace('_', ' ')), t[1]), sorted(latticeScores.iteritems(), key=itemgetter(1))))
# print points[0], colors[0]
ax = plt.subplot(111)
sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
divider = make_axes_locatable(ax)
# plt.title('Jaccard similarity with New York')
cax = divider.append_axes("right", size="5%", pad=0.05)
plt.colorbar(sc, cax=cax)
plt.show()
# plt.savefig('../images/coverage_examples/%s_%s.png'%(hashtag, type))
plt.clf()
break
Example 6: coverageIndication
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def coverageIndication():
MINUTES = 5
for timeUnit, color, shape in [(1, 'r', 'x'), (3, 'g', 'd'), (6, 'b', 's')]:
print timeUnit
data = defaultdict(int)
for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%('training_world','%s_%s'%(2,11))):
try:
occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
occsInTimeunit = zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
if len(occsInTimeunit)>10:
allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
timeUnitRadius, allRadius = getRadius(occsInTimeunit), getRadius(allOccurances)
data[int(abs(timeUnitRadius-allRadius))/50*50+50]+=1
# data[round(abs(timeUnitRadius-allRadius)/allRadius, 2)]+=1
except IndexError as e: pass
for k in data.keys()[:]:
if data[k]<3: del data[k]
dataX, dataY = zip(*sorted(data.iteritems(), key=itemgetter(0)))
plt.loglog(dataX, dataY, lw=2, label=str(timeUnit*MINUTES) + ' minutes', marker=shape)
# plt.loglog([1],[1])
# plt.title('Early indication of coverage'),
plt.xlabel('Coverage difference (miles)', fontsize=20), plt.ylabel('Number of hashtags', fontsize=20)
plt.legend()
# plt.show()
plt.savefig('../images/coverageIndication.png')
Example 7: temporalLocalityTemporalDistanceExample
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def temporalLocalityTemporalDistanceExample(lattice=NEW_YORK):
distances = defaultdict(dict)
for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%('training_world','%s_%s'%(2,11))):
if latticeObject['id']==lattice:
latticeHashtagsSet = set(latticeObject['hashtags'])
for neighborLattice, neighborHashtags in latticeObject['links'].iteritems():
distances[neighborLattice] = {}
neighborHashtags = filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance(latticeObject['hashtags'], neighborHashtags, findLag=False)
neighborHashtagsSet = set(neighborHashtags)
distances[neighborLattice]['similarity']=len(latticeHashtagsSet.intersection(neighborHashtagsSet))/float(len(latticeHashtagsSet.union(neighborHashtagsSet)))
distances[neighborLattice]['temporalDistance']=np.mean([abs(latticeObject['hashtags'][k][0]-neighborHashtags[k][0]) for k in neighborHashtags if k in latticeObject['hashtags']])/(60.*60.)
distances[neighborLattice]['geoDistance']=getHaversineDistanceForLids(latticeObject['id'].replace('_', ' '), neighborLattice.replace('_', ' '))
break
dataPoints = []
ax = plt.subplot(111)
for k, data in distances.iteritems(): dataPoints.append((getLocationFromLid(k.replace('_', ' ')), data['temporalDistance']))
points, colors = zip(*sorted(dataPoints, key=itemgetter(1)))
sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', cmap='RdPu', c=colors, lw = 0, alpha=1.0)
plotPointsOnWorldMap([getLocationFromLid(lattice.replace('_', ' '))], blueMarble=False, bkcolor='#CFCFCF', c='#64FF1C', lw = 0)
divider = make_axes_locatable(ax)
plt.title('Average time difference from New York')
cax = divider.append_axes("right", size="5%", pad=0.05)
plt.colorbar(sc, cax=cax)
# plt.show()
plt.savefig('../images/temporalDistanceExample.png')
Example 8: plotClusteringSpeed
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def plotClusteringSpeed(saveFig=True):
plotSettings = {
"k_means": {"label": "Iterative k-means", "color": "#FD0006"},
"mr_k_means": {"label": "MR k-means", "color": "#5AF522"},
"streaming_lsh": {"label": "Stream CDA", "color": "#7109AA"},
}
dataToPlot = {
"k_means": {"x": [], "y": []},
"mr_k_means": {"x": [], "y": []},
"streaming_lsh": {"x": [], "y": []},
}
for data in FileIO.iterateJsonFromFile(TweetsFile.combined_stats_file):
for k in plotSettings:
dataToPlot[k]["x"].append(data[k]["no_of_documents"])
dataToPlot[k]["y"].append(data[k]["iteration_time"])
for k in plotSettings:
plt.loglog(
dataToPlot[k]["x"],
dataToPlot[k]["y"],
label=plotSettings[k]["label"],
color=plotSettings[k]["color"],
lw=2,
)
plt.legend(loc=4)
if saveFig:
plt.xlabel(getLatexForString("\# of documents"))
plt.ylabel(getLatexForString("Running time (s)"))
plt.title(getLatexForString("Running time comparsion for Streaing LSH with k-Means"))
plt.xlim(xmin=800, xmax=100000)
plt.xticks([])
# plt.show()
if saveFig:
plt.savefig("speedComparisonWithKMeans.pdf")
Example 9: plotClusteringQuality
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def plotClusteringQuality():
del plotSettings["ssa_mr"]
speedStats = dict([(k, {"f1": [], "nmi": [], "purity": []}) for k in plotSettings])
for data in FileIO.iterateJsonFromFile(TweetsFile.stats_file):
for k in speedStats:
for metric in speedStats["ssa"]:
speedStats[k][metric].append(data[k][metric])
dataForPlot = dict([(k, []) for k in plotSettings])
for k, v in speedStats.iteritems():
print k
for k1, v1 in v.iteritems():
if type(v1[0]) != type([]):
print k1, "(%0.2f %0.2f)" % (np.mean(v1), np.var(v1))
dataForPlot[k] += [np.mean(v1)]
else:
print k1, ["(%0.2f %0.2f)" % (np.mean(z), np.var(z)) for z in zip(*v1)]
dataForPlot[k] += [np.mean(z) for z in zip(*v1)]
ind, width = np.arange(5), 0.1
rects, i = [], 0
for k in dataForPlot:
rects.append(plt.bar(ind + i * width, dataForPlot[k], width, color=plotSettings[k]["color"]))
i += 1
plt.ylabel(getLatexForString("Score"))
plt.title(getLatexForString("Clustering quality comparison for Streaming LSH with SSA"))
plt.xticks(ind + width, ("$F$", "$Precision$", "$Recall$", "$Purity$", "$NMI$"))
plt.legend([r[0] for r in rects], [plotSettings[k]["label"] for k in plotSettings], loc=4)
# plt.show()
plt.savefig("qualityComparisonWithSSA.pdf")
Example 10: print_dense_utm_ids
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def print_dense_utm_ids():
''' Prints list of dense utm_ids.
'''
print [utm_object['utm_id']
for utm_object in FileIO.iterateJsonFromFile(
f_hashtags_by_utm_id,
remove_params_dict=True)]
Example 11: iterateExpertClusters
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def iterateExpertClusters(startingDay=datetime(2011,3,19), endingDay=datetime(2011,3, 30)):
# def iterateExpertClusters(startingDay=datetime(2011,3,19), endingDay=datetime(2011,4,7)):
while startingDay<=endingDay:
for line in FileIO.iterateJsonFromFile(experts_twitter_stream_settings.lsh_clusters_folder+FileIO.getFileByDay(startingDay)):
currentTime = getDateTimeObjectFromTweetTimestamp(line['time_stamp'])
for clusterMap in line['clusters']: yield (currentTime, TwitterCrowdsSpecificMethods.getClusterFromMapFormat(clusterMap))
startingDay+=timedelta(days=1)
Example 12: significant_nei_utm_ids
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def significant_nei_utm_ids():
output_folder = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'/%s.png'
for i, data in enumerate(FileIO.iterateJsonFromFile(f_significant_nei_utm_ids, remove_params_dict=True)):
utm_lat_long = UTMConverter.getLatLongUTMIdInLatLongForm(data['utm_id'])
nei_utm_lat_longs = map(
lambda nei_utm_id: UTMConverter.getLatLongUTMIdInLatLongForm(nei_utm_id),
data['nei_utm_ids']
)
if nei_utm_lat_longs:
output_file = output_folder%('%s_%s'%(utm_lat_long))
plotPointsOnWorldMap(nei_utm_lat_longs,
blueMarble=False,
bkcolor='#CFCFCF',
lw = 0,
color = '#EA00FF',
alpha=1.)
_, m = plotPointsOnWorldMap([utm_lat_long],
blueMarble=False,
bkcolor='#CFCFCF',
lw = 0,
color = '#2BFF00',
s = 40,
returnBaseMapObject=True,
alpha=1.)
for nei_utm_lat_long in nei_utm_lat_longs:
m.drawgreatcircle(utm_lat_long[1],
utm_lat_long[0],
nei_utm_lat_long[1],
nei_utm_lat_long[0],
color='#FFA600',
lw=1.5,
alpha=1.0)
print 'Saving %s'%(i+1)
savefig(output_file)
Example 13: trendCurves
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def trendCurves(iterationData=None, experimentFileName=None):
if iterationData:
currentTimeStep, _, currentTopics, _, finalCall, conf = iterationData
experimentFileName = conf['experimentFileName']
if not finalCall:
topicDistribution = dict((str(topic.id), {'total': topic.totalCount, 'timeStep': topic.countDistribution[currentTimeStep]}) for topic in currentTopics)
# print currentTimeStep
FileIO.writeToFileAsJson({'t':currentTimeStep, 'topics':topicDistribution}, experimentFileName)
else:
iterationInfo = {'trending_topics': [topic.id for topic in currentTopics if topic.stickiness>=stickinessLowerThreshold],
'topic_colors': dict((str(topic.id), topic.color) for topic in currentTopics),
'conf': conf}
del iterationInfo['conf']['spamDectectionMethod']
FileIO.writeToFileAsJson(iterationInfo, experimentFileName)
else:
topicsDataX = defaultdict(list)
topicsDataY = defaultdict(list)
for data in FileIO.iterateJsonFromFile(experimentFileName):
if 'conf' not in data:
for topic in data['topics']: topicsDataX[topic].append(data['t']), topicsDataY[topic].append(data['topics'][topic]['timeStep'])
else: topicColorMap=data['topic_colors']; trendingTopics=data['trending_topics']
for topic in topicsDataX: plt.fill_between(topicsDataX[topic], topicsDataY[topic], color=topicColorMap[str(topic)], alpha=1.0)
plt.figure()
for topic in trendingTopics: plt.fill_between(topicsDataX[str(topic)], topicsDataY[str(topic)], color=topicColorMap[str(topic)], alpha=1.0)
plt.ylabel('Number of Contents', fontsize=16, fontweight='bold')
plt.show()
Example 14: measureCorrelations
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def measureCorrelations(timeRange, outputFolder):
'''
['haversine_distance', 'temporal_distance_in_hours', 0.20147108648121248]
['haversine_distance', 'sharing_probability', -0.19587239643328627]
'''
measures = [
(LatticeGraph.typeHaversineDistance, LatticeGraph.typeTemporalDistanceInHours),
(LatticeGraph.typeHaversineDistance, LatticeGraph.typeSharingProbability),
]
runData = []
for xMeasure, yMeasure in measures:
i, xdata, ydata = 1, [], []
for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%(outputFolder,'%s_%s'%timeRange)):
print i, latticeObject['id']; i+=1
xdata+=zip(*xMeasure['method'](latticeObject)['links'].iteritems())[1]
ydata+=zip(*yMeasure['method'](latticeObject)['links'].iteritems())[1]
# if i==200: break
preasonsCorrelation, _ = stats.pearsonr(xdata, ydata)
# plt.scatter(xdata[:5000], ydata[:5000])
# plt.title('Pearson\'s co-efficient %0.3f'%preasonsCorrelation)
# plt.xlabel(xMeasure['title']), plt.ylabel(yMeasure['title'])
# plt.show()
runData.append([xMeasure['id'], yMeasure['id'], preasonsCorrelation])
for i in runData:
print i
Example 15: plotDimensionsUpdateFrequencyEstimation
# Required module: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def plotDimensionsUpdateFrequencyEstimation(self, returnAxisValuesOnly=True):
'''
numberOfTimeUnits=10*24*12
Experts stream 12
Houston stream 2
'''
dataDistribution = defaultdict(list)
for line in FileIO.iterateJsonFromFile(self.dimensionsUpdateFrequencyFile):
for k, v in line[ParameterEstimation.dimensionsUpdateFrequencyId].iteritems():
k = int(k) / self.timeUnitInSeconds.seconds
if k not in dataDistribution: dataDistribution[k] = [0., 0.]
dataDistribution[k][0] += v; dataDistribution[k][1] += 1
x, y = [], []; [(x.append(k), y.append((dataDistribution[k][0] / dataDistribution[k][1]))) for k in sorted(dataDistribution)]
x1, y1 = [], []; [(x1.append(k), y1.append((dataDistribution[k][0] / dataDistribution[k][1]) / k)) for k in sorted(dataDistribution)]
x = x[:numberOfTimeUnits]; y = y[:numberOfTimeUnits]; x1 = x1[:numberOfTimeUnits]; y1 = y1[:numberOfTimeUnits]
def subPlot(id):
plt.subplot(id)
inactivityCorordinates = max(zip(x1, y1), key=itemgetter(1))
plt.semilogx(x1, y1, '-', color=self.stream_settings['plot_color'], label=getLatexForString(self.stream_settings['plot_label'] + ' (Update frequency=%d TU)' % inactivityCorordinates[0]), lw=2)
plt.subplot(id).yaxis.set_major_formatter(FuncFormatter(lambda x, i: '%0.1f' % (x / 10. ** 3)))
plt.semilogx([inactivityCorordinates[0]], [inactivityCorordinates[1]], 'o', alpha=0.7, color='r')
plt.subplot(id).yaxis.set_major_formatter(FuncFormatter(lambda x, i: '%0.1f' % (x / 10. ** 3)))
plt.yticks((min(y1), max(y1)))
print self.stream_settings['plot_label'], inactivityCorordinates[0]
plt.subplot(311)
plt.title(getLatexForString('Dimensions update frequency estimation'))
plt.semilogx(x, y, '-', color=self.stream_settings['plot_color'], label=getLatexForString(self.stream_settings['plot_label']), lw=2)
plt.subplot(311).yaxis.set_major_formatter(FuncFormatter(lambda x, i: '%0.1f' % (x / 10. ** 5)))
plt.text(0.0, 1.01, getLatexForString('10^5'), transform=plt.gca().transAxes)
plt.ylabel(getLatexForString('\# of decayed dimensions'))
if self.stream_settings['stream_id'] == 'experts_twitter_stream': subPlot(312)
else: subPlot(313); plt.xlabel(getLatexForString(xlabelTimeUnits))
plt.ylabel(getLatexForString('Rate of DD (10^3)'))
plt.legend(loc=3)
if returnAxisValuesOnly: plt.show()