This article collects typical usage examples of the Python class library.classes.GeneralMethods. If you have been wondering what GeneralMethods is for, how to use it, or where to find examples of it in use, the curated class code examples below may help.
The following shows 15 code examples of the GeneralMethods class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
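The examples below rely on a handful of GeneralMethods helpers: approximateEpoch, getEpochFromDateTimeObject, approximateToNearest5Minutes, group_items_by, runCommand and trueWith. The library's own source is not reproduced on this page, so the following is only a minimal sketch of what these helpers appear to do, inferred from how the examples call them; every signature and behavior shown here is an assumption, not the actual library.classes implementation.

# Hypothetical sketch of the GeneralMethods helpers used in the examples below.
# All implementation details are assumptions inferred from the call sites.
import os
import time
import random
from itertools import groupby

class GeneralMethods(object):
    @staticmethod
    def getEpochFromDateTimeObject(dt):
        # Convert a datetime object to a Unix epoch (seconds).
        return time.mktime(dt.timetuple())

    @staticmethod
    def approximateEpoch(epoch, time_unit_in_seconds):
        # Floor an epoch to the nearest lower multiple of the time unit.
        return int(epoch) - int(epoch) % time_unit_in_seconds

    @staticmethod
    def approximateToNearest5Minutes(dt):
        # Round a datetime down to a 5-minute boundary (assumed behavior).
        return dt.replace(minute=(dt.minute // 5) * 5, second=0, microsecond=0)

    @staticmethod
    def group_items_by(items, key):
        # Group items by key, returning a list of (key, [items]) pairs sorted by key.
        return [(k, list(vals)) for k, vals in groupby(sorted(items, key=key), key=key)]

    @staticmethod
    def runCommand(command):
        # Run a shell command; the examples use it to clear output files.
        os.system(command)

    @staticmethod
    def trueWith(probability):
        # Bernoulli trial: return True with the given probability.
        return random.random() < probability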
Example 1: mapper
def mapper(self, key, hashtag_object):
ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
if ltuo_occ_time_and_occ_location:
ltuo_intvl_time_and_occ_location = [(
GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS),
occ_location
)
for occ_time, occ_location in ltuo_occ_time_and_occ_location]
ltuo_intvl_time_and_items =\
GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(0))
ltuo_intvl_time_and_items.sort(key=itemgetter(0))
first_time = ltuo_intvl_time_and_items[0][0]
intvl_method = lambda (t, it): ((t-first_time)/TIME_UNIT_IN_SECONDS, (t, len(it)))
ltuo_iid_and_tuo_interval_and_occurrence_count = map(intvl_method, ltuo_intvl_time_and_items)
peak_tuo_iid_and_tuo_interval_and_occurrence_count = \
max(
ltuo_iid_and_tuo_interval_and_occurrence_count,
key=lambda (_, (__, occurrence_count)): occurrence_count
)
peak_iid = peak_tuo_iid_and_tuo_interval_and_occurrence_count[0]
current_val = 0.0
total_occurrences = sum(data[1][1] for data in ltuo_iid_and_tuo_interval_and_occurrence_count)
for iid, (_, occurrence_count) in ltuo_iid_and_tuo_interval_and_occurrence_count:
is_peak = 0.0
if iid==peak_iid: is_peak=1.0
current_val+=occurrence_count
yield iid, [is_peak, occurrence_count/total_occurrences, current_val/total_occurrences]
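Stripped of the MapReduce plumbing, the statistic this mapper emits for each time interval is a triple: whether the interval is the hashtag's peak, the interval's share of all occurrences, and the cumulative share up to that interval. A standalone sketch of the same computation, assuming approximateEpoch floors timestamps to the time unit as in the sketch above:

# Hypothetical standalone version of the per-interval peak statistics.
from collections import Counter

def peak_interval_stats(occ_times, time_unit=3600):
    # Bucket occurrence times into intervals by flooring to the time unit.
    counts = Counter(int(t) - int(t) % time_unit for t in occ_times)
    first = min(counts)
    total = float(sum(counts.values()))
    peak = max(counts, key=counts.get)
    cumulative = 0.0
    for interval in sorted(counts):
        cumulative += counts[interval]
        iid = (interval - first) // time_unit
        yield iid, [1.0 if interval == peak else 0.0,
                    counts[interval] / total,
                    cumulative / total]

# e.g. list(peak_interval_stats([0, 10, 3700, 3800, 3900], time_unit=3600))
# -> [(0, [0.0, 0.4, 0.4]), (1, [1.0, 0.6, 1.0])]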
Example 2: test_append
def test_append(self):
self.crowd.append(self.cluster, test_time+timedelta(days=1))
self.assertEqual([GeneralMethods.getEpochFromDateTimeObject(test_time), GeneralMethods.getEpochFromDateTimeObject(test_time+timedelta(days=1))], sorted(self.crowd.clusters.keys()))
self.assertEqual(StreamCluster, type(self.crowd.clusters[GeneralMethods.getEpochFromDateTimeObject(test_time)]))
self.assertEqual(2, self.crowd.lifespan)
self.assertEqual(getStringRepresentationForTweetTimestamp(test_time), getStringRepresentationForTweetTimestamp(self.crowd.startTime))
self.assertEqual(getStringRepresentationForTweetTimestamp(test_time+timedelta(days=1)), getStringRepresentationForTweetTimestamp(self.crowd.endTime))
Example 3: trendCurves
def trendCurves():
model = MixedUsersModel()
experimentFileName = spamModelFolder+model.id
conf = {'model': model, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.trendCurves, 1)], 'ratio': {'normal': 0.985, 'spammer': 0.015},
'experimentFileName': experimentFileName}
GeneralMethods.runCommand('rm -rf %s'%experimentFileName); run(**conf)
Analysis.trendCurves(experimentFileName=experimentFileName)
Example 4: dimensionsUpdateFrequencyEstimation
def dimensionsUpdateFrequencyEstimation(estimationObject, currentMessageTime):
'''
Observe the new dimensions that get added to the current dimension list when the
dimensions are updated at regular intervals, e.g. the number of dimensions added
after 10m, 20m, ..., 5 hours.
As time passes, the number of 'decayed' dimensions increases, and the current
dimension list accumulates many unwanted decayed dimensions. Using this information,
identify the time interval best suited for refreshing the dimensions.
Tentative: we pick the time interval at which the rate of decay is maximum.
(A simplified sketch of this comparison follows the example.)
'''
def updatePhraseScore(phraseObject):
phraseObject.updateScore(currentMessageTime, 0, **estimationObject.stream_settings)
return phraseObject
dimensions = estimationObject.stream_settings['dimensions']
newList = [p.text for p in Phrase.sort((updatePhraseScore(p) for p in estimationObject.phraseTextToPhraseObjectMap.itervalues()), reverse=True)][:dimensions]
print currentMessageTime, len(newList)
if len(newList) >= dimensions:
idsOfDimensionsListToCompare = [(i, GeneralMethods.approximateToNearest5Minutes(currentMessageTime - i)) for i in estimationObject.dimensionUpdateTimeDeltas if GeneralMethods.approximateToNearest5Minutes(currentMessageTime - i) in estimationObject.dimensionListsMap]
dimensionsUpdateFrequency = {}
for td, id in idsOfDimensionsListToCompare:
oldList = estimationObject.dimensionListsMap[id]
dimensionsUpdateFrequency[str(td.seconds)] = len(set(newList).difference(oldList))
print len(estimationObject.dimensionListsMap), currentMessageTime, len(newList), [(k, dimensionsUpdateFrequency[k]) for k in sorted(dimensionsUpdateFrequency)]
iterationData = {
'time_stamp': getStringRepresentationForTweetTimestamp(currentMessageTime),
'total_number_of_phrases': len(estimationObject.phraseTextToPhraseObjectMap),
'settings': pprint.pformat(estimationObject.stream_settings),
ParameterEstimation.dimensionsUpdateFrequencyId:dimensionsUpdateFrequency
}
FileIO.writeToFileAsJson(iterationData, estimationObject.dimensionsUpdateFrequencyFile)
estimationObject.dimensionListsMap[GeneralMethods.approximateToNearest5Minutes(currentMessageTime)] = newList[:]
for key in estimationObject.dimensionListsMap.keys()[:]:
if currentMessageTime - key > estimationObject.dimensionUpdateTimeDeltas[-1]: del estimationObject.dimensionListsMap[key]
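The core of the estimation above is a comparison between the freshly ranked dimension list and snapshots of the list taken various time deltas ago, with snapshots keyed by times rounded to 5-minute boundaries. For each candidate delta it counts how many dimensions in the new list were absent from the old snapshot, i.e. how much the list has turned over. A simplified sketch of just that bookkeeping, with hypothetical names; the Phrase scoring and the file output are omitted:

# Hypothetical simplified sketch of the decay-count comparison behind
# dimensionsUpdateFrequencyEstimation (snapshot bookkeeping only).
def round_to_5_minutes(dt):
    # Same assumed rounding as GeneralMethods.approximateToNearest5Minutes.
    return dt.replace(minute=(dt.minute // 5) * 5, second=0, microsecond=0)

def decay_counts(current_time, new_dimensions, snapshots, deltas):
    # current_time:   datetime of the current message
    # new_dimensions: freshly ranked list of dimension strings
    # snapshots:      {rounded_datetime: dimension list at that time}
    # deltas:         candidate refresh intervals (timedelta objects)
    counts = {}
    for delta in deltas:
        snapshot_id = round_to_5_minutes(current_time - delta)
        if snapshot_id in snapshots:
            old_dimensions = snapshots[snapshot_id]
            # Dimensions in the fresh list that the old snapshot lacked:
            # the larger this is, the more the list has turned over since then.
            turned_over = len(set(new_dimensions).difference(old_dimensions))
            counts[str(int(delta.total_seconds()))] = turned_over
    # Record the current list so future calls can compare against it.
    snapshots[round_to_5_minutes(current_time)] = list(new_dimensions)
    return counts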
Example 5: analyzeQuality
def analyzeQuality(graphs, graphType):
def getQualityScore(graphMap, edgesToKeep, timeDifference):
dataToReturn = []
for j, intervalInSeconds in enumerate([1]):
intervalInSeconds*=timeDifference
linearGraph = LocationGraphs.combineLocationGraphs(graphMap, startingGraphId, datetime.datetime.fromtimestamp(endingGraphId+1), intervalInSeconds, linear=True, edgesToKeep=edgesToKeep)
logGraph = LocationGraphs.combineLocationGraphs(graphMap, startingGraphId, datetime.datetime.fromtimestamp(endingGraphId+1), intervalInSeconds, linear=False, edgesToKeep=edgesToKeep)
linearClusters = [[str(c), [l[0]for l in lst]] for c, lst in groupby(sorted(clusterUsingAffinityPropagation(linearGraph)[1], key=itemgetter(1)), key=itemgetter(1))]
logarithmicClusters = [[str(c), [l[0]for l in lst]] for c, lst in groupby(sorted(clusterUsingAffinityPropagation(logGraph)[1], key=itemgetter(1)), key=itemgetter(1))]
score = LocationGraphs.getClusterQualityScore(linearClusters, logarithmicClusters)
print intervalInSeconds, edgesToKeep, score
dataToReturn.append(score)
return dataToReturn
graphFile = qualityMetricsFolder%graphType
print graphFile
GeneralMethods.runCommand('rm -rf %s'%graphFile)
for edgesToKeep in range(1,11):
# for edgesToKeep in [1,10]:
edgesToKeep*=0.1
graphMap = dict(graphs[:])
startingGraphId, endingGraphId = min(graphMap.keys()), max(graphMap.keys())
timeDifference = endingGraphId-startingGraphId
LocationGraphs.updateLogarithmicGraphs(graphMap, edgesToKeep=edgesToKeep)
# print {'edgesToKeep': edgesToKeep, 'score': np.mean(getQualityScore(graphMap, edgesToKeep, timeDifference))}
FileIO.writeToFileAsJson({'edgesToKeep': edgesToKeep, 'score': np.mean(getQualityScore(graphMap, edgesToKeep, timeDifference))}, graphFile)
Example 6: combineLocationGraphs
def combineLocationGraphs(graphMap, startingGraphId, startingTime, intervalInSeconds, linear=True, **kwargs):
if intervalInSeconds%TIME_UNIT_IN_SECONDS==0 and int(intervalInSeconds/TIME_UNIT_IN_SECONDS)!=0: numberOfGraphs = int(intervalInSeconds/TIME_UNIT_IN_SECONDS)
else: numberOfGraphs = int(intervalInSeconds/TIME_UNIT_IN_SECONDS)+1
graphId = GeneralMethods.approximateEpoch(GeneralMethods.getEpochFromDateTimeObject(startingTime), TIME_UNIT_IN_SECONDS)
currentLogarithmicId = LocationGraphs.getLogarithmicGraphId(startingGraphId, graphId)
currentCollectedGraphs = 0
graphIdsToCombine = []
while currentCollectedGraphs!=numberOfGraphs and currentLogarithmicId>0:
numberOfGraphsToCollect = 2**int(math.log(numberOfGraphs-currentCollectedGraphs,2))
if not linear and currentLogarithmicId%2==0:
indices = [1]+map(lambda j: 2**j, filter(lambda j: currentLogarithmicId%(2**j)==0, range(1, int(math.log(currentLogarithmicId+1,2))+1)))
if max(indices)>numberOfGraphsToCollect and numberOfGraphsToCollect in indices: index = numberOfGraphsToCollect
else: index = max(indices)
else: index=1
logGraphId = '%s_%s'%(LocationGraphs.getGraphId(startingGraphId, currentLogarithmicId), index)
if logGraphId in graphMap: graphIdsToCombine.append(logGraphId)
currentLogarithmicId-=index
currentCollectedGraphs+=index
graphIdsToCombine = sorted(graphIdsToCombine, key=lambda id:int(id.split('_')[1]), reverse=True)
# print graphIdsToCombine
# for i in graphIdsToCombine:
# ep, l = i.split('_')
# print i, datetime.datetime.fromtimestamp(float(ep)), l, graphMap[i].number_of_nodes()
graphsToCombine = [graphMap[id] for id in graphIdsToCombine]
return combineGraphList(graphsToCombine, **kwargs)
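When linear=False, the loop above walks backwards from the starting graph and at each step tries to consume the largest power-of-two block of unit graphs for which a pre-merged logarithmic graph is available. The greedy power-of-two decomposition itself, separated from the graph-ID lookups, can be illustrated as follows; this is an illustration of the idea only, not the library's code:

# Hypothetical illustration of the greedy power-of-two split used when
# combining logarithmically pre-merged graphs.
import math

def power_of_two_blocks(number_of_graphs):
    # Break a span of unit graphs into power-of-two sized blocks,
    # largest first, e.g. 13 -> [8, 4, 1].
    blocks, remaining = [], number_of_graphs
    while remaining > 0:
        block = 2 ** int(math.log(remaining, 2))
        blocks.append(block)
        remaining -= block
    return blocks

# power_of_two_blocks(13) -> [8, 4, 1]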
Example 7: generate_tuo_location_and_tuo_neighbor_location_and_pure_influence_score
def generate_tuo_location_and_tuo_neighbor_location_and_pure_influence_score(models_ids, startTime, endTime, outputFolder, hashtag_tag):
for model_id in models_ids:
# if w_extra_hashtags: output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, hashtag_tag)
# else: output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, wout_extra_hashtags_tag)
output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, hashtag_tag)
GeneralMethods.runCommand('rm -rf %s'%output_file)
for line_count, location_object in enumerate(iterateJsonFromFile(
location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
)):
print line_count, model_id
tuo_neighbor_location_and_pure_influence_score = []
location_hashtag_set = set(location_object['hashtags'])
for neighbor_location, mf_hashtag_to_tuo_occurrences_and_time_range in location_object['links'].iteritems():
pure_influence_scores = []
for hashtag, (neighbor_location_occurrences, time_range) in mf_hashtag_to_tuo_occurrences_and_time_range.iteritems():
if hashtag in location_object['hashtags']:
location_occurrences = location_object['hashtags'][hashtag][0]
pure_influence_scores.append(MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[model_id](location_occurrences, neighbor_location_occurrences))
neighbor_location_hashtag_set = set(mf_hashtag_to_tuo_occurrences_and_time_range.keys())
if hashtag_tag==w_extra_hashtags_tag:
for hashtag in location_hashtag_set.difference(neighbor_location_hashtag_set): pure_influence_scores.append(1.0)
for hashtag in neighbor_location_hashtag_set.difference(location_hashtag_set): pure_influence_scores.append(-1.0)
mean_pure_influence_score = np.mean(pure_influence_scores)
tuo_neighbor_location_and_pure_influence_score.append([neighbor_location, mean_pure_influence_score])
tuo_neighbor_location_and_pure_influence_score = sorted(tuo_neighbor_location_and_pure_influence_score, key=itemgetter(1))
FileIO.writeToFileAsJson([location_object['id'], tuo_neighbor_location_and_pure_influence_score], output_file)
Example 8: generate_hashtag_specific_location_and_pure_influence_scores
def generate_hashtag_specific_location_and_pure_influence_scores(test_models_ids):
for test_model_id in test_models_ids:
output_file = f_ltuo_hashtag_and_ltuo_location_and_pure_influence_score%(test_model_id)
GeneralMethods.runCommand('rm -rf %s'%output_file)
ltuo_hashtag_and_ltuo_location_and_occurrence_time = Experiments.load_ltuo_hashtag_and_ltuo_location_and_occurrence_time()
for hashtag_count, (hashtag, ltuo_location_and_occurrence_time) in\
enumerate(ltuo_hashtag_and_ltuo_location_and_occurrence_time):
ltuo_location_and_occurrence_times = [(location, sorted(zip(*ito_location_and_occurrence_time)[1]))
for location, ito_location_and_occurrence_time in
groupby(
sorted(ltuo_location_and_occurrence_time, key=itemgetter(0)),
key=itemgetter(0)
)
]
print hashtag_count, test_model_id
ltuo_location_and_pure_influence_score = []
for location, location_occurrence_times in ltuo_location_and_occurrence_times:
pure_influence_scores = []
for neighbor_location, neighbor_location_occurrence_times in ltuo_location_and_occurrence_times:
if location!=neighbor_location:
pure_influence_score = MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[test_model_id](neighbor_location_occurrence_times, location_occurrence_times)
pure_influence_scores.append(pure_influence_score)
ltuo_location_and_pure_influence_score.append([location, np.mean(pure_influence_scores)])
ltuo_location_and_pure_influence_score = sorted(ltuo_location_and_pure_influence_score, key=itemgetter(1))
FileIO.writeToFileAsJson([hashtag, ltuo_location_and_pure_influence_score], output_file)
Example 9: generate_tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity
def generate_tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity(model_ids, startTime, endTime, outputFolder):
def location_similarity(location_vector_1, location_vector_2):
return reduce(lambda total, k: total+(location_vector_1.get(k,0)*location_vector_2.get(k,0)), set(location_vector_1.keys()).union(location_vector_2.keys()),0.)
influence_types=[InfluenceMeasuringModels.TYPE_COMPLETE_INFLUENCE, InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE, InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE]
for model_id in model_ids:
mf_location_to_mf_influence_type_to_influence_vector = dict(Experiments.load_tuo_location_and_mf_influence_type_to_influence_vector(model_id))
GeneralMethods.runCommand('rm -rf %s'%tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id)
for line_count, location_object in enumerate(iterateJsonFromFile(
location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
)):
print line_count
location = location_object['id']
tuo_neighbor_location_and_mf_influence_type_and_similarity = []
for neighbor_location in location_object['links'].keys():
mf_influence_type_and_similarity = {}
for influence_type in influence_types:
similarity = location_similarity(
mf_location_to_mf_influence_type_to_influence_vector[location][influence_type],
mf_location_to_mf_influence_type_to_influence_vector[neighbor_location][influence_type]
)
mf_influence_type_and_similarity[influence_type] = similarity
so_hashtags_for_location = set(location_object['hashtags'].keys())
so_hashtags_for_neighbor_location = set(location_object['links'][neighbor_location].keys())
numerator = len(so_hashtags_for_location.intersection(so_hashtags_for_neighbor_location)) + 0.
denominator = len(so_hashtags_for_location.union(so_hashtags_for_neighbor_location)) + 0.
mf_influence_type_and_similarity[JACCARD_SIMILARITY] = numerator/denominator
tuo_neighbor_location_and_mf_influence_type_and_similarity.append([neighbor_location, mf_influence_type_and_similarity])
FileIO.writeToFileAsJson(
[location, tuo_neighbor_location_and_mf_influence_type_and_similarity],
tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id
)
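Two similarities are computed per neighbor above: a sparse dot product between the influence vectors (dicts keyed by location) and the Jaccard similarity between the two hashtag sets. A compact, self-contained restatement of both measures, assuming the influence vectors are plain dicts of floats:

# Hypothetical compact restatement of the two similarity measures.
def dot_product_similarity(vector_1, vector_2):
    # Sparse dot product over dict-valued vectors; missing keys count as 0.
    keys = set(vector_1).union(vector_2)
    return sum(vector_1.get(k, 0.0) * vector_2.get(k, 0.0) for k in keys)

def jaccard_similarity(hashtags_1, hashtags_2):
    # |intersection| / |union| of the two hashtag sets.
    hashtags_1, hashtags_2 = set(hashtags_1), set(hashtags_2)
    return len(hashtags_1 & hashtags_2) / float(len(hashtags_1 | hashtags_2))

# jaccard_similarity(['a', 'b'], ['b', 'c']) -> 0.3333...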
Example 10: writeUserClustersFile
def writeUserClustersFile(place):
print 'Generating clusters...'
userVectors = GenerateDataFiles.getUserVectors(place)
GeneralMethods.runCommand('rm -rf %s'%placesUserClustersFile%place['name'])
clusterAssignments = Clustering.cluster(Clustering.EM, placesARFFFile%place['name'], userVectors, '-N -1')
# clusterAssignments = Clustering.cluster(Clustering.KMeans, placesARFFFile%place['name'], userVectors, '-N 2')
for userId, userVector in userVectors.iteritems(): userVectors[userId] = {'userVector': userVector, 'clusterId': clusterAssignments[userId]}
for data in userVectors.iteritems(): FileIO.writeToFileAsJson(data, placesUserClustersFile%place['name'])
Example 11: performanceWithSpamFilteringForLatestMessages
def performanceWithSpamFilteringForLatestMessages(generateData):
experimentData = defaultdict(dict)
for iteration in range(10):
# for spammerPercentage in range(1,21):
## spammerPercentage = 20
# spammerPercentage = spammerPercentage*0.05
# for spammerPercentage in range(1,11):
# spammerPercentage = spammerPercentage*0.02
# for spammerPercentage in range(1,201):
# spammerPercentage = spammerPercentage* 0.005
l1 = [spammerPercentage* 0.001 for spammerPercentage in range(1,51)]
l2 = [spammerPercentage* 0.05 for spammerPercentage in range(1,21)]
l3 = [0.01]+l2
for spammerPercentage in l1:
experimentFileName = spamModelFolder+'performanceWithSpamFilteringForLatestMessages/%s/%0.3f'%(iteration,spammerPercentage)
print experimentFileName
if generateData:
model = MixedUsersModel()
conf = {'model': model, 'numberOfTimeSteps': 10, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesSpamFiltered],
'experimentFileName': experimentFileName,
# 'noOfPayloadsPerSpammer': 1, 'noOfTopics': 10
}
# conf = {'model': model, 'numberOfTimeSteps': 10, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
# 'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesDuplicatesRemoved, RankingModel.popularMessages],
# 'experimentFileName': experimentFileName}
GeneralMethods.runCommand('rm -rf %s'%experimentFileName);run(**conf)
else:
tempData = defaultdict(list)
for data in FileIO.iterateJsonFromFile(experimentFileName):
for ranking_id in data['spammmess']:
tempData[ranking_id]+=data['spammmess'][ranking_id]
experimentData[iteration][spammerPercentage]=tempData
if not generateData:
realDataY = defaultdict(dict)
for iteration in experimentData:
dataY = defaultdict(list)
dataX = []
for perct in sorted(experimentData[iteration]):
dataX.append(perct)
for ranking_id, values in experimentData[iteration][perct].iteritems(): dataY[ranking_id].append(np.mean(values))
dataX=sorted(dataX)
for ranking_id in dataY:
for x, y in zip(dataX, dataY[ranking_id]):
if x not in realDataY[ranking_id]: realDataY[ranking_id][x]=[]
realDataY[ranking_id][x].append(y)
for ranking_id in dataY: plt.plot(dataX, [np.mean(realDataY[ranking_id][x]) for x in dataX], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
plt.xlabel('Percentage of Spammers', fontsize=16, fontweight='bold')
plt.ylabel('Spamness', fontsize=16, fontweight='bold')
# plt.title('Performance with spam filtering')
plt.legend(loc=2)
# plt.show()
plt.xlim(xmax=0.05)
plt.savefig('performanceWithSpamFilteringForLatestMessages.png')
plt.clf()
Example 12: messageSelectionMethod
def messageSelectionMethod(self, currentTimeStep, user, currentTopics, **conf):
message = None
if GeneralMethods.trueWith(user.messagingProbability):
if GeneralMethods.trueWith(user.newTopicProbability):
topic = Topic(len(currentTopics))
currentTopics.append(topic)
message = user.generateMessage(currentTimeStep, topic)
else:
message = user.generateMessage(currentTimeStep, random.choice(currentTopics))
return message
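GeneralMethods.trueWith(p) acts here as a Bernoulli trial: the user posts at all with probability messagingProbability and, if so, starts a new topic with probability newTopicProbability. A toy check of the trueWith semantics assumed in the sketch at the top of this page:

# Toy check of the assumed GeneralMethods.trueWith semantics: over many
# trials the acceptance rate should approach the given probability.
import random

def true_with(probability):
    return random.random() < probability   # assumed implementation

trials = 100000
rate = sum(true_with(0.3) for _ in range(trials)) / float(trials)
print(rate)   # roughly 0.3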
Example 13: performanceWithSpamDetection
def performanceWithSpamDetection(generateData):
experimentData = defaultdict(dict)
ratios = [0.0,0.4,0.9]
marker = dict([(0.0, 's'), (0.4, 'o'), (0.9, 'd')])
# spammerPercentages = [0.2, 0.01, 0.01]
spammerPercentages = [0.015, 0.015, 0.015]
for iteration in range(10):
for spamDetectionRatio, spammerPercentage in zip(ratios, spammerPercentages):
experimentFileName = spamModelFolder+'performanceWithSpamDetection/%s/%0.3f'%(iteration,spamDetectionRatio)
print experimentFileName
if generateData:
model = MixedUsersModel()
conf = {'model': model, 'numberOfTimeSteps': 100, 'addUsersMethod': User.addUsersUsingRatioWithSpamDetection, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
# 'spammerMessagingProbability': spammerBudget,
'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesSpamFiltered, RankingModel.popularMessages, RankingModel.popularMessagesSpamFiltered],
'spamDetectionRatio': spamDetectionRatio,
'experimentFileName': experimentFileName}
GeneralMethods.runCommand('rm -rf %s'%experimentFileName);run(**conf)
else:
for data in FileIO.iterateJsonFromFile(experimentFileName):
for ranking_id in data['spammmess']:
if data['currentTimeStep'] not in experimentData[spamDetectionRatio]: experimentData[spamDetectionRatio][data['currentTimeStep']]=defaultdict(list)
experimentData[spamDetectionRatio][data['currentTimeStep']][ranking_id]+=data['spammmess'][ranking_id]
if not generateData:
sdr = {}
for spamDetectionRatio in sorted(experimentData.keys()):
dataToPlot = defaultdict(list)
for timeUnit in experimentData[spamDetectionRatio]:
dataToPlot['x'].append(timeUnit)
for ranking_id in experimentData[spamDetectionRatio][timeUnit]: dataToPlot[ranking_id].append(np.mean(experimentData[spamDetectionRatio][timeUnit][ranking_id]))
sdr[spamDetectionRatio]=dataToPlot
for ranking_id in [RankingModel.LATEST_MESSAGES_SPAM_FILTERED, RankingModel.POPULAR_MESSAGES_SPAM_FILTERED]:
# for ranking_id in [RankingModel.LATEST_MESSAGES, RankingModel.POPULAR_MESSAGES]:
for spamDetectionRatio in ratios:
print ranking_id, spamDetectionRatio
dataY = smooth(sdr[spamDetectionRatio][ranking_id],8)[:len(sdr[spamDetectionRatio]['x'])]
dataX, dataY = sdr[spamDetectionRatio]['x'][10:], dataY[10:]
print 'x', [x-10 for x in dataX]
if spamDetectionRatio==0.0:
print ranking_id, dataY
plt.plot([x-10 for x in dataX], dataY, label='%s'%(labels[ranking_id]), lw=1, marker=marker[spamDetectionRatio])
else:
print ranking_id, dataY
plt.plot([x-10 for x in dataX], dataY, label='%s (%d'%(labels[ranking_id].replace('Filtering', 'Detection'),spamDetectionRatio*100)+'%)', lw=1, marker=marker[spamDetectionRatio])
plt.ylim(ymin=0, ymax=1)
plt.xlim(xmin=0, xmax=75)
# plt.title(ranking_id)
plt.legend()
plt.xlabel('Time', fontsize=16, fontweight='bold')
plt.ylabel('Spamness', fontsize=16, fontweight='bold')
# plt.show()
# plt.savefig('performanceWithSpamDetection_%s.png'%ranking_id)
savefig('performanceWithSpamDetection_%s.png'%ranking_id)
plt.clf()
Example 14: writeARFFFile
def writeARFFFile(place):
userVectors = defaultdict(dict)
locationToUserMap = dict((l['location'], l) for l in locationToUserMapIterator(place, minCheckins=50))
for lid in locationToUserMap:
for user in locationToUserMap[lid]['users']:
userVectors[user][lid.replace(' ', '_')]=sum(len(locationToUserMap[lid]['users'][user][d][db]) for d in locationToUserMap[lid]['users'][user] for db in locationToUserMap[lid]['users'][user][d])
for user in userVectors.keys()[:]:
if sum(userVectors[user].itervalues())<place['minUserCheckins']: del userVectors[user]
arffFile=ARFF.writeARFFForClustering(userVectors, place['name'])
outputFileName = getARFFFileName(place)
FileIO.createDirectoryForFile(outputFileName)
GeneralMethods.runCommand('mv %s %s'%(arffFile, outputFileName))
Example 15: reducer
def reducer(self, location, it_performance_values):
performance_values = list(chain(*it_performance_values))
performance_summary = defaultdict(list)
for prediction_method, pvs_for_prediction_method in \
GeneralMethods.group_items_by(performance_values, key=itemgetter('prediction_method')):
for metric, pvs_for_prediction_method_and_metric in \
GeneralMethods.group_items_by(pvs_for_prediction_method, key=itemgetter('metric')):
performance_summary[metric].append([
prediction_method,
pvs_for_prediction_method_and_metric[0]['metric_value']
])
yield '', dict(location=location, performance_summary=performance_summary)
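The reducer groups the flattened performance records twice with GeneralMethods.group_items_by, first by prediction_method and then by metric, keeping the first metric_value in each group. A toy input/output pair using the group_items_by sketch from the top of this page; the record fields match the call sites above, but the method and metric names are made up:

# Toy illustration of the two-level grouping in the reducer, using the
# assumed group_items_by from the sketch at the top of this page.
from itertools import groupby
from operator import itemgetter

def group_items_by(items, key):
    return [(k, list(vals)) for k, vals in groupby(sorted(items, key=key), key=key)]

performance_values = [
    {'prediction_method': 'greedy',  'metric': 'accuracy', 'metric_value': 0.61},
    {'prediction_method': 'greedy',  'metric': 'impact',   'metric_value': 0.42},
    {'prediction_method': 'sharing', 'metric': 'accuracy', 'metric_value': 0.55},
]

performance_summary = {}
for method, records_for_method in group_items_by(performance_values,
                                                 key=itemgetter('prediction_method')):
    for metric, records in group_items_by(records_for_method, key=itemgetter('metric')):
        performance_summary.setdefault(metric, []).append(
            [method, records[0]['metric_value']])

print(performance_summary)
# -> {'accuracy': [['greedy', 0.61], ['sharing', 0.55]], 'impact': [['greedy', 0.42]]}
# (key order may vary)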