This page collects typical, real-world usage examples of the Python method library.classes.GeneralMethods.runCommand. If you have been wondering what GeneralMethods.runCommand does, how to call it, or where to find working examples of it, the curated snippets below should help. You can also explore its containing class, library.classes.GeneralMethods, for more context.
The sections below present 15 code examples of GeneralMethods.runCommand, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
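Every example below uses runCommand the same way: it takes a single shell-command string and executes it, almost always 'rm -rf <output file>' to clear stale output before fresh results are appended (plus one 'mv' in Example 9). Here is a minimal sketch of that recurring pattern; it assumes only that runCommand shells out the string it is given, as every example on this page does, and the output path is hypothetical:

from library.classes import GeneralMethods

# Hypothetical output path, used only for illustration.
output_file = '/tmp/analysis_output.json'
# Clear any stale copy of the output file before regenerating it -- the
# pattern shared by the examples below.
GeneralMethods.runCommand('rm -rf %s' % output_file)
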
Example 1: generate_tuo_location_and_tuo_neighbor_location_and_pure_influence_score
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def generate_tuo_location_and_tuo_neighbor_location_and_pure_influence_score(models_ids, startTime, endTime, outputFolder, hashtag_tag):
    for model_id in models_ids:
#        if w_extra_hashtags: output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, hashtag_tag)
#        else: output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, wout_extra_hashtags_tag)
        output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, hashtag_tag)
        GeneralMethods.runCommand('rm -rf %s'%output_file)
        for line_count, location_object in enumerate(iterateJsonFromFile(
                    location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
                )):
            print line_count, model_id
            tuo_neighbor_location_and_pure_influence_score = []
            location_hashtag_set = set(location_object['hashtags'])
            for neighbor_location, mf_hashtag_to_tuo_occurrences_and_time_range in location_object['links'].iteritems():
                pure_influence_scores = []
                for hashtag, (neighbor_location_occurrences, time_range) in mf_hashtag_to_tuo_occurrences_and_time_range.iteritems():
                    if hashtag in location_object['hashtags']:
                        location_occurrences = location_object['hashtags'][hashtag][0]
                        pure_influence_scores.append(MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[model_id](location_occurrences, neighbor_location_occurrences))
                neighbor_location_hashtag_set = set(mf_hashtag_to_tuo_occurrences_and_time_range.keys())
                if hashtag_tag==w_extra_hashtags_tag:
                    for hashtag in location_hashtag_set.difference(neighbor_location_hashtag_set): pure_influence_scores.append(1.0)
                    for hashtag in neighbor_location_hashtag_set.difference(location_hashtag_set): pure_influence_scores.append(-1.0)
                mean_pure_influence_score = np.mean(pure_influence_scores)
                tuo_neighbor_location_and_pure_influence_score.append([neighbor_location, mean_pure_influence_score])
            tuo_neighbor_location_and_pure_influence_score = sorted(tuo_neighbor_location_and_pure_influence_score, key=itemgetter(1))
            FileIO.writeToFileAsJson([location_object['id'], tuo_neighbor_location_and_pure_influence_score], output_file)

Example 2: generate_tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def generate_tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity(model_ids, startTime, endTime, outputFolder):
    def location_similarity(location_vector_1, location_vector_2):
        return reduce(lambda total, k: total+(location_vector_1.get(k,0)*location_vector_2.get(k,0)), set(location_vector_1.keys()).union(location_vector_2.keys()),0.)
    influence_types=[InfluenceMeasuringModels.TYPE_COMPLETE_INFLUENCE, InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE, InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE]
    for model_id in model_ids:
        mf_location_to_mf_influence_type_to_influence_vector = dict(Experiments.load_tuo_location_and_mf_influence_type_to_influence_vector(model_id))
        GeneralMethods.runCommand('rm -rf %s'%tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id)
        for line_count, location_object in enumerate(iterateJsonFromFile(
                    location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
                )):
            print line_count
            location = location_object['id']
            tuo_neighbor_location_and_mf_influence_type_and_similarity = []
            for neighbor_location in location_object['links'].keys():
                mf_influence_type_and_similarity = {}
                for influence_type in influence_types:
                    similarity = location_similarity(
                                     mf_location_to_mf_influence_type_to_influence_vector[location][influence_type],
                                     mf_location_to_mf_influence_type_to_influence_vector[neighbor_location][influence_type]
                                 )
                    mf_influence_type_and_similarity[influence_type] = similarity
                so_hashtags_for_location = set(location_object['hashtags'].keys())
                so_hashtags_for_neighbor_location = set(location_object['links'][neighbor_location].keys())
                numerator = len(so_hashtags_for_location.intersection(so_hashtags_for_neighbor_location)) + 0.
                denominator = len(so_hashtags_for_location.union(so_hashtags_for_neighbor_location)) + 0.
                mf_influence_type_and_similarity[JACCARD_SIMILARITY] = numerator/denominator
                tuo_neighbor_location_and_mf_influence_type_and_similarity.append([neighbor_location, mf_influence_type_and_similarity])
            FileIO.writeToFileAsJson(
                [location, tuo_neighbor_location_and_mf_influence_type_and_similarity],
                tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id
            )

Example 3: generate_hashtag_specific_location_and_pure_influence_scores
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def generate_hashtag_specific_location_and_pure_influence_scores(test_models_ids):
    for test_model_id in test_models_ids:
        output_file = f_ltuo_hashtag_and_ltuo_location_and_pure_influence_score%(test_model_id)
        GeneralMethods.runCommand('rm -rf %s'%output_file)
        ltuo_hashtag_and_ltuo_location_and_occurrence_time = Experiments.load_ltuo_hashtag_and_ltuo_location_and_occurrence_time()
        for hashtag_count, (hashtag, ltuo_location_and_occurrence_time) in\
                enumerate(ltuo_hashtag_and_ltuo_location_and_occurrence_time):
            ltuo_location_and_occurrence_times = [(location, sorted(zip(*ito_location_and_occurrence_time)[1]))
                                                  for location, ito_location_and_occurrence_time in
                                                      groupby(
                                                          sorted(ltuo_location_and_occurrence_time, key=itemgetter(0)),
                                                          key=itemgetter(0)
                                                      )
                                                 ]
            print hashtag_count, test_model_id
            ltuo_location_and_pure_influence_score = []
            for location, location_occurrence_times in ltuo_location_and_occurrence_times:
                pure_influence_scores = []
                for neighbor_location, neighbor_location_occurrence_times in ltuo_location_and_occurrence_times:
                    if location!=neighbor_location:
                        pure_influence_score = MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[test_model_id](neighbor_location_occurrence_times, location_occurrence_times)
                        pure_influence_scores.append(pure_influence_score)
                ltuo_location_and_pure_influence_score.append([location, np.mean(pure_influence_scores)])
            ltuo_location_and_pure_influence_score = sorted(ltuo_location_and_pure_influence_score, key=itemgetter(1))
            FileIO.writeToFileAsJson([hashtag, ltuo_location_and_pure_influence_score], output_file)

Example 4: analyzeQuality
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def analyzeQuality(graphs, graphType):
    def getQualityScore(graphMap, edgesToKeep, timeDifference):
        dataToReturn = []
        for j, intervalInSeconds in enumerate([1]):
            intervalInSeconds*=timeDifference
            linearGraph = LocationGraphs.combineLocationGraphs(graphMap, startingGraphId, datetime.datetime.fromtimestamp(endingGraphId+1), intervalInSeconds, linear=True, edgesToKeep=edgesToKeep)
            logGraph = LocationGraphs.combineLocationGraphs(graphMap, startingGraphId, datetime.datetime.fromtimestamp(endingGraphId+1), intervalInSeconds, linear=False, edgesToKeep=edgesToKeep)
            linearClusters = [[str(c), [l[0] for l in lst]] for c, lst in groupby(sorted(clusterUsingAffinityPropagation(linearGraph)[1], key=itemgetter(1)), key=itemgetter(1))]
            logarithmicClusters = [[str(c), [l[0] for l in lst]] for c, lst in groupby(sorted(clusterUsingAffinityPropagation(logGraph)[1], key=itemgetter(1)), key=itemgetter(1))]
            score = LocationGraphs.getClusterQualityScore(linearClusters, logarithmicClusters)
            print intervalInSeconds, edgesToKeep, score
            dataToReturn.append(score)
        return dataToReturn
    graphFile = qualityMetricsFolder%graphType
    print graphFile
    GeneralMethods.runCommand('rm -rf %s'%graphFile)
    for edgesToKeep in range(1,11):
#    for edgesToKeep in [1,10]:
        edgesToKeep*=0.1
        graphMap = dict(graphs[:])
        startingGraphId, endingGraphId = min(graphMap.keys()), max(graphMap.keys())
        timeDifference = endingGraphId-startingGraphId
        LocationGraphs.updateLogarithmicGraphs(graphMap, edgesToKeep=edgesToKeep)
#        print {'edgesToKeep': edgesToKeep, 'score': np.mean(getQualityScore(graphMap, edgesToKeep, timeDifference))}
        FileIO.writeToFileAsJson({'edgesToKeep': edgesToKeep, 'score': np.mean(getQualityScore(graphMap, edgesToKeep, timeDifference))}, graphFile)

Example 5: trendCurves
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def trendCurves():
    model = MixedUsersModel()
    experimentFileName = spamModelFolder+model.id
    conf = {'model': model, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.trendCurves, 1)], 'ratio': {'normal': 0.985, 'spammer': 0.015},
            'experimentFileName': experimentFileName}
    GeneralMethods.runCommand('rm -rf %s'%experimentFileName); run(**conf)
    Analysis.trendCurves(experimentFileName=experimentFileName)

Example 6: writeUserClustersFile
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def writeUserClustersFile(place):
    print 'Generating clusters...'
    userVectors = GenerateDataFiles.getUserVectors(place)
    GeneralMethods.runCommand('rm -rf %s'%placesUserClustersFile%place['name'])
    clusterAssignments = Clustering.cluster(Clustering.EM, placesARFFFile%place['name'], userVectors, '-N -1')
#    clusterAssignments = Clustering.cluster(Clustering.KMeans, placesARFFFile%place['name'], userVectors, '-N 2')
    for userId, userVector in userVectors.iteritems(): userVectors[userId] = {'userVector': userVector, 'clusterId': clusterAssignments[userId]}
    for data in userVectors.iteritems(): FileIO.writeToFileAsJson(data, placesUserClustersFile%place['name'])

Example 7: performanceWithSpamFilteringForLatestMessages
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def performanceWithSpamFilteringForLatestMessages(generateData):
    experimentData = defaultdict(dict)
    for iteration in range(10):
#        for spammerPercentage in range(1,21):
##            spammerPercentage = 20
#            spammerPercentage = spammerPercentage*0.05
#        for spammerPercentage in range(1,11):
#            spammerPercentage = spammerPercentage*0.02
#        for spammerPercentage in range(1,201):
#            spammerPercentage = spammerPercentage* 0.005
        l1 = [spammerPercentage* 0.001 for spammerPercentage in range(1,51)]
        l2 = [spammerPercentage* 0.05 for spammerPercentage in range(1,21)]
        l3 = [0.01]+l2
        for spammerPercentage in l1:
            experimentFileName = spamModelFolder+'performanceWithSpamFilteringForLatestMessages/%s/%0.3f'%(iteration,spammerPercentage)
            print experimentFileName
            if generateData:
                model = MixedUsersModel()
                conf = {'model': model, 'numberOfTimeSteps': 10, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
                        'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesSpamFiltered],
                        'experimentFileName': experimentFileName,
#                        'noOfPayloadsPerSpammer': 1, 'noOfTopics': 10
                        }
#                conf = {'model': model, 'numberOfTimeSteps': 10, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
#                        'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesDuplicatesRemoved, RankingModel.popularMessages],
#                        'experimentFileName': experimentFileName}
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName); run(**conf)
            else:
                tempData = defaultdict(list)
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        tempData[ranking_id]+=data['spammmess'][ranking_id]
                experimentData[iteration][spammerPercentage]=tempData
    if not generateData:
        realDataY = defaultdict(dict)
        for iteration in experimentData:
            dataY = defaultdict(list)
            dataX = []
            for perct in sorted(experimentData[iteration]):
                dataX.append(perct)
                for ranking_id, values in experimentData[iteration][perct].iteritems(): dataY[ranking_id].append(np.mean(values))
            dataX=sorted(dataX)
            for ranking_id in dataY:
                for x, y in zip(dataX, dataY[ranking_id]):
                    if x not in realDataY[ranking_id]: realDataY[ranking_id][x]=[]
                    realDataY[ranking_id][x].append(y)
        for ranking_id in dataY: plt.plot(dataX, [np.mean(realDataY[ranking_id][x]) for x in dataX], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
        plt.xlabel('Percentage of Spammers', fontsize=16, fontweight='bold')
        plt.ylabel('Spamness', fontsize=16, fontweight='bold')
#        plt.title('Performance with spam filtering')
        plt.legend(loc=2)
#        plt.show()
        plt.xlim(xmax=0.05)
        plt.savefig('performanceWithSpamFilteringForLatestMessages.png')
        plt.clf()

Example 8: performanceWithSpamDetection
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def performanceWithSpamDetection(generateData):
    experimentData = defaultdict(dict)
    ratios = [0.0,0.4,0.9]
    marker = dict([(0.0, 's'), (0.4, 'o'), (0.9, 'd')])
#    spammerPercentages = [0.2, 0.01, 0.01]
    spammerPercentages = [0.015, 0.015, 0.015]
    for iteration in range(10):
        for spamDetectionRatio, spammerPercentage in zip(ratios, spammerPercentages):
            experimentFileName = spamModelFolder+'performanceWithSpamDetection/%s/%0.3f'%(iteration,spamDetectionRatio)
            print experimentFileName
            if generateData:
                model = MixedUsersModel()
                conf = {'model': model, 'numberOfTimeSteps': 100, 'addUsersMethod': User.addUsersUsingRatioWithSpamDetection, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
#                        'spammerMessagingProbability': spammerBudget,
                        'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesSpamFiltered, RankingModel.popularMessages, RankingModel.popularMessagesSpamFiltered],
                        'spamDetectionRatio': spamDetectionRatio,
                        'experimentFileName': experimentFileName}
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName); run(**conf)
            else:
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        if data['currentTimeStep'] not in experimentData[spamDetectionRatio]: experimentData[spamDetectionRatio][data['currentTimeStep']]=defaultdict(list)
                        experimentData[spamDetectionRatio][data['currentTimeStep']][ranking_id]+=data['spammmess'][ranking_id]
    if not generateData:
        sdr = {}
        for spamDetectionRatio in sorted(experimentData.keys()):
            dataToPlot = defaultdict(list)
            for timeUnit in experimentData[spamDetectionRatio]:
                dataToPlot['x'].append(timeUnit)
                for ranking_id in experimentData[spamDetectionRatio][timeUnit]: dataToPlot[ranking_id].append(np.mean(experimentData[spamDetectionRatio][timeUnit][ranking_id]))
            sdr[spamDetectionRatio]=dataToPlot
        for ranking_id in [RankingModel.LATEST_MESSAGES_SPAM_FILTERED, RankingModel.POPULAR_MESSAGES_SPAM_FILTERED]:
#        for ranking_id in [RankingModel.LATEST_MESSAGES, RankingModel.POPULAR_MESSAGES]:
            for spamDetectionRatio in ratios:
                print ranking_id, spamDetectionRatio
                dataY = smooth(sdr[spamDetectionRatio][ranking_id],8)[:len(sdr[spamDetectionRatio]['x'])]
                dataX, dataY = sdr[spamDetectionRatio]['x'][10:], dataY[10:]
                print 'x', [x-10 for x in dataX]
                if spamDetectionRatio==0.0:
                    print ranking_id, dataY
                    plt.plot([x-10 for x in dataX], dataY, label='%s'%(labels[ranking_id]), lw=1, marker=marker[spamDetectionRatio])
                else:
                    print ranking_id, dataY
                    plt.plot([x-10 for x in dataX], dataY, label='%s (%d'%(labels[ranking_id].replace('Filtering', 'Detection'),spamDetectionRatio*100)+'%)', lw=1, marker=marker[spamDetectionRatio])
            plt.ylim(ymin=0, ymax=1)
            plt.xlim(xmin=0, xmax=75)
#            plt.title(ranking_id)
            plt.legend()
            plt.xlabel('Time', fontsize=16, fontweight='bold')
            plt.ylabel('Spamness', fontsize=16, fontweight='bold')
#            plt.show()
#            plt.savefig('performanceWithSpamDetection_%s.png'%ranking_id)
            savefig('performanceWithSpamDetection_%s.png'%ranking_id)
            plt.clf()

Example 9: writeARFFFile
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def writeARFFFile(place):
    userVectors = defaultdict(dict)
    locationToUserMap = dict((l['location'], l) for l in locationToUserMapIterator(place, minCheckins=50))
    for lid in locationToUserMap:
        for user in locationToUserMap[lid]['users']:
            userVectors[user][lid.replace(' ', '_')]=sum(len(locationToUserMap[lid]['users'][user][d][db]) for d in locationToUserMap[lid]['users'][user] for db in locationToUserMap[lid]['users'][user][d])
    for user in userVectors.keys()[:]:
        if sum(userVectors[user].itervalues())<place['minUserCheckins']: del userVectors[user]
    arffFile=ARFF.writeARFFForClustering(userVectors, place['name'])
    outputFileName = getARFFFileName(place)
    FileIO.createDirectoryForFile(outputFileName)
    GeneralMethods.runCommand('mv %s %s'%(arffFile, outputFileName))

Example 10: writeLocationToUserMap
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def writeLocationToUserMap(place):
    name, boundary = place['name'], place['boundary']
    GeneralMethods.runCommand('rm -rf %s'%placesLocationToUserMapFile%name)
    for location in filteredLocationToUserAndTimeMapIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, inputFile=locationToUserAndExactTimeMapFile):
        lid=getLocationFromLid(location['location'])
        if isWithinBoundingBox(lid, boundary):
            location['categories'] = ''; location['tags'] = ''; location['name']=''
            title = venuesCollection.find_one({'lid':location['location']})
            if title: location['name'] = unicode(title['n']).encode("utf-8")
            meta = venuesMetaDataCollection.find_one({'_id':location['location']})
            if meta: location['categories'] = unicode(meta['c']).encode("utf-8"); location['tags'] = unicode(meta['t']).encode("utf-8")
            for user in location['users'].keys()[:]: location['users'][str(user)]=location['users'][user]; del location['users'][user]
            location['noOfCheckins']=sum([len(epochs) for user, userVector in location['users'].iteritems() for day, dayVector in userVector.iteritems() for db, epochs in dayVector.iteritems()])
            if location['noOfCheckins']>place.get('minLocationCheckins',0): FileIO.writeToFileAsJson(location, placesLocationToUserMapFile%name)

Example 11: writeTopClusterFeatures
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def writeTopClusterFeatures(place):
    locationNames = {}
    def getLocationName(lid):
        if lid not in locationNames:
            locationObject = venuesCollection.find_one({'lid':lid})
            if locationObject: locationNames[lid] = unicode(locationObject['n']).encode("utf-8")
            else: locationNames[lid] = ''
        return locationNames[lid]
    GeneralMethods.runCommand('rm -rf %s'%placesUserClusterFeaturesFile%place['name'])
    documents = [userVector.values() for user, userVector in FileIO.iterateJsonFromFile(placesUserClustersFile%place['name'])]
    for data in getTopFeaturesForClass(documents, 1000):
        clusterId, features = data
        modifiedFeatures = []
        for feature in features: modifiedFeatures.append(list(feature) + [getLocationName(feature[0].replace('_', ' '))])
        FileIO.writeToFileAsJson([clusterId, GeneralMethods.getRandomColor(), modifiedFeatures], placesUserClusterFeaturesFile%place['name'])

Example 12: writeLocationsWithClusterInfoFile
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def writeLocationsWithClusterInfoFile(place):
    GeneralMethods.runCommand('rm -rf %s'%placesLocationWithClusterInfoFile%place['name'])
    for clustering in iteraterUserClusterings(place):
        dataToWrite, userClusterMap = {}, {}
        for clusterId, users in clustering[2]['clusters'].iteritems():
            for user in users: userClusterMap[user]=clusterId
        locationMap = defaultdict(dict)
        for location in locationToUserMapIterator(place):
            locationMap[location['location']] = {'name':unicode(location['name']).encode("utf-8"), 'checkins':defaultdict(list)}
            for user, userVector in location['users'].iteritems():
                if user in userClusterMap:
                    for day, dayVector in userVector.iteritems():
                        for db, epochs in dayVector.iteritems():
                            locationMap[location['location']]['checkins'][userClusterMap[user]]+=epochs
        dataToWrite[str(clustering[0])]=locationMap
        FileIO.writeToFileAsJson(dataToWrite, placesLocationWithClusterInfoFile%place['name'])

Example 13: performanceAsPercentageOfGlobalSpammerVaries
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def performanceAsPercentageOfGlobalSpammerVaries(generateData):
    experimentData = defaultdict(dict)
    for iteration in range(10):
#        for spammerPercentage in range(1,21):
#            spammerPercentage = spammerPercentage*0.05
        for spammerPercentage in range(1,11):
            spammerPercentage = spammerPercentage*0.1
            experimentFileName = spamModelFolder+'performanceAsPercentageOfGlobalSpammerVaries/%s/%0.3f'%(iteration,spammerPercentage)
            print experimentFileName
            if generateData:
                model = MixedUsersModel()
                conf = {'model': model, 'numberOfTimeSteps': 10, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                        'ratio': {'normal': 0.985, 'spammer': 0.015},
                        'spamRatio': {'localPayloads': 1-spammerPercentage, 'globalPayloads': spammerPercentage},
                        'noOfGlobalSpammerPayloads': 10,
                        'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesDuplicatesRemoved, RankingModel.popularMessages],
                        'experimentFileName': experimentFileName}
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName); run(**conf)
            else:
                tempData = defaultdict(list)
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        tempData[ranking_id]+=data['spammmess'][ranking_id]
                experimentData[iteration][spammerPercentage]=tempData
    if not generateData:
        realDataY = defaultdict(dict)
        for iteration in experimentData:
            dataY = defaultdict(list)
            dataX = []
            for perct in sorted(experimentData[iteration]):
                dataX.append(perct)
                for ranking_id, values in experimentData[iteration][perct].iteritems(): dataY[ranking_id].append(np.mean(values))
            dataX=sorted(dataX)
            for ranking_id in dataY:
                for x, y in zip(dataX, dataY[ranking_id]):
                    if x not in realDataY[ranking_id]: realDataY[ranking_id][x]=[]
                    realDataY[ranking_id][x].append(y)
        for ranking_id in dataY:
            if ranking_id in labels:
                plt.plot(dataX, [np.mean(realDataY[ranking_id][x]) for x in dataX], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
        plt.xlabel('Percentage of Spammers Using Group Strategy', fontsize=16, fontweight='bold')
        plt.ylabel('Spamness', fontsize=16, fontweight='bold')
#        plt.title('Spammness when spammers use mixed strategy')
        plt.legend(loc=4)
#        plt.show()
        plt.savefig('performanceAsPercentageOfGlobalSpammerVaries.png')
        plt.clf()

Example 14: performanceAsNoOfGlobalPayloadsVary
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def performanceAsNoOfGlobalPayloadsVary(generateData):
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for noOfGlobalSpammerPayloads in range(1,500):
#        for noOfGlobalSpammerPayloads in range(10,11):
            Spammer.globalPayloads = None
            experimentFileName = spamModelFolder+'performanceAsNoOfGlobalPayloadsVary/%s/%0.3f'%(iteration,noOfGlobalSpammerPayloads)
            print experimentFileName
            if generateData:
                model = MixedUsersModel()
                conf = {'model': model, 'numberOfTimeSteps': 10, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 0.985, 'spammer': 0.015},
                        'noOfGlobalSpammerPayloads': noOfGlobalSpammerPayloads,
                        'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesDuplicatesRemoved, RankingModel.popularMessages],
                        'experimentFileName': experimentFileName}
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName); run(**conf)
            else:
                tempData = defaultdict(list)
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        tempData[ranking_id]+=data['spammmess'][ranking_id]
                experimentData[iteration][noOfGlobalSpammerPayloads]=tempData
    if not generateData:
        realDataY = defaultdict(dict)
        for iteration in experimentData:
            dataY = defaultdict(list)
            dataX = []
            for perct in sorted(experimentData[iteration]):
                dataX.append(perct)
                for ranking_id, values in experimentData[iteration][perct].iteritems(): dataY[ranking_id].append(np.mean(values))
            dataX=sorted(dataX)
            for ranking_id in dataY:
                for x, y in zip(dataX, dataY[ranking_id]):
                    if x not in realDataY[ranking_id]: realDataY[ranking_id][x]=[]
                    realDataY[ranking_id][x].append(y)
        for ranking_id in dataY:
            if ranking_id in labels:
                dy = [np.mean(realDataY[ranking_id][x]) for x in dataX[:20]] + list(smooth([np.mean(realDataY[ranking_id][x]) for x in dataX[20:]]))
                plt.semilogx(dataX, dy[:len(dataX)], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
#        for ranking_id in dataY: plt.plot(dataX, [np.mean(realDataY[ranking_id][x]) for x in dataX], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
        plt.xlabel('Payloads Per Spam Group', fontsize=15, fontweight='bold')
        plt.ylabel('Spamness', fontsize=15, fontweight='bold')
#        plt.title('Spammness with changing global payloads')
        plt.legend(loc=4)
#        plt.show()
        plt.savefig('performanceAsNoOfGlobalPayloadsVary.png')
        plt.clf()

Example 15: generate_tuo_location_and_tuo_neighbor_location_and_influence_score
# Required import: from library.classes import GeneralMethods [as alias]
# Or: from library.classes.GeneralMethods import runCommand [as alias]
def generate_tuo_location_and_tuo_neighbor_location_and_influence_score(models_ids, startTime, endTime, outputFolder, hashtag_tag):
    def get_hashtag_weights(map_from_hashtag_to_tuples_of_occurrences_and_time_range):
        total_occurrences = sum([len(occurrences)
                                 for hashtag, (occurrences, time_range) in
                                     map_from_hashtag_to_tuples_of_occurrences_and_time_range.iteritems()]) + 0.
        return dict([(hashtag, len(occurrences)/total_occurrences)
                     for hashtag, (occurrences, time_range) in
                         map_from_hashtag_to_tuples_of_occurrences_and_time_range.iteritems()])
    def get_location_weights(hashtags_for_source_location, map_from_location_to_hashtags):
        set_of_hashtags_for_source_location = set(hashtags_for_source_location.keys())
        return dict([(location, len(set(hashtags.keys()).intersection(set_of_hashtags_for_source_location))/(len(set_of_hashtags_for_source_location)+0.))
                     for location, hashtags in
                         map_from_location_to_hashtags.iteritems()])
    for model_id in models_ids:
        output_file = tuo_location_and_tuo_neighbor_location_and_influence_score_file%(model_id, hashtag_tag)
        GeneralMethods.runCommand('rm -rf %s'%output_file)
        for line_count, location_object in enumerate(iterateJsonFromFile(
                    location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
                )):
            print line_count, model_id
            tuo_neighbor_location_and_influence_score = []
            mf_hashtag_to_hashtag_weights = get_hashtag_weights(location_object['hashtags'])
            mf_location_to_location_weights = get_location_weights(location_object['hashtags'], location_object['links'])
            location_hashtag_set = set(location_object['hashtags'])
            for neighbor_location, mf_hashtag_to_tuo_occurrences_and_time_range in location_object['links'].iteritems():
                influence_scores = []
                mf_neighbor_location_hashtag_to_hashtag_weights = get_hashtag_weights(mf_hashtag_to_tuo_occurrences_and_time_range)
                neighbor_location_hashtag_set = set(mf_hashtag_to_tuo_occurrences_and_time_range.keys())
                for hashtag, (neighbor_location_occurrences, time_range) in mf_hashtag_to_tuo_occurrences_and_time_range.iteritems():
                    if hashtag in location_object['hashtags']:
                        location_occurrences = location_object['hashtags'][hashtag][0]
                        pure_influence_score = MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[model_id](location_occurrences, neighbor_location_occurrences)
                        influence_scores.append(mf_hashtag_to_hashtag_weights[hashtag]*pure_influence_score)
                if hashtag_tag==w_extra_hashtags_tag:
                    for hashtag in location_hashtag_set.difference(neighbor_location_hashtag_set):
                        influence_scores.append(mf_hashtag_to_hashtag_weights[hashtag]*1.0)
#                        influence_scores.append(1.0)
                    for hashtag in neighbor_location_hashtag_set.difference(location_hashtag_set):
                        influence_scores.append(mf_neighbor_location_hashtag_to_hashtag_weights[hashtag]*-1.0)
#                        influence_scores.append(-1.0)
                mean_influence_scores = np.mean(influence_scores)
                tuo_neighbor_location_and_influence_score.append([neighbor_location,
                                                                  mf_location_to_location_weights[neighbor_location]*mean_influence_scores])
            tuo_neighbor_location_and_influence_score = sorted(tuo_neighbor_location_and_influence_score, key=itemgetter(1))
            FileIO.writeToFileAsJson([location_object['id'], tuo_neighbor_location_and_influence_score], output_file)