当前位置: 首页>>代码示例>>Python>>正文


Python GeneralMethods.runCommand方法代码示例

本文整理汇总了Python中library.classes.GeneralMethods.runCommand方法的典型用法代码示例。如果您正苦于以下问题：Python GeneralMethods.runCommand方法的具体用法？Python GeneralMethods.runCommand怎么用？Python GeneralMethods.runCommand使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类library.classes.GeneralMethods的用法示例。


在下文中一共展示了GeneralMethods.runCommand方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: generate_tuo_location_and_tuo_neighbor_location_and_pure_influence_score

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
    def generate_tuo_location_and_tuo_neighbor_location_and_pure_influence_score(models_ids, startTime, endTime, outputFolder, hashtag_tag):
        """Write, per model, every location's neighbors with their mean pure influence score.

        For each id in models_ids the output file (named from the model id and
        hashtag_tag) is removed and rebuilt. One record
        [location id, [[neighbor, mean score], ...]] is written per location
        object read from the location objects file for the given date range;
        neighbors are ordered by ascending mean score.

        A score is computed for every hashtag present both at the location and
        on the link to the neighbor. When hashtag_tag equals
        w_extra_hashtags_tag, each hashtag found only at the location adds 1.0
        and each hashtag found only at the neighbor adds -1.0 before averaging.
        """
        for model_id in models_ids:
            output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, hashtag_tag)
            # Drop the result of any earlier run before writing new records.
            GeneralMethods.runCommand('rm -rf %s'%output_file)
            input_file = location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
            for line_count, location_object in enumerate(iterateJsonFromFile(input_file)):
                print('%s %s'%(line_count, model_id))
                own_hashtags = location_object['hashtags']
                own_hashtag_set = set(own_hashtags)
                ranked_neighbors = []
                for neighbor_location, neighbor_hashtags in location_object['links'].iteritems():
                    # One score per hashtag shared by the location and this neighbor.
                    scores = [
                        MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[model_id](own_hashtags[hashtag][0], neighbor_occurrences)
                        for hashtag, (neighbor_occurrences, time_range) in neighbor_hashtags.iteritems()
                        if hashtag in own_hashtags
                    ]
                    if hashtag_tag==w_extra_hashtags_tag:
                        neighbor_hashtag_set = set(neighbor_hashtags.keys())
                        scores.extend([1.0]*len(own_hashtag_set.difference(neighbor_hashtag_set)))
                        scores.extend([-1.0]*len(neighbor_hashtag_set.difference(own_hashtag_set)))
                    ranked_neighbors.append([neighbor_location, np.mean(scores)])
                ranked_neighbors.sort(key=itemgetter(1))
                FileIO.writeToFileAsJson([location_object['id'], ranked_neighbors], output_file)
开发者ID:kykamath,项目名称:hashtags_and_geo,代码行数:28,代码来源:models.py

示例2: generate_tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
 def generate_tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity(model_ids, startTime, endTime, outputFolder):
     """Write, per model, the similarity between every location and each of its neighbors.

     For each model id the output file is removed and rebuilt with one record
     [location, [[neighbor, {influence type: similarity, ...}], ...]] per
     location object. Each neighbor gets one similarity per influence type
     (complete, outgoing, incoming) computed from the loaded influence
     vectors, plus a JACCARD_SIMILARITY entry computed from the hashtag sets
     of the location and of the link to the neighbor.
     """
     def location_similarity(location_vector_1, location_vector_2):
         # Sum of products over the union of keys; a key missing on either side counts as 0.
         total = 0.
         for key in set(location_vector_1.keys()).union(location_vector_2.keys()):
             total = total+(location_vector_1.get(key,0)*location_vector_2.get(key,0))
         return total
     influence_types = [
         InfluenceMeasuringModels.TYPE_COMPLETE_INFLUENCE,
         InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE,
         InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE,
     ]
     for model_id in model_ids:
         influence_vectors = dict(Experiments.load_tuo_location_and_mf_influence_type_to_influence_vector(model_id))
         output_file = tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id
         # Drop the result of any earlier run before writing new records.
         GeneralMethods.runCommand('rm -rf %s'%output_file)
         input_file = location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
         for line_count, location_object in enumerate(iterateJsonFromFile(input_file)):
             print(line_count)
             location = location_object['id']
             neighbors_and_similarities = []
             for neighbor_location in location_object['links'].keys():
                 similarities = dict(
                     (
                         influence_type,
                         location_similarity(
                             influence_vectors[location][influence_type],
                             influence_vectors[neighbor_location][influence_type]
                         )
                     )
                     for influence_type in influence_types
                 )
                 location_hashtags = set(location_object['hashtags'].keys())
                 neighbor_hashtags = set(location_object['links'][neighbor_location].keys())
                 # Jaccard: |intersection| / |union|, forced to float division.
                 shared = float(len(location_hashtags.intersection(neighbor_hashtags)))
                 combined = float(len(location_hashtags.union(neighbor_hashtags)))
                 similarities[JACCARD_SIMILARITY] = shared/combined
                 neighbors_and_similarities.append([neighbor_location, similarities])
             FileIO.writeToFileAsJson([location, neighbors_and_similarities], output_file)
开发者ID:kykamath,项目名称:hashtags_and_geo,代码行数:33,代码来源:models.py

示例3: generate_hashtag_specific_location_and_pure_influence_scores

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
 def generate_hashtag_specific_location_and_pure_influence_scores(test_models_ids):
     """Write, per model and hashtag, each location's mean pure influence score.

     For each model id the output file is removed and rebuilt with one record
     [hashtag, [[location, mean score], ...]] per hashtag, locations ordered
     by ascending mean score. A location's score is the mean of the model
     applied to (neighbor occurrence times, location occurrence times) over
     every other location that has the same hashtag.
     """
     by_location = itemgetter(0)
     for test_model_id in test_models_ids:
         output_file = f_ltuo_hashtag_and_ltuo_location_and_pure_influence_score%(test_model_id)
         # Drop the result of any earlier run before writing new records.
         GeneralMethods.runCommand('rm -rf %s'%output_file)
         hashtags_and_occurrences = Experiments.load_ltuo_hashtag_and_ltuo_location_and_occurrence_time()
         for hashtag_count, (hashtag, ltuo_location_and_occurrence_time) in enumerate(hashtags_and_occurrences):
             # groupby needs its input sorted by the grouping key.
             occurrences_by_location = groupby(sorted(ltuo_location_and_occurrence_time, key=by_location), key=by_location)
             ltuo_location_and_occurrence_times = []
             for location, occurrences in occurrences_by_location:
                 ltuo_location_and_occurrence_times.append((location, sorted(occurrence[1] for occurrence in occurrences)))
             print('%s %s'%(hashtag_count, test_model_id))
             ltuo_location_and_pure_influence_score = []
             for location, location_occurrence_times in ltuo_location_and_occurrence_times:
                 scores = [
                     MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[test_model_id](neighbor_occurrence_times, location_occurrence_times)
                     for neighbor_location, neighbor_occurrence_times in ltuo_location_and_occurrence_times
                     if location!=neighbor_location
                 ]
                 ltuo_location_and_pure_influence_score.append([location, np.mean(scores)])
             ltuo_location_and_pure_influence_score.sort(key=itemgetter(1))
             FileIO.writeToFileAsJson([hashtag, ltuo_location_and_pure_influence_score], output_file)
开发者ID:kykamath,项目名称:hashtags_and_geo,代码行数:27,代码来源:models.py

示例4: analyzeQuality

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
    def analyzeQuality(graphs, graphType):
        """Write a cluster quality score for edgesToKeep = 0.1, 0.2, ..., 1.0.

        The metrics file (qualityMetricsFolder%graphType) is removed first.
        For every edgesToKeep value a fresh graph map is built from graphs,
        LocationGraphs.updateLogarithmicGraphs is applied to it, and a record
        {'edgesToKeep': ..., 'score': ...} is written, where score is the mean
        of the scores returned by getQualityScore.
        """
        def clusterGraph(graph):
            # Group the first element of each pair returned by the clustering
            # by its second element (presumably node -> cluster label).
            assignments = sorted(clusterUsingAffinityPropagation(graph)[1], key=itemgetter(1))
            return [[str(clusterId), [member[0] for member in members]] for clusterId, members in groupby(assignments, key=itemgetter(1))]
        def getQualityScore(graphMap, edgesToKeep, timeDifference):
            # startingGraphId and endingGraphId are read from the enclosing
            # function; they are assigned in the loop below before each call.
            dataToReturn = []
            for multiplier in [1]:
                intervalInSeconds = multiplier*timeDifference
                endTime = datetime.datetime.fromtimestamp(endingGraphId+1)
                linearGraph = LocationGraphs.combineLocationGraphs(graphMap, startingGraphId, endTime, intervalInSeconds, linear=True, edgesToKeep=edgesToKeep)
                logGraph = LocationGraphs.combineLocationGraphs(graphMap, startingGraphId, endTime, intervalInSeconds, linear=False, edgesToKeep=edgesToKeep)
                linearClusters = clusterGraph(linearGraph)
                logarithmicClusters = clusterGraph(logGraph)
                score = LocationGraphs.getClusterQualityScore(linearClusters, logarithmicClusters)
                print('%s %s %s'%(intervalInSeconds, edgesToKeep, score))
                dataToReturn.append(score)
            return dataToReturn
        graphFile = qualityMetricsFolder%graphType
        print(graphFile)
        GeneralMethods.runCommand('rm -rf %s'%graphFile)
        for step in range(1,11):
            edgesToKeep = step*0.1
            # Rebuild the map each time: updateLogarithmicGraphs receives a fresh dict.
            graphMap = dict(graphs[:])
            startingGraphId, endingGraphId = min(graphMap.keys()), max(graphMap.keys())
            timeDifference = endingGraphId-startingGraphId
            LocationGraphs.updateLogarithmicGraphs(graphMap, edgesToKeep=edgesToKeep)
            meanScore = np.mean(getQualityScore(graphMap, edgesToKeep, timeDifference))
            FileIO.writeToFileAsJson({'edgesToKeep': edgesToKeep, 'score': meanScore}, graphFile)
开发者ID:kykamath,项目名称:hashtags_and_geo,代码行数:27,代码来源:analysis.py

示例5: trendCurves

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
def trendCurves():
    """Re-run the mixed-users experiment and pass its result file to Analysis.trendCurves.

    The experiment file (spamModelFolder + model id) is removed, regenerated
    through run(**conf) with a 98.5% normal / 1.5% spammer user ratio, and
    then handed to Analysis.trendCurves.
    """
    model = MixedUsersModel()
    experimentFileName = spamModelFolder+model.id
    conf = dict(
        model=model,
        addUsersMethod=User.addUsersUsingRatio,
        analysisMethods=[(Analysis.trendCurves, 1)],
        ratio={'normal': 0.985, 'spammer': 0.015},
        experimentFileName=experimentFileName
    )
    # Discard the output of any earlier run before simulating again.
    GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
    run(**conf)
    Analysis.trendCurves(experimentFileName=experimentFileName)
开发者ID:kykamath,项目名称:spam_model,代码行数:9,代码来源:experiments.py

示例6: writeUserClustersFile

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
 def writeUserClustersFile(place):
     """Cluster the user vectors of a place and write one record per user.

     The clusters file for place['name'] is removed, the user vectors are
     clustered with Clustering.EM (options '-N -1'), and every
     (user id, {'userVector': ..., 'clusterId': ...}) pair is written to the
     clusters file.
     """
     print('Generating clusters...')
     userVectors = GenerateDataFiles.getUserVectors(place)
     clustersFile = placesUserClustersFile%place['name']
     GeneralMethods.runCommand('rm -rf %s'%clustersFile)
     # A KMeans run with '-N 2' was used here as an alternative at some point.
     clusterAssignments = Clustering.cluster(Clustering.EM, placesARFFFile%place['name'], userVectors, '-N -1')
     # Attach the cluster id to every vector first, then write, so a missing
     # assignment fails before any record is written.
     for userId in userVectors:
         userVectors[userId] = {'userVector': userVectors[userId], 'clusterId': clusterAssignments[userId]}
     for userIdAndData in userVectors.iteritems():
         FileIO.writeToFileAsJson(userIdAndData, clustersFile)
开发者ID:kykamath,项目名称:users_and_geo,代码行数:10,代码来源:places1.py

示例7: performanceWithSpamFilteringForLatestMessages

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
def performanceWithSpamFilteringForLatestMessages(generateData):
    """Generate or plot spamness of the latest-message rankings as the spammer ratio grows.

    Ten iterations are run over the spammer ratios 0.001, 0.002, ..., 0.050.

    generateData: when true, every (iteration, ratio) experiment file is
        removed and regenerated through run(**conf). When false, the existing
        experiment files are read, the 'spammmess' values of each ranking are
        averaged per ratio and then across iterations, and the curves are
        saved to 'performanceWithSpamFilteringForLatestMessages.png'.
    """
    # The sweep is the same for every iteration, so it is built once.
    spammerPercentages = [step* 0.001 for step in range(1,51)]
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for spammerPercentage in spammerPercentages:
            experimentFileName = spamModelFolder+'performanceWithSpamFilteringForLatestMessages/%s/%0.3f'%(iteration,spammerPercentage)
            print(experimentFileName)
            if generateData:
                model = MixedUsersModel()
                conf = {'model': model, 'numberOfTimeSteps': 10, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
                        'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesSpamFiltered],
                        'experimentFileName': experimentFileName,
                        }
                # Discard the output of any earlier run before simulating again.
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
                run(**conf)
            else:
                tempData = defaultdict(list)
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        tempData[ranking_id]+=data['spammmess'][ranking_id]
                experimentData[iteration][spammerPercentage]=tempData
    if not generateData:
        realDataY = defaultdict(dict)
        # Defined up front because the plotting loop below reads the values
        # left over from the last iteration.
        dataX, dataY = [], defaultdict(list)
        for iteration in experimentData:
            dataY = defaultdict(list)
            dataX = []
            for perct in sorted(experimentData[iteration]):
                dataX.append(perct)
                for ranking_id, values in experimentData[iteration][perct].iteritems(): dataY[ranking_id].append(np.mean(values))
            for ranking_id in dataY:
                for x, y in zip(dataX, dataY[ranking_id]):
                    if x not in realDataY[ranking_id]: realDataY[ranking_id][x]=[]
                    realDataY[ranking_id][x].append(y)
        # Plot, for each ranking, the mean over iterations at every ratio.
        for ranking_id in dataY: plt.plot(dataX, [np.mean(realDataY[ranking_id][x]) for x in dataX], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
        plt.xlabel('Percentage of Spammers', fontsize=16, fontweight='bold')
        plt.ylabel('Spamness', fontsize=16, fontweight='bold')
        plt.legend(loc=2)
        plt.xlim(xmax=0.05)
        plt.savefig('performanceWithSpamFilteringForLatestMessages.png')
        plt.clf()
开发者ID:kykamath,项目名称:spam_model,代码行数:59,代码来源:experiments.py

示例8: performanceWithSpamDetection

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
def performanceWithSpamDetection(generateData):
    """Generate or plot spamness over time for several spam detection ratios.

    Ten iterations are run for the detection ratios 0.0, 0.4 and 0.9, each
    with a spammer ratio of 0.015.

    generateData: when true, every (iteration, detection ratio) experiment
        file is removed and regenerated through run(**conf). When false, the
        existing experiment files are read, the 'spammmess' values of each
        ranking are pooled per detection ratio and time step, and one figure
        per spam-filtered ranking is saved to
        'performanceWithSpamDetection_<ranking id>.png'.
    """
    experimentData = defaultdict(dict)
    ratios = [0.0,0.4,0.9]
    marker = dict([(0.0, 's'), (0.4, 'o'), (0.9, 'd')])
    spammerPercentages = [0.015, 0.015, 0.015]
    for iteration in range(10):
        for spamDetectionRatio, spammerPercentage in zip(ratios, spammerPercentages):
            experimentFileName = spamModelFolder+'performanceWithSpamDetection/%s/%0.3f'%(iteration,spamDetectionRatio)
            print(experimentFileName)
            if generateData:
                model = MixedUsersModel()
                conf = {'model': model, 'numberOfTimeSteps': 100, 'addUsersMethod': User.addUsersUsingRatioWithSpamDetection, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 1-spammerPercentage, 'spammer': spammerPercentage},
                        'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesSpamFiltered, RankingModel.popularMessages, RankingModel.popularMessagesSpamFiltered],
                        'spamDetectionRatio': spamDetectionRatio,
                        'experimentFileName': experimentFileName}
                # Discard the output of any earlier run before simulating again.
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
                run(**conf)
            else:
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        if data['currentTimeStep'] not in experimentData[spamDetectionRatio]: experimentData[spamDetectionRatio][data['currentTimeStep']]=defaultdict(list)
                        experimentData[spamDetectionRatio][data['currentTimeStep']][ranking_id]+=data['spammmess'][ranking_id]
    if not generateData:
        sdr = {}
        for spamDetectionRatio in sorted(experimentData.keys()):
            dataToPlot = defaultdict(list)
            # Visit time steps in ascending order: the series are sliced by
            # position below, so dict iteration order must not decide it.
            for timeUnit in sorted(experimentData[spamDetectionRatio]):
                dataToPlot['x'].append(timeUnit)
                for ranking_id in experimentData[spamDetectionRatio][timeUnit]: dataToPlot[ranking_id].append(np.mean(experimentData[spamDetectionRatio][timeUnit][ranking_id]))
            sdr[spamDetectionRatio]=dataToPlot
        for ranking_id in [RankingModel.LATEST_MESSAGES_SPAM_FILTERED, RankingModel.POPULAR_MESSAGES_SPAM_FILTERED]:
            for spamDetectionRatio in ratios:
                print('%s %s'%(ranking_id, spamDetectionRatio))
                dataY = smooth(sdr[spamDetectionRatio][ranking_id],8)[:len(sdr[spamDetectionRatio]['x'])]
                # Skip the first 10 points and shift the x axis so it starts at 0.
                dataX, dataY = sdr[spamDetectionRatio]['x'][10:], dataY[10:]
                shiftedX = [x-10 for x in dataX]
                print('x %s'%(shiftedX,))
                # Ratio 0.0 keeps the plain label; the others are labelled as
                # detection with their percentage.
                if spamDetectionRatio==0.0: label = '%s'%(labels[ranking_id])
                else: label = '%s (%d'%(labels[ranking_id].replace('Filtering', 'Detection'),spamDetectionRatio*100)+'%)'
                print('%s %s'%(ranking_id, dataY))
                plt.plot(shiftedX, dataY, label=label, lw=1, marker=marker[spamDetectionRatio])
            plt.ylim(ymin=0, ymax=1)
            plt.xlim(xmin=0, xmax=75)
            plt.legend()
            plt.xlabel('Time', fontsize=16, fontweight='bold')
            plt.ylabel('Spamness', fontsize=16, fontweight='bold')
            plt.savefig('performanceWithSpamDetection_%s.png'%ranking_id)
            plt.clf()
开发者ID:kykamath,项目名称:spam_model,代码行数:56,代码来源:experiments.py

示例9: writeARFFFile

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
def writeARFFFile(place):
    """Build per-user check-in count vectors for a place and store them as an ARFF file.

    Locations come from locationToUserMapIterator(place, minCheckins=50). For
    every user the vector maps the location id (spaces replaced by '_') to
    the user's number of check-ins there. Users whose total is below
    place['minUserCheckins'] are dropped. The file produced by
    ARFF.writeARFFForClustering is then moved to getARFFFileName(place).
    """
    locationToUserMap = dict((l['location'], l) for l in locationToUserMapIterator(place, minCheckins=50))
    userVectors = defaultdict(dict)
    for lid in locationToUserMap:
        usersAtLocation = locationToUserMap[lid]['users']
        for user in usersAtLocation:
            # Count the entries stored under every (d, db) key pair of this user.
            userCheckins = usersAtLocation[user]
            noOfCheckins = 0
            for d in userCheckins:
                for db in userCheckins[d]:
                    noOfCheckins += len(userCheckins[d][db])
            userVectors[user][lid.replace(' ', '_')] = noOfCheckins
    # Collect first, delete afterwards: the dict must not change size while it is scanned.
    usersBelowMinimum = [user for user in userVectors.keys() if sum(userVectors[user].itervalues())<place['minUserCheckins']]
    for user in usersBelowMinimum:
        del userVectors[user]
    arffFile = ARFF.writeARFFForClustering(userVectors, place['name'])
    outputFileName = getARFFFileName(place)
    FileIO.createDirectoryForFile(outputFileName)
    GeneralMethods.runCommand('mv %s %s'%(arffFile, outputFileName))
开发者ID:kykamath,项目名称:users_and_geo,代码行数:14,代码来源:places.py

示例10: writeLocationToUserMap

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
 def writeLocationToUserMap(place):
     """Write the location-to-user map of all locations inside a place's boundary.

     The map file for place['name'] is removed first. Every location from
     filteredLocationToUserAndTimeMapIterator that lies within
     place['boundary'] gets 'name', 'categories' and 'tags' filled from the
     venue collections ('' when nothing is found), its user keys converted to
     str and 'noOfCheckins' set to its total number of check-in epochs. A
     location is written only when noOfCheckins exceeds
     place.get('minLocationCheckins', 0).
     """
     name, boundary = place['name'], place['boundary']
     outputFile = placesLocationToUserMapFile%name
     GeneralMethods.runCommand('rm -rf %s'%outputFile)
     for location in filteredLocationToUserAndTimeMapIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, inputFile=locationToUserAndExactTimeMapFile):
         lid=getLocationFromLid(location['location'])
         if not isWithinBoundingBox(lid, boundary): continue
         location['categories'] = ''; location['tags'] = ''; location['name']=''
         title = venuesCollection.find_one({'lid':location['location']})
         if title: location['name'] = unicode(title['n']).encode("utf-8")
         meta = venuesMetaDataCollection.find_one({'_id':location['location']})
         if meta: location['categories'] = unicode(meta['c']).encode("utf-8"); location['tags'] = unicode(meta['t']).encode("utf-8")
         users = location['users']
         # Re-key users by str(user). The entry is popped before it is stored
         # again: assigning and then deleting would drop every entry whose key
         # is already a string, because str(user) is then the same key.
         for user in list(users.keys()): users[str(user)] = users.pop(user)
         location['noOfCheckins']=sum(len(epochs) for user, userVector in users.iteritems() for day, dayVector in userVector.iteritems() for db, epochs in dayVector.iteritems())
         if location['noOfCheckins']>place.get('minLocationCheckins',0): FileIO.writeToFileAsJson(location, outputFile)
开发者ID:kykamath,项目名称:users_and_geo,代码行数:16,代码来源:places1.py

示例11: writeTopClusterFeatures

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
 def writeTopClusterFeatures(place):
     """Write the top features of every user cluster of a place.

     The cluster features file for place['name'] is removed first. The user
     records of the clusters file are passed to getTopFeaturesForClass (with
     1000 as second argument); for every (cluster id, features) pair it
     yields, each feature is extended with the venue name looked up from its
     first element, and [cluster id, random color, features] is written.
     """
     locationNames = {}
     def getLocationName(lid):
         # Cached venue-name lookup; '' is cached as well when no venue is found.
         if lid in locationNames: return locationNames[lid]
         locationObject = venuesCollection.find_one({'lid':lid})
         locationNames[lid] = unicode(locationObject['n']).encode("utf-8") if locationObject else ''
         return locationNames[lid]
     featuresFile = placesUserClusterFeaturesFile%place['name']
     GeneralMethods.runCommand('rm -rf %s'%featuresFile)
     documents = []
     for user, userVector in FileIO.iterateJsonFromFile(placesUserClustersFile%place['name']):
         documents.append(userVector.values())
     for clusterId, features in getTopFeaturesForClass(documents, 1000):
         # Feature ids use '_' where the venue lid has a space.
         modifiedFeatures = [list(feature) + [getLocationName(feature[0].replace('_', ' '))] for feature in features]
         FileIO.writeToFileAsJson([clusterId, GeneralMethods.getRandomColor(), modifiedFeatures], featuresFile)
开发者ID:kykamath,项目名称:users_and_geo,代码行数:17,代码来源:places1.py

示例12: writeLocationsWithClusterInfoFile

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
def writeLocationsWithClusterInfoFile(place):
    """Write, for every user clustering of a place, the check-ins of each location split by cluster.

    The output file for place['name'] is removed first. For each clustering
    from iteraterUserClusterings(place) one record is written:
    {str(clustering[0]): {location: {'name': ..., 'checkins': {cluster id: [epochs]}}}}.
    Only users that belong to a cluster of that clustering contribute
    check-ins. When no location is iterated the record is an empty dict.
    """
    outputFile = placesLocationWithClusterInfoFile%place['name']
    GeneralMethods.runCommand('rm -rf %s'%outputFile)
    for clustering in iteraterUserClusterings(place):
        userClusterMap = dict(
            (user, clusterId)
            for clusterId, users in clustering[2]['clusters'].iteritems()
            for user in users
        )
        locationMap = defaultdict(dict)
        for location in locationToUserMapIterator(place):
            checkinsByCluster = defaultdict(list)
            locationMap[location['location']] = {'name':unicode(location['name']).encode("utf-8"), 'checkins':checkinsByCluster}
            for user, userVector in location['users'].iteritems():
                if user not in userClusterMap: continue
                clusterId = userClusterMap[user]
                for day, dayVector in userVector.iteritems():
                    for db, epochs in dayVector.iteritems():
                        checkinsByCluster[clusterId].extend(epochs)
        # The clustering key is only present once at least one location was seen.
        dataToWrite = {str(clustering[0]): locationMap} if locationMap else {}
        FileIO.writeToFileAsJson(dataToWrite, outputFile)
开发者ID:kykamath,项目名称:users_and_geo,代码行数:18,代码来源:places.py

示例13: performanceAsPercentageOfGlobalSpammerVaries

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
def performanceAsPercentageOfGlobalSpammerVaries(generateData):
    """Generate or plot spamness as the share of global spammer payloads grows.

    Ten iterations are run over the global payload shares 0.1, 0.2, ..., 1.0
    with a fixed 98.5% normal / 1.5% spammer user ratio.

    generateData: when true, every (iteration, share) experiment file is
        removed and regenerated through run(**conf). When false, the existing
        experiment files are read, the 'spammmess' values of each ranking are
        averaged per share and then across iterations, and the curves of the
        rankings that have a label are saved to
        'performanceAsPercentageOfGlobalSpammerVaries.png'.
    """
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for step in range(1,11):
            spammerPercentage = step*0.1
            experimentFileName = spamModelFolder+'performanceAsPercentageOfGlobalSpammerVaries/%s/%0.3f'%(iteration,spammerPercentage)
            print(experimentFileName)
            if generateData:
                conf = dict(
                    model=MixedUsersModel(),
                    numberOfTimeSteps=10,
                    addUsersMethod=User.addUsersUsingRatio,
                    analysisMethods=[(Analysis.measureRankingQuality, 1)],
                    ratio={'normal': 0.985, 'spammer': 0.015},
                    spamRatio={'localPayloads': 1-spammerPercentage, 'globalPayloads': spammerPercentage},
                    noOfGlobalSpammerPayloads=10,
                    rankingMethods=[RankingModel.latestMessages, RankingModel.latestMessagesDuplicatesRemoved, RankingModel.popularMessages],
                    experimentFileName=experimentFileName
                )
                # Discard the output of any earlier run before simulating again.
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName)
                run(**conf)
                continue
            tempData = defaultdict(list)
            for data in FileIO.iterateJsonFromFile(experimentFileName):
                spamness = data['spammmess']
                for ranking_id in spamness:
                    tempData[ranking_id].extend(spamness[ranking_id])
            experimentData[iteration][spammerPercentage]=tempData
    if generateData:
        return
    realDataY = defaultdict(dict)
    for iteration in experimentData:
        dataX = sorted(experimentData[iteration])
        dataY = defaultdict(list)
        for perct in dataX:
            for ranking_id, values in experimentData[iteration][perct].iteritems():
                dataY[ranking_id].append(np.mean(values))
        for ranking_id in dataY:
            for x, y in zip(dataX, dataY[ranking_id]):
                realDataY[ranking_id].setdefault(x, []).append(y)
    # dataX and dataY still hold the values of the last iteration here.
    for ranking_id in dataY:
        if ranking_id not in labels: continue
        meanY = [np.mean(realDataY[ranking_id][x]) for x in dataX]
        plt.plot(dataX, meanY, label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
    plt.xlabel('Percentage of Spammers Using Group Strategy', fontsize=16, fontweight='bold')
    plt.ylabel('Spamness', fontsize=16, fontweight='bold')
    plt.legend(loc=4)
    plt.savefig('performanceAsPercentageOfGlobalSpammerVaries.png')
    plt.clf()
开发者ID:kykamath,项目名称:spam_model,代码行数:49,代码来源:experiments.py

示例14: performanceAsNoOfGlobalPayloadsVary

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
def performanceAsNoOfGlobalPayloadsVary(generateData):
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for noOfGlobalSpammerPayloads in range(1,500):
#        for noOfGlobalSpammerPayloads in range(10,11):
            Spammer.globalPayloads = None
            experimentFileName = spamModelFolder+'performanceAsNoOfGlobalPayloadsVary/%s/%0.3f'%(iteration,noOfGlobalSpammerPayloads)
            print experimentFileName
            if generateData:
                model = MixedUsersModel()
                conf = {'model': model, 'numberOfTimeSteps': 10, 'addUsersMethod': User.addUsersUsingRatio, 'analysisMethods': [(Analysis.measureRankingQuality, 1)], 'ratio': {'normal': 0.985, 'spammer': 0.015},
                        'noOfGlobalSpammerPayloads': noOfGlobalSpammerPayloads,
                        'rankingMethods':[RankingModel.latestMessages, RankingModel.latestMessagesDuplicatesRemoved, RankingModel.popularMessages],
                        'experimentFileName': experimentFileName}
                GeneralMethods.runCommand('rm -rf %s'%experimentFileName);run(**conf)
            else:
                tempData = defaultdict(list)
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        tempData[ranking_id]+=data['spammmess'][ranking_id]
                experimentData[iteration][noOfGlobalSpammerPayloads]=tempData
    if not generateData:
        realDataY = defaultdict(dict)
        for iteration in experimentData:
            dataY = defaultdict(list)
            dataX = []
            for perct in sorted(experimentData[iteration]):
                dataX.append(perct)
                for ranking_id, values in experimentData[iteration][perct].iteritems(): dataY[ranking_id].append(np.mean(values))
            dataX=sorted(dataX)
            for ranking_id in dataY:
                for x, y in zip(dataX, dataY[ranking_id]): 
                    if x not in realDataY[ranking_id]: realDataY[ranking_id][x]=[] 
                    realDataY[ranking_id][x].append(y)
        for ranking_id in dataY:
            if ranking_id in labels: 
                dy = [np.mean(realDataY[ranking_id][x]) for x in dataX[:20]] + list(smooth([np.mean(realDataY[ranking_id][x]) for x in dataX[20:]])) #+smooth([np.mean(realDataY[ranking_id][x]) for x in dataX[20:]]
                plt.semilogx(dataX, dy[:len(dataX)], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])
#        for ranking_id in dataY: plt.plot(dataX, [np.mean(realDataY[ranking_id][x]) for x in dataX], label=labels[ranking_id], lw=1, marker=RankingModel.marker[ranking_id])  
        plt.xlabel('Payloads Per Spam Group', fontsize=15, fontweight='bold')
        plt.ylabel('Spamness', fontsize=15, fontweight='bold')
#        plt.title('Spammness with changing global payloads')
        plt.legend(loc=4)
#        plt.show()
        plt.savefig('performanceAsNoOfGlobalPayloadsVary.png')
        plt.clf()
开发者ID:kykamath,项目名称:spam_model,代码行数:48,代码来源:experiments.py

示例15: generate_tuo_location_and_tuo_neighbor_location_and_influence_score

# 需要导入模块: from library.classes import GeneralMethods [as 别名]
# 或者: from library.classes.GeneralMethods import runCommand [as 别名]
 def generate_tuo_location_and_tuo_neighbor_location_and_influence_score(models_ids, startTime, endTime, outputFolder, hashtag_tag):
     def get_hashtag_weights(map_from_hashtag_to_tuples_of_occurrences_and_time_range):
         total_occurrences = sum([len(occurrences) 
                                  for hashtag, (occurrences, time_range) in 
                                  map_from_hashtag_to_tuples_of_occurrences_and_time_range.iteritems()]) + 0.
         return dict([(hashtag, len(occurrences)/total_occurrences)
             for hashtag, (occurrences, time_range) in 
             map_from_hashtag_to_tuples_of_occurrences_and_time_range.iteritems()])
     def get_location_weights(hashtags_for_source_location, map_from_location_to_hashtags):
         set_of_hashtags_for_source_location = set(hashtags_for_source_location.keys())
         return dict([(location, len(set(hashtags.keys()).intersection(set_of_hashtags_for_source_location))/(len(set_of_hashtags_for_source_location)+0.))
                      for location, hashtags in 
                      map_from_location_to_hashtags.iteritems()])
     for model_id in models_ids:
         output_file = tuo_location_and_tuo_neighbor_location_and_influence_score_file%(model_id, hashtag_tag)
         GeneralMethods.runCommand('rm -rf %s'%output_file)
         for line_count, location_object in enumerate(iterateJsonFromFile(
                  location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
                  )):
             print line_count, model_id
             tuo_neighbor_location_and_influence_score = []
             mf_hashtag_to_hashtag_weights = get_hashtag_weights(location_object['hashtags'])
             mf_location_to_location_weights = get_location_weights(location_object['hashtags'], location_object['links'])
             location_hashtag_set = set(location_object['hashtags'])
             for neighbor_location, mf_hashtag_to_tuo_occurrences_and_time_range in location_object['links'].iteritems():
                 influence_scores = []
                 mf_neighbor_location_hashtag_to_hashtag_weights = get_hashtag_weights(mf_hashtag_to_tuo_occurrences_and_time_range)
                 neighbor_location_hashtag_set = set(mf_hashtag_to_tuo_occurrences_and_time_range.keys())
                 for hashtag, (neighbor_location_occurrences, time_range) in mf_hashtag_to_tuo_occurrences_and_time_range.iteritems():
                     if hashtag in location_object['hashtags']:
                         location_occurrences = location_object['hashtags'][hashtag][0]
                         pure_influence_score = MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[model_id](location_occurrences, neighbor_location_occurrences)
                         influence_scores.append(mf_hashtag_to_hashtag_weights[hashtag]*pure_influence_score)
                 if hashtag_tag==w_extra_hashtags_tag:
                     for hashtag in location_hashtag_set.difference(neighbor_location_hashtag_set): 
                         influence_scores.append(mf_hashtag_to_hashtag_weights[hashtag]*1.0)
 #                        influence_scores.append(1.0)
                     for hashtag in neighbor_location_hashtag_set.difference(location_hashtag_set): 
                         influence_scores.append(mf_neighbor_location_hashtag_to_hashtag_weights[hashtag]*-1.0)
 #                        influence_scores.append(-1.0)
                 mean_influence_scores = np.mean(influence_scores)
                 tuo_neighbor_location_and_influence_score.append([neighbor_location, 
                                                                    mf_location_to_location_weights[neighbor_location]*mean_influence_scores])
             tuo_neighbor_location_and_influence_score = sorted(tuo_neighbor_location_and_influence_score, key=itemgetter(1))
             FileIO.writeToFileAsJson([location_object['id'], tuo_neighbor_location_and_influence_score], output_file)
开发者ID:kykamath,项目名称:hashtags_and_geo,代码行数:47,代码来源:models.py


注:本文中的library.classes.GeneralMethods.runCommand方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。