

Python FileIO.iterateJsonFromFile Method Code Examples

This article collects typical usage examples of the Python method library.file_io.FileIO.iterateJsonFromFile. If you have been wondering what FileIO.iterateJsonFromFile does, how to call it, or what real uses of it look like, the curated examples below should help. You can also explore further usage examples of the class it belongs to, library.file_io.FileIO.


The following presents 15 code examples of the FileIO.iterateJsonFromFile method, sorted by popularity by default.
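
Before the examples, it helps to know roughly what the method does. Judging from the call sites below, which pass a file path and sometimes a second argument (True or remove_params_dict=True), iterateJsonFromFile iterates over a file holding one JSON object per line and yields each decoded object. The following is a minimal sketch inferred from that usage, not the actual implementation in library.file_io:

# A minimal sketch of FileIO.iterateJsonFromFile, inferred from the call
# sites in the examples below; the real implementation may differ.
import json

class FileIO(object):
    @staticmethod
    def iterateJsonFromFile(file_path, remove_params_dict=False):
        with open(file_path) as fp:
            for line_number, line in enumerate(fp):
                # The second argument seen in the examples (True /
                # remove_params_dict=True) presumably skips a leading
                # parameters line written at the top of the file.
                if remove_params_dict and line_number == 0:
                    continue
                line = line.strip()
                if line:
                    yield json.loads(line)

Because it is a generator, for data in FileIO.iterateJsonFromFile(path) streams one record at a time instead of loading the whole file into memory, which is why the examples below can scan large result files inside plain for loops.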

Example 1: generate_data_for_significant_nei_utm_ids

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def generate_data_for_significant_nei_utm_ids():
        output_file = GeneralMethods.get_method_id()+'.json'
        so_hashtags, mf_utm_id_to_valid_nei_utm_ids = set(), {}
        for utm_object in \
                FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, True):
            for hashtag, count in utm_object['mf_hashtag_to_count'].iteritems():
                if hashtag!='total_num_of_occurrences': so_hashtags.add(hashtag)
            mf_utm_id_to_valid_nei_utm_ids[utm_object['utm_id']] =\
                                                            utm_object['mf_nei_utm_id_to_common_h_count'].keys()
        hashtags = sorted(list(so_hashtags))
        mf_utm_id_to_vector = {}
        for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, True):
#                print i, utm_object['utm_id']
            utm_id_vector =  map(lambda hashtag: utm_object['mf_hashtag_to_count'].get(hashtag, 0.0),
                                 hashtags)
            mf_utm_id_to_vector[utm_object['utm_id']] = robjects.FloatVector(utm_id_vector)
        for i, (utm_id, vector) in enumerate(mf_utm_id_to_vector.iteritems()):
            print '%s of %s'%(i+1, len(mf_utm_id_to_vector))
            ltuo_utm_id_and_vector = [(utm_id, vector)]
            for valid_nei_utm_id in mf_utm_id_to_valid_nei_utm_ids[utm_id]:
                if valid_nei_utm_id in mf_utm_id_to_vector and valid_nei_utm_id!=utm_id:
                    ltuo_utm_id_and_vector.append((valid_nei_utm_id, mf_utm_id_to_vector[valid_nei_utm_id]))
            od = rlc.OrdDict(sorted(ltuo_utm_id_and_vector, key=itemgetter(0)))
            df_utm_vectors = robjects.DataFrame(od)
            df_utm_vectors_json = R_Helper.get_json_for_data_frame(df_utm_vectors)
            dfm_dict = cjson.decode(df_utm_vectors_json)
            mf_utm_ids_to_utm_colnames = dict(zip(zip(*ltuo_utm_id_and_vector)[0], df_utm_vectors.colnames))
            utm_id_colname = mf_utm_ids_to_utm_colnames[utm_id]
            dfm_dict['prediction_variable'] = utm_id_colname
            dfm_dict['predictor_variables'] = filter(lambda colname: colname!=utm_id_colname,
                                                     df_utm_vectors.colnames)
            dfm_dict['mf_utm_colnames_to_utm_ids'] = dict(zip(df_utm_vectors.colnames, zip(*ltuo_utm_id_and_vector)[0]))
            FileIO.writeToFileAsJson(dfm_dict, output_file)
Developer ID: kykamath, Project: hashtags_and_geo, Lines of code: 35, Source file: analysis.py
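
Example 1 pairs the iterator with FileIO.writeToFileAsJson to record each result. For iterateJsonFromFile to read such output back, writeToFileAsJson plausibly appends one JSON-encoded object per line; here is a hedged sketch consistent with that usage (again an assumption, not the library's actual code):

# A hypothetical counterpart to the reader sketch above, assuming
# writeToFileAsJson appends one JSON object per line.
import json

class FileIO(object):
    @staticmethod
    def writeToFileAsJson(data, file_path):
        # Append data as a single JSON-encoded line; the file accumulates
        # one object per call, which iterateJsonFromFile can stream back.
        with open(file_path, 'a') as fp:
            fp.write(json.dumps(data) + '\n')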

Example 2: plotQualityWithKMeansAndSSA

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def plotQualityWithKMeansAndSSA():
        del plotSettings["ssa_mr"]
        speedStats = dict([(k, {"f1": [], "nmi": [], "purity": []}) for k in plotSettings])
        for data in FileIO.iterateJsonFromFile(TweetsFile.stats_file):
            for k in speedStats:
                for metric in speedStats["ssa"]:
                    speedStats[k][metric].append(data[k][metric])
        for k in speedStats:
            del speedStats[k]["f1"]
        speedStats.update(dict([(k, {"f1": [], "nmi": [], "purity": []}) for k in kMeansPlotSettings]))
        k = "k_means"
        for data in FileIO.iterateJsonFromFile(TweetsFile.combined_stats_file):
            for metric in speedStats["k_means"]:
                speedStats[k][metric].append(data[k][metric])
        for k in speedStats:
            if "f1" in speedStats[k]:
                del speedStats[k]["f1"]
        dataForPlot = dict([(k, []) for k in speedStats])
        for k in speedStats:
            for k1 in speedStats[k]:
                dataForPlot[k] += [np.mean(speedStats[k][k1])]
#        del dataForPlot['k_means']
        print dataForPlot
        ind, width = np.arange(2), 0.1
        rects, i = [], 1
        plotSettings.update(kMeansPlotSettings)
        for k in dataForPlot:
            rects.append(plt.bar(ind + i * width, dataForPlot[k], width, color=plotSettings[k]["color"]))
            i += 1
        plt.ylabel(getLatexForString("Score"))
        plt.title(getLatexForString("Clustering quality comparison for Streaming LSH with SSA"))
        plt.xticks(ind + 2 * width, ("$Purity$", "$NMI$"))
        plt.legend([r[0] for r in rects], [plotSettings[k]["label"] for k in plotSettings], loc=4)
#        plt.show()
        plt.savefig("qualityComparisonAll.pdf")
Developer ID: kykamath, Project: hd_streams_clustering, Lines of code: 37, Source file: quality_comparison_with_ssa.py

Example 3: build

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
def build(numberOfTimeUnits=24):
    validLattices = set()
    for data in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%('world','%s_%s'%(2,11))): validLattices.add(data['id'])
    documents, lattices = [], set()
    for h in FileIO.iterateJsonFromFile(hashtagsFile%('training_world','%s_%s'%(2,11))): 
        hashtag, document = Hashtag(h), []
        if hashtag.isValidObject():
            for timeUnit, occs in enumerate(hashtag.getOccrancesEveryTimeWindowIterator(HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)):
                occs = filter(lambda t: t[0] in validLattices, occs)
                occs = sorted(occs, key=itemgetter(0))
                if occs: 
                    for lattice in zip(*occs)[0]: lattices.add(lattice)
                document.append([timeUnit, [(k, len(list(i))) for k, i in groupby(occs, key=itemgetter(0))]])
            if document: documents.append(document)
    lattices = sorted(list(lattices))
    print len(lattices)
    documents = [(d, TargetSelectionRegressionClassifier.getPercentageDistributionInLattice(d)) for d in documents]
    documents = documents[:int(len(documents)*0.80)]
    for decisionTimeUnit in range(1, numberOfTimeUnits+1):
        for latticeCount, predictingLattice in enumerate(lattices):
            print decisionTimeUnit, latticeCount,
            inputVectors, outputValues = [], []
            for rawDocument, processedDocument in documents:
                documentForTimeUnit = TargetSelectionRegressionClassifier.getPercentageDistributionInLattice(rawDocument[:decisionTimeUnit])
                if documentForTimeUnit and processedDocument:
                    vector =  [documentForTimeUnit.get(l, 0) for l in lattices]
                    inputVectors.append(vector), outputValues.append(float(processedDocument.get(predictingLattice, 0)))
#            TargetSelectionRegressionClassifier(decisionTimeUnit=decisionTimeUnit, predictingLattice=predictingLattice).build(zip(inputVectors, outputValues))
            TargetSelectionRegressionSVMRBFClassifier(decisionTimeUnit=decisionTimeUnit, predictingLattice=predictingLattice).build(zip(inputVectors, outputValues))
Developer ID: kykamath, Project: hashtags_and_geo, Lines of code: 31, Source file: target_selection_regression_classifier.py

Example 4: analyzeJustifyExponentialDecay

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def analyzeJustifyExponentialDecay(self):
        global evaluation
        experimentsData = {JustifyExponentialDecay.with_decay: {}, JustifyExponentialDecay.without_decay: {}}
        for data in FileIO.iterateJsonFromFile(JustifyExponentialDecay.stats_file): experimentsData[data['iteration_parameters']['type']][getDateTimeObjectFromTweetTimestamp(data['iteration_parameters']['current_time'])]=data['clusters']
        qualityData = []
        for k1, k2 in zip(sorted(experimentsData[JustifyExponentialDecay.with_decay]), sorted(experimentsData[JustifyExponentialDecay.without_decay])):
            qualityData.append((k1, evaluation.getEvaluationMetrics(experimentsData[JustifyExponentialDecay.with_decay][k1], None, None)['purity']-evaluation.getEvaluationMetrics(experimentsData[JustifyExponentialDecay.without_decay][k1], None, None)['purity']))
        keyTime = sorted(qualityData, key=itemgetter(1))[-1][0]
        clusterWithDecay = [i for i in experimentsData[JustifyExponentialDecay.with_decay][keyTime] if len(i)>=3]
        clusterWithOutDecay = [i for i in experimentsData[JustifyExponentialDecay.without_decay][keyTime] if len(i)>=3]
#        for c in clusterWithDecay:
#            print c, [evaluation.expertsToClassMap[i.lower()] for i in c]

        interestedCluster = set(['Zap2it', 'ESPNAndyKatz', 'comingsoonnet', '950KJR', 'ginasmith888', 'UKCoachCalipari', 'SportsFanz', 'David_Henrie'])
        for c in clusterWithOutDecay:
            if len(set(c).intersection(interestedCluster))>0: 
#                print c, [evaluation.expertsToClassMap[i.lower()] for i in c]
                setString = ', '.join(['%s (%s)'%(i, evaluation.expertsToClassMap[i.lower()]) for i in sorted(c)]).replace(' ', '\\ ').replace('_', '\\_')
                print keyTime, '&', setString, '\\\\'
            
        clustersDiscoveredEarlierByDecay = {}
        for kt in sorted(experimentsData[JustifyExponentialDecay.with_decay]):
            for c in experimentsData[JustifyExponentialDecay.with_decay][kt]:
                c=sorted(c)
                if len(set(c).intersection(interestedCluster))>0: 
                    classes = [evaluation.expertsToClassMap[i.lower()] for i in c if i.lower() in evaluation.expertsToClassMap]
                    if sorted([(k, len(list(g))/float(len(classes))) for k,g in groupby(sorted(classes))], key=itemgetter(1))[-1][1]>0.7:
                        if kt>datetime(2011,3,19) and kt<=keyTime: clustersDiscoveredEarlierByDecay[kt]=c
        observedStrings = set()
        for k in sorted(clustersDiscoveredEarlierByDecay): 
            setString = ', '.join(['%s (%s)'%(i, evaluation.expertsToClassMap[i.lower()]) for i in sorted(clustersDiscoveredEarlierByDecay[k])]).replace(' ', '\\ ').replace('_', '\\_')
            if setString not in observedStrings: print k, '&', setString, '\\\\'; observedStrings.add(setString)
Developer ID: greeness, Project: hd_streams_clustering, Lines of code: 34, Source file: algorithms_performance.py

Example 5: probabilisticCoverageModelExample

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def probabilisticCoverageModelExample(hashtag, type):
        MINUTES, timeUnit = 5, 1
        print len(CoverageBasedLatticeSelectionModel.lattices)
        for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
            if hashtagObject['h']==hashtag:
                occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
                occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
                occsInTimeunit =  zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
                allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
                if type=='5m': probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(occsInTimeunit)
                else: 
                    print getRadius(allOccurances)
                    probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(allOccurances)
                latticeScores = CoverageBasedLatticeSelectionModel.spreadProbability(CoverageBasedLatticeSelectionModel.lattices, probabilityDistributionForObservedLattices)
                points, colors = zip(*map(lambda t: (getLocationFromLid(t[0].replace('_', ' ')), t[1]), sorted(latticeScores.iteritems(), key=itemgetter(1))))
#                print points[0], colors[0]
                ax = plt.subplot(111)
                sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
                divider = make_axes_locatable(ax)
#                plt.title('Jaccard similarity with New York')
                cax = divider.append_axes("right", size="5%", pad=0.05)
                plt.colorbar(sc, cax=cax)
                plt.show()
#                plt.savefig('../images/coverage_examples/%s_%s.png'%(hashtag, type))
                plt.clf()
                break
Developer ID: kykamath, Project: hashtags_and_geo, Lines of code: 28, Source file: plots.py

Example 6: coverageIndication

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def coverageIndication():
        MINUTES = 5
        for timeUnit, color, shape in [(1, 'r', 'x'), (3, 'g', 'd'), (6, 'b', 's')]:
            print timeUnit
            data = defaultdict(int)
            for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%('training_world','%s_%s'%(2,11))):
                try:
                    occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
                    occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
                    occsInTimeunit =  zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
                    if len(occsInTimeunit)>10:
                        allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
                        timeUnitRadius, allRadius = getRadius(occsInTimeunit), getRadius(allOccurances)
                        data[int(abs(timeUnitRadius-allRadius))/50*50+50]+=1
#                        data[round(abs(timeUnitRadius-allRadius)/allRadius, 2)]+=1
                except IndexError as e: pass
            for k in data.keys()[:]: 
                if data[k]<3: del data[k]
            dataX, dataY = zip(*sorted(data.iteritems(), key=itemgetter(0)))
            plt.loglog(dataX, dataY, lw=2, label=str(timeUnit*MINUTES) + ' minutes', marker=shape)
#        plt.loglog([1],[1])
#        plt.title('Early indication of coverage'), 
        plt.xlabel('Coverage difference (miles)', fontsize=20), plt.ylabel('Number of hashtags', fontsize=20)
        plt.legend()
#        plt.show()
        plt.savefig('../images/coverageIndication.png')
Developer ID: kykamath, Project: hashtags_and_geo, Lines of code: 28, Source file: plots.py

Example 7: temporalLocalityTemporalDistanceExample

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def temporalLocalityTemporalDistanceExample(lattice=NEW_YORK):
        distances = defaultdict(dict)
        for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%('training_world','%s_%s'%(2,11))):
            if latticeObject['id']==lattice:
                latticeHashtagsSet = set(latticeObject['hashtags'])
                for neighborLattice, neighborHashtags in latticeObject['links'].iteritems():
                    distances[neighborLattice] = {}
                    neighborHashtags = filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance(latticeObject['hashtags'], neighborHashtags, findLag=False)
                    neighborHashtagsSet = set(neighborHashtags)
                    distances[neighborLattice]['similarity']=len(latticeHashtagsSet.intersection(neighborHashtagsSet))/float(len(latticeHashtagsSet.union(neighborHashtagsSet)))
                    distances[neighborLattice]['temporalDistance']=np.mean([abs(latticeObject['hashtags'][k][0]-neighborHashtags[k][0]) for k in neighborHashtags if k in latticeObject['hashtags']])/(60.*60.)
                    distances[neighborLattice]['geoDistance']=getHaversineDistanceForLids(latticeObject['id'].replace('_', ' '), neighborLattice.replace('_', ' '))
                break
        dataPoints = []
        ax = plt.subplot(111)
        for k, data in distances.iteritems(): dataPoints.append((getLocationFromLid(k.replace('_', ' ')), data['temporalDistance']))
        points, colors = zip(*sorted(dataPoints, key=itemgetter(1)))
        sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', cmap='RdPu', c=colors, lw = 0, alpha=1.0)
        plotPointsOnWorldMap([getLocationFromLid(lattice.replace('_', ' '))], blueMarble=False, bkcolor='#CFCFCF', c='#64FF1C', lw = 0)
        divider = make_axes_locatable(ax)
        plt.title('Average time difference from New York')
        cax = divider.append_axes("right", size="5%", pad=0.05)
        plt.colorbar(sc, cax=cax)
#        plt.show()
        plt.savefig('../images/temporalDistanceExample.png')
Developer ID: kykamath, Project: hashtags_and_geo, Lines of code: 27, Source file: plots.py

Example 8: plotClusteringSpeed

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def plotClusteringSpeed(saveFig=True):
        plotSettings = {
            "k_means": {"label": "Iterative k-means", "color": "#FD0006"},
            "mr_k_means": {"label": "MR k-means", "color": "#5AF522"},
            "streaming_lsh": {"label": "Stream CDA", "color": "#7109AA"},
        }
        dataToPlot = {
            "k_means": {"x": [], "y": []},
            "mr_k_means": {"x": [], "y": []},
            "streaming_lsh": {"x": [], "y": []},
        }
        for data in FileIO.iterateJsonFromFile(TweetsFile.combined_stats_file):
            for k in plotSettings:
                dataToPlot[k]["x"].append(data[k]["no_of_documents"])
                dataToPlot[k]["y"].append(data[k]["iteration_time"])
        for k in plotSettings:
            plt.loglog(
                dataToPlot[k]["x"],
                dataToPlot[k]["y"],
                label=plotSettings[k]["label"],
                color=plotSettings[k]["color"],
                lw=2,
            )
        plt.legend(loc=4)
        if saveFig:
            plt.xlabel(getLatexForString("\# of documents"))
            plt.ylabel(getLatexForString("Running time (s)"))
            plt.title(getLatexForString("Running time comparison for Streaming LSH with k-Means"))
        plt.xlim(xmin=800, xmax=100000)
        plt.xticks([])
#        plt.show()
        if saveFig:
            plt.savefig("speedComparisonWithKMeans.pdf")
Developer ID: kykamath, Project: hd_streams_clustering, Lines of code: 35, Source file: quality_comparison_with_ssa.py

Example 9: plotClusteringQuality

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def plotClusteringQuality():
        del plotSettings["ssa_mr"]
        speedStats = dict([(k, {"f1": [], "nmi": [], "purity": []}) for k in plotSettings])
        for data in FileIO.iterateJsonFromFile(TweetsFile.stats_file):
            for k in speedStats:
                for metric in speedStats["ssa"]:
                    speedStats[k][metric].append(data[k][metric])
        dataForPlot = dict([(k, []) for k in plotSettings])
        for k, v in speedStats.iteritems():
            print k
            for k1, v1 in v.iteritems():
                if type(v1[0]) != type([]):
                    print k1, "(%0.2f %0.2f)" % (np.mean(v1), np.var(v1))
                    dataForPlot[k] += [np.mean(v1)]
                else:
                    print k1, ["(%0.2f %0.2f)" % (np.mean(z), np.var(z)) for z in zip(*v1)]
                    dataForPlot[k] += [np.mean(z) for z in zip(*v1)]
        ind, width = np.arange(5), 0.1
        rects, i = [], 0
        for k in dataForPlot:
            rects.append(plt.bar(ind + i * width, dataForPlot[k], width, color=plotSettings[k]["color"]))
            i += 1
        plt.ylabel(getLatexForString("Score"))
        plt.title(getLatexForString("Clustering quality comparison for Streaming LSH with SSA"))
        plt.xticks(ind + width, ("$F$", "$Precision$", "$Recall$", "$Purity$", "$NMI$"))
        plt.legend([r[0] for r in rects], [plotSettings[k]["label"] for k in plotSettings], loc=4)
#        plt.show()
        plt.savefig("qualityComparisonWithSSA.pdf")
Developer ID: kykamath, Project: hd_streams_clustering, Lines of code: 30, Source file: quality_comparison_with_ssa.py

Example 10: print_dense_utm_ids

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def print_dense_utm_ids():
        '''Prints list of dense utm_ids.'''
        print [utm_object['utm_id']
               for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id,
                                                            remove_params_dict=True)]
Developer ID: kykamath, Project: hashtags_and_geo, Lines of code: 9, Source file: analysis.py

Example 11: iterateExpertClusters

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def iterateExpertClusters(startingDay=datetime(2011,3,19), endingDay=datetime(2011,3, 30)):
#    def iterateExpertClusters(startingDay=datetime(2011,3,19), endingDay=datetime(2011,4,7)):
        while startingDay<=endingDay:
            for line in FileIO.iterateJsonFromFile(experts_twitter_stream_settings.lsh_clusters_folder+FileIO.getFileByDay(startingDay)): 
                currentTime = getDateTimeObjectFromTweetTimestamp(line['time_stamp'])
                for clusterMap in line['clusters']: yield (currentTime, TwitterCrowdsSpecificMethods.getClusterFromMapFormat(clusterMap))
            startingDay+=timedelta(days=1)
Developer ID: greeness, Project: hd_streams_clustering, Lines of code: 9, Source file: data_generation_and_crowd_analysis.py

Example 12: significant_nei_utm_ids

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def significant_nei_utm_ids():
        output_folder = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'/%s.png'
        for i, data in enumerate(FileIO.iterateJsonFromFile(f_significant_nei_utm_ids, remove_params_dict=True)):
            utm_lat_long = UTMConverter.getLatLongUTMIdInLatLongForm(data['utm_id'])
            nei_utm_lat_longs = map(
                lambda nei_utm_id: UTMConverter.getLatLongUTMIdInLatLongForm(nei_utm_id),
                data['nei_utm_ids']
            )
            if nei_utm_lat_longs:
                output_file = output_folder%('%s_%s'%(utm_lat_long))
                plotPointsOnWorldMap(nei_utm_lat_longs,
                                     blueMarble=False,
                                     bkcolor='#CFCFCF',
                                     lw=0,
                                     color='#EA00FF',
                                     alpha=1.)
                _, m = plotPointsOnWorldMap([utm_lat_long],
                                            blueMarble=False,
                                            bkcolor='#CFCFCF',
                                            lw=0,
                                            color='#2BFF00',
                                            s=40,
                                            returnBaseMapObject=True,
                                            alpha=1.)
                for nei_utm_lat_long in nei_utm_lat_longs:
                    m.drawgreatcircle(utm_lat_long[1],
                                      utm_lat_long[0],
                                      nei_utm_lat_long[1],
                                      nei_utm_lat_long[0],
                                      color='#FFA600',
                                      lw=1.5,
                                      alpha=1.0)
                print 'Saving %s'%(i+1)
                savefig(output_file)
Developer ID: kykamath, Project: hashtags_and_geo, Lines of code: 36, Source file: plots.py

Example 13: trendCurves

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def trendCurves(iterationData=None, experimentFileName=None):
        if iterationData: 
            currentTimeStep, _, currentTopics, _, finalCall, conf = iterationData
            experimentFileName = conf['experimentFileName']
            if not finalCall:
                topicDistribution = dict((str(topic.id), {'total': topic.totalCount, 'timeStep': topic.countDistribution[currentTimeStep]}) for topic in currentTopics)
#                print currentTimeStep
                FileIO.writeToFileAsJson({'t':currentTimeStep, 'topics':topicDistribution}, experimentFileName)
            else:
                iterationInfo  = {'trending_topics': [topic.id for topic in currentTopics if topic.stickiness>=stickinessLowerThreshold],
                      'topic_colors': dict((str(topic.id), topic.color) for topic in currentTopics),
                      'conf': conf}
                del iterationInfo['conf']['spamDectectionMethod']
                FileIO.writeToFileAsJson(iterationInfo, experimentFileName)
        else:
            topicsDataX = defaultdict(list)
            topicsDataY = defaultdict(list)
            for data in FileIO.iterateJsonFromFile(experimentFileName):
                if 'conf' not in data:
                    for topic in data['topics']: topicsDataX[topic].append(data['t']), topicsDataY[topic].append(data['topics'][topic]['timeStep'])
                else: topicColorMap=data['topic_colors']; trendingTopics=data['trending_topics']
            for topic in topicsDataX: plt.fill_between(topicsDataX[topic], topicsDataY[topic], color=topicColorMap[str(topic)], alpha=1.0)
            plt.figure()
            for topic in trendingTopics: plt.fill_between(topicsDataX[str(topic)], topicsDataY[str(topic)], color=topicColorMap[str(topic)], alpha=1.0)
            plt.ylabel('Number of Contents', fontsize=16, fontweight='bold')
            plt.show()
Developer ID: kykamath, Project: spam_model, Lines of code: 28, Source file: models_1.py

Example 14: measureCorrelations

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def measureCorrelations(timeRange, outputFolder):
        '''
        ['haversine_distance', 'temporal_distance_in_hours', 0.20147108648121248]
        ['haversine_distance', 'sharing_probability', -0.19587239643328627]
        '''
        measures = [
                    (LatticeGraph.typeHaversineDistance, LatticeGraph.typeTemporalDistanceInHours),
                    (LatticeGraph.typeHaversineDistance, LatticeGraph.typeSharingProbability),
                    ]
        runData = []
        for xMeasure, yMeasure in measures:
            i, xdata, ydata = 1, [], []
            for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%(outputFolder,'%s_%s'%timeRange)):
                print i, latticeObject['id']; i+=1
                xdata+=zip(*xMeasure['method'](latticeObject)['links'].iteritems())[1]
                ydata+=zip(*yMeasure['method'](latticeObject)['links'].iteritems())[1]
#                if i==200: break
            pearsonsCorrelation, _ = stats.pearsonr(xdata, ydata)
#            plt.scatter(xdata[:5000], ydata[:5000])
#            plt.title('Pearson\'s co-efficient %0.3f'%pearsonsCorrelation)
#            plt.xlabel(xMeasure['title']), plt.ylabel(yMeasure['title'])
#            plt.show()
            runData.append([xMeasure['id'], yMeasure['id'], pearsonsCorrelation])
        for i in runData:
            print i
Developer ID: kykamath, Project: hashtags_and_geo, Lines of code: 27, Source file: analysis.py

Example 15: plotDimensionsUpdateFrequencyEstimation

# Required import: from library.file_io import FileIO [as alias]
# Or: from library.file_io.FileIO import iterateJsonFromFile [as alias]
    def plotDimensionsUpdateFrequencyEstimation(self, returnAxisValuesOnly=True):
        '''
        numberOfTimeUnits=10*24*12
        Experts stream 12
        Houston stream 2
        '''
        dataDistribution = defaultdict(list)
        for line in FileIO.iterateJsonFromFile(self.dimensionsUpdateFrequencyFile):
            for k, v in line[ParameterEstimation.dimensionsUpdateFrequencyId].iteritems():
                k = int(k) / self.timeUnitInSeconds.seconds
                if k not in dataDistribution: dataDistribution[k] = [0., 0.]
                dataDistribution[k][0] += v; dataDistribution[k][1] += 1
        x, y = [], []; [(x.append(k), y.append((dataDistribution[k][0] / dataDistribution[k][1]))) for k in sorted(dataDistribution)]
        x1, y1 = [], []; [(x1.append(k), y1.append((dataDistribution[k][0] / dataDistribution[k][1]) / k)) for k in sorted(dataDistribution)]
        x = x[:numberOfTimeUnits]; y = y[:numberOfTimeUnits]; x1 = x1[:numberOfTimeUnits]; y1 = y1[:numberOfTimeUnits]
        def subPlot(id):
            plt.subplot(id)
            inactivityCoordinates = max(zip(x1, y1), key=itemgetter(1))
            plt.semilogx(x1, y1, '-', color=self.stream_settings['plot_color'], label=getLatexForString(self.stream_settings['plot_label'] + ' (Update frequency=%d TU)' % inactivityCoordinates[0]), lw=2)
            plt.subplot(id).yaxis.set_major_formatter(FuncFormatter(lambda x, i: '%0.1f' % (x / 10. ** 3)))
            plt.semilogx([inactivityCoordinates[0]], [inactivityCoordinates[1]], 'o', alpha=0.7, color='r')
            plt.subplot(id).yaxis.set_major_formatter(FuncFormatter(lambda x, i: '%0.1f' % (x / 10. ** 3)))
            plt.yticks((min(y1), max(y1)))
            print self.stream_settings['plot_label'], inactivityCoordinates[0]
        plt.subplot(311)
        plt.title(getLatexForString('Dimensions update frequency estimation'))
        plt.semilogx(x, y, '-', color=self.stream_settings['plot_color'], label=getLatexForString(self.stream_settings['plot_label']), lw=2)
        plt.subplot(311).yaxis.set_major_formatter(FuncFormatter(lambda x, i: '%0.1f' % (x / 10. ** 5)))
        plt.text(0.0, 1.01, getLatexForString('10^5'), transform=plt.gca().transAxes)
        plt.ylabel(getLatexForString('\# of decayed dimensions'))
        if self.stream_settings['stream_id'] == 'experts_twitter_stream': subPlot(312)
        else: subPlot(313); plt.xlabel(getLatexForString(xlabelTimeUnits))
        plt.ylabel(getLatexForString('Rate of DD (10^3)'))
        plt.legend(loc=3)
        if returnAxisValuesOnly: plt.show()
Developer ID: greeness, Project: hd_streams_clustering, Lines of code: 37, Source file: stream_parameters_estimation.py


Note: The library.file_io.FileIO.iterateJsonFromFile method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers, and the copyright of the source code remains with the original authors. Consult the corresponding project's license before distributing or using the code; do not reproduce this article without permission.