当前位置: 首页>>代码示例>>Python>>正文


Python Utilities.writeAsJsonToFile方法代码示例

本文整理汇总了Python中utilities.Utilities.writeAsJsonToFile方法的典型用法代码示例。如果您正苦于以下问题:Python Utilities.writeAsJsonToFile方法的具体用法?Python Utilities.writeAsJsonToFile怎么用?Python Utilities.writeAsJsonToFile使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在utilities.Utilities的用法示例。


在下文中一共展示了Utilities.writeAsJsonToFile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: generateRawDataForGivenSetOfUsers

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateRawDataForGivenSetOfUsers(numberOfUsersList):
     for numberOfUsers in numberOfUsersList:
         currentTime = Settings.startTime
         allExpertsTop, allExpertsBottom = (
             ExpertUsers(number=numberOfUsers),
             ExpertUsers(number=numberOfUsers, type=ExpertUsers.typeBottom),
         )
         allExpertsList = {}
         for k, v in allExpertsTop.list.iteritems():
             allExpertsList[k] = v
         for k, v in allExpertsBottom.list.iteritems():
             allExpertsList[k] = v
         while currentTime <= Settings.endTime:
             trainingFile = Utilities.getTrainingFile(currentTime, DocumentType.typeRaw, numberOfUsers)
             testFile = Utilities.getTestFile(currentTime, DocumentType.typeRaw, numberOfUsers, bottom=True)
             Utilities.createDirectory(trainingFile), Utilities.createDirectory(testFile)
             print numberOfUsers, Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
             for tweet in CreateTrainingAndTestSets.getTweetsFromExperts(
                 allExpertsList, Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
             ):
                 tweet["class"] = allExpertsList[tweet["user"]["id_str"]]["class"]
                 if tweet["user"]["id_str"] in allExpertsTop.list:
                     Utilities.writeAsJsonToFile(tweet, trainingFile)
                 else:
                     Utilities.writeAsJsonToFile(tweet, testFile)
             currentTime += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:28,代码来源:datasets.py

示例2: generateStatsForGlobalClassifier

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateStatsForGlobalClassifier():
     """Record the daily AUCM of the pre-trained global classifier over the study range."""
     classifier = GlobalClassifier()
     classifier.load()
     day = Settings.startTime
     while day <= Settings.endTime:
         testIterator = TestDocuments(currentTime=day, numberOfExperts=Settings.numberOfExperts,
                                      dataType=DocumentType.typeRuuslUnigram, noOfDays=1).iterator()
         stats = {
             'day': datetime.strftime(day, Settings.twitter_api_time_format),
             'metric': 'aucm',
             'data_type': DocumentType.typeRuuslUnigram,
             'test_data_days': 1,
         }
         stats['value'] = classifier.getAUCM(testIterator)
         Utilities.writeAsJsonToFile(stats, Settings.stats_for_global_classifier)
         day += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:11,代码来源:experiments.py

示例3: generateStatsForTrainingDataPerDay

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateStatsForTrainingDataPerDay():
     """Log the per-class tweet counts in each day's one-day training window."""
     windowLength = 1
     day = Settings.startTime
     while day <= Settings.endTime:
         classCounts = defaultdict(int)
         documents = Utilities.getTweets(fileNameMethod=Utilities.getTrainingFile,
                                         dataDirection=DataDirection.past,
                                         currentTime=day,
                                         numberOfExperts=Settings.numberOfExperts,
                                         dataType=DocumentType.typeRuuslUnigram,
                                         noOfDays=windowLength)
         for document in documents:
             classCounts[document[1]] += 1  # document[1] holds the class label
         Utilities.writeAsJsonToFile(
             {'day': datetime.strftime(day, Settings.twitter_api_time_format),
              'class_distribution': classCounts},
             Settings.stats_for_training_data)
         day += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:12,代码来源:experiments.py

示例4: generateStatsToDetermineFixedWindowLength

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateStatsToDetermineFixedWindowLength():
     """Evaluate next-day AUCM for every available classifier window length, per day."""
     global maxLength
     day = Settings.startTime
     while day <= Settings.endTime:
         for windowDays in Utilities.getClassifierLengthsByDay(day, maxLength):
             model = FixedWindowClassifier(currentTime=day,
                                           numberOfExperts=Settings.numberOfExperts,
                                           dataType=DocumentType.typeRuuslUnigram,
                                           noOfDays=windowDays)
             model.load()
             # Test on the day immediately following the training window.
             testIterator = TestDocuments(currentTime=day + timedelta(days=1),
                                          numberOfExperts=Settings.numberOfExperts,
                                          dataType=DocumentType.typeRuuslUnigram,
                                          noOfDays=1).iterator()
             stats = {'day': datetime.strftime(day, Settings.twitter_api_time_format),
                      'classifier_length': windowDays,
                      'metric': 'aucm',
                      'number_of_experts': Settings.numberOfExperts,
                      'data_type': DocumentType.typeRuuslUnigram,
                      'test_data_days': 1}
             stats['value'] = model.getAUCM(testIterator)
             Utilities.writeAsJsonToFile(stats, Settings.stats_to_determine_fixed_window_length)
         day += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:13,代码来源:experiments.py

示例5: generateStatsForTopFeatures

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateStatsForTopFeatures():
     """Dump the 2000 most informative features of each day's one-day classifier."""
     global maxLength
     windowDays = 1
     day = Settings.startTime
     while day <= Settings.endTime:
         model = FixedWindowClassifier(currentTime=day,
                                       numberOfExperts=Settings.numberOfExperts,
                                       dataType=DocumentType.typeRuuslUnigram,
                                       noOfDays=windowDays)
         model.load()
         stats = {'day': datetime.strftime(day, Settings.twitter_api_time_format),
                  'classifier_length': windowDays,
                  'number_of_experts': Settings.numberOfExperts,
                  'data_type': DocumentType.typeRuuslUnigram}
         stats['features'] = model.showMostInformativeFeatures(2000)
         Utilities.writeAsJsonToFile(stats, Settings.stats_for_most_informative_features)
         day += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:13,代码来源:experiments.py

示例6: generateStatsObservePerformanceByRelabelingDocuments

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateStatsObservePerformanceByRelabelingDocuments():
     """Measure next-day AUCM of the relabeled-documents classifier at the ideal window length."""
     global maxLength, idealModelLength
     day = Settings.startTime
     while day <= Settings.endTime:
         # Only evaluate when the ideal window length is actually available on this day.
         availableLengths = set(Utilities.getClassifierLengthsByDay(day, maxLength))
         for windowDays in set([idealModelLength]) & availableLengths:
             model = FixedWindowWithRelabeledDocumentsClassifier(
                 currentTime=day, numberOfExperts=Settings.numberOfExperts,
                 dataType=DocumentType.typeRuuslUnigram, noOfDays=windowDays)
             model.load()
             stats = {'day': datetime.strftime(day, Settings.twitter_api_time_format),
                      'classifier_length': windowDays,
                      'metric': 'aucm',
                      'number_of_experts': Settings.numberOfExperts,
                      'data_type': DocumentType.typeRuuslUnigram,
                      'test_data_days': 1}
             stats['value'] = model.getAUCM(TestDocuments(
                 currentTime=day + timedelta(days=1),
                 numberOfExperts=Settings.numberOfExperts,
                 dataType=DocumentType.typeRuuslUnigram,
                 noOfDays=1).iterator())
             Utilities.writeAsJsonToFile(stats, Settings.stats_to_observe_performance_by_relabeling_documents)
         day += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:14,代码来源:experiments.py

示例7: generateDataSetStats125

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateDataSetStats125():
     """Count per-class tweets in each day's training and test files."""
     day = Settings.startTime
     while day <= Settings.endTime:
         stats = {'day': datetime.strftime(day, Settings.twitter_api_time_format),
                  'train_classes': defaultdict(int),
                  'test_classes': defaultdict(int)}
         trainingFile = Utilities.getTrainingFile(day, DocumentType.typeRuuslUnigram, Settings.numberOfExperts)
         testFile = Utilities.getTestFile(day, DocumentType.typeRuuslUnigram, Settings.numberOfExperts, bottom=True)
         # Training tweets tally into 'train_classes', test tweets into 'test_classes'.
         for path, bucket in [(trainingFile, 'train_classes'), (testFile, 'test_classes')]:
             for tweet in Utilities.iterateTweetsFromFile(path):
                 stats[bucket][tweet['class']] += 1
         Utilities.writeAsJsonToFile(stats, Settings.stats_for_dataset_125)
         day += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:14,代码来源:experiments.py

示例8: generate

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generate(self):
     """Build URL-meta-augmented documents from the raw training and test files."""
     filePairs = [(self.inputTrainingSetFile, self.outputTrainingSetFile),
                  (self.inputTestSetFile, self.outputTestSetFile)]
     for sourceFile, destinationFile in filePairs:
         for tweet in Utilities.iterateTweetsFromFile(sourceFile):
             record = dict((key, tweet[key]) for key in DocumentType.keys)
             record["screen_name"] = tweet["screen_name"]
             record["user_id"] = tweet["user_id"]
             # Extend the base document with tokens derived from URLs in the tweet text.
             record["document"] = tweet["document"] + DocumentTypeRuuslUnigramWithMeta.getUrlMeta(record["text"])
             Utilities.writeAsJsonToFile(record, destinationFile)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:15,代码来源:datasets.py

示例9: generateDataForGlobalClassifier

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateDataForGlobalClassifier():
     """Convert the LDA-SVM global-classifier dump into per-term JSON training records.

     Each input line is expected to be "<classIndex> <term>". Lines whose topic
     index maps to one of the four supported classes are written as
     {"class": ..., "data": [term]} to Settings.globalClassifierData; malformed
     lines are skipped.
     """
     inputDataFile = "/home/kykamath/projects/Classifiers/src/lda_svm/global_classifier/data/global_classifier"
     classToIntMap = {"sports": 1, "politics": 2, "entertainment": 3, "technology": 4}
     # 'with' closes the file even on error (original leaked the handle).
     with open(inputDataFile) as inputFile:
         for line in inputFile:
             try:
                 classType, term = line.strip().split()
                 stringClassType = Utilities.getTopicForIndex(classType)
                 if stringClassType in classToIntMap:
                     Utilities.writeAsJsonToFile(
                         {"class": stringClassType, "data": [term]}, Settings.globalClassifierData
                     )
             except Exception:
                 # Best-effort skip of malformed lines (wrong field count, unknown
                 # topic index). Narrowed from bare 'except:', which also trapped
                 # KeyboardInterrupt/SystemExit.
                 pass
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:15,代码来源:datasets.py

示例10: generateDataSetStats

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateDataSetStats():
     """Tally per-class and total expert tweets for each day in the study range."""
     experts = ExpertUsers()
     # Copy the expert map so lookups below don't depend on the ExpertUsers object.
     expertById = dict(experts.list.iteritems())
     day = Settings.startTime
     while day <= Settings.endTime:
         stats = {'day': datetime.strftime(day, Settings.twitter_api_time_format),
                  'classes': defaultdict(int),
                  'total_tweets': 0}
         dataFile = Settings.twitterUsersTweetsFolder + '%s.gz' % Utilities.getDataFile(day)
         for tweet in CreateTrainingAndTestSets.getTweetsFromExperts(expertById, dataFile):
             userId = tweet['user']['id_str']
             if userId in experts.list:
                 stats['classes'][expertById[userId]['class']] += 1
                 stats['total_tweets'] += 1
         Utilities.writeAsJsonToFile(stats, Settings.stats_for_dataset)
         day += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:16,代码来源:experiments.py

示例11: generateStatsToCompareDifferentDocumentTypes

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateStatsToCompareDifferentDocumentTypes():
     global maxLength, idealModelLength
     dataTypes = [DocumentType.typeRuuslUnigram, DocumentType.typeCharBigram, DocumentType.typeCharTrigram, DocumentType.typeRuuslBigram, DocumentType.typeRuuslTrigram, DocumentType.typeRuuslSparseBigram,
                  DocumentType.typeRuuslUnigramNouns, DocumentType.typeRuuslUnigramWithMeta, DocumentType.typeRuuslUnigramNounsWithMeta]
     currentDay = Settings.startTime
     while currentDay<=Settings.endTime:
         noOfDaysList = list(set([idealModelLength]).intersection(set(Utilities.getClassifierLengthsByDay(currentDay, maxLength))))
         for noOfDays in noOfDaysList: 
             for dataType in dataTypes:
                 print currentDay, noOfDays, dataType
                 classifier = FixedWindowClassifier(currentTime=currentDay, numberOfExperts=Settings.numberOfExperts, dataType=dataType, noOfDays=noOfDays)
                 classifier.load()
                 data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'classifier_length': noOfDays, 'metric': 'aucm', 'number_of_experts': Settings.numberOfExperts, 'data_type': dataType, 'test_data_days': 1}
                 data['value'] = classifier.getAUCM(TestDocuments(currentTime=currentDay+timedelta(days=1), numberOfExperts=Settings.numberOfExperts, dataType=dataType, noOfDays=1).iterator())
                 Utilities.writeAsJsonToFile(data, Settings.stats_to_compare_different_document_types)
         currentDay+=timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:18,代码来源:experiments.py

示例12: generateStatsToCompareCollocations

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateStatsToCompareCollocations():
     global maxLength, idealModelLength
     dataType = DocumentType.typeRuuslUnigram
     collocationMeasures = [Collocations.measureTypeChiSquare, Collocations.measureTypeLikelihoodRatio]
     currentDay = Settings.startTime
     while currentDay<=Settings.endTime:
         noOfDaysList = list(set([idealModelLength]).intersection(set(Utilities.getClassifierLengthsByDay(currentDay, maxLength))))
         print currentDay, noOfDaysList
         for noOfDays in noOfDaysList: 
             for collocationMeasure in collocationMeasures: 
                 classifier = FixedWindowWithCollocationsClassifier(collocationMeasure=collocationMeasure, currentTime=currentDay, numberOfExperts=Settings.numberOfExpertsSecondSet, dataType=dataType, noOfDays=noOfDays)
                 classifier.load()
                 data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'classifier_length': noOfDays, 'metric': 'aucm', 'number_of_experts': Settings.numberOfExpertsSecondSet, 'data_type': dataType, 'collocation_measure': collocationMeasure, 'test_data_days': 1}
                 data['value'] = classifier.getAUCM(TestDocumentsWithCollocations(collocationMeasure, currentTime=currentDay+timedelta(days=1), numberOfExperts=Settings.numberOfExperts, dataType=dataType, noOfDays=1).iterator())
                 Utilities.writeAsJsonToFile(data, Settings.stats_to_compare_collocations)
         currentDay+=timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:18,代码来源:experiments.py

示例13: generateStatsForDiminishingAUCM

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateStatsForDiminishingAUCM():
     currentDay = datetime(2011, 3, 26)
     for i in range(5):
         print currentDay
         try:
             testDay = currentDay+timedelta(days=1)
             noOfDays = [1, 4, 8]
             for daysInFuture in range(1, 20):
                 for noOfDay in noOfDays:
                         classifier = FixedWindowClassifier(currentTime=currentDay, numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=noOfDay)
                         classifier.load()
                         data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'test_day': datetime.strftime(testDay, Settings.twitter_api_time_format), 'classifier_length': noOfDay, 'metric': 'aucm', 'number_of_experts': Settings.numberOfExperts, 'data_type': DocumentType.typeRuuslUnigram, 'test_data_days': 1, 'no_of_days_in_future': daysInFuture}
                         data['value'] = classifier.getAUCM(TestDocuments(currentTime=testDay, numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=1).iterator())
                         Utilities.writeAsJsonToFile(data, Settings.stats_for_diminishing_aucm)
                 testDay+=timedelta(days=1)
         except: pass
         currentDay+=timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:19,代码来源:experiments.py

示例14: generateRawDataForIntermediateSetOfUsers

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
 def generateRawDataForIntermediateSetOfUsers():
     currentTime = Settings.startTime
     allExpertsTop = ExpertUsers(Settings.numberOfExperts)
     allExpertsIntermediate = ExpertUsers(Settings.numberOfExpertsSecondSet)
     allExpertsList = {}
     for k, v in allExpertsTop.list.iteritems():
         del allExpertsIntermediate.list[k]
     for k, v in allExpertsIntermediate.list.iteritems():
         allExpertsList[k] = v
     while currentTime <= Settings.endTime:
         for numberOfExperts in [Settings.numberOfExpertsSecondSet]:
             trainingFile = Utilities.getTrainingFile(currentTime, DocumentType.typeRaw, numberOfExperts)
             Utilities.createDirectory(trainingFile)
             print numberOfExperts, Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
             for tweet in CreateTrainingAndTestSets.getTweetsFromExperts(
                 allExpertsList, Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
             ):
                 tweet["class"] = allExpertsList[tweet["user"]["id_str"]]["class"]
                 Utilities.writeAsJsonToFile(tweet, trainingFile)
         currentTime += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:22,代码来源:datasets.py

示例15: generateStatsForStreamClassifier

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
    def generateStatsForStreamClassifier():
        '''Compare AUCM across stream-classifier variants.

        Total documents: 117731
        '''
        classifierTypes = [StreamClassifierFeatureScoreDecay,
                           StreamClassifierFeatureScoreDecayWithInverseClassFrequency,
                           StreamClassifierNaiveBayesWithLaplaceSmoothing]
        expertCounts = [Settings.numberOfExperts]
        streamLengths = [25]
        for classifierType in classifierTypes:
            for expertCount in expertCounts:
                for streamLength in streamLengths:
                    streamClassifier = classifierType(
                        decayRate=Settings.stream_classifier_decay_rate,
                        currentTime=Settings.startTime,
                        dataType=DocumentType.typeRuuslUnigram,
                        numberOfExperts=expertCount,
                        noOfDays=streamLength)
                    # Classify in AUCM mode so getAUCM() below is meaningful.
                    streamClassifier.classifyingMethod = streamClassifier.classifyForAUCM
                    results = {'classifier_type': streamClassifier.type,
                               'stream_length_in_days': streamLength,
                               'number_of_experts': expertCount,
                               'metric': 'aucm'}
                    streamClassifier.start()
                    results['number_of_documents_classified'] = len(streamClassifier.classifiedDocuments)
                    results['value'] = streamClassifier.getAUCM()
                    Utilities.writeAsJsonToFile(results, Settings.stats_for_stream_classifier_comparisons)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:23,代码来源:experiments.py


注:本文中的utilities.Utilities.writeAsJsonToFile方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。