本文整理汇总了Python中utilities.Utilities.writeAsJsonToFile方法的典型用法代码示例。如果您正苦于以下问题:Python Utilities.writeAsJsonToFile方法的具体用法?Python Utilities.writeAsJsonToFile怎么用?Python Utilities.writeAsJsonToFile使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utilities.Utilities
的用法示例。
在下文中一共展示了Utilities.writeAsJsonToFile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: generateRawDataForGivenSetOfUsers
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateRawDataForGivenSetOfUsers(numberOfUsersList):
for numberOfUsers in numberOfUsersList:
currentTime = Settings.startTime
allExpertsTop, allExpertsBottom = (
ExpertUsers(number=numberOfUsers),
ExpertUsers(number=numberOfUsers, type=ExpertUsers.typeBottom),
)
allExpertsList = {}
for k, v in allExpertsTop.list.iteritems():
allExpertsList[k] = v
for k, v in allExpertsBottom.list.iteritems():
allExpertsList[k] = v
while currentTime <= Settings.endTime:
trainingFile = Utilities.getTrainingFile(currentTime, DocumentType.typeRaw, numberOfUsers)
testFile = Utilities.getTestFile(currentTime, DocumentType.typeRaw, numberOfUsers, bottom=True)
Utilities.createDirectory(trainingFile), Utilities.createDirectory(testFile)
print numberOfUsers, Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
for tweet in CreateTrainingAndTestSets.getTweetsFromExperts(
allExpertsList, Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
):
tweet["class"] = allExpertsList[tweet["user"]["id_str"]]["class"]
if tweet["user"]["id_str"] in allExpertsTop.list:
Utilities.writeAsJsonToFile(tweet, trainingFile)
else:
Utilities.writeAsJsonToFile(tweet, testFile)
currentTime += timedelta(days=1)
示例2: generateStatsForGlobalClassifier
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateStatsForGlobalClassifier():
    """Compute the daily AUCM of the pre-trained global classifier against
    one day of RUUSL-unigram test documents and append one JSON record per
    day to the global-classifier stats file."""
    classifier = GlobalClassifier()
    classifier.load()
    day = Settings.startTime
    while day <= Settings.endTime:
        testIterator = TestDocuments(currentTime=day, numberOfExperts=Settings.numberOfExperts,
                                     dataType=DocumentType.typeRuuslUnigram, noOfDays=1).iterator()
        record = {
            'day': datetime.strftime(day, Settings.twitter_api_time_format),
            'metric': 'aucm',
            'data_type': DocumentType.typeRuuslUnigram,
            'test_data_days': 1,
        }
        record['value'] = classifier.getAUCM(testIterator)
        Utilities.writeAsJsonToFile(record, Settings.stats_for_global_classifier)
        day += timedelta(days=1)
示例3: generateStatsForTrainingDataPerDay
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateStatsForTrainingDataPerDay():
    """Record, per day, how the 1-day training data is distributed across
    classes, appending one JSON record per day to the training-data stats
    file."""
    day = Settings.startTime
    windowLength = 1
    while day <= Settings.endTime:
        perClassCounts = defaultdict(int)
        trainingItems = Utilities.getTweets(fileNameMethod=Utilities.getTrainingFile,
                                            dataDirection=DataDirection.past,
                                            currentTime=day,
                                            numberOfExperts=Settings.numberOfExperts,
                                            dataType=DocumentType.typeRuuslUnigram,
                                            noOfDays=windowLength)
        for item in trainingItems:
            # item[1] is treated as the class label here — presumably
            # (document, class) pairs; verify against Utilities.getTweets.
            perClassCounts[item[1]] += 1
        record = {'day': datetime.strftime(day, Settings.twitter_api_time_format),
                  'class_distribution': perClassCounts}
        Utilities.writeAsJsonToFile(record, Settings.stats_for_training_data)
        day += timedelta(days=1)
示例4: generateStatsToDetermineFixedWindowLength
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateStatsToDetermineFixedWindowLength():
    """Evaluate, for each day, classifiers of every available window length
    (testing on the following day) so the best fixed window length can be
    chosen; writes one AUCM record per (day, length)."""
    global maxLength
    day = Settings.startTime
    while day <= Settings.endTime:
        for windowLength in Utilities.getClassifierLengthsByDay(day, maxLength):
            model = FixedWindowClassifier(currentTime=day, numberOfExperts=Settings.numberOfExperts,
                                          dataType=DocumentType.typeRuuslUnigram, noOfDays=windowLength)
            model.load()
            # Test on the day immediately after the training window.
            testIterator = TestDocuments(currentTime=day + timedelta(days=1),
                                         numberOfExperts=Settings.numberOfExperts,
                                         dataType=DocumentType.typeRuuslUnigram,
                                         noOfDays=1).iterator()
            record = {'day': datetime.strftime(day, Settings.twitter_api_time_format),
                      'classifier_length': windowLength,
                      'metric': 'aucm',
                      'number_of_experts': Settings.numberOfExperts,
                      'data_type': DocumentType.typeRuuslUnigram,
                      'test_data_days': 1}
            record['value'] = model.getAUCM(testIterator)
            Utilities.writeAsJsonToFile(record, Settings.stats_to_determine_fixed_window_length)
        day += timedelta(days=1)
示例5: generateStatsForTopFeatures
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateStatsForTopFeatures():
    """Dump the 2000 most informative features of each day's 1-day
    fixed-window classifier, one JSON record per day, to the
    most-informative-features stats file.

    Fix: dropped the unused ``global maxLength`` declaration — this function
    never reads or writes ``maxLength``.
    """
    currentDay = Settings.startTime
    noOfDays = 1  # classifiers trained on a single day of data
    while currentDay <= Settings.endTime:
        classifier = FixedWindowClassifier(currentTime=currentDay,
                                           numberOfExperts=Settings.numberOfExperts,
                                           dataType=DocumentType.typeRuuslUnigram,
                                           noOfDays=noOfDays)
        classifier.load()
        data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format),
                'classifier_length': noOfDays,
                'number_of_experts': Settings.numberOfExperts,
                'data_type': DocumentType.typeRuuslUnigram}
        data['features'] = classifier.showMostInformativeFeatures(2000)
        Utilities.writeAsJsonToFile(data, Settings.stats_for_most_informative_features)
        currentDay += timedelta(days=1)
示例6: generateStatsObservePerformanceByRelabelingDocuments
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateStatsObservePerformanceByRelabelingDocuments():
    """Measure AUCM of the relabeled-documents classifier at the ideal model
    length, for each day on which a classifier of that length exists; tests
    on the following day."""
    global maxLength, idealModelLength
    day = Settings.startTime
    while day <= Settings.endTime:
        availableLengths = set(Utilities.getClassifierLengthsByDay(day, maxLength))
        # Only run when the ideal window length is available for this day.
        lengthsToRun = [idealModelLength] if idealModelLength in availableLengths else []
        for windowLength in lengthsToRun:
            model = FixedWindowWithRelabeledDocumentsClassifier(currentTime=day,
                                                               numberOfExperts=Settings.numberOfExperts,
                                                               dataType=DocumentType.typeRuuslUnigram,
                                                               noOfDays=windowLength)
            model.load()
            record = {'day': datetime.strftime(day, Settings.twitter_api_time_format),
                      'classifier_length': windowLength,
                      'metric': 'aucm',
                      'number_of_experts': Settings.numberOfExperts,
                      'data_type': DocumentType.typeRuuslUnigram,
                      'test_data_days': 1}
            record['value'] = model.getAUCM(TestDocuments(currentTime=day + timedelta(days=1),
                                                          numberOfExperts=Settings.numberOfExperts,
                                                          dataType=DocumentType.typeRuuslUnigram,
                                                          noOfDays=1).iterator())
            Utilities.writeAsJsonToFile(record, Settings.stats_to_observe_performance_by_relabeling_documents)
        day += timedelta(days=1)
示例7: generateDataSetStats125
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateDataSetStats125():
    """Per day, count the class frequencies in the RUUSL-unigram training and
    test sets for the configured number of experts and append them as one
    JSON record per day.

    Fix: renamed the loop variable ``file``, which shadowed the Python 2
    ``file`` builtin.
    """
    currentDay = Settings.startTime
    while currentDay <= Settings.endTime:
        data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format),
                'train_classes': defaultdict(int),
                'test_classes': defaultdict(int)}
        inputTrainingSetFile = Utilities.getTrainingFile(currentDay, DocumentType.typeRuuslUnigram, Settings.numberOfExperts)
        inputTestSetFile = Utilities.getTestFile(currentDay, DocumentType.typeRuuslUnigram, Settings.numberOfExperts, bottom=True)
        for tweetFile, tweetType in [(inputTrainingSetFile, 'training'), (inputTestSetFile, 'test')]:
            # Tally each tweet's class under the bucket matching its file.
            bucket = 'train_classes' if tweetType == 'training' else 'test_classes'
            for tweet in Utilities.iterateTweetsFromFile(tweetFile):
                data[bucket][tweet['class']] += 1
        Utilities.writeAsJsonToFile(data, Settings.stats_for_dataset_125)
        currentDay += timedelta(days=1)
示例8: generate
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generate(self):
    """Transform every tweet in the training and test input files into the
    meta-augmented document representation and write each record to the
    matching output file."""
    filePairs = [(self.inputTrainingSetFile, self.outputTrainingSetFile),
                 (self.inputTestSetFile, self.outputTestSetFile)]
    for sourceFile, targetFile in filePairs:
        for tweet in Utilities.iterateTweetsFromFile(sourceFile):
            record = dict((key, tweet[key]) for key in DocumentType.keys)
            record["screen_name"] = tweet["screen_name"]
            record["user_id"] = tweet["user_id"]
            # Augment the document terms with meta tokens derived from URLs in the text.
            record["document"] = tweet["document"] + DocumentTypeRuuslUnigramWithMeta.getUrlMeta(record["text"])
            Utilities.writeAsJsonToFile(record, targetFile)
示例9: generateDataForGlobalClassifier
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateDataForGlobalClassifier():
    """Convert the raw ``<classIndex> <term>`` lines of the LDA-SVM global
    classifier dump into JSON records, keeping only the four tracked topics.

    Fixes: the input file is now closed via ``with`` (it was left open), and
    the bare ``except: pass`` — which swallowed every error including
    KeyboardInterrupt — is replaced with explicit malformed-line skipping and
    a narrow best-effort catch around the topic lookup.
    """
    inputDataFile = "/home/kykamath/projects/Classifiers/src/lda_svm/global_classifier/data/global_classifier"
    classToIntMap = {"sports": 1, "politics": 2, "entertainment": 3, "technology": 4}
    with open(inputDataFile) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 2:
                continue  # skip blank/malformed lines instead of silently except-ing
            classType, term = parts
            try:
                stringClassType = Utilities.getTopicForIndex(classType)
            except Exception:  # unknown class index — best-effort, skip the line
                continue
            if stringClassType in classToIntMap:
                Utilities.writeAsJsonToFile(
                    {"class": stringClassType, "data": [term]}, Settings.globalClassifierData
                )
示例10: generateDataSetStats
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateDataSetStats():
    """Per day, count expert tweets per class plus the daily total, and
    append one JSON record per day to the dataset stats file."""
    expertUsers = ExpertUsers()
    expertsById = {}
    for userId, info in expertUsers.list.iteritems():
        expertsById[userId] = info
    day = Settings.startTime
    while day <= Settings.endTime:
        record = {'day': datetime.strftime(day, Settings.twitter_api_time_format),
                  'classes': defaultdict(int),
                  'total_tweets': 0}
        tweetsPath = Settings.twitterUsersTweetsFolder + '%s.gz' % Utilities.getDataFile(day)
        for tweet in CreateTrainingAndTestSets.getTweetsFromExperts(expertsById, tweetsPath):
            if tweet['user']['id_str'] in expertUsers.list:
                tweetClass = expertsById[tweet['user']['id_str']]['class']
                record['classes'][tweetClass] += 1
                record['total_tweets'] += 1
        Utilities.writeAsJsonToFile(record, Settings.stats_for_dataset)
        day += timedelta(days=1)
示例11: generateStatsToCompareDifferentDocumentTypes
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateStatsToCompareDifferentDocumentTypes():
global maxLength, idealModelLength
dataTypes = [DocumentType.typeRuuslUnigram, DocumentType.typeCharBigram, DocumentType.typeCharTrigram, DocumentType.typeRuuslBigram, DocumentType.typeRuuslTrigram, DocumentType.typeRuuslSparseBigram,
DocumentType.typeRuuslUnigramNouns, DocumentType.typeRuuslUnigramWithMeta, DocumentType.typeRuuslUnigramNounsWithMeta]
currentDay = Settings.startTime
while currentDay<=Settings.endTime:
noOfDaysList = list(set([idealModelLength]).intersection(set(Utilities.getClassifierLengthsByDay(currentDay, maxLength))))
for noOfDays in noOfDaysList:
for dataType in dataTypes:
print currentDay, noOfDays, dataType
classifier = FixedWindowClassifier(currentTime=currentDay, numberOfExperts=Settings.numberOfExperts, dataType=dataType, noOfDays=noOfDays)
classifier.load()
data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'classifier_length': noOfDays, 'metric': 'aucm', 'number_of_experts': Settings.numberOfExperts, 'data_type': dataType, 'test_data_days': 1}
data['value'] = classifier.getAUCM(TestDocuments(currentTime=currentDay+timedelta(days=1), numberOfExperts=Settings.numberOfExperts, dataType=dataType, noOfDays=1).iterator())
Utilities.writeAsJsonToFile(data, Settings.stats_to_compare_different_document_types)
currentDay+=timedelta(days=1)
示例12: generateStatsToCompareCollocations
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateStatsToCompareCollocations():
global maxLength, idealModelLength
dataType = DocumentType.typeRuuslUnigram
collocationMeasures = [Collocations.measureTypeChiSquare, Collocations.measureTypeLikelihoodRatio]
currentDay = Settings.startTime
while currentDay<=Settings.endTime:
noOfDaysList = list(set([idealModelLength]).intersection(set(Utilities.getClassifierLengthsByDay(currentDay, maxLength))))
print currentDay, noOfDaysList
for noOfDays in noOfDaysList:
for collocationMeasure in collocationMeasures:
classifier = FixedWindowWithCollocationsClassifier(collocationMeasure=collocationMeasure, currentTime=currentDay, numberOfExperts=Settings.numberOfExpertsSecondSet, dataType=dataType, noOfDays=noOfDays)
classifier.load()
data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'classifier_length': noOfDays, 'metric': 'aucm', 'number_of_experts': Settings.numberOfExpertsSecondSet, 'data_type': dataType, 'collocation_measure': collocationMeasure, 'test_data_days': 1}
data['value'] = classifier.getAUCM(TestDocumentsWithCollocations(collocationMeasure, currentTime=currentDay+timedelta(days=1), numberOfExperts=Settings.numberOfExperts, dataType=dataType, noOfDays=1).iterator())
Utilities.writeAsJsonToFile(data, Settings.stats_to_compare_collocations)
currentDay+=timedelta(days=1)
示例13: generateStatsForDiminishingAUCM
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateStatsForDiminishingAUCM():
currentDay = datetime(2011, 3, 26)
for i in range(5):
print currentDay
try:
testDay = currentDay+timedelta(days=1)
noOfDays = [1, 4, 8]
for daysInFuture in range(1, 20):
for noOfDay in noOfDays:
classifier = FixedWindowClassifier(currentTime=currentDay, numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=noOfDay)
classifier.load()
data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'test_day': datetime.strftime(testDay, Settings.twitter_api_time_format), 'classifier_length': noOfDay, 'metric': 'aucm', 'number_of_experts': Settings.numberOfExperts, 'data_type': DocumentType.typeRuuslUnigram, 'test_data_days': 1, 'no_of_days_in_future': daysInFuture}
data['value'] = classifier.getAUCM(TestDocuments(currentTime=testDay, numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=1).iterator())
Utilities.writeAsJsonToFile(data, Settings.stats_for_diminishing_aucm)
testDay+=timedelta(days=1)
except: pass
currentDay+=timedelta(days=1)
示例14: generateRawDataForIntermediateSetOfUsers
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateRawDataForIntermediateSetOfUsers():
currentTime = Settings.startTime
allExpertsTop = ExpertUsers(Settings.numberOfExperts)
allExpertsIntermediate = ExpertUsers(Settings.numberOfExpertsSecondSet)
allExpertsList = {}
for k, v in allExpertsTop.list.iteritems():
del allExpertsIntermediate.list[k]
for k, v in allExpertsIntermediate.list.iteritems():
allExpertsList[k] = v
while currentTime <= Settings.endTime:
for numberOfExperts in [Settings.numberOfExpertsSecondSet]:
trainingFile = Utilities.getTrainingFile(currentTime, DocumentType.typeRaw, numberOfExperts)
Utilities.createDirectory(trainingFile)
print numberOfExperts, Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
for tweet in CreateTrainingAndTestSets.getTweetsFromExperts(
allExpertsList, Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
):
tweet["class"] = allExpertsList[tweet["user"]["id_str"]]["class"]
Utilities.writeAsJsonToFile(tweet, trainingFile)
currentTime += timedelta(days=1)
示例15: generateStatsForStreamClassifier
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import writeAsJsonToFile [as 别名]
def generateStatsForStreamClassifier():
    """Run every stream-classifier variant over the configured expert-count
    and stream-length combinations, logging AUCM and the number of
    documents classified as one JSON record per combination.

    Total documents: 117731
    """
    streamClassifierTypes = [StreamClassifierFeatureScoreDecay,
                             StreamClassifierFeatureScoreDecayWithInverseClassFrequency,
                             StreamClassifierNaiveBayesWithLaplaceSmoothing]
    expertCounts = [Settings.numberOfExperts]
    streamLengths = [25]
    for classifierType in streamClassifierTypes:
        for numberOfExperts in expertCounts:
            for noOfDays in streamLengths:
                streamClassifier = classifierType(decayRate=Settings.stream_classifier_decay_rate,
                                                  currentTime=Settings.startTime,
                                                  dataType=DocumentType.typeRuuslUnigram,
                                                  numberOfExperts=numberOfExperts,
                                                  noOfDays=noOfDays)
                # Score each document via the AUCM-oriented classification path.
                streamClassifier.classifyingMethod = streamClassifier.classifyForAUCM
                record = {'classifier_type': streamClassifier.type,
                          'stream_length_in_days': noOfDays,
                          'number_of_experts': numberOfExperts,
                          'metric': 'aucm'}
                streamClassifier.start()
                record['number_of_documents_classified'] = len(streamClassifier.classifiedDocuments)
                record['value'] = streamClassifier.getAUCM()
                Utilities.writeAsJsonToFile(record, Settings.stats_for_stream_classifier_comparisons)