当前位置: 首页>>代码示例>>Python>>正文


Python Utilities.getTrainingFile方法代码示例

本文整理汇总了Python中utilities.Utilities.getTrainingFile方法的典型用法代码示例。如果您正苦于以下问题:Python Utilities.getTrainingFile方法的具体用法?Python Utilities.getTrainingFile怎么用?Python Utilities.getTrainingFile使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utilities.Utilities的用法示例。


在下文中一共展示了Utilities.getTrainingFile方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import getTrainingFile [as 别名]
 def __init__(self, currentTime, outputDataType, numberOfExperts):
     """Resolve the input/output data set files and create the output directories.

     The input files are the raw-typed (``DocumentType.typeRaw``) training and
     test files for ``currentTime``; the output files are the same pair for
     ``outputDataType``. Test files are always requested with ``bottom=True``.

     Args:
         currentTime: Time value passed to the ``Utilities`` file getters.
         outputDataType: Document type used for the output files.
         numberOfExperts: Expert-set size passed to the ``Utilities`` file getters.
     """
     self.currentTime = currentTime
     self.numberOfExperts = numberOfExperts

     rawType = DocumentType.typeRaw
     self.inputTrainingSetFile = Utilities.getTrainingFile(
         currentTime, rawType, self.numberOfExperts
     )
     self.inputTestSetFile = Utilities.getTestFile(
         currentTime, rawType, self.numberOfExperts, bottom=True
     )

     self.outputTrainingSetFile = Utilities.getTrainingFile(
         currentTime, outputDataType, self.numberOfExperts
     )
     self.outputTestSetFile = Utilities.getTestFile(
         currentTime, outputDataType, self.numberOfExperts, bottom=True
     )

     # Only the output locations are prepared here; the input files are
     # merely resolved, not created.
     for outputFile in (self.outputTrainingSetFile, self.outputTestSetFile):
         Utilities.createDirectory(outputFile)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:12,代码来源:datasets.py

示例2: generateRawDataForGivenSetOfUsers

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import getTrainingFile [as 别名]
 def generateRawDataForGivenSetOfUsers(numberOfUsersList):
     """Write per-day raw training and test tweet files for each expert-set size.

     For every size in ``numberOfUsersList`` the top and bottom expert sets of
     that size are loaded and merged. Then, for each day from
     ``Settings.startTime`` through ``Settings.endTime`` (inclusive), the day's
     tweets from those experts are labelled with the expert's ``"class"`` and
     appended as JSON: tweets from top experts go to the training file, all
     others go to the test file.

     Args:
         numberOfUsersList: Iterable of expert-set sizes to generate data for.
     """
     for numberOfUsers in numberOfUsersList:
         currentTime = Settings.startTime
         allExpertsTop = ExpertUsers(number=numberOfUsers)
         allExpertsBottom = ExpertUsers(number=numberOfUsers, type=ExpertUsers.typeBottom)

         # Merge both expert sets; on a key collision the bottom entry wins,
         # exactly as with the original insertion order.
         allExpertsList = {}
         allExpertsList.update(allExpertsTop.list)
         allExpertsList.update(allExpertsBottom.list)

         while currentTime <= Settings.endTime:
             trainingFile = Utilities.getTrainingFile(currentTime, DocumentType.typeRaw, numberOfUsers)
             testFile = Utilities.getTestFile(currentTime, DocumentType.typeRaw, numberOfUsers, bottom=True)
             Utilities.createDirectory(trainingFile)
             Utilities.createDirectory(testFile)

             # The day's source file is needed for both the progress line and
             # the tweet iterator, so build the path once.
             tweetsFile = Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
             # Formatted single-argument print behaves the same on Python 2 and 3.
             print("%s %s" % (numberOfUsers, tweetsFile))

             for tweet in CreateTrainingAndTestSets.getTweetsFromExperts(allExpertsList, tweetsFile):
                 userId = tweet["user"]["id_str"]
                 tweet["class"] = allExpertsList[userId]["class"]
                 if userId in allExpertsTop.list:
                     Utilities.writeAsJsonToFile(tweet, trainingFile)
                 else:
                     Utilities.writeAsJsonToFile(tweet, testFile)
             currentTime += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:28,代码来源:datasets.py

示例3: generateDataSetStats125

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import getTrainingFile [as 别名]
 def generateDataSetStats125():
     """Append one JSON record of per-class tweet counts for every day.

     For each day from ``Settings.startTime`` through ``Settings.endTime``
     (inclusive), counts the tweets per ``'class'`` in that day's
     ``typeRuuslUnigram`` training and test files and writes the record
     (``'day'``, ``'train_classes'``, ``'test_classes'``) to
     ``Settings.stats_for_dataset_125``.
     """
     day = Settings.startTime
     while day <= Settings.endTime:
         stats = {
             'day': datetime.strftime(day, Settings.twitter_api_time_format),
             'train_classes': defaultdict(int),
             'test_classes': defaultdict(int),
         }
         trainingSetFile = Utilities.getTrainingFile(
             day, DocumentType.typeRuuslUnigram, Settings.numberOfExperts
         )
         testSetFile = Utilities.getTestFile(
             day, DocumentType.typeRuuslUnigram, Settings.numberOfExperts, bottom=True
         )
         # Pair each input file with the counter it feeds, so the counter is
         # chosen once per file rather than once per tweet.
         sources = (
             (trainingSetFile, 'train_classes'),
             (testSetFile, 'test_classes'),
         )
         for tweetsFile, countsKey in sources:
             classCounts = stats[countsKey]
             for tweet in Utilities.iterateTweetsFromFile(tweetsFile):
                 classCounts[tweet['class']] += 1
         Utilities.writeAsJsonToFile(stats, Settings.stats_for_dataset_125)
         day += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:14,代码来源:experiments.py

示例4: generateRawDataForIntermediateSetOfUsers

# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import getTrainingFile [as 别名]
 def generateRawDataForIntermediateSetOfUsers():
     """Write per-day raw training files for the experts outside the top set.

     Loads the expert set of size ``Settings.numberOfExpertsSecondSet``, removes
     every expert that also appears in the set of size
     ``Settings.numberOfExperts``, and then, for each day from
     ``Settings.startTime`` through ``Settings.endTime`` (inclusive), labels the
     remaining experts' tweets with the expert's ``"class"`` and appends them
     as JSON to that day's raw training file.

     Raises:
         KeyError: If a key of the top set is missing from the second set.
     """
     currentTime = Settings.startTime
     allExpertsTop = ExpertUsers(Settings.numberOfExperts)
     allExpertsIntermediate = ExpertUsers(Settings.numberOfExpertsSecondSet)

     # NOTE(review): presumably the second set is a superset of the top set;
     # the strict `del` is kept so a violation still fails loudly - confirm.
     for expertId in allExpertsTop.list:
         del allExpertsIntermediate.list[expertId]

     allExpertsList = {}
     allExpertsList.update(allExpertsIntermediate.list)

     while currentTime <= Settings.endTime:
         numberOfExperts = Settings.numberOfExpertsSecondSet
         trainingFile = Utilities.getTrainingFile(currentTime, DocumentType.typeRaw, numberOfExperts)
         Utilities.createDirectory(trainingFile)

         # The day's source file is needed for both the progress line and the
         # tweet iterator, so build the path once.
         tweetsFile = Settings.twitterUsersTweetsFolder + "%s.gz" % Utilities.getDataFile(currentTime)
         # Formatted single-argument print behaves the same on Python 2 and 3.
         print("%s %s" % (numberOfExperts, tweetsFile))

         for tweet in CreateTrainingAndTestSets.getTweetsFromExperts(allExpertsList, tweetsFile):
             tweet["class"] = allExpertsList[tweet["user"]["id_str"]]["class"]
             Utilities.writeAsJsonToFile(tweet, trainingFile)
         currentTime += timedelta(days=1)
开发者ID:kykamath,项目名称:twitter_classifier,代码行数:22,代码来源:datasets.py


注:本文中的utilities.Utilities.getTrainingFile方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。