当前位置: 首页>>代码示例>>Python>>正文


Python Utils.loadFromFiles方法代码示例

本文整理汇总了Python中Utils.Utils.loadFromFiles方法的典型用法代码示例。如果您正苦于以下问题:Python Utils.loadFromFiles方法的具体用法?Python Utils.loadFromFiles怎么用?Python Utils.loadFromFiles使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Utils.Utils的用法示例。


在下文中一共展示了Utils.loadFromFiles方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Reduce

# 需要导入模块: from Utils import Utils [as 别名]
# 或者: from Utils.Utils import loadFromFiles [as 别名]
 def Reduce(self, partitionFiles):
     """Call the user reduce function once per unique key.

     The (key, value) pairs returned by ``Utils.loadFromFiles`` for
     ``partitionFiles`` are grouped by key, and ``self.reduceFunc`` is
     invoked with a ``(key, list_of_values)`` tuple for every key.

     Args:
         partitionFiles: Passed straight to ``Utils.loadFromFiles``
             together with the mode string ``'rb'`` (presumably a
             collection of file paths -- confirm against Utils).

     Returns:
         A tuple ``(result, countReduceCalls, countSkippedRecords)``:
         the list of values returned by ``self.reduceFunc``, the number
         of successful calls, and the number of keys whose call raised
         and was skipped.

     Raises:
         Exception: Whatever ``self.reduceFunc`` raised, re-raised
             unchanged when ``self.skipBadRecords`` is falsy.
     """
     countReduceCalls = 0
     countSkippedRecords = 0
     result = []

     # Group every value under its key.
     partitionedData = defaultdict(list)
     for (key, value) in Utils.loadFromFiles(partitionFiles, 'rb'):
         partitionedData[key].append(value)

     # Keys are only sorted when an ordering guarantee was requested.
     if self.orderingGaurantee:
         listKeys = sorted(partitionedData)
     else:
         listKeys = list(partitionedData)

     for key in listKeys:
         try:
             reduced = self.reduceFunc((key, partitionedData[key]))
         except Exception:
             # Only ordinary errors are treated as "bad records";
             # KeyboardInterrupt / SystemExit must always propagate.
             if not self.skipBadRecords:
                 raise
             countSkippedRecords += 1
         else:
             result.append(reduced)
             countReduceCalls += 1
     return (result, countReduceCalls, countSkippedRecords)
开发者ID:samishchandra,项目名称:MapReduce,代码行数:26,代码来源:MapReduce.py

示例2: main

# 需要导入模块: from Utils import Utils [as 别名]
# 或者: from Utils.Utils import loadFromFiles [as 别名]
 def main(self):
     """Run the master's full flow: map phase, reduce phase, statistics.

     Steps, in order:
       1. Assign ``self.inputSplits`` to ``self.mapWorkers`` and wait
          until every split reports ``isProcessedBy``, pinging the
          workers roughly every ``workerTimeout / 3`` seconds.
       2. Send ``('ExitCommand',)`` to the map workers and collect their
          output files.
       3. Group the map output files by partition key, wrap each group
          in an ``InputMetaData`` and assign those to
          ``self.reduceWorkers``, waiting the same way as in step 1.
       4. Send ``('ExitCommand',)`` to the reduce workers, load their
          output and print the run statistics.

     Raises:
         RuntimeError: If some input split is still unprocessed after
             the map wait loop, or (only when ``self.debugFlag`` is set)
             if the summed map values differ from the summed reduce
             values.
     """
     self.myPrint('***** Master *****')
     self.myPrint('Input Splits: %s ' % (str([inputSplit.inputData for inputSplit in self.inputSplits])))
     startTime = time.time()

     # ----- Map phase -----
     self.initialSplitsAssignmentToWorkers(self.inputSplits, self.mapWorkers)
     pingFreq = int(self.workerTimeout / 3)
     lastPingedTime = time.time()
     while any((not inputSplit.isProcessedBy) for inputSplit in self.inputSplits):
         # NOTE(review): nothing in this loop body sets isProcessedBy, so
         # the update presumably happens in message handlers run at this
         # label (or inside pingAndCheckWorkers) -- confirm.
         self._label_('ReceiveResults')
         currentTime = time.time()
         if int(currentTime - lastPingedTime) >= pingFreq:
             self._label_('pingAndCheckWorkers')
             self.pingAndCheckWorkers(self.mapWorkers, currentTime)
             lastPingedTime = currentTime
     # Sanity check after the wait loop. The exception must be created on
     # the same line as ``raise``; a bare ``raise`` here would lose the
     # message.
     if not all(inputSplit.isProcessedBy for inputSplit in self.inputSplits):
         raise RuntimeError('all inputSplits are not processed')
     self.send(('ExitCommand',), self.mapWorkers.keys())

     mapOutputFiles = []
     for worker in self.mapWorkers.values():
         mapOutputFiles.extend(worker.outputFiles.values())
     self.myPrint('Map output files: %s' % (mapOutputFiles))
     self.myPrint('********* Map step completed *********')
     listKeyValue = Utils.loadFromFiles(mapOutputFiles, 'rb')
     if self.debugFlag:
         totalMapValues = sum(v for (k, v) in listKeyValue)
         self.myPrint(totalMapValues)
     self.myPrint(listKeyValue)

     # ----- Shuffle: group map output files by partition key -----
     partitionOuputFiles = defaultdict(list)
     for worker in self.mapWorkers.values():
         for partitionKey in worker.outputFiles.keys():
             partitionOuputFiles[partitionKey].append(worker.outputFiles[partitionKey])
     self.myPrint('Grouped partition files: %s' % (partitionOuputFiles))
     for key in partitionOuputFiles.keys():
         self.partitionSplits.append(InputMetaData(partitionOuputFiles[key]))

     # ----- Reduce phase -----
     self.initialSplitsAssignmentToWorkers(self.partitionSplits, self.reduceWorkers)
     pingFreq = int(self.workerTimeout / 3)
     lastPingedTime = time.time()
     while any((not partitionSplit.isProcessedBy) for partitionSplit in self.partitionSplits):
         self._label_('ReceiveResults')
         currentTime = time.time()
         if int(currentTime - lastPingedTime) >= pingFreq:
             self._label_('pingAndCheckWorkers')
             self.pingAndCheckWorkers(self.reduceWorkers, currentTime)
             lastPingedTime = currentTime
     self.send(('ExitCommand',), self.reduceWorkers.keys())

     reduceOutputFiles = []
     for worker in self.reduceWorkers.values():
         reduceOutputFiles.extend(worker.outputFiles)
     self.myPrint('Reduce output files: %s' % (reduceOutputFiles))
     self.myPrint('********* Reduce step completed *********')
     listKeyValue = Utils.loadFromFiles(reduceOutputFiles, 'rb')
     if self.debugFlag:
         totalReduceValues = sum(v for (k, v) in listKeyValue)
         self.myPrint(totalReduceValues)
     self.myPrint(listKeyValue)
     # Debug-only consistency check between the two phases.
     if self.debugFlag and totalMapValues != totalReduceValues:
         raise RuntimeError('outputs of Map and Reduce steps do not match')

     # ----- Statistics -----
     endTime = time.time()
     self.getStats()
     self._label_('exit')
     self.myPrint('Master Exiting')
     print('\n')
     print('No of Mappers: %d' % (len(self.mapWorkers)))
     print('No of Reducers: %d' % (len(self.reduceWorkers)))
     print('No of Map tasks: %d' % (self.noOfMapTasks()))
     print('No of Reduce tasks: %d' % (self.noOfReduceTasks()))
     print('No of failed Map workers: %d' % (self.noOfFailedMapWorkers()))
     print('No of failed Reduce workers: %d' % (self.noOfFailedReduceWorkers()))
     print('No of Map function calls: %d' % (self.noOfUserMapCalls()))
     print('No of Reduce function calls: %d' % (self.noOfUserReduceCalls()))
     print('Total time elapsed: %.2fsec' % (endTime - startTime))
开发者ID:samishchandra,项目名称:MapReduce,代码行数:81,代码来源:MapReduce.py


注:本文中的Utils.Utils.loadFromFiles方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。