本文整理汇总了Python中Utils.Utils.loadFromFiles方法的典型用法代码示例。如果您正苦于以下问题：Python Utils.loadFromFiles方法的具体用法？Python Utils.loadFromFiles怎么用？Python Utils.loadFromFiles使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Utils.Utils的用法示例。
在下文中一共展示了Utils.loadFromFiles方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Reduce
# 需要导入模块: from Utils import Utils [as 别名]
# 或者: from Utils.Utils import loadFromFiles [as 别名]
def Reduce(self, partitionFiles):
    """Call the user reduce function once per unique key.

    Loads ``(key, value)`` pairs through ``Utils.loadFromFiles``, groups
    the values by key, and invokes ``self.reduceFunc((key, values))`` for
    each key.

    Args:
        partitionFiles: Forwarded unchanged to ``Utils.loadFromFiles``
            together with mode ``'rb'``.  Presumably a collection of
            partition file paths -- TODO confirm against the caller.

    Returns:
        A 3-tuple ``(result, countReduceCalls, countSkippedRecords)``:
        the list of values returned by ``self.reduceFunc``, the number
        of calls that completed, and the number of keys whose call
        raised and was skipped.

    Raises:
        Exception: Whatever ``self.reduceFunc`` raises, when
            ``self.skipBadRecords`` is falsy.
    """
    countReduceCalls = 0
    countSkippedRecords = 0
    result = list()

    # Group every value under its key so the reduce function sees each
    # key exactly once.
    listKeyValue = Utils.loadFromFiles(partitionFiles, 'rb')
    partitionedData = defaultdict(list)
    for key, value in listKeyValue:
        partitionedData[key].append(value)

    # Keys are visited in sorted order only when the (sic) ordering
    # guarantee flag is set; otherwise in the dict's own iteration order.
    if self.orderingGaurantee:
        listKeys = sorted(partitionedData)
    else:
        listKeys = list(partitionedData)

    for key in listKeys:
        try:
            result.append(self.reduceFunc((key, partitionedData[key])))
            countReduceCalls += 1
        except Exception:
            # Only ordinary errors count as "bad records".  A bare
            # ``except:`` would also swallow KeyboardInterrupt/SystemExit
            # and make the worker impossible to stop while skipping.
            if self.skipBadRecords:
                countSkippedRecords += 1
            else:
                raise
    return (result, countReduceCalls, countSkippedRecords)
示例2: main
# 需要导入模块: from Utils import Utils [as 别名]
# 或者: from Utils.Utils import loadFromFiles [as 别名]
def main(self):
    """Drive the whole job: map phase, partition grouping, reduce phase, stats.

    Flow, as visible in this method:

    1. Assign ``self.inputSplits`` to ``self.mapWorkers`` and wait until
       every split's ``isProcessedBy`` is truthy, pinging the workers
       roughly every ``self.workerTimeout / 3`` seconds.
    2. Send ``('ExitCommand',)`` to the map workers and collect their
       output files.
    3. Group the map output files by partition key, wrap each group in
       an ``InputMetaData`` and append it to ``self.partitionSplits``.
    4. Repeat the assign / wait / ping cycle with ``self.reduceWorkers``.
    5. Send ``('ExitCommand',)`` to the reduce workers, collect their
       output files and print the run statistics.

    Raises:
        RuntimeError: If some input split is still unprocessed after the
            map wait loop, or -- only when ``self.debugFlag`` is set --
            if the summed map values differ from the summed reduce values.
    """
    self.myPrint('***** Master *****')
    self.myPrint('Input Splits: %s ' % (str([inputSplit.inputData for inputSplit in self.inputSplits])))
    startTime = time.time()

    # ---------------- Map phase ----------------
    self.initialSplitsAssignmentToWorkers(self.inputSplits, self.mapWorkers)
    pingFreq = int(self.workerTimeout / 3)
    lastPingedTime = time.time()
    while any((not inputSplit.isProcessedBy) for inputSplit in self.inputSplits):
        # NOTE(review): ``isProcessedBy`` is not modified in this method;
        # presumably message handlers run at this label update it -- confirm.
        self._label_('ReceiveResults')
        currentTime = time.time()
        if int(currentTime - lastPingedTime) >= pingFreq:
            self._label_('pingAndCheckWorkers')
            self.pingAndCheckWorkers(self.mapWorkers, currentTime)
            lastPingedTime = currentTime

    # Sanity check after the wait loop.  The exception must be raised in
    # a single statement: a bare ``raise`` followed by a separate
    # ``RuntimeError(...)`` expression discards the message and fails with
    # "No active exception to reraise".
    if not all(inputSplit.isProcessedBy for inputSplit in self.inputSplits):
        raise RuntimeError('all inputSplits are not processed')

    self.send(('ExitCommand',), self.mapWorkers.keys())

    mapOutputFiles = list()
    for mapWorker in self.mapWorkers.keys():
        mapOutputFiles.extend(
            self.mapWorkers[mapWorker].outputFiles.values())
    self.myPrint('Map output files: %s' % (mapOutputFiles))
    self.myPrint('********* Map step completed *********')

    listKeyValue = Utils.loadFromFiles(mapOutputFiles, 'rb')
    if self.debugFlag:
        # NOTE(review): the source listing lost its indentation; both
        # prints are assumed to belong to the debug branch -- confirm.
        totalMapValues = sum(v for (k, v) in listKeyValue)
        self.myPrint(totalMapValues)
        self.myPrint(listKeyValue)

    # ---------------- Partition grouping ----------------
    # Collect, per partition key, the file each map worker produced for it.
    partitionOuputFiles = defaultdict(list)
    for mapWorker in self.mapWorkers.keys():
        for partitionKey in self.mapWorkers[mapWorker].outputFiles.keys():
            partitionOuputFiles[partitionKey].append(self.mapWorkers[mapWorker].outputFiles[partitionKey])
    self.myPrint('Grouped partition files: %s' % (partitionOuputFiles))
    for key in partitionOuputFiles.keys():
        self.partitionSplits.append(
            InputMetaData(partitionOuputFiles[key]))

    # ---------------- Reduce phase ----------------
    self.initialSplitsAssignmentToWorkers(self.partitionSplits, self.reduceWorkers)
    pingFreq = int(self.workerTimeout / 3)
    lastPingedTime = time.time()
    while any((not partitionSplit.isProcessedBy) for partitionSplit in self.partitionSplits):
        self._label_('ReceiveResults')
        currentTime = time.time()
        if int(currentTime - lastPingedTime) >= pingFreq:
            self._label_('pingAndCheckWorkers')
            self.pingAndCheckWorkers(self.reduceWorkers, currentTime)
            lastPingedTime = currentTime

    self.send(('ExitCommand',), self.reduceWorkers.keys())

    reduceOutputFiles = list()
    for reduceWorker in self.reduceWorkers.keys():
        reduceOutputFiles.extend(self.reduceWorkers[reduceWorker].outputFiles)
    self.myPrint('Reduce output files: %s' % (reduceOutputFiles))
    self.myPrint('********* Reduce step completed *********')

    listKeyValue = Utils.loadFromFiles(reduceOutputFiles, 'rb')
    if self.debugFlag:
        totalReduceValues = sum(v for (k, v) in listKeyValue)
        self.myPrint(totalReduceValues)
        self.myPrint(listKeyValue)
    if self.debugFlag:
        # Debug-only consistency check between the two phases; both
        # totals exist only when ``debugFlag`` is set.
        if totalMapValues != totalReduceValues:
            raise RuntimeError('outputs of Map and Reduce steps do not match')

    # ---------------- Statistics ----------------
    endTime = time.time()
    self.getStats()
    self._label_('exit')
    self.myPrint('Master Exiting')
    print('\n')
    print('No of Mappers: %d' % (len(self.mapWorkers)))
    print('No of Reducers: %d' % (len(self.reduceWorkers)))
    print('No of Map tasks: %d' % (self.noOfMapTasks()))
    print('No of Reduce tasks: %d' % (self.noOfReduceTasks()))
    print('No of failed Map workers: %d' % (self.noOfFailedMapWorkers()))
    print('No of failed Reduce workers: %d' % (self.noOfFailedReduceWorkers()))
    print('No of Map function calls: %d' % (self.noOfUserMapCalls()))
    print('No of Reduce function calls: %d' % (self.noOfUserReduceCalls()))
    print('Total time elapsed: %.2fsec' % (endTime - startTime))