This article collects typical usage examples of the Accumulator class from the Python module dpark.accumulator. If you are wondering what the Accumulator class does, how to use it, or what real code using it looks like, the curated examples below may help.
The following presents 11 code examples of the Accumulator class, sorted by popularity by default.
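All 11 examples share the same accumulator lifecycle: a worker calls Accumulator.clear() before running a task, collects the task's local updates with Accumulator.values(), ships them back alongside the result, and the driver folds them in with Accumulator.merge() (see Examples 10 and 11). As a minimal sketch of that round trip, assuming a Spark-style Accumulator(initial) constructor with add() and value members (only clear()/values()/merge() actually appear in the examples below):

from dpark.accumulator import Accumulator

counter = Accumulator(0)           # assumed Spark-style constructor

Accumulator.clear()                # worker side: reset local updates before the task
counter.add(1)                     # task code increments its accumulators
counter.add(2)
accUpdate = Accumulator.values()   # worker side: collect local updates to ship back
Accumulator.merge(accUpdate)       # driver side: fold the shipped updates back in
print(counter.value)               # assumed driver-side read of the merged total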
Example 1: run_task

def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:])
            flag += 2
        return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
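The (flag, data) pair returned above is a small protocol that recurs throughout these examples: flag 0 means data is a marshal dump, flag 1 a cPickle dump, and adding 2 means the compressed payload exceeded TASK_RESULT_LIMIT, so data was replaced by a path or URI it can be fetched from. A receiving scheduler might invert it roughly as below; this is only a sketch, with a hypothetical fetch_bytes standing in for the transport and zlib for whatever compress()/decompress() pair dpark really uses:

import marshal
import cPickle
import zlib

def decode_result(flag, data, fetch_bytes=None):
    if flag >= 2:                   # payload was spilled; data is a path or URI
        data = fetch_bytes(data)    # hypothetical download helper
        flag -= 2
    data = zlib.decompress(data)    # assumption: compress() above is zlib-based
    if flag == 0:
        return marshal.loads(data)  # flag 0: marshal-serialized result
    return cPickle.loads(data)      # flag 1: pickle-serialized result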
Example 2: run_task

def run_task(task, aid):
    try:
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(aid)
        accUpdate = Accumulator.values()
        try:
            flag, data = 0, marshal.dumps(result)
        except ValueError:
            flag, data = 1, cPickle.dumps(result)
        if len(data) > TASK_RESULT_LIMIT and env.dfs:
            workdir = env.get('WORKDIR')
            path = os.path.join(workdir, str(task.id) + '.result')
            with open(path, 'w') as f:
                f.write(data)
            data = path
            flag += 2
        setproctitle('dpark worker: idle')
        return mesos_pb2.TASK_FINISHED, cPickle.dumps((task.id, Success(), (flag, data), accUpdate), -1)
    except Exception, e:
        import traceback
        msg = traceback.format_exc()
        setproctitle('dpark worker: idle')
        return mesos_pb2.TASK_FAILED, cPickle.dumps((task.id, OtherFailure(msg), None, None), -1)
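Instead of checking marshalable(result) up front as in Example 1, this variant simply attempts marshal.dumps and falls back to cPickle when marshal raises ValueError on a type it cannot serialize. It also spills oversized results only when a distributed filesystem is mounted (env.dfs), and returns a bare file path rather than a server URI.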
Example 3: run_task

def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name)
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2
        return mesos_pb2.TASK_FINISHED, cPickle.dumps((task.id, Success(), (flag, data), accUpdate), -1)
    except Exception, e:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps((task.id, OtherFailure(msg), None, None), -1)
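Here the spilled file is written into the worker's WORKDIR but advertised as LocalFileShuffle.getServerUri() + '/' + name, which suggests the work directory is exported by the same local HTTP server that serves shuffle output, so the scheduler can fetch the result remotely.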
Example 4: run_task

def run_task(task, aid):
    logger.debug("Running task %r", task)
    try:
        Accumulator.clear()
        result = task.run(aid)
        accumUpdates = Accumulator.values()
        return (task.id, Success(), result, accumUpdates)
    except Exception, e:
        logger.error("error in task %s", task)
        import traceback
        traceback.print_exc()
        return (task.id, OtherFailure("exception:" + str(e)), None, None)
Example 5: run_task

def run_task(task, aid):
    logger.debug('Running task %r', task)
    try:
        Accumulator.clear()
        result = task.run(aid)
        accumUpdates = Accumulator.values()
        MutableDict.flush()
        return (task.id, Success(), result, accumUpdates)
    except Exception as e:
        logger.error('error in task %s', task)
        import traceback
        traceback.print_exc()
        return (task.id, OtherFailure('exception:' + str(e)), None, None)
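Examples 4 and 5 are the in-process (local mode) variant: nothing is serialized or size-checked, and the (task.id, Success(), result, accumUpdates) tuple is handed straight back to the scheduler. Example 5 differs only in flushing MutableDict before reporting, so pending mutable-collection writes are persisted along with the accumulator updates.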
Example 6: run_task

def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            # shuffle_id starts from 1
            swd = ShuffleWorkDir(0, task.id, ttid.task_try)
            tmppath = swd.alloc_tmp(len(data))
            with open(tmppath, 'wb') as f:
                f.write(data)
            path = swd.export(tmppath)
            data = '/'.join(
                [env.server_uri] + path.split('/')[-3:]
            )
            flag += 2
        return TaskState.finished, cPickle.dumps(((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return TaskState.failed, TaskEndReason.fetch_failed, str(e), cPickle.dumps(e)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        fatal_exceptions = (DparkUserFatalError, ArithmeticError,
                            ValueError, LookupError, SyntaxError,
                            TypeError, AssertionError)
        prefix = "FATAL" if isinstance(e, fatal_exceptions) else "FAILED"
        return TaskState.failed, '{}_EXCEPTION_{}'.format(prefix, ename), msg, cPickle.dumps(e)
    finally:
        gc.collect()
        gc.enable()
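This example also classifies failures: deterministic user errors (ArithmeticError, ValueError, TypeError, and the like) are reported as FATAL_EXCEPTION_<name>, everything else as FAILED_EXCEPTION_<name>, giving the scheduler grounds to skip pointless retries. A scheduler-side check might look like the sketch below; the actual retry decision logic in dpark is not part of this example:

def should_retry(task_end_reason):
    # FATAL_* marks deterministic user errors that would fail again on retry.
    return not task_end_reason.startswith('FATAL_EXCEPTION_')

assert should_retry('FAILED_EXCEPTION_IOError')
assert not should_retry('FATAL_EXCEPTION_ValueError')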
Example 7: run_task

def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception, e:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
#......... the rest of this example is omitted in the source .........
Example 8: run_task

def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception, e:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
#......... the rest of this example is omitted in the source .........
Example 9: run_task

def run_task(task_data):
    try:
        gc.disable()
        task, ntry = loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception as e:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join(
                [LocalFileShuffle.getServerUri()] + path.split('/')[-3:]
            )
            flag += 2
        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None), -1)
    finally:
        close_mfs()
        gc.collect()
        gc.enable()
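Compared with Examples 1 and 3, this version is decoupled from Mesos: it returns plain 'TASK_FINISHED'/'TASK_FAILED' strings instead of mesos_pb2 constants, keeps FetchFailed distinct (returned as-is so the scheduler can resubmit the producing map stage) from other exceptions (wrapped in OtherFailure with a traceback), and uses a finally block to close filesystem handles (close_mfs) and re-enable garbage collection however the task ends.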
Example 10: runJob

def runJob(self, finalRdd, func, partitions, allowLocal):
    outputParts = list(partitions)
    numOutputParts = len(partitions)
    finalStage = self.newStage(finalRdd, None)
    results = [None] * numOutputParts
    finished = [None] * numOutputParts
    lastFinished = 0
    numFinished = 0

    waiting = set()
    running = set()
    failed = set()
    pendingTasks = {}
    lastFetchFailureTime = 0

    self.updateCacheLocs()

    logger.debug("Final stage: %s, %d", finalStage, numOutputParts)
    logger.debug("Parents of final stage: %s", finalStage.parents)
    logger.debug("Missing parents: %s", self.getMissingParentStages(finalStage))

    if allowLocal and (not finalStage.parents or not self.getMissingParentStages(finalStage)) and numOutputParts == 1:
        split = finalRdd.splits[outputParts[0]]
        yield func(finalRdd.iterator(split))
        return

    def submitStage(stage):
        logger.debug("submit stage %s", stage)
        if stage not in waiting and stage not in running:
            missing = self.getMissingParentStages(stage)
            if not missing:
                submitMissingTasks(stage)
                running.add(stage)
            else:
                for parent in missing:
                    submitStage(parent)
                waiting.add(stage)

    def submitMissingTasks(stage):
        myPending = pendingTasks.setdefault(stage, set())
        tasks = []
        have_prefer = True
        if stage == finalStage:
            for i in range(numOutputParts):
                if not finished[i]:
                    part = outputParts[i]
                    if have_prefer:
                        locs = self.getPreferredLocs(finalRdd, part)
                        if not locs:
                            have_prefer = False
                    else:
                        locs = []
                    tasks.append(ResultTask(finalStage.id, finalRdd,
                                            func, part, locs, i))
        else:
            for p in range(stage.numPartitions):
                if not stage.outputLocs[p]:
                    if have_prefer:
                        locs = self.getPreferredLocs(stage.rdd, p)
                        if not locs:
                            have_prefer = False
                    else:
                        locs = []
                    tasks.append(ShuffleMapTask(stage.id, stage.rdd,
                                                stage.shuffleDep, p, locs))
        logger.debug("add to pending %s tasks", len(tasks))
        myPending |= set(t.id for t in tasks)
        self.submitTasks(tasks)

    submitStage(finalStage)

    while numFinished != numOutputParts:
        try:
            evt = self.completionEvents.get(False)
        except Queue.Empty:
            self.check()
            if self._shutdown:
                sys.exit(1)
            if failed and time.time() > lastFetchFailureTime + RESUBMIT_TIMEOUT:
                self.updateCacheLocs()
                for stage in failed:
                    logger.info("Resubmitting failed stages: %s", stage)
                    submitStage(stage)
                failed.clear()
            else:
                time.sleep(0.1)
            continue

        task, reason = evt.task, evt.reason
        stage = self.idToStage[task.stageId]
        if stage not in pendingTasks:  # stage from other job
            continue
        logger.debug("remove from pending %s from %s", task, stage)
        pendingTasks[stage].remove(task.id)
        if isinstance(reason, Success):
            Accumulator.merge(evt.accumUpdates)
            if isinstance(task, ResultTask):
                finished[task.outputId] = True
                numFinished += 1
#......... part of the code omitted here .........
Example 11: runJob

#......... part of the code omitted here .........
                    tasks.append(ResultTask(finalStage.id, finalRdd,
                                            func, part, locs, i))
        else:
            for p in range(stage.numPartitions):
                if not stage.outputLocs[p]:
                    if have_prefer:
                        locs = self.getPreferredLocs(stage.rdd, p)
                        if not locs:
                            have_prefer = False
                    else:
                        locs = []
                    tasks.append(ShuffleMapTask(stage.id, stage.rdd,
                                                stage.shuffleDep, p, locs))
        logger.debug('add to pending %s tasks', len(tasks))
        myPending |= set(t.id for t in tasks)
        self.submitTasks(tasks)

    submitStage(finalStage)

    while numFinished != numOutputParts:
        try:
            evt = self.completionEvents.get(False)
        except Queue.Empty:
            self.check()
            if self._shutdown:
                sys.exit(1)
            if (failed and
                    time.time() > lastFetchFailureTime + RESUBMIT_TIMEOUT):
                self.updateCacheLocs()
                for stage in failed:
                    logger.info('Resubmitting failed stages: %s', stage)
                    submitStage(stage)
                failed.clear()
            else:
                time.sleep(0.1)
            continue

        task, reason = evt.task, evt.reason
        stage = self.idToStage[task.stageId]
        if stage not in pendingTasks:  # stage from other job
            continue
        logger.debug('remove from pending %s from %s', task, stage)
        pendingTasks[stage].remove(task.id)
        if isinstance(reason, Success):
            Accumulator.merge(evt.accumUpdates)
            if isinstance(task, ResultTask):
                finished[task.outputId] = True
                numFinished += 1
                results[task.outputId] = evt.result
                while (lastFinished < numOutputParts and
                        finished[lastFinished]):
                    yield results[lastFinished]
                    results[lastFinished] = None
                    lastFinished += 1
            elif isinstance(task, ShuffleMapTask):
                stage = self.idToStage[task.stageId]
                stage.addOutputLoc(task.partition, evt.result)
                if not pendingTasks[stage] and all(stage.outputLocs):
                    logger.debug(
                        '%s finished; looking for newly runnable stages',
                        stage
                    )
                    onStageFinished(stage)
                    running.remove(stage)
                    if stage.shuffleDep is not None:
                        self.mapOutputTracker.registerMapOutputs(
                            stage.shuffleDep.shuffleId,
                            [l[-1] for l in stage.outputLocs])
                    self.updateCacheLocs()
                    newlyRunnable = set(
                        stage for stage in waiting
                        if not self.getMissingParentStages(stage)
                    )
                    waiting -= newlyRunnable
                    running |= newlyRunnable
                    logger.debug(
                        'newly runnable: %s, %s', waiting, newlyRunnable)
                    for stage in newlyRunnable:
                        submitMissingTasks(stage)
        elif isinstance(reason, FetchFailed):
            if stage in running:
                waiting.add(stage)
            mapStage = self.shuffleToMapStage[reason.shuffleId]
            mapStage.removeHost(reason.serverUri)
            failed.add(mapStage)
            lastFetchFailureTime = time.time()
        else:
            logger.error(
                'task %s failed: %s %s %s',
                task,
                reason,
                type(reason),
                reason.message)
            raise Exception(reason.message)

    onStageFinished(finalStage)
    assert not any(results)
    return
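One detail worth isolating from runJob above: results are yielded to the caller in partition order even though tasks complete out of order, via the finished/lastFinished cursor over the results list (each slot is also nulled out once yielded to free memory early, hence the final assert). The pattern on its own, as a minimal dpark-independent sketch:

def stream_in_order(total, completions):
    # completions yields (index, result) pairs in arbitrary completion order;
    # results are released strictly in index order, freeing each slot once yielded.
    results = [None] * total
    finished = [False] * total
    last = 0
    for index, result in completions:
        results[index] = result
        finished[index] = True
        while last < total and finished[last]:
            yield results[last]
            results[last] = None  # same trick runJob uses to free memory early
            last += 1

# e.g. list(stream_in_order(3, [(2, 'c'), (0, 'a'), (1, 'b')])) == ['a', 'b', 'c']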