當前位置: 首頁>>代碼示例>>Python>>正文


Python Accumulator.merge方法代碼示例

本文整理匯總了Python中dpark.accumulator.Accumulator.merge方法的典型用法代碼示例。如果您正苦於以下問題:Python Accumulator.merge方法的具體用法?Python Accumulator.merge怎麽用?Python Accumulator.merge使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在dpark.accumulator.Accumulator的用法示例。


在下文中一共展示了Accumulator.merge方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: runJob

# 需要導入模塊: from dpark.accumulator import Accumulator [as 別名]
# 或者: from dpark.accumulator.Accumulator import merge [as 別名]
    def runJob(self, finalRdd, func, partitions, allowLocal):
        outputParts = list(partitions)
        numOutputParts = len(partitions)
        finalStage = self.newStage(finalRdd, None)
        results = [None]*numOutputParts
        finished = [None]*numOutputParts
        lastFinished = 0
        numFinished = 0

        waiting = set()
        running = set()
        failed = set()
        pendingTasks = {}
        lastFetchFailureTime = 0

        self.updateCacheLocs()

        logger.debug("Final stage: %s, %d", finalStage, numOutputParts)
        logger.debug("Parents of final stage: %s", finalStage.parents)
        logger.debug("Missing parents: %s", self.getMissingParentStages(finalStage))

        if allowLocal and (not finalStage.parents or not self.getMissingParentStages(finalStage)) and numOutputParts == 1:
            split = finalRdd.splits[outputParts[0]]
            yield func(finalRdd.iterator(split))
            return

        def submitStage(stage):
            logger.debug("submit stage %s", stage)
            if stage not in waiting and stage not in running:
                missing = self.getMissingParentStages(stage)
                if not missing:
                    submitMissingTasks(stage)
                    running.add(stage)
                else:
                    for parent in missing:
                        submitStage(parent)
                    waiting.add(stage)

        def submitMissingTasks(stage):
            myPending = pendingTasks.setdefault(stage, set())
            tasks = []
            have_prefer = True
            if stage == finalStage:
                for i in range(numOutputParts):
                    if not finished[i]:
                        part = outputParts[i]
                        if have_prefer:
                            locs = self.getPreferredLocs(finalRdd, part)
                            if not locs:
                                have_prefer = False
                        else:
                            locs = []
                        tasks.append(ResultTask(finalStage.id, finalRdd,
                            func, part, locs, i))
            else:
                for p in range(stage.numPartitions):
                    if not stage.outputLocs[p]:
                        if have_prefer:
                            locs = self.getPreferredLocs(stage.rdd, p)
                            if not locs:
                                have_prefer = False
                        else:
                            locs = []
                        tasks.append(ShuffleMapTask(stage.id, stage.rdd,
                            stage.shuffleDep, p, locs))
            logger.debug("add to pending %s tasks", len(tasks))
            myPending |= set(t.id for t in tasks)
            self.submitTasks(tasks)

        submitStage(finalStage)

        while numFinished != numOutputParts:
            try:
                evt = self.completionEvents.get(False)
            except Queue.Empty:
                self.check()
                if self._shutdown:
                    sys.exit(1)

                if failed and time.time() > lastFetchFailureTime + RESUBMIT_TIMEOUT:
                    self.updateCacheLocs()
                    for stage in failed:
                        logger.info("Resubmitting failed stages: %s", stage)
                        submitStage(stage)
                    failed.clear()
                else:
                    time.sleep(0.1)
                continue
               
            task, reason = evt.task, evt.reason
            stage = self.idToStage[task.stageId]
            if stage not in pendingTasks: # stage from other job
                continue
            logger.debug("remove from pedding %s from %s", task, stage)
            pendingTasks[stage].remove(task.id)
            if isinstance(reason, Success):
                Accumulator.merge(evt.accumUpdates)
                if isinstance(task, ResultTask):
                    finished[task.outputId] = True
                    numFinished += 1
#.........這裏部分代碼省略.........
開發者ID:tclh123,項目名稱:dpark,代碼行數:103,代碼來源:schedule.py

示例2: runJob

# 需要導入模塊: from dpark.accumulator import Accumulator [as 別名]
# 或者: from dpark.accumulator.Accumulator import merge [as 別名]
    def runJob(self, finalRdd, func, partitions, allowLocal):
        outputParts = list(partitions)
        numOutputParts = len(partitions)
        finalStage = self.newStage(finalRdd, None)
        try:
            from dpark.web.ui.views.rddopgraph import StageInfo
            stage_info = StageInfo()
            stage_info.create_stage_info(finalStage)

            def create_stage_info_recur(cur_stage, is_final=False):
                if not cur_stage or cur_stage.id in self.idToRunJob:
                    return
                for par_stage in cur_stage.parents:
                    create_stage_info_recur(par_stage)
                if cur_stage.id not in self.idToRunJob:
                    self.idToRunJob[cur_stage.id] = StageInfo.idToStageInfo[cur_stage.id]
                    self.idToRunJob[cur_stage.id].is_final = is_final

            create_stage_info_recur(finalStage, is_final=True)
        except ImportError:
            pass
        results = [None] * numOutputParts
        finished = [None] * numOutputParts
        lastFinished = 0
        numFinished = 0

        waiting = set()
        running = set()
        failed = set()
        pendingTasks = {}
        lastFetchFailureTime = 0

        self.updateCacheLocs()

        logger.debug('Final stage: %s, %d', finalStage, numOutputParts)
        logger.debug('Parents of final stage: %s', finalStage.parents)
        logger.debug(
            'Missing parents: %s',
            self.getMissingParentStages(finalStage))

        def onStageFinished(stage):
            def _(r, dep):
                return r._do_checkpoint()

            MutableDict.merge()
            walk_dependencies(stage.rdd, _)

        if (allowLocal and
                (
                    not finalStage.parents or
                    not self.getMissingParentStages(finalStage)
                ) and numOutputParts == 1):
            split = finalRdd.splits[outputParts[0]]
            yield func(finalRdd.iterator(split))
            onStageFinished(finalStage)
            return

        def submitStage(stage):
            logger.debug('submit stage %s', stage)
            if stage not in waiting and stage not in running:
                missing = self.getMissingParentStages(stage)
                if not missing:
                    submitMissingTasks(stage)
                    running.add(stage)
                else:
                    for parent in missing:
                        submitStage(parent)
                    waiting.add(stage)

        def submitMissingTasks(stage):
            myPending = pendingTasks.setdefault(stage, set())
            tasks = []
            have_prefer = True
            if stage == finalStage:
                for i in range(numOutputParts):
                    if not finished[i]:
                        part = outputParts[i]
                        if have_prefer:
                            locs = self.getPreferredLocs(finalRdd, part)
                            if not locs:
                                have_prefer = False
                        else:
                            locs = []
                        tasks.append(ResultTask(finalStage.id, finalRdd,
                                                func, part, locs, i))
            else:
                for p in range(stage.numPartitions):
                    if not stage.outputLocs[p]:
                        if have_prefer:
                            locs = self.getPreferredLocs(stage.rdd, p)
                            if not locs:
                                have_prefer = False
                        else:
                            locs = []
                        tasks.append(ShuffleMapTask(stage.id, stage.rdd,
                                                    stage.shuffleDep, p, locs))
            logger.debug('add to pending %s tasks', len(tasks))
            myPending |= set(t.id for t in tasks)
            self.submitTasks(tasks)

#.........這裏部分代碼省略.........
開發者ID:windreamer,項目名稱:dpark,代碼行數:103,代碼來源:schedule.py


注:本文中的dpark.accumulator.Accumulator.merge方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。