This article collects typical usage examples of the Python function mrjob.logs.step._interpret_hadoop_jar_command_stderr. If you are wondering what _interpret_hadoop_jar_command_stderr does, how to call it, or what real usages look like, the curated examples here may help.
Below are 10 code examples of _interpret_hadoop_jar_command_stderr, ordered by popularity by default.
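For orientation, here is a minimal sketch (not one of the examples below, but consistent with them) of how the function is typically called: it takes an iterable of stderr lines (str or bytes) from a `hadoop jar` command and returns a dict whose keys, such as job_id, application_id, and errors, depend on what it recognized in the output.

from mrjob.logs.step import _interpret_hadoop_jar_command_stderr

# stderr lines as produced by a `hadoop jar` command
stderr_lines = [
    '15/12/11 13:33:11 INFO mapreduce.Job:'
    ' Running job: job_1449857544442_0002\n',
]

# returns a dict summarizing what was recognized in the output
interpretation = _interpret_hadoop_jar_command_stderr(stderr_lines)
print(interpretation)  # {'job_id': 'job_1449857544442_0002'}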
Example 1: test_lines_can_be_bytes
def test_lines_can_be_bytes(self):
self.assertEqual(
_interpret_hadoop_jar_command_stderr(
[b"15/12/11 13:33:11 INFO mapreduce.Job:" b" Running job: job_1449857544442_0002\n"]
),
dict(job_id="job_1449857544442_0002"),
)
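Byte input matters because, as the _run_job_in_hadoop examples below show, the runner passes a binary pipe (step_proc.stderr) straight to the function. A sketch of the same idea with a log file saved to disk (the file name is hypothetical):

from mrjob.logs.step import _interpret_hadoop_jar_command_stderr

# a file opened in binary mode yields bytes lines, which the
# function accepts just like str lines
with open('step-0000-hadoop.stderr', 'rb') as f:  # hypothetical path
    interpretation = _interpret_hadoop_jar_command_stderr(f)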
Example 2: test_yarn_error_without_exception
def test_yarn_error_without_exception(self):
# when there's no exception, just use the whole line as the message
lines = [
'16/01/22 19:14:16 INFO mapreduce.Job: Task Id :'
' attempt_1453488173054_0001_m_000000_0, Status : FAILED\n',
]
self.assertEqual(
_interpret_hadoop_jar_command_stderr(lines),
dict(
errors=[
dict(
attempt_id='attempt_1453488173054_0001_m_000000_0',
hadoop_error=dict(
message=(
'Task Id :'
' attempt_1453488173054_0001_m_000000_0,'
' Status : FAILED'
),
num_lines=1,
start_line=0,
),
# task ID is implied by attempt ID
task_id='task_1453488173054_0001_m_000000',
)
]
))
Example 3: test_treat_eio_as_eof
def test_treat_eio_as_eof(self):
def yield_lines():
yield ("15/12/11 13:33:11 INFO mapreduce.Job:" " Running job: job_1449857544442_0002\n")
e = IOError()
e.errno = errno.EIO
raise e
    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(yield_lines()),
        dict(job_id="job_1449857544442_0002"))
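This special case exists for the PTY code path in the _run_job_in_hadoop examples below: on Linux, reading from a PTY master after the child has exited raises an IOError/OSError with errno set to EIO rather than returning an empty read, so the parser must treat EIO as ordinary end-of-input. A standalone demonstration of the OS behavior (Linux-specific, not mrjob code):

import errno
import os
import pty

pid, master_fd = pty.fork()
if pid == 0:
    os._exit(0)  # child: exit immediately, closing the PTY slave
else:
    os.waitpid(pid, 0)
    try:
        while os.read(master_fd, 1024):
            pass  # drain anything the child wrote
    except OSError as e:
        # EOF on a PTY master surfaces as EIO on Linux
        assert e.errno == errno.EIO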
Example 4: test_infer_job_id_from_application_id
def test_infer_job_id_from_application_id(self):
lines = [
"15/12/11 13:32:45 INFO impl.YarnClientImpl:" " Submitted application application_1449857544442_0002\n"
]
self.assertEqual(
_interpret_hadoop_jar_command_stderr(lines),
        dict(application_id="application_1449857544442_0002",
             job_id="job_1449857544442_0002"),
)
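The inference here is purely textual: on YARN, an application ID and its MapReduce job ID share the same cluster-timestamp and sequence suffix, so one converts to the other by swapping the prefix. A hypothetical helper illustrating the relationship (not the actual mrjob implementation):

def job_id_from_application_id(application_id):
    # application_1449857544442_0002 -> job_1449857544442_0002
    return application_id.replace('application_', 'job_', 1)

assert (job_id_from_application_id('application_1449857544442_0002') ==
        'job_1449857544442_0002')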
Example 5: test_record_callback
def test_record_callback(self):
records = []
def record_callback(record):
records.append(record)
lines = [
'packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop'
'-unjar7873615084086492115/] []'
' /tmp/streamjob737002412080260811.jar tmpDir=null\n',
'15/12/11 13:33:11 INFO mapreduce.Job:'
' Running job: job_1449857544442_0002\n',
'Streaming Command Failed!\n',
]
self.assertEqual(
_interpret_hadoop_jar_command_stderr(
lines, record_callback=record_callback),
dict(job_id='job_1449857544442_0002'))
self.assertEqual(
records,
[
dict(
caller_location='',
level='',
logger='',
message=('packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop'
'-unjar7873615084086492115/] []'
' /tmp/streamjob737002412080260811.jar'
' tmpDir=null'),
num_lines=1,
start_line=0,
thread='',
timestamp='',
),
dict(
caller_location='',
level='INFO',
logger='mapreduce.Job',
message='Running job: job_1449857544442_0002',
num_lines=1,
start_line=1,
thread='',
timestamp='15/12/11 13:33:11',
),
dict(
caller_location='',
level='',
logger='',
message='Streaming Command Failed!',
num_lines=1,
start_line=2,
thread='',
timestamp='',
),
])
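Because the callback receives every parsed record as a dict with the fields asserted above (level, logger, message, timestamp, and so on), it can do more than append to a list. A sketch of a callback that surfaces only records Hadoop logged at WARN or ERROR level (the filtering logic is hypothetical; the record layout follows this test):

from mrjob.logs.step import _interpret_hadoop_jar_command_stderr

def report_problems(record):
    # unparsed lines get an empty 'level', so they are skipped here
    if record['level'] in ('WARN', 'ERROR'):
        print('%s %s %s: %s' % (record['timestamp'], record['level'],
                                record['logger'], record['message']))

lines = [
    '15/12/11 13:33:11 INFO mapreduce.Job:'
    ' Running job: job_1449857544442_0002\n',
]
_interpret_hadoop_jar_command_stderr(lines, record_callback=report_problems)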
Example 6: test_yarn_error
def test_yarn_error(self):
lines = [
'16/01/22 19:14:16 INFO mapreduce.Job: Task Id :'
' attempt_1453488173054_0001_m_000000_0, Status : FAILED\n',
'Error: java.lang.RuntimeException: PipeMapRed'
'.waitOutputThreads(): subprocess failed with code 1\n',
'\tat org.apache.hadoop.streaming.PipeMapRed'
'.waitOutputThreads(PipeMapRed.java:330)\n',
'\tat org.apache.hadoop.streaming.PipeMapRed.mapRedFinished'
'(PipeMapRed.java:543)\n',
'\n',
]
self.assertEqual(
_interpret_hadoop_jar_command_stderr(lines),
dict(
errors=[
dict(
attempt_id='attempt_1453488173054_0001_m_000000_0',
hadoop_error=dict(
message=(
'Error: java.lang.RuntimeException: PipeMapRed'
'.waitOutputThreads(): subprocess failed with'
' code 1\n\tat org.apache.hadoop.streaming'
'.PipeMapRed.waitOutputThreads(PipeMapRed.java'
':330)\n\tat org.apache.hadoop.streaming'
'.PipeMapRed.mapRedFinished(PipeMapRed.java'
':543)'
),
num_lines=5,
start_line=0,
),
# task ID is implied by attempt ID
task_id='task_1453488173054_0001_m_000000',
)
]
))
Example 7: _run_job_in_hadoop
def _run_job_in_hadoop(self):
for step_num, step in enumerate(self._get_steps()):
self._warn_about_spark_archives(step)
step_args = self._args_for_step(step_num)
env = _fix_env(self._env_for_step(step_num))
# log this *after* _args_for_step(), which can start a search
# for the Hadoop streaming jar
log.info('Running step %d of %d...' %
(step_num + 1, self._num_steps()))
log.debug('> %s' % cmd_line(step_args))
log.debug(' with environment: %r' % sorted(env.items()))
log_interpretation = {}
self._log_interpretations.append(log_interpretation)
# try to use a PTY if it's available
try:
pid, master_fd = pty.fork()
except (AttributeError, OSError):
# no PTYs, just use Popen
# user won't get much feedback for a while, so tell them
# Hadoop is running
log.debug('No PTY available, using Popen() to invoke Hadoop')
step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE, env=env)
step_interpretation = _interpret_hadoop_jar_command_stderr(
step_proc.stderr,
record_callback=_log_record_from_hadoop)
# there shouldn't be much output to STDOUT
for line in step_proc.stdout:
_log_line_from_hadoop(to_unicode(line).strip('\r\n'))
step_proc.stdout.close()
step_proc.stderr.close()
returncode = step_proc.wait()
else:
# we have PTYs
if pid == 0: # we are the child process
os.execvpe(step_args[0], step_args, env)
else:
log.debug('Invoking Hadoop via PTY')
with os.fdopen(master_fd, 'rb') as master:
# reading from master gives us the subprocess's
# stderr and stdout (it's a fake terminal)
step_interpretation = (
_interpret_hadoop_jar_command_stderr(
master,
record_callback=_log_record_from_hadoop))
_, returncode = os.waitpid(pid, 0)
# make sure output_dir is filled
if 'output_dir' not in step_interpretation:
step_interpretation['output_dir'] = (
self._step_output_uri(step_num))
log_interpretation['step'] = step_interpretation
step_type = step['type']
if not _is_spark_step_type(step_type):
counters = self._pick_counters(log_interpretation, step_type)
if counters:
log.info(_format_counters(counters))
else:
log.warning('No counters found')
if returncode:
error = self._pick_error(log_interpretation, step_type)
if error:
log.error('Probable cause of failure:\n\n%s\n' %
_format_error(error))
# use CalledProcessError's well-known message format
reason = str(CalledProcessError(returncode, step_args))
raise StepFailedException(
reason=reason, step_num=step_num,
num_steps=self._num_steps())
Example 8: test_yarn
def test_yarn(self):
    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(YARN_STEP_LOG_LINES),
        PARSED_YARN_STEP_LOG_LINES)
Example 9: test_empty
def test_empty(self):
self.assertEqual(_interpret_hadoop_jar_command_stderr([]), {})
Example 10: _run_job_in_hadoop
def _run_job_in_hadoop(self):
for step_num in range(self._num_steps()):
step_args = self._args_for_step(step_num)
# log this *after* _args_for_step(), which can start a search
# for the Hadoop streaming jar
log.info('Running step %d of %d' %
(step_num + 1, self._num_steps()))
log.debug('> %s' % cmd_line(step_args))
log_interpretation = {}
self._log_interpretations.append(log_interpretation)
# try to use a PTY if it's available
try:
pid, master_fd = pty.fork()
except (AttributeError, OSError):
# no PTYs, just use Popen
# user won't get much feedback for a while, so tell them
# Hadoop is running
log.debug('No PTY available, using Popen() to invoke Hadoop')
step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE)
step_interpretation = _interpret_hadoop_jar_command_stderr(
step_proc.stderr,
record_callback=_log_record_from_hadoop)
# there shouldn't be much output to STDOUT
for line in step_proc.stdout:
_log_line_from_hadoop(to_string(line).strip('\r\n'))
step_proc.stdout.close()
step_proc.stderr.close()
returncode = step_proc.wait()
else:
# we have PTYs
if pid == 0: # we are the child process
os.execvp(step_args[0], step_args)
else:
log.debug('Invoking Hadoop via PTY')
with os.fdopen(master_fd, 'rb') as master:
# reading from master gives us the subprocess's
# stderr and stdout (it's a fake terminal)
step_interpretation = (
_interpret_hadoop_jar_command_stderr(
master,
record_callback=_log_record_from_hadoop))
_, returncode = os.waitpid(pid, 0)
# make sure output_dir is filled
if 'output_dir' not in step_interpretation:
step_interpretation['output_dir'] = (
self._hdfs_step_output_dir(step_num))
log_interpretation['step'] = step_interpretation
if 'counters' not in step_interpretation:
log.info('Attempting to read counters from history log')
self._interpret_history_log(log_interpretation)
# just print counters for this one step
self._print_counters(step_nums=[step_num])
if returncode:
error = self._pick_error(log_interpretation)
if error:
log.error('Probable cause of failure:\n\n%s\n' %
_format_error(error))
raise CalledProcessError(returncode, step_args)