This article collects typical usage examples of the Python function mrjob.logs.step._interpret_hadoop_jar_command_stderr. If you are wondering what _interpret_hadoop_jar_command_stderr does, how to call it, or what real usages look like, the curated examples here may help.
Below are 10 code examples of _interpret_hadoop_jar_command_stderr, ordered by popularity by default.
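For orientation, here is a minimal sketch (not one of the examples below, but consistent with them) of how the function is typically called: it takes an iterable of stderr lines (str or bytes) from a `hadoop jar` command and returns a dict whose keys, such as job_id, application_id, and errors, depend on what it recognized in the output.

from mrjob.logs.step import _interpret_hadoop_jar_command_stderr

# stderr lines as produced by a `hadoop jar` command
stderr_lines = [
    '15/12/11 13:33:11 INFO mapreduce.Job:'
    ' Running job: job_1449857544442_0002\n',
]

# returns a dict summarizing what was recognized in the output
interpretation = _interpret_hadoop_jar_command_stderr(stderr_lines)
print(interpretation)  # {'job_id': 'job_1449857544442_0002'}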
Example 1: test_lines_can_be_bytes
def test_lines_can_be_bytes(self):
self.assertEqual(
_interpret_hadoop_jar_command_stderr(
[b"15/12/11 13:33:11 INFO mapreduce.Job:" b" Running job: job_1449857544442_0002\n"]
),
dict(job_id="job_1449857544442_0002"),
)
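Byte input matters because, as the _run_job_in_hadoop examples below show, the runner passes a binary pipe (step_proc.stderr) straight to the function. A sketch of the same idea with a log file saved to disk (the file name is hypothetical):

from mrjob.logs.step import _interpret_hadoop_jar_command_stderr

# a file opened in binary mode yields bytes lines, which the
# function accepts just like str lines
with open('step-0000-hadoop.stderr', 'rb') as f:  # hypothetical path
    interpretation = _interpret_hadoop_jar_command_stderr(f)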
Example 2: test_yarn_error_without_exception
def test_yarn_error_without_exception(self):
# when there's no exception, just use the whole line as the message
lines = [
'16/01/22 19:14:16 INFO mapreduce.Job: Task Id :'
' attempt_1453488173054_0001_m_000000_0, Status : FAILED\n',
]
self.assertEqual(
_interpret_hadoop_jar_command_stderr(lines),
dict(
errors=[
dict(
attempt_id='attempt_1453488173054_0001_m_000000_0',
hadoop_error=dict(
message=(
'Task Id :'
' attempt_1453488173054_0001_m_000000_0,'
' Status : FAILED'
),
num_lines=1,
start_line=0,
),
# task ID is implied by attempt ID
task_id='task_1453488173054_0001_m_000000',
)
]
))
Example 3: test_treat_eio_as_eof
def test_treat_eio_as_eof(self):
def yield_lines():
yield ("15/12/11 13:33:11 INFO mapreduce.Job:" " Running job: job_1449857544442_0002\n")
e = IOError()
e.errno = errno.EIO
raise e
    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(yield_lines()),
        dict(job_id="job_1449857544442_0002"))
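This special case exists for the PTY code path in the _run_job_in_hadoop examples below: on Linux, reading from a PTY master after the child has exited raises an IOError/OSError with errno set to EIO rather than returning an empty read, so the parser must treat EIO as ordinary end-of-input. A standalone demonstration of the OS behavior (Linux-specific, not mrjob code):

import errno
import os
import pty

pid, master_fd = pty.fork()
if pid == 0:
    os._exit(0)  # child: exit immediately, closing the PTY slave
else:
    os.waitpid(pid, 0)
    try:
        while os.read(master_fd, 1024):
            pass  # drain anything the child wrote
    except OSError as e:
        # EOF on a PTY master surfaces as EIO on Linux
        assert e.errno == errno.EIO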
Example 4: test_infer_job_id_from_application_id
def test_infer_job_id_from_application_id(self):
lines = [
"15/12/11 13:32:45 INFO impl.YarnClientImpl:" " Submitted application application_1449857544442_0002\n"
]
self.assertEqual(
_interpret_hadoop_jar_command_stderr(lines),
        dict(application_id="application_1449857544442_0002",
             job_id="job_1449857544442_0002"),
)
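The inference here is purely textual: on YARN, an application ID and its MapReduce job ID share the same cluster-timestamp and sequence suffix, so one converts to the other by swapping the prefix. A hypothetical helper illustrating the relationship (not the actual mrjob implementation):

def job_id_from_application_id(application_id):
    # application_1449857544442_0002 -> job_1449857544442_0002
    return application_id.replace('application_', 'job_', 1)

assert (job_id_from_application_id('application_1449857544442_0002') ==
        'job_1449857544442_0002')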
Example 5: test_record_callback
def test_record_callback(self):
records = []
def record_callback(record):
records.append(record)
lines = [
'packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop'
'-unjar7873615084086492115/] []'
' /tmp/streamjob737002412080260811.jar tmpDir=null\n',
'15/12/11 13:33:11 INFO mapreduce.Job:'
' Running job: job_1449857544442_0002\n',
'Streaming Command Failed!\n',
]
self.assertEqual(
_interpret_hadoop_jar_command_stderr(
lines, record_callback=record_callback),
dict(job_id='job_1449857544442_0002'))
self.assertEqual(
records,
[
dict(
caller_location='',
level='',
logger='',
message=('packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop'
'-unjar7873615084086492115/] []'
' /tmp/streamjob737002412080260811.jar'
' tmpDir=null'),
num_lines=1,
start_line=0,
thread='',
timestamp='',
),
dict(
caller_location='',
level='INFO',
logger='mapreduce.Job',
message='Running job: job_1449857544442_0002',
num_lines=1,
start_line=1,
thread='',
timestamp='15/12/11 13:33:11',
),
dict(
caller_location='',
level='',
logger='',
message='Streaming Command Failed!',
num_lines=1,
start_line=2,
thread='',
timestamp='',
),
])
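Because the callback receives every parsed record as a dict with the fields asserted above (level, logger, message, timestamp, and so on), it can do more than append to a list. A sketch of a callback that surfaces only records Hadoop logged at WARN or ERROR level (the filtering logic is hypothetical; the record layout follows this test):

from mrjob.logs.step import _interpret_hadoop_jar_command_stderr

def report_problems(record):
    # unparsed lines get an empty 'level', so they are skipped here
    if record['level'] in ('WARN', 'ERROR'):
        print('%s %s %s: %s' % (record['timestamp'], record['level'],
                                record['logger'], record['message']))

lines = [
    '15/12/11 13:33:11 INFO mapreduce.Job:'
    ' Running job: job_1449857544442_0002\n',
]
_interpret_hadoop_jar_command_stderr(lines, record_callback=report_problems)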
Example 6: test_yarn_error
def test_yarn_error(self):
lines = [
'16/01/22 19:14:16 INFO mapreduce.Job: Task Id :'
' attempt_1453488173054_0001_m_000000_0, Status : FAILED\n',
'Error: java.lang.RuntimeException: PipeMapRed'
'.waitOutputThreads(): subprocess failed with code 1\n',
'\tat org.apache.hadoop.streaming.PipeMapRed'
'.waitOutputThreads(PipeMapRed.java:330)\n',
'\tat org.apache.hadoop.streaming.PipeMapRed.mapRedFinished'
'(PipeMapRed.java:543)\n',
'\n',
]
self.assertEqual(
_interpret_hadoop_jar_command_stderr(lines),
dict(
errors=[
dict(
attempt_id='attempt_1453488173054_0001_m_000000_0',
hadoop_error=dict(
message=(
'Error: java.lang.RuntimeException: PipeMapRed'
'.waitOutputThreads(): subprocess failed with'
' code 1\n\tat org.apache.hadoop.streaming'
'.PipeMapRed.waitOutputThreads(PipeMapRed.java'
':330)\n\tat org.apache.hadoop.streaming'
'.PipeMapRed.mapRedFinished(PipeMapRed.java'
':543)'
),
num_lines=5,
start_line=0,
),
# task ID is implied by attempt ID
task_id='task_1453488173054_0001_m_000000',
)
]
))
Example 7: _run_job_in_hadoop
def _run_job_in_hadoop(self):
for step_num, step in enumerate(self._get_steps()):
self._warn_about_spark_archives(step)
step_args = self._args_for_step(step_num)
env = _fix_env(self._env_for_step(step_num))
# log this *after* _args_for_step(), which can start a search
# for the Hadoop streaming jar
log.info('Running step %d of %d...' %
(step_num + 1, self._num_steps()))
log.debug('> %s' % cmd_line(step_args))
log.debug(' with environment: %r' % sorted(env.items()))
log_interpretation = {}
self._log_interpretations.append(log_interpretation)
# try to use a PTY if it's available
try:
pid, master_fd = pty.fork()
except (AttributeError, OSError):
# no PTYs, just use Popen
# user won't get much feedback for a while, so tell them
# Hadoop is running
log.debug('No PTY available, using Popen() to invoke Hadoop')
step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE, env=env)
step_interpretation = _interpret_hadoop_jar_command_stderr(
step_proc.stderr,
record_callback=_log_record_from_hadoop)
# there shouldn't be much output to STDOUT
for line in step_proc.stdout:
_log_line_from_hadoop(to_unicode(line).strip('\r\n'))
step_proc.stdout.close()
step_proc.stderr.close()
returncode = step_proc.wait()
else:
# we have PTYs
if pid == 0: # we are the child process
os.execvpe(step_args[0], step_args, env)
else:
log.debug('Invoking Hadoop via PTY')
with os.fdopen(master_fd, 'rb') as master:
# reading from master gives us the subprocess's
# stderr and stdout (it's a fake terminal)
step_interpretation = (
_interpret_hadoop_jar_command_stderr(
master,
record_callback=_log_record_from_hadoop))
_, returncode = os.waitpid(pid, 0)
# make sure output_dir is filled
if 'output_dir' not in step_interpretation:
step_interpretation['output_dir'] = (
self._step_output_uri(step_num))
log_interpretation['step'] = step_interpretation
step_type = step['type']
if not _is_spark_step_type(step_type):
counters = self._pick_counters(log_interpretation, step_type)
if counters:
log.info(_format_counters(counters))
else:
log.warning('No counters found')
if returncode:
error = self._pick_error(log_interpretation, step_type)
if error:
log.error('Probable cause of failure:\n\n%s\n' %
_format_error(error))
# use CalledProcessError's well-known message format
reason = str(CalledProcessError(returncode, step_args))
raise StepFailedException(
reason=reason, step_num=step_num,
num_steps=self._num_steps())
Example 8: test_yarn
def test_yarn(self):
    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(YARN_STEP_LOG_LINES),
        PARSED_YARN_STEP_LOG_LINES)
Example 9: test_empty
def test_empty(self):
self.assertEqual(_interpret_hadoop_jar_command_stderr([]), {})
Example 10: _run_job_in_hadoop
def _run_job_in_hadoop(self):
for step_num in range(self._num_steps()):
step_args = self._args_for_step(step_num)
# log this *after* _args_for_step(), which can start a search
# for the Hadoop streaming jar
log.info('Running step %d of %d' %
(step_num + 1, self._num_steps()))
log.debug('> %s' % cmd_line(step_args))
log_interpretation = {}
self._log_interpretations.append(log_interpretation)
# try to use a PTY if it's available
try:
pid, master_fd = pty.fork()
except (AttributeError, OSError):
# no PTYs, just use Popen
# user won't get much feedback for a while, so tell them
# Hadoop is running
log.debug('No PTY available, using Popen() to invoke Hadoop')
step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE)
step_interpretation = _interpret_hadoop_jar_command_stderr(
step_proc.stderr,
record_callback=_log_record_from_hadoop)
# there shouldn't be much output to STDOUT
for line in step_proc.stdout:
_log_line_from_hadoop(to_string(line).strip('\r\n'))
step_proc.stdout.close()
step_proc.stderr.close()
returncode = step_proc.wait()
else:
# we have PTYs
if pid == 0: # we are the child process
os.execvp(step_args[0], step_args)
else:
log.debug('Invoking Hadoop via PTY')
with os.fdopen(master_fd, 'rb') as master:
# reading from master gives us the subprocess's
# stderr and stdout (it's a fake terminal)
step_interpretation = (
_interpret_hadoop_jar_command_stderr(
master,
record_callback=_log_record_from_hadoop))
_, returncode = os.waitpid(pid, 0)
# make sure output_dir is filled
if 'output_dir' not in step_interpretation:
step_interpretation['output_dir'] = (
self._hdfs_step_output_dir(step_num))
log_interpretation['step'] = step_interpretation
if 'counters' not in step_interpretation:
log.info('Attempting to read counters from history log')
self._interpret_history_log(log_interpretation)
# just print counters for this one step
self._print_counters(step_nums=[step_num])
if returncode:
error = self._pick_error(log_interpretation)
if error:
log.error('Probable cause of failure:\n\n%s\n' %
_format_error(error))
raise CalledProcessError(returncode, step_args)