本文整理汇总了Python中mrjob.emr.EMRJobRunner._find_probable_cause_of_failure方法的典型用法代码示例。如果您正苦于以下问题:Python EMRJobRunner._find_probable_cause_of_failure方法的具体用法?Python EMRJobRunner._find_probable_cause_of_failure怎么用?Python EMRJobRunner._find_probable_cause_of_failure使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类mrjob.emr.EMRJobRunner的用法示例。
在下文中一共展示了EMRJobRunner._find_probable_cause_of_failure方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: FindProbableCauseOfFailureTestCase
# Required import: from mrjob.emr import EMRJobRunner [as alias]
# Note: _find_probable_cause_of_failure is a method of the EMRJobRunner class,
# so it cannot be imported on its own; call it on an EMRJobRunner instance.
class FindProbableCauseOfFailureTestCase(MockEMRAndS3TestCase):
# We're mostly concerned here that the right log files are read in the
# right order. parsing of the logs is handled by tests.parse_test
@setup
def make_runner(self):
    """Build the EMRJobRunner under test and set its S3 job log URI.

    The runner is stored on ``self.runner`` and its ``_s3_job_log_uri`` is
    set to ``BUCKET_URI + LOG_DIR``.
    """
    runner_options = {
        's3_sync_wait_time': 0,
        's3_scratch_uri': 's3://walrus/tmp',
        'conf_path': False,
    }
    self.runner = EMRJobRunner(**runner_options)
    self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR
@teardown
def cleanup_runner(self):
    """Teardown hook: call ``cleanup()`` on ``self.runner``."""
    runner = self.runner
    runner.cleanup()
def test_empty(self):
    """With an empty 'walrus' bucket, no probable cause is expected."""
    self.add_mock_s3_data({'walrus': {}})

    cause = self.runner._find_probable_cause_of_failure([1])

    assert_equal(cause, None)
def test_python_exception(self):
    """A Python exception in the attempt's stderr is the expected cause.

    The attempt's syslog carries an input URI line, and that URI is
    expected back as ``input_uri``.
    """
    stderr_log = GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE
    syslog_log = make_input_uri_line(BUCKET_URI + 'input.gz')
    self.add_mock_s3_data({'walrus': {
        ATTEMPT_0_DIR + 'stderr': stderr_log,
        ATTEMPT_0_DIR + 'syslog': syslog_log,
    }})

    cause = self.runner._find_probable_cause_of_failure([1])

    expected = {
        'lines': list(StringIO(PY_EXCEPTION)),
        's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
        'input_uri': BUCKET_URI + 'input.gz',
    }
    assert_equal(cause, expected)
def test_python_exception_without_input_uri(self):
    """A Python exception in stderr with no syslog gives input_uri None."""
    stderr_log = GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE
    self.add_mock_s3_data({'walrus': {
        ATTEMPT_0_DIR + 'stderr': stderr_log,
    }})

    cause = self.runner._find_probable_cause_of_failure([1])

    expected = {
        'lines': list(StringIO(PY_EXCEPTION)),
        's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
        'input_uri': None,
    }
    assert_equal(cause, expected)
def test_java_exception(self):
    """A Java stack trace in the attempt's syslog is the expected cause.

    The stderr log holds only garbage here; the expected result points at
    the syslog file and includes the input URI found in that syslog.
    """
    stderr_log = GARBAGE + GARBAGE
    syslog_log = (make_input_uri_line(BUCKET_URI + 'input.gz') +
                  GARBAGE +
                  CHILD_ERR_LINE +
                  JAVA_STACK_TRACE +
                  GARBAGE)
    self.add_mock_s3_data({'walrus': {
        ATTEMPT_0_DIR + 'stderr': stderr_log,
        ATTEMPT_0_DIR + 'syslog': syslog_log,
    }})

    cause = self.runner._find_probable_cause_of_failure([1])

    expected = {
        'lines': list(StringIO(JAVA_STACK_TRACE)),
        's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
        'input_uri': BUCKET_URI + 'input.gz',
    }
    assert_equal(cause, expected)
def test_java_exception_without_input_uri(self):
    """A Java stack trace in syslog with no input URI line gives None."""
    syslog_log = CHILD_ERR_LINE + JAVA_STACK_TRACE + GARBAGE
    self.add_mock_s3_data({'walrus': {
        ATTEMPT_0_DIR + 'syslog': syslog_log,
    }})

    cause = self.runner._find_probable_cause_of_failure([1])

    expected = {
        'lines': list(StringIO(JAVA_STACK_TRACE)),
        's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
        'input_uri': None,
    }
    assert_equal(cause, expected)
def test_hadoop_streaming_error(self):
    """Only the step 2 syslog error is expected to be reported.

    Steps 1 and 3 log the boring Hadoop error; step 2 logs the useful one.
    """
    boring_line = HADOOP_ERR_LINE_PREFIX + BORING_HADOOP_ERROR + '\n'

    # The step 2 syslog also carries an input URI line for input.gz, purely
    # to check that it is NOT picked up (expected input_uri is None).
    step_1_syslog = GARBAGE + boring_line
    step_2_syslog = (GARBAGE +
                     make_input_uri_line(BUCKET_URI + 'input.gz') +
                     HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n')
    step_3_syslog = boring_line

    self.add_mock_s3_data({'walrus': {
        LOG_DIR + 'steps/1/syslog': step_1_syslog,
        LOG_DIR + 'steps/2/syslog': step_2_syslog,
        LOG_DIR + 'steps/3/syslog': step_3_syslog,
    }})

    cause = self.runner._find_probable_cause_of_failure([1, 2, 3])

    expected = {
        'lines': [USEFUL_HADOOP_ERROR + '\n'],
        's3_log_file_uri': BUCKET_URI + LOG_DIR + 'steps/2/syslog',
        'input_uri': None,
    }
    assert_equal(cause, expected)
def test_later_task_attempt_steps_win(self):
# should look at later steps first
self.add_mock_s3_data({'walrus': {
TASK_ATTEMPTS_DIR + 'attempt_201007271720_0001_r_000126_3/stderr':
TRACEBACK_START + PY_EXCEPTION,
#.........这里部分代码省略.........