This article collects typical usage examples of the Python method mrjob.emr.EMRJobRunner.cleanup. If you are wondering what EMRJobRunner.cleanup does, or how and when to call it, the curated examples below should help. You can also browse further usage examples for the enclosing class, mrjob.emr.EMRJobRunner.
Six code examples of the EMRJobRunner.cleanup method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.
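Before diving into the test-suite examples, here is a minimal sketch of the pattern they all share: build a runner, run it, and call cleanup() afterwards so temporary files and other job resources are released. The job object my_job below is a hypothetical MRJob subclass instance, not something defined in the examples.

from io import BytesIO

from mrjob.emr import EMRJobRunner

# my_job is a hypothetical MRJob instance; _steps_desc() yields the step
# descriptions the runner needs when no job script is passed in.
steps = my_job._steps_desc()

runner = EMRJobRunner(steps=steps, stdin=BytesIO())
try:
    runner.run()
finally:
    # Release the runner's resources even if run() raises.
    runner.cleanup()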
Example 1: MRBossTestCase
# Required import: from mrjob.emr import EMRJobRunner [as alias]
# Or: from mrjob.emr.EMRJobRunner import cleanup [as alias]
class MRBossTestCase(MockBotoTestCase):

    def setUp(self):
        super(MRBossTestCase, self).setUp()
        self.make_runner()

    def tearDown(self):
        self.cleanup_runner()
        super(MRBossTestCase, self).tearDown()

    def make_runner(self):
        self.runner = EMRJobRunner(conf_paths=[])
        self.add_mock_s3_data({'walrus': {}})
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_tmp_dir='s3://walrus/tmp',
                                   conf_paths=[])
        self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR
        self.prepare_runner_for_ssh(self.runner)
        self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')

    def cleanup_runner(self):
        """This method assumes ``prepare_runner_for_ssh()`` was called. That
        method isn't a "proper" setup method because it requires different
        arguments for different tests.
        """
        shutil.rmtree(self.output_dir)
        self.runner.cleanup()

    def test_one_node(self):
        mock_ssh_file('testmaster', 'some_file', b'file contents')

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'), 'r') as f:
            self.assertEqual(f.read().rstrip(), 'file contents')

        self.assertEqual(os.listdir(self.output_dir), ['master'])

    def test_two_nodes(self):
        self.add_slave()
        self.runner._opts['num_ec2_instances'] = 2

        mock_ssh_file('testmaster', 'some_file', b'file contents 1')
        mock_ssh_file('testmaster!testslave0', 'some_file', b'file contents 2')

        self.runner.fs  # force initialization of _ssh_fs

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'), 'r') as f:
            self.assertEqual(f.read().rstrip(), 'file contents 1')

        with open(os.path.join(self.output_dir, 'slave testslave0', 'stdout'),
                  'r') as f:
            self.assertEqual(f.read().strip(), 'file contents 2')

        self.assertEqual(sorted(os.listdir(self.output_dir)),
                         ['master', 'slave testslave0'])
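A side note on Example 1: cleanup() is called from tearDown(), so it only runs if setUp() completed. A variation of this test case could register the cleanup with unittest's addCleanup() instead, so it fires even when later setup steps fail. A minimal sketch, assuming the same MockBotoTestCase helpers; the class name is hypothetical:

class MRBossAddCleanupTestCase(MockBotoTestCase):  # hypothetical variant

    def setUp(self):
        super(MRBossAddCleanupTestCase, self).setUp()
        self.runner = EMRJobRunner(conf_paths=[])
        # register cleanup right away; unittest runs it even if the rest
        # of setUp() or the test itself raises
        self.addCleanup(self.runner.cleanup)
        self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')
        self.addCleanup(shutil.rmtree, self.output_dir)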
Example 2: test_spark_script_step_without_mr_job_script
# Required import: from mrjob.emr import EMRJobRunner [as alias]
# Or: from mrjob.emr.EMRJobRunner import cleanup [as alias]
def test_spark_script_step_without_mr_job_script(self):
    spark_script_path = self.makefile('a_spark_script.py')
    steps = MRSparkScript(['--script', spark_script_path])._steps_desc()

    runner = EMRJobRunner(steps=steps, stdin=BytesIO())
    runner.run()
    runner.cleanup()
Example 3: test_spark_jar_step_without_mr_job_script
# Required import: from mrjob.emr import EMRJobRunner [as alias]
# Or: from mrjob.emr.EMRJobRunner import cleanup [as alias]
def test_spark_jar_step_without_mr_job_script(self):
    spark_jar_path = self.makefile('fireflies.jar')
    steps = MRSparkJar(['--jar', spark_jar_path])._steps_desc()

    runner = EMRJobRunner(steps=steps, stdin=BytesIO())
    runner.run()
    runner.cleanup()
Example 4: test_jar_step_without_mr_job_script
# Required import: from mrjob.emr import EMRJobRunner [as alias]
# Or: from mrjob.emr.EMRJobRunner import cleanup [as alias]
def test_jar_step_without_mr_job_script(self):
    jar_path = self.makefile('dora.jar')
    steps = MRJustAJar(['--jar', jar_path])._steps_desc()

    runner = EMRJobRunner(steps=steps, stdin=BytesIO(b'backpack'))
    runner.run()
    runner.cleanup()
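Examples 2 through 4 all follow run() with an explicit cleanup() call. As a usage note, mrjob runners also support the context-manager protocol, which triggers cleanup automatically when the with-block exits. A minimal sketch reusing the setup from Example 4; jar_path is assumed to point at an existing JAR:

steps = MRJustAJar(['--jar', jar_path])._steps_desc()

# Leaving the with-block invokes the runner's cleanup, so no explicit
# runner.cleanup() call is needed here.
with EMRJobRunner(steps=steps, stdin=BytesIO(b'backpack')) as runner:
    runner.run()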
Example 5: MRBossTestCase
# Required import: from mrjob.emr import EMRJobRunner [as alias]
# Or: from mrjob.emr.EMRJobRunner import cleanup [as alias]
class MRBossTestCase(MockEMRAndS3TestCase):

    @setup
    def make_runner(self):
        self.runner = EMRJobRunner(conf_path=False)
        self.add_mock_s3_data({'walrus': {}})
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_scratch_uri='s3://walrus/tmp',
                                   conf_path=False)
        self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR
        self.prepare_runner_for_ssh(self.runner)
        self.runner._enable_slave_ssh_access()
        self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')

    @teardown
    def cleanup_runner(self):
        """This method assumes ``prepare_runner_for_ssh()`` was called. That
        method isn't a "proper" setup method because it requires different
        arguments for different tests.
        """
        shutil.rmtree(self.output_dir)
        self.runner.cleanup()
        self.teardown_ssh()

    def test_one_node(self):
        mock_ssh_file('testmaster', 'some_file', 'file contents')

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'), 'r') as f:
            assert_equal(f.read(), 'file contents\n')

        assert_equal(os.listdir(self.output_dir), ['master'])

    def test_two_nodes(self):
        self.add_slave()
        self.runner._opts['num_ec2_instances'] = 2

        mock_ssh_file('testmaster', 'some_file', 'file contents 1')
        mock_ssh_file('testmaster!testslave0', 'some_file', 'file contents 2')

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'), 'r') as f:
            assert_equal(f.read(), 'file contents 1\n')

        with open(os.path.join(self.output_dir, 'slave testslave0', 'stdout'),
                  'r') as f:
            assert_equal(f.read(), 'file contents 2\n')

        assert_equal(sorted(os.listdir(self.output_dir)),
                     ['master', 'slave testslave0'])
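Note that Example 5 is essentially the same MRBoss test as Example 1, written against an older mrjob API: s3_scratch_uri and conf_path were later renamed to s3_tmp_dir and conf_paths, and the @setup/@teardown decorators appear to come from testify-style fixtures rather than unittest's setUp()/tearDown().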
Example 6: FindProbableCauseOfFailureTestCase
# Required import: from mrjob.emr import EMRJobRunner [as alias]
# Or: from mrjob.emr.EMRJobRunner import cleanup [as alias]
class FindProbableCauseOfFailureTestCase(MockEMRAndS3TestCase):

    # We're mostly concerned here that the right log files are read in the
    # right order. parsing of the logs is handled by tests.parse_test

    @setup
    def make_runner(self):
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_scratch_uri='s3://walrus/tmp',
                                   conf_path=False)
        self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR

    @teardown
    def cleanup_runner(self):
        self.runner.cleanup()

    def test_empty(self):
        self.add_mock_s3_data({'walrus': {}})
        assert_equal(self.runner._find_probable_cause_of_failure([1]), None)

    def test_python_exception(self):
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr':
                GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE,
            ATTEMPT_0_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz'),
        }})
        assert_equal(self.runner._find_probable_cause_of_failure([1]),
                     {'lines': list(StringIO(PY_EXCEPTION)),
                      's3_log_file_uri':
                          BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
                      'input_uri': BUCKET_URI + 'input.gz'})

    def test_python_exception_without_input_uri(self):
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': (
                GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE),
        }})
        assert_equal(self.runner._find_probable_cause_of_failure([1]),
                     {'lines': list(StringIO(PY_EXCEPTION)),
                      's3_log_file_uri':
                          BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
                      'input_uri': None})

    def test_java_exception(self):
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': GARBAGE + GARBAGE,
            ATTEMPT_0_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz') +
                GARBAGE +
                CHILD_ERR_LINE +
                JAVA_STACK_TRACE +
                GARBAGE,
        }})
        assert_equal(self.runner._find_probable_cause_of_failure([1]),
                     {'lines': list(StringIO(JAVA_STACK_TRACE)),
                      's3_log_file_uri':
                          BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
                      'input_uri': BUCKET_URI + 'input.gz'})

    def test_java_exception_without_input_uri(self):
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'syslog':
                CHILD_ERR_LINE +
                JAVA_STACK_TRACE +
                GARBAGE,
        }})
        assert_equal(self.runner._find_probable_cause_of_failure([1]),
                     {'lines': list(StringIO(JAVA_STACK_TRACE)),
                      's3_log_file_uri':
                          BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
                      'input_uri': None})

    def test_hadoop_streaming_error(self):
        # we should look only at step 2 since the errors in the other
        # steps are boring
        #
        # we include input.gz just to test that we DON'T check for it
        self.add_mock_s3_data({'walrus': {
            LOG_DIR + 'steps/1/syslog':
                GARBAGE +
                HADOOP_ERR_LINE_PREFIX + BORING_HADOOP_ERROR + '\n',
            LOG_DIR + 'steps/2/syslog':
                GARBAGE +
                make_input_uri_line(BUCKET_URI + 'input.gz') +
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
            LOG_DIR + 'steps/3/syslog':
                HADOOP_ERR_LINE_PREFIX + BORING_HADOOP_ERROR + '\n',
        }})
        assert_equal(self.runner._find_probable_cause_of_failure([1, 2, 3]),
                     {'lines': [USEFUL_HADOOP_ERROR + '\n'],
                      's3_log_file_uri':
                          BUCKET_URI + LOG_DIR + 'steps/2/syslog',
                      'input_uri': None})

    def test_later_task_attempt_steps_win(self):
        # should look at later steps first
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR + 'attempt_201007271720_0001_r_000126_3/stderr':
                TRACEBACK_START + PY_EXCEPTION,
#......... the remaining code of this example is omitted .........