当前位置: 首页>>代码示例>>Python>>正文


Python EMRJobRunner.cleanup方法代码示例

本文整理汇总了Python中mrjob.emr.EMRJobRunner.cleanup方法的典型用法代码示例。如果您正苦于以下问题:Python EMRJobRunner.cleanup方法的具体用法?Python EMRJobRunner.cleanup怎么用?Python EMRJobRunner.cleanup使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在mrjob.emr.EMRJobRunner的用法示例。


在下文中一共展示了EMRJobRunner.cleanup方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: MRBossTestCase

# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 注意: cleanup 是 EMRJobRunner 的实例方法, 不能单独导入; 请通过实例调用, 例如 runner.cleanup()
class MRBossTestCase(MockBotoTestCase):
    """Tests for ``run_on_all_nodes()`` using an ``EMRJobRunner`` fixture.

    Every test gets a fresh runner (``self.runner``) and a fresh temporary
    directory (``self.output_dir``). The tests assert that, after
    ``run_on_all_nodes()`` runs, ``self.output_dir`` contains one
    subdirectory per node with a ``stdout`` file inside.
    """

    def setUp(self):
        """Run the base-class setup, then build the runner fixture."""
        super(MRBossTestCase, self).setUp()
        self.make_runner()

    def tearDown(self):
        """Tear down the runner fixture, then run the base-class teardown."""
        try:
            self.cleanup_runner()
        finally:
            # Always unwind the base-class fixtures, even if our own cleanup
            # raised; otherwise a failed cleanup would skip it entirely.
            super(MRBossTestCase, self).tearDown()

    def make_runner(self):
        """Create ``self.runner`` and ``self.output_dir`` for a test."""
        # NOTE(review): this first runner is replaced a few lines below
        # without ever being cleaned up. It is kept because constructing it
        # may have side effects on the mock environment -- confirm whether
        # it is actually needed.
        self.runner = EMRJobRunner(conf_paths=[])
        self.add_mock_s3_data({'walrus': {}})
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_tmp_dir='s3://walrus/tmp',
                                   conf_paths=[])
        self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR
        self.prepare_runner_for_ssh(self.runner)
        self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')

    def cleanup_runner(self):
        """Delete ``self.output_dir`` and clean up ``self.runner``.

        This method assumes ``prepare_runner_for_ssh()`` was called. That
        method isn't a "proper" setup method because it requires different
        arguments for different tests.
        """
        try:
            shutil.rmtree(self.output_dir)
        finally:
            # Clean up the runner even when removing the directory fails.
            self.runner.cleanup()

    def test_one_node(self):
        """With only a master node, only a ``master`` directory is written."""
        mock_ssh_file('testmaster', 'some_file', b'file contents')

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'), 'r') as f:
            self.assertEqual(f.read().rstrip(), 'file contents')

        self.assertEqual(os.listdir(self.output_dir), ['master'])

    def test_two_nodes(self):
        """With a master and one slave, each node gets its own directory."""
        self.add_slave()
        self.runner._opts['num_ec2_instances'] = 2

        mock_ssh_file('testmaster', 'some_file', b'file contents 1')
        mock_ssh_file('testmaster!testslave0', 'some_file', b'file contents 2')

        self.runner.fs  # force initialization of _ssh_fs

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'), 'r') as f:
            self.assertEqual(f.read().rstrip(), 'file contents 1')

        with open(os.path.join(self.output_dir, 'slave testslave0', 'stdout'),
                  'r') as f:
            # rstrip() (not strip()) to match the other stdout assertions
            self.assertEqual(f.read().rstrip(), 'file contents 2')

        # os.listdir() order is arbitrary, so sort before comparing
        self.assertEqual(sorted(os.listdir(self.output_dir)),
                         ['master', 'slave testslave0'])
开发者ID:kartheek6,项目名称:mrjob,代码行数:62,代码来源:test_mrboss.py

示例2: test_spark_script_step_without_mr_job_script

# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 注意: cleanup 是 EMRJobRunner 的实例方法, 不能单独导入; 请通过实例调用, 例如 runner.cleanup()
    def test_spark_script_step_without_mr_job_script(self):
        """Run a runner built only from a Spark-script step description.

        The steps come from ``MRSparkScript(...)._steps_desc()`` and are
        handed to ``EMRJobRunner`` together with an empty stdin; no other
        arguments are passed. The test passes if ``run()`` does not raise.
        """
        spark_script_path = self.makefile('a_spark_script.py')
        steps = MRSparkScript(['--script', spark_script_path])._steps_desc()

        runner = EMRJobRunner(steps=steps, stdin=BytesIO())

        try:
            runner.run()
        finally:
            # Clean up even when run() raises, so a failing test does not
            # skip the runner's cleanup.
            runner.cleanup()
开发者ID:Affirm,项目名称:mrjob,代码行数:10,代码来源:test_runner.py

示例3: test_spark_jar_step_without_mr_job_script

# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 注意: cleanup 是 EMRJobRunner 的实例方法, 不能单独导入; 请通过实例调用, 例如 runner.cleanup()
    def test_spark_jar_step_without_mr_job_script(self):
        """Run a runner built only from a Spark-jar step description.

        The steps come from ``MRSparkJar(...)._steps_desc()`` and are handed
        to ``EMRJobRunner`` together with an empty stdin; no other arguments
        are passed. The test passes if ``run()`` does not raise.
        """
        spark_jar_path = self.makefile('fireflies.jar')
        steps = MRSparkJar(['--jar', spark_jar_path])._steps_desc()

        runner = EMRJobRunner(steps=steps, stdin=BytesIO())

        try:
            runner.run()
        finally:
            # Clean up even when run() raises, so a failing test does not
            # skip the runner's cleanup.
            runner.cleanup()
开发者ID:Affirm,项目名称:mrjob,代码行数:10,代码来源:test_runner.py

示例4: test_jar_step_without_mr_job_script

# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 注意: cleanup 是 EMRJobRunner 的实例方法, 不能单独导入; 请通过实例调用, 例如 runner.cleanup()
    def test_jar_step_without_mr_job_script(self):
        """Run a runner built only from a jar step description.

        The steps come from ``MRJustAJar(...)._steps_desc()`` and are handed
        to ``EMRJobRunner`` together with a stdin containing ``b'backpack'``;
        no other arguments are passed. The test passes if ``run()`` does not
        raise.
        """
        jar_path = self.makefile('dora.jar')
        steps = MRJustAJar(['--jar', jar_path])._steps_desc()

        runner = EMRJobRunner(steps=steps, stdin=BytesIO(b'backpack'))

        try:
            runner.run()
        finally:
            # Clean up even when run() raises, so a failing test does not
            # skip the runner's cleanup.
            runner.cleanup()
开发者ID:Affirm,项目名称:mrjob,代码行数:10,代码来源:test_runner.py

示例5: MRBossTestCase

# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 注意: cleanup 是 EMRJobRunner 的实例方法, 不能单独导入; 请通过实例调用, 例如 runner.cleanup()
class MRBossTestCase(MockEMRAndS3TestCase):
    """Tests for ``run_on_all_nodes()`` using an ``EMRJobRunner`` fixture.

    Every test gets a fresh runner (``self.runner``) and a fresh temporary
    directory (``self.output_dir``). The tests assert that, after
    ``run_on_all_nodes()`` runs, ``self.output_dir`` contains one
    subdirectory per node with a ``stdout`` file inside.
    """

    @setup
    def make_runner(self):
        """Create ``self.runner`` and ``self.output_dir`` for a test."""
        # NOTE(review): this first runner is replaced a few lines below
        # without ever being cleaned up. It is kept because constructing it
        # may have side effects on the mock environment -- confirm whether
        # it is actually needed.
        self.runner = EMRJobRunner(conf_path=False)
        self.add_mock_s3_data({'walrus': {}})
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_scratch_uri='s3://walrus/tmp',
                                   conf_path=False)
        self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR
        self.prepare_runner_for_ssh(self.runner)
        self.runner._enable_slave_ssh_access()
        self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')

    @teardown
    def cleanup_runner(self):
        """Delete ``self.output_dir``, clean up the runner, tear down SSH.

        This method assumes ``prepare_runner_for_ssh()`` was called. That
        method isn't a "proper" setup method because it requires different
        arguments for different tests.
        """
        # Nested try/finally so each later cleanup step still runs when an
        # earlier one raises.
        try:
            shutil.rmtree(self.output_dir)
        finally:
            try:
                self.runner.cleanup()
            finally:
                self.teardown_ssh()

    def test_one_node(self):
        """With only a master node, only a ``master`` directory is written."""
        mock_ssh_file('testmaster', 'some_file', 'file contents')

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'), 'r') as f:
            assert_equal(f.read(), 'file contents\n')

        assert_equal(os.listdir(self.output_dir), ['master'])

    def test_two_nodes(self):
        """With a master and one slave, each node gets its own directory."""
        self.add_slave()
        self.runner._opts['num_ec2_instances'] = 2

        mock_ssh_file('testmaster', 'some_file', 'file contents 1')
        mock_ssh_file('testmaster!testslave0', 'some_file', 'file contents 2')

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'), 'r') as f:
            assert_equal(f.read(), 'file contents 1\n')

        with open(os.path.join(self.output_dir, 'slave testslave0', 'stdout'),
                  'r') as f:
            assert_equal(f.read(), 'file contents 2\n')

        # os.listdir() order is arbitrary, so sort before comparing
        assert_equal(sorted(os.listdir(self.output_dir)),
                     ['master', 'slave testslave0'])
开发者ID:gimlids,项目名称:LTPM,代码行数:56,代码来源:mrboss_test.py

示例6: FindProbableCauseOfFailureTestCase

# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 注意: cleanup 是 EMRJobRunner 的实例方法, 不能单独导入; 请通过实例调用, 例如 runner.cleanup()
class FindProbableCauseOfFailureTestCase(MockEMRAndS3TestCase):
    # We're mostly concerned here that the right log files are read in the
    # right order. parsing of the logs is handled by tests.parse_test

    @setup
    def make_runner(self):
        """Build the ``EMRJobRunner`` under test and store it on ``self``.

        The runner's ``_s3_job_log_uri`` is set to ``BUCKET_URI + LOG_DIR``
        before it is stored as ``self.runner``.
        """
        runner = EMRJobRunner(
            s3_sync_wait_time=0,
            s3_scratch_uri='s3://walrus/tmp',
            conf_path=False,
        )
        runner._s3_job_log_uri = BUCKET_URI + LOG_DIR
        self.runner = runner

    @teardown
    def cleanup_runner(self):
        """Call ``cleanup()`` on the runner stored in ``self.runner``."""
        self.runner.cleanup()

    def test_empty(self):
        """With an empty 'walrus' bucket, no cause of failure is reported."""
        empty_bucket = {'walrus': {}}
        self.add_mock_s3_data(empty_bucket)

        cause = self.runner._find_probable_cause_of_failure([1])
        assert_equal(cause, None)

    def test_python_exception(self):
        """A Python traceback in stderr is reported along with the input URI.

        The expected result points at the attempt's ``stderr`` log and
        carries the input URI written to the attempt's ``syslog``.
        """
        stderr_log = GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE
        syslog = make_input_uri_line(BUCKET_URI + 'input.gz')
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': stderr_log,
            ATTEMPT_0_DIR + 'syslog': syslog,
        }})

        cause = self.runner._find_probable_cause_of_failure([1])
        expected = {
            'lines': list(StringIO(PY_EXCEPTION)),
            's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
            'input_uri': BUCKET_URI + 'input.gz',
        }
        assert_equal(cause, expected)

    def test_python_exception_without_input_uri(self):
        """A Python traceback with no syslog yields ``input_uri`` of None.

        Only the attempt's ``stderr`` log is mocked; the expected result
        still points at that log.
        """
        stderr_log = GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': stderr_log,
        }})

        cause = self.runner._find_probable_cause_of_failure([1])
        expected = {
            'lines': list(StringIO(PY_EXCEPTION)),
            's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
            'input_uri': None,
        }
        assert_equal(cause, expected)

    def test_java_exception(self):
        """A Java stack trace in syslog is reported along with the input URI.

        The attempt's ``stderr`` holds only garbage; the expected result
        points at the attempt's ``syslog`` log.
        """
        stderr_log = GARBAGE + GARBAGE
        syslog = (make_input_uri_line(BUCKET_URI + 'input.gz') +
                  GARBAGE +
                  CHILD_ERR_LINE +
                  JAVA_STACK_TRACE +
                  GARBAGE)
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': stderr_log,
            ATTEMPT_0_DIR + 'syslog': syslog,
        }})

        cause = self.runner._find_probable_cause_of_failure([1])
        expected = {
            'lines': list(StringIO(JAVA_STACK_TRACE)),
            's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
            'input_uri': BUCKET_URI + 'input.gz',
        }
        assert_equal(cause, expected)

    def test_java_exception_without_input_uri(self):
        """A Java stack trace with no input-URI line yields ``input_uri`` None.

        Only the attempt's ``syslog`` is mocked, and it contains no line
        produced by ``make_input_uri_line()``.
        """
        syslog = CHILD_ERR_LINE + JAVA_STACK_TRACE + GARBAGE
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'syslog': syslog,
        }})

        cause = self.runner._find_probable_cause_of_failure([1])
        expected = {
            'lines': list(StringIO(JAVA_STACK_TRACE)),
            's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
            'input_uri': None,
        }
        assert_equal(cause, expected)

    def test_hadoop_streaming_error(self):
        """Only the useful Hadoop error from step 2 is reported.

        We should look only at step 2, since the errors in the other steps
        are boring. Step 2's syslog also includes an input.gz line, just to
        test that we DON'T check for it (``input_uri`` is expected to be
        None).
        """
        boring_line = HADOOP_ERR_LINE_PREFIX + BORING_HADOOP_ERROR + '\n'
        useful_line = HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n'
        step_logs = {
            LOG_DIR + 'steps/1/syslog': GARBAGE + boring_line,
            LOG_DIR + 'steps/2/syslog': (
                GARBAGE +
                make_input_uri_line(BUCKET_URI + 'input.gz') +
                useful_line),
            LOG_DIR + 'steps/3/syslog': boring_line,
        }
        self.add_mock_s3_data({'walrus': step_logs})

        cause = self.runner._find_probable_cause_of_failure([1, 2, 3])
        expected = {
            'lines': [USEFUL_HADOOP_ERROR + '\n'],
            's3_log_file_uri': BUCKET_URI + LOG_DIR + 'steps/2/syslog',
            'input_uri': None,
        }
        assert_equal(cause, expected)

    def test_later_task_attempt_steps_win(self):
        # should look at later steps first
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR + 'attempt_201007271720_0001_r_000126_3/stderr':
                TRACEBACK_START + PY_EXCEPTION,
#.........这里部分代码省略.........
开发者ID:boursier,项目名称:mrjob,代码行数:103,代码来源:emr_test.py


注:本文中的mrjob.emr.EMRJobRunner.cleanup方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。