当前位置: 首页>>代码示例>>Python>>正文


Python _JOB_KEY_RE.match函数代码示例

本文整理汇总了Python中mrjob.tools.emr.audit_usage._JOB_KEY_RE.match函数的典型用法代码示例。如果您正苦于以下问题：Python match函数的具体用法？Python match怎么用？Python match使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了match函数的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_owner_and_label_switches

    def test_owner_and_label_switches(self):
        # --owner and --label given as command-line switches must both be
        # reflected in the job key produced by the runner.
        job = MRTwoStepJob(['--no-conf', '--owner=ads', '--label=ads_chain'])
        job_runner = job.make_runner()
        key_match = _JOB_KEY_RE.match(job_runner.get_job_key())

        # group 1 carries the label, group 2 the owner
        label, owner = key_match.group(1, 2)
        self.assertEqual(label, 'ads_chain')
        self.assertEqual(owner, 'ads')
开发者ID:anirudhreddy92,项目名称:mrjob,代码行数:7,代码来源:test_runner.py

示例2: test_owner_and_label_kwargs

    def test_owner_and_label_kwargs(self):
        # owner/label passed as constructor keyword arguments must both be
        # reflected in the job key produced by the runner.
        runner_kwargs = dict(conf_paths=[], owner='ads', label='ads_chain')
        job_runner = InlineMRJobRunner(**runner_kwargs)
        key_match = _JOB_KEY_RE.match(job_runner.get_job_key())

        # group 1 carries the label, group 2 the owner
        label, owner = key_match.group(1, 2)
        self.assertEqual(label, 'ads_chain')
        self.assertEqual(owner, 'ads')
开发者ID:anirudhreddy92,项目名称:mrjob,代码行数:7,代码来源:test_runner.py

示例3: test_empty_no_user

    def test_empty_no_user(self):
        # With no script and no resolvable user, the job key is expected to
        # contain the placeholders 'no_script' and 'no_user'.
        # NOTE(review): presumably this flag makes the fixture's patched
        # getuser lookup fail -- confirm against the test case's setUp.
        self.getuser_should_fail = True

        job_runner = InlineMRJobRunner(conf_paths=[])
        key_match = _JOB_KEY_RE.match(job_runner.get_job_key())

        first_field, second_field = key_match.group(1, 2)
        self.assertEqual(first_field, 'no_script')
        self.assertEqual(second_field, 'no_user')
开发者ID:anirudhreddy92,项目名称:mrjob,代码行数:7,代码来源:test_runner.py

示例4: test_auto_owner

    def test_auto_owner(self):
        # When no owner is supplied, the job key's second group is expected
        # to be the value placed in the USER environment variable.
        os.environ['USER'] = 'mcp'

        job_runner = InlineMRJobRunner(conf_paths=[])
        key_match = _JOB_KEY_RE.match(job_runner.get_job_key())

        first_field, owner = key_match.group(1, 2)
        self.assertEqual(first_field, 'no_script')
        self.assertEqual(owner, 'mcp')
开发者ID:anirudhreddy92,项目名称:mrjob,代码行数:7,代码来源:test_runner.py

示例5: test_auto_everything

    def test_auto_everything(self):
        # With nothing but --no-conf supplied, every field of the job key is
        # generated automatically; check the first two groups and that the
        # embedded timestamp falls within 5 seconds after the test started.
        started_at = datetime.datetime.utcnow()

        os.environ["USER"] = "mcp"
        job_runner = MRTwoStepJob(["--no-conf"]).make_runner()
        key_match = _JOB_KEY_RE.match(job_runner.get_job_key())

        first_field, owner = key_match.group(1, 2)
        self.assertEqual(first_field, "mr_two_step_job")
        self.assertEqual(owner, "mcp")

        # groups 3 and 4 concatenate to a %Y%m%d%H%M%S stamp; group 5 holds
        # the microseconds
        stamp_head, stamp_tail, micros = key_match.group(3, 4, 5)
        whole_seconds = datetime.datetime.strptime(
            stamp_head + stamp_tail, "%Y%m%d%H%M%S")
        job_start = whole_seconds.replace(microsecond=int(micros))

        max_delay = datetime.timedelta(seconds=5)
        self.assertGreaterEqual(job_start, started_at)
        self.assertLessEqual(job_start - started_at, max_delay)
开发者ID:irskep,项目名称:mrjob,代码行数:14,代码来源:test_runner.py

示例6: test_auto_label

    def test_auto_label(self):
        # Without an explicit label, the job key's first group is expected
        # to be 'mr_two_step_job', and the second group the current user as
        # reported by getpass.getuser().
        job_runner = MRTwoStepJob(['--no-conf']).make_runner()
        key_match = _JOB_KEY_RE.match(job_runner.get_job_key())

        label, owner = key_match.group(1, 2)
        self.assertEqual(label, 'mr_two_step_job')
        self.assertEqual(owner, getpass.getuser())
开发者ID:anirudhreddy92,项目名称:mrjob,代码行数:6,代码来源:test_runner.py

示例7: test_empty

    def test_empty(self):
        # A runner built with no script and no options should yield a job
        # key whose first group is 'no_script' and whose second group is the
        # current user as reported by getpass.getuser().
        job_runner = InlineMRJobRunner(conf_paths=[])
        key_match = _JOB_KEY_RE.match(job_runner.get_job_key())

        first_field, owner = key_match.group(1, 2)
        self.assertEqual(first_field, 'no_script')
        self.assertEqual(owner, getpass.getuser())
开发者ID:anirudhreddy92,项目名称:mrjob,代码行数:6,代码来源:test_runner.py

示例8: test_end_to_end

    def test_end_to_end(self):
        # Full run of MRHadoopFormatJob through the 'dataproc' runner:
        # checks the parsed output, the job key, per-step hadoop args,
        # upload of mrjob.tar.gz, and the state left behind after the
        # runner's context manager exits.

        # read from STDIN, a local file, and a remote file
        stdin = BytesIO(b'foo\nbar\n')

        local_input_path = os.path.join(self.tmp_dir, 'input')
        with open(local_input_path, 'wb') as local_input_file:
            local_input_file.write(b'bar\nqux\n')

        remote_input_path = 'gs://walrus/data/foo'
        self.put_gcs_multi({
            remote_input_path: b'foo\n'
        })

        mr_job = MRHadoopFormatJob(['-r', 'dataproc', '-v',
                                    '-', local_input_path, remote_input_path,
                                    '--jobconf', 'x=y'])
        mr_job.sandbox(stdin=stdin)

        # (key, value) pairs parsed from the runner's output stream
        results = []

        # deep-copy the GCS client's cached buckets/objects before the
        # runner is created, so they can be compared once it exists
        gcs_buckets_snapshot = copy.deepcopy(self._gcs_client._cache_buckets)
        gcs_objects_snapshot = copy.deepcopy(self._gcs_client._cache_objects)

        # output parts installed as the job's output after runner.run()
        fake_gcs_output = [
            b'1\t"qux"\n2\t"bar"\n',
            b'2\t"foo"\n5\tnull\n'
        ]

        with mr_job.make_runner() as runner:
            self.assertIsInstance(runner, DataprocJobRunner)

            # make sure that initializing the runner doesn't affect GCS
            # (Issue #50)
            self.assertEqual(gcs_buckets_snapshot, self._gcs_client._cache_buckets)
            self.assertEqual(gcs_objects_snapshot, self._gcs_client._cache_objects)

            runner.run()

            # setup fake output
            self.put_job_output_parts(runner, fake_gcs_output)

            for line in runner.stream_output():
                key, value = mr_job.parse_output_line(line)
                results.append((key, value))

            local_tmp_dir = runner._get_local_tmp_dir()
            # make sure cleanup hasn't happened yet
            self.assertTrue(os.path.exists(local_tmp_dir))
            self.assertTrue(any(runner.fs.ls(runner.get_output_dir())))

            # the job key's first two groups should be the job's name and
            # the current user
            name_match = _JOB_KEY_RE.match(runner._job_key)
            self.assertEqual(name_match.group(1), 'mr_hadoop_format_job')
            self.assertEqual(name_match.group(2), getpass.getuser())

            # make sure our input and output formats are attached to
            # the correct steps
            jobs_list = runner.api_client.jobs().list(projectId=runner._gcp_project, region=_DATAPROC_API_REGION).execute()
            jobs = jobs_list['items']

            step_0_args = jobs[0]['hadoopJob']['args']
            step_1_args = jobs[1]['hadoopJob']['args']

            self.assertIn('-inputformat', step_0_args)
            self.assertNotIn('-outputformat', step_0_args)
            self.assertNotIn('-inputformat', step_1_args)
            self.assertIn('-outputformat', step_1_args)

            # make sure jobconf got through
            self.assertIn('-D', step_0_args)
            self.assertIn('x=y', step_0_args)
            self.assertIn('-D', step_1_args)
            # job overrides jobconf in step 1
            self.assertIn('x=z', step_1_args)

            # make sure mrjob.tar.gz is created and uploaded as
            # a bootstrap file
            self.assertTrue(os.path.exists(runner._mrjob_tar_gz_path))
            self.assertIn(runner._mrjob_tar_gz_path,
                          runner._upload_mgr.path_to_uri())
            self.assertIn(runner._mrjob_tar_gz_path,
                          runner._bootstrap_dir_mgr.paths())

            # remember the cluster id; it is used after the with-block to
            # look up the cluster's state
            cluster_id = runner.get_cluster_id()

        # output lines from both fake parts, parsed and sorted
        self.assertEqual(sorted(results),
                         [(1, 'qux'), (2, 'bar'), (2, 'foo'), (5, None)])

        # make sure cleanup happens
        self.assertFalse(os.path.exists(local_tmp_dir))

        # we don't clean-up the output dir as we're relying on lifecycle management
        output_dirs = list(runner.fs.ls(runner.get_output_dir()))
        self.assertEqual(len(fake_gcs_output), len(output_dirs))

        # job should get terminated
        cluster = self._dataproc_client._cache_clusters[_TEST_PROJECT][cluster_id]
        cluster_state = self._dataproc_client.get_state(cluster)
        self.assertEqual(cluster_state, 'DELETING')
开发者ID:Jeremyfanfan,项目名称:mrjob,代码行数:98,代码来源:test_dataproc.py


注:本文中的mrjob.tools.emr.audit_usage._JOB_KEY_RE.match函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。