当前位置: 首页>>代码示例>>Python>>正文


Python launch.MRJobLauncher类代码示例

本文整理汇总了Python中mrjob.launch.MRJobLauncher的典型用法代码示例。如果您正苦于以下问题：Python MRJobLauncher类的具体用法？Python MRJobLauncher怎么用？Python MRJobLauncher使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了MRJobLauncher类的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

def main():
    """Parse command-line options and build a local ZipNum cluster.

    Command-line arguments (read by ``ArgumentParser.parse_args()``):

    * ``output`` -- ZipNum cluster output directory.
    * ``inputs`` -- one or more CDX input globs.
    * ``-s/--shards`` -- number of shards to create (int, default 10).
    * ``-l/--numlines`` -- lines per gzip block (int, default 3000).
    * ``-p/--parallel`` -- flag enabling parallel execution.

    After configuring logging, the parsed options are handed to
    ``run_job`` and then ``build_summary_and_loc`` is called on the
    output directory.
    """
    parser = ArgumentParser()
    parser.add_argument('output', help='ZipNum Cluster Output directory')
    parser.add_argument('inputs', nargs='+',
                        help='CDX Input glob eg: /cdx/*.cdx.gz')
    parser.add_argument('-s', '--shards', default=10, type=int,
                        help='Number of ZipNum Cluster shards to create')
    parser.add_argument('-l', '--numlines', default=3000, type=int,
                        help='Number of lines per gzip block (default 3000)')
    parser.add_argument('-p', '--parallel', action='store_true',
                        help='Run in parallel (multiple maps/reducer processes)')

    opts = parser.parse_args()

    # Send mrjob's log output to stderr at its normal (non-quiet,
    # non-verbose) level.
    MRJobLauncher.set_up_logging(quiet=False,
                                 verbose=False,
                                 stream=sys.stderr)

    log.setLevel(logging.INFO)

    # Only let errors through from the 'mrjob.compat' logger.
    logging.getLogger('mrjob.compat').setLevel(logging.ERROR)

    run_job(opts.inputs, opts.output, opts.shards, opts.parallel,
            opts.numlines)
    build_summary_and_loc(opts.output)
开发者ID:commoncrawl,项目名称:webarchive-indexing,代码行数:25,代码来源:build_local_zipnum.py

示例2: test_hadoop_runner

 def test_hadoop_runner(self):
     """``-r hadoop`` makes ``make_runner()`` yield a HadoopJobRunner."""
     # you can't instantiate a HadoopJobRunner without Hadoop installed
     cli_args = ["--no-conf", "-r", "hadoop", "",
                 "--hadoop-streaming-jar", "HUNNY"]
     fake_env = {"HADOOP_HOME": "100-Acre Wood"}

     hadoop_launcher = MRJobLauncher(args=cli_args)
     with no_handlers_for_logger("mrjob.runner"), \
             patch.dict(os.environ, fake_env):
         with hadoop_launcher.make_runner() as runner:
             self.assertIsInstance(runner, HadoopJobRunner)
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:7,代码来源:test_launch.py

示例3: test_hadoop_runner

 def test_hadoop_runner(self):
     """Check that the ``hadoop`` runner alias builds a HadoopJobRunner."""
     # you can't instantiate a HadoopJobRunner without Hadoop installed
     runner_args = ['--no-conf', '-r', 'hadoop', '']
     jar_args = ['--hadoop-streaming-jar', 'HUNNY']
     launcher = MRJobLauncher(args=runner_args + jar_args)

     with no_handlers_for_logger('mrjob.runner'), \
             patch.dict(os.environ, {'HADOOP_HOME': '100-Acre Wood'}), \
             launcher.make_runner() as made_runner:
         self.assertIsInstance(made_runner, HadoopJobRunner)
开发者ID:Asana,项目名称:mrjob,代码行数:8,代码来源:test_launch.py

示例4: test_no_output

 def test_no_output(self):
     """With ``--no-output``, run_job() leaves stdout and stderr empty."""
     job = MRJobLauncher(args=['--no-conf', '--no-output', ''])
     job.sandbox()
     with patch.object(job, 'make_runner') as make_runner_mock:
         stub = RunnerStub()
         _mock_context_mgr(make_runner_mock, stub)
         # the runner does have output to offer; it just must not be printed
         stub.stream_output.return_value = ['a line']
         job.run_job()
         for captured in (job.stdout, job.stderr):
             self.assertEqual(captured.getvalue(), '')
开发者ID:DrMavenRebe,项目名称:mrjob,代码行数:10,代码来源:test_launch.py

示例5: test_no_output

 def test_no_output(self):
     """Run a sandboxed ``--no-output`` job against a mock runner.

     The mock runner's ``stream_output`` returns one line, yet both
     ``launcher.stdout`` and ``launcher.stderr`` must stay empty.
     """
     no_output_args = ["--no-conf", "--no-output", ""]
     launcher = MRJobLauncher(args=no_output_args)
     launcher.sandbox()
     with patch.object(launcher, "make_runner") as patched_make_runner:
         mock_runner = Mock()
         _mock_context_mgr(patched_make_runner, mock_runner)
         mock_runner.stream_output.return_value = ["a line"]
         launcher.run_job()
         written_out = launcher.stdout.getvalue()
         written_err = launcher.stderr.getvalue()
         self.assertEqual(written_out, "")
         self.assertEqual(written_err, "")
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:10,代码来源:test_launch.py

示例6: test_no_file_args_required

    def test_no_file_args_required(self):
        """Run ``tests.sr_wc`` with the local runner over two input files.

        Expects exactly one output line whose integer value is 7
        (presumably the word count: the two inputs hold 7 words in total).
        """
        input_paths = [
            self.makefile('words1', b'kit and caboodle\n'),
            self.makefile('words2', b'baubles\nbangles and beads\n'),
        ]

        launcher = MRJobLauncher(
            args=['-r', 'local', tests.sr_wc.__file__] + input_paths)
        launcher.sandbox()

        with launcher.make_runner() as local_runner:
            local_runner.run()

            output_lines = list(to_lines(local_runner.cat_output()))
            self.assertEqual(len(output_lines), 1)
            self.assertEqual(int(output_lines[0]), 7)
开发者ID:Affirm,项目名称:mrjob,代码行数:14,代码来源:test_local.py

示例7: _make_launcher

    def _make_launcher(self, *args):
        """Build a sandboxed launcher whose runner is a mock.

        The launcher is created with ``['--no-conf', '']`` followed by
        *args*. A ``Mock`` is attached as ``launcher.mock_runner``, and
        ``launcher.make_runner`` is replaced so that
        ``launcher.make_runner().__enter__()`` returns that same mock.
        """
        cli_args = ['--no-conf', '']
        cli_args.extend(args)

        launcher = MRJobLauncher(args=cli_args)
        launcher.sandbox()

        fake_runner = Mock()
        fake_runner.stream_output.return_value = [b'a line\n']
        launcher.mock_runner = fake_runner

        # MagicMock (unlike plain Mock) provides __enter__
        fake_make_runner = MagicMock()
        fake_make_runner.return_value.__enter__.return_value = fake_runner
        launcher.make_runner = fake_make_runner

        return launcher
开发者ID:etiennebatise,项目名称:mrjob,代码行数:16,代码来源:test_launch.py

示例8: test_emr_runner

 def test_emr_runner(self):
     """``-r emr`` makes ``make_runner()`` yield an EMRJobRunner."""
     emr_args = ['--no-conf', '-r', 'emr', '']
     emr_launcher = MRJobLauncher(args=emr_args)
     with no_handlers_for_logger('mrjob.runner'), \
             emr_launcher.make_runner() as made_runner:
         self.assertIsInstance(made_runner, EMRJobRunner)
开发者ID:DrMavenRebe,项目名称:mrjob,代码行数:5,代码来源:test_launch.py

示例9: test_emr_runner

 def test_emr_runner(self):
     """Check that the ``emr`` runner alias builds an EMRJobRunner.

     Runs inside ``no_handlers_for_logger("mrjob")`` and ``patch_fs_s3()``.
     """
     launcher_args = ["--no-conf", "-r", "emr", ""]
     emr_launcher = MRJobLauncher(args=launcher_args)
     with no_handlers_for_logger("mrjob"), patch_fs_s3():
         with emr_launcher.make_runner() as emr_runner:
             self.assertIsInstance(emr_runner, EMRJobRunner)
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:6,代码来源:test_launch.py

示例10: test_local_runner

 def test_local_runner(self):
     """``-r local`` makes ``make_runner()`` yield a LocalMRJobRunner."""
     local_args = ["--no-conf", "-r", "local", ""]
     local_launcher = MRJobLauncher(args=local_args)
     with no_handlers_for_logger("mrjob.runner"), \
             local_launcher.make_runner() as made_runner:
         self.assertIsInstance(made_runner, LocalMRJobRunner)
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:5,代码来源:test_launch.py


注:本文中的mrjob.launch.MRJobLauncher类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。