This page collects representative usage examples of the Python method mrjob.hadoop.HadoopJobRunner._streaming_args. If you are unsure what HadoopJobRunner._streaming_args does, how to call it, or want to see it in context, the curated examples below should help; you can also look further into the containing class, mrjob.hadoop.HadoopJobRunner.
The following shows 2 code examples of HadoopJobRunner._streaming_args, ordered by popularity by default.
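Before diving into the tests, here is a minimal sketch of what _streaming_args consumes and produces, distilled from the assertions in the examples below. Note that _streaming_args is a private mrjob internal from the Hadoop 0.18/0.20 era, and the HDFS paths shown are hypothetical placeholders, not values mrjob would actually generate.

# A sketch of _streaming_args' input and output shape, inferred from the
# test expectations below; the HDFS paths are hypothetical placeholders.
step = {
    'type': 'streaming',
    'mapper': {'type': 'script'},  # run the job script itself as the mapper
}

# For Hadoop >= 0.20 the tests expect an argument list of roughly this shape:
streaming_args = [
    'hadoop', 'jar', 'streaming.jar',         # invoke Hadoop Streaming
    # ... file-upload and configuration args go here ...
    '-input', 'hdfs:///tmp/step-0-input',     # hypothetical input path
    '-output', 'hdfs:///tmp/step-0-output',   # hypothetical output dir
    '-mapper', 'python my_job.py --step-num=0 --mapper',
    '-jobconf', 'mapred.reduce.tasks=0',      # mapper-only step: no reducers
]
print(' '.join(streaming_args))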
Example 1: StreamingArgsTestCase
# Required import: from mrjob.hadoop import HadoopJobRunner [as alias]
# or: from mrjob.hadoop.HadoopJobRunner import _streaming_args [as alias]
class StreamingArgsTestCase(EmptyMrjobConfTestCase):

    def setUp(self):
        super(StreamingArgsTestCase, self).setUp()
        self.runner = HadoopJobRunner(
            hadoop_bin='hadoop', hadoop_streaming_jar='streaming.jar')
        self.runner._hadoop_version = '0.20.204'
        self.simple_patch(self.runner, '_new_upload_args',
                          return_value=['new_upload_args'])
        self.simple_patch(self.runner, '_old_upload_args',
                          return_value=['old_upload_args'])
        self.simple_patch(self.runner, '_hadoop_conf_args',
                          return_value=['hadoop_conf_args'])
        self.simple_patch(self.runner, '_hdfs_step_input_files',
                          return_value=['hdfs_step_input_files'])
        self.simple_patch(self.runner, '_hdfs_step_output_dir',
                          return_value='hdfs_step_output_dir')
        self.runner._script = {'name': 'my_job.py'}

        # Shared prefix of every streaming command on Hadoop >= 0.20:
        self._new_basic_args = [
            'hadoop', 'jar', 'streaming.jar',
            'new_upload_args', 'hadoop_conf_args',
            '-input', 'hdfs_step_input_files',
            '-output', 'hdfs_step_output_dir']

        # On Hadoop 0.18, upload args are appended at the end instead:
        self._old_basic_args = [
            'hadoop', 'jar', 'streaming.jar',
            'hadoop_conf_args',
            '-input', 'hdfs_step_input_files',
            '-output', 'hdfs_step_output_dir',
            'old_upload_args']

    def simple_patch(self, obj, attr, side_effect=None, return_value=None):
        # Patch obj.attr with a mock for the duration of the test.
        patcher = patch.object(obj, attr, side_effect=side_effect,
                               return_value=return_value)
        patcher.start()
        self.addCleanup(patcher.stop)

    def _assert_streaming_step(self, step, args, step_num=0, num_steps=1):
        self.assertEqual(
            self.runner._streaming_args(step, step_num, num_steps),
            self._new_basic_args + args)

    def _assert_streaming_step_old(self, step, args, step_num=0, num_steps=1):
        self.runner._hadoop_version = '0.18'
        self.assertEqual(
            self._old_basic_args + args,
            self.runner._streaming_args(step, step_num, num_steps))

    def test_basic_mapper(self):
        self._assert_streaming_step(
            {
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                },
            },
            ['-mapper', 'python my_job.py --step-num=0 --mapper',
             '-jobconf', 'mapred.reduce.tasks=0'])

    def test_basic_reducer(self):
        self._assert_streaming_step(
            {
                'type': 'streaming',
                'reducer': {
                    'type': 'script',
                },
            },
            ['-mapper', 'cat',
             '-reducer', 'python my_job.py --step-num=0 --reducer'])

    def test_pre_filters(self):
        self._assert_streaming_step(
            {
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                    'pre_filter': 'grep anything',
                },
                'combiner': {
                    'type': 'script',
                    'pre_filter': 'grep nothing',
                },
                'reducer': {
                    'type': 'script',
                    'pre_filter': 'grep something',
                },
            },
            ["-mapper",
             "bash -c 'grep anything | python my_job.py --step-num=0 --mapper'",
             "-combiner",
             "bash -c 'grep nothing | python my_job.py --step-num=0 --combiner'",
             "-reducer",
             "bash -c 'grep something | python my_job.py --step-num=0 --reducer'"])

    def test_combiner_018(self):
        self._assert_streaming_step_old(
            # ... (remainder of this example omitted) ...
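Both examples lean on the same simple_patch helper, which pairs patch.object with addCleanup so each mock is undone automatically at test teardown, even if the test fails. Below is a self-contained sketch of that pattern using unittest.mock (the examples above predate it and import patch from the stand-alone mock package); the Greeter class and test are hypothetical stand-ins.

import unittest
from unittest.mock import patch


class Greeter(object):
    def greet(self):
        return 'hello'


class PatchHelperTestCase(unittest.TestCase):

    def simple_patch(self, obj, attr, side_effect=None, return_value=None):
        # Start the patch now and register its teardown with addCleanup,
        # so no tearDown override is needed and failures still unpatch.
        patcher = patch.object(obj, attr, side_effect=side_effect,
                               return_value=return_value)
        patcher.start()
        self.addCleanup(patcher.stop)

    def test_patched_method(self):
        greeter = Greeter()
        self.simple_patch(greeter, 'greet', return_value='patched')
        self.assertEqual(greeter.greet(), 'patched')


if __name__ == '__main__':
    unittest.main()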
Example 2: StreamingArgsTestCase
# Required import: from mrjob.hadoop import HadoopJobRunner [as alias]
# or: from mrjob.hadoop.HadoopJobRunner import _streaming_args [as alias]
class StreamingArgsTestCase(EmptyMrjobConfTestCase):

    MRJOB_CONF_CONTENTS = {"runners": {"hadoop": {
        "hadoop_home": "kansas", "hadoop_streaming_jar": "binks.jar.jar"}}}

    def setUp(self):
        super(StreamingArgsTestCase, self).setUp()
        self.runner = HadoopJobRunner(
            hadoop_bin="hadoop", hadoop_streaming_jar="streaming.jar",
            mr_job_script="my_job.py", stdin=StringIO())
        self.runner._add_job_files_for_upload()
        self.runner._hadoop_version = "0.20.204"
        self.simple_patch(self.runner, "_new_upload_args",
                          return_value=["new_upload_args"])
        self.simple_patch(self.runner, "_old_upload_args",
                          return_value=["old_upload_args"])
        self.simple_patch(self.runner, "_hadoop_conf_args",
                          return_value=["hadoop_conf_args"])
        self.simple_patch(self.runner, "_hdfs_step_input_files",
                          return_value=["hdfs_step_input_files"])
        self.simple_patch(self.runner, "_hdfs_step_output_dir",
                          return_value="hdfs_step_output_dir")
        self.runner._script_path = "my_job.py"

        # Shared prefix of every streaming command on Hadoop >= 0.20:
        self._new_basic_args = [
            "hadoop", "jar", "streaming.jar",
            "new_upload_args", "hadoop_conf_args",
            "-input", "hdfs_step_input_files",
            "-output", "hdfs_step_output_dir",
        ]
        # On Hadoop 0.18, upload args are appended at the end instead:
        self._old_basic_args = [
            "hadoop", "jar", "streaming.jar",
            "hadoop_conf_args",
            "-input", "hdfs_step_input_files",
            "-output", "hdfs_step_output_dir",
            "old_upload_args",
        ]

    def simple_patch(self, obj, attr, side_effect=None, return_value=None):
        # Patch obj.attr with a mock for the duration of the test.
        patcher = patch.object(obj, attr, side_effect=side_effect,
                               return_value=return_value)
        patcher.start()
        self.addCleanup(patcher.stop)

    def _assert_streaming_step(self, step, args, step_num=0, num_steps=1):
        self.assertEqual(self.runner._streaming_args(step, step_num, num_steps),
                         self._new_basic_args + args)

    def _assert_streaming_step_old(self, step, args, step_num=0, num_steps=1):
        self.runner._hadoop_version = "0.18"
        self.assertEqual(self._old_basic_args + args,
                         self.runner._streaming_args(step, step_num, num_steps))

    def test_basic_mapper(self):
        self._assert_streaming_step(
            {"type": "streaming", "mapper": {"type": "script"}},
            ["-mapper", "python my_job.py --step-num=0 --mapper",
             "-jobconf", "mapred.reduce.tasks=0"],
        )

    def test_basic_reducer(self):
        self._assert_streaming_step(
            {"type": "streaming", "reducer": {"type": "script"}},
            ["-mapper", "cat",
             "-reducer", "python my_job.py --step-num=0 --reducer"],
        )

    def test_pre_filters(self):
        self._assert_streaming_step(
            {
                "type": "streaming",
                "mapper": {"type": "script", "pre_filter": "grep anything"},
                "combiner": {"type": "script", "pre_filter": "grep nothing"},
                "reducer": {"type": "script", "pre_filter": "grep something"},
            },
            [
                "-mapper",
                "bash -c 'grep anything | python my_job.py --step-num=0 --mapper'",
                "-combiner",
                "bash -c 'grep nothing | python my_job.py --step-num=0 --combiner'",
                "-reducer",
                "bash -c 'grep something | python my_job.py --step-num=0 --reducer'",
            ],
        )

    def test_combiner_018(self):
        self._assert_streaming_step_old(
            {"type": "streaming",
             "mapper": {"type": "command", "command": "cat"},
             "combiner": {"type": "script"}},
            [
                "-mapper",
                "bash -c 'cat | sort | python my_job.py --step-num=0 --combiner'",
                "-jobconf",
                "mapred.reduce.tasks=0",
            ],
        )

    def test_pre_filters_018(self):
        self._assert_streaming_step_old(
            {
                "type": "streaming",
                # ... (remainder of this example omitted) ...
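test_combiner_018 above captures how mrjob emulated combiners on Hadoop 0.18, whose streaming interface had no -combiner switch: the mapper command, a sort, and the combiner script are chained inside a single bash -c string that is submitted as the streaming mapper. A minimal sketch of that construction, with the command strings taken from the test expectation:

# Folding a combiner into the mapper command for Hadoop 0.18, which
# lacked a -combiner option for streaming (per test_combiner_018).
mapper_cmd = 'cat'
combiner_cmd = 'python my_job.py --step-num=0 --combiner'

# Mapper output is sorted so equal keys are adjacent, then piped through
# the combiner; the whole pipeline runs as the streaming "mapper".
emulated_mapper = "bash -c '%s | sort | %s'" % (mapper_cmd, combiner_cmd)
print(emulated_mapper)
# -> bash -c 'cat | sort | python my_job.py --step-num=0 --combiner'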