本文整理汇总了Python中mrjob.hadoop.HadoopJobRunner._streaming_args_for_step方法的典型用法代码示例。如果您正苦于以下问题：Python HadoopJobRunner._streaming_args_for_step方法的具体用法？Python HadoopJobRunner._streaming_args_for_step怎么用？Python HadoopJobRunner._streaming_args_for_step使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类mrjob.hadoop.HadoopJobRunner的用法示例。
在下文中一共展示了HadoopJobRunner._streaming_args_for_step方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: StreamingArgsTestCase
# Required import: from mrjob.hadoop import HadoopJobRunner
# Alternatively: from mrjob.hadoop.HadoopJobRunner import _streaming_args_for_step
class StreamingArgsTestCase(EmptyMrjobConfTestCase):
    """Tests for HadoopJobRunner._streaming_args_for_step.

    Verifies the exact ``hadoop jar streaming.jar ...`` command line built
    for a streaming step, with all runner helpers stubbed to fixed tokens.
    """

    # Runner options injected via the (otherwise empty) mrjob.conf fixture;
    # values are arbitrary sentinels, not real paths.
    MRJOB_CONF_CONTENTS = {'runners': {'hadoop': {
        'hadoop_home': 'kansas',
        'hadoop_streaming_jar': 'binks.jar.jar',
    }}}
def setUp(self):
    """Build a runner whose hadoop-touching helpers return fixed tokens."""
    super(StreamingArgsTestCase, self).setUp()

    self.runner = HadoopJobRunner(
        hadoop_bin='hadoop', hadoop_streaming_jar='streaming.jar',
        mr_job_script='my_job.py', stdin=StringIO())
    self.runner._add_job_files_for_upload()
    self.runner._hadoop_version = '0.20.204'
    self.runner._script_path = 'my_job.py'

    # Stub every helper whose output feeds the streaming command line, so
    # each placeholder token can be spotted verbatim in the built args.
    for helper, token in (
            ('_new_upload_args', ['new_upload_args']),
            ('_old_upload_args', ['old_upload_args']),
            ('_hadoop_args_for_step', ['hadoop_args_for_step']),
            ('_hdfs_step_input_files', ['hdfs_step_input_files']),
            ('_hdfs_step_output_dir', 'hdfs_step_output_dir')):
        self.simple_patch(self.runner, helper, return_value=token)

    # Expected command-line prefix on Hadoop >= 0.20 (upload args first).
    self._new_basic_args = [
        'hadoop', 'jar', 'streaming.jar',
        'new_upload_args', 'hadoop_args_for_step',
        '-input', 'hdfs_step_input_files',
        '-output', 'hdfs_step_output_dir']

    # Expected layout on older Hadoop (upload args trail the command).
    self._old_basic_args = [
        'hadoop', 'jar', 'streaming.jar',
        'hadoop_args_for_step',
        '-input', 'hdfs_step_input_files',
        '-output', 'hdfs_step_output_dir',
        'old_upload_args']
def simple_patch(self, obj, attr, side_effect=None, return_value=None):
    """Patch ``obj.attr`` for this test, restoring it during cleanup."""
    active_patch = patch.object(
        obj, attr, side_effect=side_effect, return_value=return_value)
    active_patch.start()
    self.addCleanup(active_patch.stop)
def _assert_streaming_step(self, step, args):
    """Assert *step* yields the 0.20+ arg prefix followed by *args*."""
    self.runner._steps = [step]
    expected = self._new_basic_args + args
    self.assertEqual(self.runner._streaming_args_for_step(0), expected)
def _assert_streaming_step_old(self, step, args):
    """Assert *step* yields the pre-0.20 arg prefix followed by *args*."""
    # Force the legacy code path (upload args appended at the end).
    self.runner._hadoop_version = '0.18'
    self.runner._steps = [step]
    expected = self._old_basic_args + args
    self.assertEqual(self.runner._streaming_args_for_step(0), expected)
def test_basic_mapper(self):
    """A lone script mapper gets -mapper plus zero reduce tasks."""
    step = {
        'type': 'streaming',
        'mapper': {'type': 'script'},
    }
    self._assert_streaming_step(
        step,
        ['-mapper', 'python my_job.py --step-num=0 --mapper',
         '-jobconf', 'mapred.reduce.tasks=0'])
def test_basic_reducer(self):
    """A reducer without a mapper gets 'cat' as an identity mapper."""
    step = {
        'type': 'streaming',
        'reducer': {'type': 'script'},
    }
    self._assert_streaming_step(
        step,
        ['-mapper', 'cat',
         '-reducer', 'python my_job.py --step-num=0 --reducer'])
def test_pre_filters(self):
self._assert_streaming_step(
{
'type': 'streaming',
'mapper': {
'type': 'script',
'pre_filter': 'grep anything',
},
'combiner': {
'type': 'script',
'pre_filter': 'grep nothing',
},
'reducer': {
'type': 'script',
'pre_filter': 'grep something',
},
},
["-mapper",
"bash -c 'grep anything | python my_job.py --step-num=0"
#......... remainder of this code example omitted .........