本文整理汇总了 Python 中 mrjob.emr.EMRJobRunner.make_persistent_job_flow 方法的典型用法代码示例。如果您正苦于以下问题：Python EMRJobRunner.make_persistent_job_flow 方法的具体用法？Python EMRJobRunner.make_persistent_job_flow 怎么用？Python EMRJobRunner.make_persistent_job_flow 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 mrjob.emr.EMRJobRunner 的用法示例。
在下文中一共展示了EMRJobRunner.make_persistent_job_flow方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 或者: from mrjob.emr.EMRJobRunner import make_persistent_job_flow [as 别名]
def main():
    """Create a persistent EMR job flow and print its ID.

    Parses the command line (any positional argument is reported as an
    error through the option parser), enables ``mrjob`` logging unless
    ``options.quiet`` is set, and builds an ``EMRJobRunner`` from a fixed
    subset of the parsed options.
    """
    # parse command-line args; this tool takes options only
    parser = make_option_parser()
    options, args = parser.parse_args()
    if args:
        parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # each of these options is handed to the runner under the same name
    forwarded_options = (
        'conf_path',
        'ec2_instance_type',
        'ec2_master_instance_type',
        'ec2_slave_instance_type',
        'label',
        'num_ec2_instances',
        'owner',
    )
    runner_kwargs = dict(
        (name, getattr(options, name)) for name in forwarded_options)

    # create the persistent job flow and report its ID on stdout
    job_flow_id = EMRJobRunner(**runner_kwargs).make_persistent_job_flow()
    print(job_flow_id)
示例2: test_create_scratch_uri
# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 或者: from mrjob.emr.EMRJobRunner import make_persistent_job_flow [as 别名]
def test_create_scratch_uri(self):
    # A runner created without a configured scratch URI should pick a new
    # "mrjob-" bucket, create it only once a job flow is launched, and a
    # later runner should then re-use that same bucket.

    # "walrus" bucket will be ignored; it doesn't start with "mrjob-"
    self.add_mock_s3_data({'walrus': {}, 'zebra': {}})

    runner = EMRJobRunner(conf_path=False, s3_sync_wait_time=0.01)

    # bucket name should be mrjob- plus 16 random hex digits, so the URI
    # is 's3://mrjob-' (11 chars) + 16 hex digits, and the path starts at
    # offset 27
    s3_scratch_uri = runner._opts['s3_scratch_uri']
    assert_equal(s3_scratch_uri[:11], 's3://mrjob-')
    assert_equal(s3_scratch_uri[27:], '/tmp/')

    # bucket shouldn't actually exist yet
    scratch_bucket, _ = parse_s3_uri(s3_scratch_uri)
    assert_not_in(scratch_bucket, self.mock_s3_fs.keys())

    # need to do something to ensure that the bucket actually gets
    # created. let's launch a (mock) job flow
    jfid = runner.make_persistent_job_flow()
    assert_in(scratch_bucket, self.mock_s3_fs.keys())
    runner.make_emr_conn().terminate_jobflow(jfid)

    # once our scratch bucket is created, we should re-use it
    runner2 = EMRJobRunner(conf_path=False)
    assert_equal(runner2._opts['s3_scratch_uri'], s3_scratch_uri)
示例3: test_local_bootstrap_action
# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 或者: from mrjob.emr.EMRJobRunner import make_persistent_job_flow [as 别名]
def test_local_bootstrap_action(self):
    # make sure that local bootstrap action scripts get uploaded to S3
    action_path = os.path.join(self.tmp_dir, 'apt-install.sh')
    with open(action_path, 'w') as f:
        # "$@" is the script's argument list (the scraped copy of this
        # line had it mangled into an e-mail placeholder). The assertions
        # below only check how the script is uploaded and registered.
        f.write('for $pkg in $@; do sudo apt-get install $pkg; done\n')

    # everything after the script path is passed to it as arguments
    bootstrap_actions = [
        action_path + ' python-scipy mysql-server']

    runner = EMRJobRunner(conf_path=False,
                          bootstrap_actions=bootstrap_actions,
                          s3_sync_wait_time=0.01)

    job_flow_id = runner.make_persistent_job_flow()

    emr_conn = runner.make_emr_conn()
    job_flow = emr_conn.describe_jobflow(job_flow_id)
    actions = job_flow.bootstrapactions

    # our one action, plus the master bootstrap script
    assert_equal(len(actions), 2)

    assert actions[0].path.startswith('s3://mrjob-')
    assert actions[0].path.endswith('/apt-install.sh')
    assert_equal(actions[0].name, 'apt-install.sh')
    assert_equal(actions[0].args, ['python-scipy', 'mysql-server'])

    # check for master bootstrap script
    assert actions[1].path.startswith('s3://mrjob-')
    assert actions[1].path.endswith('b.py')
    assert_equal(actions[1].args, [])
    assert_equal(actions[1].name, 'master')

    # make sure master bootstrap script is on S3
    assert runner.path_exists(actions[1].path)
示例4: main
# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 或者: from mrjob.emr.EMRJobRunner import make_persistent_job_flow [as 别名]
def main():
    """Create a persistent EMR job flow and print its ID.

    Every parsed command-line option except ``quiet`` and ``verbose``
    (which only configure logging) is passed to ``EMRJobRunner`` as a
    keyword argument.
    """
    # parse command-line args; positional arguments are not accepted
    parser = make_option_parser()
    options, args = parser.parse_args()
    if args:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # work on a copy so the parsed options object itself is left untouched;
    # pop() without a default still raises KeyError if a flag is missing
    runner_kwargs = dict(vars(options))
    runner_kwargs.pop('quiet')
    runner_kwargs.pop('verbose')

    # create the persistent job flow and report its ID on stdout
    job_flow_id = EMRJobRunner(**runner_kwargs).make_persistent_job_flow()
    print(job_flow_id)
示例5: main
# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 或者: from mrjob.emr.EMRJobRunner import make_persistent_job_flow [as 别名]
def main():
    """Create a persistent EMR job flow and print its ID.

    Logging for ``mrjob`` is enabled unless ``options.quiet`` is set; all
    remaining parsed options are forwarded to ``EMRJobRunner``.
    """
    # parse command-line args; positional arguments are not accepted
    parser = make_option_parser()
    options, args = parser.parse_args()
    if args:
        parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # the logging flags are consumed above and are not passed to the runner
    runner_kwargs = dict(options.__dict__)
    for logging_flag in ('quiet', 'verbose'):
        del runner_kwargs[logging_flag]

    # create the persistent job flow and report its ID on stdout
    runner = EMRJobRunner(**runner_kwargs)
    print(runner.make_persistent_job_flow())
示例6: test_bootstrap_actions_get_added
# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 或者: from mrjob.emr.EMRJobRunner import make_persistent_job_flow [as 别名]
def test_bootstrap_actions_get_added(self):
    # Bootstrap actions given as S3 URIs should show up on the job flow in
    # the order given, followed by the master bootstrap script.
    runner = EMRJobRunner(
        conf_path=False,
        bootstrap_actions=[
            's3://elasticmapreduce/bootstrap-actions/configure-hadoop -m,mapred.tasktracker.map.tasks.maximum=1',
            's3://foo/bar#xyzzy',  # use alternate name for script
        ],
        s3_sync_wait_time=0.01)

    jfid = runner.make_persistent_job_flow()

    described = runner.make_emr_conn().describe_jobflow(jfid)
    actions = described.bootstrapactions

    # the two actions above, plus the master bootstrap script
    assert_equal(len(actions), 3)

    # script path and its argument are split apart; name is the basename
    configure_hadoop = actions[0]
    assert_equal(
        configure_hadoop.path,
        's3://elasticmapreduce/bootstrap-actions/configure-hadoop')
    assert_equal(
        configure_hadoop.args,
        ['-m,mapred.tasktracker.map.tasks.maximum=1'])
    assert_equal(configure_hadoop.name, 'configure-hadoop')

    # the "#xyzzy" suffix becomes the action's name, not part of its path
    renamed = actions[1]
    assert_equal(renamed.path, 's3://foo/bar')
    assert_equal(renamed.args, [])
    assert_equal(renamed.name, 'xyzzy')

    # check for master bootstrap script
    master = actions[2]
    assert master.path.startswith('s3://mrjob-')
    assert master.path.endswith('b.py')
    assert_equal(master.args, [])
    assert_equal(master.name, 'master')

    # make sure master bootstrap script is on S3
    assert runner.path_exists(master.path)
示例7: main
# 需要导入模块: from mrjob.emr import EMRJobRunner [as 别名]
# 或者: from mrjob.emr.EMRJobRunner import make_persistent_job_flow [as 别名]
def main(args=None):
    """Entry point of the create_job_flow tool.

    Builds ``EMRJobRunner`` keyword arguments from *args* via
    ``runner_kwargs()``, starts a persistent job flow, and prints the new
    job flow's ID to standard output.

    NOTE(review): the original docstring says arguments come from
    ``sys.argv``; presumably that is what ``runner_kwargs()`` falls back
    to when *args* is ``None`` -- confirm against that helper.
    """
    kwargs = runner_kwargs(args)
    job_flow_id = EMRJobRunner(**kwargs).make_persistent_job_flow()
    print(job_flow_id)