This article collects and summarizes typical code examples of the Python method boto.emr.connection.EmrConnection.terminate_jobflow. If you are wondering how exactly EmrConnection.terminate_jobflow is used, or are looking for working examples of it, the hand-picked samples below should help. You can also read more about its containing class, boto.emr.connection.EmrConnection.
The following presents 8 code examples of the EmrConnection.terminate_jobflow method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
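Before the examples, here is the call in its simplest form: terminate_jobflow takes a single job-flow (cluster) ID and asks EMR to shut that cluster down. A minimal sketch, assuming valid credentials and a hypothetical job-flow ID:

# Minimal sketch; the credentials and the job-flow ID below are placeholders.
from boto.emr.connection import EmrConnection

conn = EmrConnection('<aws_access_key_id>', '<aws_secret_access_key>')
jobflow_id = 'j-XXXXXXXXXXXXX'  # hypothetical ID of a running cluster
conn.terminate_jobflow(jobflow_id)  # asynchronous: the call returns before shutdown completes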
Example 1: terminate
# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
def terminate(cluster_id):
    try:
        emr_connection = EmrConnection()
        # Termination protection must be lifted before the cluster can be terminated.
        emr_connection.set_termination_protection(cluster_id, False)
        emr_connection.terminate_jobflow(cluster_id)
        return True
    except Exception as e:
        print(e)
        return False
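With no arguments, EmrConnection() picks up credentials from the environment or the boto configuration. A hedged sketch of calling the helper above, with a placeholder cluster ID:

# Hypothetical driver for the terminate() helper above.
cluster_id = 'j-XXXXXXXXXXXXX'  # placeholder cluster ID
if terminate(cluster_id):
    print('Termination requested for %s' % cluster_id)
else:
    print('Could not terminate %s' % cluster_id)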
Example 2: creating_a_connection
# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
class EMR:
    def creating_a_connection(self):
        # Creating a connection
        from boto.emr.connection import EmrConnection
        self.conn = EmrConnection('', '')

    def creating_streaming_job(self):
        # Creating Streaming JobFlow steps
        from boto.emr.step import StreamingStep
        self.step = StreamingStep(name='my bigdata task',
                                  mapper='s3n://eth-src/raw_to_stations.py',
                                  #mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
                                  reducer='s3n://eth-src/stations_to_features.py',
                                  #reducer='aggregate',
                                  input='s3n://eth-input/2007.csv',
                                  #input='s3n://elasticmapreduce/samples/wordcount/input',
                                  output='s3n://eth-middle/2007')

    def creating_jobflows(self):
        # Creating JobFlows
        #import boto.emr
        #self.conn = boto.emr.connect_to_region('eu-west-1')
        job_id = self.conn.run_jobflow(name='My jobflow',
                                       log_uri='s3://eth-log/jobflow_logs',
                                       master_instance_type='m3.xlarge',
                                       slave_instance_type='m1.large',
                                       num_instances=2,
                                       steps=[self.step],
                                       ami_version='3.3.1')
        status = self.conn.describe_jobflow(job_id)
        print(status.state)  # e.g. STARTING, BOOTSTRAPPING, RUNNING
        return job_id

    def terminating_jobflows(self, job_id):
        # Terminating JobFlows
        #self.conn = boto.emr.connect_to_region('eu-west-1')
        self.conn.terminate_jobflow(job_id)
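A hedged sketch of driving this class end to end (it assumes creating_jobflows() returns the job-flow ID, as in the version above; the empty credential strings must be filled in first):

# Usage sketch for the EMR class above; not part of the original example.
emr = EMR()
emr.creating_a_connection()        # replace the empty strings with real credentials first
emr.creating_streaming_job()       # defines self.step
job_id = emr.creating_jobflows()   # launches the cluster and runs the step
# ... once the job has done its work:
emr.terminating_jobflows(job_id)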
Example 3: __init__
# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
#......... part of this code omitted .........
            if contents.startswith('FinalRank'):
                self._is_done = True  # cache result
                break

        return self._is_done

    def is_alive(self):
        """
        Checks whether the jobflow has completed, failed, or been
        terminated.

        Special notes:
            WARNING! This method should only be called **after**
            is_done() in order to be able to distinguish between the
            cases where the map-reduce job has outputted 'FinalRank'
            on its final iteration and has a 'COMPLETED' state.
        """
        jobflow = self.describe()
        if jobflow.state in ('COMPLETED', 'FAILED', 'TERMINATED'):
            return False
        return True

    def terminate(self):
        """
        Terminates a running map-reduce job.
        """
        if not self.job_id:
            raise RankmaniacError('No job is running.')

        self._emr_conn.terminate_jobflow(self.job_id)
        self.job_id = None
        self._reset()

    def download(self, outdir='results'):
        """
        Downloads the results from Amazon S3 to the local directory.

        Keyword arguments:
            outdir  <str>   the base directory to which to
                            download contents.

        Special notes:
            This method downloads all keys (files) from the configured
            bucket for this particular team. It creates subdirectories
            as needed.
        """
        bucket = self._s3_conn.get_bucket(self._s3_bucket)
        keys = bucket.list(prefix=self._get_keyname())
        for key in keys:
            keyname = key.name
            # Ignore folder keys
            if '$' not in keyname:
                suffix = keyname.split('/')[1:]  # removes team identifier
                filename = os.path.join(outdir, *suffix)
                dirname = os.path.dirname(filename)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                key.get_contents_to_filename(filename)
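The warning in the is_alive() docstring implies a specific polling order: check is_done() first, then is_alive(). A hedged sketch of such a loop (the wait interval is an assumption):

# Polling sketch honoring the is_done()-before-is_alive() ordering; interval is an assumption.
import time

def wait_for_result(job, poll_interval=20):
    while True:
        if job.is_done():           # success: 'FinalRank' appeared in the output
            return True
        if not job.is_alive():      # COMPLETED/FAILED/TERMINATED without FinalRank
            return False
        time.sleep(poll_interval)   # avoid hammering the describe API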
Example 4: EMRCluster
# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
class EMRCluster(object):
    '''Representation of an EMR cluster.
    TODO: add bridge to boto interface for unit test.
    '''
    emr_status_delay = 10       # in sec
    emr_status_max_delay = 60   # in sec
    emr_status_max_error = 30   # number of errors
    emr_max_idle = 10 * 60      # 10 min (in sec)
    rate_limit_lock = RateLimitLock()

    def __init__(self, prop):
        '''Constructor, initialize EMR connection.'''
        self.prop = prop
        self.conn = EmrConnection(self.prop.ec2.key, self.prop.ec2.secret)
        self.jobid = None
        self.retry = 0
        self.level = 0
        self.last_update = -1

    @property
    def priority(self):
        '''The priority used in EMRManager.
        The lower the value, the higher the priority.
        '''
        with EMRCluster.rate_limit_lock:
            if self.jobid is None:
                return 1
            return 0

    def get_instance_groups(self):
        '''Get instance groups to start a cluster.
        It calculates the price with self.level, which indicates the
        price upgrades from the original price.
        '''
        instance_groups = []
        for group in self.prop.emr.instance_groups:
            (num, group_name, instance_type) = group
            level = max(0, min(self.level, len(self.prop.emr.price_upgrade_rate) - 1))  # 0 <= level < len(...)
            bprice = self.prop.emr.prices[instance_type] * self.prop.emr.price_upgrade_rate[level]
            name = '%s-%s@%f' % (group_name, 'SPOT', bprice)

            # Use an on-demand instance if the bid price is zero.
            if bprice > 0:
                ig = InstanceGroup(num, group_name, instance_type, 'SPOT', name, '%.3f' % bprice)
            else:
                ig = InstanceGroup(num, group_name, instance_type, 'ON_DEMAND', name)

            instance_groups.append(ig)
        return instance_groups

    def get_bootstrap_actions(self):
        '''Get list of bootstrap actions from property.'''
        actions = []
        for bootstrap_action in self.prop.emr.bootstrap_actions:
            assert len(bootstrap_action) >= 2, 'Wrong bootstrap action definition: ' + str(bootstrap_action)
            actions.append(BootstrapAction(bootstrap_action[0], bootstrap_action[1], bootstrap_action[2:]))
        return actions

    @synchronized
    def start(self):
        '''Start an EMR cluster.'''
        # emr.project_name is required
        if self.prop.emr.project_name is None:
            raise ValueError('emr.project_name is not set')

        self.last_update = time.time()
        with EMRCluster.rate_limit_lock:
            self.jobid = self.conn.run_jobflow(name=self.prop.emr.cluster_name,
                                               ec2_keyname=self.prop.emr.keyname,
                                               log_uri=self.prop.emr.log_uri,
                                               ami_version=self.prop.emr.ami_version,
                                               bootstrap_actions=self.get_bootstrap_actions(),
                                               keep_alive=True,
                                               action_on_failure='CONTINUE',
                                               api_params={'VisibleToAllUsers': 'true'},
                                               instance_groups=self.get_instance_groups())
        message('Job flow created: %s', self.jobid)

        # Tag EC2 instances to allow future analysis
        tags = {'FlowControl': 'Briefly',
                'Project': self.prop.emr.project_name}
        if self.prop.emr.tags is not None:
            assert isinstance(self.prop.emr.tags, dict)
            tags = dict(tags.items() + self.prop.emr.tags.items())
        self.conn.add_tags(self.jobid, tags)

    @synchronized
    def terminate(self, level_upgrade=0):
        '''Terminate this EMR cluster.'''
        if self.jobid is None:
            return

        self.level += level_upgrade  # upgrade to another price level
        message('Terminate jobflow: %s', self.jobid)
        for i in xrange(3):
            try:
                with EMRCluster.rate_limit_lock:
                    self.conn.terminate_jobflow(self.jobid)
#......... part of this code omitted .........
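The truncated terminate() above retries terminate_jobflow up to three times under a rate-limit lock. For illustration, a standalone retry wrapper in the same spirit might look like this (a hedged sketch, not the omitted code):

# Hedged sketch of retrying terminate_jobflow with a simple backoff; not the omitted original.
import time

def terminate_with_retry(conn, jobid, attempts=3, delay=10):
    for i in range(attempts):
        try:
            conn.terminate_jobflow(jobid)
            return True
        except Exception as e:
            print('terminate_jobflow failed (attempt %d): %s' % (i + 1, e))
            time.sleep(delay * (i + 1))  # back off a little longer each attempt
    return False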
Example 5: EmrManager
# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
#......... part of this code omitted .........
            return cluster_id
        except:
            logging.error("Launching EMR cluster failed")
            return "FAILED"

    # run scripting step in cluster
    def run_scripting_step(self, cluster_id, name, script_path):
        try:
            step = ScriptRunnerStep(name=name,
                                    step_args=[script_path],
                                    action_on_failure="CONTINUE")
            return self._run_step(cluster_id, step)
        except:
            logging.error("Running scripting step in cluster " + cluster_id + " failed.")
            return "FAILED"

    # run streaming step in cluster
    def run_streaming_step(self, cluster_id, name, mapper_path, reducer_path, input_path, output_path):
        try:
            # bundle files with the job
            files = []
            if mapper_path != "NONE":
                files.append(mapper_path)
                mapper_path = mapper_path.split("/")[-1]
            if reducer_path != "NONE":
                files.append(reducer_path)
                reducer_path = reducer_path.split("/")[-1]
            # build streaming step
            logging.debug("Launching streaming step with mapper: " + mapper_path + " reducer: " + reducer_path + " and files: " + str(files))
            step = StreamingStep(name=name,
                                 step_args=["-files"] + files,
                                 mapper=mapper_path,
                                 reducer=reducer_path,
                                 input=input_path,
                                 output=output_path,
                                 action_on_failure="CONTINUE")
            return self._run_step(cluster_id, step)
        except:
            logging.error("Running streaming step in cluster " + cluster_id + " failed.")
            return "FAILED"

    # run mapreduce jar step in cluster
    def run_jar_step(self, cluster_id, name, jar_path, class_name, input_path, output_path):
        try:
            # build jar step
            logging.debug("Launching jar step with jar: " + jar_path + " class name: " + class_name + " input: " + input_path + " and output: " + output_path)
            step = JarStep(name=name,
                           jar=jar_path,
                           step_args=[class_name,
                                      input_path,
                                      output_path])
            return self._run_step(cluster_id, step)
        except:
            logging.error("Running jar step in cluster " + cluster_id + " failed.")
            return "FAILED"

    def _run_step(self, cluster_id, step):
        step_list = self.connection.add_jobflow_steps(cluster_id, [step])
        step_id = step_list.stepids[0].value
        logging.info("Starting step " + step_id + " in cluster " + cluster_id + ". Please be patient. Check the progress of the job in your AWS Console")

        # Checking the state of the step
        state = self._find_step_state(cluster_id, step_id)
        while state not in (u'NOT_FOUND', u'ERROR', u'FAILED', u'COMPLETED'):
            # sleeping before rechecking the status
            time.sleep(int(self.step_status_wait))
            state = self._find_step_state(cluster_id, step_id)
            logging.info("Running step " + step_id + " in cluster " + cluster_id + ". Status: " + state)

        if state == u'FAILED':
            logging.error("Step " + step_id + " failed in cluster: " + cluster_id)
            return "FAILED"
        if state == u'NOT_FOUND':
            logging.error("Step " + step_id + " could not be found in cluster: " + cluster_id)
            return "NOT_FOUND"
        if state == u'ERROR':
            logging.error("Step " + step_id + " produced an error in _find_step_state in cluster: " + cluster_id)
            return "ERROR"

        # The step completed; the caller may launch the next steps.
        if state == u'COMPLETED':
            logging.info("Step " + step_id + " successfully completed in cluster: " + cluster_id)
            return step_id

    def _find_step_state(self, cluster_id, step_id):
        try:
            step_summary_list = self.connection.list_steps(cluster_id)
            for step_summary in step_summary_list.steps:
                if step_summary.id == step_id:
                    return step_summary.status.state
            return "NOT_FOUND"
        except:
            return "ERROR"

    # Method for terminating the EMR cluster
    def terminate_cluster(self, cluster_id):
        self.connection.terminate_jobflow(cluster_id)
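Since EmrManager's constructor and launch method are omitted above, the driver below is only a hedged sketch: EmrManager() and launch_cluster() are assumed names, and the S3 paths are placeholders.

# Hypothetical usage; constructor/launch signatures are assumptions, paths are placeholders.
manager = EmrManager()                    # assumed constructor
cluster_id = manager.launch_cluster()     # assumed launch method returning a cluster ID
step_id = manager.run_streaming_step(cluster_id, "wordcount",
                                     "s3://my-bucket/mapper.py",
                                     "s3://my-bucket/reducer.py",
                                     "s3://my-bucket/input/",
                                     "s3://my-bucket/output/")
if step_id not in ("FAILED", "NOT_FOUND", "ERROR"):
    print("Step finished: " + step_id)
manager.terminate_cluster(cluster_id)     # always shut the cluster down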
Example 6: EmrClient
# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
class EmrClient(object):
    # The Hadoop version to use
    HADOOP_VERSION = '1.0.3'
    # The AMI version to use
    AMI_VERSION = '2.4.7'
    # Interval to wait between polls to EMR cluster in seconds
    CLUSTER_OPERATION_RESULTS_POLLING_SECONDS = 10
    # Timeout for EMR creation and ramp up in seconds
    CLUSTER_OPERATION_RESULTS_TIMEOUT_SECONDS = 60 * 30

    def __init__(self, region_name='us-east-1', aws_access_key_id=None, aws_secret_access_key=None):
        # If the access key is not specified, get it from the luigi configuration file
        if not aws_access_key_id:
            aws_access_key_id = luigi.configuration.get_config().get('aws', 'aws_access_key_id')
        if not aws_secret_access_key:
            aws_secret_access_key = luigi.configuration.get_config().get('aws', 'aws_secret_access_key')

        # Create the region in which to run
        region_endpoint = u'elasticmapreduce.%s.amazonaws.com' % (region_name)
        region = RegionInfo(name=region_name, endpoint=region_endpoint)

        self.emr_connection = EmrConnection(aws_access_key_id=aws_access_key_id,
                                            aws_secret_access_key=aws_secret_access_key,
                                            region=region)

    def launch_emr_cluster(self, cluster_name, log_uri, ec2_keyname=None, master_type='m1.small', core_type='m1.small', num_instances=2, hadoop_version='1.0.3', ami_version='2.4.7'):
        # TODO Remove
        # install_pig_step = InstallPigStep()

        # note: the class-level HADOOP_VERSION/AMI_VERSION constants are used below,
        # not the hadoop_version/ami_version parameters
        jobflow_id = self.emr_connection.run_jobflow(name=cluster_name,
                                                     log_uri=log_uri,
                                                     ec2_keyname=ec2_keyname,
                                                     master_instance_type=master_type,
                                                     slave_instance_type=core_type,
                                                     num_instances=num_instances,
                                                     keep_alive=True,
                                                     enable_debugging=True,
                                                     hadoop_version=EmrClient.HADOOP_VERSION,
                                                     steps=[],
                                                     ami_version=EmrClient.AMI_VERSION)

        # Log important information
        status = self.emr_connection.describe_jobflow(jobflow_id)

        logger.info('Creating new cluster %s with the following details' % status.name)
        logger.info('jobflow ID:\t%s' % status.jobflowid)
        logger.info('Log URI:\t%s' % status.loguri)
        logger.info('Master Instance Type:\t%s' % status.masterinstancetype)

        # A cluster of size 1 does not have any slave instances
        if hasattr(status, 'slaveinstancetype'):
            logger.info('Slave Instance Type:\t%s' % status.slaveinstancetype)

        logger.info('Number of Instances:\t%s' % status.instancecount)
        logger.info('Hadoop Version:\t%s' % status.hadoopversion)
        logger.info('AMI Version:\t%s' % status.amiversion)
        logger.info('Keep Alive:\t%s' % status.keepjobflowalivewhennosteps)

        return self._poll_until_cluster_ready(jobflow_id)

    def add_pig_step(self, jobflow_id, pig_file, name='Pig Script', pig_versions='latest', pig_args=[]):
        pig_step = PigStep(name=name,
                           pig_file=pig_file,
                           pig_versions=pig_versions,
                           pig_args=pig_args,
                           # action_on_failure='CONTINUE',
                           )
        self.emr_connection.add_jobflow_steps(jobflow_id, [pig_step])

        # Poll until the cluster is done working
        return self._poll_until_cluster_ready(jobflow_id)

    def shutdown_emr_cluster(self, jobflow_id):
        self.emr_connection.terminate_jobflow(jobflow_id)
        return self._poll_until_cluster_shutdown(jobflow_id)

    def get_jobflow_id(self):
        # Get the id of the cluster that is WAITING for work
        return self.emr_connection.list_clusters(cluster_states=['WAITING']).clusters[0].id

    def get_master_dns(self):
        """
        Get the master node's public address
        """
        # Get the jobflow ID
        jobflow_id = self.get_jobflow_id()
#......... part of this code omitted .........
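A hedged driver for EmrClient using only the methods shown above (the bucket paths are placeholders; the _poll_until_* helpers are among the omitted code):

# Usage sketch; S3 paths are placeholders, not from the original example.
client = EmrClient(region_name='us-east-1')
jobflow_id = client.launch_emr_cluster(cluster_name='test-cluster',
                                       log_uri='s3://my-bucket/emr-logs')
client.add_pig_step(jobflow_id, pig_file='s3://my-bucket/scripts/job.pig')
client.shutdown_emr_cluster(jobflow_id)   # terminate_jobflow, then wait for shutdown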
Example 7: __init__
# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
class Rankmaniac:
    '''Rankmaniac Wrapper

    This class provides a simple wrapper around the Amazon Web Services SDK.
    It should provide all the functionality required in terms of MapReduce,
    so students don't need to worry about learning the EMR and S3 API.
    '''

    def __init__(self, team_id, access_key, secret_key):
        '''Rankmaniac class constructor

        Creates a new instance of the Rankmaniac Wrapper for a specific
        team.

        Arguments:
            team_id       string      the team ID.
            access_key    string      AWS access key.
            secret_key    string      AWS secret key.
        '''
        self.s3_bucket = 'cs144caltech'

        self.team_id = team_id
        self.emr_conn = EmrConnection(access_key, secret_key)
        self.s3_conn = S3Connection(access_key, secret_key)
        self.job_id = None

    def __del__(self):
        if self.job_id:
            self.terminate_job()

    def submit_job(self, mapper, reducer, input, output, num_map=1,
                   num_reduce=1):
        '''Submit a new MapReduce job

        Submits a new MapReduce job with a single step. To add more steps,
        call add_step. To terminate this job, call terminate_job.

        Arguments:
            mapper        string      path to the mapper, relative to
                                      your data directory.
            reducer       string      path to the reducer, relative to
                                      your data directory.
            input         string      path to the input data, relative to
                                      your data directory. To specify a
                                      directory as input, ensure your path
                                      contains a trailing /.
            output        string      path to the desired output directory.
            num_map       int         number of map tasks for this job.
            num_reduce    int         number of reduce tasks for this job.
        '''
        if self.job_id:
            raise Exception('There currently already exists a running job.')

        job_name = self._make_name()
        step = self._make_step(mapper, reducer, input, output, num_map,
                               num_reduce)
        self.job_id = \
            self.emr_conn.run_jobflow(name=job_name,
                                      steps=[step],
                                      num_instances=1,
                                      log_uri=self._get_s3_url() + 'job_logs',
                                      keep_alive=True)

    def terminate_job(self):
        '''Terminate a running MapReduce job

        Stops the currently running job.
        '''
        if not self.job_id:
            raise Exception('No job is running.')

        self.emr_conn.terminate_jobflow(self.job_id)
        self.job_id = None

    def get_job(self):
        '''Gets the running job details

        Returns:
            A JobFlow object with the relevant fields:
                state    string      the state of the job flow, one of
                                     COMPLETED | FAILED | TERMINATED |
                                     RUNNING | SHUTTING_DOWN | STARTING |
                                     WAITING | BOOTSTRAPPING
                steps    list(Step)  a list of the step details in the
                                     workflow. A Step has the relevant
                                     fields:
                                         status          string
                                         startdatetime   string
                                         enddatetime     string

        Note: Amazon has an upper limit on the frequency with which you can
              call this function; we have had success with calling it once
              every 10 seconds.
        '''
        if not self.job_id:
#......... part of this code omitted .........
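Putting the wrapper to work looks roughly like this (a hedged sketch; the team ID, keys, and file names are placeholders, and the 10-second sleep follows the advice in the get_job() docstring):

# Usage sketch; credentials and paths are placeholders.
import time

rm = Rankmaniac('team42', '<access_key>', '<secret_key>')
rm.submit_job(mapper='pagerank_map.py', reducer='pagerank_reduce.py',
              input='input/', output='output')
while True:
    jobflow = rm.get_job()
    if jobflow.state in ('COMPLETED', 'FAILED', 'TERMINATED'):
        break
    time.sleep(10)  # respect the API rate limit noted in get_job()
rm.terminate_job()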
Example 8: str
# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
    # (continuation of a jobid = emrcon.run_jobflow(...) call; the opening arguments are omitted)
    steps=[step],
    num_instances=1
)

# <codecell>

print(jobid)

# start a new thread to check program status
result_queue = multiprocessing.Queue()
process = multiprocessing.Process(target=check_status, args=[emrcon, jobid, result_queue])
process.start()

# finished
result = result_queue.get()
emrcon.terminate_jobflow(jobid)

# <codecell>

import re

# <codecell>

for word in b.list():
    keystring = str(word.key)
    if re.match('part-00000', keystring):  # arguments are (pattern, string)
        word.get_contents_to_filename('/Users/winteram/Documents/Teaching/wordcount_output.txt')

# <codecell>
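The check_status function passed to multiprocessing.Process above is not shown in the snippet. A hedged reconstruction of what such a helper might look like (the terminal states and polling interval are assumptions):

# Hedged sketch of a check_status helper; not the original code.
import time

def check_status(conn, jobflow_id, result_queue, poll_seconds=30):
    # Poll the job flow and report its first terminal-ish state back to the parent.
    while True:
        state = conn.describe_jobflow(jobflow_id).state
        if state in ('COMPLETED', 'FAILED', 'TERMINATED', 'WAITING'):
            result_queue.put(state)
            return
        time.sleep(poll_seconds)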