本文整理汇总了 Python 中 boto.emr.connection.EmrConnection.list_steps 方法的典型用法代码示例。如果您正苦于以下问题：Python EmrConnection.list_steps 方法的具体用法？Python EmrConnection.list_steps 怎么用？Python EmrConnection.list_steps 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 boto.emr.connection.EmrConnection 的用法示例。
在下文中一共展示了 EmrConnection.list_steps 方法的 3 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: EmrManager
# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 或者: from boto.emr.connection.EmrConnection import list_steps [as 别名]
#.........这里部分代码省略.........
return cluster_id
except:
logging.error("Launching EMR cluster failed")
return "FAILED"
# run scripting step in cluster
def run_scripting_step(self, cluster_id, name, script_path):
    """Run a script as a step on an EMR cluster and wait for the result.

    Builds a ``ScriptRunnerStep`` whose only argument is ``script_path``
    (with ``action_on_failure="CONTINUE"``) and hands it to
    ``self._run_step``.

    Args:
        cluster_id: Identifier of the cluster the step is added to.
        name: Name given to the step.
        script_path: Location of the script to run.

    Returns:
        Whatever ``self._run_step`` returns, or the string ``"FAILED"`` if
        building or running the step raised an exception.
    """
    try:
        step = ScriptRunnerStep(
            name=name,
            step_args=[script_path],
            action_on_failure="CONTINUE",
        )
        return self._run_step(cluster_id, step)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate. The message uses
        # lazy %-formatting so the handler cannot itself raise when
        # ``cluster_id`` is not a string, and ``logging.exception`` keeps
        # the traceback of the real cause.
        logging.exception("Running scripting step in cluster %s failed.", cluster_id)
        return "FAILED"
# run streaming step in cluster
def run_streaming_step(self, cluster_id, name, mapper_path, reducer_path, input_path, output_path):
    """Run a Hadoop streaming step on an EMR cluster and wait for the result.

    A mapper or reducer path other than the literal ``"NONE"`` is shipped
    with the job through the ``-files`` option and is then referred to by
    its base name (the part after the last ``/``).

    Args:
        cluster_id: Identifier of the cluster the step is added to.
        name: Name given to the step.
        mapper_path: Location of the mapper, or ``"NONE"``.
        reducer_path: Location of the reducer, or ``"NONE"``.
        input_path: Input location passed to the streaming step.
        output_path: Output location passed to the streaming step.

    Returns:
        Whatever ``self._run_step`` returns, or the string ``"FAILED"`` if
        building or running the step raised an exception.
    """
    try:
        # Bundle the mapper/reducer files with the job.
        files = []
        if mapper_path != "NONE":
            files.append(mapper_path)
            mapper_path = mapper_path.split("/")[-1]
        if reducer_path != "NONE":
            files.append(reducer_path)
            reducer_path = reducer_path.split("/")[-1]

        # ``-files`` takes ONE comma-separated value; passing every file as
        # its own argument (or passing ``-files`` with no value at all)
        # produces an invalid command line.
        step_args = ["-files", ",".join(files)] if files else []

        logging.debug(
            "Launching streaming step with mapper: %s reducer: %s and files: %s",
            mapper_path, reducer_path, files,
        )
        step = StreamingStep(
            name=name,
            step_args=step_args,
            mapper=mapper_path,
            reducer=reducer_path,
            input=input_path,
            output=output_path,
            action_on_failure="CONTINUE",
        )
        return self._run_step(cluster_id, step)
    except Exception:
        # ``except Exception`` lets KeyboardInterrupt / SystemExit through;
        # lazy %-formatting keeps the handler from raising on a non-string
        # ``cluster_id``; ``logging.exception`` records the traceback.
        logging.exception("Running streaming step in cluster %s failed.", cluster_id)
        return "FAILED"
# run mapreduce jar step in cluster
def run_jar_step(self, cluster_id, name, jar_path, class_name, input_path, output_path):
    """Run a MapReduce jar step on an EMR cluster and wait for the result.

    Args:
        cluster_id: Identifier of the cluster the step is added to.
        name: Name given to the step.
        jar_path: Location of the jar to execute.
        class_name: First step argument (the class to run).
        input_path: Second step argument (input location).
        output_path: Third step argument (output location).

    Returns:
        Whatever ``self._run_step`` returns, or the string ``"FAILED"`` if
        building or running the step raised an exception.
    """
    try:
        # Build the jar step.
        logging.debug(
            "Launching jar step with jar: %s class name: %s input: %s and output: %s",
            jar_path, class_name, input_path, output_path,
        )
        # NOTE(review): unlike the scripting and streaming steps, no
        # ``action_on_failure`` is passed here, so the library default
        # applies -- confirm that this difference is intended.
        step = JarStep(
            name=name,
            jar=jar_path,
            step_args=[class_name, input_path, output_path],
        )
        return self._run_step(cluster_id, step)
    except Exception:
        # ``except Exception`` lets KeyboardInterrupt / SystemExit through;
        # lazy %-formatting keeps the handler from raising on a non-string
        # ``cluster_id``; ``logging.exception`` records the traceback.
        logging.exception("Running jar step in cluster %s failed.", cluster_id)
        return "FAILED"
def _run_step(self, cluster_id, step):
    """Add ``step`` to a cluster and block until it reaches a final state.

    The step is submitted with ``self.connection.add_jobflow_steps`` and its
    state is then polled through ``self._find_step_state`` every
    ``self.step_status_wait`` seconds.

    Args:
        cluster_id: Identifier of the cluster the step is added to.
        step: Step object to submit.

    Returns:
        The step id when the step ends in ``COMPLETED``; otherwise the name
        of the final state as a string (``"FAILED"``, ``"CANCELLED"``,
        ``"INTERRUPTED"``, ``"NOT_FOUND"`` or ``"ERROR"``).
    """
    # Every state after which polling must stop. CANCELLED and INTERRUPTED
    # are final step states as well; without them the loop below would
    # never terminate for a step that was cancelled or interrupted.
    final_states = (
        u'COMPLETED', u'FAILED', u'CANCELLED', u'INTERRUPTED',
        u'NOT_FOUND', u'ERROR',
    )

    step_list = self.connection.add_jobflow_steps(cluster_id, [step])
    step_id = step_list.stepids[0].value
    logging.info(
        "Starting step %s in cluster %s. Please be patient. "
        "Check the progress of the job in your AWS Console",
        step_id, cluster_id,
    )

    # Poll the state of the step until it is final.
    state = self._find_step_state(cluster_id, step_id)
    while state not in final_states:
        # Sleep before re-checking the status.
        time.sleep(int(self.step_status_wait))
        state = self._find_step_state(cluster_id, step_id)
        logging.info("Starting step %s in cluster %s. Status: %s", step_id, cluster_id, state)

    if state == u'COMPLETED':
        logging.info("Step %s succesfully completed in cluster: %s", step_id, cluster_id)
        return step_id
    if state == u'FAILED':
        logging.error("Step %s failed in cluster: %s", step_id, cluster_id)
        return "FAILED"
    if state == u'NOT_FOUND':
        logging.error("Step %s could not be found in cluster: %s", step_id, cluster_id)
        return "NOT_FOUND"
    if state == u'ERROR':
        logging.error(
            "Step %s produced an error in _find_step_state in cluster: %s",
            step_id, cluster_id,
        )
        return "ERROR"

    # Remaining final states: CANCELLED / INTERRUPTED.
    logging.error("Step %s ended in state %s in cluster: %s", step_id, state, cluster_id)
    return str(state)
def _find_step_state(self, cluster_id, step_id):
    """Look up the current state of one step of a cluster.

    Makes a single ``self.connection.list_steps(cluster_id)`` call and
    searches the returned steps for ``step_id``.

    Args:
        cluster_id: Identifier of the cluster to query.
        step_id: Identifier of the step whose state is wanted.

    Returns:
        The ``status.state`` of the matching step, ``"NOT_FOUND"`` if no
        returned step has that id, or ``"ERROR"`` if the lookup raised.
    """
    try:
        listing = self.connection.list_steps(cluster_id)
        for summary in listing.steps:
            if summary.id == step_id:
                return summary.status.state
        return "NOT_FOUND"
    except Exception:
        # ``except Exception`` lets KeyboardInterrupt / SystemExit through,
        # and the traceback is logged so the reason behind an "ERROR"
        # result is not lost.
        logging.exception(
            "Could not look up state of step %s in cluster %s", step_id, cluster_id
        )
        return "ERROR"
#Method for terminating the EMR cluster
def terminate_cluster(self, cluster_id):
    """Ask the EMR connection to terminate the given cluster.

    Args:
        cluster_id: Identifier passed to ``terminate_jobflow``.

    Returns:
        None; the result of ``terminate_jobflow`` is discarded.
    """
    emr = self.connection
    emr.terminate_jobflow(cluster_id)
示例2: enumerate
# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 或者: from boto.emr.connection.EmrConnection import list_steps [as 别名]
# Python 2's input() evaluates whatever the user types as an expression;
# raw_input() returns the text unchanged. On Python 3 raw_input no longer
# exists and input() already behaves that way.
try:
    read_line = raw_input
except NameError:
    read_line = input

# Show the available clusters with their list index.
for index, cluster in enumerate(clusters.clusters):
    print("[%s] %s" % (index, cluster.id))

# if there is a command line arg, use it for the cluster_id
if len(sys.argv) > 1:
    cluster_id = sys.argv[1]
else:
    if not clusters.clusters:
        sys.exit("No EMR clusters running.")
    selected_cluster = read_line("Select a Cluster: ")
    cluster_id = clusters.clusters[int(selected_cluster)].id
print(cluster_id)

# List EMR Steps
steps = emr_conn.list_steps(cluster_id)
for index, step in enumerate(steps.steps):
    # Named ``created_at`` rather than ``time`` so the standard module
    # name is not shadowed. The creation timestamp is converted to the
    # local time zone for display.
    created_at = dateutil.parser.parse(
        step.status.timeline.creationdatetime
    ).astimezone(tz.tzlocal())
    print("[%s] NAME: %s - STATE: %s - START TIME: %s" % (
        index, step.name, step.status.state,
        created_at.strftime("%Y-%m-%d %H:%M")))
# Number of steps listed above.
step_cnt = len(steps.steps)

# if there are two command line args, use the second one as the selected step index
if len(sys.argv) > 2:
    selected_step = sys.argv[2]
else:
    # Mirror the cluster branch: nothing to choose from means nothing to do.
    if not steps.steps:
        sys.exit("No steps found in cluster %s." % cluster_id)
    selected_step = read_line("Select a Step: ")
step_id = steps.steps[int(selected_step)].id
print(step_id)
示例3: __init__
# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 或者: from boto.emr.connection.EmrConnection import list_steps [as 别名]
#.........这里部分代码省略.........
Returns a boto.emr.emrobject.JobFlow object.
Special notes:
The JobFlow object has the following relevant fields.
state <str> the state of the job flow,
either COMPLETED
| FAILED
| TERMINATED
| RUNNING
| SHUTTING_DOWN
| STARTING
| WAITING
steps <list(boto.emr.emrobject.Step)>
a list of the step details in the workflow.
The Step object has the following relevant fields.
state <str> the state of the step.
startdatetime <str> the start time of the
job.
enddatetime <str> the end time of the job.
WARNING! Amazon has an upper-limit on the frequency with
which you can call this method; we have had success with
calling it at most once every 10 seconds.
"""
if not self.job_id:
raise RankmaniacError('No job is running.')
cinfo = self._emr_conn.describe_cluster(self.job_id)
sinfo1 = self._emr_conn.list_steps(self.job_id)
steps = sinfo1.steps
if "marker" in dir(sinfo1):
sinfo2 = self._emr_conn.list_steps(self.job_id, marker=sinfo1.marker)
steps += sinfo2.steps
return {"cluster": cinfo, "steps": steps}
def _get_last_process_step_iter_no(self, jobdesc=None):
    """
    Returns the iteration number of the most recently completed
    process-step of the job flow, computed as
    ``(number of COMPLETED steps) // 2 - 1``.

    NOTE(review): the formula presumably assumes two steps per iteration
    and zero-based iteration numbers -- confirm against the code that
    submits the steps.

    Keyword arguments:
        jobdesc     <dict>      cached job description to use; its
                                "steps" entry is the list of steps.
                                When omitted, self.describe() is called.

    Returns -1 when fewer than two steps have completed.
    """
    if jobdesc is None:
        jobdesc = self.describe()

    completed = sum(
        1 for step in jobdesc["steps"] if step.status.state == 'COMPLETED'
    )
    # Floor division keeps the result an int under Python 3 (and under
    # ``from __future__ import division``), matching Python 2's ``/``.
    return completed // 2 - 1