当前位置: 首页>>代码示例>>Python>>正文


Python EmrConnection.list_steps方法代码示例

本文整理汇总了Python中boto.emr.connection.EmrConnection.list_steps方法的典型用法代码示例。如果您正苦于以下问题:Python EmrConnection.list_steps方法的具体用法?Python EmrConnection.list_steps怎么用?Python EmrConnection.list_steps使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在boto.emr.connection.EmrConnection的用法示例。


在下文中一共展示了EmrConnection.list_steps方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: EmrManager

# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 或者: from boto.emr.connection.EmrConnection import list_steps [as 别名]

#.........这里部分代码省略.........
                return cluster_id
        except:
            logging.error("Launching EMR cluster failed")
            return "FAILED"

    # run scripting step in cluster
    def run_scripting_step(self, cluster_id, name, script_path):
        """Submit a script-runner step to a cluster and return its outcome.

        Builds a ``ScriptRunnerStep`` with ``action_on_failure="CONTINUE"``
        and hands it to ``self._run_step``.

        Args:
            cluster_id: identifier of the target cluster.
            name: display name given to the step.
            script_path: location of the script; passed as the step's only
                argument.

        Returns:
            Whatever ``self._run_step`` returns, or the string ``"FAILED"``
            when building or running the step raised an exception.
        """
        try:
            step = ScriptRunnerStep(name=name,
                                    step_args=[script_path],
                                    action_on_failure="CONTINUE")
            return self._run_step(cluster_id, step)
        except Exception:
            # Catch Exception only, so Ctrl-C / SystemExit still propagate
            # instead of being reported as a failed step.
            # Lazy %-formatting keeps this handler from raising when
            # cluster_id is not a string; logging.exception keeps the
            # traceback of the real cause.
            logging.exception("Running scripting step in cluster %s failed.", cluster_id)
            return "FAILED"

    # run streaming step in cluster
    def run_streaming_step(self, cluster_id, name, mapper_path, reducer_path, input_path, output_path):
        """Submit a streaming step to a cluster and return its outcome.

        A mapper or reducer path other than the literal ``"NONE"`` is added
        to the list of bundled files, and the step then refers to it by its
        base name (the text after the last ``/``).

        Args:
            cluster_id: identifier of the target cluster.
            name: display name given to the step.
            mapper_path: path of the mapper, or ``"NONE"``.
            reducer_path: path of the reducer, or ``"NONE"``.
            input_path: input location handed to the step.
            output_path: output location handed to the step.

        Returns:
            Whatever ``self._run_step`` returns, or the string ``"FAILED"``
            when building or running the step raised an exception.
        """
        try:
            # bundle files with the job
            files = []
            if mapper_path != "NONE":
                files.append(mapper_path)
                mapper_path = mapper_path.split("/")[-1]
            if reducer_path != "NONE":
                files.append(reducer_path)
                reducer_path = reducer_path.split("/")[-1]

            # build streaming step
            logging.debug("Launching streaming step with mapper: %s reducer: %s and files: %s",
                          mapper_path, reducer_path, files)
            # NOTE(review): each bundled file is passed as its own argument
            # after "-files", and "-files" is passed alone when both paths
            # are "NONE" -- confirm this is the form Hadoop streaming expects.
            step = StreamingStep(name=name,
                                 step_args=["-files"] + files,
                                 mapper=mapper_path,
                                 reducer=reducer_path,
                                 input=input_path,
                                 output=output_path,
                                 action_on_failure="CONTINUE")
            return self._run_step(cluster_id, step)
        except Exception:
            # Catch Exception only, so Ctrl-C / SystemExit still propagate.
            # Lazy %-formatting keeps this handler from raising when
            # cluster_id is not a string; logging.exception keeps the
            # traceback of the real cause.
            logging.exception("Running streaming step in cluster %s failed.", cluster_id)
            return "FAILED"

    # run mapreduce jar step in cluster
    def run_jar_step(self, cluster_id, name, jar_path, class_name, input_path, output_path):
        """Submit a MapReduce jar step to a cluster and return its outcome.

        Builds a ``JarStep`` whose arguments are, in order, the class name,
        the input path and the output path, then hands it to
        ``self._run_step``.

        Args:
            cluster_id: identifier of the target cluster.
            name: display name given to the step.
            jar_path: location of the jar to run.
            class_name: first step argument.
            input_path: second step argument.
            output_path: third step argument.

        Returns:
            Whatever ``self._run_step`` returns, or the string ``"FAILED"``
            when building or running the step raised an exception.
        """
        try:
            # build jar step
            logging.debug("Launching jar step with jar: %s class name: %s input: %s and output: %s",
                          jar_path, class_name, input_path, output_path)
            step = JarStep(name=name,
                           jar=jar_path,
                           step_args=[class_name,
                                      input_path,
                                      output_path])
            return self._run_step(cluster_id, step)
        except Exception:
            # Catch Exception only, so Ctrl-C / SystemExit still propagate.
            # Lazy %-formatting keeps this handler from raising when
            # cluster_id is not a string; logging.exception keeps the
            # traceback of the real cause.
            logging.exception("Running jar step in cluster %s failed.", cluster_id)
            return "FAILED"

    def _run_step(self, cluster_id, step):
        """Add one step to a cluster and block until it reaches a final state.

        The step is submitted through ``self.connection.add_jobflow_steps``
        and its state is then polled with ``self._find_step_state``, sleeping
        ``int(self.step_status_wait)`` seconds between polls.

        Args:
            cluster_id: identifier of the target cluster.
            step: the step object to submit.

        Returns:
            The step id when the step COMPLETED.
            ``"FAILED"`` when the step FAILED, or ended as CANCELLED or
            INTERRUPTED.
            ``"NOT_FOUND"`` when the step could not be found in the cluster.
            ``"ERROR"`` when ``self._find_step_state`` reported an error.
        """
        step_list = self.connection.add_jobflow_steps(cluster_id, [step])
        step_id = step_list.stepids[0].value

        logging.info("Starting step %s in cluster %s. Please be patient. "
                     "Check the progress of the job in your AWS Console",
                     step_id, cluster_id)

        # States after which polling stops. CANCELLED and INTERRUPTED are
        # included because a step that ends in either of them never becomes
        # COMPLETED or FAILED, so waiting for those alone would loop forever.
        final_states = (u'NOT_FOUND', u'ERROR', u'FAILED', u'COMPLETED',
                        u'CANCELLED', u'INTERRUPTED')

        # Checking the state of the step
        state = self._find_step_state(cluster_id, step_id)
        while state not in final_states:
            # sleeping to recheck for status.
            time.sleep(int(self.step_status_wait))
            state = self._find_step_state(cluster_id, step_id)
            logging.info("Starting step %s in cluster %s. Status: %s",
                         step_id, cluster_id, state)

        if state == u'COMPLETED':
            logging.info("Step %s succesfully completed in cluster: %s", step_id, cluster_id)
            return step_id
        if state == u'NOT_FOUND':
            logging.error("Step %s could not be found in cluster: %s", step_id, cluster_id)
            return "NOT_FOUND"
        if state == u'ERROR':
            logging.error("Step %s produced an error in _find_step_state in cluster: %s",
                          step_id, cluster_id)
            return "ERROR"
        if state == u'FAILED':
            logging.error("Step %s failed in cluster: %s", step_id, cluster_id)
            return "FAILED"

        # CANCELLED or INTERRUPTED: the step did not succeed, so report it
        # with the failure value callers already handle.
        logging.error("Step %s ended in state %s in cluster: %s", step_id, state, cluster_id)
        return "FAILED"


    def _find_step_state(self, cluster_id, step_id):
        """Look up the current state of one step in a cluster.

        Args:
            cluster_id: identifier of the cluster whose steps are listed.
            step_id: id of the step to look for.

        Returns:
            The ``status.state`` value of the matching step, ``"NOT_FOUND"``
            when no listed step has that id, or ``"ERROR"`` when listing the
            steps raised an exception.
        """
        try:
            # NOTE(review): only the result of a single list_steps call is
            # examined -- confirm the wanted step is always part of it.
            listing = self.connection.list_steps(cluster_id)
            for summary in listing.steps:
                if summary.id == step_id:
                    return summary.status.state
            return "NOT_FOUND"
        except Exception:
            # Catch Exception only, so Ctrl-C / SystemExit still propagate,
            # and log the cause instead of discarding it.
            logging.exception("Listing steps of cluster %s failed.", cluster_id)
            return "ERROR"

    #Method for terminating the EMR cluster
    def terminate_cluster(self, cluster_id):
        """Ask the connection to terminate the job flow with the given id.

        The result of ``terminate_jobflow`` is discarded; nothing is returned.
        """
        emr = self.connection
        emr.terminate_jobflow(cluster_id)
开发者ID:DiegoTUI,项目名称:emr-orchestrator,代码行数:104,代码来源:emr_manager.py

示例2: enumerate

# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 或者: from boto.emr.connection.EmrConnection import list_steps [as 别名]
    for index, cluster in enumerate(clusters.clusters):
        print "[%s] %s" % (index, cluster.id)

    # if there is a command line arg, use it for the cluster_id
    if len(sys.argv) > 1:
        cluster_id = sys.argv[1]
    else:
        if len(clusters.clusters) == 0:
            sys.exit("No EMR clusters running.")
        selected_cluster = input("Select a Cluster: ")
        cluster_id = clusters.clusters[int(selected_cluster)].id

    print cluster_id

    # List EMR Steps
    steps = emr_conn.list_steps(cluster_id)
    step_cnt = 0
    for index, step in enumerate(steps.steps):
        time = dateutil.parser.parse(step.status.timeline.creationdatetime).astimezone(tz.tzlocal())
        print "[%s] NAME: %s - STATE: %s - START TIME: %s" % (index, step.name, step.status.state,
                                                              time.strftime("%Y-%m-%d %H:%M"))
        step_cnt += 1

    # if there are two command line args, use the second one as the selected step index
    if len(sys.argv) > 2:
        selected_step = sys.argv[2]
    else:
        selected_step = input("Select a Step: ")

    step_id = steps.steps[int(selected_step)].id
    print step_id
开发者ID:mas-dse,项目名称:UCSD_BigData_Scripts,代码行数:33,代码来源:get_emr_logs.py

示例3: __init__

# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 或者: from boto.emr.connection.EmrConnection import list_steps [as 别名]

#.........这里部分代码省略.........
        Returns a boto.emr.emrobject.JobFlow object.

        Special notes:
            The JobFlow object has the following relevant fields.
                state       <str>           the state of the job flow,
                                            either COMPLETED
                                                 | FAILED
                                                 | TERMINATED
                                                 | RUNNING
                                                 | SHUTTING_DOWN
                                                 | STARTING
                                                 | WAITING

                steps       <list(boto.emr.emrobject.Step)>
                            a list of the step details in the workflow.

            The Step object has the following relevant fields.
                state               <str>       the state of the step.

                startdatetime       <str>       the start time of the
                                                job.

                enddatetime         <str>       the end time of the job.

            WARNING! Amazon has an upper-limit on the frequency with
            which you can call this method; we have had success with
            calling it at most once every 10 seconds.
        """

        if not self.job_id:
            raise RankmaniacError('No job is running.')
            
        cinfo = self._emr_conn.describe_cluster(self.job_id)
        sinfo1 = self._emr_conn.list_steps(self.job_id)
        steps = sinfo1.steps

        if "marker" in dir(sinfo1):
            sinfo2 = self._emr_conn.list_steps(self.job_id, marker=sinfo1.marker)
            steps += sinfo2.steps

        return {"cluster": cinfo, "steps": steps}

    def _get_last_process_step_iter_no(self, jobdesc=None):
        """
        Returns an iteration number derived from the number of steps of
        the job flow whose state is COMPLETED: ``completed // 2 - 1``.

        With fewer than two completed steps the result is -1.
        NOTE(review): the halving presumably reflects two steps per
        iteration -- confirm against the code that adds the steps.

        Keyword arguments:
            jobdesc     <dict>      cached job description to use; it is
                                    indexed with the key "steps". When
                                    omitted, self.describe() is called.
        """

        if jobdesc is None:
            jobdesc = self.describe()

        completed = sum(1 for step in jobdesc["steps"]
                        if step.status.state == 'COMPLETED')

        # Floor division keeps the result an integer on Python 3 as well;
        # on Python 2 it equals the original int / int.
        return completed // 2 - 1
开发者ID:fboemer,项目名称:Rankmaniac,代码行数:69,代码来源:rankmaniac.py


注:本文中的boto.emr.connection.EmrConnection.list_steps方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。