当前位置: 首页>>代码示例>>Python>>正文


Python EmrConnection.describe_cluster方法代码示例

本文整理汇总了Python中boto.emr.connection.EmrConnection.describe_cluster方法的典型用法代码示例。如果您正苦于以下问题:Python EmrConnection.describe_cluster方法的具体用法?Python EmrConnection.describe_cluster怎么用?Python EmrConnection.describe_cluster使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在boto.emr.connection.EmrConnection的用法示例。


在下文中一共展示了EmrConnection.describe_cluster方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_data_source_variable

# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 注意: describe_cluster 是 EmrConnection 的实例方法, 不能单独导入; 请先创建 EmrConnection 实例, 再通过该实例调用 describe_cluster
def create_data_source_variable(cluster_id, cr):
    """
    Write a data source configuration file for an existing EMR cluster.

    Looks up the master public DNS name of the cluster and writes the file
    'emr_default.conf' in the current working directory, one KEY="value"
    assignment per line.

    @PARAM:  cluster_id:  ID of an EMR cluster
    @PARAM:  cr:  configuration reader exposing get_config(key); the keys
                  read here are "aws_access_key", "aws_secret_key",
                  "aws_region" and "emr_cluster_name"
    return:  True once the file has been written

    The file written looks like:

    HADOOP_DATA_SOURCE_NAME="<emr_cluster_name from the config>"
    HADOOP_DATA_SOURCE_DISTRO="Amazon EMR5"
    HADOOP_DATA_SOURCE_HOST="<master public dns name>"
    HADOOP_DATA_SOURCE_PORT="8020"
    HADOOP_DATA_SOURCE_USER="hdfs"
    HADOOP_DATA_SOURCE_GROUP="hadoop"
    HADOOP_DATA_SOURCE_JT_HOST="<master public dns name>"
    HADOOP_DATA_SOURCE_JT_PORT="8032"
    EMAIL="avalanche_<random int 1..99999>.alpinenow.com"
    CONNECTION_PARAMETERS="<Python repr of a list of key/value dicts>"

    """
    region_name = cr.get_config("aws_region")
    conn = EmrConnection(
        cr.get_config("aws_access_key"),
        cr.get_config("aws_secret_key"),
        region=RegionInfo(
            name=region_name,
            endpoint=region_name + ".elasticmapreduce.amazonaws.com"))

    master_dns_hostname = conn.describe_cluster(cluster_id).masterpublicdnsname

    # Hadoop/YARN connection parameters; every address points at the master
    # node, only the port differs.
    conn_params = [
        {"key": "mapreduce.jobhistory.address",
         "value": "{0}:10020".format(master_dns_hostname)},
        {"key": "mapreduce.jobhistory.webapp.address",
         "value": "{0}:19888".format(master_dns_hostname)},
        {"key": "yarn.app.mapreduce.am.staging-dir",
         "value": "/user"},
        {"key": "yarn.resourcemanager.admin.address",
         "value": "{0}:8033".format(master_dns_hostname)},
        {"key": "yarn.resourcemanager.scheduler.address",
         "value": "{0}:8030".format(master_dns_hostname)},
    ]
    # NOTE(review): this embeds the Python repr of the list (single-quoted
    # strings), not JSON -- kept as-is because the consumer of this file is
    # not visible from here; confirm the expected format.
    conn_params_str = "CONNECTION_PARAMETERS=\"{0}\"".format(conn_params)
    email_str = "EMAIL=\"avalanche_{0}.alpinenow.com\"".format(random.randint(1, 99999))

    # Build every line before opening the file so a failing config lookup
    # cannot leave a truncated file behind.
    lines = [
        "HADOOP_DATA_SOURCE_NAME=\"{0}\"\n".format(cr.get_config("emr_cluster_name")),
        "HADOOP_DATA_SOURCE_DISTRO=\"{0}\"\n".format("Amazon EMR5"),
        "HADOOP_DATA_SOURCE_HOST=\"{0}\"\n".format(master_dns_hostname),
        "HADOOP_DATA_SOURCE_PORT=\"8020\"\n",
        "HADOOP_DATA_SOURCE_USER=\"hdfs\"\n",
        "HADOOP_DATA_SOURCE_GROUP=\"hadoop\"\n",
        "HADOOP_DATA_SOURCE_JT_HOST=\"{0}\"\n".format(master_dns_hostname),
        "HADOOP_DATA_SOURCE_JT_PORT=\"8032\"\n",
        # Each assignment needs its own line terminator; otherwise EMAIL and
        # CONNECTION_PARAMETERS end up fused on a single line.
        email_str + "\n",
        conn_params_str + "\n",
    ]

    with open("emr_default.conf", "w") as f:
        f.writelines(lines)

    return True
开发者ID:nkapinos,项目名称:generators,代码行数:56,代码来源:create.py

示例2: create_emr_cluster

# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 注意: describe_cluster 是 EmrConnection 的实例方法, 不能单独导入; 请先创建 EmrConnection 实例, 再通过该实例调用 describe_cluster
def create_emr_cluster(cr):
    """
    Create an EMR cluster from configuration and wait until it settles.

    The cluster is started with one data-loading step (s3-dist-cp) and is
    then polled every 5 seconds until it reaches a settled state.

    @PARAM:  cr:  Cluster configuration reader object exposing
                  get_config(key)
    Return:  the EMR cluster id once the cluster reaches WAITING;
             the string "ERROR" if the cluster fails, shuts down or
             terminates;
             None if the cluster reports COMPLETED
    """
    region_name = cr.get_config("aws_region")
    conn = EmrConnection(
        cr.get_config("aws_access_key"),
        cr.get_config("aws_secret_key"),
        region=RegionInfo(
            name=region_name,
            endpoint=region_name + ".elasticmapreduce.amazonaws.com"))

    #  Create list of instance groups:  master, core, and task
    market = cr.get_config("emr_market_type")
    instance_groups = [
        InstanceGroup(
            num_instances=cr.get_config("emr_master_node_count"),
            role="MASTER",
            type=cr.get_config("emr_master_node_type"),
            market=market,
            name="Master Node"),
        InstanceGroup(
            num_instances=cr.get_config("emr_core_node_count"),
            role="CORE",
            type=cr.get_config("emr_core_node_type"),
            market=market,
            name="Core Node"),
    ]

    #  Only create task nodes if specifically asked for
    #  NOTE(review): assumes get_config returns a number for this key --
    #  confirm against the configuration reader.
    task_node_count = cr.get_config("emr_task_node_count")
    if task_node_count > 0:
        instance_groups.append(InstanceGroup(
            num_instances=task_node_count,
            role="TASK",
            type=cr.get_config("emr_task_node_type"),
            market=market,
            name="Task Node"))

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("Creating EMR Cluster with instance groups: {0}".format(instance_groups))

    #  Use these params to add overrides, these will go away in Boto3
    api_params = {
        "Instances.Ec2SubnetId": cr.get_config("aws_subnet_id"),
        "ReleaseLabel": cr.get_config("emr_version"),
    }

    #  Add step to load data
    step_args = [
        "s3-dist-cp",
        "--s3Endpoint=s3-us-west-1.amazonaws.com",
        "--src=s3://alpine-qa/automation/automation_test_data/",
        "--dest=hdfs:///automation_test_data",
        "--srcPattern=.*[a-zA-Z,]+",
    ]
    step = JarStep(
        name="s3distcp for data loading",
        jar="command-runner.jar",
        step_args=step_args,
        action_on_failure="CONTINUE")

    cluster_id = conn.run_jobflow(
        cr.get_config("emr_cluster_name"),
        instance_groups=instance_groups,
        action_on_failure="TERMINATE_JOB_FLOW",
        keep_alive=True,
        enable_debugging=True,
        log_uri=cr.get_config("emr_log_uri"),
        steps=[step],
        bootstrap_actions=[],
        ec2_keyname=cr.get_config("ec2_keyname"),
        visible_to_all_users=True,
        job_flow_role="EMR_EC2_DefaultRole",
        service_role="EMR_DefaultRole",
        api_params=api_params)

    print("EMR Cluster created, cluster id: {0}".format(cluster_id))

    # States after which the cluster will never become usable. The
    # TERMINAT* values are the cluster states reported by the EMR
    # DescribeCluster API; without them a cluster that dies during start-up
    # would be polled forever.
    failure_states = (
        u'SHUTTING_DOWN',
        u'FAILED',
        u'TERMINATING',
        u'TERMINATED',
        u'TERMINATED_WITH_ERRORS',
    )
    settled_states = failure_states + (u'COMPLETED', u'WAITING')

    state = conn.describe_cluster(cluster_id).status.state
    while state not in settled_states:
        # Sleep before re-checking the status.
        time.sleep(5)
        state = conn.describe_cluster(cluster_id).status.state
        print("State is: {0}, sleeping 5s...".format(state))

    if state in failure_states:
        return "ERROR"

    # The cluster is idle and ready for further steps.
    if state == u'WAITING':
        # Report the master node DNS name of the EMR cluster.
        master_dns = conn.describe_cluster(cluster_id).masterpublicdnsname
        print("DNS Name: {0}".format(master_dns))
        return cluster_id

    # Only COMPLETED reaches this point; callers have always received None
    # for it, so that is kept and made explicit.
    return None
开发者ID:nkapinos,项目名称:generators,代码行数:91,代码来源:create.py

示例3: __init__

# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 注意: describe_cluster 是 EmrConnection 的实例方法, 不能单独导入; 请先创建 EmrConnection 实例, 再通过该实例调用 describe_cluster

#.........这里部分代码省略.........

        Returns a boto.emr.emrobject.JobFlow object.

        Special notes:
            The JobFlow object has the following relevant fields.
                state       <str>           the state of the job flow,
                                            either COMPLETED
                                                 | FAILED
                                                 | TERMINATED
                                                 | RUNNING
                                                 | SHUTTING_DOWN
                                                 | STARTING
                                                 | WAITING

                steps       <list(boto.emr.emrobject.Step)>
                            a list of the step details in the workflow.

            The Step object has the following relevant fields.
                state               <str>       the state of the step.

                startdatetime       <str>       the start time of the
                                                job.

                enddatetime         <str>       the end time of the job.

            WARNING! Amazon has an upper-limit on the frequency with
            which you can call this method; we have had success with
            calling it at most once every 10 seconds.
        """

        if not self.job_id:
            raise RankmaniacError('No job is running.')
            
        cinfo = self._emr_conn.describe_cluster(self.job_id)
        sinfo1 = self._emr_conn.list_steps(self.job_id)
        steps = sinfo1.steps

        if "marker" in dir(sinfo1):
            sinfo2 = self._emr_conn.list_steps(self.job_id, marker=sinfo1.marker)
            steps += sinfo2.steps

        return {"cluster": cinfo, "steps": steps}

    def _get_last_process_step_iter_no(self, jobdesc=None):
        """
        Returns the most recently process-step of the job flow that has
        been completed.

        Keyword arguments:
            jobdesc     <boto.emr.JobFlow>      cached description of
                                                jobflow to use
        """

        if jobdesc is None:
            jobdesc = self.describe()
        steps = jobdesc["steps"]
    
        cnt = 0
        for i in range(len(steps)):
            step = steps[i]
            if step.status.state != 'COMPLETED':
                continue

            cnt += 1

        return cnt / 2 - 1
开发者ID:fboemer,项目名称:Rankmaniac,代码行数:70,代码来源:rankmaniac.py


注:本文中的boto.emr.connection.EmrConnection.describe_cluster方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。