本文整理汇总了Python中boto.emr.connection.EmrConnection.describe_cluster方法的典型用法代码示例。如果您正苦于以下问题:Python EmrConnection.describe_cluster方法的具体用法?Python EmrConnection.describe_cluster怎么用?Python EmrConnection.describe_cluster使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类boto.emr.connection.EmrConnection
的用法示例。
在下文中一共展示了EmrConnection.describe_cluster方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_data_source_variable
# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 或者: from boto.emr.connection.EmrConnection import describe_cluster [as 别名]
def create_data_source_variable(cluster_id, cr):
    """
    Write the data source settings for an EMR cluster to 'emr_default.conf'.

    The master node's public DNS name is looked up with describe_cluster and
    used as the host for the data source and for the job history / YARN
    connection parameters.

    @PARAM: cluster_id: ID of an EMR cluster
    @PARAM: cr: configuration reader; get_config() is called for
            "aws_access_key", "aws_secret_key", "aws_region" and
            "emr_cluster_name"
    return: True on success; creates the file 'emr_default.conf' in the pwd

    The file contains one KEY="value" entry per line:
        HADOOP_DATA_SOURCE_NAME="<emr_cluster_name>"
        HADOOP_DATA_SOURCE_DISTRO="Amazon EMR5"
        HADOOP_DATA_SOURCE_HOST="<master public dns name>"
        HADOOP_DATA_SOURCE_PORT="8020"
        HADOOP_DATA_SOURCE_USER="hdfs"
        HADOOP_DATA_SOURCE_GROUP="hadoop"
        HADOOP_DATA_SOURCE_JT_HOST="<master public dns name>"
        HADOOP_DATA_SOURCE_JT_PORT="8032"
        EMAIL="avalanche_<random number>.alpinenow.com"
        CONNECTION_PARAMETERS="<list of key/value dicts>"
    """
    aws_region = cr.get_config("aws_region")
    conn = EmrConnection(
        cr.get_config("aws_access_key"),
        cr.get_config("aws_secret_key"),
        region=RegionInfo(
            name=aws_region,
            endpoint=aws_region + ".elasticmapreduce.amazonaws.com"))

    emr_cluster = conn.describe_cluster(cluster_id)
    master_dns_hostname = emr_cluster.masterpublicdnsname

    # Build up connection parameters
    conn_params = [
        {"key": "mapreduce.jobhistory.address",
         "value": "{0}:10020".format(master_dns_hostname)},
        {"key": "mapreduce.jobhistory.webapp.address",
         "value": "{0}:19888".format(master_dns_hostname)},
        {"key": "yarn.app.mapreduce.am.staging-dir", "value": "/user"},
        {"key": "yarn.resourcemanager.admin.address",
         "value": "{0}:8033".format(master_dns_hostname)},
        {"key": "yarn.resourcemanager.scheduler.address",
         "value": "{0}:8030".format(master_dns_hostname)},
    ]

    # NOTE(review): the value is the Python repr of the list (single-quoted),
    # not JSON -- confirm that the consumer of this file expects that.
    conn_params_str = "CONNECTION_PARAMETERS=\"{0}\"".format(conn_params)
    # NOTE(review): there is no "@" in this address -- confirm it is intended.
    email_str = "EMAIL=\"avalanche_{0}.alpinenow.com\"".format(random.randint(1,99999))

    lines = [
        "HADOOP_DATA_SOURCE_NAME=\"{0}\"\n".format(cr.get_config("emr_cluster_name")),
        "HADOOP_DATA_SOURCE_DISTRO=\"{0}\"\n".format("Amazon EMR5"),
        "HADOOP_DATA_SOURCE_HOST=\"{0}\"\n".format(master_dns_hostname),
        # Key was previously misspelled as HADOOP_DATA_SOURCE_POST.
        "HADOOP_DATA_SOURCE_PORT=\"8020\"\n",
        "HADOOP_DATA_SOURCE_USER=\"hdfs\"\n",
        "HADOOP_DATA_SOURCE_GROUP=\"hadoop\"\n",
        "HADOOP_DATA_SOURCE_JT_HOST=\"{0}\"\n".format(master_dns_hostname),
        "HADOOP_DATA_SOURCE_JT_PORT=\"8032\"\n",
        # Each entry needs its own line; without the newline the EMAIL and
        # CONNECTION_PARAMETERS entries ran together on a single line.
        email_str + "\n",
        conn_params_str + "\n",
    ]
    with open("emr_default.conf", "w") as f:
        f.writelines(lines)
    return True
示例2: create_emr_cluster
# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 或者: from boto.emr.connection.EmrConnection import describe_cluster [as 别名]
def create_emr_cluster(cr):
    """
    Creates an EMR cluster given a set of configuration parameters and
    blocks until the cluster has settled.

    @PARAM: cr: Cluster configuration reader object; get_config() is called
            for the AWS credentials/region/subnet, the EMR cluster name,
            version, log URI, market type, the master/core/task node counts
            and types, and the EC2 key name.
    Return: EMR Cluster ID, or the string "ERROR" if the cluster ended up in
            a shutting-down, failed or terminated state.
    """
    aws_region = cr.get_config("aws_region")
    conn = EmrConnection(
        cr.get_config("aws_access_key"),
        cr.get_config("aws_secret_key"),
        region=RegionInfo(
            name=aws_region,
            endpoint=aws_region + ".elasticmapreduce.amazonaws.com"))

    # Create list of instance groups: master, core, and task
    market = cr.get_config("emr_market_type")
    instance_groups = [
        InstanceGroup(
            num_instances=cr.get_config("emr_master_node_count"),
            role="MASTER",
            type=cr.get_config("emr_master_node_type"),
            market=market,
            name="Master Node"),
        InstanceGroup(
            num_instances=cr.get_config("emr_core_node_count"),
            role="CORE",
            type=cr.get_config("emr_core_node_type"),
            market=market,
            name="Core Node"),
    ]
    # Only create task nodes if specifically asked for
    # NOTE(review): assumes get_config returns a number here -- confirm.
    if cr.get_config("emr_task_node_count") > 0:
        instance_groups.append(InstanceGroup(
            num_instances=cr.get_config("emr_task_node_count"),
            role="TASK",
            type=cr.get_config("emr_task_node_type"),
            market=market,
            name="Task Node"))
    print("Creating EMR Cluster with instance groups: {0}".format(instance_groups))

    # Use these params to add overrides, these will go away in Boto3
    api_params = {
        "Instances.Ec2SubnetId": cr.get_config("aws_subnet_id"),
        "ReleaseLabel": cr.get_config("emr_version"),
    }

    # Add step to load data
    step_args = [
        "s3-dist-cp",
        "--s3Endpoint=s3-us-west-1.amazonaws.com",
        "--src=s3://alpine-qa/automation/automation_test_data/",
        "--dest=hdfs:///automation_test_data",
        "--srcPattern=.*[a-zA-Z,]+",
    ]
    step = JarStep(
        name="s3distcp for data loading",
        jar="command-runner.jar",
        step_args=step_args,
        action_on_failure="CONTINUE")

    cluster_id = conn.run_jobflow(
        cr.get_config("emr_cluster_name"),
        instance_groups=instance_groups,
        action_on_failure="TERMINATE_JOB_FLOW",
        keep_alive=True,
        enable_debugging=True,
        log_uri=cr.get_config("emr_log_uri"),
        steps=[step],
        bootstrap_actions=[],
        ec2_keyname=cr.get_config("ec2_keyname"),
        visible_to_all_users=True,
        job_flow_role="EMR_EC2_DefaultRole",
        service_role="EMR_DefaultRole",
        api_params=api_params)
    print("EMR Cluster created, cluster id: {0}".format(cluster_id))

    # States in which the cluster will never become usable. The TERMINAT*
    # values are cluster states that the original loop did not recognise, so
    # a cluster that died during start-up was polled forever.
    failure_states = (u'SHUTTING_DOWN', u'FAILED', u'TERMINATING',
                      u'TERMINATED', u'TERMINATED_WITH_ERRORS')
    settled_states = failure_states + (u'COMPLETED', u'WAITING')

    state = conn.describe_cluster(cluster_id).status.state
    while state not in settled_states:
        # sleeping to recheck for status.
        time.sleep(5)
        state = conn.describe_cluster(cluster_id).status.state
        print("State is: {0}, sleeping 5s...".format(state))

    if state in failure_states:
        return "ERROR"

    # Check if the state is WAITING. Then report the master node.
    if state == u'WAITING':
        # Finding the master node dns of EMR cluster
        master_dns = conn.describe_cluster(cluster_id).masterpublicdnsname
        print("DNS Name: {0}".format(master_dns))
    return cluster_id
示例3: __init__
# 需要导入模块: from boto.emr.connection import EmrConnection [as 别名]
# 或者: from boto.emr.connection.EmrConnection import describe_cluster [as 别名]
#.........这里部分代码省略.........
Returns a boto.emr.emrobject.JobFlow object.
Special notes:
The JobFlow object has the following relevant fields.
state <str> the state of the job flow,
either COMPLETED
| FAILED
| TERMINATED
| RUNNING
| SHUTTING_DOWN
| STARTING
| WAITING
steps <list(boto.emr.emrobject.Step)>
a list of the step details in the workflow.
The Step object has the following relevant fields.
state <str> the state of the step.
startdatetime <str> the start time of the
job.
enddatetime <str> the end time of the job.
WARNING! Amazon has an upper-limit on the frequency with
which you can call this method; we have had success with
calling it at most once every 10 seconds.
"""
if not self.job_id:
raise RankmaniacError('No job is running.')
cinfo = self._emr_conn.describe_cluster(self.job_id)
sinfo1 = self._emr_conn.list_steps(self.job_id)
steps = sinfo1.steps
if "marker" in dir(sinfo1):
sinfo2 = self._emr_conn.list_steps(self.job_id, marker=sinfo1.marker)
steps += sinfo2.steps
return {"cluster": cinfo, "steps": steps}
def _get_last_process_step_iter_no(self, jobdesc=None):
"""
Returns the most recently process-step of the job flow that has
been completed.
Keyword arguments:
jobdesc <boto.emr.JobFlow> cached description of
jobflow to use
"""
if jobdesc is None:
jobdesc = self.describe()
steps = jobdesc["steps"]
cnt = 0
for i in range(len(steps)):
step = steps[i]
if step.status.state != 'COMPLETED':
continue
cnt += 1
return cnt / 2 - 1