当前位置: 首页>>代码示例>>Python>>正文


Python connection.EmrConnection类代码示例

本文整理汇总了Python中boto.emr.connection.EmrConnection的典型用法代码示例。如果您正苦于以下问题:Python EmrConnection类的具体用法?Python EmrConnection怎么用?Python EmrConnection使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了EmrConnection类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: add_steps

def add_steps(cluster_id, key):
    """Append the steps built from ``key`` to an EMR job flow.

    Args:
        cluster_id: Identifier of the job flow that receives the steps.
        key: Value handed to ``get_steps`` as both of its arguments.

    Returns:
        True when the steps were submitted, False when anything failed.
    """
    import logging

    try:
        emr_connection = EmrConnection()
        # NOTE(review): ``key`` is passed twice, exactly as the original call
        # did; confirm against the signature of ``get_steps``.
        emr_connection.add_jobflow_steps(cluster_id, get_steps(key, key))
        return True
    except Exception:
        # Callers only ever see True/False, so record the reason for the
        # failure instead of discarding it silently.
        logging.getLogger(__name__).exception(
            "Could not add steps to job flow %s", cluster_id)
        return False
开发者ID:valeter,项目名称:nlp-site,代码行数:7,代码来源:process_file.py

示例2: start_hadoop_cluster

def start_hadoop_cluster(nodenum):
    """Launch a keep-alive EMR job flow and enable termination protection.

    The job flow is started with a ``configure-hadoop`` bootstrap action and
    one jar step that is given ``pipeline.pear`` in S3 and
    ``/mnt/pipeline.pear`` as arguments.

    Args:
        nodenum: Number of instances requested for the job flow.

    Returns:
        The id returned by ``run_jobflow``, or the string ``"none"`` when the
        job flow could not be started.
    """
    import logging

    log = logging.getLogger(__name__)
    bucket_name = "udk-bucket"

    try:
        hadoop_params = [
            '-m', 'mapred.tasktracker.map.tasks.maximum=1',
            '-m', 'mapred.child.java.opts=-Xmx10g',
        ]
        configure_hadoop_action = BootstrapAction(
            'configure_hadoop',
            's3://elasticmapreduce/bootstrap-actions/configure-hadoop',
            hadoop_params)

        emr_connection = EmrConnection()
        copy_jar_step = JarStep(
            name='copy-jar',
            jar='s3n://' + bucket_name + '/copy-to-hdfs.jar',
            step_args=['s3n://' + bucket_name + '/pipeline.pear',
                       '/mnt/pipeline.pear'])

        jobflow_id = emr_connection.run_jobflow(
            name='udk',
            # Derived from bucket_name so the log location cannot drift away
            # from the bucket used by the jar step.
            log_uri='s3://' + bucket_name + '/jobflow_logs',
            master_instance_type='m2.xlarge',
            slave_instance_type='m2.xlarge',
            num_instances=nodenum,
            keep_alive=True,
            enable_debugging=False,
            bootstrap_actions=[configure_hadoop_action],
            hadoop_version='1.0.3',
            steps=[copy_jar_step])
    except Exception:
        log.exception("Could not start the EMR job flow")
        return "none"

    try:
        emr_connection.set_termination_protection(jobflow_id, True)
    except Exception:
        # The job flow has already been created at this point; returning
        # "none" here would lose its id, so only report the problem.
        log.exception(
            "Could not enable termination protection for %s", jobflow_id)

    return jobflow_id
开发者ID:valeter,项目名称:nlp-site,代码行数:30,代码来源:run_cluster.py

示例3: run

    def run(self):
        """Execute the Hive job on a newly started EMR job flow.

        In order: copy the input object, upload the generated Hive script,
        sleep for ``s3_sync_wait_time`` seconds, start the job flow, add the
        Hive install and script steps, wait for completion and log the
        output path.
        """
        # Hive deletes/moves its data source, so it is given a copy.
        copy_s3_file(self.input_path, self.data_path)
        self._generate_and_upload_hive_script()

        wait_seconds = self.s3_sync_wait_time
        logger.info(
            "Waiting {} seconds for S3 eventual consistency".format(
                wait_seconds))
        time.sleep(wait_seconds)

        # TODO more options like setting aws region
        emr = EmrConnection(self.aws_access_key_id,
                            self.aws_secret_access_key)

        hive_steps = [
            InstallHiveStep(self.hive_version),
            HiveStep(self.job_name, self.script_path),
        ]

        jobflow_options = dict(
            action_on_failure='CANCEL_AND_WAIT',
            master_instance_type=self.master_instance_type,
            slave_instance_type=self.slave_instance_type,
            ami_version=self.ami_version,
            num_instances=self.num_instances,
        )
        jobflow_id = emr.run_jobflow(
            self.job_name, self.log_path, **jobflow_options)

        # The steps are attached after the job flow has been created.
        emr.add_jobflow_steps(jobflow_id, hive_steps)
        self._wait_for_job_to_complete(emr, jobflow_id)

        logger.info("Output file is in: {0}".format(self.output_path))
开发者ID:JonathanBatten,项目名称:apiarist,代码行数:35,代码来源:emr.py

示例4: get_cluster_status

def get_cluster_status(cluster_id):
    """Return the state of an EMR job flow.

    Args:
        cluster_id: Identifier of the job flow to describe.

    Returns:
        ``flow.state`` of the described job flow, or the string ``"none"``
        when no job flow is returned or the lookup fails.
    """
    import logging

    try:
        flow = EmrConnection().describe_jobflow(cluster_id)
        if flow is None:
            return "none"
        return flow.state
    except Exception:
        # Keep the "none" sentinel callers rely on, but leave a trace of why
        # the lookup failed.
        logging.getLogger(__name__).exception(
            "Could not read the state of job flow %s", cluster_id)
        return "none"
开发者ID:valeter,项目名称:nlp-site,代码行数:9,代码来源:process_file.py

示例5: terminate

def terminate(cluster_id):
    """Switch off termination protection and terminate an EMR job flow.

    Args:
        cluster_id: Identifier of the job flow to terminate.

    Returns:
        True when both requests were issued, False when either one failed
        (the error is printed).
    """
    try:
        emr_connection = EmrConnection()
        # Protection is disabled first, then the job flow is terminated.
        emr_connection.set_termination_protection(cluster_id, False)
        emr_connection.terminate_jobflow(cluster_id)
        return True
    except Exception as e:
        # ``as`` and the parenthesised print are valid on Python 2.6+ and 3.
        print(e)
        return False
开发者ID:valeter,项目名称:nlp-site,代码行数:9,代码来源:terminate_cluster.py

示例6: create_data_source_variable

def create_data_source_variable(cluster_id, cr):
    """Write the Hadoop data-source settings of an EMR cluster to a file.

    The public DNS name of the cluster's master node is read through the EMR
    API and used as host in the settings.  The result is written to
    ``emr_default.conf`` in the current working directory, one
    ``KEY="value"`` assignment per line::

        HADOOP_DATA_SOURCE_NAME="<emr_cluster_name>"
        HADOOP_DATA_SOURCE_DISTRO="Amazon EMR5"
        HADOOP_DATA_SOURCE_HOST="<master dns name>"
        HADOOP_DATA_SOURCE_PORT="8020"
        HADOOP_DATA_SOURCE_USER="hdfs"
        HADOOP_DATA_SOURCE_GROUP="hadoop"
        HADOOP_DATA_SOURCE_JT_HOST="<master dns name>"
        HADOOP_DATA_SOURCE_JT_PORT="8032"
        EMAIL="avalanche_<random number>.alpinenow.com"
        CONNECTION_PARAMETERS="[{'key': ..., 'value': ...}, ...]"

    Args:
        cluster_id: ID of the EMR cluster to describe.
        cr: Configuration reader; ``get_config`` is queried for
            ``aws_access_key``, ``aws_secret_key``, ``aws_region`` and
            ``emr_cluster_name``.

    Returns:
        True once the file has been written.
    """
    access_key = cr.get_config("aws_access_key")
    secret_key = cr.get_config("aws_secret_key")
    emr_region = RegionInfo(
        name=cr.get_config("aws_region"),
        endpoint=cr.get_config("aws_region") + ".elasticmapreduce.amazonaws.com")
    conn = EmrConnection(access_key, secret_key, region=emr_region)

    master_dns_hostname = conn.describe_cluster(cluster_id).masterpublicdnsname

    # Build up connection parameters
    conn_params = [
        {"key": "mapreduce.jobhistory.address",
         "value": "{0}:10020".format(master_dns_hostname)},
        {"key": "mapreduce.jobhistory.webapp.address",
         "value": "{0}:19888".format(master_dns_hostname)},
        {"key": "yarn.app.mapreduce.am.staging-dir",
         "value": "/user"},
        {"key": "yarn.resourcemanager.admin.address",
         "value": "{0}:8033".format(master_dns_hostname)},
        {"key": "yarn.resourcemanager.scheduler.address",
         "value": "{0}:8030".format(master_dns_hostname)},
    ]

    # Everything is rendered before the file is opened, so a failing config
    # lookup cannot leave a half-written file behind.
    settings = [
        'HADOOP_DATA_SOURCE_NAME="{0}"'.format(cr.get_config("emr_cluster_name")),
        'HADOOP_DATA_SOURCE_DISTRO="Amazon EMR5"',
        'HADOOP_DATA_SOURCE_HOST="{0}"'.format(master_dns_hostname),
        # Was misspelled HADOOP_DATA_SOURCE_POST.
        'HADOOP_DATA_SOURCE_PORT="8020"',
        'HADOOP_DATA_SOURCE_USER="hdfs"',
        'HADOOP_DATA_SOURCE_GROUP="hadoop"',
        'HADOOP_DATA_SOURCE_JT_HOST="{0}"'.format(master_dns_hostname),
        'HADOOP_DATA_SOURCE_JT_PORT="8032"',
        'EMAIL="avalanche_{0}.alpinenow.com"'.format(random.randint(1, 99999)),
        # NOTE(review): this embeds the repr of the Python list, not JSON;
        # confirm which format the consumer of the file expects.
        'CONNECTION_PARAMETERS="{0}"'.format(conn_params),
    ]

    with open("emr_default.conf", "w") as f:
        # One assignment per line; EMAIL used to run straight into
        # CONNECTION_PARAMETERS because its newline was missing.
        f.write("\n".join(settings) + "\n")

    return True
开发者ID:nkapinos,项目名称:generators,代码行数:54,代码来源:create.py

示例7: __init__

    def __init__(self, team_id, access_key, secret_key,
                 bucket='cs144students'):
        """
        (constructor)

        Builds a Rankmaniac instance for one team and opens the S3 and
        EMR connections with the supplied credentials.

        Arguments:
            team_id       <str>     the team identifier, which may
                                    differ slightly from the actual
                                    team name.

            access_key    <str>     the AWS access key identifier.
            secret_key    <str>     the AWS secret access key.

        Keyword arguments:
            bucket        <str>     the S3 bucket name.
        """

        emr_region = RegionInfo(None, self.DefaultRegionName,
                                self.DefaultRegionEndpoint)

        self.team_id = team_id
        self.job_id = None
        self._s3_bucket = bucket

        self._s3_conn = S3Connection(access_key, secret_key)
        self._emr_conn = EmrConnection(access_key, secret_key,
                                       region=emr_region)

        # _reset() runs before the instance count is assigned, so the
        # value 1 is what remains afterwards.
        self._reset()
        self._num_instances = 1
开发者ID:aagarwal1990,项目名称:CS144_Rankmaniac,代码行数:32,代码来源:rankmaniac.py

示例8: __init__

 def __init__(self, prop):
   '''Store the properties, open the EMR connection and set initial state.

   prop: property object; its ec2.key and ec2.secret values are passed
         to EmrConnection.
   '''
   self.prop = prop
   credentials = self.prop.ec2
   self.conn = EmrConnection(credentials.key, credentials.secret)

   # Initial bookkeeping values.
   self.jobid = None
   self.retry, self.level = 0, 0
   self.last_update = -1
开发者ID:RajeshNarayan,项目名称:briefly,代码行数:8,代码来源:hadoop.py

示例9: get_internal_ips_from_emr

def get_internal_ips_from_emr(cluster_id, cr):
    """
    Returns the private IP address of every instance listed for an EMR cluster.

    cluster_id:  ID of the EMR cluster to inspect
    cr:          configuration reader; get_config supplies "aws_access_key",
                 "aws_secret_key" and "aws_region"
    """

    #  Connect to EMR in the configured region
    access_key = cr.get_config("aws_access_key")
    secret_key = cr.get_config("aws_secret_key")
    emr_region = RegionInfo(
        name = cr.get_config("aws_region"),
        endpoint = cr.get_config("aws_region") + ".elasticmapreduce.amazonaws.com")
    emr = EmrConnection(access_key, secret_key, region = emr_region)

    #  NOTE(review): only the result of this single list_instances call is
    #  used -- confirm whether large clusters need further pages.
    listed = emr.list_instances(cluster_id).instances
    return [node.privateipaddress for node in listed]
开发者ID:nkapinos,项目名称:generators,代码行数:19,代码来源:create.py

示例10: __init__

    def __init__(self, spec_filename="spec.json"):
        """Initialise the runtime and connect to S3 and EMR.

        Args:
            spec_filename: Passed on to the parent initialiser.
        """
        import boto
        from boto.emr.connection import EmrConnection, RegionInfo

        # Presumably the parent initialiser populates self.settings, which is
        # read immediately below -- verify against the base class.
        super(HiveRuntime, self).__init__(spec_filename)

        params = self.settings.Param
        key_id = params.AWS_ACCESS_KEY_ID
        key_secret = params.AWS_ACCESS_KEY_SECRET

        self.s3_conn = boto.connect_s3(key_id, key_secret)
        self.s3_bucket = self.s3_conn.get_bucket(params.S3_BUCKET)

        self.region = params.AWS_Region
        emr_region = RegionInfo(
            name=self.region,
            endpoint=self.region + '.elasticmapreduce.amazonaws.com')
        self.emr_conn = EmrConnection(key_id, key_secret, region=emr_region)

        self.job_flow_id = params.EMR_jobFlowId
开发者ID:Haizhi,项目名称:screwjack,代码行数:13,代码来源:specparser.py

示例11: creating_a_connection

class EMR(object):
    """Step-by-step use of the boto EMR API.

    The methods build on each other: ``creating_a_connection`` sets
    ``self.conn``, ``creating_streaming_job`` sets ``self.step`` and
    ``creating_jobflows`` uses both.
    """

    def creating_a_connection(self):
        """Open the EMR connection stored in ``self.conn``."""
        from boto.emr.connection import EmrConnection
        # The snippet passes empty strings as credentials.  The original also
        # carried boto.emr.connect_to_region('eu-west-1') as a disabled
        # alternative.
        self.conn = EmrConnection('', '')

    def creating_streaming_job(self):
        """Define the streaming step stored in ``self.step``."""
        from boto.emr.step import StreamingStep
        # The original kept the AWS word-count sample as disabled
        # alternatives: mapper s3n://elasticmapreduce/samples/wordcount/
        # wordSplitter.py, reducer 'aggregate' and input
        # s3n://elasticmapreduce/samples/wordcount/input.
        self.step = StreamingStep(
            name='my bigdata task',
            mapper='s3n://eth-src/raw_to_stations.py',
            reducer='s3n://eth-src/stations_to_features.py',
            input='s3n://eth-input/2007.csv',
            output='s3n://eth-middle/2007')

    def creating_jobflows(self):
        """Start a job flow that runs ``self.step``.

        The id of the new job flow is kept in ``self.job_id`` so that it can
        be handed to ``terminating_jobflows`` later.

        Returns:
            The ``state`` attribute of the job flow description fetched right
            after the job flow was started.
        """
        job_id = self.conn.run_jobflow(
            name='My jobflow',
            log_uri='s3://eth-log/jobflow_logs',
            master_instance_type='m3.xlarge',
            slave_instance_type='m1.large',
            num_instances=2,
            steps=[self.step],
            ami_version='3.3.1')
        self.job_id = job_id

        status = self.conn.describe_jobflow(job_id)
        # Previously a bare ``status.state`` expression whose value was
        # thrown away; hand it to the caller instead.
        return status.state

    def terminating_jobflows(self, job_id):
        """Terminate the job flow identified by ``job_id``."""
        self.conn.terminate_jobflow(job_id)
开发者ID:raynald,项目名称:ETH_BBBigData,代码行数:38,代码来源:EMR.py

示例12: main

def main(argv):
  """Queue the Foursquare event de-duplication step on an EMR job flow.

  Reads ``config.ini`` from the directory part of ``argv[0]``, loads the AWS
  settings file it points to, removes previous output below ``BUCKET_KEY``
  from the output bucket and adds one streaming step to the job flow.

  Args:
    argv: Argument vector; ``argv[0]`` locates ``config.ini`` and ``argv[1]``
      is the id of the job flow that receives the step.

  Raises:
    IndexError: If ``argv`` carries no job flow id.
  """

  # load the config
  config = ConfigParser()
  # dirname() is split()[0]; the former "if not None else ''" guard was
  # always true and therefore had no effect.
  config.read(os.path.join(os.path.dirname(argv[0]), 'config.ini'))

  # load AWS config
  awsConfig = ConfigParser()
  awsConfig.read(config.get('Common', 'aws'))

  aws_access_key = awsConfig.get('AWS', 'aws_access_key')
  aws_secret_key = awsConfig.get('AWS', 'aws_secret_key')
  event_bucket = awsConfig.get('AWS', 'event_bucket')
  output_bucket = awsConfig.get('AWS', 'emr_output_bucket')
  script_bucket = awsConfig.get('AWS', 'script_bucket')

  jobId = argv[1]

  emrConnection = EmrConnection(aws_access_key, aws_secret_key)

  s3Connection = S3Connection(aws_access_key, aws_secret_key)

  # clean s3 output; list() pages through the whole listing, whereas
  # get_all_keys() only returns a single page of results
  bucket = s3Connection.get_bucket(output_bucket)
  for key in bucket.list(prefix=BUCKET_KEY):
    bucket.delete_key(key)

  step = StreamingStep(name='Foursquare event deduper',
                       mapper='s3://%s/dedup_mapper.py foursquare' % script_bucket,
                       reducer='s3://%s/dedup_reducer.py' % script_bucket,
                       input='s3://%s/normalized' % event_bucket,
                       output='s3://%s/%s' % (output_bucket, BUCKET_KEY),
                       action_on_failure='CONTINUE')

  emrConnection.add_jobflow_steps(jobId, step)

  # Parenthesised single-argument print is valid on Python 2 and 3.
  print('Successfully started streaming steps')
开发者ID:Thisisdotme,项目名称:thisis.me,代码行数:37,代码来源:foursquare_dedup.py

示例13: EMRInventory

class EMRInventory(object):
    """Lists the EMR job flows reachable through one regional connection."""

    def __init__(self, region='eu-west-1'):
        """Connect to EMR in ``region`` (credentials are not passed here)."""
        regionEMR = self.get_emr_region(region)
        self.emrConnection = EmrConnection(region=regionEMR)

    def list_current_resources(self, region='eu-west-1'):
        """Print the id of every job flow returned by ``describe_jobflows``.

        ``region`` is accepted but not used; the connection created in
        ``__init__`` is the one queried.
        """
        for jobFlow in self.emrConnection.describe_jobflows():
            # Parenthesised single-argument print is valid on Python 2 and 3.
            print(jobFlow.jobflowid)

    def get_emr_region(self, region='eu-west-1'):
        """Return a ``RegionInfo`` for the EMR endpoint of ``region``."""
        regionEndpoint = '%s.elasticmapreduce.amazonaws.com' % region
        return RegionInfo(name=region, endpoint=regionEndpoint)
开发者ID:ainestal,项目名称:magpie,代码行数:15,代码来源:emr.py

示例14: EmrJarRuntime

class EmrJarRuntime(ZetRuntime):
    """Runtime that uploads a jar to S3 and runs it as a step on an EMR job flow."""

    def __init__(self, spec_filename="spec.json"):
        """Load the settings from ``spec_filename`` and connect to S3 and EMR.

        Args:
            spec_filename: Path handed to ``get_settings_from_file``.
        """
        import boto
        from boto.emr.connection import EmrConnection, RegionInfo

        # TODO: the parent initialiser is not invoked (the original call was
        # left disabled); the settings are loaded directly instead.
        self.settings = get_settings_from_file(spec_filename)

        p = self.settings.Param
        self.s3_conn = boto.connect_s3(p.AWS_ACCESS_KEY_ID, p.AWS_ACCESS_KEY_SECRET)
        self.s3_bucket = self.s3_conn.get_bucket(p.S3_BUCKET)
        self.region = p.AWS_Region
        self.emr_conn = EmrConnection(
            p.AWS_ACCESS_KEY_ID, p.AWS_ACCESS_KEY_SECRET,
            region=RegionInfo(
                name=self.region,
                endpoint=self.region + '.elasticmapreduce.amazonaws.com'))
        self.job_flow_id = p.EMR_jobFlowId

    def get_s3_working_dir(self, path=""):
        """Return the S3 key prefix of the current user/job/block plus ``path``.

        Args:
            path: Component appended to the working directory.

        Returns:
            ``zetjob/<userName>/job<jobId>/blk<blockId>/<path>``, always
            joined with forward slashes.
        """
        # S3 keys use "/" on every platform, so do not join them with the
        # separator of the local operating system.
        import posixpath

        glb_vars = self.settings.GlobalParam
        # NOTE(review): a ``path`` that starts with "/" replaces the whole
        # prefix (join semantics) -- confirm callers pass relative paths.
        return posixpath.join(
            'zetjob',
            glb_vars['userName'],
            "job%s" % glb_vars['jobId'],
            "blk%s" % glb_vars['blockId'],
            path)

    def execute(self, jar_path, args):
        """Upload ``jar_path`` to S3, add it as a jar step and wait for it.

        Args:
            jar_path: Local path of the jar; also used as the last component
                of its S3 working-directory key.
            args: Arguments passed to the step as ``step_args``.
        """
        from boto.emr.step import JarStep

        s3_jar_path = s3_upload(self.s3_bucket, self.get_s3_working_dir(jar_path), jar_path)
        # The original kept a fixed jar location as a disabled alternative:
        # s3://run-jars/jar/mahout-core-1.0-SNAPSHOT-job.jar
        print("Uploading jar to s3 : %s -> %s" % (jar_path, s3_jar_path))

        print("Add jobflow step")
        step = JarStep(name='cl_filter', jar=s3_jar_path, step_args=args)
        self.emr_conn.add_jobflow_steps(self.job_flow_id, steps=[step])

        print("Waiting jobflow step done")
        emr_wait_job(self.emr_conn, self.job_flow_id)
开发者ID:Haizhi,项目名称:screwjack,代码行数:36,代码来源:specparser.py

示例15: __init__

    def __init__(self, region_name='us-east-1', aws_access_key_id=None, aws_secret_access_key=None):
        """Create the EMR connection for ``region_name``.

        Credentials that are not supplied are read from the ``aws`` section
        of the luigi configuration.
        """
        # Fall back to the luigi config.cfg file for any missing credential.
        aws_access_key_id = (
            aws_access_key_id
            or luigi.configuration.get_config().get('aws', 'aws_access_key_id'))
        aws_secret_access_key = (
            aws_secret_access_key
            or luigi.configuration.get_config().get('aws', 'aws_secret_access_key'))

        # Region in which to run.
        emr_region = RegionInfo(
            name=region_name,
            endpoint=u'elasticmapreduce.%s.amazonaws.com' % region_name)

        self.emr_connection = EmrConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region=emr_region)
开发者ID:mbrio,项目名称:Luigi,代码行数:17,代码来源:emr_client.py


注:本文中的boto.emr.connection.EmrConnection类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。