

Python EmrConnection.terminate_jobflow Method Code Examples

This page collects typical usage examples of the Python method boto.emr.connection.EmrConnection.terminate_jobflow. If you have been wondering what exactly EmrConnection.terminate_jobflow does, how to call it, or what working code looks like, the curated examples below should help. You can also explore further usage examples of the containing class, boto.emr.connection.EmrConnection.


The following presents 8 code examples of EmrConnection.terminate_jobflow, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
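To orient you before the examples, here is a minimal sketch of the call pattern they all share; the credentials and jobflow ID are placeholders you would substitute with your own:

from boto.emr.connection import EmrConnection

# Open a connection; boto can also pick up credentials from the
# environment or ~/.boto if the arguments are omitted.
conn = EmrConnection('<aws_access_key_id>', '<aws_secret_access_key>')

# Terminate the jobflow (cluster) by its ID, e.g. 'j-XXXXXXXXXXX'.
conn.terminate_jobflow('j-XXXXXXXXXXX')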

Example 1: terminate

# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
def terminate(cluster_id):
	try:
		emr_connection = EmrConnection()
		# Termination protection must be lifted before the cluster can be killed.
		emr_connection.set_termination_protection(cluster_id, False)
		emr_connection.terminate_jobflow(cluster_id)
		return True
	except Exception as e:
		print e
		return False
Developer ID: valeter, Project: nlp-site, Lines of code: 11, Source file: terminate_cluster.py
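Note the set_termination_protection call above: EMR refuses to terminate a protection-enabled cluster, so the helper lifts the flag first. A sketch of the flag's life cycle (the cluster ID is a placeholder):

from boto.emr.connection import EmrConnection

conn = EmrConnection()  # credentials resolved from the environment

# Guard a long-running cluster against accidental termination...
conn.set_termination_protection('j-XXXXXXXXXXX', True)

# ...and lift the guard right before an intentional shutdown.
conn.set_termination_protection('j-XXXXXXXXXXX', False)
conn.terminate_jobflow('j-XXXXXXXXXXX')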

Example 2: creating_a_connection

# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
class EMR:
    def creating_a_connection(self):
        #Creating a connection
        from boto.emr.connection import EmrConnection
        self.conn = EmrConnection('', '')  # supply your AWS access key and secret here

    def creating_streaming_job(self):
        #Creating Streaming JobFlow Steps
        from boto.emr.step import StreamingStep
        self.step = StreamingStep(name='my bigdata task',
            mapper='s3n://eth-src/raw_to_stations.py',
            #mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
            reducer='s3n://eth-src/stations_to_features.py',
            #reducer='aggregate',
            input='s3n://eth-input/2007.csv',
            #input='s3n://elasticmapreduce/samples/wordcount/input',
            output='s3n://eth-middle/2007')

    def creating_jobflows(self):
        #Creating JobFlows
        #import boto.emr
        #self.conn = boto.emr.connect_to_region('eu-west-1')
        job_id = self.conn.run_jobflow(name='My jobflow',
                log_uri='s3://eth-log/jobflow_logs',
                master_instance_type='m3.xlarge',
                slave_instance_type='m1.large',
                num_instances=2,
                steps=[self.step],
                ami_version='3.3.1'
                )

        status = self.conn.describe_jobflow(job_id)
        print status.state  # e.g. u'STARTING', u'RUNNING' or u'WAITING'

    def terminating_jobflows(self, job_id):
        #Terminating JobFlows
        #self.conn = boto.emr.connect_to_region('eu-west-1')
        self.conn.terminate_jobflow(job_id)
Developer ID: raynald, Project: ETH_BBBigData, Lines of code: 40, Source file: EMR.py
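A hypothetical driver that chains the methods of this class in their intended order (the jobflow ID for the last call is a placeholder; as written, creating_jobflows keeps it in a local variable):

emr = EMR()
emr.creating_a_connection()    # open the EMR connection
emr.creating_streaming_job()   # define the streaming step
emr.creating_jobflows()        # launch the cluster and submit the step
# Later, once the jobflow ID is known (e.g. from the AWS console):
emr.terminating_jobflows('j-XXXXXXXXXXX')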

Example 3: __init__

# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]

#......... some code omitted here .........

            if contents.startswith('FinalRank'):
                self._is_done = True # cache result
                break

        return self._is_done

    def is_alive(self):
        """
        Checks whether the jobflow has completed, failed, or been
        terminated.

        Special notes:
            WARNING! This method should only be called **after**
            is_done() in order to be able to distinguish between the
            cases where the map-reduce job has outputted 'FinalRank'
            on its final iteration and has a 'COMPLETED' state.
        """

        jobflow = self.describe()
        if jobflow.state in ('COMPLETED', 'FAILED', 'TERMINATED'):
            return False

        return True

    def terminate(self):
        """
        Terminates a running map-reduce job.
        """

        if not self.job_id:
            raise RankmaniacError('No job is running.')

        self._emr_conn.terminate_jobflow(self.job_id)
        self.job_id = None

        self._reset()

    def download(self, outdir='results'):
        """
        Downloads the results from Amazon S3 to the local directory.

        Keyword arguments:
            outdir      <str>       the base directory to which to
                                    download contents.

        Special notes:
            This method downloads all keys (files) from the configured
            bucket for this particular team. It creates subdirectories
            as needed.
        """

        bucket = self._s3_conn.get_bucket(self._s3_bucket)
        keys = bucket.list(prefix=self._get_keyname())
        for key in keys:
            keyname = key.name
            # Ignore folder keys
            if '$' not in keyname:
                suffix = keyname.split('/')[1:] # removes team identifier
                filename = os.path.join(outdir, *suffix)
                dirname = os.path.dirname(filename)

                if not os.path.exists(dirname):
                    os.makedirs(dirname)

                key.get_contents_to_filename(filename)
Developer ID: aagarwal1990, Project: CS144_Rankmaniac, Lines of code: 70, Source file: rankmaniac.py
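Given the warning in is_alive(), a polling loop over this class would call is_done() first each iteration. A hypothetical sketch, where r stands for an already-configured instance (its construction is omitted above) and the 10-second sleep respects the AWS polling limit mentioned in Example 7:

import time

while True:
    if r.is_done():          # 'FinalRank' appeared: the computation finished
        break
    if not r.is_alive():     # COMPLETED/FAILED/TERMINATED without 'FinalRank'
        break
    time.sleep(10)           # throttle describe/list calls to AWS

r.terminate()                # shut the jobflow down either way
r.download('results')        # pull the outputs from S3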

Example 4: EMRCluster

# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
class EMRCluster(object):
  '''Representation of an EMR cluster.
     TODO: add bridge to boto interface for unit test.
  '''
  emr_status_delay = 10      # in sec
  emr_status_max_delay = 60  # in sec
  emr_status_max_error = 30  # number of errors
  emr_max_idle = 10 * 60     # 10 min (in sec)
  rate_limit_lock = RateLimitLock()

  def __init__(self, prop):
    '''Constructor, initialize EMR connection.'''
    self.prop = prop
    self.conn = EmrConnection(self.prop.ec2.key, self.prop.ec2.secret)
    self.jobid = None
    self.retry = 0
    self.level = 0
    self.last_update = -1

  @property
  def priority(self):
    '''The priority used in EMRManager.
       The lower value, the higher priority.
    '''
    with EMRCluster.rate_limit_lock:
      if self.jobid is None:
        return 1
      return 0

  def get_instance_groups(self):
    '''Get instance groups to start a cluster.
       It calculates the price with self.level, which indicates the
       price upgrades from the original price.
    '''
    instance_groups = []
    for group in self.prop.emr.instance_groups:
      (num, group_name, instance_type) = group
      level = max(0, min(self.level, len(self.prop.emr.price_upgrade_rate) - 1))  # 0 <= level < len(...)
      bprice = self.prop.emr.prices[instance_type] * self.prop.emr.price_upgrade_rate[level]
      name = '%s-%s@%f' % (group_name, 'SPOT', bprice)

      # Use on-demand instance if prices are zero.
      if bprice > 0:
        ig = InstanceGroup(num, group_name, instance_type, 'SPOT', name, '%.3f' % bprice)
      else:
        ig = InstanceGroup(num, group_name, instance_type, 'ON_DEMAND', name)

      instance_groups.append(ig)      

    return instance_groups

  def get_bootstrap_actions(self):
    '''Get list of bootstrap actions from property'''
    actions = []
    for bootstrap_action in self.prop.emr.bootstrap_actions:
      assert len(bootstrap_action) >= 2, 'Wrong bootstrap action definition: ' + str(bootstrap_action)
      actions.append(BootstrapAction(bootstrap_action[0], bootstrap_action[1], bootstrap_action[2:]))
    return actions

  @synchronized
  def start(self):
    '''Start a EMR cluster.'''
    # emr.project_name is required
    if self.prop.emr.project_name is None:
      raise ValueError('emr.project_name is not set')

    self.last_update = time.time()
    with EMRCluster.rate_limit_lock:
      self.jobid = self.conn.run_jobflow(name=self.prop.emr.cluster_name,
                                         ec2_keyname=self.prop.emr.keyname,
                                         log_uri=self.prop.emr.log_uri,
                                         ami_version=self.prop.emr.ami_version,
                                         bootstrap_actions=self.get_bootstrap_actions(),
                                         keep_alive=True,
                                         action_on_failure='CONTINUE',
                                         api_params={'VisibleToAllUsers': 'true'},
                                         instance_groups=self.get_instance_groups())
    message('Job flow created: %s', self.jobid)

    # Tag EC2 instances to allow future analysis
    tags = {'FlowControl': 'Briefly',
            'Project': self.prop.emr.project_name}
    if self.prop.emr.tags is not None:
      assert isinstance(self.prop.emr.tags, dict)
      tags = dict(tags.items() + self.prop.emr.tags.items())
    self.conn.add_tags(self.jobid, tags)

  @synchronized
  def terminate(self, level_upgrade=0):
    '''Terminate this EMR cluster.'''
    if self.jobid is None:
      return

    self.level += level_upgrade # upgrade to another price level

    message('Terminate jobflow: %s', self.jobid)
    for i in xrange(3):
      try:
        with EMRCluster.rate_limit_lock:
          self.conn.terminate_jobflow(self.jobid)
#......... some code omitted here .........
Developer ID: RajeshNarayan, Project: briefly, Lines of code: 103, Source file: hadoop.py
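The excerpt cuts off inside the retry loop. A hedged guess at its shape, retrying a few times on EMR API errors before giving up; conn and jobid stand in for the instance attributes, and the sleep length is an assumption (boto reports EMR API failures as boto.exception.EmrResponseError):

import time
from boto.exception import EmrResponseError

for i in xrange(3):
    try:
        with EMRCluster.rate_limit_lock:
            conn.terminate_jobflow(jobid)
        break                      # terminated: stop retrying
    except EmrResponseError:
        time.sleep(10)             # throttled or transient error: back off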

Example 5: EmrManager

# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]

#......... some code omitted here .........
                return cluster_id
        except:
            logging.error("Launching EMR cluster failed")
            return "FAILED"

    # run scripting step in cluster
    def run_scripting_step(self, cluster_id, name, script_path):
        try:
            step = ScriptRunnerStep(name=name, 
                                    step_args=[script_path],
                                    action_on_failure="CONTINUE")
            return self._run_step(cluster_id, step)
        except:
            logging.error("Running scripting step in cluster " + cluster_id + " failed.")
            return "FAILED"

    # run streaming step in cluster
    def run_streaming_step(self, cluster_id, name, mapper_path, reducer_path, input_path, output_path):
        try:
            # bundle files with the job
            files = []
            if mapper_path != "NONE":
                files.append(mapper_path)
                mapper_path = mapper_path.split("/")[-1]
            if reducer_path != "NONE":
                files.append(reducer_path)
                reducer_path = reducer_path.split("/")[-1]
            # build streaming step
            logging.debug("Launching streaming step with mapper: " + mapper_path + " reducer: " + reducer_path + " and files: " + str(files))
            step = StreamingStep(name=name,
                                    step_args=["-files"] + files, 
                                    mapper=mapper_path, 
                                    reducer=reducer_path, 
                                    input=input_path, 
                                    output=output_path, 
                                    action_on_failure="CONTINUE")
            return self._run_step(cluster_id, step)            
        except:
            logging.error("Running streaming step in cluster " + cluster_id + " failed.")
            return "FAILED"

    # run mapreduce jar step in cluster
    def run_jar_step(self, cluster_id, name, jar_path, class_name, input_path, output_path):
        try:
            # build streaming step
            logging.debug("Launching jar step with jar: " + jar_path + " class name: " + class_name + " input: " + input_path + " and output: " + output_path)
            step = JarStep(name=name,
                            jar=jar_path, 
                            step_args= [class_name,
                                        input_path,
                                        output_path])
            return self._run_step(cluster_id, step)            
        except:
            logging.error("Running jar step in cluster " + cluster_id + " failed.")
            return "FAILED"

    def _run_step(self, cluster_id, step):
        step_list = self.connection.add_jobflow_steps(cluster_id, [step])
        step_id = step_list.stepids[0].value

        logging.info("Starting step " + step_id + " in cluster " + cluster_id + ". Please be patient. Check the progress of the job in your AWS Console")

        # Checking the state of the step
        state = self._find_step_state(cluster_id, step_id)
        while state not in (u'NOT_FOUND', u'ERROR', u'FAILED', u'COMPLETED'):
            #sleeping to recheck for status.
            time.sleep(int(self.step_status_wait))
            state = self._find_step_state(cluster_id, step_id)
            logging.info("Starting step " + step_id + " in cluster " + cluster_id + ". Status: " + state)

        if state == u'FAILED':
            logging.error("Step " + step_id + " failed in cluster: " + cluster_id)
            return "FAILED"
        if state == u'NOT_FOUND':
            logging.error("Step " + step_id + " could not be found in cluster: " + cluster_id)
            return "NOT_FOUND"
        if state == u'ERROR':
            logging.error("Step " + step_id + " produced an error in _find_step_state in cluster: " + cluster_id)
            return "ERROR"

        # If the step completed successfully, return its ID
        if state == u'COMPLETED':
            logging.info("Step " + step_id + " successfully completed in cluster: " + cluster_id)
            return step_id


    def _find_step_state(self, cluster_id, step_id):
        try:
            step_summary_list = self.connection.list_steps(cluster_id)
            for step_summary in step_summary_list.steps:
                if step_summary.id == step_id:
                    return step_summary.status.state
            return "NOT_FOUND"
        except:
            return "ERROR"

    #Method for terminating the EMR cluster
    def terminate_cluster(self, cluster_id):
        self.connection.terminate_jobflow(cluster_id)
Developer ID: DiegoTUI, Project: emr-orchestrator, Lines of code: 104, Source file: emr_manager.py
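A hypothetical session with this manager, assuming an already-constructed instance (its __init__ is elided above); the cluster ID comes from a prior launch call and the S3 paths are placeholders:

step_id = manager.run_streaming_step(cluster_id,
                                     name='wordcount',
                                     mapper_path='s3://my-bucket/mapper.py',
                                     reducer_path='s3://my-bucket/reducer.py',
                                     input_path='s3://my-bucket/input/',
                                     output_path='s3://my-bucket/output/')

if step_id not in ('FAILED', 'NOT_FOUND', 'ERROR'):
    manager.terminate_cluster(cluster_id)  # tear down once the step succeeds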

Example 6: EmrClient

# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
class EmrClient(object):


    # The Hadoop version to use
    HADOOP_VERSION = '1.0.3'

    # The AMI version to use
    AMI_VERSION = '2.4.7'
 
    # Interval to wait between polls to EMR cluster in seconds
    CLUSTER_OPERATION_RESULTS_POLLING_SECONDS = 10
 
    # Timeout for EMR creation and ramp up in seconds
    CLUSTER_OPERATION_RESULTS_TIMEOUT_SECONDS = 60 * 30
 
    def __init__(self, region_name='us-east-1', aws_access_key_id=None, aws_secret_access_key=None):
 
        # If the access key is not specified, get it from the luigi config.cfg file
        if not aws_access_key_id:
            aws_access_key_id = luigi.configuration.get_config().get('aws', 'aws_access_key_id')
 
        if not aws_secret_access_key:
            aws_secret_access_key = luigi.configuration.get_config().get('aws', 'aws_secret_access_key')
 
 
        # Create the region in which to run
        region_endpoint = u'elasticmapreduce.%s.amazonaws.com' % (region_name)
        region = RegionInfo(name=region_name, endpoint=region_endpoint)
 
        self.emr_connection = EmrConnection(aws_access_key_id=aws_access_key_id,
                                            aws_secret_access_key=aws_secret_access_key,
                                            region=region)
 
    def launch_emr_cluster(self, cluster_name, log_uri, ec2_keyname=None, master_type='m1.small', core_type='m1.small', num_instances=2, hadoop_version='1.0.3', ami_version='2.4.7'):
        # Note: the hadoop_version and ami_version arguments are ignored below;
        # the run_jobflow call uses the class constants instead.
 
        # TODO Remove
        # install_pig_step = InstallPigStep()
 
        jobflow_id = self.emr_connection.run_jobflow(name=cluster_name,
                              log_uri=log_uri,
                              ec2_keyname=ec2_keyname,
                              master_instance_type=master_type,
                              slave_instance_type=core_type,
                              num_instances=num_instances,
                              keep_alive=True,
                              enable_debugging=True,
                              hadoop_version=EmrClient.HADOOP_VERSION,
                              steps=[], 
                              ami_version=EmrClient.AMI_VERSION)
 
        # Log important information
        status = self.emr_connection.describe_jobflow(jobflow_id)

        logger.info('Creating new cluster %s with following details' % status.name)
        logger.info('jobflow ID:\t%s' % status.jobflowid)
        logger.info('Log URI:\t%s' % status.loguri)
        logger.info('Master Instance Type:\t%s' % status.masterinstancetype)
        
        # A cluster of size 1 does not have any slave instances
        if hasattr(status, 'slaveinstancetype'):
            logger.info('Slave Instance Type:\t%s' % status.slaveinstancetype)
        
        logger.info('Number of Instances:\t%s' % status.instancecount)
        logger.info('Hadoop Version:\t%s' % status.hadoopversion)
        logger.info('AMI Version:\t%s' % status.amiversion)
        logger.info('Keep Alive:\t%s' % status.keepjobflowalivewhennosteps)
 
        return self._poll_until_cluster_ready(jobflow_id)
 
 
    def add_pig_step(self, jobflow_id, pig_file, name='Pig Script', pig_versions='latest', pig_args=[]): 

        pig_step = PigStep(name=name,
                           pig_file=pig_file,
                           pig_versions=pig_versions,
                           pig_args=pig_args,
                           # action_on_failure='CONTINUE',
                       )

        self.emr_connection.add_jobflow_steps(jobflow_id, [pig_step])

        # Poll until the cluster is done working        
        return self._poll_until_cluster_ready(jobflow_id)


    def shutdown_emr_cluster(self, jobflow_id):
 
        self.emr_connection.terminate_jobflow(jobflow_id)
        return self._poll_until_cluster_shutdown(jobflow_id)
 
    def get_jobflow_id(self):
        # Get the id of the cluster that is WAITING for work
        return self.emr_connection.list_clusters(cluster_states=['WAITING']).clusters[0].id
 
    def get_master_dns(self):
        """
        Get the master node's public address
        """
        # Get the jobflow ID of the WAITING cluster
        jobflow_id = self.get_jobflow_id()
#......... some code omitted here .........
Developer ID: mbrio, Project: Luigi, Lines of code: 103, Source file: emr_client.py
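A hypothetical session with this client; credentials come from luigi's config as the constructor shows, and the bucket names and script path are placeholders:

client = EmrClient(region_name='us-east-1')
client.launch_emr_cluster('my-cluster', 's3://my-log-bucket/logs')

jobflow_id = client.get_jobflow_id()   # the WAITING cluster just launched
client.add_pig_step(jobflow_id, 's3://my-bucket/scripts/report.pig')
client.shutdown_emr_cluster(jobflow_id)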

Example 7: __init__

# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
class Rankmaniac:
    '''Rankmaniac Wrapper

    This class provides a simple wrapper around the Amazon Web Services SDK.
    It should provide all the functionality required in terms of MapReduce,
    so students don't need to worry about learning the EMR and S3 API.
    '''

    def __init__(self, team_id, access_key, secret_key):
        '''Rankmaniac class constructor

        Creates a new instance of the Rankmaniac Wrapper for a specific
        team.

        Arguments:
            team_id         string      the team ID.
            access_key      string      AWS access key.
            secret_key      string      AWS secret key.
        '''

        self.s3_bucket = 'cs144caltech'

        self.team_id = team_id
        self.emr_conn = EmrConnection(access_key, secret_key)
        self.s3_conn = S3Connection(access_key, secret_key)
        self.job_id = None

    def __del__(self):

        if self.job_id:
            self.terminate_job()

    def submit_job(self, mapper, reducer, input, output, num_map=1, 
                   num_reduce=1):
        '''Submit a new MapReduce job

        Submits a new MapReduce job with a single step. To add more steps,
        call add_step. To terminate this job, call terminate_job.
        
        Arguments:
            mapper          string      path to the mapper, relative to
                                        your data directory.
            reducer         string      path to the reducer, relative to
                                        your data directory.
            input           string      path to the input data, relative to
                                        your data directory. To specify a
                                        directory as input, ensure your path
                                        contains a trailing /.
            output          string      path to the desired output directory.
            num_map         int         number of map tasks for this job.
            num_reduce      int         number of reduce tasks for this job.
        '''

        if self.job_id:
            raise Exception('A job is already running.')
        
        job_name = self._make_name()
        step = self._make_step(mapper, reducer, input, output, num_map, 
                               num_reduce)
        self.job_id = \
          self.emr_conn.run_jobflow(name=job_name,
                                    steps=[step],
                                    num_instances=1,
                                    log_uri=self._get_s3_url() + 'job_logs',
                                    keep_alive=True)

    def terminate_job(self):
        '''Terminate a running MapReduce job

        Stops the current running job.
        '''

        if not self.job_id:
            raise Exception('No job is running.')

        self.emr_conn.terminate_jobflow(self.job_id)
        self.job_id = None

    def get_job(self):
        '''Gets the running job details

        Returns:
            JobFlow object with relevant fields:
                state           string      the state of the job flow, either
                                            COMPLETED | FAILED | TERMINATED
                                            RUNNING | SHUTTING_DOWN | STARTING
                                            WAITING | BOOTSTRAPPING
                steps           list(Step)  a list of the step details in the
                                            workflow. A Step has the relevant
                                            fields:
                                                status              string
                                                startdatetime       string
                                                enddatetime         string

        Note: Amazon has an upper-limit on the frequency with which you can
              call this function; we have had success with calling it once
              every 10 seconds.
        '''
        
        if not self.job_id:
#......... some code omitted here .........
Developer ID: arjunc12, Project: rankmaniac, Lines of code: 103, Source file: rankmaniac.py
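A hypothetical session with the wrapper; the team ID, keys and paths are placeholders, and the paths are relative to the team's data directory as the docstrings explain:

import time

r = Rankmaniac('team42', '<access_key>', '<secret_key>')
r.submit_job(mapper='pagerank_map.py', reducer='pagerank_reduce.py',
             input='data/', output='output')

# Poll no more than about once every 10 seconds (see the get_job note above)
while r.get_job().state not in ('COMPLETED', 'FAILED', 'TERMINATED'):
    time.sleep(10)

r.terminate_job()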

Example 8: str

# Required import: from boto.emr.connection import EmrConnection [as alias]
# Or: from boto.emr.connection.EmrConnection import terminate_jobflow [as alias]
                           steps         = [step],
                           num_instances = 1
                           )

# <codecell>

print jobid

#start a new process to check the job status (check_status is not shown in this excerpt; see the sketch after this example)
result_queue = multiprocessing.Queue()
process = multiprocessing.Process(target=check_status, args=[emrcon, jobid, result_queue])
process.start()

#block until the checker reports a terminal state, then terminate the jobflow
result = result_queue.get()
emrcon.terminate_jobflow(jobid)


# <codecell>
"""
import re

# <codecell>

for word in b.list():
    keystring = str(word.key)
    if re.match('part-00000', keystring):  # re.match(pattern, string)
        word.get_contents_to_filename('/Users/winteram/Documents/Teaching/wordcount_output.txt')

# <codecell>
Developer ID: todatamining, Project: db1, Lines of code: 32, Source file: hsql.py
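The fragment above hands multiprocessing.Process a check_status function that the excerpt never defines. A plausible sketch, polling describe_jobflow until the flow reaches a terminal state (the state names follow Example 7's docstring; the sleep length is an assumption):

import time

def check_status(conn, jobid, queue):
    # Poll the jobflow and report its final state back through the queue
    while True:
        state = conn.describe_jobflow(jobid).state
        if state in ('COMPLETED', 'FAILED', 'TERMINATED'):
            queue.put(state)
            return
        time.sleep(10)  # stay under the describe_jobflow rate limit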


Note: The boto.emr.connection.EmrConnection.terminate_jobflow examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright in the source code belongs to the original authors. For distribution and use, please refer to each project's license; do not reproduce this compilation without permission.