当前位置: 首页>>代码示例>>Python>>正文


Python util.strip_microseconds函数代码示例

本文整理汇总了Python中mrjob.util.strip_microseconds函数的典型用法代码示例。如果您正苦于以下问题:Python strip_microseconds函数的具体用法?Python strip_microseconds怎么用?Python strip_microseconds使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了strip_microseconds函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: terminate_and_notify

def terminate_and_notify(runner, to_terminate, dry_run=False,
                         max_mins_locked=None, quiet=False):
    """Terminate the given job flows and report each one on stdout.

    :param runner: object providing ``make_s3_conn()``, ``_lock_uri()``,
                   ``_opts``, ``_make_unique_job_name()`` and
                   ``make_emr_conn()``.
    :param to_terminate: iterable of ``(jf, pending, time_idle,
                         time_to_end_of_hour)`` tuples. If it is empty or
                         ``None``, nothing happens.
    :param dry_run: if true, no lock is requested, nothing is terminated
                    and nothing is printed.
    :param max_mins_locked: forwarded to ``attempt_to_acquire_lock()`` as
                            ``mins_to_expiration``.
    :param quiet: if true, terminate without printing a message.
    """
    if not to_terminate:
        return

    fmt = ('Terminated job flow %s (%s); was %s for %s, %s to end of'
           ' hour')

    for jf, pending, time_idle, time_to_end_of_hour in to_terminate:
        if dry_run:
            continue

        # only terminate a job flow whose lock we managed to grab
        lock_acquired = attempt_to_acquire_lock(
            runner.make_s3_conn(),
            runner._lock_uri(jf),
            runner._opts['s3_sync_wait_time'],
            runner._make_unique_job_name(label='terminate'),
            mins_to_expiration=max_mins_locked,
        )
        if not lock_acquired:
            continue

        runner.make_emr_conn().terminate_jobflow(jf.jobflowid)

        if quiet:
            continue

        # single parenthesized argument: prints identically on Python 2
        # (print statement) and Python 3 (print function)
        print(fmt % (
            jf.jobflowid, jf.name,
            'pending' if pending else 'idle',
            strip_microseconds(time_idle),
            strip_microseconds(time_to_end_of_hour)))
开发者ID:JeffersonK,项目名称:mrjob,代码行数:27,代码来源:terminate_idle_job_flows.py

示例2: terminate_and_notify

def terminate_and_notify(runner, to_terminate, dry_run=False,
                         max_mins_locked=None, quiet=False):
    """Terminate the given job flows and report each one on stdout.

    The termination message is also embedded in the value passed to
    ``attempt_to_acquire_lock()`` together with a unique job name.

    :param runner: object providing ``make_s3_conn()``, ``_lock_uri()``,
                   ``_opts``, ``_make_unique_job_name()`` and
                   ``make_emr_conn()``.
    :param to_terminate: iterable of ``(jf, pending, time_idle,
                         time_to_end_of_hour)`` tuples. If it is empty or
                         ``None``, nothing happens.
    :param dry_run: if true, no lock is requested, nothing is terminated
                    and nothing is printed.
    :param max_mins_locked: forwarded to ``attempt_to_acquire_lock()`` as
                            ``mins_to_expiration``.
    :param quiet: if true, suppress both the termination message and the
                  "was locked" log message.
    """
    if not to_terminate:
        return

    fmt = 'Terminated job flow %s (%s); was %s for %s, %s to end of hour'

    for jf, pending, time_idle, time_to_end_of_hour in to_terminate:
        # built before the dry-run check, as the lock value needs it and
        # the original always formatted it
        msg = fmt % (
            jf.jobflowid, jf.name,
            'pending' if pending else 'idle',
            strip_microseconds(time_idle),
            strip_microseconds(time_to_end_of_hour))

        if dry_run:
            continue

        status = attempt_to_acquire_lock(
            runner.make_s3_conn(),
            runner._lock_uri(jf),
            runner._opts['s3_sync_wait_time'],
            '%s (%s)' % (msg,
                         runner._make_unique_job_name(label='terminate')),
            mins_to_expiration=max_mins_locked,
        )

        if status:
            runner.make_emr_conn().terminate_jobflow(jf.jobflowid)
            if not quiet:
                # single parenthesized argument: valid and equivalent on
                # both Python 2 and Python 3
                print(msg)
        elif not quiet:
            log.info('%s was locked between getting job flow info and'
                     ' trying to terminate it; skipping' % jf.jobflowid)
开发者ID:AnthonyNystrom,项目名称:mrjob,代码行数:32,代码来源:terminate_idle_job_flows.py

示例3: _terminate_and_notify

def _terminate_and_notify(runner, cluster_id, cluster_name, num_steps,
                          is_pending, time_idle, time_to_end_of_hour,
                          dry_run=False, max_mins_locked=None, quiet=False):
    """Terminate one cluster (unless *dry_run*) and print a message.

    In dry-run mode no lock is requested and nothing is terminated, but
    the "Terminated cluster ..." message is still printed. Otherwise the
    cluster is terminated only if ``_attempt_to_acquire_lock()`` returns
    a true value; if it does not, the cluster is skipped and that fact is
    logged. *quiet* suppresses both the printed message and the log line.
    """
    msg = 'Terminated cluster %s (%s); was %s for %s, %s to end of hour' % (
        cluster_id, cluster_name,
        'pending' if is_pending else 'idle',
        strip_microseconds(time_idle),
        strip_microseconds(time_to_end_of_hour))

    if not dry_run:
        got_lock = _attempt_to_acquire_lock(
            runner.fs.make_s3_conn(),
            runner._lock_uri(cluster_id, num_steps),
            runner._opts['cloud_fs_sync_secs'],
            '%s (%s)' % (msg,
                         runner._make_unique_job_key(label='terminate')),
            mins_to_expiration=max_mins_locked,
        )

        if not got_lock:
            # lock was not acquired, so leave this cluster alone
            if not quiet:
                log.info('%s was locked between getting cluster info and'
                         ' trying to terminate it; skipping' % cluster_id)
            return

        runner.make_emr_conn().terminate_jobflow(cluster_id)

    if not quiet:
        print(msg)
开发者ID:davidmarin,项目名称:mrjob,代码行数:32,代码来源:terminate_idle_clusters.py

示例4: terminate_and_notify

def terminate_and_notify(emr_conn, to_terminate, dry_run=False):
    """Terminate each listed job flow and print one line per job flow.

    *to_terminate* is an iterable of ``(job_flow_id, name, time_idle,
    time_to_end_of_hour)`` tuples; if it is empty or ``None`` nothing
    happens. With *dry_run* set, ``terminate_jobflow()`` is not called
    but the message is still printed.
    """
    if not to_terminate:
        return

    template = ('Terminated job flow %s (%s); was idle for %s,'
                ' %s to end of hour')

    for job_flow_id, name, time_idle, time_to_end_of_hour in to_terminate:
        if not dry_run:
            emr_conn.terminate_jobflow(job_flow_id)

        idle_for = strip_microseconds(time_idle)
        until_hour = strip_microseconds(time_to_end_of_hour)
        print(template % (job_flow_id, name, idle_for, until_hour))
开发者ID:dataartisan,项目名称:mrjob,代码行数:11,代码来源:terminate_idle_job_flows.py

示例5: terminate_and_notify

def terminate_and_notify(emr_conn, to_terminate, dry_run=False):
    """Terminate each listed job flow and print one line per job flow.

    *to_terminate* is an iterable of ``(id, name, pending, time_idle,
    time_to_end_of_hour)`` tuples; if it is empty or ``None`` nothing
    happens. With *dry_run* set, ``terminate_jobflow()`` is not called
    but the message is still printed.
    """
    if not to_terminate:
        return

    template = 'Terminated job flow %s (%s); was %s for %s, %s to end of hour'

    for entry in to_terminate:
        job_flow_id, name, pending, time_idle, time_to_end_of_hour = entry

        if not dry_run:
            emr_conn.terminate_jobflow(job_flow_id)

        state = 'pending' if pending else 'idle'
        print(template % (
            job_flow_id, name, state,
            strip_microseconds(time_idle),
            strip_microseconds(time_to_end_of_hour)))
开发者ID:GabbleEngineer,项目名称:mrjob,代码行数:12,代码来源:terminate_idle_job_flows.py

示例6: pprint_job_flow

def pprint_job_flow(jf):
    """Print a job flow to stdout in this form::

        j-JOB_FLOW_ID: job.flow.name
        2 instances (master=m1.small, slaves=m1.small, TIME to end of hour)
        STATE

    followed by a blank line. ``TIME`` is whatever
    ``strip_microseconds(est_time_to_hour(jf))`` formats to. The
    ``slaves=`` segment and the plural ``s`` appear only when the job
    flow has more than one instance.

    :param jf: job flow object with ``instancecount``,
               ``masterinstancetype``, ``slaveinstancetype``,
               ``jobflowid``, ``name`` and ``state`` attributes.
    """
    instance_count = int(jf.instancecount)
    plural = 's' if instance_count > 1 else ''

    details = ['master=%s' % jf.masterinstancetype]
    if instance_count > 1:
        details.append('slaves=%s' % jf.slaveinstancetype)
    details.append('%s to end of hour' %
                   strip_microseconds(est_time_to_hour(jf)))

    # Each print() takes exactly one argument so the output is the same
    # under Python 2's print statement and Python 3's print function.
    print('%s: %s' % (jf.jobflowid, jf.name))
    print('%d instance%s (%s)' % (
        instance_count, plural, ', '.join(details)))
    print(jf.state)
    # print('') rather than print(): a bare print() would emit "()" on
    # Python 2
    print('')
开发者ID:Anihc,项目名称:mrjob,代码行数:35,代码来源:job_flow_pool.py

示例7: _format_timedelta

def _format_timedelta(time):
    """Render a timedelta so that values line up in a column.

    The microsecond-stripped timedelta is converted with ``str()``. When
    the result has three whitespace-separated parts and the last one is
    seven characters long (e.g. ``'3 days, 9:00:00'``), an extra space is
    inserted before it so it aligns with ``'3 days, 10:00:00'``. Any
    other value is returned unchanged.
    """
    text = str(strip_microseconds(time))
    fields = text.split()

    needs_padding = len(fields) == 3 and len(fields[2]) == 7
    if not needs_padding:
        return text

    count, unit, clock = fields
    return '%s %s  %s' % (count, unit, clock)
开发者ID:Affirm,项目名称:mrjob,代码行数:12,代码来源:report_long_jobs.py

示例8: inspect_and_maybe_terminate_job_flows

def inspect_and_maybe_terminate_job_flows(
    conf_path=None,
    dry_run=False,
    max_hours_idle=None,
    mins_to_end_of_hour=None,
    now=None,
    pool_name=None,
    pooled_only=False,
    unpooled_only=False,
    max_mins_locked=None,
    quiet=False,
    **kwargs
):

    if now is None:
        now = datetime.utcnow()

    # old default behavior
    if max_hours_idle is None and mins_to_end_of_hour is None:
        max_hours_idle = DEFAULT_MAX_HOURS_IDLE

    runner = EMRJobRunner(conf_path=conf_path, **kwargs)
    emr_conn = runner.make_emr_conn()

    log.info(
        'getting info about all job flows (this goes back about 2 months)')
    # We don't filter by job flow state because we want this to work even
    # if Amazon adds another kind of idle state.
    job_flows = describe_all_job_flows(emr_conn)

    num_bootstrapping = 0
    num_done = 0
    num_idle = 0
    num_non_streaming = 0
    num_pending = 0
    num_running = 0

    # a list of tuples of job flow id, name, idle time (as a timedelta)
    to_terminate = []

    for jf in job_flows:

        # check if job flow is done
        if is_job_flow_done(jf):
            num_done += 1

        # check if job flow is bootstrapping
        elif is_job_flow_bootstrapping(jf):
            num_bootstrapping += 1

        # we can't really tell if non-streaming jobs are idle or not, so
        # let them be (see Issue #60)
        elif not is_job_flow_streaming(jf):
            num_non_streaming += 1

        elif is_job_flow_running(jf):
            num_running += 1

        else:
            time_idle = now - time_last_active(jf)
            time_to_end_of_hour = est_time_to_hour(jf, now=now)
            _, pool = pool_hash_and_name(jf)
            pending = job_flow_has_pending_steps(jf)

            if pending:
                num_pending += 1
            else:
                num_idle += 1

            log.debug(
                'Job flow %s %s for %s, %s to end of hour, %s (%s)' %
                      (jf.jobflowid,
                       'pending' if pending else 'idle',
                       strip_microseconds(time_idle),
                       strip_microseconds(time_to_end_of_hour),
                       ('unpooled' if pool is None else 'in %s pool' % pool),
                       jf.name))

            # filter out job flows that don't meet our criteria
            if (max_hours_idle is not None and
                time_idle <= timedelta(hours=max_hours_idle)):
                continue

            # mins_to_end_of_hour doesn't apply to jobs with pending steps
            if (mins_to_end_of_hour is not None and
                (pending or
                 time_to_end_of_hour >= timedelta(
                    minutes=mins_to_end_of_hour))):
                continue

            if (pooled_only and pool is None):
                continue

            if (unpooled_only and pool is not None):
                continue

            if (pool_name is not None and pool != pool_name):
                continue

            to_terminate.append((jf, pending, time_idle, time_to_end_of_hour))
#.........这里部分代码省略.........
开发者ID:AnthonyNystrom,项目名称:mrjob,代码行数:101,代码来源:terminate_idle_job_flows.py

示例9: print_report


#.........这里部分代码省略.........
            h -= timedelta(hours=1)
        print()

    print('* Job flows are considered to belong to the user and job that')
    print('  started them or last ran on them.')
    print()

    # Top jobs
    print('Top jobs, by total time used:')
    for label, nih_used in sorted(s['label_to_nih_used'].items(),
                                  key=lambda lb_nih: (-lb_nih[1], lb_nih[0])):
        print('  %9.2f %s' % (nih_used, label))
    print()

    print('Top jobs, by time billed but not used:')
    for label, nih_bbnu in sorted(s['label_to_nih_bbnu'].items(),
                                  key=lambda lb_nih1: (-lb_nih1[1], lb_nih1[0])):
        print('  %9.2f %s' % (nih_bbnu, label))
    print()

    # Top users
    print('Top users, by total time used:')
    for owner, nih_used in sorted(s['owner_to_nih_used'].items(),
                                  key=lambda o_nih: (-o_nih[1], o_nih[0])):
        print('  %9.2f %s' % (nih_used, owner))
    print()

    print('Top users, by time billed but not used:')
    for owner, nih_bbnu in sorted(s['owner_to_nih_bbnu'].items(),
                                  key=lambda o_nih2: (-o_nih2[1], o_nih2[0])):
        print('  %9.2f %s' % (nih_bbnu, owner))
    print()

    # Top job steps
    print('Top job steps, by total time used (step number first):')
    for (label, step_num), nih_used in sorted(
            s['job_step_to_nih_used'].items(),
            key=lambda k_nih: (-k_nih[1], k_nih[0])):

        if label:
            print('  %9.2f %3d %s' % (nih_used, step_num, label))
        else:
            print('  %9.2f     (non-mrjob step)' % (nih_used,))
    print()

    print('Top job steps, by total time billed but not used (un-pooled only):')
    for (label, step_num), nih_bbnu in sorted(
            s['job_step_to_nih_bbnu_no_pool'].items(),
            key=lambda k_nih3: (-k_nih3[1], k_nih3[0])):

        if label:
            print('  %9.2f %3d %s' % (nih_bbnu, step_num, label))
        else:
            print('  %9.2f     (non-mrjob step)' % (nih_bbnu,))
    print()

    # Top pools
    print('All pools, by total time billed:')
    for pool, nih_billed in sorted(s['pool_to_nih_billed'].items(),
                                   key=lambda p_nih: (-p_nih[1], p_nih[0])):
        print('  %9.2f %s' % (nih_billed, pool or '(not pooled)'))
    print()

    print('All pools, by total time billed but not used:')
    for pool, nih_bbnu in sorted(s['pool_to_nih_bbnu'].items(),
                                 key=lambda p_nih4: (-p_nih4[1], p_nih4[0])):
        print('  %9.2f %s' % (nih_bbnu, pool or '(not pooled)'))
    print()

    # Top job flows
    print('All job flows, by total time billed:')
    top_clusters = sorted(s['clusters'],
                          key=lambda cs: (-cs['nih_billed'], cs['name']))
    for cs in top_clusters:
        print('  %9.2f %-15s %s' % (
            cs['nih_billed'], cs['id'], cs['name']))
    print()

    print('All job flows, by time billed but not used:')
    top_clusters_bbnu = sorted(
        s['clusters'], key=lambda cs: (-cs['nih_bbnu'], cs['name']))
    for cs in top_clusters_bbnu:
        print('  %9.2f %-15s %s' % (
            cs['nih_bbnu'], cs['id'], cs['name']))
    print()

    # Details
    print('Details for all job flows:')
    print()
    print(' id              state                  created             steps'
          '        time ran     billed    waste   user   name')

    all_clusters = sorted(s['clusters'], key=lambda cs: cs['created'],
                          reverse=True)

    for cs in all_clusters:
        print(' %-15s %-22s %19s %3d %17s %9.2f %9.2f %8s %s' % (
            cs['id'], cs['state'], cs['created'], cs['num_steps'],
            strip_microseconds(cs['ran']), cs['nih_used'], cs['nih_bbnu'],
            (cs['owner'] or ''), (cs['label'] or ('not started by mrjob'))))
开发者ID:kartheek6,项目名称:mrjob,代码行数:101,代码来源:audit_usage.py

示例10: inspect_and_maybe_terminate_job_flows

def inspect_and_maybe_terminate_job_flows(
    conf_path=None,
    dry_run=False,
    max_hours_idle=None,
    mins_to_end_of_hour=None,
    now=None,
    pool_name=None,
    pooled_only=False,
    unpooled_only=False,
):
    """Classify all job flows and terminate the idle ones that qualify.

    Every job flow returned by ``describe_all_job_flows()`` is counted as
    done, active non-streaming, running or idle. An idle job flow is
    queued for termination unless one of these filters excludes it:

    * *max_hours_idle*: idle time must exceed this many hours
    * *mins_to_end_of_hour*: time to end of hour must be below this
    * *pooled_only* / *unpooled_only*: pool membership must match
    * *pool_name*: the pool name must equal this value

    If neither *max_hours_idle* nor *mins_to_end_of_hour* is given,
    *max_hours_idle* falls back to ``DEFAULT_MAX_HOURS_IDLE``. *now*
    defaults to ``datetime.utcnow()``. The queued job flows are handed to
    ``terminate_and_notify()`` together with *dry_run*.
    """
    now = datetime.utcnow() if now is None else now

    # old default behavior
    if max_hours_idle is None and mins_to_end_of_hour is None:
        max_hours_idle = DEFAULT_MAX_HOURS_IDLE

    emr_conn = EMRJobRunner(conf_path=conf_path).make_emr_conn()

    log.info(
        'getting info about all job flows (this goes back about 2 months)')

    num_done = num_non_streaming = num_running = num_idle = 0
    # tuples of (job flow id, name, time idle, time to end of hour);
    # both times are whatever the helper functions return
    to_terminate = []

    # No filtering by job flow state here, so this keeps working even if
    # Amazon adds another kind of idle state.
    for jf in describe_all_job_flows(emr_conn):

        if is_job_flow_done(jf):
            num_done += 1
            continue

        # we can't really tell if non-streaming jobs are idle or not, so
        # let them be (see Issue #60)
        if is_job_flow_non_streaming(jf):
            num_non_streaming += 1
            continue

        if is_job_flow_running(jf):
            num_running += 1
            continue

        # anything left over is treated as idle
        num_idle += 1
        time_idle = time_job_flow_idle(jf, now=now)
        time_to_end_of_hour = time_to_end_of_hour_for_job_flow(jf, now=now)
        pool = job_flow_pool_name(jf)

        log.debug(
            'Job flow %-15s idle for %s, %s to end of hour, %s (%s)' % (
                jf.jobflowid,
                strip_microseconds(time_idle),
                strip_microseconds(time_to_end_of_hour),
                ('unpooled' if pool is None else 'in %s pool' % pool),
                jf.name))

        # skip job flows that have not been idle for long enough
        if (max_hours_idle is not None and
                time_idle <= timedelta(hours=max_hours_idle)):
            continue

        # skip job flows that are not close enough to the end of the hour
        if (mins_to_end_of_hour is not None and
                time_to_end_of_hour >=
                timedelta(minutes=mins_to_end_of_hour)):
            continue

        # pool membership filters
        if pool is None:
            if pooled_only:
                continue
        elif unpooled_only:
            continue

        if pool_name is not None and pool != pool_name:
            continue

        to_terminate.append(
            (jf.jobflowid, jf.name, time_idle, time_to_end_of_hour))

    log.info(
        'Job flow statuses: %d running, %d idle, %d active non-streaming,'
        ' %d done' % (num_running, num_idle, num_non_streaming, num_done))

    terminate_and_notify(emr_conn, to_terminate, dry_run=dry_run)
开发者ID:ddehghan,项目名称:mrjob,代码行数:88,代码来源:terminate_idle_job_flows.py

示例11: _maybe_terminate_clusters

def _maybe_terminate_clusters(dry_run=False,
                              max_hours_idle=None,
                              mins_to_end_of_hour=None,
                              now=None,
                              pool_name=None,
                              pooled_only=False,
                              unpooled_only=False,
                              max_mins_locked=None,
                              quiet=False,
                              **kwargs):
    if now is None:
        now = datetime.utcnow()

    # old default behavior
    if max_hours_idle is None and mins_to_end_of_hour is None:
        max_hours_idle = _DEFAULT_MAX_HOURS_IDLE

    runner = EMRJobRunner(**kwargs)
    emr_conn = runner.make_emr_conn()

    num_starting = 0
    num_bootstrapping = 0
    num_done = 0
    num_idle = 0
    num_pending = 0
    num_running = 0

    # We don't filter by cluster state because we want this to work even
    # if Amazon adds another kind of idle state.
    for cluster_summary in _yield_all_clusters(emr_conn):
        cluster_id = cluster_summary.id

        # check if cluster is done
        if _is_cluster_done(cluster_summary):
            num_done += 1
            continue

        # check if cluster is starting
        if _is_cluster_starting(cluster_summary):
            num_starting += 1
            continue

        # check if cluster is bootstrapping
        if _is_cluster_bootstrapping(cluster_summary):
            num_bootstrapping += 1
            continue

        # need steps to learn more about cluster
        steps = _list_all_steps(emr_conn, cluster_id)

        if any(_is_step_running(step) for step in steps):
            num_running += 1
            continue

        # cluster is idle
        time_idle = now - _time_last_active(cluster_summary, steps)
        time_to_end_of_hour = _est_time_to_hour(cluster_summary, now=now)
        is_pending = _cluster_has_pending_steps(steps)

        bootstrap_actions = list(_yield_all_bootstrap_actions(
            emr_conn, cluster_id))
        _, pool = _pool_hash_and_name(bootstrap_actions)

        if is_pending:
            num_pending += 1
        else:
            num_idle += 1

        log.debug(
            'cluster %s %s for %s, %s to end of hour, %s (%s)' %
            (cluster_id,
             'pending' if is_pending else 'idle',
             strip_microseconds(time_idle),
             strip_microseconds(time_to_end_of_hour),
             ('unpooled' if pool is None else 'in %s pool' % pool),
             cluster_summary.name))

        # filter out clusters that don't meet our criteria
        if (max_hours_idle is not None and
                time_idle <= timedelta(hours=max_hours_idle)):
            continue

        # mins_to_end_of_hour doesn't apply to jobs with pending steps
        if (mins_to_end_of_hour is not None and
            (is_pending or
             time_to_end_of_hour >= timedelta(
                minutes=mins_to_end_of_hour))):
            continue

        if (pooled_only and pool is None):
            continue

        if (unpooled_only and pool is not None):
            continue

        if (pool_name is not None and pool != pool_name):
            continue

        # terminate idle cluster
        _terminate_and_notify(
#.........这里部分代码省略.........
开发者ID:davidmarin,项目名称:mrjob,代码行数:101,代码来源:terminate_idle_clusters.py

示例12: print_report


#.........这里部分代码省略.........
                s['date_to_nih_bbnu'].get(d, 0.0),
                percent(s['date_to_nih_bbnu'].get(d, 0.0),
                        s['date_to_nih_billed'][d]))
            d -= timedelta(days=1)
        print

    print '* Job flows are considered to belong to the user and job that'
    print '  started them or last ran on them.'
    print

    # Top jobs
    print 'Top jobs, by total time used:'
    for label, nih_used in sorted(s['label_to_nih_used'].iteritems(),
                                    key=lambda (lb, nih): (-nih, lb)):
        print '  %9.2f %s' % (nih_used, label)
    print

    print 'Top jobs, by time billed but not used:'
    for label, nih_bbnu in sorted(s['label_to_nih_bbnu'].iteritems(),
                                  key=lambda (lb, nih): (-nih, lb)):
        print '  %9.2f %s' % (nih_bbnu, label)
    print

    # Top users
    print 'Top users, by total time used:'
    for owner, nih_used in sorted(s['owner_to_nih_used'].iteritems(),
                                    key=lambda (o, nih): (-nih, o)):
        print '  %9.2f %s' % (nih_used, owner)
    print

    print 'Top users, by time billed but not used:'
    for owner, nih_bbnu in sorted(s['owner_to_nih_bbnu'].iteritems(),
                                  key=lambda (o, nih): (-nih, o)):
        print '  %9.2f %s' % (nih_bbnu, owner)
    print

    # Top job steps
    print 'Top job steps, by total time used (step number first):'
    for (label, step_num), nih_used in sorted(
        s['job_step_to_nih_used'].iteritems(), key=lambda (k, nih): (-nih, k)):
        if label:
            print '  %9.2f %3d %s' % (nih_used, step_num, label)
        else:
            print '  %9.2f     (non-mrjob step)' % (nih_used,)
    print

    print 'Top job steps, by total time billed but not used (un-pooled only):'
    for (label, step_num), nih_bbnu in sorted(
        s['job_step_to_nih_bbnu_no_pool'].iteritems(),
        key=lambda (k, nih): (-nih, k)):

        if label:
            print '  %9.2f %3d %s' % (nih_bbnu, step_num, label)
        else:
            print '  %9.2f     (non-mrjob step)' % (nih_bbnu,)
    print

    # Top pools
    print 'All pools, by total time billed:'
    for pool, nih_billed in sorted(s['pool_to_nih_billed'].iteritems(),
                                   key=lambda (p, nih): (-nih, p)):
        print '  %9.2f %s' % (nih_billed, pool or '(not pooled)')
    print

    print 'All pools, by total time billed but not used:'
    for pool, nih_bbnu in sorted(s['pool_to_nih_bbnu'].iteritems(),
                                 key=lambda (p, nih): (-nih, p)):
        print '  %9.2f %s' % (nih_bbnu, pool or '(not pooled)')
    print

    # Top job flows
    print 'All job flows, by total time billed:'
    top_job_flows = sorted(s['flows'],
                           key=lambda jf: (-jf['nih_billed'], jf['name']))
    for jf in top_job_flows:
        print '  %9.2f %-15s %s' % (
            jf['nih_billed'], jf['id'], jf['name'])
    print

    print 'All job flows, by time billed but not used:'
    top_job_flows_bbnu = sorted(s['flows'],
                                key=lambda jf: (-jf['nih_bbnu'], jf['name']))
    for jf in top_job_flows_bbnu:
        print '  %9.2f %-15s %s' % (
            jf['nih_bbnu'], jf['id'], jf['name'])
    print

    # Details
    print 'Details for all job flows:'
    print
    print (' id              state         created             steps'
           '        time ran     billed    waste   user   name')

    all_job_flows = sorted(s['flows'], key=lambda jf: jf['created'],
                           reverse=True)
    for jf in all_job_flows:
        print ' %-15s %-13s %19s %3d %17s %9.2f %9.2f %8s %s' % (
            jf['id'], jf['state'], jf['created'], jf['num_steps'],
            strip_microseconds(jf['ran']), jf['nih_used'], jf['nih_bbnu'],
            (jf['owner'] or ''), (jf['label'] or ('not started by mrjob')))
开发者ID:ddehghan,项目名称:mrjob,代码行数:101,代码来源:audit_usage.py

示例13: _round_up_to_next_second

def _round_up_to_next_second(td):
    """Round up to the next second because that's how EMR bills."""
    if td.microseconds:
        return strip_microseconds(td) + timedelta(seconds=1)
    else:
        return td
开发者ID:Affirm,项目名称:mrjob,代码行数:6,代码来源:audit_usage.py

示例14: _maybe_terminate_clusters

def _maybe_terminate_clusters(dry_run=False,
                              max_mins_idle=None,
                              now=None,
                              pool_name=None,
                              pooled_only=False,
                              unpooled_only=False,
                              max_mins_locked=None,
                              quiet=False,
                              **kwargs):
    if now is None:
        now = _boto3_now()

    # old default behavior
    if max_mins_idle is None:
        max_mins_idle = _DEFAULT_MAX_MINS_IDLE

    runner = EMRJobRunner(**kwargs)
    emr_client = runner.make_emr_client()

    num_starting = 0
    num_bootstrapping = 0
    num_done = 0
    num_idle = 0
    num_pending = 0
    num_running = 0

    # We don't filter by cluster state because we want this to work even
    # if Amazon adds another kind of idle state.
    for cluster_summary in _boto3_paginate(
            'Clusters', emr_client, 'list_clusters'):

        cluster_id = cluster_summary['Id']

        # check if cluster is done
        if _is_cluster_done(cluster_summary):
            num_done += 1
            continue

        # check if cluster is starting
        if _is_cluster_starting(cluster_summary):
            num_starting += 1
            continue

        # check if cluster is bootstrapping
        if _is_cluster_bootstrapping(cluster_summary):
            num_bootstrapping += 1
            continue

        # need steps to learn more about cluster
        steps = list(reversed(list(_boto3_paginate(
            'Steps', emr_client, 'list_steps',
            ClusterId=cluster_id))))

        if any(_is_step_running(step) for step in steps):
            num_running += 1
            continue

        # cluster is idle
        time_idle = now - _time_last_active(cluster_summary, steps)
        is_pending = _cluster_has_pending_steps(steps)

        # need to get actual cluster to see tags
        cluster = emr_client.describe_cluster(ClusterId=cluster_id)['Cluster']

        _, pool = _pool_hash_and_name(cluster)

        if is_pending:
            num_pending += 1
        else:
            num_idle += 1

        log.debug(
            'cluster %s %s for %s, %s (%s)' %
            (cluster_id,
             'pending' if is_pending else 'idle',
             strip_microseconds(time_idle),
             ('unpooled' if pool is None else 'in %s pool' % pool),
             cluster_summary['Name']))

        # filter out clusters that don't meet our criteria
        if (max_mins_idle is not None and
                time_idle <= timedelta(minutes=max_mins_idle)):
            continue

        if (pooled_only and pool is None):
            continue

        if (unpooled_only and pool is not None):
            continue

        if (pool_name is not None and pool != pool_name):
            continue

        # terminate idle cluster
        _terminate_and_notify(
            runner=runner,
            cluster_id=cluster_id,
            cluster_name=cluster_summary['Name'],
            num_steps=len(steps),
            is_pending=is_pending,
#.........这里部分代码省略.........
开发者ID:okomestudio,项目名称:mrjob,代码行数:101,代码来源:terminate_idle_clusters.py

示例15: print_report


#.........这里部分代码省略.........
            print " %13s  %9.2f %9.2f %9.2f     %5.1f" % (
                h.strftime("%Y-%m-%d %H"),
                s["hour_to_nih_billed"].get(h, 0.0),
                s["hour_to_nih_used"].get(h, 0.0),
                s["hour_to_nih_bbnu"].get(h, 0.0),
                percent(s["hour_to_nih_bbnu"].get(h, 0.0), s["hour_to_nih_billed"].get(h, 0.0)),
            )
            h -= timedelta(hours=1)
        print

    print "* Job flows are considered to belong to the user and job that"
    print "  started them or last ran on them."
    print

    # Top jobs
    print "Top jobs, by total time used:"
    for label, nih_used in sorted(s["label_to_nih_used"].iteritems(), key=lambda (lb, nih): (-nih, lb)):
        print "  %9.2f %s" % (nih_used, label)
    print

    print "Top jobs, by time billed but not used:"
    for label, nih_bbnu in sorted(s["label_to_nih_bbnu"].iteritems(), key=lambda (lb, nih): (-nih, lb)):
        print "  %9.2f %s" % (nih_bbnu, label)
    print

    # Top users
    print "Top users, by total time used:"
    for owner, nih_used in sorted(s["owner_to_nih_used"].iteritems(), key=lambda (o, nih): (-nih, o)):
        print "  %9.2f %s" % (nih_used, owner)
    print

    print "Top users, by time billed but not used:"
    for owner, nih_bbnu in sorted(s["owner_to_nih_bbnu"].iteritems(), key=lambda (o, nih): (-nih, o)):
        print "  %9.2f %s" % (nih_bbnu, owner)
    print

    # Top job steps
    print "Top job steps, by total time used (step number first):"
    for (label, step_num), nih_used in sorted(s["job_step_to_nih_used"].iteritems(), key=lambda (k, nih): (-nih, k)):
        if label:
            print "  %9.2f %3d %s" % (nih_used, step_num, label)
        else:
            print "  %9.2f     (non-mrjob step)" % (nih_used,)
    print

    print "Top job steps, by total time billed but not used (un-pooled only):"
    for (label, step_num), nih_bbnu in sorted(
        s["job_step_to_nih_bbnu_no_pool"].iteritems(), key=lambda (k, nih): (-nih, k)
    ):

        if label:
            print "  %9.2f %3d %s" % (nih_bbnu, step_num, label)
        else:
            print "  %9.2f     (non-mrjob step)" % (nih_bbnu,)
    print

    # Top pools
    print "All pools, by total time billed:"
    for pool, nih_billed in sorted(s["pool_to_nih_billed"].iteritems(), key=lambda (p, nih): (-nih, p)):
        print "  %9.2f %s" % (nih_billed, pool or "(not pooled)")
    print

    print "All pools, by total time billed but not used:"
    for pool, nih_bbnu in sorted(s["pool_to_nih_bbnu"].iteritems(), key=lambda (p, nih): (-nih, p)):
        print "  %9.2f %s" % (nih_bbnu, pool or "(not pooled)")
    print

    # Top job flows
    print "All job flows, by total time billed:"
    top_job_flows = sorted(s["flows"], key=lambda jf: (-jf["nih_billed"], jf["name"]))
    for jf in top_job_flows:
        print "  %9.2f %-15s %s" % (jf["nih_billed"], jf["id"], jf["name"])
    print

    print "All job flows, by time billed but not used:"
    top_job_flows_bbnu = sorted(s["flows"], key=lambda jf: (-jf["nih_bbnu"], jf["name"]))
    for jf in top_job_flows_bbnu:
        print "  %9.2f %-15s %s" % (jf["nih_bbnu"], jf["id"], jf["name"])
    print

    # Details
    print "Details for all job flows:"
    print
    print (
        " id              state         created             steps" "        time ran     billed    waste   user   name"
    )

    all_job_flows = sorted(s["flows"], key=lambda jf: jf["created"], reverse=True)
    for jf in all_job_flows:
        print " %-15s %-13s %19s %3d %17s %9.2f %9.2f %8s %s" % (
            jf["id"],
            jf["state"],
            jf["created"],
            jf["num_steps"],
            strip_microseconds(jf["ran"]),
            jf["nih_used"],
            jf["nih_bbnu"],
            (jf["owner"] or ""),
            (jf["label"] or ("not started by mrjob")),
        )
开发者ID:rfbowen,项目名称:mrjob,代码行数:101,代码来源:audit_usage.py


注:本文中的mrjob.util.strip_microseconds函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。