本文整理汇总了Python中mrjob.util.strip_microseconds函数的典型用法代码示例。如果您正苦于以下问题：Python strip_microseconds函数的具体用法？Python strip_microseconds怎么用？Python strip_microseconds使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了strip_microseconds函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: terminate_and_notify
def terminate_and_notify(runner, to_terminate, dry_run=False,
                         max_mins_locked=None, quiet=False):
    """Terminate the given job flows and report each termination on stdout.

    A job flow is only terminated if ``attempt_to_acquire_lock()`` returns
    a true value for it. On a dry run no lock is requested and nothing is
    terminated, so nothing is printed either.

    :param runner: object providing ``make_s3_conn()``, ``_lock_uri()``,
                   ``_opts``, ``_make_unique_job_name()`` and
                   ``make_emr_conn()`` (presumably an ``EMRJobRunner`` --
                   confirm against the caller)
    :param to_terminate: sequence of ``(jf, pending, time_idle,
                         time_to_end_of_hour)`` tuples; may be empty
    :param dry_run: if true, don't lock or terminate anything
    :param max_mins_locked: forwarded to ``attempt_to_acquire_lock()`` as
                            ``mins_to_expiration``
    :param quiet: if true, suppress the per-job-flow message
    """
    if not to_terminate:
        return

    fmt = ('Terminated job flow %s (%s); was %s for %s, %s to end of'
           ' hour')

    for jf, pending, time_idle, time_to_end_of_hour in to_terminate:
        did_terminate = False

        if not dry_run:
            status = attempt_to_acquire_lock(
                runner.make_s3_conn(),
                runner._lock_uri(jf),
                runner._opts['s3_sync_wait_time'],
                runner._make_unique_job_name(label='terminate'),
                mins_to_expiration=max_mins_locked,
            )
            if status:
                runner.make_emr_conn().terminate_jobflow(jf.jobflowid)
                did_terminate = True

        if did_terminate and not quiet:
            # print() with one parenthesized argument behaves the same on
            # Python 2 and Python 3
            print(fmt % (
                jf.jobflowid, jf.name,
                'pending' if pending else 'idle',
                strip_microseconds(time_idle),
                strip_microseconds(time_to_end_of_hour)))
示例2: terminate_and_notify
def terminate_and_notify(runner, to_terminate, dry_run=False,
                         max_mins_locked=None, quiet=False):
    """Terminate the given job flows and report each termination on stdout.

    For every job flow a summary message is built first; the same message
    (plus a unique job name) is handed to ``attempt_to_acquire_lock()``.
    The job flow is terminated only if that call returns a true value;
    otherwise a note is logged (unless *quiet*). On a dry run nothing is
    locked, terminated or printed.

    :param runner: object providing ``make_s3_conn()``, ``_lock_uri()``,
                   ``_opts``, ``_make_unique_job_name()`` and
                   ``make_emr_conn()`` (presumably an ``EMRJobRunner`` --
                   confirm against the caller)
    :param to_terminate: sequence of ``(jf, pending, time_idle,
                         time_to_end_of_hour)`` tuples; may be empty
    :param dry_run: if true, don't lock or terminate anything
    :param max_mins_locked: forwarded to ``attempt_to_acquire_lock()`` as
                            ``mins_to_expiration``
    :param quiet: if true, suppress both the summary and the "was locked"
                  log message
    """
    if not to_terminate:
        return

    fmt = ('Terminated job flow %s (%s); was %s for %s, %s to end of hour')

    for jf, pending, time_idle, time_to_end_of_hour in to_terminate:
        msg = fmt % (
            jf.jobflowid, jf.name,
            'pending' if pending else 'idle',
            strip_microseconds(time_idle),
            strip_microseconds(time_to_end_of_hour))

        did_terminate = False

        if not dry_run:
            status = attempt_to_acquire_lock(
                runner.make_s3_conn(),
                runner._lock_uri(jf),
                runner._opts['s3_sync_wait_time'],
                '%s (%s)' % (msg,
                             runner._make_unique_job_name(label='terminate')),
                mins_to_expiration=max_mins_locked,
            )
            if status:
                runner.make_emr_conn().terminate_jobflow(jf.jobflowid)
                did_terminate = True
            elif not quiet:
                # let the logger do the interpolation
                log.info('%s was locked between getting job flow info and'
                         ' trying to terminate it; skipping', jf.jobflowid)

        if did_terminate and not quiet:
            # print() with one parenthesized argument behaves the same on
            # Python 2 and Python 3
            print(msg)
示例3: _terminate_and_notify
def _terminate_and_notify(runner, cluster_id, cluster_name, num_steps,
                          is_pending, time_idle, time_to_end_of_hour,
                          dry_run=False, max_mins_locked=None, quiet=False):
    """Terminate a single cluster (or pretend to) and print a summary line.

    On a dry run the cluster is reported as terminated without any lock or
    EMR call. Otherwise the cluster is terminated only if
    ``_attempt_to_acquire_lock()`` returns a true value; if it doesn't, a
    note is logged instead. *quiet* silences both the summary and the note.
    """
    condition = 'pending' if is_pending else 'idle'
    msg = 'Terminated cluster %s (%s); was %s for %s, %s to end of hour' % (
        cluster_id,
        cluster_name,
        condition,
        strip_microseconds(time_idle),
        strip_microseconds(time_to_end_of_hour),
    )

    if dry_run:
        terminated = True
    else:
        # gather the lock arguments in the same order they are passed
        s3_conn = runner.fs.make_s3_conn()
        lock_uri = runner._lock_uri(cluster_id, num_steps)
        sync_secs = runner._opts['cloud_fs_sync_secs']
        lock_text = '%s (%s)' % (
            msg, runner._make_unique_job_key(label='terminate'))

        terminated = bool(_attempt_to_acquire_lock(
            s3_conn,
            lock_uri,
            sync_secs,
            lock_text,
            mins_to_expiration=max_mins_locked,
        ))

        if terminated:
            runner.make_emr_conn().terminate_jobflow(cluster_id)
        elif not quiet:
            log.info('%s was locked between getting cluster info and'
                     ' trying to terminate it; skipping' % cluster_id)

    if terminated and not quiet:
        print(msg)
示例4: terminate_and_notify
def terminate_and_notify(emr_conn, to_terminate, dry_run=False):
    """Terminate each listed job flow and print one line per job flow.

    *to_terminate* holds ``(job_flow_id, name, time_idle,
    time_to_end_of_hour)`` tuples. With *dry_run* set,
    ``emr_conn.terminate_jobflow()`` is never called.
    """
    if to_terminate:
        summary = ('Terminated job flow %s (%s); was idle for %s,'
                   ' %s to end of hour')

        for job_flow_id, name, time_idle, time_to_end_of_hour in to_terminate:
            if not dry_run:
                emr_conn.terminate_jobflow(job_flow_id)

            # NOTE(review): the line is printed for dry runs too; this
            # placement was reconstructed from a source whose indentation
            # was lost -- confirm against the upstream script
            print(summary % (
                job_flow_id,
                name,
                strip_microseconds(time_idle),
                strip_microseconds(time_to_end_of_hour),
            ))
示例5: terminate_and_notify
def terminate_and_notify(emr_conn, to_terminate, dry_run=False):
    """Terminate each listed job flow and print one line per job flow.

    :param emr_conn: connection object providing ``terminate_jobflow()``
    :param to_terminate: sequence of ``(job_flow_id, name, pending,
                         time_idle, time_to_end_of_hour)`` tuples; may be
                         empty
    :param dry_run: if true, ``terminate_jobflow()`` is never called
    """
    if not to_terminate:
        return

    # the loop variable used to be called ``id``, shadowing the builtin
    for (job_flow_id, name, pending,
         time_idle, time_to_end_of_hour) in to_terminate:
        if not dry_run:
            emr_conn.terminate_jobflow(job_flow_id)

        # NOTE(review): the line is printed for dry runs too; this
        # placement was reconstructed from a source whose indentation was
        # lost -- confirm against the upstream script
        print('Terminated job flow %s (%s); was %s for %s, %s to end of hour'
              % (job_flow_id, name,
                 'pending' if pending else 'idle',
                 strip_microseconds(time_idle),
                 strip_microseconds(time_to_end_of_hour)))
示例6: pprint_job_flow
def pprint_job_flow(jf):
    """Print a job flow to stdout in this form::

        j-JOB_FLOW_ID: job.flow.name
        2 instances (master=m1.small, slaves=m1.small, TIME to end of hour)
        STATE

    followed by a blank line. The ``slaves=`` entry and the plural "s"
    only appear when the job flow has more than one instance.

    :param jf: job flow object with ``instancecount``,
               ``masterinstancetype``, ``slaveinstancetype``,
               ``jobflowid``, ``name`` and ``state`` attributes
    """
    instance_count = int(jf.instancecount)
    plural = 's' if instance_count > 1 else ''

    comma_segments = ['master=%s' % jf.masterinstancetype]
    if instance_count > 1:
        comma_segments.append('slaves=%s' % jf.slaveinstancetype)
    comma_segments.append('%s to end of hour' %
                          strip_microseconds(est_time_to_hour(jf)))

    # every print() call takes exactly one argument so that the output is
    # the same on Python 2 and Python 3
    print('%s: %s' % (jf.jobflowid, jf.name))
    print('%d instance%s (%s)' % (
        instance_count, plural, ', '.join(comma_segments)))
    print(jf.state)
    # blank separator line; a bare print() would emit "()" on Python 2
    print('')
示例7: _format_timedelta
def _format_timedelta(time):
    """Format a timedelta for use in a columnar format. This just pads
    the clock part so that e.g. ``'3 days, 9:00:00'`` becomes
    ``'3 days,  9:00:00'`` and lines up with ``'3 days, 10:00:00'``.

    :param time: a :py:class:`datetime.timedelta`
    :return: the string form of *time* without microseconds, padded as
             described above when it has a day part and a one-digit hour
    """
    # assumes strip_microseconds() returns a timedelta, so that str()
    # yields '[N day[s], ]H:MM:SS' -- confirm against mrjob.util
    result = str(strip_microseconds(time))
    parts = result.split()

    # three parts means 'N', 'day(s),' and the clock; a 7-character clock
    # has a one-digit hour and needs one extra space of padding
    if len(parts) == 3 and len(parts[-1]) == 7:
        # the double space before the clock is what aligns the column;
        # with single spaces this branch would just rebuild ``result``
        return '%s %s  %s' % tuple(parts)

    return result
示例8: inspect_and_maybe_terminate_job_flows
def inspect_and_maybe_terminate_job_flows(
conf_path=None,
dry_run=False,
max_hours_idle=None,
mins_to_end_of_hour=None,
now=None,
pool_name=None,
pooled_only=False,
unpooled_only=False,
max_mins_locked=None,
quiet=False,
**kwargs
):
if now is None:
now = datetime.utcnow()
# old default behavior
if max_hours_idle is None and mins_to_end_of_hour is None:
max_hours_idle = DEFAULT_MAX_HOURS_IDLE
runner = EMRJobRunner(conf_path=conf_path, **kwargs)
emr_conn = runner.make_emr_conn()
log.info(
'getting info about all job flows (this goes back about 2 months)')
# We don't filter by job flow state because we want this to work even
# if Amazon adds another kind of idle state.
job_flows = describe_all_job_flows(emr_conn)
num_bootstrapping = 0
num_done = 0
num_idle = 0
num_non_streaming = 0
num_pending = 0
num_running = 0
# a list of tuples of job flow id, name, idle time (as a timedelta)
to_terminate = []
for jf in job_flows:
# check if job flow is done
if is_job_flow_done(jf):
num_done += 1
# check if job flow is bootstrapping
elif is_job_flow_bootstrapping(jf):
num_bootstrapping += 1
# we can't really tell if non-streaming jobs are idle or not, so
# let them be (see Issue #60)
elif not is_job_flow_streaming(jf):
num_non_streaming += 1
elif is_job_flow_running(jf):
num_running += 1
else:
time_idle = now - time_last_active(jf)
time_to_end_of_hour = est_time_to_hour(jf, now=now)
_, pool = pool_hash_and_name(jf)
pending = job_flow_has_pending_steps(jf)
if pending:
num_pending += 1
else:
num_idle += 1
log.debug(
'Job flow %s %s for %s, %s to end of hour, %s (%s)' %
(jf.jobflowid,
'pending' if pending else 'idle',
strip_microseconds(time_idle),
strip_microseconds(time_to_end_of_hour),
('unpooled' if pool is None else 'in %s pool' % pool),
jf.name))
# filter out job flows that don't meet our criteria
if (max_hours_idle is not None and
time_idle <= timedelta(hours=max_hours_idle)):
continue
# mins_to_end_of_hour doesn't apply to jobs with pending steps
if (mins_to_end_of_hour is not None and
(pending or
time_to_end_of_hour >= timedelta(
minutes=mins_to_end_of_hour))):
continue
if (pooled_only and pool is None):
continue
if (unpooled_only and pool is not None):
continue
if (pool_name is not None and pool != pool_name):
continue
to_terminate.append((jf, pending, time_idle, time_to_end_of_hour))
#.........这里部分代码省略.........
示例9: print_report
#.........这里部分代码省略.........
h -= timedelta(hours=1)
print()
print('* Job flows are considered to belong to the user and job that')
print(' started them or last ran on them.')
print()
# Top jobs
print('Top jobs, by total time used:')
for label, nih_used in sorted(s['label_to_nih_used'].items(),
key=lambda lb_nih: (-lb_nih[1], lb_nih[0])):
print(' %9.2f %s' % (nih_used, label))
print()
print('Top jobs, by time billed but not used:')
for label, nih_bbnu in sorted(s['label_to_nih_bbnu'].items(),
key=lambda lb_nih1: (-lb_nih1[1], lb_nih1[0])):
print(' %9.2f %s' % (nih_bbnu, label))
print()
# Top users
print('Top users, by total time used:')
for owner, nih_used in sorted(s['owner_to_nih_used'].items(),
key=lambda o_nih: (-o_nih[1], o_nih[0])):
print(' %9.2f %s' % (nih_used, owner))
print()
print('Top users, by time billed but not used:')
for owner, nih_bbnu in sorted(s['owner_to_nih_bbnu'].items(),
key=lambda o_nih2: (-o_nih2[1], o_nih2[0])):
print(' %9.2f %s' % (nih_bbnu, owner))
print()
# Top job steps
print('Top job steps, by total time used (step number first):')
for (label, step_num), nih_used in sorted(
s['job_step_to_nih_used'].items(),
key=lambda k_nih: (-k_nih[1], k_nih[0])):
if label:
print(' %9.2f %3d %s' % (nih_used, step_num, label))
else:
print(' %9.2f (non-mrjob step)' % (nih_used,))
print()
print('Top job steps, by total time billed but not used (un-pooled only):')
for (label, step_num), nih_bbnu in sorted(
s['job_step_to_nih_bbnu_no_pool'].items(),
key=lambda k_nih3: (-k_nih3[1], k_nih3[0])):
if label:
print(' %9.2f %3d %s' % (nih_bbnu, step_num, label))
else:
print(' %9.2f (non-mrjob step)' % (nih_bbnu,))
print()
# Top pools
print('All pools, by total time billed:')
for pool, nih_billed in sorted(s['pool_to_nih_billed'].items(),
key=lambda p_nih: (-p_nih[1], p_nih[0])):
print(' %9.2f %s' % (nih_billed, pool or '(not pooled)'))
print()
print('All pools, by total time billed but not used:')
for pool, nih_bbnu in sorted(s['pool_to_nih_bbnu'].items(),
key=lambda p_nih4: (-p_nih4[1], p_nih4[0])):
print(' %9.2f %s' % (nih_bbnu, pool or '(not pooled)'))
print()
# Top job flows
print('All job flows, by total time billed:')
top_clusters = sorted(s['clusters'],
key=lambda cs: (-cs['nih_billed'], cs['name']))
for cs in top_clusters:
print(' %9.2f %-15s %s' % (
cs['nih_billed'], cs['id'], cs['name']))
print()
print('All job flows, by time billed but not used:')
top_clusters_bbnu = sorted(
s['clusters'], key=lambda cs: (-cs['nih_bbnu'], cs['name']))
for cs in top_clusters_bbnu:
print(' %9.2f %-15s %s' % (
cs['nih_bbnu'], cs['id'], cs['name']))
print()
# Details
print('Details for all job flows:')
print()
print(' id state created steps'
' time ran billed waste user name')
all_clusters = sorted(s['clusters'], key=lambda cs: cs['created'],
reverse=True)
for cs in all_clusters:
print(' %-15s %-22s %19s %3d %17s %9.2f %9.2f %8s %s' % (
cs['id'], cs['state'], cs['created'], cs['num_steps'],
strip_microseconds(cs['ran']), cs['nih_used'], cs['nih_bbnu'],
(cs['owner'] or ''), (cs['label'] or ('not started by mrjob'))))
示例10: inspect_and_maybe_terminate_job_flows
def inspect_and_maybe_terminate_job_flows(
    conf_path=None,
    dry_run=False,
    max_hours_idle=None,
    mins_to_end_of_hour=None,
    now=None,
    pool_name=None,
    pooled_only=False,
    unpooled_only=False,
):
    """Tally all job flows by status, select the idle ones that pass every
    filter, and hand them to ``terminate_and_notify()``.

    A job flow counts as idle when it is not done, not non-streaming and
    not running. An idle job flow is selected unless one of these holds:

    * *max_hours_idle* is set and it has been idle for at most that long
    * *mins_to_end_of_hour* is set and it is at least that far from the
      end of its hour
    * *pooled_only* is set and it has no pool
    * *unpooled_only* is set and it has a pool
    * *pool_name* is set and differs from its pool

    If neither *max_hours_idle* nor *mins_to_end_of_hour* is given,
    *max_hours_idle* falls back to ``DEFAULT_MAX_HOURS_IDLE``. *now*
    defaults to ``datetime.utcnow()``.
    """
    now = datetime.utcnow() if now is None else now

    # old default behavior
    no_threshold_given = max_hours_idle is None and mins_to_end_of_hour is None
    if no_threshold_given:
        max_hours_idle = DEFAULT_MAX_HOURS_IDLE

    emr_conn = EMRJobRunner(conf_path=conf_path).make_emr_conn()

    log.info(
        'getting info about all job flows (this goes back about 2 months)')

    # Job flows are deliberately not filtered by state, so this keeps
    # working even if Amazon adds another kind of idle state.
    all_job_flows = describe_all_job_flows(emr_conn)

    running_count = 0
    idle_count = 0
    done_count = 0
    non_streaming_count = 0

    # tuples of (job flow id, name, idle time, time to end of hour)
    candidates = []

    for jf in all_job_flows:
        if is_job_flow_done(jf):
            done_count += 1
            continue

        # we can't really tell if non-streaming jobs are idle or not, so
        # they are left alone (see Issue #60)
        if is_job_flow_non_streaming(jf):
            non_streaming_count += 1
            continue

        if is_job_flow_running(jf):
            running_count += 1
            continue

        # anything left is idle
        idle_count += 1
        time_idle = time_job_flow_idle(jf, now=now)
        time_to_end_of_hour = time_to_end_of_hour_for_job_flow(jf, now=now)
        pool = job_flow_pool_name(jf)

        log.debug(
            'Job flow %-15s idle for %s, %s to end of hour, %s (%s)' %
            (jf.jobflowid,
             strip_microseconds(time_idle),
             strip_microseconds(time_to_end_of_hour),
             ('unpooled' if pool is None else 'in %s pool' % pool),
             jf.name))

        # any one of these criteria keeps the job flow alive
        spared = (
            (max_hours_idle is not None and
             time_idle <= timedelta(hours=max_hours_idle)) or
            (mins_to_end_of_hour is not None and
             time_to_end_of_hour >=
             timedelta(minutes=mins_to_end_of_hour)) or
            (pooled_only and pool is None) or
            (unpooled_only and pool is not None) or
            (pool_name is not None and pool != pool_name)
        )
        if not spared:
            candidates.append(
                (jf.jobflowid, jf.name, time_idle, time_to_end_of_hour))

    log.info(
        'Job flow statuses: %d running, %d idle, %d active non-streaming,'
        ' %d done' % (running_count, idle_count, non_streaming_count,
                      done_count))

    terminate_and_notify(emr_conn, candidates, dry_run=dry_run)
示例11: _maybe_terminate_clusters
def _maybe_terminate_clusters(dry_run=False,
max_hours_idle=None,
mins_to_end_of_hour=None,
now=None,
pool_name=None,
pooled_only=False,
unpooled_only=False,
max_mins_locked=None,
quiet=False,
**kwargs):
if now is None:
now = datetime.utcnow()
# old default behavior
if max_hours_idle is None and mins_to_end_of_hour is None:
max_hours_idle = _DEFAULT_MAX_HOURS_IDLE
runner = EMRJobRunner(**kwargs)
emr_conn = runner.make_emr_conn()
num_starting = 0
num_bootstrapping = 0
num_done = 0
num_idle = 0
num_pending = 0
num_running = 0
# We don't filter by cluster state because we want this to work even
# if Amazon adds another kind of idle state.
for cluster_summary in _yield_all_clusters(emr_conn):
cluster_id = cluster_summary.id
# check if cluster is done
if _is_cluster_done(cluster_summary):
num_done += 1
continue
# check if cluster is starting
if _is_cluster_starting(cluster_summary):
num_starting += 1
continue
# check if cluster is bootstrapping
if _is_cluster_bootstrapping(cluster_summary):
num_bootstrapping += 1
continue
# need steps to learn more about cluster
steps = _list_all_steps(emr_conn, cluster_id)
if any(_is_step_running(step) for step in steps):
num_running += 1
continue
# cluster is idle
time_idle = now - _time_last_active(cluster_summary, steps)
time_to_end_of_hour = _est_time_to_hour(cluster_summary, now=now)
is_pending = _cluster_has_pending_steps(steps)
bootstrap_actions = list(_yield_all_bootstrap_actions(
emr_conn, cluster_id))
_, pool = _pool_hash_and_name(bootstrap_actions)
if is_pending:
num_pending += 1
else:
num_idle += 1
log.debug(
'cluster %s %s for %s, %s to end of hour, %s (%s)' %
(cluster_id,
'pending' if is_pending else 'idle',
strip_microseconds(time_idle),
strip_microseconds(time_to_end_of_hour),
('unpooled' if pool is None else 'in %s pool' % pool),
cluster_summary.name))
# filter out clusters that don't meet our criteria
if (max_hours_idle is not None and
time_idle <= timedelta(hours=max_hours_idle)):
continue
# mins_to_end_of_hour doesn't apply to jobs with pending steps
if (mins_to_end_of_hour is not None and
(is_pending or
time_to_end_of_hour >= timedelta(
minutes=mins_to_end_of_hour))):
continue
if (pooled_only and pool is None):
continue
if (unpooled_only and pool is not None):
continue
if (pool_name is not None and pool != pool_name):
continue
# terminate idle cluster
_terminate_and_notify(
#.........这里部分代码省略.........
示例12: print_report
#.........这里部分代码省略.........
s['date_to_nih_bbnu'].get(d, 0.0),
percent(s['date_to_nih_bbnu'].get(d, 0.0),
s['date_to_nih_billed'][d]))
d -= timedelta(days=1)
print
print '* Job flows are considered to belong to the user and job that'
print ' started them or last ran on them.'
print
# Top jobs
print 'Top jobs, by total time used:'
for label, nih_used in sorted(s['label_to_nih_used'].iteritems(),
key=lambda (lb, nih): (-nih, lb)):
print ' %9.2f %s' % (nih_used, label)
print
print 'Top jobs, by time billed but not used:'
for label, nih_bbnu in sorted(s['label_to_nih_bbnu'].iteritems(),
key=lambda (lb, nih): (-nih, lb)):
print ' %9.2f %s' % (nih_bbnu, label)
print
# Top users
print 'Top users, by total time used:'
for owner, nih_used in sorted(s['owner_to_nih_used'].iteritems(),
key=lambda (o, nih): (-nih, o)):
print ' %9.2f %s' % (nih_used, owner)
print
print 'Top users, by time billed but not used:'
for owner, nih_bbnu in sorted(s['owner_to_nih_bbnu'].iteritems(),
key=lambda (o, nih): (-nih, o)):
print ' %9.2f %s' % (nih_bbnu, owner)
print
# Top job steps
print 'Top job steps, by total time used (step number first):'
for (label, step_num), nih_used in sorted(
s['job_step_to_nih_used'].iteritems(), key=lambda (k, nih): (-nih, k)):
if label:
print ' %9.2f %3d %s' % (nih_used, step_num, label)
else:
print ' %9.2f (non-mrjob step)' % (nih_used,)
print
print 'Top job steps, by total time billed but not used (un-pooled only):'
for (label, step_num), nih_bbnu in sorted(
s['job_step_to_nih_bbnu_no_pool'].iteritems(),
key=lambda (k, nih): (-nih, k)):
if label:
print ' %9.2f %3d %s' % (nih_bbnu, step_num, label)
else:
print ' %9.2f (non-mrjob step)' % (nih_bbnu,)
print
# Top pools
print 'All pools, by total time billed:'
for pool, nih_billed in sorted(s['pool_to_nih_billed'].iteritems(),
key=lambda (p, nih): (-nih, p)):
print ' %9.2f %s' % (nih_billed, pool or '(not pooled)')
print
print 'All pools, by total time billed but not used:'
for pool, nih_bbnu in sorted(s['pool_to_nih_bbnu'].iteritems(),
key=lambda (p, nih): (-nih, p)):
print ' %9.2f %s' % (nih_bbnu, pool or '(not pooled)')
print
# Top job flows
print 'All job flows, by total time billed:'
top_job_flows = sorted(s['flows'],
key=lambda jf: (-jf['nih_billed'], jf['name']))
for jf in top_job_flows:
print ' %9.2f %-15s %s' % (
jf['nih_billed'], jf['id'], jf['name'])
print
print 'All job flows, by time billed but not used:'
top_job_flows_bbnu = sorted(s['flows'],
key=lambda jf: (-jf['nih_bbnu'], jf['name']))
for jf in top_job_flows_bbnu:
print ' %9.2f %-15s %s' % (
jf['nih_bbnu'], jf['id'], jf['name'])
print
# Details
print 'Details for all job flows:'
print
print (' id state created steps'
' time ran billed waste user name')
all_job_flows = sorted(s['flows'], key=lambda jf: jf['created'],
reverse=True)
for jf in all_job_flows:
print ' %-15s %-13s %19s %3d %17s %9.2f %9.2f %8s %s' % (
jf['id'], jf['state'], jf['created'], jf['num_steps'],
strip_microseconds(jf['ran']), jf['nih_used'], jf['nih_bbnu'],
(jf['owner'] or ''), (jf['label'] or ('not started by mrjob')))
示例13: _round_up_to_next_second
def _round_up_to_next_second(td):
    """Return *td* rounded up to the next second, because that's how EMR
    bills. A *td* with no microseconds is returned as-is.
    """
    if not td.microseconds:
        return td

    # drop the fractional part, then add one whole second in its place
    return strip_microseconds(td) + timedelta(seconds=1)
示例14: _maybe_terminate_clusters
def _maybe_terminate_clusters(dry_run=False,
max_mins_idle=None,
now=None,
pool_name=None,
pooled_only=False,
unpooled_only=False,
max_mins_locked=None,
quiet=False,
**kwargs):
if now is None:
now = _boto3_now()
# old default behavior
if max_mins_idle is None:
max_mins_idle = _DEFAULT_MAX_MINS_IDLE
runner = EMRJobRunner(**kwargs)
emr_client = runner.make_emr_client()
num_starting = 0
num_bootstrapping = 0
num_done = 0
num_idle = 0
num_pending = 0
num_running = 0
# We don't filter by cluster state because we want this to work even
# if Amazon adds another kind of idle state.
for cluster_summary in _boto3_paginate(
'Clusters', emr_client, 'list_clusters'):
cluster_id = cluster_summary['Id']
# check if cluster is done
if _is_cluster_done(cluster_summary):
num_done += 1
continue
# check if cluster is starting
if _is_cluster_starting(cluster_summary):
num_starting += 1
continue
# check if cluster is bootstrapping
if _is_cluster_bootstrapping(cluster_summary):
num_bootstrapping += 1
continue
# need steps to learn more about cluster
steps = list(reversed(list(_boto3_paginate(
'Steps', emr_client, 'list_steps',
ClusterId=cluster_id))))
if any(_is_step_running(step) for step in steps):
num_running += 1
continue
# cluster is idle
time_idle = now - _time_last_active(cluster_summary, steps)
is_pending = _cluster_has_pending_steps(steps)
# need to get actual cluster to see tags
cluster = emr_client.describe_cluster(ClusterId=cluster_id)['Cluster']
_, pool = _pool_hash_and_name(cluster)
if is_pending:
num_pending += 1
else:
num_idle += 1
log.debug(
'cluster %s %s for %s, %s (%s)' %
(cluster_id,
'pending' if is_pending else 'idle',
strip_microseconds(time_idle),
('unpooled' if pool is None else 'in %s pool' % pool),
cluster_summary['Name']))
# filter out clusters that don't meet our criteria
if (max_mins_idle is not None and
time_idle <= timedelta(minutes=max_mins_idle)):
continue
if (pooled_only and pool is None):
continue
if (unpooled_only and pool is not None):
continue
if (pool_name is not None and pool != pool_name):
continue
# terminate idle cluster
_terminate_and_notify(
runner=runner,
cluster_id=cluster_id,
cluster_name=cluster_summary['Name'],
num_steps=len(steps),
is_pending=is_pending,
#.........这里部分代码省略.........
示例15: print_report
#.........这里部分代码省略.........
print " %13s %9.2f %9.2f %9.2f %5.1f" % (
h.strftime("%Y-%m-%d %H"),
s["hour_to_nih_billed"].get(h, 0.0),
s["hour_to_nih_used"].get(h, 0.0),
s["hour_to_nih_bbnu"].get(h, 0.0),
percent(s["hour_to_nih_bbnu"].get(h, 0.0), s["hour_to_nih_billed"].get(h, 0.0)),
)
h -= timedelta(hours=1)
print
print "* Job flows are considered to belong to the user and job that"
print " started them or last ran on them."
print
# Top jobs
print "Top jobs, by total time used:"
for label, nih_used in sorted(s["label_to_nih_used"].iteritems(), key=lambda (lb, nih): (-nih, lb)):
print " %9.2f %s" % (nih_used, label)
print
print "Top jobs, by time billed but not used:"
for label, nih_bbnu in sorted(s["label_to_nih_bbnu"].iteritems(), key=lambda (lb, nih): (-nih, lb)):
print " %9.2f %s" % (nih_bbnu, label)
print
# Top users
print "Top users, by total time used:"
for owner, nih_used in sorted(s["owner_to_nih_used"].iteritems(), key=lambda (o, nih): (-nih, o)):
print " %9.2f %s" % (nih_used, owner)
print
print "Top users, by time billed but not used:"
for owner, nih_bbnu in sorted(s["owner_to_nih_bbnu"].iteritems(), key=lambda (o, nih): (-nih, o)):
print " %9.2f %s" % (nih_bbnu, owner)
print
# Top job steps
print "Top job steps, by total time used (step number first):"
for (label, step_num), nih_used in sorted(s["job_step_to_nih_used"].iteritems(), key=lambda (k, nih): (-nih, k)):
if label:
print " %9.2f %3d %s" % (nih_used, step_num, label)
else:
print " %9.2f (non-mrjob step)" % (nih_used,)
print
print "Top job steps, by total time billed but not used (un-pooled only):"
for (label, step_num), nih_bbnu in sorted(
s["job_step_to_nih_bbnu_no_pool"].iteritems(), key=lambda (k, nih): (-nih, k)
):
if label:
print " %9.2f %3d %s" % (nih_bbnu, step_num, label)
else:
print " %9.2f (non-mrjob step)" % (nih_bbnu,)
print
# Top pools
print "All pools, by total time billed:"
for pool, nih_billed in sorted(s["pool_to_nih_billed"].iteritems(), key=lambda (p, nih): (-nih, p)):
print " %9.2f %s" % (nih_billed, pool or "(not pooled)")
print
print "All pools, by total time billed but not used:"
for pool, nih_bbnu in sorted(s["pool_to_nih_bbnu"].iteritems(), key=lambda (p, nih): (-nih, p)):
print " %9.2f %s" % (nih_bbnu, pool or "(not pooled)")
print
# Top job flows
print "All job flows, by total time billed:"
top_job_flows = sorted(s["flows"], key=lambda jf: (-jf["nih_billed"], jf["name"]))
for jf in top_job_flows:
print " %9.2f %-15s %s" % (jf["nih_billed"], jf["id"], jf["name"])
print
print "All job flows, by time billed but not used:"
top_job_flows_bbnu = sorted(s["flows"], key=lambda jf: (-jf["nih_bbnu"], jf["name"]))
for jf in top_job_flows_bbnu:
print " %9.2f %-15s %s" % (jf["nih_bbnu"], jf["id"], jf["name"])
print
# Details
print "Details for all job flows:"
print
print (
" id state created steps" " time ran billed waste user name"
)
all_job_flows = sorted(s["flows"], key=lambda jf: jf["created"], reverse=True)
for jf in all_job_flows:
print " %-15s %-13s %19s %3d %17s %9.2f %9.2f %8s %s" % (
jf["id"],
jf["state"],
jf["created"],
jf["num_steps"],
strip_microseconds(jf["ran"]),
jf["nih_used"],
jf["nih_bbnu"],
(jf["owner"] or ""),
(jf["label"] or ("not started by mrjob")),
)