本文整理汇总了Python中mrjob.options.add_basic_opts函数的典型用法代码示例。如果您正苦于以下问题:Python add_basic_opts函数的具体用法?Python add_basic_opts怎么用?Python add_basic_opts使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了add_basic_opts函数的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main(args):
    """Collect and print stats about active EMR jobflows.

    Parses command-line args, collects jobflows in the active states
    (BOOTSTRAPPING, RUNNING, STARTING, WAITING), compiles stats about
    them, and prints the stats as JSON (or pretty-printed with -p).

    :param args: command-line arguments (e.g. ``sys.argv[1:]``)
    """
    # parse command-line args
    usage = '%prog [options]'
    # NOTE: fragments are joined with explicit separating spaces; the
    # original concatenation fused words ("jobflowsand", "executethese")
    description = (
        "Collect EMR stats from active jobflows. "
        "Active jobflows are those in states of: "
        "BOOTSTRAPPING, RUNNING, STARTING, and WAITING. "
        "Collected stats include total number of active jobflows"
        " and total number of Amazon EC2 instances used to execute"
        " these jobflows. The instance counts are not separated by"
        " instance type.")
    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option(
        "-p", "--pretty-print",
        action="store_true", dest="pretty_print", default=False,
        help=('Pretty print the collected stats'))
    add_basic_opts(option_parser)

    options, args = option_parser.parse_args(args)
    if args:
        # this tool takes options only, no positional arguments
        option_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    log.info('collecting EMR active jobflows...')
    job_flows = collect_active_job_flows(options.conf_paths)

    log.info('compiling stats from collected jobflows...')
    stats = job_flows_to_stats(job_flows)

    if options.pretty_print:
        pretty_print(stats)
    else:
        print(json.dumps(stats))
示例2: make_option_parser
def make_option_parser():
    """Build the option parser for the S3 tmpwatch tool.

    Returns an :class:`OptionParser` with the -t/--test flag, the basic
    mrjob options, and two S3-related options scraped from MRJob's own
    option groups.
    """
    usage = '%prog [options] <time-untouched> <URIs>'
    description = (
        'Delete all files in a given URI that are older than a specified'
        ' time.\n\nThe time parameter defines the threshold for removing'
        ' files. If the file has not been accessed for *time*, the file is'
        ' removed. The time argument is a number with an optional'
        ' single-character suffix specifying the units: m for minutes, h for'
        ' hours, d for days. If no suffix is specified, time is in hours.')

    parser = OptionParser(usage=usage, description=description)

    # -t only logs what would be removed, without touching anything
    parser.add_option(
        '-t', '--test', action='store_true', dest='test', default=False,
        help="Don't actually delete any files; just log that we would")

    add_basic_opts(parser)

    # pull the S3 connection options off MRJob's own option groups
    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        parser: ('aws_region', 's3_endpoint'),
    })
    alphabetize_options(parser)
    return parser
示例3: make_option_parser
def make_option_parser():
    """Create the option parser for the S3 tmpwatch tool.

    Returns an :class:`OptionParser` offering -t/--test plus the basic
    mrjob options.
    """
    usage = "%prog [options] <time-untouched> <URIs>"
    description = (
        "Delete all files in a given URI that are older than a specified"
        " time.\n\nThe time parameter defines the threshold for removing"
        " files. If the file has not been accessed for *time*, the file is"
        " removed. The time argument is a number with an optional"
        " single-character suffix specifying the units: m for minutes, h for"
        " hours, d for days. If no suffix is specified, time is in hours.")

    parser = OptionParser(usage=usage, description=description)

    # dry-run switch: log deletions instead of performing them
    parser.add_option(
        "-t", "--test", action="store_true", dest="test", default=False,
        help="Don't actually delete any files; just log that we would")

    add_basic_opts(parser)
    return parser
示例4: make_option_parser
def make_option_parser():
    """Build the option parser for terminating idle EMR clusters.

    Returns an :class:`OptionParser` with idle-criteria options, pool
    filters, dry-run flags, plus the basic and EMR-connect option sets.
    """
    usage = '%prog [options]'
    description = ('Terminate idle EMR clusters that meet the criteria'
                   ' passed in on the command line (or, by default,'
                   ' clusters that have been idle for one hour).')
    parser = OptionParser(usage=usage, description=description)

    # thresholds controlling when a cluster counts as idle
    parser.add_option(
        '--max-hours-idle', type='float', dest='max_hours_idle',
        default=None,
        help=('Max number of hours a cluster can go without bootstrapping,'
              ' running a step, or having a new step created. This will fire'
              ' even if there are pending steps which EMR has failed to'
              ' start. Make sure you set this higher than the amount of time'
              ' your jobs can take to start instances and bootstrap.'))
    parser.add_option(
        '--max-mins-locked', type='float', dest='max_mins_locked',
        default=DEFAULT_MAX_MINUTES_LOCKED,
        help='Max number of minutes a cluster can be locked while idle.')
    parser.add_option(
        '--mins-to-end-of-hour', type='float', dest='mins_to_end_of_hour',
        default=None,
        help=('Terminate clusters that are within this many minutes of'
              ' the end of a full hour since the job started running'
              ' AND have no pending steps.'))

    # filters restricting which clusters are eligible
    parser.add_option(
        '--unpooled-only', action='store_true', dest='unpooled_only',
        default=False, help='Only terminate un-pooled clusters')
    parser.add_option(
        '--pooled-only', action='store_true', dest='pooled_only',
        default=False, help='Only terminate pooled clusters')
    parser.add_option(
        '--pool-name', dest='pool_name', default=None,
        help='Only terminate clusters in the given named pool.')

    # dry-run style switches
    parser.add_option(
        '--dry-run', action='store_true', dest='dry_run', default=False,
        help="Don't actually kill idle jobs; just log that we would")
    parser.add_option(
        '-t', '--test', action='store_true', dest='test', default=False,
        help="Don't actually delete any files; just log that we would")

    add_basic_opts(parser)
    add_emr_connect_opts(parser)
    alphabetize_options(parser)
    return parser
示例5: make_option_parser
def make_option_parser():
    """Build the option parser for the EMR usage report tool."""
    parser = OptionParser(
        usage='%prog [options]',
        description='Print a giant report on EMR usage.')

    # how far back in the job history to look (None = as far as EMR allows)
    parser.add_option(
        '--max-days-ago', type='float', dest='max_days_ago', default=None,
        help=('Max number of days ago to look at jobs. By default, we go back'
              ' as far as EMR supports (currently about 2 months)'))

    add_basic_opts(parser)
    return parser
示例6: make_option_parser
def make_option_parser():
    """Build the option parser for terminating a single EMR job flow.

    The tool takes one positional argument (the job flow ID) plus the
    basic mrjob options added by :func:`add_basic_opts`.
    """
    usage = '%prog [options] jobflowid'
    description = 'Terminate an existing EMR job flow.'
    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option(
        '-t', '--test', dest='test', default=False,
        action='store_true',
        # fixed help text: this tool terminates a job flow; the previous
        # message ("delete any files") was copy-pasted from another tool
        help="Don't actually terminate the job flow; just log that we would")
    add_basic_opts(option_parser)
    return option_parser
示例7: make_option_parser
def make_option_parser():
    """Build the option parser for the EMR log inspection tool.

    Returns an :class:`OptionParser` with flags selecting which logs to
    list/cat/parse, the basic mrjob options, EMR connection options, and
    a few SSH-related options scraped from MRJob's option groups.
    """
    usage = 'usage: %prog [options] JOB_FLOW_ID'
    description = (
        'List, display, and parse Hadoop logs associated with EMR job flows.'
        ' Useful for debugging failed jobs for which mrjob did not display a'
        ' useful error message or for inspecting jobs whose output has been'
        ' lost.')
    parser = OptionParser(usage=usage, description=description)

    add_basic_opts(parser)

    # boolean switches selecting what to do with the logs, in the same
    # order they were originally registered
    flag_specs = [
        ('-f', '--find-failure', 'find_failure',
         'Search the logs for information about why the job failed'),
        ('-l', '--list', 'list_relevant',
         'List log files MRJob finds relevant'),
        ('-L', '--list-all', 'list_all',
         'List all log files'),
        ('-a', '--cat', 'cat_relevant',
         'Cat log files MRJob finds relevant'),
        ('-A', '--cat-all', 'cat_all',
         'Cat all log files to JOB_FLOW_ID/'),
    ]
    for short_name, long_name, dest, help_text in flag_specs:
        parser.add_option(short_name, long_name, dest=dest,
                          action='store_true', default=False, help=help_text)

    parser.add_option('-s', '--step-num', dest='step_num',
                      action='store', type='int', default=None,
                      help=('Limit results to a single step. To be used'
                            ' with --list and --cat.'))
    parser.add_option('--counters', dest='get_counters',
                      action='store_true', default=False,
                      help='Show counters from the job flow')

    add_emr_connect_opts(parser)

    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        parser: ('ec2_key_pair_file', 's3_sync_wait_time', 'ssh_bin')
    })
    alphabetize_options(parser)
    return parser
示例8: make_option_parser
def make_option_parser():
    """Build the option parser for the long-running-jobs report."""
    usage = '%prog [options]'
    description = ('Report jobs running for more than a certain number of'
                   ' hours (by default, %.1f). This can help catch buggy jobs'
                   ' and Hadoop/EMR operational issues.' % DEFAULT_MIN_HOURS)
    parser = OptionParser(usage=usage, description=description)

    # runtime threshold below which a job is not reported
    parser.add_option(
        '--min-hours', type='float', dest='min_hours',
        default=DEFAULT_MIN_HOURS,
        help=('Minimum number of hours a job can run before we report it.'
              ' Default: %default'))

    add_basic_opts(parser)
    return parser
示例9: make_option_parser
def make_option_parser():
    """Build the option parser for the EMR usage report tool."""
    parser = OptionParser(
        usage="%prog [options]",
        description="Print a giant report on EMR usage.")

    # how far back in the job history to look (None = as far as EMR allows)
    parser.add_option(
        "--max-days-ago", dest="max_days_ago", type="float", default=None,
        help=("Max number of days ago to look at jobs. By default, we go back"
              " as far as EMR supports (currently about 2 months)"))

    add_basic_opts(parser)
    return parser
示例10: make_option_parser
def make_option_parser():
    """Build the option parser for creating a persistent EMR job flow.

    Returns an :class:`OptionParser` combining the basic mrjob options, a
    few options scraped from MRJob's option groups, and the EMR connect
    and launch option sets.
    """
    usage = '%prog [options]'
    description = (
        'Create a persistent EMR job flow to run jobs in, and print its ID to'
        ' stdout. WARNING: Do not run'
        ' this without mrjob.tools.emr.terminate_idle_job_flows in your'
        ' crontab; job flows left idle can quickly become expensive!')
    parser = OptionParser(usage=usage, description=description)

    add_basic_opts(parser)

    # these aren't nicely broken down, just scrape specific options
    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        parser: ('bootstrap_mrjob', 'label', 'owner'),
    })

    add_emr_connect_opts(parser)
    add_emr_launch_opts(parser)
    alphabetize_options(parser)
    return parser
示例11: main
def main(cl_args=None):
    """Run a shell command on every node of an EMR job flow.

    Parses ``cl_args``, connects to the job flow, enables SSH access to
    the slave nodes, and runs the command on the master and all slaves,
    storing each node's stdout/stderr under the output directory.

    :param cl_args: command-line arguments (defaults to ``sys.argv[1:]``
                    via ``parse_args``)
    """
    # fixed usage string: the output directory is NOT a positional
    # argument -- it is given via -o/--output-dir (default: JOB_FLOW_ID)
    usage = 'usage: %prog [options] JOB_FLOW_ID "command string"'
    description = ('Run a command on the master and all slaves of an EMR job'
                   ' flow. Store stdout and stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                             " JOB_FLOW_ID)")
    add_basic_opts(option_parser)
    add_emr_connect_opts(option_parser)
    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: ('ec2_key_pair_file', 'ssh_bin'),
    })
    alphabetize_options(option_parser)

    options, args = option_parser.parse_args(cl_args)

    # fail fast before doing any other work if positional args are missing
    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything left in options (minus our own flags) is a runner kwarg
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    job_flow_id, cmd_string = args[:2]
    cmd_args = shlex_split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or job_flow_id)

    with EMRJobRunner(emr_job_flow_id=job_flow_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
示例12: configure_options
def configure_options(self):
    """Define arguments for this script. Called from :py:meth:`__init__()`.
    Run ``python -m mrjob.job.MRJob --help`` to see all options.
    Re-define to define custom command-line arguments::

        def configure_options(self):
            super(MRYourJob, self).configure_options()
            self.add_passthrough_option(...)
            self.add_file_option(...)
            ...
    """
    # --help and the --help-* variants; presumably intercepted after
    # parsing to print the relevant option group -- handled elsewhere
    self.option_parser.add_option(
        '--help', dest='help_main', action='store_true', default=False,
        help='show this message and exit')
    self.option_parser.add_option(
        '--help-emr', dest='help_emr', action='store_true', default=False,
        help='show EMR-related options')
    self.option_parser.add_option(
        '--help-hadoop', dest='help_hadoop', action='store_true',
        default=False,
        help='show Hadoop-related options')
    self.option_parser.add_option(
        '--help-local', dest='help_local', action='store_true',
        default=False,
        help='show local/inline runner-related options')
    self.option_parser.add_option(
        '--help-runner', dest='help_runner', action='store_true',
        default=False, help='show runner-related options')

    # protocol stuff
    self.proto_opt_group = OptionGroup(
        self.option_parser, 'Protocols')
    self.option_parser.add_option_group(self.proto_opt_group)
    add_protocol_opts(self.proto_opt_group)

    # options for running the entire job (runner choice + basic opts)
    self.runner_opt_group = OptionGroup(
        self.option_parser, 'Running the entire job')
    self.option_parser.add_option_group(self.runner_opt_group)
    add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
    add_basic_opts(self.runner_opt_group)

    # options for inline/local runners
    self.local_opt_group = OptionGroup(
        self.option_parser,
        'Running locally (these apply when you set -r inline or -r local)')
    self.option_parser.add_option_group(self.local_opt_group)
    add_local_opts(self.local_opt_group)

    # options common to Hadoop and EMR
    self.hadoop_emr_opt_group = OptionGroup(
        self.option_parser,
        'Running on Hadoop or EMR (these apply when you set -r hadoop or'
        ' -r emr)')
    self.option_parser.add_option_group(self.hadoop_emr_opt_group)
    add_hadoop_emr_opts(self.hadoop_emr_opt_group)

    # options for running the job on Hadoop
    self.hadoop_opt_group = OptionGroup(
        self.option_parser,
        'Running on Hadoop (these apply when you set -r hadoop)')
    self.option_parser.add_option_group(self.hadoop_opt_group)
    add_hadoop_opts(self.hadoop_opt_group)

    # options for running the job on EMR
    self.emr_opt_group = OptionGroup(
        self.option_parser,
        'Running on EMR (these apply when you set -r emr)')
    self.option_parser.add_option_group(self.emr_opt_group)
    add_emr_opts(self.emr_opt_group)
示例13: make_option_parser
def make_option_parser():
    """Build the option parser for terminating idle EMR job flows.

    Returns an :class:`OptionParser` with idle-criteria options, pool
    filters, dry-run flags, and the basic mrjob options.
    """
    usage = "%prog [options]"
    description = (
        "Terminate idle EMR job flows that meet the criteria"
        " passed in on the command line (or, by default,"
        " job flows that have been idle for one hour).")
    parser = OptionParser(usage=usage, description=description)

    # thresholds controlling when a job flow counts as idle
    parser.add_option(
        "--max-hours-idle", type="float", dest="max_hours_idle",
        default=None,
        help=("Max number of hours a job flow can go without bootstrapping,"
              " running a step, or having a new step created. This will fire"
              " even if there are pending steps which EMR has failed to"
              " start. Make sure you set this higher than the amount of time"
              " your jobs can take to start instances and bootstrap."))
    parser.add_option(
        "--max-mins-locked", type="float", dest="max_mins_locked",
        default=DEFAULT_MAX_MINUTES_LOCKED,
        help="Max number of minutes a job flow can be locked while idle.")
    parser.add_option(
        "--mins-to-end-of-hour", type="float", dest="mins_to_end_of_hour",
        default=None,
        help=("Terminate job flows that are within this many minutes of"
              " the end of a full hour since the job started running"
              " AND have no pending steps."))

    # filters restricting which job flows are eligible
    parser.add_option(
        "--unpooled-only", action="store_true", dest="unpooled_only",
        default=False, help="Only terminate un-pooled job flows")
    parser.add_option(
        "--pooled-only", action="store_true", dest="pooled_only",
        default=False, help="Only terminate pooled job flows")
    parser.add_option(
        "--pool-name", dest="pool_name", default=None,
        help="Only terminate job flows in the given named pool.")

    # dry-run style switches
    parser.add_option(
        "--dry-run", action="store_true", dest="dry_run", default=False,
        help="Don't actually kill idle jobs; just log that we would")
    parser.add_option(
        "-t", "--test", action="store_true", dest="test", default=False,
        help="Don't actually delete any files; just log that we would")

    add_basic_opts(parser)
    return parser
示例14: configure_options
def configure_options(self):
    """Define arguments for this script. Called from :py:meth:`__init__()`.
    Run ``python -m mrjob.job.MRJob --help`` to see all options.
    Re-define to define custom command-line arguments::

        def configure_options(self):
            super(MRYourJob, self).configure_options()
            self.add_passthrough_option(...)
            self.add_file_option(...)
            ...
    """
    # --help and the --help-* variants; presumably intercepted after
    # parsing to print the relevant option group -- handled elsewhere
    self.option_parser.add_option(
        "--help", dest="help_main", action="store_true", default=False, help="show this message and exit"
    )
    self.option_parser.add_option(
        "--help-emr", dest="help_emr", action="store_true", default=False, help="show EMR-related options"
    )
    self.option_parser.add_option(
        "--help-hadoop", dest="help_hadoop", action="store_true", default=False, help="show Hadoop-related options"
    )
    self.option_parser.add_option(
        "--help-runner", dest="help_runner", action="store_true", default=False, help="show runner-related options"
    )

    # protocol stuff
    self.proto_opt_group = OptionGroup(self.option_parser, "Protocols")
    self.option_parser.add_option_group(self.proto_opt_group)
    add_protocol_opts(self.proto_opt_group)

    # options for running the entire job (runner choice + basic opts)
    self.runner_opt_group = OptionGroup(self.option_parser, "Running the entire job")
    self.option_parser.add_option_group(self.runner_opt_group)
    add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
    add_basic_opts(self.runner_opt_group)

    # options shared by the hadoop, emr, and local runners
    self.hadoop_opts_opt_group = OptionGroup(
        self.option_parser,
        "Configuring or emulating Hadoop (these apply when you set -r" " hadoop, -r emr, or -r local)",
    )
    self.option_parser.add_option_group(self.hadoop_opts_opt_group)
    add_hadoop_shared_opts(self.hadoop_opts_opt_group)

    # options common to Hadoop and EMR
    self.hadoop_emr_opt_group = OptionGroup(
        self.option_parser, "Running on Hadoop or EMR (these apply when you set -r hadoop or" " -r emr)"
    )
    self.option_parser.add_option_group(self.hadoop_emr_opt_group)
    add_hadoop_emr_opts(self.hadoop_emr_opt_group)

    # options for running the job on Hadoop
    self.hadoop_opt_group = OptionGroup(
        self.option_parser, "Running on Hadoop (these apply when you set -r hadoop)"
    )
    self.option_parser.add_option_group(self.hadoop_opt_group)
    add_hadoop_opts(self.hadoop_opt_group)

    # options for running the job on EMR
    self.emr_opt_group = OptionGroup(
        self.option_parser, "Running on Amazon Elastic MapReduce (these apply when you set -r" " emr)"
    )
    self.option_parser.add_option_group(self.emr_opt_group)
    add_emr_opts(self.emr_opt_group)