This article collects typical usage examples of the Python function mrjob.conf.load_opts_from_mrjob_confs. If you have been wondering exactly how load_opts_from_mrjob_confs is used in practice, or are looking for concrete usage examples, the curated code samples below may help.
The following 6 code examples of load_opts_from_mrjob_confs are shown, sorted by popularity by default.
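Before the examples, here is a minimal, self-contained sketch of the function's call shape (not taken from the examples below; the {'runners': {<alias>: <opts>}} config layout and the 'local' alias are assumptions). It writes a config file with dump_mrjob_conf() and reads the opts for one runner alias back as a list of (path, opts) pairs:

import os
import tempfile

from mrjob.conf import dump_mrjob_conf, load_opts_from_mrjob_confs

# assumed layout: a top-level 'runners' mapping keyed by runner alias
tmp_dir = tempfile.mkdtemp()
conf_path = os.path.join(tmp_dir, 'mrjob.conf')
with open(conf_path, 'w') as f:
    dump_mrjob_conf({'runners': {'local': {'python_bin': 'python3'}}}, f)

# returns one (path, opts_dict) pair per config file, in the order the
# opts should be applied (later files override earlier ones)
for path, opts in load_opts_from_mrjob_confs('local', [conf_path]):
    print(path, opts)   # e.g. .../mrjob.conf {'python_bin': 'python3'}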
Example 1: test_symlink_to_duplicate_conf_path
def test_symlink_to_duplicate_conf_path(self):
    conf_path = os.path.join(self.tmp_dir, "mrjob.conf")
    with open(conf_path, "w") as f:
        dump_mrjob_conf({}, f)

    conf_symlink_path = os.path.join(self.tmp_dir, "mrjob.conf.symlink")
    os.symlink("mrjob.conf", conf_symlink_path)

    # both paths resolve to the same file; only the entry that appears
    # later in conf_paths is kept
    self.assertEqual(
        load_opts_from_mrjob_confs("foo", [conf_path, conf_symlink_path]),
        [(conf_symlink_path, {})])
    self.assertEqual(
        load_opts_from_mrjob_confs("foo", [conf_symlink_path, conf_path]),
        [(conf_path, {})])
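What Example 1 demonstrates: when two entries in conf_paths resolve to the same real file (here via a symlink), load_opts_from_mrjob_confs() returns only one (path, opts) pair for that file, and the entry that appears later in conf_paths is the one that is kept.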
Example 2: __init__
def __init__(self, alias, opts, conf_paths):
    """
    :param alias: Runner alias (e.g. ``'local'``)
    :param opts: Keyword args to runner's constructor (usually from the
                 command line).
    :param conf_paths: An iterable of paths to config files
    """
    super(RunnerOptionStore, self).__init__()

    # sanitize incoming options and issue warnings for bad keys
    opts = self.validated_options(opts)

    unsanitized_opt_dicts = load_opts_from_mrjob_confs(
        alias, conf_paths=conf_paths)

    for path, mrjob_conf_opts in unsanitized_opt_dicts:
        self.cascading_dicts.append(self.validated_options(
            mrjob_conf_opts, from_where=(' from %s' % path)))

    self.cascading_dicts.append(opts)

    if (len(self.cascading_dicts) > 2 and
            all(len(d) == 0 for d in self.cascading_dicts[2:-1]) and
            (len(conf_paths or []) > 0)):
        log.warning('No configs specified for %s runner' % alias)

    self.populate_values_from_cascading_dicts()

    log.debug('Active configuration:')
    log.debug(pprint.pformat(self))
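The cascading_dicts bookkeeping above applies option sources in order: built-in defaults first, then each config file returned by load_opts_from_mrjob_confs(), then the command-line opts, so later sources win. The stand-alone sketch below is an illustration of that merge rule only, not mrjob's actual implementation; the option names are made up for the example.

def cascade(opt_dicts):
    """Merge dicts left to right; later non-None values override earlier ones."""
    result = {}
    for d in opt_dicts:
        for key, value in d.items():
            if value is not None:
                result[key] = value
    return result

opts = cascade([
    {'python_bin': None, 'jobconf': None},        # empty defaults
    {'python_bin': 'python'},                     # runner defaults
    {'python_bin': 'python3'},                    # from mrjob.conf
    {'jobconf': {'mapreduce.job.reduces': '1'}},  # from the command line
])
# opts == {'python_bin': 'python3', 'jobconf': {'mapreduce.job.reduces': '1'}}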
Example 3: __init__
def __init__(self, alias, opts, conf_paths):
    """
    :param alias: Runner alias (e.g. ``'local'``)
    :param opts: Options from the command line
    :param conf_paths: Either a file path or an iterable of paths to config
                       files
    """
    super(RunnerOptionStore, self).__init__()

    # sanitize incoming options and issue warnings for bad keys
    opts = self.validated_options(
        opts, 'Got unexpected keyword arguments: %s')

    unsanitized_opt_dicts = load_opts_from_mrjob_confs(
        alias, conf_paths=conf_paths)

    for path, mrjob_conf_opts in unsanitized_opt_dicts:
        self.cascading_dicts.append(self.validated_options(
            mrjob_conf_opts,
            'Got unexpected opts from %s: %%s' % path))

    self.cascading_dicts.append(opts)

    if (len(self.cascading_dicts) > 2 and
            all(len(d) == 0 for d in self.cascading_dicts[2:-1])):
        log.warning('No configs specified for %s runner' % alias)

    self.populate_values_from_cascading_dicts()

    self._validate_cleanup()
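Example 3 is a variant of the same constructor: it differs from Example 2 only in the warning messages it passes to validated_options() and in calling _validate_cleanup() instead of logging the active configuration; the call to load_opts_from_mrjob_confs() and the cascading order are identical.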
Example 4: test_symlink_to_duplicate_conf_path
def test_symlink_to_duplicate_conf_path(self):
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')
    with open(conf_path, 'w') as f:
        dump_mrjob_conf({}, f)

    conf_symlink_path = os.path.join(self.tmp_dir, 'mrjob.conf.symlink')
    os.symlink('mrjob.conf', conf_symlink_path)

    self.assertEqual(
        load_opts_from_mrjob_confs(
            'foo', [conf_path, conf_symlink_path]),
        [(conf_symlink_path, {})])

    self.assertEqual(
        load_opts_from_mrjob_confs(
            'foo', [conf_symlink_path, conf_path]),
        [(conf_path, {})])
Example 5: test_conf_path_order_beats_include
def test_conf_path_order_beats_include(self):
    conf_path_1 = os.path.join(self.tmp_dir, 'mrjob.1.conf')
    conf_path_2 = os.path.join(self.tmp_dir, 'mrjob.2.conf')

    with open(conf_path_1, 'w') as f:
        dump_mrjob_conf({'include': conf_path_2}, f)

    with open(conf_path_2, 'w') as f:
        dump_mrjob_conf({}, f)

    # shouldn't matter that conf_path_1 includes conf_path_2
    self.assertEqual(
        load_opts_from_mrjob_confs('foo', [conf_path_1, conf_path_2]),
        [(conf_path_1, {}), (conf_path_2, {})])
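What Example 5 demonstrates: the position of a file in conf_paths, not the include chain inside the files, determines the order of the returned (path, opts) pairs, and therefore which file's options take precedence.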
Example 6: __init__
def __init__(self, mr_job_script=None, conf_paths=None,
             extra_args=None, file_upload_args=None,
             hadoop_input_format=None, hadoop_output_format=None,
             input_paths=None, output_dir=None, partitioner=None,
             sort_values=None, stdin=None, step_output_dir=None,
             **opts):
    """All runners take the following keyword arguments:

    :type mr_job_script: str
    :param mr_job_script: the path of the ``.py`` file containing the
                          :py:class:`~mrjob.job.MRJob`. If this is None,
                          you won't actually be able to :py:meth:`run` the
                          job, but other utilities (e.g. :py:meth:`ls`)
                          will work.
    :type conf_paths: None or list
    :param conf_paths: List of config files to combine and use, or None to
                       search for mrjob.conf in the default locations.
    :type extra_args: list of str
    :param extra_args: a list of extra command-line arguments to pass to
                       the mr_job script. This is a hook to allow jobs to
                       take additional arguments.
    :param file_upload_args: a list of tuples of ``('--ARGNAME', path)``.
                             The file at the given path will be uploaded
                             to the local directory of the mr_job script
                             when it runs, and then passed into the script
                             with ``--ARGNAME``. Useful for passing in
                             SQLite DBs and other configuration files to
                             your job.
    :type hadoop_input_format: str
    :param hadoop_input_format: name of an optional Hadoop ``InputFormat``
                                class. Passed to Hadoop along with your
                                first step with the ``-inputformat``
                                option. Note that if you write your own
                                class, you'll need to include it in your
                                own custom streaming jar (see
                                :mrjob-opt:`hadoop_streaming_jar`).
    :type hadoop_output_format: str
    :param hadoop_output_format: name of an optional Hadoop
                                 ``OutputFormat`` class. Passed to Hadoop
                                 along with your first step with the
                                 ``-outputformat`` option. Note that if
                                 you write your own class, you'll need to
                                 include it in your own custom streaming
                                 jar (see
                                 :mrjob-opt:`hadoop_streaming_jar`).
    :type input_paths: list of str
    :param input_paths: Input files for your job. Supports globs and
                        recursively walks directories (e.g.
                        ``['data/common/', 'data/training/*.gz']``). If
                        this is left blank, we'll read from stdin.
    :type output_dir: str
    :param output_dir: An empty/non-existent directory where Hadoop
                       should put the final output from the job.
                       If you don't specify an output directory, we'll
                       output into a subdirectory of this job's temporary
                       directory. You can control this from the command
                       line with ``--output-dir``. This option cannot be
                       set from configuration files. If used with the
                       hadoop runner, this path does not need to be fully
                       qualified with ``hdfs://`` URIs because it's
                       understood that it has to be on HDFS.
    :type partitioner: str
    :param partitioner: Optional name of a Hadoop partitioner class, e.g.
                        ``'org.apache.hadoop.mapred.lib.HashPartitioner'``.
                        Hadoop streaming will use this to determine how
                        mapper output should be sorted and distributed
                        to reducers.
    :type sort_values: bool
    :param sort_values: if true, set partitioners and jobconf variables
                        so that reducers receive the values associated
                        with any key in sorted order (sorted by their
                        *encoded* value). Also known as secondary sort.
    :param stdin: an iterable (can be a ``BytesIO`` or even a list) to use
                  as stdin. This is a hook for testing; if you set
                  ``stdin`` via :py:meth:`~mrjob.job.MRJob.sandbox`, it'll
                  get passed through to the runner. If for some reason
                  your lines are missing newlines, we'll add them;
                  this makes it easier to write automated tests.
    :type step_output_dir: str
    :param step_output_dir: An empty/non-existent directory where Hadoop
                            should put output from all steps other than
                            the last one (this only matters for multi-step
                            jobs). Currently ignored by local runners.
    """
    self._ran_job = False

    # opts are made from:
    #
    # empty defaults (everything set to None)
    # runner-specific defaults
    # opts from config file(s)
    # opts from command line
    self._opts = self._combine_confs(
        [(None, {key: None for key in self.OPT_NAMES})] +
        [(None, self._default_opts())] +
        load_opts_from_mrjob_confs(self.alias, conf_paths) +
        [('the command line', opts)]
    )
# ......... part of the code omitted here .........
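For context, a hedged usage sketch (not part of the excerpt above): you normally don't call this constructor yourself; a runner is created through MRJob.make_runner(), and the --conf-path / -c switches supply the conf_paths argument that load_opts_from_mrjob_confs() eventually receives. MRLineCount below is a job class written purely for illustration.

from mrjob.job import MRJob

class MRLineCount(MRJob):
    def mapper(self, _, line):
        yield 'lines', 1

    def reducer(self, key, values):
        yield key, sum(values)

if __name__ == '__main__':
    # e.g.  python mr_line_count.py -r local --conf-path mrjob.conf input.txt
    MRLineCount.run()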