This page collects typical usage examples of the Python method mrjob.setup.WorkingDirManager.name_to_path. If you are wondering exactly what WorkingDirManager.name_to_path does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of the class this method belongs to, mrjob.setup.WorkingDirManager.
Below are 14 code examples of the WorkingDirManager.name_to_path method, sorted by popularity by default.
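Before the examples themselves, here is a minimal sketch of what the method does (it mirrors Example 1 below): name_to_path() returns a dict mapping each name in the job's working directory to the path it was registered from.

from mrjob.setup import WorkingDirManager

wd = WorkingDirManager()
wd.add('file', 'foo/bar.py')
wd.add('archive', 's3://bucket/path/to/baz.tar.gz')

print(wd.name_to_path('file'))     # {'bar.py': 'foo/bar.py'}
print(wd.name_to_path('archive'))  # {'baz.tar.gz': 's3://bucket/path/to/baz.tar.gz'}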
Example 1: test_simple
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_simple(self):
    wd = WorkingDirManager()
    wd.add('archive', 's3://bucket/path/to/baz.tar.gz')
    wd.add('file', 'foo/bar.py')
    self.assertEqual(wd.name_to_path('file'),
                     {'bar.py': 'foo/bar.py'})
    self.assertEqual(wd.name_to_path('archive'),
                     {'baz.tar.gz': 's3://bucket/path/to/baz.tar.gz'})
Example 2: test_auto_names_are_different_from_assigned_names
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_auto_names_are_different_from_assigned_names(self):
    wd = WorkingDirManager()
    wd.add('file', 'foo/bar.py', name='qux.py')
    wd.add('file', 'foo/bar.py')  # use default name bar.py
    self.assertEqual(wd.name_to_path('file'),
                     {'qux.py': 'foo/bar.py',
                      'bar.py': 'foo/bar.py'})
Example 3: MRJobRunner
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
#......... (part of the class's code is omitted here) .........
        jobconf = self._jobconf_for_step(step_num)

        if uses_generic_jobconf(version):
            for key, value in sorted(jobconf.items()):
                if value is not None:
                    args.extend(['-D', '%s=%s' % (key, value)])
        # old-style jobconf
        else:
            for key, value in sorted(jobconf.items()):
                if value is not None:
                    args.extend(['-jobconf', '%s=%s' % (key, value)])

        # partitioner
        if self._partitioner:
            args.extend(['-partitioner', self._partitioner])

        # cmdenv
        for key, value in sorted(self._opts['cmdenv'].items()):
            args.append('-cmdenv')
            args.append('%s=%s' % (key, value))

        # hadoop_input_format
        if (step_num == 0 and self._hadoop_input_format):
            args.extend(['-inputformat', self._hadoop_input_format])

        # hadoop_output_format
        if (step_num == self._num_steps() - 1 and self._hadoop_output_format):
            args.extend(['-outputformat', self._hadoop_output_format])

        return args

    def _arg_hash_paths(self, type, upload_mgr):
        """Helper function for the *upload_args methods."""
        for name, path in self._working_dir_mgr.name_to_path(type).items():
            uri = self._upload_mgr.uri(path)
            yield '%s#%s' % (uri, name)

    def _upload_args(self, upload_mgr):
        args = []

        # TODO: does Hadoop have a way of coping with paths that have
        # commas in their names?
        file_hash_paths = list(self._arg_hash_paths('file', upload_mgr))
        if file_hash_paths:
            args.append('-files')
            args.append(','.join(file_hash_paths))

        archive_hash_paths = list(self._arg_hash_paths('archive', upload_mgr))
        if archive_hash_paths:
            args.append('-archives')
            args.append(','.join(archive_hash_paths))

        return args

    def _pre_0_20_upload_args(self, upload_mgr):
        """-files/-archive args for Hadoop prior to 0.20.203"""
        args = []

        for file_hash in self._arg_hash_paths('file', upload_mgr):
            args.append('-cacheFile')
            args.append(file_hash)

        for archive_hash in self._arg_hash_paths('archive', upload_mgr):
            args.append('-cacheArchive')
            args.append(archive_hash)
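As a rough illustration of what _upload_args() and _arg_hash_paths() above build (the URIs below are hypothetical; real values come from the runner's upload manager): each entry has the form 'uri#name', so Hadoop downloads the URI and exposes it in the task working directory under that name.

# Hypothetical values, for illustration only.
file_hash_paths = ['s3://bucket/tmp/bar.py#bar.py',
                   's3://bucket/tmp/qux.py#qux.py']

args = ['-files', ','.join(file_hash_paths)]
# args == ['-files', 's3://bucket/tmp/bar.py#bar.py,s3://bucket/tmp/qux.py#qux.py']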
Example 4: test_lazy_naming
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_lazy_naming(self):
    wd = WorkingDirManager()
    wd.add("file", "qux.py")  # qux.py by default
    wd.add("file", "bar.py", name="qux.py")
    self.assertEqual(wd.name_to_path("file"), {"qux.py": "bar.py", "qux-1.py": "qux.py"})
Example 5: test_auto_names_are_different_from_assigned_names
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_auto_names_are_different_from_assigned_names(self):
    wd = WorkingDirManager()
    wd.add("file", "foo/bar.py", name="qux.py")
    wd.add("file", "foo/bar.py")  # use default name bar.py
    self.assertEqual(wd.name_to_path("file"), {"qux.py": "foo/bar.py", "bar.py": "foo/bar.py"})
Example 6: test_okay_to_give_same_path_same_name
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_okay_to_give_same_path_same_name(self):
    wd = WorkingDirManager()
    wd.add("file", "foo/bar.py", name="qux.py")
    wd.add("file", "foo/bar.py", name="qux.py")
    self.assertEqual(wd.name_to_path("file"), {"qux.py": "foo/bar.py"})
Example 7: test_simple
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_simple(self):
    wd = WorkingDirManager()
    wd.add("archive", "s3://bucket/path/to/baz.tar.gz")
    wd.add("file", "foo/bar.py")
    self.assertEqual(wd.name_to_path("file"), {"bar.py": "foo/bar.py"})
    self.assertEqual(wd.name_to_path("archive"), {"baz.tar.gz": "s3://bucket/path/to/baz.tar.gz"})
Example 8: test_empty
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_empty(self):
    wd = WorkingDirManager()
    self.assertEqual(wd.name_to_path("archive"), {})
    self.assertEqual(wd.name_to_path("file"), {})
Example 9: test_lazy_naming
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_lazy_naming(self):
    wd = WorkingDirManager()
    wd.add('file', 'qux.py')  # qux.py by default
    wd.add('file', 'bar.py', name='qux.py')
    self.assertEqual(wd.name_to_path('file'),
                     {'qux.py': 'bar.py', 'qux-1.py': 'qux.py'})
Example 10: test_okay_to_give_same_path_same_name
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_okay_to_give_same_path_same_name(self):
    wd = WorkingDirManager()
    wd.add('file', 'foo/bar.py', name='qux.py')
    wd.add('file', 'foo/bar.py', name='qux.py')
    self.assertEqual(wd.name_to_path('file'),
                     {'qux.py': 'foo/bar.py'})
Example 11: test_empty
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_empty(self):
    wd = WorkingDirManager()
    self.assertEqual(wd.name_to_path('archive'), {})
    self.assertEqual(wd.name_to_path('file'), {})
    self.assertEqual(wd.paths(), set())
Example 12: MRJobRunner
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
#......... (part of the class's code is omitted here) .........
        Also translate jobconfs to the current Hadoop version, if necessary.
        """
        step = self._get_step(step_num)

        # _sort_values_jobconf() isn't relevant to Spark,
        # but it doesn't do any harm either
        jobconf = combine_dicts(self._sort_values_jobconf(),
                                self._opts['jobconf'],
                                step.get('jobconf'))

        # if user is using the wrong jobconfs, add in the correct ones
        # and log a warning
        hadoop_version = self.get_hadoop_version()
        if hadoop_version:
            jobconf = translate_jobconf_dict(jobconf, hadoop_version)

        return jobconf

    def _sort_values_jobconf(self):
        """Jobconf dictionary to enable sorting by value.
        """
        if not self._sort_values:
            return {}

        # translate _SORT_VALUES_JOBCONF to the correct Hadoop version,
        # without logging a warning
        hadoop_version = self.get_hadoop_version()

        jobconf = {}
        for k, v in _SORT_VALUES_JOBCONF.items():
            if hadoop_version:
                jobconf[translate_jobconf(k, hadoop_version)] = v
            else:
                for j in translate_jobconf_for_all_versions(k):
                    jobconf[j] = v

        return jobconf

    def _sort_values_partitioner(self):
        """Partitioner to use with *sort_values* keyword to the constructor."""
        if self._sort_values:
            return _SORT_VALUES_PARTITIONER
        else:
            return None

    def _parse_setup_and_py_files(self):
        """Parse the *setup* option with
        :py:func:`mrjob.setup.parse_setup_cmd()`, and patch in *py_files*.
        """
        setup = []

        # py_files
        for path in self._opts['py_files']:
            # Spark (at least v1.3.1) doesn't work with # and --py-files,
            # see #1375
            if '#' in path:
                raise ValueError("py_files cannot contain '#'")
            path_dict = parse_legacy_hash_path('file', path)
            setup.append(['export PYTHONPATH=', path_dict, ':$PYTHONPATH'])

        # setup
        for cmd in self._opts['setup']:
            setup.append(parse_setup_cmd(cmd))

        return setup

    def _upload_args(self):
        # just upload every file and archive in the working dir manager
        return self._upload_args_helper('-files', None, '-archives', None)

    def _upload_args_helper(
            self, files_opt_str, files, archives_opt_str, archives):
        args = []

        file_hash_paths = list(self._arg_hash_paths('file', files))
        if file_hash_paths:
            args.append(files_opt_str)
            args.append(','.join(file_hash_paths))

        archive_hash_paths = list(self._arg_hash_paths('archive', archives))
        if archive_hash_paths:
            args.append(archives_opt_str)
            args.append(','.join(archive_hash_paths))

        return args

    def _arg_hash_paths(self, type, named_paths=None):
        """Helper function for the *upload_args methods."""
        if named_paths is None:
            # just return everything managed by _working_dir_mgr
            named_paths = sorted(
                self._working_dir_mgr.name_to_path(type).items())

        for name, path in named_paths:
            if not name:
                name = self._working_dir_mgr.name(type, path)
            uri = self._upload_mgr.uri(path)
            yield '%s#%s' % (uri, name)
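For context on the jobconf translation used by _jobconf_for_step() and _sort_values_jobconf() above, here is a small sketch of mrjob's translate_jobconf() helper. The returned property names depend on mrjob's internal translation table, so the values in the comments are assumptions for illustration.

from mrjob.compat import translate_jobconf

# Translate a property name to the spelling used by a given Hadoop version.
translate_jobconf('mapreduce.job.maps', '1.0')    # e.g. 'mapred.map.tasks'
translate_jobconf('mapred.map.tasks', '2.7.1')    # e.g. 'mapreduce.job.maps'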
Example 13: HadoopInTheCloudJobRunner
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
#......... (part of the class's code is omitted here) .........
    def _master_bootstrap_script_content(self, bootstrap):
        """Return a list containing the lines of the master bootstrap script.
        (without trailing newlines)
        """
        out = []

        # shebang, precommands
        out.extend(self._start_of_sh_script())
        out.append('')

        # store $PWD
        out.append('# store $PWD')
        out.append('__mrjob_PWD=$PWD')
        out.append('')

        # special case for PWD being in /, which happens on Dataproc
        # (really we should cd to tmp or something)
        out.append('if [ $__mrjob_PWD = "/" ]; then')
        out.append(' __mrjob_PWD=""')
        out.append('fi')
        out.append('')

        # run commands in a block so we can redirect stdout to stderr
        # (e.g. to catch errors from compileall). See #370
        out.append('{')

        # download files
        out.append(' # download files and mark them executable')

        cp_to_local = self._cp_to_local_cmd()

        # TODO: why bother with $__mrjob_PWD here, since we're already in it?
        for name, path in sorted(
                self._bootstrap_dir_mgr.name_to_path('file').items()):
            uri = self._upload_mgr.uri(path)
            out.append('')
            out.append(' %s %s $__mrjob_PWD/%s' %
                       (cp_to_local, pipes.quote(uri), pipes.quote(name)))
            # imitate Hadoop Distributed Cache (see #1602)
            out.append(' chmod u+rx $__mrjob_PWD/%s' % pipes.quote(name))

        out.append('')

        # download and unarchive archives
        archive_names_and_paths = sorted(
            self._bootstrap_dir_mgr.name_to_path('archive').items())
        if archive_names_and_paths:
            # make tmp dir if needed
            out.append(' # download and unpack archives')
            out.append(' __mrjob_TMP=$(mktemp -d)')
            out.append('')

            for name, path in archive_names_and_paths:
                uri = self._upload_mgr.uri(path)
                ext = file_ext(basename(path))

                # copy file to tmp dir
                quoted_archive_path = '$__mrjob_TMP/%s' % pipes.quote(name)
                out.append(' %s %s %s' % (
                    cp_to_local, pipes.quote(uri), quoted_archive_path))

                # unarchive file
                if ext not in _EXT_TO_UNARCHIVE_CMD:
                    raise KeyError('unknown archive file extension: %s' % path)
                unarchive_cmd = _EXT_TO_UNARCHIVE_CMD[ext]
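To make the string formatting above more concrete, here is a hedged sketch of the two bootstrap-script lines emitted for a single downloaded file. The copy command really comes from _cp_to_local_cmd() and differs between cloud runners, so the one used below is only an assumption, and the file URI and name are hypothetical.

import pipes

cp_to_local = 'hadoop fs -copyToLocal'  # assumption; the runner supplies the real command
uri, name = 's3://bucket/tmp/setup.sh', 'setup.sh'  # hypothetical file

print(' %s %s $__mrjob_PWD/%s' % (cp_to_local, pipes.quote(uri), pipes.quote(name)))
print(' chmod u+rx $__mrjob_PWD/%s' % pipes.quote(name))
# Prints two lines of the generated shell script:
#  hadoop fs -copyToLocal s3://bucket/tmp/setup.sh $__mrjob_PWD/setup.sh
#  chmod u+rx $__mrjob_PWD/setup.sh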
Example 14: HadoopInTheCloudJobRunner
# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
#......... (part of the class's code is omitted here) .........
        # shebang, precommands
        out.extend(self._start_of_sh_script())
        out.append('')

        # for example, create a tmp dir and cd to it
        if self._bootstrap_pre_commands():
            out.extend(self._bootstrap_pre_commands())
            out.append('')

        # store $PWD
        out.append('# store $PWD')
        out.append('__mrjob_PWD=$PWD')
        out.append('')

        # special case for PWD being in /, which happens on Dataproc
        # (really we should cd to tmp or something)
        out.append('if [ $__mrjob_PWD = "/" ]; then')
        out.append(' __mrjob_PWD=""')
        out.append('fi')
        out.append('')

        # run commands in a block so we can redirect stdout to stderr
        # (e.g. to catch errors from compileall). See #370
        out.append('{')

        # download files
        out.append(' # download files and mark them executable')

        cp_to_local = self._cp_to_local_cmd()

        # TODO: why bother with $__mrjob_PWD here, since we're already in it?
        for name, path in sorted(
                self._bootstrap_dir_mgr.name_to_path('file').items()):
            uri = self._upload_mgr.uri(path)
            out.append(' %s %s $__mrjob_PWD/%s' %
                       (cp_to_local, pipes.quote(uri), pipes.quote(name)))
            # imitate Hadoop Distributed Cache (see #1602)
            out.append(' chmod u+rx $__mrjob_PWD/%s' % pipes.quote(name))

        out.append('')

        # download and unarchive archives
        archive_names_and_paths = sorted(
            self._bootstrap_dir_mgr.name_to_path('archive').items())
        if archive_names_and_paths:
            # make tmp dir if needed
            out.append(' # download and unpack archives')
            out.append(' __mrjob_TMP=$(mktemp -d)')
            out.append('')

            for name, path in archive_names_and_paths:
                uri = self._upload_mgr.uri(path)
                ext = file_ext(basename(path))

                # copy file to tmp dir
                quoted_archive_path = '$__mrjob_TMP/%s' % pipes.quote(name)
                out.append(' %s %s %s' % (
                    cp_to_local, pipes.quote(uri), quoted_archive_path))

                # unarchive file
                if ext not in _EXT_TO_UNARCHIVE_CMD:
                    raise KeyError('unknown archive file extension: %s' % path)
                unarchive_cmd = _EXT_TO_UNARCHIVE_CMD[ext]
                out.append(' ' + unarchive_cmd % dict(