

Python WorkingDirManager.name_to_path Method Code Examples

This article collects typical usage examples of the Python method mrjob.setup.WorkingDirManager.name_to_path. If you have been wondering what WorkingDirManager.name_to_path does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of its containing class, mrjob.setup.WorkingDirManager.


The following presents 14 code examples of the WorkingDirManager.name_to_path method, sorted by popularity by default.
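Before diving in, here is a minimal sketch of the method's contract, inferred from the tests below (the paths are made-up placeholders): add() registers a 'file' or 'archive' path under an optional working-directory name, and name_to_path() returns a dict mapping each name to its source path.

from mrjob.setup import WorkingDirManager

wd = WorkingDirManager()
wd.add('file', 'scripts/job.py')                   # default name: job.py
wd.add('archive', 's3://bucket/libs/deps.tar.gz')  # default name: deps.tar.gz

print(wd.name_to_path('file'))     # {'job.py': 'scripts/job.py'}
print(wd.name_to_path('archive'))  # {'deps.tar.gz': 's3://bucket/libs/deps.tar.gz'}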

Example 1: test_simple

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_simple(self):
    wd = WorkingDirManager()
    wd.add('archive', 's3://bucket/path/to/baz.tar.gz')
    wd.add('file', 'foo/bar.py')
    self.assertEqual(wd.name_to_path('file'),
                     {'bar.py': 'foo/bar.py'})
    self.assertEqual(wd.name_to_path('archive'),
                     {'baz.tar.gz': 's3://bucket/path/to/baz.tar.gz'})
Developer: anirudhreddy92, Project: mrjob, Lines of code: 10, Source: test_setup.py

Example 2: test_auto_names_are_different_from_assigned_names

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_auto_names_are_different_from_assigned_names(self):
    wd = WorkingDirManager()
    wd.add('file', 'foo/bar.py', name='qux.py')
    wd.add('file', 'foo/bar.py')  # use default name bar.py
    self.assertEqual(wd.name_to_path('file'),
                     {'qux.py': 'foo/bar.py',
                      'bar.py': 'foo/bar.py'})
Developer: anirudhreddy92, Project: mrjob, Lines of code: 9, Source: test_setup.py

Example 3: MRJobRunner

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]

#......... part of the code omitted here .........
        jobconf = self._jobconf_for_step(step_num)

        if uses_generic_jobconf(version):
            for key, value in sorted(jobconf.items()):
                if value is not None:
                    args.extend(['-D', '%s=%s' % (key, value)])
        # old-style jobconf
        else:
            for key, value in sorted(jobconf.items()):
                if value is not None:
                    args.extend(['-jobconf', '%s=%s' % (key, value)])

        # partitioner
        if self._partitioner:
            args.extend(['-partitioner', self._partitioner])

        # cmdenv
        for key, value in sorted(self._opts['cmdenv'].items()):
            args.append('-cmdenv')
            args.append('%s=%s' % (key, value))

        # hadoop_input_format
        if (step_num == 0 and self._hadoop_input_format):
            args.extend(['-inputformat', self._hadoop_input_format])

        # hadoop_output_format
        if (step_num == self._num_steps() - 1 and self._hadoop_output_format):
            args.extend(['-outputformat', self._hadoop_output_format])

        return args

    def _arg_hash_paths(self, type, upload_mgr):
        """Helper function for the *upload_args methods."""
        for name, path in self._working_dir_mgr.name_to_path(type).items():
            uri = self._upload_mgr.uri(path)
            yield '%s#%s' % (uri, name)

    def _upload_args(self, upload_mgr):
        args = []

        # TODO: does Hadoop have a way of coping with paths that have
        # commas in their names?

        file_hash_paths = list(self._arg_hash_paths('file', upload_mgr))
        if file_hash_paths:
            args.append('-files')
            args.append(','.join(file_hash_paths))

        archive_hash_paths = list(self._arg_hash_paths('archive', upload_mgr))
        if archive_hash_paths:
            args.append('-archives')
            args.append(','.join(archive_hash_paths))

        return args

    def _pre_0_20_upload_args(self, upload_mgr):
        """-files/-archive args for Hadoop prior to 0.20.203"""
        args = []

        for file_hash in self._arg_hash_paths('file', upload_mgr):
            args.append('-cacheFile')
            args.append(file_hash)

        for archive_hash in self._arg_hash_paths('archive', upload_mgr):
            args.append('-cacheArchive')
            args.append(archive_hash)
Developer: parastoo-62, Project: mrjob, Lines of code: 70, Source: runner.py
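For orientation, the 'uri#name' strings yielded by _arg_hash_paths above follow Hadoop's distributed-cache hash-path syntax: upload the file at uri, then expose it in the task's working directory as name. Below is a standalone sketch of that pairing, with a made-up stand-in for the upload manager (which this snippet does not show); the paths and URIs are placeholders.

class _FakeUploadMgr(object):
    """Stand-in for mrjob's upload manager; uri() maps a local
    path to the (hypothetical) location it was uploaded to."""
    def uri(self, path):
        return 's3://scratch/' + path.replace('/', '-')

upload_mgr = _FakeUploadMgr()
name_to_path = {'bar.py': 'foo/bar.py', 'qux.py': 'foo/qux.py'}

# pair each uploaded URI with its working-directory name
file_hash_paths = ['%s#%s' % (upload_mgr.uri(path), name)
                   for name, path in sorted(name_to_path.items())]
print(['-files', ','.join(file_hash_paths)])
# ['-files', 's3://scratch/foo-bar.py#bar.py,s3://scratch/foo-qux.py#qux.py']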

Example 4: test_lazy_naming

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_lazy_naming(self):
    wd = WorkingDirManager()
    wd.add("file", "qux.py")  # qux.py by default
    wd.add("file", "bar.py", name="qux.py")
    self.assertEqual(wd.name_to_path("file"), {"qux.py": "bar.py", "qux-1.py": "qux.py"})
Developer: irskep, Project: mrjob, Lines of code: 7, Source: test_setup.py

Example 5: test_auto_names_are_different_from_assigned_names

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_auto_names_are_different_from_assigned_names(self):
    wd = WorkingDirManager()
    wd.add("file", "foo/bar.py", name="qux.py")
    wd.add("file", "foo/bar.py")  # use default name bar.py
    self.assertEqual(wd.name_to_path("file"), {"qux.py": "foo/bar.py", "bar.py": "foo/bar.py"})
Developer: irskep, Project: mrjob, Lines of code: 7, Source: test_setup.py

Example 6: test_okay_to_give_same_path_same_name

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_okay_to_give_same_path_same_name(self):
    wd = WorkingDirManager()
    wd.add("file", "foo/bar.py", name="qux.py")
    wd.add("file", "foo/bar.py", name="qux.py")
    self.assertEqual(wd.name_to_path("file"), {"qux.py": "foo/bar.py"})
Developer: irskep, Project: mrjob, Lines of code: 7, Source: test_setup.py

Example 7: test_simple

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_simple(self):
    wd = WorkingDirManager()
    wd.add("archive", "s3://bucket/path/to/baz.tar.gz")
    wd.add("file", "foo/bar.py")
    self.assertEqual(wd.name_to_path("file"), {"bar.py": "foo/bar.py"})
    self.assertEqual(wd.name_to_path("archive"), {"baz.tar.gz": "s3://bucket/path/to/baz.tar.gz"})
Developer: irskep, Project: mrjob, Lines of code: 8, Source: test_setup.py

Example 8: test_empty

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_empty(self):
    wd = WorkingDirManager()
    self.assertEqual(wd.name_to_path("archive"), {})
    self.assertEqual(wd.name_to_path("file"), {})
Developer: irskep, Project: mrjob, Lines of code: 6, Source: test_setup.py

Example 9: test_lazy_naming

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_lazy_naming(self):
    wd = WorkingDirManager()
    wd.add('file', 'qux.py')  # qux.py by default
    wd.add('file', 'bar.py', name='qux.py')
    self.assertEqual(wd.name_to_path('file'),
                     {'qux.py': 'bar.py', 'qux-1.py': 'qux.py'})
Developer: anirudhreddy92, Project: mrjob, Lines of code: 8, Source: test_setup.py

Example 10: test_okay_to_give_same_path_same_name

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_okay_to_give_same_path_same_name(self):
    wd = WorkingDirManager()
    wd.add('file', 'foo/bar.py', name='qux.py')
    wd.add('file', 'foo/bar.py', name='qux.py')
    self.assertEqual(wd.name_to_path('file'),
                     {'qux.py': 'foo/bar.py'})
Developer: anirudhreddy92, Project: mrjob, Lines of code: 8, Source: test_setup.py

Example 11: test_empty

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]
def test_empty(self):
    wd = WorkingDirManager()
    self.assertEqual(wd.name_to_path('archive'), {})
    self.assertEqual(wd.name_to_path('file'), {})
    self.assertEqual(wd.paths(), set())
Developer: Affirm, Project: mrjob, Lines of code: 7, Source: test_setup.py

Example 12: MRJobRunner

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]

#......... part of the code omitted here .........
        Also translate jobconfs to the current Hadoop version, if necessary.
        """

        step = self._get_step(step_num)

        # _sort_values_jobconf() isn't relevant to Spark,
        # but it doesn't do any harm either

        jobconf = combine_dicts(self._sort_values_jobconf(),
                                self._opts['jobconf'],
                                step.get('jobconf'))

        # if user is using the wrong jobconfs, add in the correct ones
        # and log a warning
        hadoop_version = self.get_hadoop_version()
        if hadoop_version:
            jobconf = translate_jobconf_dict(jobconf, hadoop_version)

        return jobconf

    def _sort_values_jobconf(self):
        """Jobconf dictionary to enable sorting by value.
        """
        if not self._sort_values:
            return {}

        # translate _SORT_VALUES_JOBCONF to the correct Hadoop version,
        # without logging a warning
        hadoop_version = self.get_hadoop_version()

        jobconf = {}
        for k, v in _SORT_VALUES_JOBCONF.items():
            if hadoop_version:
                jobconf[translate_jobconf(k, hadoop_version)] = v
            else:
                for j in translate_jobconf_for_all_versions(k):
                    jobconf[j] = v

        return jobconf

    def _sort_values_partitioner(self):
        """Partitioner to use with *sort_values* keyword to the constructor."""
        if self._sort_values:
            return _SORT_VALUES_PARTITIONER
        else:
            return None

    def _parse_setup_and_py_files(self):
        """Parse the *setup* option with
        :py:func:`mrjob.setup.parse_setup_cmd()`, and patch in *py_files*.
        """
        setup = []

        # py_files
        for path in self._opts['py_files']:
            # Spark (at least v1.3.1) doesn't work with # and --py-files,
            # see #1375
            if '#' in path:
                raise ValueError("py_files cannot contain '#'")
            path_dict = parse_legacy_hash_path('file', path)
            setup.append(['export PYTHONPATH=', path_dict, ':$PYTHONPATH'])

        # setup
        for cmd in self._opts['setup']:
            setup.append(parse_setup_cmd(cmd))

        return setup

    def _upload_args(self):
        # just upload every file and archive in the working dir manager
        return self._upload_args_helper('-files', None, '-archives', None)

    def _upload_args_helper(
            self, files_opt_str, files, archives_opt_str, archives):
        args = []

        file_hash_paths = list(self._arg_hash_paths('file', files))
        if file_hash_paths:
            args.append(files_opt_str)
            args.append(','.join(file_hash_paths))

        archive_hash_paths = list(self._arg_hash_paths('archive', archives))
        if archive_hash_paths:
            args.append(archives_opt_str)
            args.append(','.join(archive_hash_paths))

        return args

    def _arg_hash_paths(self, type, named_paths=None):
        """Helper function for the *upload_args methods."""
        if named_paths is None:
            # just return everything managed by _working_dir_mgr
            named_paths = sorted(
                self._working_dir_mgr.name_to_path(type).items())

        for name, path in named_paths:
            if not name:
                name = self._working_dir_mgr.name(type, path)
            uri = self._upload_mgr.uri(path)
            yield '%s#%s' % (uri, name)
Developer: okomestudio, Project: mrjob, Lines of code: 104, Source: runner.py
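As a standalone illustration of the assembly logic in _upload_args_helper above (the hash-path strings are made up): each option is emitted only when at least one path of that type exists, so an empty working dir produces an empty argument list.

def upload_args(file_hash_paths, archive_hash_paths):
    # mirrors _upload_args_helper: emit each option only when
    # there is at least one matching hash path
    args = []
    if file_hash_paths:
        args += ['-files', ','.join(file_hash_paths)]
    if archive_hash_paths:
        args += ['-archives', ','.join(archive_hash_paths)]
    return args

print(upload_args(['s3://scratch/bar.py#bar.py'], []))
# ['-files', 's3://scratch/bar.py#bar.py']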

Example 13: HadoopInTheCloudJobRunner

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]

#......... part of the code omitted here .........
    def _master_bootstrap_script_content(self, bootstrap):
        """Return a list containing the lines of the master bootstrap script.
        (without trailing newlines)
        """
        out = []

        # shebang, precommands
        out.extend(self._start_of_sh_script())
        out.append('')

        # store $PWD
        out.append('# store $PWD')
        out.append('__mrjob_PWD=$PWD')
        out.append('')

        # special case for PWD being in /, which happens on Dataproc
        # (really we should cd to tmp or something)
        out.append('if [ $__mrjob_PWD = "/" ]; then')
        out.append('  __mrjob_PWD=""')
        out.append('fi')
        out.append('')

        # run commands in a block so we can redirect stdout to stderr
        # (e.g. to catch errors from compileall). See #370
        out.append('{')

        # download files
        out.append('  # download files and mark them executable')

        cp_to_local = self._cp_to_local_cmd()

        # TODO: why bother with $__mrjob_PWD here, since we're already in it?
        for name, path in sorted(
                self._bootstrap_dir_mgr.name_to_path('file').items()):
            uri = self._upload_mgr.uri(path)
            out.append('')
            out.append('  %s %s $__mrjob_PWD/%s' %
                       (cp_to_local, pipes.quote(uri), pipes.quote(name)))
            # imitate Hadoop Distributed Cache (see #1602)
            out.append('  chmod u+rx $__mrjob_PWD/%s' % pipes.quote(name))
        out.append('')

        # download and unarchive archives
        archive_names_and_paths = sorted(
            self._bootstrap_dir_mgr.name_to_path('archive').items())
        if archive_names_and_paths:
            # make tmp dir if needed
            out.append('  # download and unpack archives')
            out.append('  __mrjob_TMP=$(mktemp -d)')
            out.append('')

            for name, path in archive_names_and_paths:
                uri = self._upload_mgr.uri(path)
                ext = file_ext(basename(path))

                # copy file to tmp dir
                quoted_archive_path = '$__mrjob_TMP/%s' % pipes.quote(name)

                out.append('  %s %s %s' % (
                    cp_to_local, pipes.quote(uri), quoted_archive_path))

                # unarchive file
                if ext not in _EXT_TO_UNARCHIVE_CMD:
                    raise KeyError('unknown archive file extension: %s' % path)
                unarchive_cmd = _EXT_TO_UNARCHIVE_CMD[ext]
Developer: okomestudio, Project: mrjob, Lines of code: 69, Source: cloud.py
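To make the file-download loop above (also used in Example 14 below) concrete, here is a hedged sketch of the shell lines it would emit for a single file. The URI and name are placeholders, and cp_to_local is an assumption; the real value comes from _cp_to_local_cmd() and varies by cloud runner.

import pipes

cp_to_local = 'hadoop fs -copyToLocal'  # assumption: one possible value
name, uri = 'job.py', 's3://bucket/tmp/job.py'

lines = []
# download the file into the stored working directory...
lines.append('  %s %s $__mrjob_PWD/%s' %
             (cp_to_local, pipes.quote(uri), pipes.quote(name)))
# ...and mark it executable, imitating Hadoop Distributed Cache
lines.append('  chmod u+rx $__mrjob_PWD/%s' % pipes.quote(name))
print('\n'.join(lines))
#   hadoop fs -copyToLocal s3://bucket/tmp/job.py $__mrjob_PWD/job.py
#   chmod u+rx $__mrjob_PWD/job.py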

Example 14: HadoopInTheCloudJobRunner

# Required import: from mrjob.setup import WorkingDirManager [as alias]
# Or: from mrjob.setup.WorkingDirManager import name_to_path [as alias]

#......... part of the code omitted here .........

        # shebang, precommands
        out.extend(self._start_of_sh_script())
        out.append('')

        # for example, create a tmp dir and cd to it
        if self._bootstrap_pre_commands():
            out.extend(self._bootstrap_pre_commands())
            out.append('')

        # store $PWD
        out.append('# store $PWD')
        out.append('__mrjob_PWD=$PWD')
        out.append('')

        # special case for PWD being in /, which happens on Dataproc
        # (really we should cd to tmp or something)
        out.append('if [ $__mrjob_PWD = "/" ]; then')
        out.append('  __mrjob_PWD=""')
        out.append('fi')
        out.append('')

        # run commands in a block so we can redirect stdout to stderr
        # (e.g. to catch errors from compileall). See #370
        out.append('{')

        # download files
        out.append('  # download files and mark them executable')

        cp_to_local = self._cp_to_local_cmd()

        # TODO: why bother with $__mrjob_PWD here, since we're already in it?
        for name, path in sorted(
                self._bootstrap_dir_mgr.name_to_path('file').items()):
            uri = self._upload_mgr.uri(path)
            out.append('  %s %s $__mrjob_PWD/%s' %
                       (cp_to_local, pipes.quote(uri), pipes.quote(name)))
            # imitate Hadoop Distributed Cache (see #1602)
            out.append('  chmod u+rx $__mrjob_PWD/%s' % pipes.quote(name))
            out.append('')

        # download and unarchive archives
        archive_names_and_paths = sorted(
            self._bootstrap_dir_mgr.name_to_path('archive').items())
        if archive_names_and_paths:
            # make tmp dir if needed
            out.append('  # download and unpack archives')
            out.append('  __mrjob_TMP=$(mktemp -d)')
            out.append('')

            for name, path in archive_names_and_paths:
                uri = self._upload_mgr.uri(path)
                ext = file_ext(basename(path))

                # copy file to tmp dir
                quoted_archive_path = '$__mrjob_TMP/%s' % pipes.quote(name)

                out.append('  %s %s %s' % (
                    cp_to_local, pipes.quote(uri), quoted_archive_path))

                # unarchive file
                if ext not in _EXT_TO_UNARCHIVE_CMD:
                    raise KeyError('unknown archive file extension: %s' % path)
                unarchive_cmd = _EXT_TO_UNARCHIVE_CMD[ext]

                out.append('  ' + unarchive_cmd % dict(
Developer: Affirm, Project: mrjob, Lines of code: 70, Source: cloud.py


Note: The mrjob.setup.WorkingDirManager.name_to_path method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective authors, and copyright remains with the original authors. Please consult each project's License before distributing or using the code; do not reproduce without permission.