

Python parse.is_uri Function Code Examples

This article collects typical usage examples of the Python function mrjob.parse.is_uri. If you have been wondering what exactly is_uri does, how to call it, or what real-world uses of is_uri look like, the curated examples below should help.


The sections below present 15 code examples of the is_uri function, sorted by popularity by default.
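
Before working through the examples, here is a minimal sketch of the behavior the code below relies on. The return values are inferred from Example 1 and the runner tests further down; only the import path shown in the examples is assumed:

    from mrjob.parse import is_uri

    is_uri('notauri!')                     # False -- no URI scheme (Example 1)
    is_uri('they://did/the/monster/mash')  # True  -- any scheme counts (Example 1)
    is_uri('hdfs:///user/dave')            # True  -- see Example 10
    is_uri('/local/path')                  # False -- plain filesystem path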

Example 1: test_uri_parsing

 def test_uri_parsing(self):
     self.assertEqual(is_uri('notauri!'), False)
     self.assertEqual(is_uri('they://did/the/monster/mash'), True)
     self.assertEqual(is_s3_uri('s3://a/uri'), True)
     self.assertEqual(is_s3_uri('s3n://a/uri'), True)
     self.assertEqual(is_s3_uri('hdfs://a/uri'), False)
     self.assertEqual(parse_s3_uri('s3://bucket/loc'), ('bucket', 'loc'))
Developer ID: Asana, Project: mrjob, Lines of code: 7, Source file: test_parse.py

Example 2: join

 def join(self, dirname, filename):
     """Join *filename* onto *dirname* (which may be a URI)"""
     if is_uri(filename):
         return filename
     elif is_uri(dirname):
         return posixpath.join(dirname, filename)
     else:
         return os.path.join(dirname, filename)
Developer ID: kartheek6, Project: mrjob, Lines of code: 8, Source file: base.py
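
A quick illustration of the three branches (fs here stands for any filesystem object exposing this method; the results are what the code above would produce on a POSIX system):

    fs.join('hdfs:///tmp', 'data.txt')        # 'hdfs:///tmp/data.txt' -- posixpath.join
    fs.join('/local/tmp', 'data.txt')         # '/local/tmp/data.txt'  -- os.path.join
    fs.join('/local/tmp', 's3://bucket/key')  # 's3://bucket/key'      -- URI filename wins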

Example 3: test_spark_master_mesos

    def test_spark_master_mesos(self):
        runner = SparkMRJobRunner(spark_master='mesos://host:12345')

        self.assertTrue(is_uri(runner._spark_tmp_dir))
        self.assertEqual(runner._spark_tmp_dir[:8], 'hdfs:///')

        self.assertIsNotNone(runner._upload_mgr)
Developer ID: Affirm, Project: mrjob, Lines of code: 7, Source file: test_runner.py

Example 4: test_default

    def test_default(self):
        runner = SparkMRJobRunner()

        self.assertFalse(is_uri(runner._spark_tmp_dir))
        self.assertIsNone(runner._upload_mgr)

        self.assertEqual(runner._spark_tmp_dir[-6:], '-spark')
Developer ID: Affirm, Project: mrjob, Lines of code: 7, Source file: test_runner.py

Example 5: ls

    def ls(self, path_glob):
        if not is_uri(path_glob):
            for path in super(HadoopJobRunner, self).ls(path_glob):
                yield path
            return

        components = urlparse(path_glob)
        hdfs_prefix = '%s://%s' % (components.scheme, components.netloc)

        stdout = self._invoke_hadoop(
            ['fs', '-lsr', path_glob],
            return_stdout=True,
            ok_stderr=[HADOOP_LSR_NO_SUCH_FILE])

        for line in StringIO(stdout):
            fields = line.rstrip('\r\n').split()
            # expect lines like:
            # -rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar
            if len(fields) < 8:
                raise Exception('unexpected ls line from hadoop: %r' % line)
            # ignore directories
            if fields[0].startswith('d'):
                continue
            # not sure if you can have spaces in filenames; just to be safe
            path = ' '.join(fields[7:])
            yield hdfs_prefix + path
Developer ID: BrandonHaynes, Project: mrjob, Lines of code: 26, Source file: hadoop.py
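
The field-splitting logic can be traced by hand against the sample line from the comment; a self-contained sketch:

    line = '-rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar\n'
    fields = line.rstrip('\r\n').split()
    # fields[0] is the mode string; a leading 'd' would mark a directory
    assert not fields[0].startswith('d')
    # fields 7 onward make up the path (joined back in case it contains spaces)
    assert ' '.join(fields[7:]) == '/foo/bar'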

Example 6: _cat_file

    def _cat_file(self, filename):
        if is_uri(filename):
            # stream from HDFS
            cat_args = self._opts['hadoop_bin'] + ['fs', '-cat', filename]
            log.debug('> %s' % cmd_line(cat_args))

            cat_proc = Popen(cat_args, stdout=PIPE, stderr=PIPE)

            def stream():
                for line in cat_proc.stdout:
                    yield line

                # there shouldn't be any stderr
                for line in cat_proc.stderr:
                    log.error('STDERR: ' + line)

                returncode = cat_proc.wait()

                if returncode != 0:
                    raise CalledProcessError(returncode, cat_args)

            return read_file(filename, stream())
        else:
            # read from local filesystem
            return super(HadoopJobRunner, self)._cat_file(filename)
Developer ID: BrandonHaynes, Project: mrjob, Lines of code: 25, Source file: hadoop.py
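
The pattern here (wrap the subprocess's stdout in a generator and defer the return-code check until the stream is exhausted) is independent of Hadoop. A minimal, runnable local sketch of the same idea, using the POSIX echo binary as a stand-in for hadoop fs -cat:

    from subprocess import Popen, PIPE, CalledProcessError

    def stream_command(args):
        """Yield stdout lines from a command, raising if it exits non-zero."""
        proc = Popen(args, stdout=PIPE, stderr=PIPE)
        for line in proc.stdout:
            yield line
        proc.stderr.read()          # drain stderr before waiting
        if proc.wait() != 0:
            raise CalledProcessError(proc.returncode, args)

    for line in stream_command(['echo', 'hello']):
        print(line)

As in the example above, the check only runs if the caller consumes the whole generator; abandoning it early skips the return-code validation.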

Example 7: _setup_input

    def _setup_input(self):
        """Copy local input files (if any) to a special directory on HDFS.

        Set self._hdfs_input_files
        """
        # winnow out HDFS files from local ones
        self._hdfs_input_files = []
        local_input_files = []

        for path in self._input_paths:
            if is_uri(path):
                # Don't even bother running the job if the input isn't there.
                # (ls() is a generator, so use any() to test for emptiness)
                if not any(self.ls(path)):
                    raise AssertionError(
                        'Input path %s does not exist!' % (path,))
                self._hdfs_input_files.append(path)
            else:
                local_input_files.append(path)

        # copy local files into an input directory, with names like
        # 00000-actual_name.ext
        if local_input_files:
            hdfs_input_dir = posixpath.join(self._hdfs_tmp_dir, 'input')
            log.info('Uploading input to %s' % hdfs_input_dir)
            self._mkdir_on_hdfs(hdfs_input_dir)

            for i, path in enumerate(local_input_files):
                if path == '-':
                    path = self._dump_stdin_to_local_file()

                target = '%s/%05i-%s' % (
                    hdfs_input_dir, i, os.path.basename(path))
                self._upload_to_hdfs(path, target)

            self._hdfs_input_files.append(hdfs_input_dir)
Developer ID: AnthonyNystrom, Project: mrjob, Lines of code: 35, Source file: hadoop.py
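
The zero-padded naming scheme keeps each file's original basename while preventing collisions between same-named files from different directories; for instance (directory value illustrative):

    hdfs_input_dir = 'hdfs:///user/dave/tmp/input'   # illustrative
    target = '%s/%05i-%s' % (hdfs_input_dir, 3, 'logs.csv')
    # target == 'hdfs:///user/dave/tmp/input/00003-logs.csv'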

Example 8: test_spark_master_yarn

    def test_spark_master_yarn(self):
        runner = SparkMRJobRunner(spark_master='yarn')

        self.assertTrue(is_uri(runner._spark_tmp_dir))
        self.assertEqual(runner._spark_tmp_dir[:8], 'hdfs:///')

        self.assertIsNotNone(runner._upload_mgr)
Developer ID: Affirm, Project: mrjob, Lines of code: 7, Source file: test_runner.py

Example 9: _create_input_manifest_if_needed

    def _create_input_manifest_if_needed(self):
        """Create a file with a list of URIs of input files."""
        if self._input_manifest_path or not self._uses_input_manifest():
            return

        uris = []

        log.info('finding input files to add to manifest...')

        for path in self._get_input_paths():
            log.debug('  in %s' % path)
            if is_uri(path):
                # URIs might be globs
                for uri in self.fs.ls(path):
                    uris.append(uri)
            else:
                # local paths are expected to be single files
                # (shell would resolve globs)
                if self._upload_mgr:
                    uris.append(self._upload_mgr.uri(path))
                else:
                    # just make sure the job can find files from its working dir
                    uris.append(os.path.abspath(path))

        log.info('found %d input files' % len(uris))

        path = os.path.join(self._get_local_tmp_dir(), 'input-manifest.txt')
        self._write_script(uris, path, 'input manifest')

        self._input_manifest_path = path
        if self._upload_mgr:
            self._upload_mgr.add(self._input_manifest_path)
Developer ID: Affirm, Project: mrjob, Lines of code: 32, Source file: runner.py

Example 10: fully_qualify_hdfs_path

def fully_qualify_hdfs_path(path):
    """If path isn't an ``hdfs://`` URL, turn it into one."""
    if is_uri(path):
        return path
    elif path.startswith('/'):
        return 'hdfs://' + path
    else:
        return 'hdfs:///user/%s/%s' % (getpass.getuser(), path)
Developer ID: Infolaber, Project: mrjob, Lines of code: 8, Source file: hadoop.py
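
One example per branch (the last result depends on getpass.getuser(); 'dave' is illustrative):

    fully_qualify_hdfs_path('hdfs://namenode/data')  # unchanged -- already a URI
    fully_qualify_hdfs_path('/tmp/output')           # 'hdfs:///tmp/output'
    fully_qualify_hdfs_path('output')                # 'hdfs:///user/dave/output'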

Example 11: uri

    def uri(self, path):
        """Get the URI for the given path. If *path* is a URI, just return it.
        """
        if (not os.path.exists(path)) and is_uri(path):
            return path

        if path in self._path_to_name:
            return posixpath.join(self.prefix, self._path_to_name[path])
        else:
            raise ValueError('%r is not a URI or a known local file' % (path,))
Developer ID: eyecat, Project: mrjob, Lines of code: 10, Source file: setup.py
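
Judging from the source file (setup.py) and the _upload_mgr used in Example 9, this method likely belongs to mrjob's upload-manager class. A hypothetical session (the class name and the exact mapped URI are assumptions, not taken from the snippet):

    mgr = UploadDirManager('hdfs:///tmp/files/')  # hypothetical prefix
    mgr.add('data/input.txt')                     # register a local file
    mgr.uri('data/input.txt')   # e.g. 'hdfs:///tmp/files/input.txt'
    mgr.uri('s3://bucket/key')  # passed through -- already a URI
    mgr.uri('unknown.txt')      # raises ValueError -- not added, not a URI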

Example 12: ls

    def ls(self, path_glob):
        components = urlparse(path_glob)
        hdfs_prefix = '%s://%s' % (components.scheme, components.netloc)

        version = self.get_hadoop_version()

        # use ls -R on Hadoop 2 (see #1152)
        if uses_yarn(version):
            args = ['fs', '-ls', '-R', path_glob]
        else:
            args = ['fs', '-lsr', path_glob]

        try:
            stdout = self.invoke_hadoop(args, return_stdout=True,
                                        ok_stderr=[_HADOOP_LS_NO_SUCH_FILE])
        except CalledProcessError:
            raise IOError("Could not ls %s" % path_glob)

        for line in BytesIO(stdout):
            line = line.rstrip(b'\r\n')

            # ignore total item count
            if line.startswith(b'Found '):
                continue

            fields = line.split(b' ')

            # Throw out directories
            if fields[0].startswith(b'd'):
                continue

            # Try to figure out which part of the line is the path
            # Expected lines:
            #
            # HDFS:
            # -rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar
            #
            # S3:
            # -rwxrwxrwx   1          3276 2010-01-13 14:00 /foo/bar
            path_index = None
            for index, field in enumerate(fields):
                # look for time field, and pick one after that
                # (can't use field[2] because that's an int in Python 3)
                if len(field) == 5 and field[2:3] == b':':
                    path_index = (index + 1)
            if not path_index:
                raise IOError("Could not locate path in string %r" % line)

            path = to_unicode(line.split(b' ', path_index)[-1])
            # handle fully qualified URIs from newer versions of Hadoop ls
            # (see Pull Request #577)
            if is_uri(path):
                yield path
            else:
                yield hdfs_prefix + path
Developer ID: Yelp, Project: mrjob, Lines of code: 55, Source file: hadoop.py
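
The time-field heuristic (a 5-byte field whose middle byte is a colon, e.g. b'14:00') is what lets this work for both output formats; tracing it on the S3-style sample line from the comment:

    line = b'-rwxrwxrwx   1          3276 2010-01-13 14:00 /foo/bar'
    fields = line.split(b' ')

    path_index = None
    for index, field in enumerate(fields):
        if len(field) == 5 and field[2:3] == b':':   # matches b'14:00'
            path_index = index + 1

    # everything after the time field is the path, spaces included
    assert line.split(b' ', path_index)[-1] == b'/foo/bar'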

Example 13: _endpoint_url

def _endpoint_url(host_or_uri):
    """If *host_or_uri* is non-empty and isn't a URI, prepend ``'https://'``.

    Otherwise, pass through as-is.
    """
    if not host_or_uri:
        return host_or_uri
    elif is_uri(host_or_uri):
        return host_or_uri
    else:
        return 'https://' + host_or_uri
Developer ID: okomestudio, Project: mrjob, Lines of code: 11, Source file: s3.py
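
All three branches in one pass (hostnames illustrative):

    _endpoint_url('')                      # ''  -- empty passes through
    _endpoint_url('s3.us-west-1.example')  # 'https://s3.us-west-1.example'
    _endpoint_url('https://s3.example')    # unchanged -- already a URI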

Example 14: path_exists

    def path_exists(self, path_glob):
        """Does the given path exist?

        If dest is a directory (ends with a "/"), we check if there are
        any files starting with that path.
        """
        if not is_uri(path_glob):
            return super(HadoopJobRunner, self).path_exists(path_glob)

        return bool(self._invoke_hadoop(['fs', '-test', '-e', path_glob],
                                        ok_returncodes=(0, 1)))
Developer ID: BrandonHaynes, Project: mrjob, Lines of code: 11, Source file: hadoop.py

Example 15: test_copy_files_with_rename_to_remote_wd_mirror

    def test_copy_files_with_rename_to_remote_wd_mirror(self):
        self.add_mock_s3_data({'walrus': {'fish': b'salmon',
                                          'fowl': b'goose'}})

        foe_path = self.makefile('foe', b'giant')

        run_spark_submit = self.start(patch(
            'mrjob.bin.MRJobBinRunner._run_spark_submit',
            return_value=0))

        job = MRSparkOSWalk(['-r', 'spark',
                             '--spark-master', 'mesos://host:9999',
                             '--spark-tmp-dir', 's3://walrus/tmp',
                             '--file', 's3://walrus/fish#ghoti',
                             '--file', 's3://walrus/fowl',
                             '--file', foe_path])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            # check working dir mirror
            wd_mirror = runner._wd_mirror()
            fs = runner.fs

            self.assertIsNotNone(wd_mirror)
            self.assertTrue(is_uri(wd_mirror))

            self.assertTrue(fs.exists(wd_mirror))
            # uploaded for rename
            self.assertTrue(fs.exists(fs.join(wd_mirror, 'ghoti')))
            # wrong name
            self.assertFalse(fs.exists(fs.join(wd_mirror, 'fish')))
            # no need to upload, already visible
            self.assertFalse(fs.exists(fs.join(wd_mirror, 'fowl')))
            # need to upload from local to remote
            self.assertTrue(fs.exists(fs.join(wd_mirror, 'foe')))

            run_spark_submit.assert_called_once_with(
                ANY, ANY, record_callback=ANY)

            spark_submit_args = run_spark_submit.call_args[0][0]
            self.assertIn('--files', spark_submit_args)
            files_arg = spark_submit_args[
                spark_submit_args.index('--files') + 1]

            self.assertEqual(
                files_arg, ','.join([
                    fs.join(wd_mirror, 'foe'),
                    's3://walrus/fowl',
                    fs.join(wd_mirror, 'ghoti'),
                    fs.join(wd_mirror, 'mr_spark_os_walk.py'),
                ]))
Developer ID: Yelp, Project: mrjob, Lines of code: 53, Source file: test_runner.py


Note: The mrjob.parse.is_uri examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by various developers; copyright in the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not reproduce without permission.