当前位置: 首页>>代码示例>>Python>>正文


Python parse.urlparse函数代码示例

本文整理汇总了Python中mrjob.parse.urlparse函数的典型用法代码示例。如果您正苦于以下问题:Python urlparse函数的具体用法?Python urlparse怎么用?Python urlparse使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了urlparse函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: hadoop_fs_ls

def hadoop_fs_ls(stdout, stderr, environ, *args):
    """Mock implementation of ``hadoop fs -ls``.

    Each positional argument is treated as an HDFS path glob; when none
    are given, a single empty glob is used. For every real path matching
    a glob, one line built by ``_hadoop_ls_line()`` is printed to
    *stdout* (always with a size of 0). A glob matching nothing produces
    an error message on *stderr*.

    Returns 0 if every glob matched at least one path, -1 otherwise.
    """
    exit_code = 0

    for pattern in (args or ['']):
        uri = urlparse(pattern)
        scheme, netloc = uri.scheme, uri.netloc

        matches = glob.glob(hdfs_path_to_real_path(pattern, environ))

        if not matches:
            print >> stderr, (
                'ls: Cannot access %s: No such file or directory.' %
                pattern)
            exit_code = -1
            continue

        # every entry is reported with size 0, so the maximum size handed
        # to the line formatter is 0 as well
        for match in matches:
            print >> stdout, _hadoop_ls_line(
                match, scheme, netloc, 0, 0, environ)

    return exit_code
开发者ID:ENuge,项目名称:mrjob,代码行数:32,代码来源:mockhadoop.py

示例2: ls

    def ls(self, path_glob):
        """Yield the URI of every file ``hadoop fs -lsr`` lists for
        *path_glob*.

        Directory lines (those whose first field starts with ``d``) are
        skipped. Each yielded value is the ``scheme://netloc`` of
        *path_glob* followed by the path found on the listing line.

        Raises IOError if the hadoop invocation raises CalledProcessError,
        or if no time-like field can be found on the first file line.
        """
        parsed = urlparse(path_glob)
        uri_root = '%s://%s' % (parsed.scheme, parsed.netloc)

        try:
            listing = self.invoke_hadoop(
                ['fs', '-lsr', path_glob],
                return_stdout=True,
                ok_stderr=[HADOOP_LSR_NO_SUCH_FILE])
        except CalledProcessError:
            raise IOError("Could not ls %s" % path_glob)

        # position of the first path field; worked out once, from the first
        # file line, and then reused for every later line
        path_start = None
        for raw_line in StringIO(listing):
            tokens = raw_line.rstrip('\r\n').split()

            # directories are not reported
            if tokens[0].startswith('d'):
                continue

            # The path follows the HH:MM time field. Expected lines:
            # -rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar # HDFS
            # -rwxrwxrwx   1          3276 010-01-13 14:00 /foo/bar # S3
            if not path_start:
                time_slots = [
                    pos for pos, token in enumerate(tokens)
                    if len(token) == 5 and token[2] == ':']
                if not time_slots:
                    raise IOError(
                        "Could not locate path in string '%s'" % raw_line)
                # when several fields look like a time, the last one wins
                path_start = time_slots[-1] + 1

            # re-join with single spaces in case the path contained spaces
            yield uri_root + ' '.join(tokens[path_start:])
开发者ID:bigo,项目名称:mrjob,代码行数:33,代码来源:hadoop.py

示例3: ls

    def ls(self, path_glob):
        """Yield the paths of all files matching *path_glob*.

        Globs that are not URIs are delegated to the parent class's
        ``ls()``. URIs are listed with ``hadoop fs -lsr``; directory lines
        are skipped, and every yielded value is the ``scheme://netloc`` of
        *path_glob* followed by the path from the listing line.

        Raises Exception if a listing line has fewer than 8
        whitespace-separated fields.
        """
        if not is_uri(path_glob):
            for local_path in super(HadoopJobRunner, self).ls(path_glob):
                yield local_path
            return

        parsed = urlparse(path_glob)
        uri_root = '%s://%s' % (parsed.scheme, parsed.netloc)

        listing = self._invoke_hadoop(
            ['fs', '-lsr', path_glob],
            return_stdout=True,
            ok_stderr=[HADOOP_LSR_NO_SUCH_FILE])

        for raw_line in StringIO(listing):
            tokens = raw_line.rstrip('\r\n').split()

            # a well-formed line looks like:
            # -rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar
            if len(tokens) < 8:
                raise Exception(
                    'unexpected ls line from hadoop: %r' % raw_line)

            # only files are reported, never directories
            if not tokens[0].startswith('d'):
                # everything from the 8th field on is the path; re-join in
                # case the filename contained spaces
                yield uri_root + ' '.join(tokens[7:])
开发者ID:BrandonHaynes,项目名称:mrjob,代码行数:26,代码来源:hadoop.py

示例4: hadoop_fs_lsr

def hadoop_fs_lsr(stdout, stderr, environ, *args):
    """Mock implementation of ``hadoop fs -lsr``.

    Each positional argument is treated as an HDFS path glob; when none
    are given, a single empty glob is used. Matching files get one line
    on *stdout*; matching directories are walked recursively, printing a
    line for each directory and each file inside it. A glob matching
    nothing produces an error message on *stderr*.

    Returns 0 if every glob matched at least one path, -1 otherwise.
    """

    def format_entry(real_path, scheme, netloc):
        """Build one listing line for *real_path*."""
        shown_path = real_path_to_hdfs_path(real_path, environ)

        # a real ls could be implemented here, but mrjob only looks at
        # the path, so everything except the type flag is canned
        type_flag = 'd' if os.path.isdir(real_path) else '-'

        # S3 listings carry no user and group (see Pull Request #573)
        owner = '' if scheme in ('s3', 's3n') else 'dave supergroup'

        # newer Hadoop returns fully qualified URIs (see Pull Request #577)
        if scheme and environ.get('MOCK_HADOOP_LS_RETURNS_FULL_URIS'):
            shown_path = '%s://%s%s' % (scheme, netloc, shown_path)

        return (
            '%srwxrwxrwx - %s      18321 2010-10-01 15:16 %s' %
            (type_flag, owner, shown_path))

    exit_code = 0

    for pattern in (args or ['']):
        uri = urlparse(pattern)
        scheme, netloc = uri.scheme, uri.netloc

        matches = glob.glob(hdfs_path_to_real_path(pattern, environ))

        if not matches:
            print >> stderr, (
                'lsr: Cannot access %s: No such file or directory.' %
                pattern)
            exit_code = -1
            continue

        for match in matches:
            if not os.path.isdir(match):
                print >> stdout, format_entry(match, scheme, netloc)
                continue

            # directory: list it, then every file beneath it
            for dirpath, _dirnames, filenames in os.walk(match):
                print >> stdout, format_entry(dirpath, scheme, netloc)
                for filename in filenames:
                    print >> stdout, format_entry(
                        os.path.join(dirpath, filename), scheme, netloc)

    return exit_code
开发者ID:Infolaber,项目名称:mrjob,代码行数:56,代码来源:mockhadoop.py

示例5: _ls_detailed

    def _ls_detailed(self, path_glob):
        """Yield one metadata dict per GCS object matching *path_glob*.

        Each yielded dict is the item returned by the API, with three keys
        set by this method:

        - ``_uri``: the object's full URI
        - ``bucket``: the bucket name
        - ``size``: the item's size, converted to an int

        *path_glob* can include ``?`` to match single characters or
        ``*`` to match 0 or more characters. Both ``?`` and ``*`` can match
        ``/``. Objects "inside" *path_glob* (as if it named a directory)
        match as well; URIs ending in ``/`` are skipped.

        A 404 from the API ends the listing quietly; any other HttpError
        propagates.
        """
        scheme = urlparse(path_glob).scheme
        bucket_name, base_name = _path_glob_to_parsed_gcs_uri(path_glob)
        uri_prefix = '%s://%s' % (scheme, bucket_name)

        # second pattern, so that subdirectories of the path/glob match too
        needs_slash = path_glob and not path_glob.endswith('/')
        dir_glob = path_glob + ('/*' if needs_slash else '*')

        request = self.api_client.objects().list(
            bucket=bucket_name, prefix=base_name, fields=_LS_FIELDS_TO_RETURN)

        # list_next() hands back a falsy value once there are no more pages
        while request:
            try:
                page = request.execute()
            except google_errors.HttpError as err:
                if err.resp.status != 404:
                    raise
                return

            for entry in page.get('items') or []:
                # item names are relative to the bucket
                uri = "%s/%s" % (uri_prefix, entry['name'])

                matched = (fnmatch.fnmatchcase(uri, path_glob) or
                           fnmatch.fnmatchcase(uri, dir_glob))

                # drop anything outside the glob, and folder placeholders
                if not matched or uri.endswith('/'):
                    continue

                entry['_uri'] = uri
                entry['bucket'] = bucket_name
                entry['size'] = int(entry['size'])
                yield entry

            request = self.api_client.objects().list_next(request, page)
开发者ID:Jeremyfanfan,项目名称:mrjob,代码行数:56,代码来源:gcs.py

示例6: hdfs_path_to_real_path

def hdfs_path_to_real_path(hdfs_path, environ):
    """Map an HDFS path or URI to a path under the mock HDFS root.

    Only the path component of *hdfs_path* is used. A path that has no
    scheme and does not start with ``/`` is taken to be relative to
    ``/user/<environ["USER"]>/``. The result is that path, minus its
    leading slashes, joined onto ``environ["MOCK_HDFS_ROOT"]``.
    """
    parsed = urlparse(hdfs_path)
    relative = parsed.path

    # scheme-less, non-absolute paths live in the user's home directory
    if not (parsed.scheme or relative.startswith("/")):
        relative = "/user/%s/%s" % (environ["USER"], relative)

    mock_root = environ["MOCK_HDFS_ROOT"]
    return os.path.join(mock_root, relative.lstrip("/"))
开发者ID:ashleymiller,项目名称:mrjob,代码行数:10,代码来源:mockhadoop.py

示例7: ls

    def ls(self, path_glob):
        """Yield the URI of every file Hadoop lists under *path_glob*.

        Runs ``hadoop fs -ls -R`` when ``uses_yarn()`` is true for the
        Hadoop version, and ``hadoop fs -lsr`` otherwise. Directory lines
        and the "Found N items" line are skipped. Paths that are already
        URIs are yielded unchanged; others get the ``scheme://netloc`` of
        *path_glob* prepended.

        Raises IOError if the hadoop invocation raises CalledProcessError,
        or if a line has no time-like field to locate the path by.
        """
        parsed = urlparse(path_glob)
        uri_root = '%s://%s' % (parsed.scheme, parsed.netloc)

        version = self.get_hadoop_version()

        # use ls -R on Hadoop 2 (see #1152)
        ls_flags = ['-ls', '-R'] if uses_yarn(version) else ['-lsr']
        args = ['fs'] + ls_flags + [path_glob]

        try:
            listing = self.invoke_hadoop(args, return_stdout=True,
                                         ok_stderr=[_HADOOP_LS_NO_SUCH_FILE])
        except CalledProcessError:
            raise IOError("Could not ls %s" % path_glob)

        for raw_line in BytesIO(listing):
            entry = raw_line.rstrip(b'\r\n')

            # the total item count is not a listing entry
            if entry.startswith(b'Found '):
                continue

            tokens = entry.split(b' ')

            # directories are not reported
            if tokens[0].startswith(b'd'):
                continue

            # The path comes right after the HH:MM time field.
            # Expected lines:
            #
            # HDFS:
            # -rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar
            #
            # S3:
            # -rwxrwxrwx   1          3276 010-01-13 14:00 /foo/bar
            #
            # (slice with [2:3] rather than index with [2], because
            # indexing bytes yields an int in Python 3)
            time_slots = [
                pos for pos, token in enumerate(tokens)
                if len(token) == 5 and token[2:3] == b':']
            if not time_slots:
                raise IOError("Could not locate path in string %r" % entry)
            # when several fields look like a time, the last one wins
            path_start = time_slots[-1] + 1

            # split only as far as the path so spaces inside it survive
            found = to_unicode(entry.split(b' ', path_start)[-1])

            # newer versions of Hadoop ls return fully qualified URIs
            # (see Pull Request #577)
            yield found if is_uri(found) else uri_root + found
开发者ID:Yelp,项目名称:mrjob,代码行数:55,代码来源:hadoop.py

示例8: copy_from_local

    def copy_from_local(self, path, local_file):
        """Upload *local_file* to *path* with ``hadoop fs -put``.

        *local_file* is rewritten as a ``file`` URI before being handed to
        Hadoop. It must have an empty or ``test`` scheme and its path must
        exist, otherwise an AssertionError is raised.

        Raises OSError if the hadoop invocation raises CalledProcessError.
        """
        parsed = urlparse(local_file)
        assert parsed.scheme in ('', 'test'), "local_file must be local"
        assert os.path.exists(parsed.path), "local_file must exist"

        # swap in the "file" scheme, keeping every other URI component
        local_uri = urlunparse(['file'] + list(parsed[1:]))

        try:
            self.invoke_hadoop(['fs', '-put', local_uri, path])
        except CalledProcessError as e:
            raise OSError("Could not create file: %s" % e)
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:11,代码来源:hadoop.py

示例9: hdfs_uri_to_real_path

def hdfs_uri_to_real_path(hdfs_uri, environ):
    """Translate an HDFS URI into a path on the local filesystem.

    Only the path component of *hdfs_uri* is used. A path that has no
    scheme and does not start with ``/`` is taken to be relative to
    ``/user/<environ['USER']>/``. The result is that path, minus its
    leading slashes, joined onto the root returned by
    ``get_mock_hdfs_root()``.
    """
    parsed = urlparse(hdfs_uri)
    hdfs_path = parsed.path

    # scheme-less, non-absolute paths live in the user's home directory
    if not (parsed.scheme or hdfs_path.startswith('/')):
        hdfs_path = '/user/%s/%s' % (environ['USER'], hdfs_path)

    mock_root = get_mock_hdfs_root(environ=environ)
    return os.path.join(mock_root, hdfs_path.lstrip('/'))
开发者ID:gitbenedict,项目名称:mrjob,代码行数:11,代码来源:mockhadoop.py

示例10: _hadoop_fs_ls

def _hadoop_fs_ls(cmd_name, stdout, stderr, environ, path_args, recursive):
    """Shared implementation behind hadoop_fs_ls() and hadoop_fs_lsr().

    Each entry of *path_args* is treated as an HDFS path glob; when it is
    empty, a single empty glob is used. For each glob with results, a
    "Found N items" line is printed to *stdout*, followed by one line per
    entry built by ``_hadoop_ls_line()``. Matching directories are walked
    with ``os.walk()`` when *recursive* is true, and listed one level deep
    otherwise. Directories are reported with size 0. A glob matching
    nothing produces an error message, prefixed with *cmd_name*, on
    *stderr*.

    Returns 0 if every glob matched at least one path, -1 otherwise.
    """
    exit_code = 0

    for pattern in (path_args or ['']):
        uri = urlparse(pattern)
        scheme, netloc = uri.scheme, uri.netloc

        matches = glob.glob(hdfs_path_to_real_path(pattern, environ))

        # (real path, scheme, netloc, size) for everything to be listed
        entries = []

        if not matches:
            print('%s: Cannot access %s: No such file or directory.' %
                  (cmd_name, pattern), file=stderr)
            exit_code = -1

        for match in matches:
            if not os.path.isdir(match):
                # plain file: list the file itself
                entries.append(
                    (match, scheme, netloc, os.path.getsize(match)))
            elif recursive:
                for dirpath, _dirnames, filenames in os.walk(match):
                    entries.append((dirpath, scheme, netloc, 0))
                    for filename in filenames:
                        child = os.path.join(dirpath, filename)
                        entries.append(
                            (child, scheme, netloc, os.path.getsize(child)))
            else:
                # non-recursive: immediate children only
                for filename in os.listdir(match):
                    child = os.path.join(match, filename)
                    if os.path.isdir(child):
                        child_size = 0
                    else:
                        child_size = os.path.getsize(child)
                    entries.append((child, scheme, netloc, child_size))

        if entries:
            print('Found %d items' % len(entries), file=stdout)
            # the line formatter is given the largest size in the listing
            largest = max(entry[3] for entry in entries)
            for entry in entries:
                print(_hadoop_ls_line(*entry + (largest, environ)),
                      file=stdout)

    return exit_code
开发者ID:kodizant,项目名称:mrjob,代码行数:52,代码来源:mockhadoop.py

示例11: parse_gcs_uri

def parse_gcs_uri(uri):
    """Split a GCS URI into a ``(bucket, key)`` pair.

    >>> parse_gcs_uri("gs://walrus/tmp/")
    ('walrus', 'tmp/')

    Raises ValueError unless *uri* has the ``gs`` scheme and a path
    containing at least one ``/``.
    """
    parsed = urlparse(uri)

    looks_like_gcs = parsed.scheme == "gs" and '/' in parsed.path
    if not looks_like_gcs:
        raise ValueError('Invalid GCS URI: %s' % uri)

    bucket = parsed.netloc
    key = parsed.path[1:]  # drop the leading slash
    return bucket, key
开发者ID:Jeremyfanfan,项目名称:mrjob,代码行数:13,代码来源:gcs.py

示例12: join

    def join(self, path, *paths):
        """Join *paths* onto *path*, where any of them may be a URI.

        If one or more components are URIs, the result is built from the
        last URI and the components after it, joined with
        ``posixpath.join()``; everything before that URI is discarded.
        With no URIs at all, this is plain ``os.path.join()``.
        """
        parts = (path,) + paths

        # scan from the right, so the last URI takes precedence
        for idx in reversed(range(len(parts))):
            part = parts[idx]
            if is_uri(part):
                scheme, netloc, uri_path = urlparse(part)[:3]
                # a URI with no path component is treated as its root
                joined = posixpath.join(uri_path or '/', *parts[idx + 1:])
                return '%s://%s%s' % (scheme, netloc, joined)

        return os.path.join(*parts)
开发者ID:Dean838,项目名称:mrjob,代码行数:13,代码来源:base.py

示例13: hadoop_fs_put

def hadoop_fs_put(stdout, stderr, environ, *args):
    """Mock implementation of ``hadoop fs -put``.

    The last argument is the destination; all arguments before it are
    sources. Sources must have an empty or ``file`` scheme, otherwise
    ValueError is raised. A source whose target already exists is skipped,
    with a message written to *stderr*.

    Returns -1 (after writing a usage message to *stderr*) when fewer than
    two arguments are given, 255 if any source was skipped, 0 otherwise.
    """
    if len(args) < 2:
        stderr.write('Usage: java FsShell [-put <localsrc> ... <dst>]')
        return -1

    sources, dst = args[:-1], args[-1]

    real_dst = hdfs_path_to_real_path(dst, environ)
    dst_is_dir = os.path.isdir(real_dst)
    parent_dir = os.path.dirname(real_dst)

    # dst may name a directory or a file, which can't be told apart when
    # it doesn't exist yet; make sure at least its parent directory does
    if not (dst_is_dir or os.path.isdir(parent_dir)):
        os.makedirs(parent_dir)

    exit_code = 0

    for src in sources:
        # Into an existing directory, the source keeps its basename;
        # otherwise the destination itself is the file to write.
        if dst_is_dir:
            target = os.path.join(real_dst, os.path.basename(src))
        else:
            target = real_dst

        if os.path.exists(target):
            # NOTE(review): the directory check is made on the source, not
            # on the target the message refers to -- confirm this is
            # intended
            if os.path.isdir(src):
                complaint = "Target %s is a directory"
            else:
                complaint = "Target %s already exists"
            stderr.write(complaint % real_path_to_hdfs_path(target, environ))
            exit_code = 255
            continue

        src_url = urlparse(src)
        if src_url.scheme not in ('file', ''):
            raise ValueError("hadoop fs -put mock supports only empty or "
                             "'file' schemes for input: %s" % src)

        shutil.copy(src_url.path, real_dst)

    return exit_code
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:46,代码来源:mockhadoop.py

示例14: ls

    def ls(self, path_glob):
        """Yield the URI of every S3 key matching *path_glob*.

        *path_glob* can include ``?`` to match single characters or
        ``*`` to match 0 or more characters. Both ``?`` and ``*`` can match
        ``/``.

        Yielded URIs use the same scheme as *path_glob*.

        .. versionchanged:: 0.5.0

            You no longer need a trailing slash to list "directories" on S3;
            both ``ls('s3://b/dir')`` and ``ls('s3://b/dir/')`` will list
            all keys starting with ``dir/``.
        """
        scheme = urlparse(path_glob).scheme

        # keys are listed by prefix, so a glob is cut off at the part
        # captured by GLOB_RE; a plain path is used as the prefix whole
        glob_match = GLOB_RE.match(path_glob)
        base_uri = glob_match.group(1) if glob_match else path_glob

        bucket_name, base_name = parse_s3_uri(base_uri)

        # second pattern, so that subdirectories of the path/glob match too
        needs_slash = path_glob and not path_glob.endswith('/')
        dir_glob = path_glob + ('/*' if needs_slash else '*')

        bucket = self.get_bucket(bucket_name)
        for key in bucket.list(base_name):
            uri = "%s://%s/%s" % (scheme, bucket_name, key.name)

            # the prefix listing over-matches; keep only what the glob allows
            if (fnmatch.fnmatchcase(uri, path_glob) or
                    fnmatch.fnmatchcase(uri, dir_glob)):
                yield uri
开发者ID:gitbenedict,项目名称:mrjob,代码行数:46,代码来源:s3.py

示例15: test_urlparse

 def test_urlparse(self):
     """urlparse() should split each URI into the expected 6-tuple,
     including for non-HTTP schemes and for URIs with a fragment."""
     cases = [
         ('http://www.yelp.com/lil_brudder',
          ('http', 'www.yelp.com', '/lil_brudder', '', '', '')),
         ('cant://touch/this',
          ('cant', 'touch', '/this', '', '', '')),
         ('s3://bucket/path',
          ('s3', 'bucket', '/path', '', '', '')),
         # the fragment lands in the last slot
         ('s3://bucket/path#customname',
          ('s3', 'bucket', '/path', '', '', 'customname')),
         # with and without a trailing slash
         ('s3://bucket',
          ('s3', 'bucket', '', '', '', '')),
         ('s3://bucket/',
          ('s3', 'bucket', '/', '', '', '')),
     ]

     for uri, expected in cases:
         self.assertEqual(urlparse(uri), expected)
开发者ID:Affirm,项目名称:mrjob,代码行数:13,代码来源:test_parse.py


注:本文中的mrjob.parse.urlparse函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。