当前位置: 首页>>代码示例>>Python>>正文


Python parse.parse_s3_uri函数代码示例

本文整理汇总了Python中mrjob.parse.parse_s3_uri函数的典型用法代码示例。如果您正苦于以下问题:Python parse_s3_uri函数的具体用法?Python parse_s3_uri怎么用?Python parse_s3_uri使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了parse_s3_uri函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_uri_parsing

 def test_uri_parsing(self):
     self.assertEqual(is_uri('notauri!'), False)
     self.assertEqual(is_uri('they://did/the/monster/mash'), True)
     self.assertEqual(is_s3_uri('s3://a/uri'), True)
     self.assertEqual(is_s3_uri('s3n://a/uri'), True)
     self.assertEqual(is_s3_uri('hdfs://a/uri'), False)
     self.assertEqual(parse_s3_uri('s3://bucket/loc'), ('bucket', 'loc'))
开发者ID:Asana,项目名称:mrjob,代码行数:7,代码来源:test_parse.py

示例2: _s3_ls

    def _s3_ls(self, uri):
        """Helper for ls(); doesn't bother with globbing or directories"""
        bucket_name, key_name = parse_s3_uri(uri)

        bucket = self.get_bucket(bucket_name)
        for key in bucket.list(key_name):
            yield s3_key_to_uri(key)
开发者ID:nilesh-molankar,项目名称:mrjob,代码行数:7,代码来源:s3.py

示例3: _get_s3_key

    def _get_s3_key(self, uri):
        """Get the boto3 s3.Object matching the given S3 uri, or
        return None if that key doesn't exist.

        uri is an S3 URI: ``s3://foo/bar``
        """
        bucket_name, key_name = parse_s3_uri(uri)
        return self.get_bucket(bucket_name).Object(key_name)
开发者ID:okomestudio,项目名称:mrjob,代码行数:8,代码来源:s3.py

示例4: _s3_ls

    def _s3_ls(self, uri):
        """Helper for ls(); doesn't bother with globbing or directories"""
        s3_conn = self.make_s3_conn()
        bucket_name, key_name = parse_s3_uri(uri)

        bucket = s3_conn.get_bucket(bucket_name, validate=VALIDATE_BUCKET)
        for key in bucket.list(key_name):
            yield s3_key_to_uri(key)
开发者ID:LXiong,项目名称:mrjob,代码行数:8,代码来源:s3.py

示例5: make_s3_key

    def make_s3_key(self, uri):
        """Create the given S3 key, and return the corresponding
        boto Key object.

        uri is an S3 URI: ``s3://foo/bar``
        """
        bucket_name, key_name = parse_s3_uri(uri)

        return self.get_bucket(bucket_name).new_key(key_name)
开发者ID:gitbenedict,项目名称:mrjob,代码行数:9,代码来源:s3.py

示例6: get_s3_keys

    def get_s3_keys(self, uri):
        """Get a stream of boto Key objects for each key inside
        the given dir on S3.

        uri is an S3 URI: ``s3://foo/bar``
        """
        bucket_name, key_prefix = parse_s3_uri(uri)
        bucket = self.get_bucket(bucket_name)
        for key in bucket.list(key_prefix):
            yield key
开发者ID:gitbenedict,项目名称:mrjob,代码行数:10,代码来源:s3.py

示例7: ls

    def ls(self, path_glob):
        """Recursively list files on S3.

        This doesn't list "directories" unless there's actually a
        corresponding key ending with a '/' (which is weird and confusing;
        don't make S3 keys ending in '/')

        To list a directory, path_glob must end with a trailing
        slash (foo and foo/ are different on S3)
        """

        log.debug("ls %s", path_glob)

        # clean up the  base uri to ensure we have an equal uri to boto (s3://)
        # just incase we get passed s3n://
        scheme = urlparse(path_glob).scheme

        # support globs
        glob_match = GLOB_RE.match(path_glob)

        # we're going to search for all keys starting with base_uri
        if glob_match:
            # cut it off at first wildcard
            base_uri = glob_match.group(1)
        else:
            base_uri = path_glob

        # Check if we're only going to get results by using a / on the end
        uris = self._s3_ls(base_uri)
        try:
            first = uris.next()
            uris = chain([first], uris)
        except (boto.exception.S3ResponseError, StopIteration):
            try:
                uris = self._s3_ls(base_uri.rstrip("/") + "/")
            except (boto.exception.S3ResponseError, StopIteration):
                return

        prev_uri = None
        for uri in uris:
            uri = "%s://%s/%s" % ((scheme,) + parse_s3_uri(uri))

            # enforce globbing
            if glob_match and not fnmatch.fnmatchcase(uri, path_glob):
                continue

            # If there are keys /data and /data/my_file then we consider there
            # to be a file /data, overriding there being a directory called
            # /data containing a file my_file. We discard /data/my_file.
            if prev_uri is not None and uri.startswith(prev_uri):
                continue

            yield uri
            prev_uri = uri.rstrip("/") + "/"
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:54,代码来源:s3.py

示例8: test_cleanup

    def test_cleanup(self):
        runner = EMRJobRunner(conf_paths=[], s3_sync_wait_time=0.01)

        # add some mock data and change last_modified
        remote_input_path = 's3://walrus/data/'
        self.add_mock_s3_data({'walrus': {'data/foo': 'foo\n',
                                        'data/bar': 'bar\n',
                                        'data/qux': 'qux\n'}})

        s3_conn = runner.make_s3_conn()
        bucket_name, key_name = parse_s3_uri(remote_input_path)
        bucket = s3_conn.get_bucket(bucket_name)

        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')
        key_bar.last_modified = datetime.now() - timedelta(days=45)
        key_qux.last_modified = datetime.now() - timedelta(hours=50)

        # make sure keys are there
        assert isinstance(key_foo, MockKey)
        assert isinstance(key_bar, MockKey)
        assert isinstance(key_qux, MockKey)

        s3_cleanup(remote_input_path, timedelta(days=30), dry_run=True,
                   conf_paths=[])

        # dry-run shouldn't delete anything
        assert isinstance(key_foo, MockKey)
        assert isinstance(key_bar, MockKey)
        assert isinstance(key_qux, MockKey)

        s3_cleanup(remote_input_path, timedelta(days=30), conf_paths=[])

        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')

        # make sure key_bar is deleted
        assert isinstance(key_foo, MockKey)
        self.assertEqual(key_bar, None)
        assert isinstance(key_qux, MockKey)

        s3_cleanup(remote_input_path, timedelta(hours=48), conf_paths=[])

        key_foo = bucket.get_key('data/foo')
        key_bar = bucket.get_key('data/bar')
        key_qux = bucket.get_key('data/qux')

        # make sure key_qux is deleted
        assert isinstance(key_foo, MockKey)
        self.assertEqual(key_bar, None)
        self.assertEqual(key_qux, None)
开发者ID:Anihc,项目名称:mrjob,代码行数:53,代码来源:test_s3_tmpwatch.py

示例9: get_s3_key

    def get_s3_key(self, uri, s3_conn=None):
        """Get the boto Key object matching the given S3 uri, or
        return None if that key doesn't exist.

        uri is an S3 URI: ``s3://foo/bar``

        You may optionally pass in an existing s3 connection through
        ``s3_conn``.
        """
        if not s3_conn:
            s3_conn = self.make_s3_conn()
        bucket_name, key_name = parse_s3_uri(uri)

        return s3_conn.get_bucket(bucket_name).get_key(key_name)
开发者ID:adaptivelab,项目名称:mrjob,代码行数:14,代码来源:s3.py

示例10: make_s3_key

    def make_s3_key(self, uri, s3_conn=None):
        """Create the given S3 key, and return the corresponding
        boto Key object.

        uri is an S3 URI: ``s3://foo/bar``

        You may optionally pass in an existing S3 connection through
        ``s3_conn``.
        """
        if not s3_conn:
            s3_conn = self.make_s3_conn()
        bucket_name, key_name = parse_s3_uri(uri)

        return s3_conn.get_bucket(bucket_name).new_key(key_name)
开发者ID:inncapsule,项目名称:mrjob,代码行数:14,代码来源:s3.py

示例11: get_s3_keys

    def get_s3_keys(self, uri, s3_conn=None):
        """Get a stream of boto Key objects for each key inside
        the given dir on S3.

        uri is an S3 URI: ``s3://foo/bar``

        You may optionally pass in an existing S3 connection through s3_conn
        """
        if not s3_conn:
            s3_conn = self.make_s3_conn()

        bucket_name, key_prefix = parse_s3_uri(uri)
        bucket = s3_conn.get_bucket(bucket_name)
        for key in bucket.list(key_prefix):
            yield key
开发者ID:inncapsule,项目名称:mrjob,代码行数:15,代码来源:s3.py

示例12: mkdir

    def mkdir(self, dest):
        """Make a directory. This doesn't actually create directories on S3
        (because there is no such thing), but it will create the corresponding
        bucket if it doesn't exist.
        """
        bucket_name, key_name = parse_s3_uri(dest)

        client = self.make_s3_client()

        try:
            client.head_bucket(Bucket=bucket_name)
        except botocore.exceptions.ClientError as ex:
            if _client_error_status(ex) != 404:
                raise

            self.create_bucket(bucket_name)
开发者ID:Yelp,项目名称:mrjob,代码行数:16,代码来源:s3.py

示例13: ls

    def ls(self, path_glob):
        """Recursively list files on S3.

        *path_glob* can include ``?`` to match single characters or
        ``*`` to match 0 or more characters. Both ``?`` and ``*`` can match
        ``/``.

        .. versionchanged:: 0.5.0

            You no longer need a trailing slash to list "directories" on S3;
            both ``ls('s3://b/dir')`` and `ls('s3://b/dir/')` will list
            all keys starting with ``dir/``.
        """

        # clean up the  base uri to ensure we have an equal uri to boto (s3://)
        # just in case we get passed s3n://
        scheme = urlparse(path_glob).scheme

        # support globs
        glob_match = GLOB_RE.match(path_glob)

        # we're going to search for all keys starting with base_uri
        if glob_match:
            # cut it off at first wildcard
            base_uri = glob_match.group(1)
        else:
            base_uri = path_glob

        bucket_name, base_name = parse_s3_uri(base_uri)

        # allow subdirectories of the path/glob
        if path_glob and not path_glob.endswith('/'):
            dir_glob = path_glob + '/*'
        else:
            dir_glob = path_glob + '*'

        bucket = self.get_bucket(bucket_name)
        for key in bucket.list(base_name):
            uri = "%s://%s/%s" % (scheme, bucket_name, key.name)

            # enforce globbing
            if not (fnmatch.fnmatchcase(uri, path_glob) or
                    fnmatch.fnmatchcase(uri, dir_glob)):
                continue

            yield uri
开发者ID:gitbenedict,项目名称:mrjob,代码行数:46,代码来源:s3.py

示例14: get_s3_key

    def get_s3_key(self, uri):
        """Get the boto Key object matching the given S3 uri, or
        return None if that key doesn't exist.

        uri is an S3 URI: ``s3://foo/bar``
        """
        bucket_name, key_name = parse_s3_uri(uri)

        try:
            bucket = self.get_bucket(bucket_name)
        except boto.exception.S3ResponseError as e:
            if e.status != 404:
                raise e
            key = None
        else:
            key = bucket.get_key(key_name)

        return key
开发者ID:gitbenedict,项目名称:mrjob,代码行数:18,代码来源:s3.py

示例15: get_s3_key

    def get_s3_key(self, uri, s3_conn=None):
        """Get the boto Key object matching the given S3 uri, or
        return None if that key doesn't exist.

        uri is an S3 URI: ``s3://foo/bar``

        You may optionally pass in an existing s3 connection through
        ``s3_conn``.
        """
        if not s3_conn:
            s3_conn = self.make_s3_conn()
        bucket_name, key_name = parse_s3_uri(uri)

        try:
            bucket = s3_conn.get_bucket(bucket_name)
        except boto.exception.S3ResponseError, e:
            if e.status != 404:
                raise e
            key = None
开发者ID:inncapsule,项目名称:mrjob,代码行数:19,代码来源:s3.py


注:本文中的mrjob.parse.parse_s3_uri函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。