本文整理汇总了Python中mrjob.parse.parse_s3_uri函数的典型用法代码示例。如果您正苦于以下问题:Python parse_s3_uri函数的具体用法?Python parse_s3_uri怎么用?Python parse_s3_uri使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse_s3_uri函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_uri_parsing
def test_uri_parsing(self):
    """Spot-check is_uri(), is_s3_uri() and parse_s3_uri()."""
    self.assertEqual(False, is_uri('notauri!'))
    self.assertEqual(True, is_uri('they://did/the/monster/mash'))
    self.assertEqual(True, is_s3_uri('s3://a/uri'))
    self.assertEqual(True, is_s3_uri('s3n://a/uri'))
    self.assertEqual(False, is_s3_uri('hdfs://a/uri'))
    self.assertEqual(('bucket', 'loc'), parse_s3_uri('s3://bucket/loc'))
示例2: _s3_ls
def _s3_ls(self, uri):
    """Helper for ls(); doesn't bother with globbing or directories."""
    bucket_name, prefix = parse_s3_uri(uri)
    # list every key under the prefix and hand back full S3 URIs
    for key in self.get_bucket(bucket_name).list(prefix):
        yield s3_key_to_uri(key)
示例3: _get_s3_key
def _get_s3_key(self, uri):
    """Return the boto3 ``s3.Object`` for the given S3 URI.

    *uri* is an S3 URI: ``s3://foo/bar``

    NOTE(review): no existence check happens here — the Object is
    returned unconditionally; presumably boto3 defers any lookup
    until the object is actually accessed.
    """
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = self.get_bucket(bucket_name)
    return bucket.Object(key_name)
示例4: _s3_ls
def _s3_ls(self, uri):
    """Helper for ls(); doesn't bother with globbing or directories."""
    # generator function: the connection is only made on first iteration
    conn = self.make_s3_conn()
    bucket_name, prefix = parse_s3_uri(uri)
    bucket = conn.get_bucket(bucket_name, validate=VALIDATE_BUCKET)
    for key in bucket.list(prefix):
        yield s3_key_to_uri(key)
示例5: make_s3_key
def make_s3_key(self, uri):
    """Create the given S3 key and return the corresponding boto Key.

    *uri* is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = self.get_bucket(bucket_name)
    return bucket.new_key(key_name)
示例6: get_s3_keys
def get_s3_keys(self, uri):
    """Yield a boto Key object for each key under the given S3 dir.

    *uri* is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_prefix = parse_s3_uri(uri)
    # generator function: the bucket lookup is deferred to iteration
    for key in self.get_bucket(bucket_name).list(key_prefix):
        yield key
示例7: ls
def ls(self, path_glob):
    """Recursively list files on S3.

    This doesn't list "directories" unless there's actually a
    corresponding key ending with a '/' (which is weird and confusing;
    don't make S3 keys ending in '/').

    To list a directory, path_glob must end with a trailing
    slash (foo and foo/ are different on S3).
    """
    log.debug("ls %s", path_glob)
    # clean up the base uri to ensure we have an equal uri to boto (s3://)
    # just in case we get passed s3n://
    scheme = urlparse(path_glob).scheme
    # support globs
    glob_match = GLOB_RE.match(path_glob)
    # we're going to search for all keys starting with base_uri
    if glob_match:
        # cut it off at first wildcard
        base_uri = glob_match.group(1)
    else:
        base_uri = path_glob
    # Check if we're only going to get results by using a / on the end
    uris = self._s3_ls(base_uri)
    try:
        # BUGFIX: uris.next() is Python 2 only; next(uris) is the
        # portable equivalent (works on Python 2.6+ and Python 3)
        first = next(uris)
        uris = chain([first], uris)
    except (boto.exception.S3ResponseError, StopIteration):
        # no keys under base_uri itself; retry with a trailing slash
        try:
            uris = self._s3_ls(base_uri.rstrip("/") + "/")
        except (boto.exception.S3ResponseError, StopIteration):
            return
    prev_uri = None
    for uri in uris:
        uri = "%s://%s/%s" % ((scheme,) + parse_s3_uri(uri))
        # enforce globbing
        if glob_match and not fnmatch.fnmatchcase(uri, path_glob):
            continue
        # If there are keys /data and /data/my_file then we consider there
        # to be a file /data, overriding there being a directory called
        # /data containing a file my_file. We discard /data/my_file.
        if prev_uri is not None and uri.startswith(prev_uri):
            continue
        yield uri
        prev_uri = uri.rstrip("/") + "/"
示例8: test_cleanup
def test_cleanup(self):
    """End-to-end check of s3_cleanup(): dry runs delete nothing, and
    real runs delete only keys older than the given timedelta."""
    runner = EMRJobRunner(conf_paths=[], s3_sync_wait_time=0.01)
    # add some mock data and change last_modified
    remote_input_path = 's3://walrus/data/'
    self.add_mock_s3_data({'walrus': {'data/foo': 'foo\n',
    'data/bar': 'bar\n',
    'data/qux': 'qux\n'}})
    s3_conn = runner.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(remote_input_path)
    bucket = s3_conn.get_bucket(bucket_name)
    key_foo = bucket.get_key('data/foo')
    key_bar = bucket.get_key('data/bar')
    key_qux = bucket.get_key('data/qux')
    # age the keys: bar is 45 days old, qux is 50 hours old
    # (foo keeps its current timestamp and should never be deleted)
    key_bar.last_modified = datetime.now() - timedelta(days=45)
    key_qux.last_modified = datetime.now() - timedelta(hours=50)
    # make sure keys are there
    assert isinstance(key_foo, MockKey)
    assert isinstance(key_bar, MockKey)
    assert isinstance(key_qux, MockKey)
    s3_cleanup(remote_input_path, timedelta(days=30), dry_run=True,
    conf_paths=[])
    # dry-run shouldn't delete anything
    assert isinstance(key_foo, MockKey)
    assert isinstance(key_bar, MockKey)
    assert isinstance(key_qux, MockKey)
    s3_cleanup(remote_input_path, timedelta(days=30), conf_paths=[])
    # re-fetch keys to observe the effect of the cleanup
    key_foo = bucket.get_key('data/foo')
    key_bar = bucket.get_key('data/bar')
    key_qux = bucket.get_key('data/qux')
    # make sure key_bar is deleted
    assert isinstance(key_foo, MockKey)
    self.assertEqual(key_bar, None)
    assert isinstance(key_qux, MockKey)
    s3_cleanup(remote_input_path, timedelta(hours=48), conf_paths=[])
    # re-fetch again after the tighter 48-hour cutoff
    key_foo = bucket.get_key('data/foo')
    key_bar = bucket.get_key('data/bar')
    key_qux = bucket.get_key('data/qux')
    # make sure key_qux is deleted
    assert isinstance(key_foo, MockKey)
    self.assertEqual(key_bar, None)
    self.assertEqual(key_qux, None)
示例9: get_s3_key
def get_s3_key(self, uri, s3_conn=None):
    """Get the boto Key object matching the given S3 uri, or
    return None if that key doesn't exist.

    *uri* is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing s3 connection through
    ``s3_conn``.
    """
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = s3_conn.get_bucket(bucket_name)
    return bucket.get_key(key_name)
示例10: make_s3_key
def make_s3_key(self, uri, s3_conn=None):
    """Create the given S3 key, and return the corresponding
    boto Key object.

    *uri* is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing S3 connection through
    ``s3_conn``.
    """
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = s3_conn.get_bucket(bucket_name)
    return bucket.new_key(key_name)
示例11: get_s3_keys
def get_s3_keys(self, uri, s3_conn=None):
    """Yield a boto Key object for each key under the given S3 dir.

    *uri* is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing S3 connection through
    ``s3_conn``.
    """
    # generator function: connection setup happens on first iteration
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_prefix = parse_s3_uri(uri)
    for key in s3_conn.get_bucket(bucket_name).list(key_prefix):
        yield key
示例12: mkdir
def mkdir(self, dest):
    """Make a directory. This doesn't actually create directories on S3
    (because there is no such thing), but it will create the corresponding
    bucket if it doesn't exist.
    """
    bucket_name, _ = parse_s3_uri(dest)
    s3_client = self.make_s3_client()
    try:
        s3_client.head_bucket(Bucket=bucket_name)
    except botocore.exceptions.ClientError as ex:
        # anything other than "bucket not found" is a real error
        if _client_error_status(ex) != 404:
            raise
        self.create_bucket(bucket_name)
示例13: ls
def ls(self, path_glob):
    """Recursively list files on S3.

    *path_glob* can include ``?`` to match single characters or
    ``*`` to match 0 or more characters. Both ``?`` and ``*`` can match
    ``/``.

    .. versionchanged:: 0.5.0

        You no longer need a trailing slash to list "directories" on S3;
        both ``ls('s3://b/dir')`` and `ls('s3://b/dir/')` will list
        all keys starting with ``dir/``.
    """
    # normalize the scheme so the URIs we emit match what we were
    # passed (e.g. s3n:// stays s3n://)
    scheme = urlparse(path_glob).scheme

    # support globs: search for all keys starting at the first wildcard
    glob_match = GLOB_RE.match(path_glob)
    base_uri = glob_match.group(1) if glob_match else path_glob

    bucket_name, base_name = parse_s3_uri(base_uri)

    # also match keys nested below the path/glob
    if path_glob and not path_glob.endswith('/'):
        dir_glob = path_glob + '/*'
    else:
        dir_glob = path_glob + '*'

    for key in self.get_bucket(bucket_name).list(base_name):
        uri = "%s://%s/%s" % (scheme, bucket_name, key.name)
        # enforce globbing: keep keys matching either the glob itself
        # or anything underneath it
        if (fnmatch.fnmatchcase(uri, path_glob) or
                fnmatch.fnmatchcase(uri, dir_glob)):
            yield uri
示例14: get_s3_key
def get_s3_key(self, uri):
    """Get the boto Key object matching the given S3 uri, or
    return None if that key doesn't exist.

    *uri* is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_name = parse_s3_uri(uri)
    try:
        bucket = self.get_bucket(bucket_name)
    except boto.exception.S3ResponseError as e:
        # a missing bucket just means a missing key; anything else
        # is a real error
        if e.status != 404:
            raise e
        return None
    return bucket.get_key(key_name)
示例15: get_s3_key
def get_s3_key(self, uri, s3_conn=None):
"""Get the boto Key object matching the given S3 uri, or
return None if that key doesn't exist.
uri is an S3 URI: ``s3://foo/bar``
You may optionally pass in an existing s3 connection through
``s3_conn``.
"""
if not s3_conn:
s3_conn = self.make_s3_conn()
bucket_name, key_name = parse_s3_uri(uri)
try:
bucket = s3_conn.get_bucket(bucket_name)
except boto.exception.S3ResponseError, e:
if e.status != 404:
raise e
key = None