本文整理汇总了Python中mrjob.parse.parse_s3_uri函数的典型用法代码示例。如果您正苦于以下问题:Python parse_s3_uri函数的具体用法?Python parse_s3_uri怎么用?Python parse_s3_uri使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse_s3_uri函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_uri_parsing
def test_uri_parsing(self):
    """Spot-check is_uri(), is_s3_uri() and parse_s3_uri()."""
    self.assertEqual(False, is_uri('notauri!'))
    self.assertEqual(True, is_uri('they://did/the/monster/mash'))
    self.assertEqual(True, is_s3_uri('s3://a/uri'))
    self.assertEqual(True, is_s3_uri('s3n://a/uri'))
    self.assertEqual(False, is_s3_uri('hdfs://a/uri'))
    self.assertEqual(('bucket', 'loc'), parse_s3_uri('s3://bucket/loc'))
示例2: _s3_ls
def _s3_ls(self, uri):
    """Helper for ls(); doesn't bother with globbing or directories."""
    bucket_name, prefix = parse_s3_uri(uri)
    # list every key under the prefix and hand back full S3 URIs
    for key in self.get_bucket(bucket_name).list(prefix):
        yield s3_key_to_uri(key)
示例3: _get_s3_key
def _get_s3_key(self, uri):
    """Return the boto3 ``s3.Object`` for the given S3 URI.

    *uri* is an S3 URI: ``s3://foo/bar``

    NOTE(review): no existence check happens here — the Object is
    returned unconditionally; presumably boto3 defers any lookup
    until the object is actually accessed.
    """
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = self.get_bucket(bucket_name)
    return bucket.Object(key_name)
示例4: _s3_ls
def _s3_ls(self, uri):
    """Helper for ls(); doesn't bother with globbing or directories."""
    # generator function: the connection is only made on first iteration
    conn = self.make_s3_conn()
    bucket_name, prefix = parse_s3_uri(uri)
    bucket = conn.get_bucket(bucket_name, validate=VALIDATE_BUCKET)
    for key in bucket.list(prefix):
        yield s3_key_to_uri(key)
示例5: make_s3_key
def make_s3_key(self, uri):
    """Create the given S3 key and return the corresponding boto Key.

    *uri* is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = self.get_bucket(bucket_name)
    return bucket.new_key(key_name)
示例6: get_s3_keys
def get_s3_keys(self, uri):
    """Yield a boto Key object for each key under the given S3 dir.

    *uri* is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_prefix = parse_s3_uri(uri)
    # generator function: the bucket lookup is deferred to iteration
    for key in self.get_bucket(bucket_name).list(key_prefix):
        yield key
示例7: ls
def ls(self, path_glob):
    """Recursively list files on S3.

    This doesn't list "directories" unless there's actually a
    corresponding key ending with a '/' (which is weird and confusing;
    don't make S3 keys ending in '/').

    To list a directory, path_glob must end with a trailing
    slash (foo and foo/ are different on S3).
    """
    log.debug("ls %s", path_glob)
    # clean up the base uri to ensure we have an equal uri to boto (s3://)
    # just in case we get passed s3n://
    scheme = urlparse(path_glob).scheme
    # support globs
    glob_match = GLOB_RE.match(path_glob)
    # we're going to search for all keys starting with base_uri
    if glob_match:
        # cut it off at first wildcard
        base_uri = glob_match.group(1)
    else:
        base_uri = path_glob
    # Check if we're only going to get results by using a / on the end
    uris = self._s3_ls(base_uri)
    try:
        # BUGFIX: uris.next() is Python 2 only; next(uris) is the
        # portable equivalent (works on Python 2.6+ and Python 3)
        first = next(uris)
        uris = chain([first], uris)
    except (boto.exception.S3ResponseError, StopIteration):
        # no keys under base_uri itself; retry with a trailing slash
        try:
            uris = self._s3_ls(base_uri.rstrip("/") + "/")
        except (boto.exception.S3ResponseError, StopIteration):
            return
    prev_uri = None
    for uri in uris:
        uri = "%s://%s/%s" % ((scheme,) + parse_s3_uri(uri))
        # enforce globbing
        if glob_match and not fnmatch.fnmatchcase(uri, path_glob):
            continue
        # If there are keys /data and /data/my_file then we consider there
        # to be a file /data, overriding there being a directory called
        # /data containing a file my_file. We discard /data/my_file.
        if prev_uri is not None and uri.startswith(prev_uri):
            continue
        yield uri
        prev_uri = uri.rstrip("/") + "/"
示例8: test_cleanup
def test_cleanup(self):
    """End-to-end check of s3_cleanup(): dry runs delete nothing, and
    real runs delete only keys older than the given timedelta."""
    runner = EMRJobRunner(conf_paths=[], s3_sync_wait_time=0.01)
    # add some mock data and change last_modified
    remote_input_path = 's3://walrus/data/'
    self.add_mock_s3_data({'walrus': {'data/foo': 'foo\n',
    'data/bar': 'bar\n',
    'data/qux': 'qux\n'}})
    s3_conn = runner.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(remote_input_path)
    bucket = s3_conn.get_bucket(bucket_name)
    key_foo = bucket.get_key('data/foo')
    key_bar = bucket.get_key('data/bar')
    key_qux = bucket.get_key('data/qux')
    # age the keys: bar is 45 days old, qux is 50 hours old
    # (foo keeps its current timestamp and should never be deleted)
    key_bar.last_modified = datetime.now() - timedelta(days=45)
    key_qux.last_modified = datetime.now() - timedelta(hours=50)
    # make sure keys are there
    assert isinstance(key_foo, MockKey)
    assert isinstance(key_bar, MockKey)
    assert isinstance(key_qux, MockKey)
    s3_cleanup(remote_input_path, timedelta(days=30), dry_run=True,
    conf_paths=[])
    # dry-run shouldn't delete anything
    assert isinstance(key_foo, MockKey)
    assert isinstance(key_bar, MockKey)
    assert isinstance(key_qux, MockKey)
    s3_cleanup(remote_input_path, timedelta(days=30), conf_paths=[])
    # re-fetch keys to observe the effect of the cleanup
    key_foo = bucket.get_key('data/foo')
    key_bar = bucket.get_key('data/bar')
    key_qux = bucket.get_key('data/qux')
    # make sure key_bar is deleted
    assert isinstance(key_foo, MockKey)
    self.assertEqual(key_bar, None)
    assert isinstance(key_qux, MockKey)
    s3_cleanup(remote_input_path, timedelta(hours=48), conf_paths=[])
    # re-fetch again after the tighter 48-hour cutoff
    key_foo = bucket.get_key('data/foo')
    key_bar = bucket.get_key('data/bar')
    key_qux = bucket.get_key('data/qux')
    # make sure key_qux is deleted
    assert isinstance(key_foo, MockKey)
    self.assertEqual(key_bar, None)
    self.assertEqual(key_qux, None)
示例9: get_s3_key
def get_s3_key(self, uri, s3_conn=None):
    """Get the boto Key object matching the given S3 uri, or
    return None if that key doesn't exist.

    *uri* is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing s3 connection through
    ``s3_conn``.
    """
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = s3_conn.get_bucket(bucket_name)
    return bucket.get_key(key_name)
示例10: make_s3_key
def make_s3_key(self, uri, s3_conn=None):
    """Create the given S3 key, and return the corresponding
    boto Key object.

    *uri* is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing S3 connection through
    ``s3_conn``.
    """
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = s3_conn.get_bucket(bucket_name)
    return bucket.new_key(key_name)
示例11: get_s3_keys
def get_s3_keys(self, uri, s3_conn=None):
    """Yield a boto Key object for each key under the given S3 dir.

    *uri* is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing S3 connection through
    ``s3_conn``.
    """
    # generator function: connection setup happens on first iteration
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_prefix = parse_s3_uri(uri)
    for key in s3_conn.get_bucket(bucket_name).list(key_prefix):
        yield key
示例12: mkdir
def mkdir(self, dest):
    """Make a directory. This doesn't actually create directories on S3
    (because there is no such thing), but it will create the corresponding
    bucket if it doesn't exist.
    """
    bucket_name, _ = parse_s3_uri(dest)
    s3_client = self.make_s3_client()
    try:
        s3_client.head_bucket(Bucket=bucket_name)
    except botocore.exceptions.ClientError as ex:
        # anything other than "bucket not found" is a real error
        if _client_error_status(ex) != 404:
            raise
        self.create_bucket(bucket_name)
示例13: ls
def ls(self, path_glob):
    """Recursively list files on S3.

    *path_glob* can include ``?`` to match single characters or
    ``*`` to match 0 or more characters. Both ``?`` and ``*`` can match
    ``/``.

    .. versionchanged:: 0.5.0

        You no longer need a trailing slash to list "directories" on S3;
        both ``ls('s3://b/dir')`` and `ls('s3://b/dir/')` will list
        all keys starting with ``dir/``.
    """
    # normalize the scheme so the URIs we emit match what we were
    # passed (e.g. s3n:// stays s3n://)
    scheme = urlparse(path_glob).scheme

    # support globs: search for all keys starting at the first wildcard
    glob_match = GLOB_RE.match(path_glob)
    base_uri = glob_match.group(1) if glob_match else path_glob

    bucket_name, base_name = parse_s3_uri(base_uri)

    # also match keys nested below the path/glob
    if path_glob and not path_glob.endswith('/'):
        dir_glob = path_glob + '/*'
    else:
        dir_glob = path_glob + '*'

    for key in self.get_bucket(bucket_name).list(base_name):
        uri = "%s://%s/%s" % (scheme, bucket_name, key.name)
        # enforce globbing: keep keys matching either the glob itself
        # or anything underneath it
        if (fnmatch.fnmatchcase(uri, path_glob) or
                fnmatch.fnmatchcase(uri, dir_glob)):
            yield uri
示例14: get_s3_key
def get_s3_key(self, uri):
    """Get the boto Key object matching the given S3 uri, or
    return None if that key doesn't exist.

    *uri* is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_name = parse_s3_uri(uri)
    try:
        bucket = self.get_bucket(bucket_name)
    except boto.exception.S3ResponseError as e:
        # a missing bucket just means a missing key; anything else
        # is a real error
        if e.status != 404:
            raise e
        return None
    return bucket.get_key(key_name)
示例15: get_s3_key
def get_s3_key(self, uri, s3_conn=None):
"""Get the boto Key object matching the given S3 uri, or
return None if that key doesn't exist.
uri is an S3 URI: ``s3://foo/bar``
You may optionally pass in an existing s3 connection through
``s3_conn``.
"""
if not s3_conn:
s3_conn = self.make_s3_conn()
bucket_name, key_name = parse_s3_uri(uri)
try:
bucket = s3_conn.get_bucket(bucket_name)
except boto.exception.S3ResponseError, e:
if e.status != 404:
raise e
key = None