This page collects typical usage examples of the is_s3_uri function from the Python mrjob.parse module. If you are unsure what is_s3_uri does, how to call it, or want to see it used in real code, the hand-picked examples below should help.
Six code examples of is_s3_uri are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
Example 1: test_uri_parsing
def test_uri_parsing(self):
    self.assertEqual(is_uri('notauri!'), False)
    self.assertEqual(is_uri('they://did/the/monster/mash'), True)
    self.assertEqual(is_s3_uri('s3://a/uri'), True)
    self.assertEqual(is_s3_uri('s3n://a/uri'), True)
    self.assertEqual(is_s3_uri('hdfs://a/uri'), False)
    self.assertEqual(parse_s3_uri('s3://bucket/loc'), ('bucket', 'loc'))
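The assertions above pin down the behavior: is_uri accepts anything with a URI scheme, is_s3_uri accepts only S3 schemes, and parse_s3_uri splits a URI into a (bucket, key) pair. As a rough mental model, the checks could be written as below; this is a minimal sketch mirroring only the asserted behavior, not mrjob's actual implementation, and the sketch_* names are made up.

# Minimal sketch of the three helpers, mirroring only the asserted behavior.
# This is NOT mrjob's actual implementation; names are hypothetical.
from urllib.parse import urlparse

def sketch_is_uri(path):
    # 'they://did/the/monster/mash' has a scheme; 'notauri!' does not
    return bool(urlparse(path).scheme)

def sketch_is_s3_uri(path):
    # only S3 schemes count (Example 5 below also accepts 's3a://')
    return urlparse(path).scheme in ('s3', 's3n', 's3a')

def sketch_parse_s3_uri(uri):
    # 's3://bucket/loc' -> ('bucket', 'loc')
    parsed = urlparse(uri)
    return parsed.netloc, parsed.path.lstrip('/')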
Example 2: _candidate_log_subdirs
def _candidate_log_subdirs(fs, log_type, log_dir, node_log_path, ssh_host):
    """Yield lists of subdirectories to look for logs in.

    Currently, this means first SSH (if *ssh_host* is set), and then
    *log_dir* (if set).
    """
    # first, try SSH (most up-to-date)
    if ssh_host:
        yield _ssh_log_subdirs(
            fs, log_type, node_log_path=node_log_path, ssh_host=ssh_host)

    # then try the log directory
    if log_dir:
        if is_s3_uri(log_dir):
            relative_path = _S3_LOG_TYPE_TO_RELATIVE_PATH.get(log_type)
        else:
            relative_path = _LOG_TYPE_TO_RELATIVE_PATH.get(log_type)

        if relative_path is not None:
            yield [posixpath.join(log_dir, relative_path, '')]
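Since _candidate_log_subdirs is a generator of lists, a caller would typically try each batch of subdirectories in order until one of them yields logs. A hedged usage sketch follows; the fs.ls() call and the sketch_find_logs name are assumptions for illustration, not mrjob's actual calling code.

# Hypothetical caller of _candidate_log_subdirs (names are illustrative only).
def sketch_find_logs(fs, log_type, log_dir, node_log_path, ssh_host):
    for subdirs in _candidate_log_subdirs(
            fs, log_type, log_dir, node_log_path, ssh_host):
        for subdir in subdirs:
            # assumed fs.ls() interface; real code would match log paths here
            paths = list(fs.ls(subdir))
            if paths:
                return paths  # stop at the first source that has logs
    return []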
Example 3: simulate_progress
def simulate_progress(self, jobflow_id, now=None):
    """Simulate progress on the given job flow. This is automatically
    run when we call describe_jobflow().

    :type jobflow_id: str
    :param jobflow_id: fake job flow ID
    :type now: :py:class:`datetime.datetime`
    :param now: alternate time to use as the current time (should be UTC)
    """
    if now is None:
        now = datetime.utcnow()

    if self.simulation_iterator:
        try:
            next(self.simulation_iterator)
        except StopIteration:
            raise AssertionError(
                'Simulated progress too many times; bailing out')

    job_flow = self.mock_emr_job_flows[jobflow_id]

    # if job is STARTING, move it along to WAITING
    if job_flow.state == 'STARTING':
        job_flow.state = 'WAITING'
        job_flow.startdatetime = to_iso8601(now)
        # instances are now provisioned and running
        for ig in job_flow.instancegroups:
            ig.instancerunningcount = ig.instancerequestcount

    # if job is done, don't advance it
    if job_flow.state in ('COMPLETED', 'TERMINATED', 'FAILED'):
        return

    # if SHUTTING_DOWN, finish shutting down
    if job_flow.state == 'SHUTTING_DOWN':
        if job_flow.reason == 'Shut down as step failed':
            job_flow.state = 'FAILED'
        else:
            job_flow.state = 'TERMINATED'
        job_flow.enddatetime = to_iso8601(now)
        return

    # if a step is currently running, advance it
    steps = getattr(job_flow, 'steps', None) or []
    for step_num, step in enumerate(steps):
        # skip steps that are already done
        if step.state in ('COMPLETED', 'FAILED', 'CANCELLED'):
            continue

        if step.name in ('Setup Hadoop Debugging',):
            step.state = 'COMPLETED'
            continue

        # allow steps to get stuck
        if getattr(step, 'mock_no_progress', None):
            return

        # found currently running step! going to handle it, then exit
        if step.state == 'PENDING':
            step.state = 'RUNNING'
            step.startdatetime = to_iso8601(now)
            return

        assert step.state == 'RUNNING'
        step.enddatetime = to_iso8601(now)

        # check if we're supposed to have an error
        if (jobflow_id, step_num) in self.mock_emr_failures:
            step.state = 'FAILED'
            reason = self.mock_emr_failures[(jobflow_id, step_num)]
            if reason:
                job_flow.reason = reason
            if step.actiononfailure == 'TERMINATE_JOB_FLOW':
                job_flow.state = 'SHUTTING_DOWN'
                if not reason:
                    job_flow.reason = 'Shut down as step failed'
            return

        step.state = 'COMPLETED'

        # create fake output if we're supposed to write to S3
        output_uri = self._get_step_output_uri(step)
        if output_uri and is_s3_uri(output_uri):
            mock_output = self.mock_emr_output.get(
                (jobflow_id, step_num)) or ['']

            bucket_name, key_name = parse_s3_uri(output_uri)

            # write output to S3
            for i, part in enumerate(mock_output):
                add_mock_s3_data(self.mock_s3_fs, {
                    bucket_name: {key_name + 'part-%05d' % i: part}})
        elif (jobflow_id, step_num) in self.mock_emr_output:
            raise AssertionError(
                "can't use output for job flow ID %s, step %d "
                "(it doesn't output to S3)" %
                (jobflow_id, step_num))

        # done!
        return
#......... some code omitted here .........
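To make the fake-output step concrete: if a step's output URI points at S3, parse_s3_uri splits it into a bucket and a key prefix, and each chunk of mock output becomes a part-NNNNN key under that prefix. A small worked sketch follows; the bucket name and data are made-up illustrative values, not from the source.

# Worked example of the key layout the mock writes (values are hypothetical).
output_uri = 's3://walrus/output/'    # assumed step output URI
mock_output = ['line1\n', 'line2\n']  # two fake output chunks

# parse_s3_uri('s3://walrus/output/') -> ('walrus', 'output/')
bucket_name, key_name = 'walrus', 'output/'

keys = [key_name + 'part-%05d' % i for i, _ in enumerate(mock_output)]
assert keys == ['output/part-00000', 'output/part-00001']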
Example 4: can_handle_path
def can_handle_path(self, path):
    return is_s3_uri(path)
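A can_handle_path method like this suggests a composite filesystem that dispatches each path to the first backend claiming it. Below is a minimal sketch of that dispatch pattern; the class and method names besides can_handle_path are assumptions, not mrjob's real classes.

# Hypothetical composite filesystem illustrating can_handle_path dispatch.
class SketchCompositeFilesystem(object):
    def __init__(self, filesystems):
        self._filesystems = filesystems  # e.g. [s3_fs, hadoop_fs, local_fs]

    def _fs_for(self, path):
        for fs in self._filesystems:
            if fs.can_handle_path(path):  # e.g. the S3 fs checks is_s3_uri()
                return fs
        raise IOError('no filesystem can handle %r' % path)

    def ls(self, path):
        return self._fs_for(path).ls(path)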
Example 5: test_is_s3_uri
def test_is_s3_uri(self):
    self.assertEqual(is_s3_uri('s3://a/uri'), True)
    self.assertEqual(is_s3_uri('s3n://a/uri'), True)
    self.assertEqual(is_s3_uri('s3a://a/uri'), True)
    self.assertEqual(is_s3_uri('hdfs://a/uri'), False)
Example 6: simulate_progress
def simulate_progress(self, jobflow_id, now=None):
    """Simulate progress on the given job flow. This is automatically
    run when we call describe_jobflow().

    :type jobflow_id: str
    :param jobflow_id: fake job flow ID
    :type now: :py:class:`datetime.datetime`
    :param now: alternate time to use as the current time (should be UTC)
    """
    if now is None:
        now = datetime.datetime.utcnow()

    if self.simulation_steps_left <= 0:
        raise AssertionError(
            "Simulated progress too many times; bailing out")
    self.simulation_steps_left -= 1

    job_flow = self.mock_emr_job_flows[jobflow_id]

    # if job is STARTING, move it along to WAITING
    if job_flow.state == "STARTING":
        job_flow.state = "WAITING"
        job_flow.startdatetime = to_iso8601(now)

    # if job is done, don't advance it
    if job_flow.state in ("COMPLETED", "TERMINATED", "FAILED"):
        return

    # if SHUTTING_DOWN, finish shutting down
    if job_flow.state == "SHUTTING_DOWN":
        if job_flow.reason == "Shut down as step failed":
            job_flow.state = "FAILED"
        else:
            job_flow.state = "TERMINATED"
        job_flow.enddatetime = to_iso8601(now)
        return

    # if a step is currently running, advance it
    for step_num, step in enumerate(job_flow.steps):
        # skip steps that are already done
        if step.state in ("COMPLETED", "FAILED", "CANCELLED"):
            continue

        if step.name in ("Setup Hadoop Debugging",):
            step.state = "COMPLETED"
            continue

        # found currently running step! going to handle it, then exit
        if step.state == "PENDING":
            step.state = "RUNNING"
            step.startdatetime = to_iso8601(now)
            return

        assert step.state == "RUNNING"
        step.enddatetime = to_iso8601(now)

        # check if we're supposed to have an error
        if (jobflow_id, step_num) in self.mock_emr_failures:
            step.state = "FAILED"
            reason = self.mock_emr_failures[(jobflow_id, step_num)]
            if reason:
                job_flow.reason = reason
            if step.actiononfailure == "TERMINATE_JOB_FLOW":
                job_flow.state = "SHUTTING_DOWN"
                if not reason:
                    job_flow.reason = "Shut down as step failed"
            return

        step.state = "COMPLETED"

        # create fake output if we're supposed to write to S3
        output_uri = self._get_step_output_uri(step)
        if output_uri and is_s3_uri(output_uri):
            mock_output = self.mock_emr_output.get(
                (jobflow_id, step_num)) or [""]

            bucket_name, key_name = parse_s3_uri(output_uri)

            # write output to S3
            for i, part in enumerate(mock_output):
                add_mock_s3_data(self.mock_s3_fs, {
                    bucket_name: {key_name + "part-%05d" % i: part}})
        elif (jobflow_id, step_num) in self.mock_emr_output:
            raise AssertionError(
                "can't use output for job flow ID %s, step %d "
                "(it doesn't output to S3)" %
                (jobflow_id, step_num))

        # done!
        return

    # no pending steps. shut down job if appropriate
    if job_flow.keepjobflowalivewhennosteps == "true":
        job_flow.state = "WAITING"
        job_flow.reason = "Waiting for steps to run"
    else:
        job_flow.state = "COMPLETED"
        job_flow.reason = "Steps Completed"
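As the docstring notes, simulate_progress runs each time describe_jobflow() is called, so a test can drive the mock to completion simply by polling. A hedged sketch of such a driver loop follows; the mock connection object and the sketch_wait_for_job_flow name are assumptions based on the snippets above, not mrjob's actual test helpers.

# Illustrative driver loop for the mock (helper name is hypothetical).
def sketch_wait_for_job_flow(mock_emr_conn, jobflow_id):
    while True:
        # per the docstring above, describe_jobflow() triggers
        # simulate_progress() on each call
        job_flow = mock_emr_conn.describe_jobflow(jobflow_id)
        if job_flow.state in ("COMPLETED", "TERMINATED", "FAILED"):
            return job_flow.state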