本文整理汇总了Python中mrjob.util.log_to_stream函数的典型用法代码示例。如果您正苦于以下问题:Python log_to_stream函数的具体用法?Python log_to_stream怎么用?Python log_to_stream使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了log_to_stream函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_messy_error
def test_messy_error(self):
    """An unparseable counter line should warn, not crash.

    parse_hadoop_counters_from_line() should return (None, None) and
    log a warning to the 'mrjob.parse' logger.
    """
    counter_string = 'Job JOBID="_001" FAILED_REDUCES="0" COUNTERS="THIS IS NOT ACTUALLY A COUNTER"'
    with no_handlers_for_logger(''):
        stderr = StringIO()
        # capture WARN-and-above messages from mrjob.parse
        log_to_stream('mrjob.parse', stderr, level=logging.WARN)
        assert_equal((None, None),
                     parse_hadoop_counters_from_line(counter_string))
        assert_in('Cannot parse Hadoop counter line', stderr.getvalue())
示例2: test_failed_job
def test_failed_job(self):
    """A Dataproc job that reaches ERROR state should raise
    StepFailedException, log the state transition, and delete the cluster.
    """
    mr_job = MRTwoStepJob(['-r', 'dataproc', '-v'])
    mr_job.sandbox()

    with no_handlers_for_logger('mrjob.dataproc'):
        stderr = StringIO()
        log_to_stream('mrjob.dataproc', stderr)

        # drive the mock job through its states, ending in ERROR
        self._dataproc_client.job_get_advances_states = (
            collections.deque(['SETUP_DONE', 'RUNNING', 'ERROR']))

        with mr_job.make_runner() as runner:
            self.assertIsInstance(runner, DataprocJobRunner)

            self.assertRaises(StepFailedException, runner.run)

            # the state transition should have been logged
            self.assertIn(' => ERROR\n', stderr.getvalue())

            cluster_id = runner.get_cluster_id()

    # job should get terminated
    cluster = (
        self._dataproc_client._cache_clusters[_TEST_PROJECT][cluster_id])
    cluster_state = self._dataproc_client.get_state(cluster)
    self.assertEqual(cluster_state, 'DELETING')
示例3: test_cleanup_options
def test_cleanup_options(self):
    """Deprecated cleanup option names should be translated to their
    new names, with a deprecation warning logged for each one.
    """
    stderr = StringIO()
    with no_handlers_for_logger('mrjob.runner'):
        log_to_stream('mrjob.runner', stderr)
        opts = RunnerOptionStore(
            'inline',
            dict(cleanup=['LOCAL_SCRATCH', 'REMOTE_SCRATCH'],
                 cleanup_on_failure=['JOB_FLOW', 'SCRATCH']),
            [])

    # old names should be rewritten to the new ones
    self.assertEqual(opts['cleanup'], ['LOCAL_TMP', 'CLOUD_TMP'])
    self.assertIn(
        'Deprecated cleanup option LOCAL_SCRATCH has been renamed'
        ' to LOCAL_TMP', stderr.getvalue())
    self.assertIn(
        'Deprecated cleanup option REMOTE_SCRATCH has been renamed'
        ' to CLOUD_TMP', stderr.getvalue())

    self.assertEqual(opts['cleanup_on_failure'], ['CLUSTER', 'TMP'])
    self.assertIn(
        'Deprecated cleanup_on_failure option JOB_FLOW has been'
        ' renamed to CLUSTER', stderr.getvalue())
    self.assertIn(
        'Deprecated cleanup_on_failure option SCRATCH has been renamed'
        ' to TMP', stderr.getvalue())
示例4: test_non_log_lines
def test_non_log_lines(self):
    """Leading non-log lines should be skipped (with one warning each),
    and trailing non-log lines folded into the last multi-line message.
    """
    lines = StringIO('foo\n'
                     'bar\n'
                     '15/12/11 13:26:08 ERROR streaming.StreamJob:'
                     ' Error Launching job :'
                     ' Output directory already exists\n'
                     'Streaming Command Failed!')
    with no_handlers_for_logger('mrjob.logs.parse'):
        stderr = StringIO()
        log_to_stream('mrjob.logs.parse', stderr)

        self.assertEqual(
            list(_parse_hadoop_log_lines(lines)), [
                # ignore leading non-log lines
                dict(
                    timestamp='15/12/11 13:26:08',
                    level='ERROR',
                    logger='streaming.StreamJob',
                    thread=None,
                    # no way to know that Streaming Command Failed! wasn't
                    # part of a multi-line message
                    message=('Error Launching job :'
                             ' Output directory already exists\n'
                             'Streaming Command Failed!'))
            ])

        # should be one warning for each leading non-log line
        log_lines = stderr.getvalue().splitlines()
        self.assertEqual(len(log_lines), 2)
示例5: main
def main():
    """Create a persistent EMR job flow and print its ID to stdout.

    Parses command-line options, optionally enables mrjob logging, then
    launches the flow via EMRJobRunner.make_persistent_job_flow().
    """
    # parse command-line args
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()
    if args:
        option_parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # create the persistent job
    runner_kwargs = {
        'conf_path': options.conf_path,
        'ec2_instance_type': options.ec2_instance_type,
        'ec2_master_instance_type': options.ec2_master_instance_type,
        'ec2_slave_instance_type': options.ec2_slave_instance_type,
        'label': options.label,
        'num_ec2_instances': options.num_ec2_instances,
        'owner': options.owner,
    }
    runner = EMRJobRunner(**runner_kwargs)
    emr_job_flow_id = runner.make_persistent_job_flow()
    # print() form is valid on both Python 2 and 3 (the bare
    # `print emr_job_flow_id` statement is Python-2-only)
    print(emr_job_flow_id)
示例6: test_non_log_lines
def test_non_log_lines(self):
    """Same contract as example 4, double-quoted variant: leading
    non-log lines are skipped with one warning each; trailing text is
    folded into the last message.
    """
    lines = StringIO(
        "foo\n"
        "bar\n"
        "15/12/11 13:26:08 ERROR streaming.StreamJob:"
        " Error Launching job :"
        " Output directory already exists\n"
        "Streaming Command Failed!"
    )
    with no_handlers_for_logger("mrjob.logs.parse"):
        stderr = StringIO()
        log_to_stream("mrjob.logs.parse", stderr)

        self.assertEqual(
            list(_parse_hadoop_log_lines(lines)),
            [
                # ignore leading non-log lines
                dict(
                    timestamp="15/12/11 13:26:08",
                    level="ERROR",
                    logger="streaming.StreamJob",
                    thread=None,
                    # no way to know that Streaming Command Failed! wasn't
                    # part of a multi-line message
                    message=(
                        "Error Launching job :" " Output directory already exists\n" "Streaming Command Failed!"
                    ),
                )
            ],
        )

        # should be one warning for each leading non-log line
        log_lines = stderr.getvalue().splitlines()
        self.assertEqual(len(log_lines), 2)
示例7: test_deprecated_mapper_final_positional_arg
def test_deprecated_mapper_final_positional_arg(self):
    """Passing mapper_final positionally to MRJob.mr() is allowed but
    logs a deprecation warning; passing it both positionally and as a
    keyword raises TypeError.
    """
    def mapper(k, v):
        pass

    def reducer(k, v):
        pass

    def mapper_final():
        pass

    stderr = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', stderr)
        step = MRJob.mr(mapper, reducer, mapper_final)

    # should be allowed to specify mapper_final as a positional arg,
    # but we log a warning
    self.assertEqual(
        step,
        MRJob.mr(
            mapper=mapper, reducer=reducer, mapper_final=mapper_final))
    self.assertIn('mapper_final should be specified', stderr.getvalue())

    # can't specify mapper_final as a positional and keyword arg
    self.assertRaises(
        TypeError,
        MRJob.mr,
        mapper,
        reducer,
        mapper_final,
        mapper_final=mapper_final)
示例8: assert_hadoop_version
def assert_hadoop_version(self, JobClass, version_string):
    """Assert that JobClass's jobconf() coerces hadoop_version to the
    given string, logging a "should be a string" warning on mrjob.job.

    :param JobClass: MRJob subclass to instantiate
    :param version_string: expected value of jobconf()['hadoop_version']
    """
    mr_job = JobClass()
    mock_log = StringIO()
    with no_handlers_for_logger("mrjob.job"):
        log_to_stream("mrjob.job", mock_log)
        self.assertEqual(mr_job.jobconf()["hadoop_version"], version_string)
        self.assertIn("should be a string", mock_log.getvalue())
示例9: test_mixed_behavior_2
def test_mixed_behavior_2(self):
    """A job mixing old- and new-style protocol declarations should use
    the class-level protocol and log a 'custom behavior' warning.
    """
    stderr = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', stderr)
        mr_job = self.MRInconsistentJob2()

        self.assertEqual(mr_job.options.input_protocol, None)
        self.assertEqual(mr_job.input_protocol().__class__, ReprProtocol)
        self.assertIn('custom behavior', stderr.getvalue())
示例10: updated_and_warnings
def updated_and_warnings(self, jobconf, hadoop_version):
    """Run _update_jobconf_for_hadoop_version() on a copy of *jobconf*
    and return (updated_jobconf, captured_warning_text).

    :param jobconf: dict of jobconf variables (not mutated)
    :param hadoop_version: Hadoop version string to translate names for
    """
    jobconf = jobconf.copy()
    with no_handlers_for_logger("mrjob.runner"):
        stderr = StringIO()
        log_to_stream("mrjob.runner", stderr)
        self.runner._update_jobconf_for_hadoop_version(
            jobconf, hadoop_version)
    return jobconf, stderr.getvalue()
示例11: test_default_protocols
def test_default_protocols(self):
    """A job using only default protocols should get the documented
    defaults with no deprecation warning logged.
    """
    stderr = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', stderr)
        mr_job = MRBoringJob()

        self.assertEqual(mr_job.options.input_protocol, 'raw_value')
        self.assertEqual(mr_job.options.protocol, 'json')
        self.assertEqual(mr_job.options.output_protocol, 'json')

        # defaults should not trigger a deprecation warning
        self.assertNotIn('deprecated', stderr.getvalue())
示例12: test_overriding_explicit_default_protocols
def test_overriding_explicit_default_protocols(self):
    """--protocol should override the job's explicit defaults, and using
    the deprecated option should log a deprecation warning.
    """
    stderr = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.job', stderr)
        mr_job = self.MRBoringJob2(args=['--protocol=json'])

        self.assertEqual(mr_job.options.input_protocol, 'json')
        self.assertEqual(mr_job.options.protocol, 'json')
        self.assertEqual(mr_job.options.output_protocol, 'repr')

        # the deprecated --protocol switch should produce a warning
        self.assertIn('deprecated', stderr.getvalue())
示例13: main
def main():
    """Find idle EMR job flows and terminate those idle too long.

    Classifies each job flow as done, running, or idle; job flows idle
    for more than --max-hours-idle hours are passed to
    terminate_and_notify().
    """
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()
    if args:
        option_parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    emr_conn = EMRJobRunner().make_emr_conn()

    log.info(
        'getting info about all job flows (this goes back about 2 weeks)')
    job_flows = emr_conn.describe_jobflows()

    now = datetime.utcnow()

    num_running = 0
    num_idle = 0
    num_done = 0
    # a list of tuples of job flow id, name, idle time (as a timedelta)
    to_terminate = []

    for jf in job_flows:
        # check if job flow is done
        if hasattr(jf, 'enddatetime'):
            num_done += 1

        # check if job flow is currently running
        elif jf.steps and not hasattr(jf.steps[-1], 'enddatetime'):
            num_running += 1

        # job flow is idle. how long?
        else:
            num_idle += 1
            if jf.steps:
                # idle since the last step finished
                idle_since = datetime.strptime(
                    jf.steps[-1].enddatetime, ISO8601)
            else:
                # never ran a step; idle since creation
                idle_since = datetime.strptime(
                    jf.creationdatetime, ISO8601)
            idle_time = now - idle_since

            # don't care about fractions of a second
            idle_time = timedelta(idle_time.days, idle_time.seconds)

            log.debug('Job flow %s (%s) idle for %s' %
                      (jf.jobflowid, jf.name, idle_time))
            if idle_time > timedelta(hours=options.max_hours_idle):
                to_terminate.append(
                    (jf.jobflowid, jf.name, idle_time))

    log.info('Job flow statuses: %d running, %d idle, %d done' %
             (num_running, num_idle, num_done))

    terminate_and_notify(emr_conn, to_terminate, options)
示例14: get_debug_printout
def get_debug_printout(self, opt_store_class, alias, opts):
    """Construct an option store and return the debug text it logs to
    mrjob.runner during construction.

    :param opt_store_class: RunnerOptionStore (sub)class to construct
    :param alias: runner alias (e.g. 'inline')
    :param opts: dict of options to pass to the option store
    """
    stderr = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.runner', stderr, debug=True)

        # debug printout happens in constructor
        opt_store_class(alias, opts, [])

    return stderr.getvalue()
示例15: test_option_debug_printout
def test_option_debug_printout(self):
    """Constructing a runner with debug logging on should print the
    options (keys and values) to the mrjob.runner log.
    """
    stderr = StringIO()
    with no_handlers_for_logger():
        log_to_stream('mrjob.runner', stderr, debug=True)

        InlineMRJobRunner(owner='dave')

        self.assertIn("'owner'", stderr.getvalue())
        self.assertIn("'dave'", stderr.getvalue())