本文整理汇总了Python中mrjob.util.to_lines函数的典型用法代码示例。如果您正苦于以下问题:Python to_lines函数的具体用法?Python to_lines怎么用?Python to_lines使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了to_lines函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _assert_output_matches
def _assert_output_matches(
        self, job_class, input_bytes=b'', input_paths=(), job_args=None):
    """Assert that the harness job and the reference job agree on output.

    Builds one job with ``self._reference_job()`` and one with
    ``self._harness_job()`` from the same arguments, runs both, and
    compares their sorted output lines.

    :param job_class: job class handed to both helper methods
    :param input_bytes: passed through to both helpers as ``input_bytes``
    :param input_paths: passed through to both helpers as ``input_paths``
    :param job_args: passed through to both helpers as ``job_args``;
                     ``None`` (the default) means an empty list
    """
    # use a fresh list per call rather than a shared mutable default, so
    # nothing a helper does to the list can leak into a later call
    if job_args is None:
        job_args = []

    # run classes defined in this module in inline mode, classes
    # with their own script files in local mode. used by
    # test_skip_combiner_that_runs_cmd()
    if job_class.__module__ == __name__:
        runner_alias = 'inline'
    else:
        runner_alias = 'local'

    reference_job = self._reference_job(
        job_class, input_bytes=input_bytes,
        input_paths=input_paths,
        job_args=job_args,
        runner_alias=runner_alias)

    with reference_job.make_runner() as runner:
        runner.run()

        # read output while the runner is still open
        reference_output = sorted(to_lines(runner.cat_output()))

    harness_job = self._harness_job(
        job_class, input_bytes=input_bytes,
        input_paths=input_paths,
        job_args=job_args)

    with harness_job.make_runner() as runner:
        runner.run()

        harness_output = sorted(to_lines(runner.cat_output()))

    self.assertEqual(harness_output, reference_output)
示例2: test_mixed_job
def test_mixed_job(self):
    """Run a job that combines streaming and spark steps."""
    job = MRStreamingAndSpark(['-r', 'spark'])
    stdin = BytesIO(b'foo\nbar\n')
    job.sandbox(stdin=stdin)

    with job.make_runner() as runner:
        runner.run()

        # converts to 'null\t"foo"', 'null\t"bar"' and then counts chars
        expected_counts = [
            b'\t 2\n',
            b'" 4\n',
            b'a 1\n',
            b'b 1\n',
            b'f 1\n',
            b'l 4\n',
            b'n 2\n',
            b'o 2\n',
            b'r 1\n',
            b'u 2\n',
        ]
        actual_counts = sorted(to_lines(runner.cat_output()))

        self.assertEqual(actual_counts, expected_counts)
示例3: test_no_trailing_newline
def test_no_trailing_newline(self):
    """The last line is still produced when input lacks a final newline."""
    chunks = iter([
        b'Alouette,\ngentille',
        b' Alouette.',
    ])

    lines = list(to_lines(chunks))

    self.assertEqual(lines, [b'Alouette,\n', b'gentille Alouette.'])
示例4: test_python_dash_v_as_python_bin
def test_python_dash_v_as_python_bin(self):
    """Use ``python -v`` as the python binary and check stderr and output."""
    python_cmd = cmd_line([sys.executable or 'python', '-v'])
    job_args = ['--python-bin', python_cmd, '--no-conf', '-r', 'local']

    mr_job = MRTwoStepJob(job_args)
    mr_job.sandbox(stdin=[b'bar\n'])

    with mr_job.make_runner() as runner:
        runner.run()

        def stderr_has_line(matches):
            # re-open the first mapper task's stderr for each check
            with open(runner._task_stderr_path('mapper', 0, 0)) as f:
                return any(matches(line) for line in f)

        # expect python -v crud in stderr
        self.assertTrue(stderr_has_line(
            lambda line: (
                'import mrjob' in line or  # Python 2
                "import 'mrjob'" in line)))
        self.assertTrue(stderr_has_line(lambda line: '#' in line))

        # should still get expected results
        expected = sorted([b'1\tnull\n', b'1\t"bar"\n'])
        actual = sorted(to_lines(runner.cat_output()))
        self.assertEqual(actual, expected)
示例5: test_loading_bootstrapped_mrjob_library
def test_loading_bootstrapped_mrjob_library(self):
    """With --bootstrap-mrjob, the script loads mrjob from the tmp dir."""
    # compare directories rather than full paths: we may have loaded
    # mrjob from a .py file while the script loads the .pyc compiled
    # from that same .py file
    mrjob_file = os.path.realpath(mrjob.__file__)
    our_mrjob_dir = os.path.dirname(mrjob_file)

    with mrjob_conf_patcher():
        mr_job = MRJobWhereAreYou(['-r', 'local', '--bootstrap-mrjob'])
        mr_job.sandbox()

        with mr_job.make_runner() as runner:
            # sanity check
            self.assertEqual(runner._bootstrap_mrjob(), True)
            tmp_dir = runner._get_local_tmp_dir()
            local_tmp_dir = os.path.realpath(tmp_dir)

            runner.run()

            output_lines = list(to_lines(runner.cat_output()))
            self.assertEqual(len(output_lines), 1)

            # script should load mrjob from its working dir
            first_line = output_lines[0]
            _, script_mrjob_dir = mr_job.parse_output_line(first_line)

            self.assertNotEqual(our_mrjob_dir, script_mrjob_dir)
            self.assertTrue(script_mrjob_dir.startswith(local_tmp_dir))
示例6: test_cat_output
def test_cat_output(self):
    """Only the part files' contents are expected back from cat_output()."""
    join = os.path.join
    root = self.tmp_dir

    # subdirectories that will hold some of the files below
    for dir_name in ('a', 'b', '_logs'):
        os.mkdir(join(root, dir_name))

    # (path components under the output dir, text to write), in write order
    fixtures = [
        (('a', 'part-00000'), 'A'),
        (('b', 'part-00001'), 'B'),
        (('part-00002',), 'C'),
        (('_logs', 'log.xml'), '<XML XML XML/>'),
        (('_SUCCESS',), 'I win'),
    ]
    for components, text in fixtures:
        with open(join(root, *components), 'w') as f:
            f.write(text)

    runner = InlineMRJobRunner(conf_paths=[], output_dir=root)
    lines = sorted(to_lines(runner.cat_output()))

    self.assertEqual(lines, [b'A', b'B', b'C'])
示例7: test_buffered_lines
def test_buffered_lines(self):
    """Lines that straddle chunk boundaries come out whole."""
    pieces = [
        b'The quick\nbrown fox\nju',
        b'mped over\nthe lazy\ndog',
        b's.\n',
    ]
    # feed a generator, not the list itself
    chunks = (piece for piece in pieces)

    expected_lines = [
        b'The quick\n',
        b'brown fox\n',
        b'jumped over\n',
        b'the lazy\n',
        b'dogs.\n',
    ]

    self.assertEqual(list(to_lines(chunks)), expected_lines)
示例8: test_multiple_2
def test_multiple_2(self):
    """Counts printed by the ``wc -l`` reducer should add up to 3."""
    data = b'x\ny\nz\n'
    job_args = [
        '--mapper-cmd=cat',
        '--reducer-cmd-2', 'wc -l',
        '--runner=local',
        '--no-conf',
    ]

    job = MRCmdJob(job_args)
    job.sandbox(stdin=BytesIO(data))

    with job.make_runner() as runner:
        runner.run()

        # each output line is parsed as an integer and accumulated
        total = 0
        for line in to_lines(runner.cat_output()):
            total += int(line)

        self.assertEqual(total, 3)
示例9: test_long_lines
def test_long_lines(self):
    """Lines much longer than a single chunk are reassembled intact."""
    first_line = b'a' * 10000 + b'\n'
    second_line = b'b' * 1000 + b'\n'
    third_line = b'last\n'
    super_long_line = first_line + second_line + third_line

    # slice the input into consecutive 1024-byte chunks, lazily
    chunk_size = 1024
    offsets = range(0, len(super_long_line), chunk_size)
    chunks = (
        super_long_line[start:start + chunk_size] for start in offsets)

    self.assertEqual(
        list(to_lines(chunks)),
        [first_line, second_line, third_line])
示例10: parse_output
def parse_output(self, chunks):
    """Turn this job's final output, given as a stream of byte chunks,
    into a stream of ``(key, value)``.

    The chunks are regrouped into lines with ``to_lines()`` and each line
    is decoded by the ``read()`` method of ``self.output_protocol()``.
    """
    # instantiate the protocol once and reuse its bound read() method
    decode_line = self.output_protocol().read

    for raw_line in to_lines(chunks):
        yield decode_line(raw_line)
示例11: test_read_all_non_hidden_files
def test_read_all_non_hidden_files(self):
    """Files at the top level and in a subdirectory are both read."""
    # (path components under the output dir, file contents)
    fixtures = [
        (('baz',), b'qux\n'),
        (('foo', 'bar'), b'baz\n'),
    ]
    for components, contents in fixtures:
        self.makefile(os.path.join(self.output_dir, *components), contents)

    lines = sorted(to_lines(self.runner.cat_output()))

    self.assertEqual(lines, [b'baz\n', b'qux\n'])
示例12: test_output_dir_not_considered_hidden
def test_output_dir_not_considered_hidden(self):
    """Output is still read when the output dir's own path components
    start with an underscore.
    """
    output_dir = os.path.join(self.tmp_dir, '_hidden', '_output_dir')

    self.makefile(os.path.join(output_dir, 'part-00000'),
                  b'cats\n')

    runner = InlineMRJobRunner(conf_paths=[], output_dir=output_dir)

    # read through cat_output(), like the other output tests, rather than
    # the deprecated stream_output(), which logs a warning and would be
    # passed through to_lines() a second time here
    self.assertEqual(sorted(to_lines(runner.cat_output())),
                     [b'cats\n'])
示例13: test_eof_without_trailing_newline
def test_eof_without_trailing_newline(self):
    """An empty chunk ends the pending line even without a newline."""
    chunks = iter([
        b'Alouette,\ngentille',
        b' Alouette.',
        b'',  # treated as EOF
        b'Allouette,\nje te p',
        b'lumerais.',
    ])

    expected_lines = [
        b'Alouette,\n',
        b'gentille Alouette.',
        b'Allouette,\n',
        b'je te plumerais.',
    ]

    self.assertEqual(list(to_lines(chunks)), expected_lines)
示例14: stream_output
def stream_output(self):
    """Like :py:meth:`cat_output` except that it groups bytes into
    lines. Equivalent to ``mrjob.util.to_lines(runner.cat_output())``.

    Logs a deprecation warning every time it is called.

    .. deprecated:: 0.6.0

       Use ``mrjob.util.to_lines(runner.cat_output())`` instead.
    """
    log.warning('stream_output() is deprecated and will be removed in'
                ' v0.7.0. use mrjob.util.to_lines(runner.cat_output())'
                ' instead.')

    # delegate to the replacement named in the warning above
    return to_lines(self.cat_output())
示例15: _cat_log_lines
def _cat_log_lines(fs, path):
    """Yield lines from the given log.

    Log errors rather than raising them.

    :param fs: filesystem object; its ``exists()`` and ``cat()`` methods
               are called with *path*
    :param path: path of the log to read

    Each line is passed through ``to_unicode()`` before being yielded.
    Nothing is yielded if ``fs.exists(path)`` is false.
    """
    try:
        if not fs.exists(path):
            return

        for line in to_lines(fs.cat(path)):
            yield to_unicode(line)
    except (IOError, OSError) as e:
        # hand path and exception to the logger as arguments, so the
        # message is only formatted if the warning is actually emitted
        log.warning("couldn't cat() %s: %r", path, e)