本文整理汇总了Python中mrjob.util.read_input函数的典型用法代码示例。如果您正苦于以下问题:Python read_input函数的具体用法?Python read_input怎么用?Python read_input使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了read_input函数的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: line_group_generator
def line_group_generator(input_path):
    """Yield groups of lines read from *input_path*.

    ``keep_sorted`` is a name defined outside this function.  When it is
    truthy, consecutive lines that share the same key (the text before
    the first tab) are yielded together as a single group.  Otherwise
    each line is yielded by itself as a one-element tuple.
    """
    if not keep_sorted:
        # no grouping requested: every line forms its own group
        for single_line in read_input(input_path):
            yield (single_line,)
        return

    def reducer_key(line):
        # input is assumed to be a collection of key <tab> value pairs;
        # the key is everything before the first tab
        return line.partition('\t')[0]

    grouped = itertools.groupby(read_input(input_path), key=reducer_key)
    for _key, group in grouped:
        yield group
示例2: _read_input
def _read_input(self):
    """Yield input lines, one at a time, from every path in ``self.args``.

    If ``self.args`` is empty, the single path ``-`` is used instead.
    Each path is passed to ``read_input()`` together with ``self.stdin``,
    which is relied on to:

    - resolve globs (``foo_*.gz``)
    - decompress ``.gz`` and ``.bz2`` files
    - read from STDIN when the path is ``-``
    - recursively read all files in a directory
    """
    input_paths = self.args
    if not input_paths:
        # no explicit paths given: fall back to '-'
        input_paths = ['-']

    for input_path in input_paths:
        for input_line in read_input(input_path, stdin=self.stdin):
            yield input_line
示例3: test_bad_glob
def test_bad_glob(self):
    # read_input is a generator, so creating it raises nothing; the
    # IOError is only expected once list() starts consuming it
    unmatched_pattern = os.path.join(self.tmpdir, 'lions*')
    input_lines = read_input(unmatched_pattern)
    self.assertRaises(IOError, list, input_lines)
示例4: test_dir_recursion
def test_dir_recursion(self):
    # reading the temp directory itself is expected to produce four
    # copies of BEAVER_DATA
    actual = list(read_input(self.tmpdir))
    expected = [self.BEAVER_DATA] * 4
    self.assertEqual(actual, expected)
示例5: test_glob_including_dir
def test_glob_including_dir(self):
    # the 'beavers*' pattern is expected to yield four copies of
    # BEAVER_DATA in total
    pattern = os.path.join(self.tmpdir, 'beavers*')
    actual = list(read_input(pattern))
    expected = [self.BEAVER_DATA] * 4
    self.assertEqual(actual, expected)
示例6: test_stdin_can_be_iterator
def test_stdin_can_be_iterator(self):
    # stdin is given as a plain list rather than a file object; every
    # item should come back unchanged when the path is '-'
    fake_stdin = [self.BEAVER_DATA] * 5
    actual = list(read_input('-', stdin=fake_stdin))
    expected = [self.BEAVER_DATA] * 5
    self.assertEqual(actual, expected)
示例7: test_dir
def test_dir(self):
    # reading the 'beavers/' directory is expected to yield exactly
    # one copy of BEAVER_DATA
    dir_path = os.path.join(self.tmpdir, 'beavers/')
    actual = list(read_input(dir_path))
    self.assertEqual(actual, [self.BEAVER_DATA])
示例8: test_glob
def test_glob(self):
    # the 'beavers.*' pattern is expected to yield three copies of
    # BEAVER_DATA
    pattern = os.path.join(self.tmpdir, 'beavers.*')
    actual = list(read_input(pattern))
    expected = [self.BEAVER_DATA] * 3
    assert_equal(actual, expected)
示例9: test_stdin
def test_stdin(self):
    # with path '-', input is taken from the supplied stdin object,
    # here an in-memory bytes buffer holding BEAVER_DATA
    fake_stdin = BytesIO(self.BEAVER_DATA)
    actual = list(read_input('-', stdin=fake_stdin))
    self.assertEqual(actual, [self.BEAVER_DATA])
示例10: test_bz2_file
def test_bz2_file(self):
    # reading 'beavers.bz2' is expected to yield BEAVER_DATA itself
    bz2_path = os.path.join(self.tmpdir, 'beavers.bz2')
    actual = list(read_input(bz2_path))
    assert_equal(actual, [self.BEAVER_DATA])
示例11: test_stdin
def test_stdin(self):
    # with path '-', input is taken from the supplied stdin object,
    # here an in-memory StringIO holding BEAVER_DATA
    fake_stdin = StringIO(self.BEAVER_DATA)
    actual = list(read_input('-', stdin=fake_stdin))
    assert_equal(actual, [self.BEAVER_DATA])
示例12: test_glob
def test_glob(self):
    # the "beavers.*" pattern is expected to yield three copies of
    # BEAVER_DATA
    glob_path = os.path.join(self.tmpdir, "beavers.*")
    found = list(read_input(glob_path))
    self.assertEqual(found, [self.BEAVER_DATA] * 3)
示例13: test_bz2_file
def test_bz2_file(self):
    # reading "beavers.bz2" is expected to yield BEAVER_DATA itself
    archive_path = os.path.join(self.tmpdir, "beavers.bz2")
    found = list(read_input(archive_path))
    self.assertEqual(found, [self.BEAVER_DATA])
示例14: test_stdin
def test_stdin(self):
    # with path "-", input is taken from the supplied stdin object,
    # here an in-memory StringIO holding BEAVER_DATA
    stdin_buffer = StringIO(self.BEAVER_DATA)
    found = list(read_input("-", stdin=stdin_buffer))
    self.assertEqual(found, [self.BEAVER_DATA])