本文整理汇总了Python中mrjob.fs.hadoop.HadoopFilesystem._cat_file方法的典型用法代码示例。如果您正苦于以下问题:Python HadoopFilesystem._cat_file方法的具体用法?Python HadoopFilesystem._cat_file怎么用?Python HadoopFilesystem._cat_file使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类mrjob.fs.hadoop.HadoopFilesystem
的用法示例。
在下文中一共展示了HadoopFilesystem._cat_file方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_logging_stderr_in_cleanup
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import _cat_file [as 别名]
def test_logging_stderr_in_cleanup(self):
    """Lines read from the hadoop process's stderr should be logged
    (via ``log.error``) while ``_cat_file()`` streams stdout."""
    def fake_popen(*args, **kwargs):
        # Stand-in for Popen: a process stub whose stdout/stderr
        # iterate over canned byte lines and which exits cleanly.
        proc = MagicMock()
        proc.stdout = MagicMock()
        proc.stdout.__iter__.return_value = [b'line1\n', b'line2\n']
        proc.stderr = MagicMock()
        proc.stderr.__iter__.return_value = [
            b'Emergency, everybody to get from street\n']
        proc.wait.return_value = 0
        return proc

    self.start(patch('mrjob.fs.hadoop.Popen', fake_popen))
    mock_log = self.start(patch('mrjob.fs.hadoop.log'))

    fs = HadoopFilesystem()
    data = b''.join(fs._cat_file('/some/path'))

    # stdout comes back verbatim; stderr is logged exactly once
    self.assertEqual(data, b'line1\nline2\n')
    mock_log.error.assert_called_once_with(
        'STDERR: Emergency, everybody to get from street')
示例2: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import _cat_file [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
    """Exercise HadoopFilesystem against a mocked-out ``hadoop`` binary.

    NOTE(review): this snapshot is Python 2-era code — ``assertItemsEqual``
    and ``bz2.compress`` on ``str`` do not exist / do not work on Python 3.
    """

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(["hadoop"])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Build a fake hadoop home and the env vars the mock binary reads."""
        self.env = {}
        self.env["HADOOP_HOME"] = self.makedirs("mock_hadoop_home")
        jar_path = os.path.join(
            "mock_hadoop_home", "contrib", "streaming",
            "hadoop-0.X.Y-streaming.jar")
        self.makefile(jar_path, "i are java bytecode")
        self.env["MOCK_HDFS_ROOT"] = self.makedirs("mock_hdfs_root")
        self.env["MOCK_HADOOP_OUTPUT"] = self.makedirs("mock_hadoop_output")
        self.env["USER"] = "mrjob_tests"
        # don't set MOCK_HADOOP_LOG; we get command history other ways

    def make_mock_file(self, name, contents="contents"):
        # drop a file into the fake HDFS root, returning its local path
        return self.makefile(os.path.join("mock_hdfs_root", name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls("hdfs:///")), [])

    def test_ls_basic(self):
        self.make_mock_file("f")
        self.assertEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///f"])

    def test_ls_basic_2(self):
        self.make_mock_file("f")
        self.make_mock_file("f2")
        listing = list(self.fs.ls("hdfs:///"))
        self.assertItemsEqual(listing, ["hdfs:///f", "hdfs:///f2"])

    def test_ls_recurse(self):
        self.make_mock_file("f")
        self.make_mock_file("d/f2")
        listing = list(self.fs.ls("hdfs:///"))
        self.assertItemsEqual(listing, ["hdfs:///f", "hdfs:///d/f2"])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file("f", "foo")
        self.make_mock_file("f3 win", "foo" * 10)
        listing = list(self.fs.ls("s3n://bucket/"))
        self.assertItemsEqual(
            listing, ["s3n://bucket/f", "s3n://bucket/f3 win"])

    def test_single_space(self):
        self.make_mock_file("foo bar")
        self.assertItemsEqual(
            list(self.fs.ls("hdfs:///")), ["hdfs:///foo bar"])

    def test_double_space(self):
        # NOTE(review): filename shown is identical to test_single_space's;
        # the intended double space looks collapsed by formatting -- confirm
        # against upstream mrjob tests.
        self.make_mock_file("foo bar")
        self.assertItemsEqual(
            list(self.fs.ls("hdfs:///")), ["hdfs:///foo bar"])

    def test_cat_uncompressed(self):
        self.make_mock_file("data/foo", "foo\nfoo\n")
        remote_path = self.fs.path_join("hdfs:///data", "foo")
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), ["foo\n", "foo\n"])

    def test_cat_bz2(self):
        self.make_mock_file("data/foo.bz2", bz2.compress("foo\n" * 1000))
        remote_path = self.fs.path_join("hdfs:///data", "foo.bz2")
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), ["foo\n"] * 1000)

    def test_cat_gz(self):
        self.make_mock_file("data/foo.gz", gzip_compress("foo\n" * 10000))
        remote_path = self.fs.path_join("hdfs:///data", "foo.gz")
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), ["foo\n"] * 10000)

    def test_du(self):
        self.make_mock_file("data1", "abcd")
        self.make_mock_file("more/data2", "defg")
        self.make_mock_file("more/data3", "hijk")
        self.assertEqual(self.fs.du("hdfs:///"), 12)
        self.assertEqual(self.fs.du("hdfs:///data1"), 4)
        self.assertEqual(self.fs.du("hdfs:///more"), 8)
        self.assertEqual(self.fs.du("hdfs:///more/*"), 8)
        self.assertEqual(self.fs.du("hdfs:///more/data2"), 4)
        self.assertEqual(self.fs.du("hdfs:///more/data3"), 4)

    def test_mkdir(self):
        # mkdir should behave the same across a spread of Hadoop versions
        for hadoop_version in ["0.20.0", "0.23.0", "1.2.0", "2.0.0"]:
            self.env["MOCK_HADOOP_VERSION"] = hadoop_version
            self.fs.mkdir("hdfs:///d")
            local_path = os.path.join(self.tmp_dir, "mock_hdfs_root", "d")
            self.assertEqual(os.path.isdir(local_path), True)

    def test_path_exists_no(self):
        path = "hdfs:///f"
        self.assertEqual(self.fs.path_exists(path), False)
#.........这里部分代码省略.........
示例3: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import _cat_file [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
    """Sanity checks for HadoopFilesystem over the mock hadoop binary."""

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Create a fake hadoop home plus the env vars the mock binary reads."""
        streaming_jar = os.path.join(
            'mock_hadoop_home', 'contrib', 'streaming',
            'hadoop-0.X.Y-streaming.jar')
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
        self.makefile(streaming_jar, 'i are java bytecode')
        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG; command history is collected other ways

    def make_hdfs_file(self, name, contents):
        # write a file under the fake HDFS root, returning its local path
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_hdfs_file('f', 'contents')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('f2', 'contents')
        self.assertEqual(
            list(self.fs.ls('hdfs:///')), ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('d/f2', 'contents')
        self.assertEqual(
            list(self.fs.ls('hdfs:///')), ['hdfs:///f', 'hdfs:///d/f2'])

    def test_cat_uncompressed(self):
        # mockhadoop doesn't support compressed files, so we won't test for
        # it; this is only a sanity check anyway.
        self.makefile(
            os.path.join('mock_hdfs_root', 'data', 'foo'), 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), ['foo\n', 'foo\n'])

    def test_du(self):
        self.makefile(os.path.join('mock_hdfs_root', 'data1'), 'abcd')
        self.makedirs('mock_hdfs_root/more')
        self.makefile(os.path.join('mock_hdfs_root', 'more', 'data2'), 'defg')
        self.makefile(os.path.join('mock_hdfs_root', 'more', 'data3'), 'hijk')
        self.assertEqual(self.fs.du('hdfs:///'), 12)
        self.assertEqual(self.fs.du('hdfs:///data1'), 4)
        self.assertEqual(self.fs.du('hdfs:///more'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/*'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/data2'), 4)
        self.assertEqual(self.fs.du('hdfs:///more/data3'), 4)

    def test_mkdir(self):
        self.fs.mkdir('hdfs:///d')
        local_path = os.path.join(self.tmp_dir, 'mock_hdfs_root', 'd')
        self.assertEqual(os.path.isdir(local_path), True)

    def test_rm(self):
        local_path = self.make_hdfs_file('f', 'contents')
        self.assertEqual(os.path.exists(local_path), True)
        self.fs.rm('hdfs:///f')
        self.assertEqual(os.path.exists(local_path), False)

    def test_touchz(self):
        # mockhadoop doesn't implement this.
        pass
示例4: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import _cat_file [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
    """HadoopFilesystem tests against a mock Hadoop 2.7.1 install."""

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Set up a fake hadoop home and the mock-Hadoop env vars."""
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
        jar_path = os.path.join(
            'mock_hadoop_home', 'contrib', 'streaming',
            'hadoop-0.X.Y-streaming.jar')
        self.makefile(jar_path, 'i are java bytecode')
        self.env['MOCK_HADOOP_TMP'] = self.makedirs('mock_hadoop')
        self.env['MOCK_HADOOP_VERSION'] = '2.7.1'
        self.env['USER'] = 'mrjob_tests'

    def make_mock_file(self, name, contents='contents'):
        # drop a file into the mock HDFS root
        return self.makefile(
            os.path.join(get_mock_hdfs_root(self.env), name), contents)

    def test_cat_uncompressed(self):
        self.make_mock_file('data/foo', 'foo\nfoo\n')
        remote_path = self.fs.join('hdfs:///data', 'foo')
        self.assertEqual(
            b''.join(self.fs._cat_file(remote_path)), b'foo\nfoo\n')

    def test_cat_bz2(self):
        self.make_mock_file('data/foo.bz2', bz2.compress(b'foo\n' * 1000))
        remote_path = self.fs.join('hdfs:///data', 'foo.bz2')
        self.assertEqual(
            b''.join(self.fs._cat_file(remote_path)), b'foo\n' * 1000)

    def test_cat_gz(self):
        self.make_mock_file('data/foo.gz', gzip_compress(b'foo\n' * 10000))
        remote_path = self.fs.join('hdfs:///data', 'foo.gz')
        self.assertEqual(
            b''.join(self.fs._cat_file(remote_path)), b'foo\n' * 10000)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_mock_file('f')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_mock_file('f')
        self.make_mock_file('f2')
        self.assertEqual(
            sorted(self.fs.ls('hdfs:///')), ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_mock_file('f')
        self.make_mock_file('d/f2')
        self.assertEqual(
            sorted(self.fs.ls('hdfs:///')), ['hdfs:///d/f2', 'hdfs:///f'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        self.assertEqual(
            sorted(self.fs.ls('s3n://bucket/')),
            ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_ls_s3a(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        self.assertEqual(
            sorted(self.fs.ls('s3a://bucket/')),
            ['s3a://bucket/f', 's3a://bucket/f3 win'])

    def test_single_space(self):
        self.make_mock_file('foo bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')), ['hdfs:///foo bar'])
def test_double_space(self):
self.make_mock_file('foo bar')
self.assertEqual(sorted(self.fs.ls('hdfs:///')),
#.........这里部分代码省略.........
示例5: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import _cat_file [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
    """HadoopFilesystem tests, including tree helpers and write()."""

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Create a fake hadoop home and env vars for the mock binary."""
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
        streaming_jar = os.path.join(
            'mock_hadoop_home', 'contrib', 'streaming',
            'hadoop-0.X.Y-streaming.jar')
        self.makefile(streaming_jar, 'i are java bytecode')
        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG; we get command history other ways

    def make_hdfs_file(self, name, contents='contents'):
        # write a file under the fake HDFS root, returning its local path
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def make_hdfs_dir(self, name):
        # create a directory under the fake HDFS root
        return self.makedirs(os.path.join('mock_hdfs_root', name))

    def make_hdfs_tree(self, path, files=None):
        """Create a small tree of files under *path* and verify fs.ls()
        sees exactly those files; returns *path*."""
        if files is None:
            files = ('f', 'g/a/b', 'g/a/a/b')
        expected = []
        for rel_name in sorted(files):
            full_name = os.path.join(path, rel_name)
            # each file's contents are its own path, for easy identification
            self.make_hdfs_file(full_name, full_name)
            expected.append("hdfs:///" + full_name)
        self.assertEqual(
            sorted(self.fs.ls("hdfs:///" + path.rstrip('/') + '/*')),
            expected)
        return path

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_hdfs_file('f')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_hdfs_file('f')
        self.make_hdfs_file('f2')
        listing = list(self.fs.ls('hdfs:///'))
        self.assertItemsEqual(listing, ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_hdfs_file('f')
        self.make_hdfs_file('d/f2')
        listing = list(self.fs.ls('hdfs:///'))
        self.assertItemsEqual(listing, ['hdfs:///f', 'hdfs:///d/f2'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_hdfs_file('f', 'foo')
        self.make_hdfs_file('f3 win', 'foo' * 10)
        listing = list(self.fs.ls('s3n://bucket/'))
        self.assertItemsEqual(
            listing, ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_single_space(self):
        self.make_hdfs_file('foo bar')
        self.assertItemsEqual(
            list(self.fs.ls('hdfs:///')), ['hdfs:///foo bar'])

    def test_double_space(self):
        # NOTE(review): filename shown matches test_single_space's; the
        # intended double space appears collapsed by formatting -- confirm.
        self.make_hdfs_file('foo bar')
        self.assertItemsEqual(
            list(self.fs.ls('hdfs:///')), ['hdfs:///foo bar'])

    def test_cat_uncompressed(self):
        # mockhadoop doesn't support compressed files, so we won't test for
        # it; this is only a sanity check anyway.
        self.make_hdfs_file('data/foo', 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), ['foo\n', 'foo\n'])

    def test_write_str(self):
        path = 'hdfs:///write-test-str'
        content = 'some content!'
        self.fs.write(path, content)
        # reading the path back should reproduce the content exactly
        self.assertEqual("".join(self.fs.cat(path)), content)
def test_write_file(self):
path = 'hdfs:///write-test-fileobj'
content = StringIO('some content!')
#.........这里部分代码省略.........
示例6: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import _cat_file [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
    """HadoopFilesystem tests with byte-oriented cat results."""

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Build a fake hadoop home and the env vars the mock binary reads."""
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
        streaming_jar = os.path.join(
            'mock_hadoop_home', 'contrib', 'streaming',
            'hadoop-0.X.Y-streaming.jar')
        self.makefile(streaming_jar, 'i are java bytecode')
        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG; we get command history other ways

    def make_mock_file(self, name, contents='contents'):
        # drop a file into the fake HDFS root, returning its local path
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_mock_file('f')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_mock_file('f')
        self.make_mock_file('f2')
        self.assertEqual(
            sorted(self.fs.ls('hdfs:///')), ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_mock_file('f')
        self.make_mock_file('d/f2')
        self.assertEqual(
            sorted(self.fs.ls('hdfs:///')), ['hdfs:///d/f2', 'hdfs:///f'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        self.assertEqual(
            sorted(self.fs.ls('s3n://bucket/')),
            ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_single_space(self):
        self.make_mock_file('foo bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')), ['hdfs:///foo bar'])

    def test_double_space(self):
        # NOTE(review): filename shown matches test_single_space's; the
        # intended double space appears collapsed by formatting -- confirm.
        self.make_mock_file('foo bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')), ['hdfs:///foo bar'])

    def test_cat_uncompressed(self):
        self.make_mock_file('data/foo', 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), [b'foo\n', b'foo\n'])

    def test_cat_bz2(self):
        self.make_mock_file('data/foo.bz2', bz2.compress(b'foo\n' * 1000))
        remote_path = self.fs.path_join('hdfs:///data', 'foo.bz2')
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), [b'foo\n'] * 1000)

    def test_cat_gz(self):
        self.make_mock_file('data/foo.gz', gzip_compress(b'foo\n' * 10000))
        remote_path = self.fs.path_join('hdfs:///data', 'foo.gz')
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), [b'foo\n'] * 10000)
def test_du(self):
self.make_mock_file('data1', 'abcd')
self.make_mock_file('more/data2', 'defg')
self.make_mock_file('more/data3', 'hijk')
self.assertEqual(self.fs.du('hdfs:///'), 12)
self.assertEqual(self.fs.du('hdfs:///data1'), 4)
self.assertEqual(self.fs.du('hdfs:///more'), 8)
self.assertEqual(self.fs.du('hdfs:///more/*'), 8)
#.........这里部分代码省略.........