本文整理汇总了Python中mrjob.fs.hadoop.HadoopFilesystem.du方法的典型用法代码示例。如果您正苦于以下问题：Python HadoopFilesystem.du方法的具体用法？Python HadoopFilesystem.du怎么用？Python HadoopFilesystem.du使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类mrjob.fs.hadoop.HadoopFilesystem的用法示例。
在下文中一共展示了HadoopFilesystem.du方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import du [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem, run against a mocked ``hadoop`` command.

    Fixture files are created under ``mock_hdfs_root`` and then addressed
    through ``hdfs:///`` (or ``s3n://``) URIs.
    """

    def setUp(self):
        """Build the filesystem under test and hook up the mock hadoop."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(["hadoop"])
        self.set_up_mock_hadoop()
        # NOTE(review): presumably routes subprocesses started by fs_hadoop
        # to mock_hadoop_main, using self.env -- confirm against
        # MockSubprocessTestCase.mock_popen.
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Create the fake hadoop directories and record them in self.env."""
        # setup fake hadoop home
        self.env = {}
        self.env["HADOOP_HOME"] = self.makedirs("mock_hadoop_home")

        streaming_jar = os.path.join(
            "mock_hadoop_home",
            "contrib",
            "streaming",
            "hadoop-0.X.Y-streaming.jar",
        )
        self.makefile(streaming_jar, "i are java bytecode")

        self.env["MOCK_HDFS_ROOT"] = self.makedirs("mock_hdfs_root")
        self.env["MOCK_HADOOP_OUTPUT"] = self.makedirs("mock_hadoop_output")
        self.env["USER"] = "mrjob_tests"
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_mock_file(self, name, contents="contents"):
        """Create ``name`` under mock_hdfs_root and return makefile()'s result.

        :param name: path relative to the mock HDFS root
        :param contents: data to store in the file
        """
        return self.makefile(os.path.join("mock_hdfs_root", name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls("hdfs:///")), [])

    def test_ls_basic(self):
        self.make_mock_file("f")
        self.assertEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///f"])

    def test_ls_basic_2(self):
        self.make_mock_file("f")
        self.make_mock_file("f2")
        self.assertItemsEqual(
            list(self.fs.ls("hdfs:///")), ["hdfs:///f", "hdfs:///f2"]
        )

    def test_ls_recurse(self):
        # ls() is expected to descend into d/ and report the nested file.
        self.make_mock_file("f")
        self.make_mock_file("d/f2")
        self.assertItemsEqual(
            list(self.fs.ls("hdfs:///")), ["hdfs:///f", "hdfs:///d/f2"]
        )

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file("f", "foo")
        self.make_mock_file("f3 win", "foo" * 10)
        self.assertItemsEqual(
            list(self.fs.ls("s3n://bucket/")),
            ["s3n://bucket/f", "s3n://bucket/f3 win"],
        )

    def test_single_space(self):
        self.make_mock_file("foo bar")
        self.assertItemsEqual(
            list(self.fs.ls("hdfs:///")), ["hdfs:///foo bar"]
        )

    def test_double_space(self):
        # The two consecutive spaces are deliberate: with a single space
        # this test would merely repeat test_single_space.
        self.make_mock_file("foo  bar")
        self.assertItemsEqual(
            list(self.fs.ls("hdfs:///")), ["hdfs:///foo  bar"]
        )

    def test_cat_uncompressed(self):
        self.make_mock_file("data/foo", "foo\nfoo\n")
        remote_path = self.fs.path_join("hdfs:///data", "foo")
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), ["foo\n", "foo\n"]
        )

    def test_cat_bz2(self):
        # The file is stored bz2-compressed; _cat_file() must yield plain lines.
        self.make_mock_file("data/foo.bz2", bz2.compress("foo\n" * 1000))
        remote_path = self.fs.path_join("hdfs:///data", "foo.bz2")
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), ["foo\n"] * 1000
        )

    def test_cat_gz(self):
        # Same as test_cat_bz2, for a gzip-compressed file.
        self.make_mock_file("data/foo.gz", gzip_compress("foo\n" * 10000))
        remote_path = self.fs.path_join("hdfs:///data", "foo.gz")
        self.assertEqual(
            list(self.fs._cat_file(remote_path)), ["foo\n"] * 10000
        )

    def test_du(self):
        # Three 4-character files: one at the root, two under more/.
        self.make_mock_file("data1", "abcd")
        self.make_mock_file("more/data2", "defg")
        self.make_mock_file("more/data3", "hijk")

        self.assertEqual(self.fs.du("hdfs:///"), 12)
        self.assertEqual(self.fs.du("hdfs:///data1"), 4)
        self.assertEqual(self.fs.du("hdfs:///more"), 8)
        self.assertEqual(self.fs.du("hdfs:///more/*"), 8)
        self.assertEqual(self.fs.du("hdfs:///more/data2"), 4)
        self.assertEqual(self.fs.du("hdfs:///more/data3"), 4)

    def test_mkdir(self):
        # NOTE(review): indentation was lost in this listing; the whole body
        # is assumed to sit inside the loop so that mkdir() is exercised for
        # every mocked Hadoop version -- confirm against the upstream test.
        for hadoop_version in ["0.20.0", "0.23.0", "1.2.0", "2.0.0"]:
            self.env["MOCK_HADOOP_VERSION"] = hadoop_version
            self.fs.mkdir("hdfs:///d")
            local_path = os.path.join(self.tmp_dir, "mock_hdfs_root", "d")
            self.assertEqual(os.path.isdir(local_path), True)

    def test_path_exists_no(self):
        path = "hdfs:///f"
        self.assertEqual(self.fs.path_exists(path), False)
#.........这里部分代码省略.........
示例2: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import du [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem, run against a mocked ``hadoop`` command.

    Fixture files are created under ``mock_hdfs_root`` and then addressed
    through ``hdfs:///`` URIs.
    """

    def setUp(self):
        """Build the filesystem under test and hook up the mock hadoop."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        # NOTE(review): presumably routes subprocesses started by fs_hadoop
        # to mock_hadoop_main, using self.env -- confirm against
        # MockSubprocessTestCase.mock_popen.
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Create the fake hadoop directories and record them in self.env."""
        # setup fake hadoop home
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')

        self.makefile(
            os.path.join(
                'mock_hadoop_home',
                'contrib',
                'streaming',
                'hadoop-0.X.Y-streaming.jar'),
            'i are java bytecode',
        )

        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_hdfs_file(self, name, contents):
        """Create ``name`` under mock_hdfs_root and return makefile()'s result.

        test_rm() uses the return value as the file's local path.

        :param name: path relative to the mock HDFS root
        :param contents: data to store in the file
        """
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_hdfs_file('f', 'contents')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('f2', 'contents')
        # Sort before comparing so the test does not depend on the order
        # in which ls() happens to yield entries.
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('d/f2', 'contents')
        # Sorted for the same reason as in test_ls_basic_2.
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///d/f2', 'hdfs:///f'])

    def test_cat_uncompressed(self):
        # mockhadoop doesn't support compressed files, so we won't test for it.
        # this is only a sanity check anyway.
        self.make_hdfs_file(os.path.join('data', 'foo'), 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        self.assertEqual(list(self.fs._cat_file(remote_path)),
                         ['foo\n', 'foo\n'])

    def test_du(self):
        # Three 4-character files: one at the root, two under more/.
        self.make_hdfs_file('data1', 'abcd')
        self.makedirs('mock_hdfs_root/more')
        self.make_hdfs_file(os.path.join('more', 'data2'), 'defg')
        self.make_hdfs_file(os.path.join('more', 'data3'), 'hijk')

        self.assertEqual(self.fs.du('hdfs:///'), 12)
        self.assertEqual(self.fs.du('hdfs:///data1'), 4)
        self.assertEqual(self.fs.du('hdfs:///more'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/*'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/data2'), 4)
        self.assertEqual(self.fs.du('hdfs:///more/data3'), 4)

    def test_mkdir(self):
        self.fs.mkdir('hdfs:///d')
        local_path = os.path.join(self.tmp_dir, 'mock_hdfs_root', 'd')
        self.assertEqual(os.path.isdir(local_path), True)

    def test_rm(self):
        local_path = self.make_hdfs_file('f', 'contents')
        self.assertEqual(os.path.exists(local_path), True)
        self.fs.rm('hdfs:///f')
        self.assertEqual(os.path.exists(local_path), False)

    def test_touchz(self):
        # mockhadoop doesn't implement this.
        pass
示例3: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import du [as 别名]
#.........这里部分代码省略.........
['foo\n', 'foo\n'])
def test_write_str(self):
    # Write a plain string, then read it back through cat().
    expected = 'some content!'
    target = 'hdfs:///write-test-str'
    self.fs.write(target, expected)
    read_back = "".join(self.fs.cat(target))
    self.assertEqual(read_back, expected)
def test_write_file(self):
    # Same round trip as test_write_str, but the payload is a file object.
    target = 'hdfs:///write-test-fileobj'
    source = StringIO('some content!')
    self.fs.write(target, source)
    read_back = "".join(self.fs.cat(target))
    self.assertEqual(read_back, source.getvalue())
def test_write_overwrite(self):
    # write() is expected to raise OSError when the target already exists.
    self.make_hdfs_file('existing', 'this file already exists')
    write_args = ('hdfs:///existing', 'can not overwrite')
    self.assertRaises(OSError, self.fs.write, *write_args)
def test_copy_from_local(self):
    # Copy a local file up, then check the remote contents via cat().
    payload = 'file filler'
    remote_uri = 'hdfs:///hadoop-copy'
    local_file = self.makefile('local-source', payload)
    self.fs.copy_from_local(remote_uri, local_file)
    remote_contents = "".join(self.fs.cat(remote_uri))
    self.assertEqual(remote_contents, payload)
def test_copy_from_local_override(self):
    # copy_from_local() is expected to raise OSError rather than replace
    # a destination that already exists.
    local_file = self.makefile('local-source', 'source')
    self.make_hdfs_file('existing', 'this file already exists')
    copy_args = ('hdfs:///existing', local_file)
    self.assertRaises(OSError, self.fs.copy_from_local, *copy_args)
def test_du(self):
    # Three 4-character files: one at the root, two under more/.
    self.make_hdfs_file('data1', 'abcd')
    self.make_hdfs_file('more/data2', 'defg')
    self.make_hdfs_file('more/data3', 'hijk')

    expected_sizes = (
        ('hdfs:///', 12),
        ('hdfs:///data1', 4),
        ('hdfs:///more', 8),
        ('hdfs:///more/*', 8),
        ('hdfs:///more/data2', 4),
        ('hdfs:///more/data3', 4),
    )
    for uri, size in expected_sizes:
        self.assertEqual(self.fs.du(uri), size)
def test_mkdir(self):
    # mkdir() on an hdfs:// URI should create the directory in the mock root.
    self.fs.mkdir('hdfs:///d')
    created_dir = os.path.join(self.tmp_dir, 'mock_hdfs_root', 'd')
    is_directory = os.path.isdir(created_dir)
    self.assertEqual(is_directory, True)
def test_path_exists_no(self):
    # Nothing was created, so the path must be reported as missing.
    found = self.fs.path_exists('hdfs:///f')
    self.assertEqual(found, False)
def test_path_exists_yes(self):
    # Create the file first; path_exists() must then report it.
    self.make_hdfs_file('f')
    found = self.fs.path_exists('hdfs:///f')
    self.assertEqual(found, True)
def test_rm(self):
    # The file must exist locally before rm() and be gone afterwards.
    backing_file = self.make_hdfs_file('f')
    existed_before = os.path.exists(backing_file)
    self.assertEqual(existed_before, True)

    self.fs.rm('hdfs:///f')
    exists_after = os.path.exists(backing_file)
    self.assertEqual(exists_after, False)
def test_rm_tree_noslash_files(self):
示例4: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import du [as 别名]
#.........这里部分代码省略.........
def test_ls_basic_2(self):
    # Two files at the root; compare the listing after sorting it.
    for name in ('f', 'f2'):
        self.make_mock_file(name)
    listing = sorted(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///f', 'hdfs:///f2'])
def test_ls_recurse(self):
    # ls() is expected to report the file nested under d/ as well.
    for name in ('f', 'd/f2'):
        self.make_mock_file(name)
    listing = sorted(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///d/f2', 'hdfs:///f'])
def test_ls_s3n(self):
    # hadoop fs -lsr doesn't have user and group info when reading from s3
    self.make_mock_file('f', 'foo')
    self.make_mock_file('f3 win', 'foo' * 10)
    listing = sorted(self.fs.ls('s3n://bucket/'))
    expected = ['s3n://bucket/f', 's3n://bucket/f3 win']
    self.assertEqual(listing, expected)
def test_ls_s3a(self):
    # hadoop fs -lsr doesn't have user and group info when reading from s3
    self.make_mock_file('f', 'foo')
    self.make_mock_file('f3 win', 'foo' * 10)
    listing = sorted(self.fs.ls('s3a://bucket/'))
    expected = ['s3a://bucket/f', 's3a://bucket/f3 win']
    self.assertEqual(listing, expected)
def test_single_space(self):
    # A file name containing one space must come back from ls() intact.
    self.make_mock_file('foo bar')
    listing = sorted(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///foo bar'])
def test_double_space(self):
    # The two consecutive spaces are deliberate: with a single space this
    # test would merely repeat test_single_space. ls() must not collapse
    # the run of spaces in the file name.
    self.make_mock_file('foo  bar')
    self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                     ['hdfs:///foo  bar'])
def test_du(self):
    # Three 4-character files: one at the root, two under more/.
    fixtures = (
        ('data1', 'abcd'),
        ('more/data2', 'defg'),
        ('more/data3', 'hijk'),
    )
    for name, contents in fixtures:
        self.make_mock_file(name, contents)

    expected_sizes = (
        ('hdfs:///', 12),
        ('hdfs:///data1', 4),
        ('hdfs:///more', 8),
        ('hdfs:///more/*', 8),
        ('hdfs:///more/data2', 4),
        ('hdfs:///more/data3', 4),
    )
    for uri, size in expected_sizes:
        self.assertEqual(self.fs.du(uri), size)
def test_du_non_existent(self):
    # du() on a path that was never created is expected to report 0.
    reported_size = self.fs.du('hdfs:///does-not-exist')
    self.assertEqual(reported_size, 0)
def test_exists_no(self):
    # Nothing was created, so exists() must return False.
    found = self.fs.exists('hdfs:///f')
    self.assertEqual(found, False)
def test_exists_yes(self):
    # Create the file first; exists() must then return True.
    self.make_mock_file('f')
    found = self.fs.exists('hdfs:///f')
    self.assertEqual(found, True)
def test_mkdir(self):
    # mkdir() is given a nested path; both levels must end up on disk
    # under the mock HDFS root.
    self.fs.mkdir('hdfs:///d/ave')
    hdfs_root = get_mock_hdfs_root(self.env)
    created_dir = os.path.join(hdfs_root, 'd', 'ave')
    self.assertEqual(os.path.isdir(created_dir), True)
def test_put(self):
    # Upload a local file and verify the remote bytes through cat().
    source_file = self.makefile('foo', contents=b'bar')
    remote_uri = 'hdfs:///bar'
    self.fs.put(source_file, remote_uri)
    uploaded = b''.join(self.fs.cat(remote_uri))
    self.assertEqual(uploaded, b'bar')
def test_no_put_to_dir(self):
    # put() is expected to raise ValueError when the destination is the
    # root directory rather than a file path.
    source_file = self.makefile('foo', contents=b'bar')
    put_args = (source_file, 'hdfs:///')
    self.assertRaises(ValueError, self.fs.put, *put_args)
def test_rm(self):
    # The file must exist locally before rm() and be gone afterwards.
    backing_file = self.make_mock_file('f')
    existed_before = os.path.exists(backing_file)
    self.assertEqual(existed_before, True)

    self.fs.rm('hdfs:///f')
    exists_after = os.path.exists(backing_file)
    self.assertEqual(exists_after, False)
def test_rm_recursive(self):
    # Removing a directory must also remove the file inside it.
    nested_file = self.make_mock_file('foo/bar')
    existed_before = os.path.exists(nested_file)
    self.assertEqual(existed_before, True)

    self.fs.rm('hdfs:///foo')  # remove containing directory
    exists_after = os.path.exists(nested_file)
    self.assertEqual(exists_after, False)
def test_rm_nonexistent(self):
    # No assertion: the test passes as long as rm() does not raise for a
    # path that was never created.
    missing_uri = 'hdfs:///baz'
    self.fs.rm(missing_uri)
def test_touchz(self):
    # Start from an empty root, touch one file, and expect ls() to show
    # exactly that file.
    root_uri = 'hdfs:///'
    listing_before = list(self.fs.ls(root_uri))
    self.assertEqual(listing_before, [])

    self.fs.touchz('hdfs:///empty')
    listing_after = list(self.fs.ls(root_uri))
    self.assertEqual(listing_after, ['hdfs:///empty'])
示例5: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import du [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem, run against a mocked ``hadoop`` command.

    Fixture files are created under ``mock_hdfs_root`` and then addressed
    through ``hdfs:///`` (or ``s3n://``) URIs.
    """

    def setUp(self):
        """Build the filesystem under test and hook up the mock hadoop."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        # NOTE(review): presumably routes subprocesses started by fs_hadoop
        # to mock_hadoop_main, using self.env -- confirm against
        # MockSubprocessTestCase.mock_popen.
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Create the fake hadoop directories and record them in self.env."""
        # setup fake hadoop home
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')

        self.makefile(
            os.path.join(
                'mock_hadoop_home',
                'contrib',
                'streaming',
                'hadoop-0.X.Y-streaming.jar'),
            'i are java bytecode',
        )

        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_mock_file(self, name, contents='contents'):
        """Create ``name`` under mock_hdfs_root and return makefile()'s result.

        :param name: path relative to the mock HDFS root
        :param contents: data to store in the file
        """
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_mock_file('f')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_mock_file('f')
        self.make_mock_file('f2')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        # ls() is expected to descend into d/ and report the nested file.
        self.make_mock_file('f')
        self.make_mock_file('d/f2')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///d/f2', 'hdfs:///f'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        self.assertEqual(sorted(self.fs.ls('s3n://bucket/')),
                         ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_single_space(self):
        self.make_mock_file('foo bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///foo bar'])

    def test_double_space(self):
        # The two consecutive spaces are deliberate: with a single space
        # this test would merely repeat test_single_space.
        self.make_mock_file('foo  bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///foo  bar'])

    def test_cat_uncompressed(self):
        self.make_mock_file('data/foo', 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        self.assertEqual(list(self.fs._cat_file(remote_path)),
                         [b'foo\n', b'foo\n'])

    def test_cat_bz2(self):
        # The file is stored bz2-compressed; _cat_file() must yield plain lines.
        self.make_mock_file('data/foo.bz2', bz2.compress(b'foo\n' * 1000))
        remote_path = self.fs.path_join('hdfs:///data', 'foo.bz2')
        self.assertEqual(list(self.fs._cat_file(remote_path)),
                         [b'foo\n'] * 1000)

    def test_cat_gz(self):
        # Same as test_cat_bz2, for a gzip-compressed file.
        self.make_mock_file('data/foo.gz', gzip_compress(b'foo\n' * 10000))
        remote_path = self.fs.path_join('hdfs:///data', 'foo.gz')
        self.assertEqual(list(self.fs._cat_file(remote_path)),
                         [b'foo\n'] * 10000)
def test_du(self):
self.make_mock_file('data1', 'abcd')
self.make_mock_file('more/data2', 'defg')
self.make_mock_file('more/data3', 'hijk')
self.assertEqual(self.fs.du('hdfs:///'), 12)
self.assertEqual(self.fs.du('hdfs:///data1'), 4)
self.assertEqual(self.fs.du('hdfs:///more'), 8)
self.assertEqual(self.fs.du('hdfs:///more/*'), 8)
#.........这里部分代码省略.........