当前位置: 首页>>代码示例>>Python>>正文


Python HadoopFilesystem.du方法代码示例

本文整理汇总了Python中mrjob.fs.hadoop.HadoopFilesystem.du方法的典型用法代码示例。如果您正苦于以下问题:Python HadoopFilesystem.du方法的具体用法?Python HadoopFilesystem.du怎么用?Python HadoopFilesystem.du使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在mrjob.fs.hadoop.HadoopFilesystem的用法示例。


在下文中一共展示了HadoopFilesystem.du方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: HadoopFSTestCase

# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 注意: du 是 HadoopFilesystem 类的方法, 无法单独导入; 请先创建实例再调用, 例如 HadoopFilesystem(["hadoop"]).du(path)
class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem, run against a mocked ``hadoop`` command."""

    def setUp(self):
        """Create the filesystem under test and install the mock popen."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(["hadoop"])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Populate ``self.env`` and create the directories it refers to."""
        # setup fake hadoop home
        self.env = {}
        self.env["HADOOP_HOME"] = self.makedirs("mock_hadoop_home")

        self.makefile(
            os.path.join("mock_hadoop_home", "contrib", "streaming", "hadoop-0.X.Y-streaming.jar"),
            "i are java bytecode",
        )

        self.env["MOCK_HDFS_ROOT"] = self.makedirs("mock_hdfs_root")
        self.env["MOCK_HADOOP_OUTPUT"] = self.makedirs("mock_hadoop_output")
        self.env["USER"] = "mrjob_tests"
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_mock_file(self, name, contents="contents"):
        """Create ``name`` under ``mock_hdfs_root`` and return makefile()'s result."""
        return self.makefile(os.path.join("mock_hdfs_root", name), contents)

    def test_ls_empty(self):
        """ls() on an empty root yields nothing."""
        self.assertEqual(list(self.fs.ls("hdfs:///")), [])

    def test_ls_basic(self):
        """ls() on the root yields the single file created there."""
        self.make_mock_file("f")
        self.assertEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///f"])

    def test_ls_basic_2(self):
        """ls() on the root yields both files, in any order."""
        self.make_mock_file("f")
        self.make_mock_file("f2")
        self.assertItemsEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///f", "hdfs:///f2"])

    def test_ls_recurse(self):
        """ls() on the root also yields a file inside a subdirectory."""
        self.make_mock_file("f")
        self.make_mock_file("d/f2")
        self.assertItemsEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///f", "hdfs:///d/f2"])

    def test_ls_s3n(self):
        """ls() on an s3n:// URI yields s3n:// paths, including one with a space."""
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file("f", "foo")
        self.make_mock_file("f3 win", "foo" * 10)
        self.assertItemsEqual(list(self.fs.ls("s3n://bucket/")), ["s3n://bucket/f", "s3n://bucket/f3 win"])

    def test_single_space(self):
        """ls() preserves a single space inside a file name."""
        self.make_mock_file("foo bar")
        self.assertItemsEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///foo bar"])

    def test_double_space(self):
        """ls() preserves two consecutive spaces inside a file name."""
        self.make_mock_file("foo  bar")
        self.assertItemsEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///foo  bar"])

    def test_cat_uncompressed(self):
        """_cat_file() yields the lines of a plain file."""
        self.make_mock_file("data/foo", "foo\nfoo\n")

        remote_path = self.fs.path_join("hdfs:///data", "foo")

        self.assertEqual(list(self.fs._cat_file(remote_path)), ["foo\n", "foo\n"])

    def test_cat_bz2(self):
        """_cat_file() yields the decompressed lines of a .bz2 file."""
        self.make_mock_file("data/foo.bz2", bz2.compress("foo\n" * 1000))

        remote_path = self.fs.path_join("hdfs:///data", "foo.bz2")

        self.assertEqual(list(self.fs._cat_file(remote_path)), ["foo\n"] * 1000)

    def test_cat_gz(self):
        """_cat_file() yields the decompressed lines of a .gz file."""
        self.make_mock_file("data/foo.gz", gzip_compress("foo\n" * 10000))

        remote_path = self.fs.path_join("hdfs:///data", "foo.gz")

        self.assertEqual(list(self.fs._cat_file(remote_path)), ["foo\n"] * 10000)

    def test_du(self):
        """du() reports total sizes for the root, files, a directory and a glob."""
        self.make_mock_file("data1", "abcd")
        self.make_mock_file("more/data2", "defg")
        self.make_mock_file("more/data3", "hijk")

        self.assertEqual(self.fs.du("hdfs:///"), 12)
        self.assertEqual(self.fs.du("hdfs:///data1"), 4)
        self.assertEqual(self.fs.du("hdfs:///more"), 8)
        self.assertEqual(self.fs.du("hdfs:///more/*"), 8)
        self.assertEqual(self.fs.du("hdfs:///more/data2"), 4)
        self.assertEqual(self.fs.du("hdfs:///more/data3"), 4)

    def test_mkdir(self):
        """mkdir() creates a directory for each MOCK_HADOOP_VERSION value."""
        for hadoop_version in ["0.20.0", "0.23.0", "1.2.0", "2.0.0"]:
            self.env["MOCK_HADOOP_VERSION"] = hadoop_version
            # Use a fresh directory per version: re-using one path would let
            # every iteration after the first pass on the leftover directory
            # even if mkdir() did nothing.
            dir_name = "d-" + hadoop_version
            local_path = os.path.join(self.tmp_dir, "mock_hdfs_root", dir_name)
            self.assertEqual(os.path.isdir(local_path), False)
            self.fs.mkdir("hdfs:///" + dir_name)
            self.assertEqual(os.path.isdir(local_path), True)

    def test_path_exists_no(self):
        """path_exists() is False for a path that was never created."""
        path = "hdfs:///f"
        self.assertEqual(self.fs.path_exists(path), False)
#.........这里部分代码省略.........
开发者ID:DepengLuan,项目名称:mrjob,代码行数:103,代码来源:test_hadoop.py

示例2: HadoopFSTestCase

# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 注意: du 是 HadoopFilesystem 类的方法, 无法单独导入; 请先创建实例再调用, 例如 HadoopFilesystem(['hadoop']).du(path)
class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem, run against a mocked ``hadoop`` command."""

    def setUp(self):
        """Create the filesystem under test and install the mock popen."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Populate ``self.env`` and create the directories it refers to."""
        # setup fake hadoop home
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')

        self.makefile(
            os.path.join(
                'mock_hadoop_home',
                'contrib',
                'streaming',
                'hadoop-0.X.Y-streaming.jar'),
            'i are java bytecode',
        )

        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_hdfs_file(self, name, contents):
        """Create ``name`` under ``mock_hdfs_root`` and return makefile()'s result."""
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def test_ls_empty(self):
        """ls() on an empty root yields nothing."""
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        """ls() on the root yields the single file created there."""
        self.make_hdfs_file('f', 'contents')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        """ls() on the root yields both files created there."""
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('f2', 'contents')
        # Compare sorted listings so the test does not depend on the order
        # in which ls() happens to yield entries.
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        """ls() on the root also yields a file inside a subdirectory."""
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('d/f2', 'contents')
        # Sorted for the same reason as in test_ls_basic_2.
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///d/f2', 'hdfs:///f'])

    def test_cat_uncompressed(self):
        """_cat_file() yields the lines of a plain file."""
        # mockhadoop doesn't support compressed files, so we won't test for it.
        # this is only a sanity check anyway.
        self.make_hdfs_file(os.path.join('data', 'foo'), 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')

        self.assertEqual(list(self.fs._cat_file(remote_path)), ['foo\n', 'foo\n'])

    def test_du(self):
        """du() reports total sizes for the root, files, a directory and a glob."""
        self.make_hdfs_file('data1', 'abcd')
        self.makedirs('mock_hdfs_root/more')
        self.make_hdfs_file(os.path.join('more', 'data2'), 'defg')
        self.make_hdfs_file(os.path.join('more', 'data3'), 'hijk')

        self.assertEqual(self.fs.du('hdfs:///'), 12)
        self.assertEqual(self.fs.du('hdfs:///data1'), 4)
        self.assertEqual(self.fs.du('hdfs:///more'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/*'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/data2'), 4)
        self.assertEqual(self.fs.du('hdfs:///more/data3'), 4)

    def test_mkdir(self):
        """mkdir() creates the directory under the mock HDFS root."""
        self.fs.mkdir('hdfs:///d')
        local_path = os.path.join(self.tmp_dir, 'mock_hdfs_root', 'd')
        self.assertEqual(os.path.isdir(local_path), True)

    def test_rm(self):
        """rm() deletes an existing file from the mock HDFS root."""
        local_path = self.make_hdfs_file('f', 'contents')
        self.assertEqual(os.path.exists(local_path), True)
        self.fs.rm('hdfs:///f')
        self.assertEqual(os.path.exists(local_path), False)

    def test_touchz(self):
        """Placeholder: touchz() is not exercised here."""
        # mockhadoop doesn't implement this.
        pass
开发者ID:adaptivelab,项目名称:mrjob,代码行数:87,代码来源:test_hadoop.py

示例3: HadoopFSTestCase

# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 注意: du 是 HadoopFilesystem 类的方法, 无法单独导入; 请先创建实例再调用, 例如 HadoopFilesystem(['hadoop']).du(path)

#.........这里部分代码省略.........
                         ['foo\n', 'foo\n'])

    def test_write_str(self):
        """write() given a string stores exactly that string at the target."""
        target = 'hdfs:///write-test-str'
        payload = 'some content!'
        self.fs.write(target, payload)
        stored = "".join(self.fs.cat(target))
        self.assertEqual(stored, payload)

    def test_write_file(self):
        """write() given a file-like object stores that object's contents."""
        target = 'hdfs:///write-test-fileobj'
        source = StringIO('some content!')
        self.fs.write(target, source)
        stored = "".join(self.fs.cat(target))
        self.assertEqual(stored, source.getvalue())

    def test_write_overwrite(self):
        """write() raises OSError when the target file already exists."""
        self.make_hdfs_file('existing', 'this file already exists')
        write_args = ('hdfs:///existing', 'can not overwrite')
        self.assertRaises(OSError, self.fs.write, *write_args)

    def test_copy_from_local(self):
        """copy_from_local() makes the local file's contents readable remotely."""
        payload = 'file filler'
        remote = 'hdfs:///hadoop-copy'
        local = self.makefile('local-source', payload)

        self.fs.copy_from_local(remote, local)

        copied = "".join(self.fs.cat(remote))
        self.assertEqual(copied, payload)

    def test_copy_from_local_override(self):
        """copy_from_local() raises OSError when the destination exists."""
        local = self.makefile('local-source', 'source')
        self.make_hdfs_file('existing', 'this file already exists')
        copy_args = ('hdfs:///existing', local)
        self.assertRaises(OSError, self.fs.copy_from_local, *copy_args)

    def test_du(self):
        """du() reports total sizes for the root, files, a directory and a glob."""
        fixtures = (
            ('data1', 'abcd'),
            ('more/data2', 'defg'),
            ('more/data3', 'hijk'),
        )
        for name, body in fixtures:
            self.make_hdfs_file(name, body)

        # (path, expected size in bytes), checked in this order
        expected_sizes = (
            ('hdfs:///', 12),
            ('hdfs:///data1', 4),
            ('hdfs:///more', 8),
            ('hdfs:///more/*', 8),
            ('hdfs:///more/data2', 4),
            ('hdfs:///more/data3', 4),
        )
        for uri, size in expected_sizes:
            self.assertEqual(self.fs.du(uri), size)

    def test_mkdir(self):
        """mkdir() creates the directory under the mock HDFS root."""
        self.fs.mkdir('hdfs:///d')
        created = os.path.join(self.tmp_dir, 'mock_hdfs_root', 'd')
        is_dir = os.path.isdir(created)
        self.assertEqual(is_dir, True)

    def test_path_exists_no(self):
        """path_exists() is False for a path that was never created."""
        found = self.fs.path_exists('hdfs:///f')
        self.assertEqual(found, False)

    def test_path_exists_yes(self):
        """path_exists() is True once the file has been created."""
        self.make_hdfs_file('f')
        found = self.fs.path_exists('hdfs:///f')
        self.assertEqual(found, True)

    def test_rm(self):
        """rm() deletes an existing file from the mock HDFS root."""
        created = self.make_hdfs_file('f')
        present_before = os.path.exists(created)
        self.assertEqual(present_before, True)

        self.fs.rm('hdfs:///f')

        present_after = os.path.exists(created)
        self.assertEqual(present_after, False)

    def test_rm_tree_noslash_files(self):
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:70,代码来源:test_hadoop.py

示例4: HadoopFSTestCase

# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 注意: du 是 HadoopFilesystem 类的方法, 无法单独导入; 请先创建实例再调用, 例如 HadoopFilesystem(['hadoop']).du(path)

#.........这里部分代码省略.........
    def test_ls_basic_2(self):
        """ls() on the root yields both files created there."""
        for name in ('f', 'f2'):
            self.make_mock_file(name)
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        """ls() on the root also yields a file inside a subdirectory."""
        for name in ('f', 'd/f2'):
            self.make_mock_file(name)
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///d/f2', 'hdfs:///f'])

    def test_ls_s3n(self):
        """ls() on an s3n:// URI yields s3n:// paths, including one with a space."""
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)

        listing = sorted(self.fs.ls('s3n://bucket/'))
        expected = ['s3n://bucket/f', 's3n://bucket/f3 win']
        self.assertEqual(listing, expected)

    def test_ls_s3a(self):
        """ls() on an s3a:// URI yields s3a:// paths, including one with a space."""
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)

        listing = sorted(self.fs.ls('s3a://bucket/'))
        expected = ['s3a://bucket/f', 's3a://bucket/f3 win']
        self.assertEqual(listing, expected)

    def test_single_space(self):
        """ls() preserves a single space inside a file name."""
        self.make_mock_file('foo bar')
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///foo bar'])

    def test_double_space(self):
        """ls() preserves two consecutive spaces inside a file name."""
        self.make_mock_file('foo  bar')
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///foo  bar'])

    def test_du(self):
        """du() reports total sizes for the root, files, a directory and a glob."""
        fixtures = (
            ('data1', 'abcd'),
            ('more/data2', 'defg'),
            ('more/data3', 'hijk'),
        )
        for name, body in fixtures:
            self.make_mock_file(name, body)

        # (path, expected size in bytes), checked in this order
        expected_sizes = (
            ('hdfs:///', 12),
            ('hdfs:///data1', 4),
            ('hdfs:///more', 8),
            ('hdfs:///more/*', 8),
            ('hdfs:///more/data2', 4),
            ('hdfs:///more/data3', 4),
        )
        for uri, size in expected_sizes:
            self.assertEqual(self.fs.du(uri), size)

    def test_du_non_existent(self):
        """du() reports 0 for a path that does not exist."""
        size = self.fs.du('hdfs:///does-not-exist')
        self.assertEqual(size, 0)

    def test_exists_no(self):
        """exists() is False for a path that was never created."""
        found = self.fs.exists('hdfs:///f')
        self.assertEqual(found, False)

    def test_exists_yes(self):
        """exists() is True once the file has been created."""
        self.make_mock_file('f')
        found = self.fs.exists('hdfs:///f')
        self.assertEqual(found, True)

    def test_mkdir(self):
        """mkdir() creates a nested directory under the mock HDFS root."""
        self.fs.mkdir('hdfs:///d/ave')

        hdfs_root = get_mock_hdfs_root(self.env)
        expected_dir = os.path.join(hdfs_root, 'd', 'ave')
        is_dir = os.path.isdir(expected_dir)
        self.assertEqual(is_dir, True)

    def test_put(self):
        """put() uploads a local file so cat() returns its bytes."""
        source = self.makefile('foo', contents=b'bar')
        target = 'hdfs:///bar'

        self.fs.put(source, target)

        stored = b''.join(self.fs.cat(target))
        self.assertEqual(stored, b'bar')

    def test_no_put_to_dir(self):
        """put() raises ValueError when the destination is 'hdfs:///'."""
        source = self.makefile('foo', contents=b'bar')

        # Look the method up before entering the context so only the call
        # itself is expected to raise.
        put = self.fs.put
        with self.assertRaises(ValueError):
            put(source, 'hdfs:///')

    def test_rm(self):
        """rm() deletes an existing file from the mock HDFS root."""
        created = self.make_mock_file('f')
        present_before = os.path.exists(created)
        self.assertEqual(present_before, True)

        self.fs.rm('hdfs:///f')

        present_after = os.path.exists(created)
        self.assertEqual(present_after, False)

    def test_rm_recursive(self):
        """rm() on a directory also removes the file inside it."""
        nested = self.make_mock_file('foo/bar')
        present_before = os.path.exists(nested)
        self.assertEqual(present_before, True)

        self.fs.rm('hdfs:///foo')  # remove containing directory

        present_after = os.path.exists(nested)
        self.assertEqual(present_after, False)

    def test_rm_nonexistent(self):
        """rm() on a path that was never created completes without raising."""
        missing = 'hdfs:///baz'
        self.fs.rm(missing)

    def test_touchz(self):
        """touchz() makes a new entry appear in the root listing."""
        root = 'hdfs:///'
        listing_before = list(self.fs.ls(root))
        self.assertEqual(listing_before, [])

        self.fs.touchz('hdfs:///empty')

        listing_after = list(self.fs.ls(root))
        self.assertEqual(listing_after, ['hdfs:///empty'])
开发者ID:Yelp,项目名称:mrjob,代码行数:104,代码来源:test_hadoop.py

示例5: HadoopFSTestCase

# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 注意: du 是 HadoopFilesystem 类的方法, 无法单独导入; 请先创建实例再调用, 例如 HadoopFilesystem(['hadoop']).du(path)
class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem, run against a mocked ``hadoop`` command."""

    def setUp(self):
        """Create the filesystem under test and install the mock popen."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Populate ``self.env`` and create the directories it refers to."""
        # setup fake hadoop home
        env = self.env = {}
        env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')

        jar_path = os.path.join(
            'mock_hadoop_home',
            'contrib',
            'streaming',
            'hadoop-0.X.Y-streaming.jar',
        )
        self.makefile(jar_path, 'i are java bytecode')

        env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_mock_file(self, name, contents='contents'):
        """Create ``name`` under ``mock_hdfs_root`` and return makefile()'s result."""
        relative_path = os.path.join('mock_hdfs_root', name)
        return self.makefile(relative_path, contents)

    def test_ls_empty(self):
        """ls() on an empty root yields nothing."""
        listing = list(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, [])

    def test_ls_basic(self):
        """ls() on the root yields the single file created there."""
        self.make_mock_file('f')
        listing = list(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///f'])

    def test_ls_basic_2(self):
        """ls() on the root yields both files created there."""
        for name in ('f', 'f2'):
            self.make_mock_file(name)
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        """ls() on the root also yields a file inside a subdirectory."""
        for name in ('f', 'd/f2'):
            self.make_mock_file(name)
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///d/f2', 'hdfs:///f'])

    def test_ls_s3n(self):
        """ls() on an s3n:// URI yields s3n:// paths, including one with a space."""
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)

        listing = sorted(self.fs.ls('s3n://bucket/'))
        expected = ['s3n://bucket/f', 's3n://bucket/f3 win']
        self.assertEqual(listing, expected)

    def test_single_space(self):
        """ls() preserves a single space inside a file name."""
        self.make_mock_file('foo bar')
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///foo bar'])

    def test_double_space(self):
        """ls() preserves two consecutive spaces inside a file name."""
        self.make_mock_file('foo  bar')
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///foo  bar'])

    def test_cat_uncompressed(self):
        """_cat_file() yields the lines of a plain file as bytes."""
        self.make_mock_file('data/foo', 'foo\nfoo\n')

        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, [b'foo\n', b'foo\n'])

    def test_cat_bz2(self):
        """_cat_file() yields the decompressed lines of a .bz2 file."""
        compressed = bz2.compress(b'foo\n' * 1000)
        self.make_mock_file('data/foo.bz2', compressed)

        remote_path = self.fs.path_join('hdfs:///data', 'foo.bz2')
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, [b'foo\n'] * 1000)

    def test_cat_gz(self):
        """_cat_file() yields the decompressed lines of a .gz file."""
        compressed = gzip_compress(b'foo\n' * 10000)
        self.make_mock_file('data/foo.gz', compressed)

        remote_path = self.fs.path_join('hdfs:///data', 'foo.gz')
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, [b'foo\n'] * 10000)

    def test_du(self):
        """du() reports total sizes for the root, a file, a directory and a glob."""
        fixtures = (
            ('data1', 'abcd'),
            ('more/data2', 'defg'),
            ('more/data3', 'hijk'),
        )
        for name, body in fixtures:
            self.make_mock_file(name, body)

        # (path, expected size in bytes), checked in this order
        expected_sizes = (
            ('hdfs:///', 12),
            ('hdfs:///data1', 4),
            ('hdfs:///more', 8),
            ('hdfs:///more/*', 8),
        )
        for uri, size in expected_sizes:
            self.assertEqual(self.fs.du(uri), size)
#.........这里部分代码省略.........
开发者ID:DanisHack,项目名称:mrjob,代码行数:103,代码来源:test_hadoop.py


注:本文中的mrjob.fs.hadoop.HadoopFilesystem.du方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。