当前位置: 首页>>代码示例>>Python>>正文


Python hadoop.HadoopFilesystem类代码示例

本文整理汇总了Python中mrjob.fs.hadoop.HadoopFilesystem的典型用法代码示例。如果您正苦于以下问题：Python HadoopFilesystem类的具体用法？Python HadoopFilesystem怎么用？Python HadoopFilesystem使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了HadoopFilesystem类的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_logging_stderr_in_cleanup

    def test_logging_stderr_in_cleanup(self):
        # Whatever the (fake) subprocess writes to stderr should be logged
        # via log.error(), while its stdout comes back as the file data.
        stdout_lines = [b'line1\n', b'line2\n']
        stderr_lines = [b'Emergency, everybody to get from street\n']

        def fake_popen(*args, **kwargs):
            """Stand-in for Popen: returns a process mock whose stdout and
            stderr iterate over canned lines and whose wait() returns 0."""
            proc = MagicMock()

            proc.stdout = MagicMock()
            proc.stdout.__iter__.return_value = stdout_lines

            proc.stderr = MagicMock()
            proc.stderr.__iter__.return_value = stderr_lines

            proc.wait.return_value = 0

            return proc

        self.start(patch('mrjob.fs.hadoop.Popen', fake_popen))

        log_mock = self.start(patch('mrjob.fs.hadoop.log'))

        hadoop_fs = HadoopFilesystem()

        chunks = hadoop_fs._cat_file('/some/path')
        self.assertEqual(b''.join(chunks), b'line1\nline2\n')

        # the trailing newline is not part of the logged message
        log_mock.error.assert_called_once_with(
            'STDERR: Emergency, everybody to get from street')
开发者ID:Yelp,项目名称:mrjob,代码行数:28,代码来源:test_hadoop.py

示例2: test_deprecated_hadoop_home_option

    def test_deprecated_hadoop_home_option(self):
        # put a fake executable at <tmp_dir>/hadoop_home_option/bin/hadoop
        home_dir = join(self.tmp_dir, 'hadoop_home_option')
        bin_path = join(home_dir, 'bin', 'hadoop')
        expected_bin = self.makefile(bin_path, executable=True)

        # deprecation warning is in HadoopJobRunner
        self.fs = HadoopFilesystem(hadoop_home=home_dir)

        with no_handlers_for_logger('mrjob.fs.hadoop'):
            found_bin = self.fs.get_hadoop_bin()
            self.assertEqual(found_bin, [expected_bin])
开发者ID:Dean838,项目名称:mrjob,代码行数:10,代码来源:test_hadoop.py

示例3: setUp

    def setUp(self):
        """Patch ``which()``, restrict ``PATH`` to the temp dir, and create
        the default filesystem object used by the tests."""
        super(FindHadoopBinTestCase, self).setUp()

        # track calls to which()
        which_patcher = patch('mrjob.fs.hadoop.which', wraps=which)
        self.which = self.start(which_patcher)

        # keep which() from searching in /bin, etc.
        os.environ['PATH'] = self.tmp_dir

        # create basic HadoopFilesystem (okay to overwrite)
        self.fs = HadoopFilesystem()
开发者ID:Yelp,项目名称:mrjob,代码行数:11,代码来源:test_hadoop.py

示例4: setUp

 def setUp(self):
     """Create the filesystem under test, build the mock environment, then
     hand it to ``mock_popen()``."""
     super(HadoopFSTestCase, self).setUp()
     # wrap HadoopFilesystem so it gets cat()
     hadoop_cmd = ["hadoop"]
     self.fs = HadoopFilesystem(hadoop_cmd)
     # self.env must exist before mock_popen() is given it
     self.set_up_mock_hadoop()
     self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)
开发者ID:DepengLuan,项目名称:mrjob,代码行数:6,代码来源:test_hadoop.py

示例5: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem.

    ``setUp()`` passes ``mock_hadoop_main`` and ``self.env`` to
    ``mock_popen()``; presumably this makes the filesystem's hadoop
    subprocess calls run against the mock (confirm in
    MockSubprocessTestCase).
    """

    def setUp(self):
        """Create the filesystem under test, build the mock environment,
        then hand it to ``mock_popen()``."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(["hadoop"])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Fill ``self.env`` with the mock hadoop's environment variables,
        creating the directories they point at."""
        # setup fake hadoop home
        env = self.env = {}
        env["HADOOP_HOME"] = self.makedirs("mock_hadoop_home")

        streaming_jar = os.path.join(
            "mock_hadoop_home", "contrib", "streaming",
            "hadoop-0.X.Y-streaming.jar")
        self.makefile(streaming_jar, "i are java bytecode")

        for var_name, dir_name in (
                ("MOCK_HDFS_ROOT", "mock_hdfs_root"),
                ("MOCK_HADOOP_OUTPUT", "mock_hadoop_output")):
            env[var_name] = self.makedirs(dir_name)

        env["USER"] = "mrjob_tests"
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_mock_file(self, name, contents="contents"):
        """Create *name* under ``mock_hdfs_root`` holding *contents*, and
        return the result of ``makefile()``."""
        relative_path = os.path.join("mock_hdfs_root", name)
        return self.makefile(relative_path, contents)

    def test_ls_empty(self):
        listing = list(self.fs.ls("hdfs:///"))
        self.assertEqual(listing, [])

    def test_ls_basic(self):
        self.make_mock_file("f")
        listing = list(self.fs.ls("hdfs:///"))
        self.assertEqual(listing, ["hdfs:///f"])

    def test_ls_basic_2(self):
        for name in ("f", "f2"):
            self.make_mock_file(name)
        listing = list(self.fs.ls("hdfs:///"))
        self.assertItemsEqual(listing, ["hdfs:///f", "hdfs:///f2"])

    def test_ls_recurse(self):
        for name in ("f", "d/f2"):
            self.make_mock_file(name)
        listing = list(self.fs.ls("hdfs:///"))
        self.assertItemsEqual(listing, ["hdfs:///f", "hdfs:///d/f2"])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file("f", "foo")
        self.make_mock_file("f3 win", "foo" * 10)
        listing = list(self.fs.ls("s3n://bucket/"))
        self.assertItemsEqual(
            listing, ["s3n://bucket/f", "s3n://bucket/f3 win"])

    def test_single_space(self):
        self.make_mock_file("foo bar")
        listing = list(self.fs.ls("hdfs:///"))
        self.assertItemsEqual(listing, ["hdfs:///foo bar"])

    def test_double_space(self):
        self.make_mock_file("foo  bar")
        listing = list(self.fs.ls("hdfs:///"))
        self.assertItemsEqual(listing, ["hdfs:///foo  bar"])

    def test_cat_uncompressed(self):
        self.make_mock_file("data/foo", "foo\nfoo\n")

        remote_path = self.fs.path_join("hdfs:///data", "foo")
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, ["foo\n", "foo\n"])

    def test_cat_bz2(self):
        compressed = bz2.compress("foo\n" * 1000)
        self.make_mock_file("data/foo.bz2", compressed)

        remote_path = self.fs.path_join("hdfs:///data", "foo.bz2")
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, ["foo\n"] * 1000)

    def test_cat_gz(self):
        compressed = gzip_compress("foo\n" * 10000)
        self.make_mock_file("data/foo.gz", compressed)

        remote_path = self.fs.path_join("hdfs:///data", "foo.gz")
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, ["foo\n"] * 10000)

    def test_du(self):
        # three 4-byte files: one at the root, two under more/
        self.make_mock_file("data1", "abcd")
        self.make_mock_file("more/data2", "defg")
        self.make_mock_file("more/data3", "hijk")

        expected_sizes = (
            ("hdfs:///", 12),
            ("hdfs:///data1", 4),
            ("hdfs:///more", 8),
            ("hdfs:///more/*", 8),
            ("hdfs:///more/data2", 4),
            ("hdfs:///more/data3", 4),
        )
        for uri, size in expected_sizes:
            self.assertEqual(self.fs.du(uri), size)

    def test_mkdir(self):
        # where hdfs:///d is expected to show up on the local disk
        local_path = os.path.join(self.tmp_dir, "mock_hdfs_root", "d")

        for hadoop_version in ("0.20.0", "0.23.0", "1.2.0", "2.0.0"):
            self.env["MOCK_HADOOP_VERSION"] = hadoop_version
            self.fs.mkdir("hdfs:///d")
            self.assertEqual(os.path.isdir(local_path), True)

    def test_path_exists_no(self):
        # nothing has been created, so the path must not exist
        exists = self.fs.path_exists("hdfs:///f")
        self.assertEqual(exists, False)
#.........这里部分代码省略.........
开发者ID:DepengLuan,项目名称:mrjob,代码行数:101,代码来源:test_hadoop.py

示例6: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem.

    ``setUp()`` passes ``mock_hadoop_main`` and ``self.env`` to
    ``mock_popen()``; presumably this makes the filesystem's hadoop
    subprocess calls run against the mock (confirm in
    MockSubprocessTestCase).
    """

    def setUp(self):
        """Create the filesystem under test, build the mock environment,
        then hand it to ``mock_popen()``."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Fill ``self.env`` with the mock hadoop's environment variables,
        creating the directories they point at."""
        # setup fake hadoop home
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')

        self.makefile(
            os.path.join(
                'mock_hadoop_home',
                'contrib',
                'streaming',
                'hadoop-0.X.Y-streaming.jar'),
            'i are java bytecode',
        )

        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_hdfs_file(self, name, contents):
        """Create *name* under ``mock_hdfs_root`` holding *contents*, and
        return the result of ``makefile()``."""
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_hdfs_file('f', 'contents')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('f2', 'contents')
        # compare sorted listings: the order in which ls() yields paths is
        # not what this test is about, so don't let it cause failures
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('d/f2', 'contents')
        # sorted for the same reason as in test_ls_basic_2; note that
        # 'hdfs:///d/f2' sorts before 'hdfs:///f'
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///d/f2', 'hdfs:///f'])

    def test_cat_uncompressed(self):
        # mockhadoop doesn't support compressed files, so we won't test for it.
        # this is only a sanity check anyway.
        self.make_hdfs_file(os.path.join('data', 'foo'), 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')

        self.assertEqual(list(self.fs._cat_file(remote_path)),
                         ['foo\n', 'foo\n'])

    def test_du(self):
        # three 4-byte files: one at the root, two under more/
        self.make_hdfs_file('data1', 'abcd')
        self.makedirs('mock_hdfs_root/more')
        self.make_hdfs_file(os.path.join('more', 'data2'), 'defg')
        self.make_hdfs_file(os.path.join('more', 'data3'), 'hijk')

        self.assertEqual(self.fs.du('hdfs:///'), 12)
        self.assertEqual(self.fs.du('hdfs:///data1'), 4)
        self.assertEqual(self.fs.du('hdfs:///more'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/*'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/data2'), 4)
        self.assertEqual(self.fs.du('hdfs:///more/data3'), 4)

    def test_mkdir(self):
        self.fs.mkdir('hdfs:///d')
        # where hdfs:///d is expected to show up on the local disk
        local_path = os.path.join(self.tmp_dir, 'mock_hdfs_root', 'd')
        self.assertEqual(os.path.isdir(local_path), True)

    def test_rm(self):
        local_path = self.make_hdfs_file('f', 'contents')
        # sanity check: the file is really there before we remove it
        self.assertEqual(os.path.exists(local_path), True)
        self.fs.rm('hdfs:///f')
        self.assertEqual(os.path.exists(local_path), False)

    def test_touchz(self):
        # mockhadoop doesn't implement this.
        pass
开发者ID:adaptivelab,项目名称:mrjob,代码行数:85,代码来源:test_hadoop.py

示例7: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem.

    ``setUp()`` passes ``mock_hadoop_main`` and ``self.env`` to
    ``mock_popen()``; presumably this makes the filesystem's hadoop
    subprocess calls run against the mock (confirm in
    MockSubprocessTestCase).
    """

    def setUp(self):
        """Create the filesystem under test, build the mock environment,
        then hand it to ``mock_popen()``."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Fill ``self.env`` with the mock hadoop's environment variables,
        creating the directories they point at."""
        # setup fake hadoop home
        env = self.env = {}
        env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')

        streaming_jar = os.path.join(
            'mock_hadoop_home', 'contrib', 'streaming',
            'hadoop-0.X.Y-streaming.jar')
        self.makefile(streaming_jar, 'i are java bytecode')

        env['MOCK_HADOOP_TMP'] = self.makedirs('mock_hadoop')
        env['MOCK_HADOOP_VERSION'] = '2.7.1'

        env['USER'] = 'mrjob_tests'

    def make_mock_file(self, name, contents='contents'):
        """Create *name* under the directory ``get_mock_hdfs_root()``
        reports for ``self.env``; return the result of ``makefile()``."""
        hdfs_root = get_mock_hdfs_root(self.env)
        return self.makefile(os.path.join(hdfs_root, name), contents)

    def test_cat_uncompressed(self):
        self.make_mock_file('data/foo', 'foo\nfoo\n')

        remote_path = self.fs.join('hdfs:///data', 'foo')
        data = b''.join(self.fs._cat_file(remote_path))

        self.assertEqual(data, b'foo\nfoo\n')

    def test_cat_bz2(self):
        compressed = bz2.compress(b'foo\n' * 1000)
        self.make_mock_file('data/foo.bz2', compressed)

        remote_path = self.fs.join('hdfs:///data', 'foo.bz2')
        data = b''.join(self.fs._cat_file(remote_path))

        self.assertEqual(data, b'foo\n' * 1000)

    def test_cat_gz(self):
        compressed = gzip_compress(b'foo\n' * 10000)
        self.make_mock_file('data/foo.gz', compressed)

        remote_path = self.fs.join('hdfs:///data', 'foo.gz')
        data = b''.join(self.fs._cat_file(remote_path))

        self.assertEqual(data, b'foo\n' * 10000)

    def test_ls_empty(self):
        listing = list(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, [])

    def test_ls_basic(self):
        self.make_mock_file('f')
        listing = list(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///f'])

    def test_ls_basic_2(self):
        for name in ('f', 'f2'):
            self.make_mock_file(name)
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        for name in ('f', 'd/f2'):
            self.make_mock_file(name)
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///d/f2', 'hdfs:///f'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        listing = sorted(self.fs.ls('s3n://bucket/'))
        self.assertEqual(listing, ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_ls_s3a(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        listing = sorted(self.fs.ls('s3a://bucket/'))
        self.assertEqual(listing, ['s3a://bucket/f', 's3a://bucket/f3 win'])

    def test_single_space(self):
        self.make_mock_file('foo bar')
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///foo bar'])

    def test_double_space(self):
        self.make_mock_file('foo  bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
#.........这里部分代码省略.........
开发者ID:Yelp,项目名称:mrjob,代码行数:101,代码来源:test_hadoop.py

示例8: test_predefined_hadoop_bin

    def test_predefined_hadoop_bin(self):
        # a hadoop_bin supplied up front is returned as-is...
        predefined_bin = ['hadoop', '-v']
        self.fs = HadoopFilesystem(hadoop_bin=predefined_bin)

        found_bin = self.fs.get_hadoop_bin()
        self.assertEqual(found_bin, ['hadoop', '-v'])

        # ...and which() is never called
        self.assertFalse(self.which.called)
开发者ID:Yelp,项目名称:mrjob,代码行数:6,代码来源:test_hadoop.py

示例9: FindHadoopBinTestCase

class FindHadoopBinTestCase(SandboxedTestCase):
    """Tests for where ``HadoopFilesystem.get_hadoop_bin()`` finds the
    hadoop binary: a predefined value, ``HADOOP_*`` environment variables,
    ``PATH``, or the bare ``['hadoop']`` fallback."""

    def setUp(self):
        """Patch ``which()``, restrict ``PATH`` to the temp dir, and create
        the default filesystem object used by the tests."""
        super(FindHadoopBinTestCase, self).setUp()

        # track calls to which()
        self.which = self.start(patch('mrjob.fs.hadoop.which', wraps=which))

        # keep which() from searching in /bin, etc.
        os.environ['PATH'] = self.tmp_dir

        # create basic HadoopFilesystem (okay to overwrite)
        self.fs = HadoopFilesystem()

    def _add_hadoop_bin_for_envvar(self, envvar, *dirnames):
        """Create a fake executable named ``hadoop`` and point *envvar* at
        its directory tree.

        *os.environ[envvar]* is set to ``<tmp_dir>/<envvar, lowercased>``.
        The binary goes directly in that directory, or in the subdirectory
        named by *dirnames* (e.g. ``'bin'``) if any are given.

        Returns the path to the fake Hadoop binary.
        """
        envvar_dir = join(self.tmp_dir, envvar.lower())
        os.environ[envvar] = envvar_dir

        hadoop_bin_path = join(envvar_dir, *(dirnames + ('hadoop',)))

        self.makefile(hadoop_bin_path, executable=True)

        return hadoop_bin_path

    # tests without environment variables

    def test_do_nothing_on_init(self):
        # constructing the filesystem (in setUp()) must not call which()
        self.assertFalse(self.which.called)

    def test_fallback(self):
        self.assertFalse(self.which.called)

        # nothing to find, so we get the bare command name back
        self.assertEqual(self.fs.get_hadoop_bin(), ['hadoop'])

        self.which.assert_called_once_with('hadoop', path=None)

    def test_predefined_hadoop_bin(self):
        self.fs = HadoopFilesystem(hadoop_bin=['hadoop', '-v'])

        self.assertEqual(self.fs.get_hadoop_bin(), ['hadoop', '-v'])

        # a predefined hadoop_bin means no search at all
        self.assertFalse(self.which.called)

    # environment variable tests

    def _test_environment_variable(self, envvar, *dirnames):
        """Check if we can find the hadoop binary from *envvar*"""
        # okay to add after HadoopFilesystem() created; it hasn't looked yet
        hadoop_bin = self._add_hadoop_bin_for_envvar(envvar, *dirnames)

        self.assertEqual(self.fs.get_hadoop_bin(), [hadoop_bin])

    def test_hadoop_prefix(self):
        self._test_environment_variable('HADOOP_PREFIX', 'bin')

    def test_hadoop_home_envvar(self):
        self._test_environment_variable('HADOOP_HOME', 'bin')

    def test_hadoop_install(self):
        self._test_environment_variable('HADOOP_INSTALL', 'bin')

    def test_hadoop_install_hadoop_subdir(self):
        self._test_environment_variable('HADOOP_INSTALL', 'hadoop', 'bin')

    def test_path(self):
        self._test_environment_variable('PATH')

    def test_two_part_path(self):
        hadoop_path1 = join(self.tmp_dir, 'path1')
        hadoop_path1_bin = self.makefile(join(hadoop_path1, 'hadoop'),
                                         executable=True)
        hadoop_path2 = join(self.tmp_dir, 'path2')
        hadoop_path2_bin = self.makefile(join(hadoop_path2, 'hadoop'),
                                         executable=True)

        # use the platform's PATH separator rather than assuming ':'
        os.environ['PATH'] = os.pathsep.join([hadoop_path1, hadoop_path2])

        # the binary in the first PATH entry wins
        self.assertEqual(self.fs.get_hadoop_bin(), [hadoop_path1_bin])
        self.assertNotEqual(self.fs.get_hadoop_bin(), [hadoop_path2_bin])

    def test_hadoop_mapred_home(self):
        self._test_environment_variable('HADOOP_MAPRED_HOME', 'bin')

    def test_hadoop_anything_home(self):
        self._test_environment_variable('HADOOP_ANYTHING_HOME', 'bin')

    def test_other_environment_variable(self):
        # this variable is not one that leads to the binary being found,
        # so we still expect the fallback
        self._add_hadoop_bin_for_envvar('HADOOP_YARN_MRJOB_DIR', 'bin')

        self.assertEqual(self.fs.get_hadoop_bin(), ['hadoop'])

    # precedence tests

    def test_hadoop_prefix_beats_hadoop_home_envvar(self):
#.........这里部分代码省略.........
开发者ID:Yelp,项目名称:mrjob,代码行数:101,代码来源:test_hadoop.py

示例10: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem.

    ``setUp()`` passes ``mock_hadoop_main`` and ``self.env`` to
    ``mock_popen()``; presumably this makes the filesystem's hadoop
    subprocess calls run against the mock (confirm in
    MockSubprocessTestCase).
    """

    def setUp(self):
        """Create the filesystem under test, build the mock environment,
        then hand it to ``mock_popen()``."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Fill ``self.env`` with the mock hadoop's environment variables,
        creating the directories they point at."""
        # setup fake hadoop home
        env = self.env = {}
        env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')

        streaming_jar = os.path.join(
            'mock_hadoop_home', 'contrib', 'streaming',
            'hadoop-0.X.Y-streaming.jar')
        self.makefile(streaming_jar, 'i are java bytecode')

        for var_name, dir_name in (
                ('MOCK_HDFS_ROOT', 'mock_hdfs_root'),
                ('MOCK_HADOOP_OUTPUT', 'mock_hadoop_output')):
            env[var_name] = self.makedirs(dir_name)

        env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_hdfs_file(self, name, contents='contents'):
        """Create file *name* under ``mock_hdfs_root`` holding *contents*,
        and return the result of ``makefile()``."""
        relative_path = os.path.join('mock_hdfs_root', name)
        return self.makefile(relative_path, contents)

    def make_hdfs_dir(self, name):
        """Create directory *name* under ``mock_hdfs_root`` and return the
        result of ``makedirs()``."""
        relative_path = os.path.join('mock_hdfs_root', name)
        return self.makedirs(relative_path)

    def make_hdfs_tree(self, path, files=None):
        """Create *files* (relative paths) below *path*, check that ``ls``
        on ``<path>/*`` reports exactly those files, and return *path*.

        Each file's contents are its own path relative to the HDFS root.
        When *files* is None, a small default tree is used.
        """
        if files is None:
            files = ('f', 'g/a/b', 'g/a/a/b')

        expected_uris = []
        for relative_name in sorted(files):
            hdfs_path = os.path.join(path, relative_name)
            self.make_hdfs_file(hdfs_path, hdfs_path)
            expected_uris.append("hdfs:///" + hdfs_path)

        glob_uri = "hdfs:///" + path.rstrip('/') + '/*'
        self.assertEqual(sorted(self.fs.ls(glob_uri)), expected_uris)

        return path

    def test_ls_empty(self):
        listing = list(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, [])

    def test_ls_basic(self):
        self.make_hdfs_file('f')
        listing = list(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///f'])

    def test_ls_basic_2(self):
        for name in ('f', 'f2'):
            self.make_hdfs_file(name)
        listing = list(self.fs.ls('hdfs:///'))
        self.assertItemsEqual(listing, ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        for name in ('f', 'd/f2'):
            self.make_hdfs_file(name)
        listing = list(self.fs.ls('hdfs:///'))
        self.assertItemsEqual(listing, ['hdfs:///f', 'hdfs:///d/f2'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_hdfs_file('f', 'foo')
        self.make_hdfs_file('f3 win', 'foo' * 10)
        listing = list(self.fs.ls('s3n://bucket/'))
        self.assertItemsEqual(
            listing, ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_single_space(self):
        self.make_hdfs_file('foo bar')
        listing = list(self.fs.ls('hdfs:///'))
        self.assertItemsEqual(listing, ['hdfs:///foo bar'])

    def test_double_space(self):
        self.make_hdfs_file('foo  bar')
        listing = list(self.fs.ls('hdfs:///'))
        self.assertItemsEqual(listing, ['hdfs:///foo  bar'])

    def test_cat_uncompressed(self):
        # mockhadoop doesn't support compressed files, so we won't test for it.
        # this is only a sanity check anyway.
        self.make_hdfs_file('data/foo', 'foo\nfoo\n')

        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, ['foo\n', 'foo\n'])

    def test_write_str(self):
        # what we write() as a string should come back unchanged from cat()
        path = 'hdfs:///write-test-str'
        content = 'some content!'
        self.fs.write(path, content)
        read_back = "".join(self.fs.cat(path))
        self.assertEqual(read_back, content)

    def test_write_file(self):
        path = 'hdfs:///write-test-fileobj'
        content = StringIO('some content!')
#.........这里部分代码省略.........
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:101,代码来源:test_hadoop.py

示例11: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):
    """Tests for HadoopFilesystem.

    ``setUp()`` passes ``mock_hadoop_main`` and ``self.env`` to
    ``mock_popen()``; presumably this makes the filesystem's hadoop
    subprocess calls run against the mock (confirm in
    MockSubprocessTestCase).
    """

    def setUp(self):
        """Create the filesystem under test, build the mock environment,
        then hand it to ``mock_popen()``."""
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        """Fill ``self.env`` with the mock hadoop's environment variables,
        creating the directories they point at."""
        # setup fake hadoop home
        env = self.env = {}
        env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')

        streaming_jar = os.path.join(
            'mock_hadoop_home', 'contrib', 'streaming',
            'hadoop-0.X.Y-streaming.jar')
        self.makefile(streaming_jar, 'i are java bytecode')

        for var_name, dir_name in (
                ('MOCK_HDFS_ROOT', 'mock_hdfs_root'),
                ('MOCK_HADOOP_OUTPUT', 'mock_hadoop_output')):
            env[var_name] = self.makedirs(dir_name)

        env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_mock_file(self, name, contents='contents'):
        """Create *name* under ``mock_hdfs_root`` holding *contents*, and
        return the result of ``makefile()``."""
        relative_path = os.path.join('mock_hdfs_root', name)
        return self.makefile(relative_path, contents)

    def test_ls_empty(self):
        listing = list(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, [])

    def test_ls_basic(self):
        self.make_mock_file('f')
        listing = list(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///f'])

    def test_ls_basic_2(self):
        for name in ('f', 'f2'):
            self.make_mock_file(name)
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        for name in ('f', 'd/f2'):
            self.make_mock_file(name)
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///d/f2', 'hdfs:///f'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        listing = sorted(self.fs.ls('s3n://bucket/'))
        self.assertEqual(listing, ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_single_space(self):
        self.make_mock_file('foo bar')
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///foo bar'])

    def test_double_space(self):
        self.make_mock_file('foo  bar')
        listing = sorted(self.fs.ls('hdfs:///'))
        self.assertEqual(listing, ['hdfs:///foo  bar'])

    def test_cat_uncompressed(self):
        self.make_mock_file('data/foo', 'foo\nfoo\n')

        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, [b'foo\n', b'foo\n'])

    def test_cat_bz2(self):
        compressed = bz2.compress(b'foo\n' * 1000)
        self.make_mock_file('data/foo.bz2', compressed)

        remote_path = self.fs.path_join('hdfs:///data', 'foo.bz2')
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, [b'foo\n'] * 1000)

    def test_cat_gz(self):
        compressed = gzip_compress(b'foo\n' * 10000)
        self.make_mock_file('data/foo.gz', compressed)

        remote_path = self.fs.path_join('hdfs:///data', 'foo.gz')
        lines = list(self.fs._cat_file(remote_path))

        self.assertEqual(lines, [b'foo\n'] * 10000)

    def test_du(self):
        self.make_mock_file('data1', 'abcd')
        self.make_mock_file('more/data2', 'defg')
        self.make_mock_file('more/data3', 'hijk')

        self.assertEqual(self.fs.du('hdfs:///'), 12)
        self.assertEqual(self.fs.du('hdfs:///data1'), 4)
        self.assertEqual(self.fs.du('hdfs:///more'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/*'), 8)
#.........这里部分代码省略.........
开发者ID:DanisHack,项目名称:mrjob,代码行数:101,代码来源:test_hadoop.py


注：本文中的mrjob.fs.hadoop.HadoopFilesystem类示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。