本文整理汇总了Python中mrjob.fs.hadoop.HadoopFilesystem.cat方法的典型用法代码示例。如果您正苦于以下问题:Python HadoopFilesystem.cat方法的具体用法?Python HadoopFilesystem.cat怎么用?Python HadoopFilesystem.cat使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类mrjob.fs.hadoop.HadoopFilesystem的用法示例。
在下文中一共展示了HadoopFilesystem.cat方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import cat [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
def setUp(self):
    """Prepare the filesystem under test and the mock hadoop environment.

    Creates ``self.fs``, fills ``self.env`` via set_up_mock_hadoop(), then
    passes ``fs_hadoop``, ``mock_hadoop_main`` and that env to
    ``self.mock_popen()`` (presumably so hadoop subprocess calls are served
    by the mock -- confirm in MockSubprocessTestCase).
    """
    super(HadoopFSTestCase, self).setUp()

    # wrap HadoopFilesystem so it gets cat()
    hadoop_bin = ['hadoop']
    self.fs = HadoopFilesystem(hadoop_bin)

    # self.env must exist before it is handed to mock_popen()
    self.set_up_mock_hadoop()
    self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)
def set_up_mock_hadoop(self):
    """Populate ``self.env`` with the settings for the mock hadoop binary.

    Creates a fake hadoop home containing a placeholder streaming jar and
    a temp directory for the mock, and records them (plus a hadoop version
    and user name) in ``self.env``.
    """
    env = self.env = {}

    # setup fake hadoop home
    env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
    jar_path = os.path.join(
        'mock_hadoop_home', 'contrib', 'streaming',
        'hadoop-0.X.Y-streaming.jar')
    self.makefile(jar_path, 'i are java bytecode')

    env['MOCK_HADOOP_TMP'] = self.makedirs('mock_hadoop')
    env['MOCK_HADOOP_VERSION'] = '2.7.1'
    env['USER'] = 'mrjob_tests'
def make_mock_file(self, name, contents='contents'):
    """Create *name* beneath the mock HDFS root and return makefile()'s result.

    :param name: path relative to ``get_mock_hdfs_root(self.env)``
    :param contents: data to write into the file
    """
    hdfs_root = get_mock_hdfs_root(self.env)
    target = os.path.join(hdfs_root, name)
    return self.makefile(target, contents)
def test_cat_uncompressed(self):
    """_cat_file() on an uncompressed file yields the file's bytes."""
    self.make_mock_file('data/foo', 'foo\nfoo\n')

    uri = self.fs.join('hdfs:///data', 'foo')
    chunks = self.fs._cat_file(uri)

    self.assertEqual(b''.join(chunks), b'foo\nfoo\n')
def test_cat_bz2(self):
    """_cat_file() on a ``.bz2`` file yields the decompressed bytes."""
    payload = b'foo\n' * 1000
    self.make_mock_file('data/foo.bz2', bz2.compress(payload))

    uri = self.fs.join('hdfs:///data', 'foo.bz2')
    chunks = self.fs._cat_file(uri)

    self.assertEqual(b''.join(chunks), payload)
def test_cat_gz(self):
    """_cat_file() on a ``.gz`` file yields the decompressed bytes."""
    payload = b'foo\n' * 10000
    self.make_mock_file('data/foo.gz', gzip_compress(payload))

    uri = self.fs.join('hdfs:///data', 'foo.gz')
    chunks = self.fs._cat_file(uri)

    self.assertEqual(b''.join(chunks), payload)
def test_ls_empty(self):
    """ls() yields nothing when this test has created no files."""
    listing = list(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, [])
def test_ls_basic(self):
    """ls() reports a single file as an ``hdfs:///`` URI."""
    self.make_mock_file('f')

    listing = list(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///f'])
def test_ls_basic_2(self):
    """ls() reports every file at the root (order-insensitive check)."""
    for name in ('f', 'f2'):
        self.make_mock_file(name)

    listing = sorted(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///f', 'hdfs:///f2'])
def test_ls_recurse(self):
    """ls() on the root also reports files inside subdirectories."""
    for name in ('f', 'd/f2'):
        self.make_mock_file(name)

    listing = sorted(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///d/f2', 'hdfs:///f'])
def test_ls_s3n(self):
    """ls() on an ``s3n://`` URI returns paths under that scheme and bucket."""
    # hadoop fs -lsr doesn't have user and group info when reading from s3
    self.make_mock_file('f', 'foo')
    self.make_mock_file('f3 win', 'foo' * 10)

    listing = sorted(self.fs.ls('s3n://bucket/'))
    expected = ['s3n://bucket/f', 's3n://bucket/f3 win']
    self.assertEqual(listing, expected)
def test_ls_s3a(self):
    """ls() on an ``s3a://`` URI returns paths under that scheme and bucket."""
    # hadoop fs -lsr doesn't have user and group info when reading from s3
    self.make_mock_file('f', 'foo')
    self.make_mock_file('f3 win', 'foo' * 10)

    listing = sorted(self.fs.ls('s3a://bucket/'))
    expected = ['s3a://bucket/f', 's3a://bucket/f3 win']
    self.assertEqual(listing, expected)
def test_single_space(self):
    """ls() returns a file name containing a space intact."""
    self.make_mock_file('foo bar')

    listing = sorted(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///foo bar'])
def test_double_space(self):
self.make_mock_file('foo bar')
self.assertEqual(sorted(self.fs.ls('hdfs:///')),
#.........这里部分代码省略.........
示例2: HadoopFSTestCase
# 需要导入模块: from mrjob.fs.hadoop import HadoopFilesystem [as 别名]
# 或者: from mrjob.fs.hadoop.HadoopFilesystem import cat [as 别名]
class HadoopFSTestCase(MockSubprocessTestCase):
def setUp(self):
    """Prepare the filesystem under test and the mock hadoop environment.

    Creates ``self.fs``, fills ``self.env`` via set_up_mock_hadoop(), then
    passes ``fs_hadoop``, ``mock_hadoop_main`` and that env to
    ``self.mock_popen()`` (presumably so hadoop subprocess calls are served
    by the mock -- confirm in MockSubprocessTestCase).
    """
    super(HadoopFSTestCase, self).setUp()

    # wrap HadoopFilesystem so it gets cat()
    hadoop_bin = ['hadoop']
    self.fs = HadoopFilesystem(hadoop_bin)

    # self.env must exist before it is handed to mock_popen()
    self.set_up_mock_hadoop()
    self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)
def set_up_mock_hadoop(self):
    """Populate ``self.env`` with the settings for the mock hadoop binary.

    Creates a fake hadoop home containing a placeholder streaming jar,
    plus directories for the mock HDFS root and the mock's output, and
    records them (with a user name) in ``self.env``.
    """
    env = self.env = {}

    # setup fake hadoop home
    env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
    jar_path = os.path.join(
        'mock_hadoop_home', 'contrib', 'streaming',
        'hadoop-0.X.Y-streaming.jar')
    self.makefile(jar_path, 'i are java bytecode')

    env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
    env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
    env['USER'] = 'mrjob_tests'
    # don't set MOCK_HADOOP_LOG, we get command history other ways
def make_hdfs_file(self, name, contents='contents'):
    """Create *name* under ``mock_hdfs_root`` and return makefile()'s result.

    :param name: path relative to the ``mock_hdfs_root`` directory
    :param contents: data to write into the file
    """
    target = os.path.join('mock_hdfs_root', name)
    return self.makefile(target, contents)
def make_hdfs_dir(self, name):
    """Create directory *name* under ``mock_hdfs_root``; return makedirs()'s result."""
    target = os.path.join('mock_hdfs_root', name)
    return self.makedirs(target)
def make_hdfs_tree(self, path, files=None):
    """Create a small file tree under *path* and check ls() can see it.

    :param path: directory (relative to the mock HDFS root) to populate
    :param files: relative file names to create; defaults to
                  ``('f', 'g/a/b', 'g/a/a/b')``
    :return: *path*, unchanged

    Each file's contents is its own joined path. After creating the
    files, asserts that a sorted ``ls()`` of ``hdfs:///<path>/*`` equals
    the URIs of the created files.
    """
    relative_names = ('f', 'g/a/b', 'g/a/a/b') if files is None else files

    expected_uris = []
    for rel in sorted(relative_names):
        full = os.path.join(path, rel)
        self.make_hdfs_file(full, full)
        expected_uris.append("hdfs:///" + full)

    glob_uri = "hdfs:///" + path.rstrip('/') + '/*'
    self.assertEqual(sorted(self.fs.ls(glob_uri)), expected_uris)
    return path
def test_ls_empty(self):
    """ls() yields nothing when this test has created no files."""
    listing = list(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, [])
def test_ls_basic(self):
    """ls() reports a single file as an ``hdfs:///`` URI."""
    self.make_hdfs_file('f')

    listing = list(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///f'])
def test_ls_basic_2(self):
    """ls() reports every file at the root (order-insensitive check)."""
    self.make_hdfs_file('f')
    self.make_hdfs_file('f2')

    # assertItemsEqual() only exists in Python 2's unittest; comparing
    # sorted lists is the same order-insensitive check on any version.
    self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                     ['hdfs:///f', 'hdfs:///f2'])
def test_ls_recurse(self):
    """ls() on the root also reports files inside subdirectories."""
    self.make_hdfs_file('f')
    self.make_hdfs_file('d/f2')

    # assertItemsEqual() only exists in Python 2's unittest; comparing
    # sorted lists is the same order-insensitive check on any version.
    self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                     ['hdfs:///d/f2', 'hdfs:///f'])
def test_ls_s3n(self):
    """ls() on an ``s3n://`` URI returns paths under that scheme and bucket."""
    # hadoop fs -lsr doesn't have user and group info when reading from s3
    self.make_hdfs_file('f', 'foo')
    self.make_hdfs_file('f3 win', 'foo' * 10)

    # assertItemsEqual() only exists in Python 2's unittest; comparing
    # sorted lists is the same order-insensitive check on any version.
    self.assertEqual(sorted(self.fs.ls('s3n://bucket/')),
                     ['s3n://bucket/f', 's3n://bucket/f3 win'])
def test_single_space(self):
    """ls() returns a file name containing a single space intact."""
    self.make_hdfs_file('foo bar')

    # assertItemsEqual() only exists in Python 2's unittest; comparing
    # sorted lists is the same order-insensitive check on any version.
    self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                     ['hdfs:///foo bar'])
def test_double_space(self):
    """ls() returns a file name containing two consecutive spaces intact."""
    # The name must really contain TWO spaces; with a single space this
    # test would just duplicate test_single_space.
    self.make_hdfs_file('foo  bar')

    # assertItemsEqual() only exists in Python 2's unittest; comparing
    # sorted lists is the same order-insensitive check on any version.
    self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                     ['hdfs:///foo  bar'])
def test_cat_uncompressed(self):
    """_cat_file() on an uncompressed file yields it line by line."""
    # mockhadoop doesn't support compressed files, so we won't test for it.
    # this is only a sanity check anyway.
    self.make_hdfs_file('data/foo', 'foo\nfoo\n')

    uri = self.fs.path_join('hdfs:///data', 'foo')
    lines = list(self.fs._cat_file(uri))

    self.assertEqual(lines, ['foo\n', 'foo\n'])
def test_write_str(self):
    """Content passed to write() as a string is read back by cat()."""
    uri = 'hdfs:///write-test-str'
    expected = 'some content!'

    self.fs.write(uri, expected)

    read_back = "".join(self.fs.cat(uri))
    self.assertEqual(read_back, expected)
def test_write_file(self):
path = 'hdfs:///write-test-fileobj'
content = StringIO('some content!')
#.........这里部分代码省略.........