This page collects typical usage examples of the run_task function from the Python module pydoop.mapreduce.pipes. If you have been wondering what run_task does, how to call it, and what real code that uses it looks like, the hand-picked examples below may help.
The following shows 15 code examples of run_task, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
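Before the examples, here is a minimal sketch of the pattern most of them follow: subclass Mapper and Reducer from pydoop.mapreduce.api, wrap them in a Factory, and hand the factory to run_task. The word-count logic below is illustrative only; the class names and counting scheme are assumptions, not taken from the examples on this page.

from pydoop.mapreduce.api import Mapper, Reducer
from pydoop.mapreduce.pipes import run_task, Factory


class WcMapper(Mapper):  # hypothetical mapper: emit each word with count 1
    def map(self, context):
        for word in context.value.split():
            context.emit(word, 1)


class WcReducer(Reducer):  # hypothetical reducer: sum the counts per word
    def reduce(self, context):
        context.emit(context.key, sum(context.values))


if __name__ == "__main__":
    # run_task drives the pipes protocol: it reads commands from the upstream
    # channel, calls the user's map/reduce methods, and writes results back.
    run_task(Factory(mapper_class=WcMapper, reducer_class=WcReducer))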
Example 1: test_map_combiner_reduce
def test_map_combiner_reduce(self):
    factory = TFactory(combiner=TReducer)
    sas = SortAndShuffle()
    run_task(factory, istream=self.stream, ostream=sas)
    with self._mkf('foo_map_combiner_reduce.out') as o:
        run_task(factory, istream=sas, ostream=o,
                 private_encoding=False)
    self.check_result('foo_map_combiner_reduce.out', STREAM_1)
Example 2: test_timer
def test_timer(self):
    factory = TFactory(mapper=SleepingMapper)
    exp_count = {
        'registerCounter': 1,
        'incrementCounter': Counter(
            [_[0] for _ in STREAM_1]
        )[TextWriter.MAP_ITEM]
    }
    with self._mkf('foo_map_only.out') as o:
        run_task(factory, istream=self.stream1, ostream=o)
    self.check_counts(o.name, exp_count)
Example 3: test_map_only
def test_map_only(self):
    factory = TFactory()
    fname = self._mkfn('foo_map_only.out')
    with open(fname, 'w') as o:
        run_task(factory, istream=self.stream1, ostream=o)
    exp_count = {
        'done': 1,
        'progress': 1,
        'output': sum(len(_[2].split())
                      for _ in STREAM_1 if _[0] is TextWriter.MAP_ITEM)
    }
    self.check_counts(fname, exp_count)
Example 4: test_timer
def test_timer(self):
    factory = TFactory(mapper=SleepingMapper)
    with self._mkf('foo_map_only.out') as o:
        run_task(factory, istream=self.stream1, ostream=o)
    count = Counter()
    with open(o.name) as f:
        for line in f:
            count[line.strip().split('\t', 1)[0]] += 1
    exp_count = {
        'registerCounter': 2,
        'incrementCounter': 2 * Counter([_[0] for _ in STREAM_1])['mapItem']
    }
    for k, v in exp_count.items():  # Python 3: dict has no iteritems()
        self.assertTrue(k in count)
        self.assertEqual(count[k], v)
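Examples 2 and 4 count the registerCounter and incrementCounter commands that a task emits over the pipes protocol. For reference, a mapper generates those commands through the context's counter API. The sketch below is an assumed illustration (SlowMapper and the counter names are made up), modeled on pydoop's standard counter usage:

import time

from pydoop.mapreduce.api import Mapper


class SlowMapper(Mapper):  # hypothetical stand-in for SleepingMapper
    def __init__(self, context):
        super(SlowMapper, self).__init__(context)
        # get_counter sends one registerCounter command upstream
        self.items = context.get_counter("EXAMPLE", "MAP_ITEMS")

    def map(self, context):
        time.sleep(0.01)  # simulate slow work, as the name SleepingMapper suggests
        context.emit(context.key, context.value)
        # each call sends one incrementCounter command upstream
        context.increment_counter(self.items, 1)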
Example 5: __run_test
def __run_test(self, mode, mapper_class, context_class):
    cmd_file = self.__write_cmd_file(mode)
    pp.run_task(
        pp.Factory(mapper_class=mapper_class), private_encoding=False,
        context_class=context_class, cmd_file=cmd_file)
    out_fn = cmd_file + '.out'
    out_records = []
    with open(out_fn, 'rb') as f:
        bf = BinaryDownStreamAdapter(f)
        for cmd, args in bf:
            if cmd == bf.OUTPUT:
                name, color = args
                out_records.append({'name': name, 'favorite_color': color})
    self.assertEqual(len(out_records), len(self.records))
    for out_r, r in zip(out_records, self.records):
        for k, v in iteritems(out_r):
            self.assertEqual(v.decode('UTF-8'), r[k])
Example 6: __run_test
def __run_test(self, mode, mapper_class, context_class):
    cmd_file = self.__write_cmd_file(mode)
    pp.run_task(
        pp.Factory(mapper_class=mapper_class), private_encoding=False,
        context_class=context_class, cmd_file=cmd_file
    )
    out_fn = cmd_file + '.out'
    out_records = []
    with open(out_fn) as ostream:
        for cmd, args in BinaryDownStreamFilter(ostream):
            if cmd == 'output':
                name, color = args
                out_records.append({'name': name, 'favorite_color': color})
    self.assertEqual(len(out_records), len(self.records))
    for out_r, r in zip(out_records, self.records):
        for k, v in out_r.items():  # Python 3: dict has no iteritems()
            self.assertEqual(v, r[k])
Example 7: _test_map_reduce_with_private_encoding_helper
def _test_map_reduce_with_private_encoding_helper(self, factory,
                                                  fast_combiner=False):
    self.stream3.close()
    cmd_file = self.stream3.name
    out_file = cmd_file + '.out'
    reduce_infile = cmd_file + '.reduce'
    reduce_outfile = reduce_infile + '.out'
    run_task(factory, cmd_file=cmd_file, private_encoding=True,
             fast_combiner=fast_combiner)
    data = {}
    bw = BinaryWriter
    with open(out_file, 'rb') as f:
        bf = BinaryDownStreamAdapter(f)
        for cmd, args in bf:
            if cmd == bw.OUTPUT:
                data.setdefault(args[0], []).append(args[1])
    stream = []
    stream.append((bw.START_MESSAGE, 0))
    stream.append((bw.SET_JOB_CONF, 'key1', 'value1', 'key2', 'value2'))
    stream.append((bw.RUN_REDUCE, 0, 0))
    for k in data:
        stream.append((bw.REDUCE_KEY, k))
        for v in data[k]:
            stream.append((bw.REDUCE_VALUE, v))
    stream.append((bw.CLOSE,))
    binary_stream_writer(reduce_infile, stream)
    run_task(factory, cmd_file=reduce_infile, private_encoding=True)
    with open(reduce_outfile, 'rb') as f:
        with self._mkf('foo.out', mode='w') as o:
            bf = BinaryUpStreamDecoder(f)
            for cmd, args in bf:
                if cmd == bw.PROGRESS:
                    o.write('progress\t%s\n' % args[0])
                elif cmd == bw.OUTPUT:
                    o.write('output\t%s\n' %
                            '\t'.join([x.decode('utf-8') for x in args]))
                elif cmd == bw.DONE:
                    o.write('done\n')
    self.check_result('foo.out', STREAM_2)
Example 8: _test_map_reduce_with_private_encoding_helper
def _test_map_reduce_with_private_encoding_helper(self, factory,
                                                  fast_combiner=False):
    self.stream3.close()
    cmd_file = self.stream3.name
    out_file = cmd_file + '.out'
    reduce_infile = cmd_file + '.reduce'
    reduce_outfile = reduce_infile + '.out'
    run_task(factory, cmd_file=cmd_file, private_encoding=True,
             fast_combiner=fast_combiner)
    data = {}
    with open(out_file) as f:
        bf = BinaryDownStreamFilter(f)
        for cmd, args in bf:
            if cmd == 'output':
                data.setdefault(args[0], []).append(args[1])
    stream = []
    stream.append(('start', 0))
    stream.append(('setJobConf', ('key1', 'value1', 'key2', 'value2')))
    stream.append(('runReduce', 0, False))
    for k in data:
        stream.append(('reduceKey', k))
        for v in data[k]:
            stream.append(('reduceValue', v))
    stream.append(('close',))
    binary_stream_writer(reduce_infile, stream)
    run_task(factory, cmd_file=reduce_infile, private_encoding=True)
    with open(reduce_outfile) as f, self._mkf('foo.out', mode='w') as o:
        bf = BinaryUpStreamDecoder(f)
        for cmd, args in bf:
            if cmd == 'progress':
                o.write('progress\t%s\n' % args[0])
            elif cmd == 'output':
                o.write('output\t%s\n' % '\t'.join(args))
            elif cmd == 'done':
                o.write('done\n')
    self.check_result('foo.out', STREAM_3)
Example 9: __main__
def __main__():
    factory = pp.Factory(mapper_class=Mapper)
    pp.run_task(factory, context_class=AvroContext)
Example 10: FilterMapper
import struct  # needed by FilterMapper.map below

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class FilterMapper(Mapper):
    """
    Process a wordcount output stream, emitting only records relative to
    words whose count is equal to or above the configured threshold.
    """
    def __init__(self, context):
        super(FilterMapper, self).__init__(context)
        jc = context.job_conf
        self.threshold = jc.get_int("filter.occurrence.threshold")

    def map(self, context):
        word, occurrence = context.key, context.value
        occurrence = struct.unpack(">i", occurrence)[0]
        if occurrence >= self.threshold:
            context.emit(word, str(occurrence))


class FilterReducer(Reducer):
    def reduce(self, context):
        pass


if __name__ == "__main__":
    run_task(Factory(FilterMapper, FilterReducer))
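FilterMapper unpacks each value with struct.unpack(">i", ...), i.e. a 4-byte big-endian integer, which matches Hadoop's IntWritable serialization. For illustration, a hypothetical upstream reducer could produce values in that layout as sketched below; the class name is an assumption, since the actual upstream job is not shown on this page.

import struct

from pydoop.mapreduce.api import Reducer


class CountReducer(Reducer):  # hypothetical producer of FilterMapper's input
    def reduce(self, context):
        total = sum(context.values)
        # pack the count as a 4-byte big-endian int, the layout that
        # FilterMapper's struct.unpack(">i", ...) expects
        context.emit(context.key, struct.pack(">i", total))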
Example 11: __main__
def __main__():
    factory = pp.Factory(mapper_class=Mapper, reducer_class=Reducer)
    pp.run_task(factory, private_encoding=True, context_class=AvroContext)
Example 12: __main__
def __main__():
    pipes.run_task(pipes.Factory(mapper_class=Mapper))
Example 13: __main__
def __main__():
    pipes.run_task(pipes.Factory(
        mapper_class=Mapper,
        record_writer_class=Writer,
    ))
Example 14: main
def main():
    return run_task(Factory(Mapper, Reducer, combiner_class=Reducer))
Example 15: __main__
def __main__():
    """Main function to be executed by the pydoop framework."""
    factory = pp.Factory(mapper_class=Mapper, reducer_class=Reducer,
                         record_reader_class=Reader)
    pp.run_task(factory, private_encoding=True)