This article collects typical usage examples of the Python method apache_beam.runners.DataflowRunner._pardo_fn_data. If you are wondering what this method does, how to call it, or what real-world uses look like, the curated example below may help. For more context you can also look at the containing class, apache_beam.runners.DataflowRunner.
One code example of the DataflowRunner._pardo_fn_data method is shown below.
Example 1: run_ParDo
# Required import: from apache_beam.runners import DataflowRunner [as alias]
# Or: from apache_beam.runners.DataflowRunner import _pardo_fn_data [as alias]
def run_ParDo(self, transform_node):
  transform = transform_node.transform
  output = transform_node.outputs[None]
  element_coder = self._get_coder(output)
  map_task_index, producer_index, output_index = self.outputs[
      transform_node.inputs[0]]
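  # self.outputs maps each PCollection to a (map_task_index, producer_index,
  # output_index) triple: which map task produces it, which operation within
  # that task, and which of that operation's outputs.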
  # If any of this ParDo's side inputs depend on outputs from this map_task,
  # we can't continue growing this map task.
  def is_reachable(leaf, root):
    if leaf == root:
      return True
    else:
      return any(is_reachable(x, root) for x in self.dependencies[leaf])
  if any(is_reachable(self.outputs[side_input.pvalue][0], map_task_index)
         for side_input in transform_node.side_inputs):
    # Start a new map task.
    input_element_coder = self._get_coder(transform_node.inputs[0])
    output_buffer = OutputBuffer(input_element_coder)
    fusion_break_write = operation_specs.WorkerInMemoryWrite(
        output_buffer=output_buffer,
        write_windowed_values=True,
        input=(producer_index, output_index),
        output_coders=[input_element_coder])
    self.map_tasks[map_task_index].append(
        (transform_node.full_label + '/Write', fusion_break_write))
    original_map_task_index = map_task_index
    map_task_index, producer_index, output_index = len(self.map_tasks), 0, 0
    fusion_break_read = operation_specs.WorkerRead(
        output_buffer.source_bundle(),
        output_coders=[input_element_coder])
    self.map_tasks.append(
        [(transform_node.full_label + '/Read', fusion_break_read)])
    self.dependencies[map_task_index].add(original_map_task_index)
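  # Side inputs are materialized separately: run_side_write emits each side
  # input PCollection to its own buffer, which the DoFn then reads back
  # through a WorkerSideInputSource.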
  def create_side_read(side_input):
    label = self.side_input_labels[side_input]
    output_buffer = self.run_side_write(
        side_input.pvalue, '%s/%s' % (transform_node.full_label, label))
    return operation_specs.WorkerSideInputSource(
        output_buffer.source(), label)
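  # Build the DoFn operation spec. DataflowRunner._pardo_fn_data extracts the
  # DoFn and its configuration from the transform node, and the result is
  # pickled so the worker can reconstruct it.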
  do_op = operation_specs.WorkerDoFn(
      serialized_fn=pickler.dumps(DataflowRunner._pardo_fn_data(
          transform_node,
          lambda side_input: self.side_input_labels[side_input])),
      output_tags=[PropertyNames.OUT] + ['%s_%s' % (PropertyNames.OUT, tag)
                                         for tag in transform.output_tags],
      # Same assumption that DataflowRunner has about coders being compatible
      # across outputs.
      output_coders=[element_coder] * (len(transform.output_tags) + 1),
      input=(producer_index, output_index),
      side_inputs=[create_side_read(side_input)
                   for side_input in transform_node.side_inputs])
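  # Register where this ParDo's main output and each tagged output can be
  # found by downstream transforms.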
  producer_index = len(self.map_tasks[map_task_index])
  self.outputs[transform_node.outputs[None]] = (
      map_task_index, producer_index, 0)
  for ix, tag in enumerate(transform.output_tags):
    self.outputs[transform_node.outputs[tag]] = (
        map_task_index, producer_index, ix + 1)
  self.map_tasks[map_task_index].append((transform_node.full_label, do_op))
  for side_input in transform_node.side_inputs:
    self.dependencies[map_task_index].add(self.outputs[side_input.pvalue][0])
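For reference, the call pattern at the heart of this example can be sketched on its own. The snippet below is a minimal illustration, not part of the example above: it assumes an older apache_beam release where apache_beam.internal.pickler and the internal static helper DataflowRunner._pardo_fn_data are available, and the names transform_node and side_input_labels stand in for objects a real pipeline run would supply.

# Hypothetical sketch: serialize the DoFn data for a ParDo transform node.
from apache_beam.internal import pickler
from apache_beam.runners import DataflowRunner

def build_serialized_fn(transform_node, side_input_labels):
  # _pardo_fn_data is an internal helper; as in the example above, we assume
  # it takes the transform node and a function mapping each side input to
  # its label.
  fn_data = DataflowRunner._pardo_fn_data(
      transform_node,
      lambda side_input: side_input_labels[side_input])
  # The worker would later restore this payload with pickler.loads(...).
  return pickler.dumps(fn_data)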