本文整理汇总了Python中apache_beam.runners.DataflowRunner.group_by_key_input_visitor方法的典型用法代码示例。如果您正苦于以下问题：Python DataflowRunner.group_by_key_input_visitor方法的具体用法？Python DataflowRunner.group_by_key_input_visitor怎么用？Python DataflowRunner.group_by_key_input_visitor使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类apache_beam.runners.DataflowRunner的用法示例。
在下文中一共展示了DataflowRunner.group_by_key_input_visitor方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_group_by_key_input_visitor_for_non_gbk_transforms
# 需要导入模块: from apache_beam.runners import DataflowRunner [as 别名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 别名]
def test_group_by_key_input_visitor_for_non_gbk_transforms(self):
    """Check that visiting a non-GroupByKey transform keeps the input type.

    For each of Flatten and Map, the input PCollection's element type is
    set to ``Any``, the group-by-key input visitor is run over the applied
    transform, and the element type is asserted to still be ``Any``.
    """
    pipeline = TestPipeline()
    pcoll = PCollection(pipeline)
    non_gbk_transforms = [beam.Flatten(), beam.Map(lambda x: x)]
    for transform in non_gbk_transforms:
        # Reset before every visit so each transform starts from ``Any``.
        pcoll.element_type = typehints.Any
        visitor = DataflowRunner.group_by_key_input_visitor()
        applied = AppliedPTransform(None, transform, "label", [pcoll])
        visitor.visit_transform(applied)
        self.assertEqual(pcoll.element_type, typehints.Any)
示例2: test_group_by_key_input_visitor_with_invalid_inputs
# 需要导入模块: from apache_beam.runners import DataflowRunner [as 别名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 别名]
def test_group_by_key_input_visitor_with_invalid_inputs(self):
    """Check that non-tuple element types are rejected for GroupByKey input.

    For both ``_GroupByKeyOnly`` and ``GroupByKey``, an input PCollection
    typed as ``TupleSequenceConstraint`` or ``Set`` is visited, and the
    visitor is expected to raise ``ValueError`` with the message
    "Input to GroupByKey must be of Tuple or Any type".
    """
    # NOTE(review): another example in this listing defines a method with
    # this exact name; if both end up in the same class, the later
    # definition shadows this one and this test never runs.
    p = TestPipeline()
    pcoll1 = PCollection(p)
    pcoll2 = PCollection(p)
    for transform in [_GroupByKeyOnly(), beam.GroupByKey()]:
        pcoll1.element_type = typehints.TupleSequenceConstraint
        pcoll2.element_type = typehints.Set
        err_msg = "Input to GroupByKey must be of Tuple or Any type"
        for pcoll in [pcoll1, pcoll2]:
            # assertRaisesRegex replaces the deprecated assertRaisesRegexp
            # alias, which no longer exists in Python 3.12+.
            with self.assertRaisesRegex(ValueError, err_msg):
                DataflowRunner.group_by_key_input_visitor().visit_transform(
                    AppliedPTransform(None, transform, "label", [pcoll]))
示例3: test_group_by_key_input_visitor_with_valid_inputs
# 需要导入模块: from apache_beam.runners import DataflowRunner [as 别名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 别名]
def test_group_by_key_input_visitor_with_valid_inputs(self):
    """Check that acceptable input types are normalised to ``KV[Any, Any]``.

    For both ``_GroupByKeyOnly`` and ``GroupByKey``, three PCollections
    typed as ``None``, ``Any`` and ``KV[Any, Any]`` are each visited, and
    every one is asserted to end up with element type ``KV[Any, Any]``.
    """
    pipeline = TestPipeline()
    pcolls = [PCollection(pipeline) for _ in range(3)]
    for transform in [_GroupByKeyOnly(), beam.GroupByKey()]:
        # Re-seed all three inputs before any of them is visited, since a
        # previous pass will have rewritten their element types.
        starting_types = [
            None,
            typehints.Any,
            typehints.KV[typehints.Any, typehints.Any],
        ]
        for pcoll, starting_type in zip(pcolls, starting_types):
            pcoll.element_type = starting_type
        for pcoll in pcolls:
            visitor = DataflowRunner.group_by_key_input_visitor()
            applied = AppliedPTransform(None, transform, "label", [pcoll])
            visitor.visit_transform(applied)
            expected = typehints.KV[typehints.Any, typehints.Any]
            self.assertEqual(pcoll.element_type, expected)
示例4: test_group_by_key_input_visitor_with_invalid_inputs
# 需要导入模块: from apache_beam.runners import DataflowRunner [as 别名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 别名]
def test_group_by_key_input_visitor_with_invalid_inputs(self):
    """Check that inputs incompatible with ``KV[Any, Any]`` are rejected.

    For both ``_GroupByKeyOnly`` and ``GroupByKey``, an input PCollection
    typed as ``str`` or ``Set`` is visited, and the visitor is expected to
    raise ``ValueError`` whose message names the transform label and the
    required ``KV[Any, Any]`` compatibility.
    """
    p = TestPipeline()
    pcoll1 = PCollection(p)
    pcoll2 = PCollection(p)
    for transform in [_GroupByKeyOnly(), beam.GroupByKey()]:
        pcoll1.element_type = str
        pcoll2.element_type = typehints.Set
        # Regex: the brackets are escaped so they match literally.
        err_msg = (
            r"Input to 'label' must be compatible with KV\[Any, Any\]. "
            "Found .*")
        for pcoll in [pcoll1, pcoll2]:
            # assertRaisesRegex replaces the deprecated assertRaisesRegexp
            # alias, which no longer exists in Python 3.12+.
            with self.assertRaisesRegex(ValueError, err_msg):
                DataflowRunner.group_by_key_input_visitor().visit_transform(
                    AppliedPTransform(None, transform, "label", [pcoll]))
示例5: test_gbk_then_flatten_input_visitor
# 需要导入模块: from apache_beam.runners import DataflowRunner [as 别名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 别名]
def test_gbk_then_flatten_input_visitor(self):
    """Check the GBK and Flatten input visitors applied one after the other.

    Two ``Create`` outputs are flattened and fed to a ``GroupByKey``. After
    running the group-by-key visitor and then the flatten visitor over the
    pipeline, the flattened PCollection must have a tuple element type that
    equals the element type of each of its inputs.
    """
    pipeline = TestPipeline(
        runner=DataflowRunner(),
        options=PipelineOptions(self.default_properties))
    str_valued_pc = pipeline | 'c1' >> beam.Create({None: 'a'})
    int_valued_pc = pipeline | 'c2' >> beam.Create({None: 3})
    flattened = (str_valued_pc, int_valued_pc) | beam.Flatten()
    _ = flattened | beam.GroupByKey()

    # This may change if type inference changes, but we assert it here
    # to make sure the check below is not vacuous.
    self.assertNotIsInstance(
        flattened.element_type, typehints.TupleConstraint)

    gbk_visitor = DataflowRunner.group_by_key_input_visitor()
    pipeline.visit(gbk_visitor)
    flatten_visitor = DataflowRunner.flatten_input_visitor()
    pipeline.visit(flatten_visitor)

    # The dataflow runner requires gbk input to be tuples *and* flatten
    # inputs to be equal to their outputs. Assert both hold.
    self.assertIsInstance(flattened.element_type, typehints.TupleConstraint)
    for flatten_input in (str_valued_pc, int_valued_pc):
        self.assertEqual(
            flattened.element_type, flatten_input.element_type)