本文整理匯總了Python中apache_beam.runners.DataflowRunner.group_by_key_input_visitor方法的典型用法代碼示例。如果您正苦於以下問題:Python DataflowRunner.group_by_key_input_visitor方法的具體用法?Python DataflowRunner.group_by_key_input_visitor怎麼用?Python DataflowRunner.group_by_key_input_visitor使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類apache_beam.runners.DataflowRunner的用法示例。
在下文中一共展示了DataflowRunner.group_by_key_input_visitor方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_group_by_key_input_visitor_for_non_gbk_transforms
# 需要導入模塊: from apache_beam.runners import DataflowRunner [as 別名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 別名]
def test_group_by_key_input_visitor_for_non_gbk_transforms(self):
    """Check that visiting non-GroupByKey transforms keeps the input type.

    For each of ``beam.Flatten()`` and ``beam.Map(...)``, the input
    PCollection's element type is set to ``typehints.Any``, a fresh
    group-by-key input visitor visits an applied transform wrapping that
    input, and the element type is asserted to still equal ``typehints.Any``.
    """
    pipeline = TestPipeline()
    pcoll = PCollection(pipeline)
    non_gbk_transforms = (beam.Flatten(), beam.Map(lambda x: x))
    for transform in non_gbk_transforms:
        # Reset the type before every visit so each transform is checked
        # from the same starting state.
        pcoll.element_type = typehints.Any
        visitor = DataflowRunner.group_by_key_input_visitor()
        applied = AppliedPTransform(None, transform, "label", [pcoll])
        visitor.visit_transform(applied)
        self.assertEqual(pcoll.element_type, typehints.Any)
示例2: test_group_by_key_input_visitor_with_invalid_inputs
# 需要導入模塊: from apache_beam.runners import DataflowRunner [as 別名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 別名]
def test_group_by_key_input_visitor_with_invalid_inputs(self):
    """Check that invalid GroupByKey input types are rejected.

    For both ``_GroupByKeyOnly()`` and ``beam.GroupByKey()``, visiting an
    applied transform whose input PCollection has element type
    ``typehints.TupleSequenceConstraint`` or ``typehints.Set`` must raise
    a ``ValueError`` whose message matches ``err_msg``.
    """
    p = TestPipeline()
    pcoll1 = PCollection(p)
    pcoll2 = PCollection(p)
    # The expected message does not depend on the transform, so build it once.
    err_msg = "Input to GroupByKey must be of Tuple or Any type"
    for transform in [_GroupByKeyOnly(), beam.GroupByKey()]:
        pcoll1.element_type = typehints.TupleSequenceConstraint
        pcoll2.element_type = typehints.Set
        for pcoll in [pcoll1, pcoll2]:
            # assertRaisesRegexp is a deprecated alias that was removed in
            # Python 3.12; assertRaisesRegex is the supported spelling.
            with self.assertRaisesRegex(ValueError, err_msg):
                DataflowRunner.group_by_key_input_visitor().visit_transform(
                    AppliedPTransform(None, transform, "label", [pcoll]))
示例3: test_group_by_key_input_visitor_with_valid_inputs
# 需要導入模塊: from apache_beam.runners import DataflowRunner [as 別名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 別名]
def test_group_by_key_input_visitor_with_valid_inputs(self):
    """Check the element type of valid GroupByKey inputs after a visit.

    For both ``_GroupByKeyOnly()`` and ``beam.GroupByKey()``, inputs whose
    element type starts as ``None``, ``typehints.Any`` or
    ``KV[Any, Any]`` are each visited, and afterwards the element type is
    asserted to equal ``KV[Any, Any]``.
    """
    pipeline = TestPipeline()
    untyped_pc = PCollection(pipeline)
    any_pc = PCollection(pipeline)
    kv_pc = PCollection(pipeline)
    expected_type = typehints.KV[typehints.Any, typehints.Any]
    for transform in (_GroupByKeyOnly(), beam.GroupByKey()):
        # Restore the starting types for each transform, since the
        # assertion below expects them to have changed after a visit.
        untyped_pc.element_type = None
        any_pc.element_type = typehints.Any
        kv_pc.element_type = typehints.KV[typehints.Any, typehints.Any]
        for pcoll in (untyped_pc, any_pc, kv_pc):
            visitor = DataflowRunner.group_by_key_input_visitor()
            applied = AppliedPTransform(None, transform, "label", [pcoll])
            visitor.visit_transform(applied)
            self.assertEqual(pcoll.element_type, expected_type)
示例4: test_group_by_key_input_visitor_with_invalid_inputs
# 需要導入模塊: from apache_beam.runners import DataflowRunner [as 別名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 別名]
def test_group_by_key_input_visitor_with_invalid_inputs(self):
    """Check that inputs incompatible with KV[Any, Any] are rejected.

    For both ``_GroupByKeyOnly()`` and ``beam.GroupByKey()``, visiting an
    applied transform labelled ``"label"`` whose input PCollection has
    element type ``str`` or ``typehints.Set`` must raise a ``ValueError``
    whose message matches the ``err_msg`` regular expression.
    """
    p = TestPipeline()
    pcoll1 = PCollection(p)
    pcoll2 = PCollection(p)
    # The expected pattern does not depend on the transform, so build it once.
    err_msg = (
        r"Input to 'label' must be compatible with KV\[Any, Any\]. "
        "Found .*")
    for transform in [_GroupByKeyOnly(), beam.GroupByKey()]:
        pcoll1.element_type = str
        pcoll2.element_type = typehints.Set
        for pcoll in [pcoll1, pcoll2]:
            # assertRaisesRegexp is a deprecated alias that was removed in
            # Python 3.12; assertRaisesRegex is the supported spelling.
            with self.assertRaisesRegex(ValueError, err_msg):
                DataflowRunner.group_by_key_input_visitor().visit_transform(
                    AppliedPTransform(None, transform, "label", [pcoll]))
示例5: test_gbk_then_flatten_input_visitor
# 需要導入模塊: from apache_beam.runners import DataflowRunner [as 別名]
# 或者: from apache_beam.runners.DataflowRunner import group_by_key_input_visitor [as 別名]
def test_gbk_then_flatten_input_visitor(self):
    """Check the GroupByKey and Flatten input visitors applied in sequence.

    Builds a pipeline on ``DataflowRunner`` with options taken from
    ``self.default_properties``, flattens two created PCollections, and
    feeds the result into a GroupByKey. After both visitors have run over
    the pipeline, the flattened PCollection's element type must be a
    ``TupleConstraint`` and must equal the element type of each Flatten
    input.
    """
    pipeline = TestPipeline(
        runner=DataflowRunner(),
        options=PipelineOptions(self.default_properties))
    str_valued_pc = pipeline | 'c1' >> beam.Create({None: 'a'})
    int_valued_pc = pipeline | 'c2' >> beam.Create({None: 3})
    flat = (str_valued_pc, int_valued_pc) | beam.Flatten()
    _ = flat | beam.GroupByKey()

    # This may change if type inference changes, but we assert it here
    # to make sure the check below is not vacuous.
    self.assertNotIsInstance(flat.element_type, typehints.TupleConstraint)

    gbk_visitor = DataflowRunner.group_by_key_input_visitor()
    pipeline.visit(gbk_visitor)
    flatten_visitor = DataflowRunner.flatten_input_visitor()
    pipeline.visit(flatten_visitor)

    # The dataflow runner requires gbk input to be tuples *and* flatten
    # inputs to be equal to their outputs. Assert both hold.
    self.assertIsInstance(flat.element_type, typehints.TupleConstraint)
    for flatten_input in (str_valued_pc, int_valued_pc):
        self.assertEqual(flat.element_type, flatten_input.element_type)