This article collects typical usage examples of DataflowRunner.job from the Python apache_beam.runners module. If you have been wondering what DataflowRunner.job does or how to use it in practice, the curated examples below should help. You can also explore the containing class, apache_beam.runners.DataflowRunner, for more context.
Three code examples of DataflowRunner.job are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
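All three examples come from Beam's DataflowRunner test suite and rely on the same scaffolding: a unittest class whose setUp builds a default_properties list of pipeline flags. A minimal sketch of that scaffolding follows; the exact import paths and flag values are assumptions (both have moved between Beam releases), not part of the indexed examples.

import json
import unittest
from datetime import datetime

# Imports used by the three examples below; paths assume a pre-2.x-era Beam layout.
import apache_beam as beam
import apache_beam.transforms as ptransform
from apache_beam.pipeline import Pipeline
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.runners import DataflowRunner
from apache_beam.runners.dataflow.internal import apiclient
from apache_beam.transforms.display import DisplayDataItem


class DataflowRunnerJobTest(unittest.TestCase):
  def setUp(self):
    # Placeholder flag values: the tests only translate pipelines locally,
    # so no real GCP project or credentials are required.
    self.default_properties = [
        '--job_name=test-job',
        '--project=test-project',
        '--staging_location=ignored',
        '--temp_location=/dev/null',
        '--no_auth=True']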
Example 1: test_remote_runner_translation
# Required import: from apache_beam.runners import DataflowRunner [as alias]
# Or: from apache_beam.runners.DataflowRunner import job [as alias]
def test_remote_runner_translation(self):
  remote_runner = DataflowRunner()
  p = Pipeline(remote_runner,
               options=PipelineOptions(self.default_properties))
  (p | ptransform.Create([1, 2, 3])  # pylint: disable=expression-not-assigned
   | 'Do' >> ptransform.FlatMap(lambda x: [(x, x)])
   | ptransform.GroupByKey())
  remote_runner.job = apiclient.Job(p.options)
  super(DataflowRunner, remote_runner).run(p)
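Example 1 only verifies that translation completes without raising. Examples 2 and 3 below go further and inspect the translated job: as they show, calling str() on the apiclient.Job object yields the JSON job description that would be submitted to the Dataflow service. A minimal sketch of that inspection applied to this pipeline (the step kinds in the comment follow Example 3 and may differ by Beam version):

import json

# The Job object serializes to the JSON payload the service would receive.
job_dict = json.loads(str(remote_runner.job))
for step in job_dict['steps']:
  print(step['kind'])  # e.g. u'ParallelRead', u'ParallelDo'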
Example 2: test_remote_runner_display_data
# Required import: from apache_beam.runners import DataflowRunner [as alias]
# Or: from apache_beam.runners.DataflowRunner import job [as alias]
def test_remote_runner_display_data(self):
  remote_runner = DataflowRunner()
  p = Pipeline(remote_runner,
               options=PipelineOptions(self.default_properties))

  # TODO: Should not subclass ParDo. Switch to PTransform as soon as
  # composite transforms support display data.
  class SpecialParDo(beam.ParDo):
    def __init__(self, fn, now):
      super(SpecialParDo, self).__init__(fn)
      self.fn = fn
      self.now = now

    # Make this a list to be accessible within closure
    def display_data(self):
      return {'asubcomponent': self.fn,
              'a_class': SpecialParDo,
              'a_time': self.now}

  class SpecialDoFn(beam.DoFn):
    def display_data(self):
      return {'dofn_value': 42}

    def process(self):
      pass

  now = datetime.now()
  # pylint: disable=expression-not-assigned
  (p | ptransform.Create([1, 2, 3, 4, 5])
   | 'Do' >> SpecialParDo(SpecialDoFn(), now))
  remote_runner.job = apiclient.Job(p.options)
  super(DataflowRunner, remote_runner).run(p)
  job_dict = json.loads(str(remote_runner.job))

  steps = [step
           for step in job_dict['steps']
           if len(step['properties'].get('display_data', [])) > 0]
  step = steps[0]
  disp_data = step['properties']['display_data']
  disp_data = sorted(disp_data, key=lambda x: x['namespace'] + x['key'])
  nspace = SpecialParDo.__module__ + '.'
  expected_data = [{'type': 'TIMESTAMP', 'namespace': nspace + 'SpecialParDo',
                    'value': DisplayDataItem._format_value(now, 'TIMESTAMP'),
                    'key': 'a_time'},
                   {'type': 'STRING', 'namespace': nspace + 'SpecialParDo',
                    'value': nspace + 'SpecialParDo', 'key': 'a_class',
                    'shortValue': 'SpecialParDo'},
                   {'type': 'INTEGER', 'namespace': nspace + 'SpecialDoFn',
                    'value': 42, 'key': 'dofn_value'}]
  expected_data = sorted(expected_data,
                         key=lambda x: x['namespace'] + x['key'])
  self.assertEqual(len(disp_data), 3)
  self.assertEqual(disp_data, expected_data)
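The contract this test exercises is small: a transform or DoFn returns a plain dict from display_data(), and Beam wraps each value in a typed DisplayDataItem (TIMESTAMP, STRING, INTEGER, and so on), namespaced by the defining class's module. The same conversion can be triggered directly with DisplayData.create_from from apache_beam.transforms.display; a minimal sketch under that assumption (the class name is hypothetical):

from datetime import datetime

import apache_beam as beam
from apache_beam.transforms.display import DisplayData


class AnnotatedDoFn(beam.DoFn):  # hypothetical example class
  def display_data(self):
    # Plain values; the item type is inferred from each value's Python type.
    return {'dofn_value': 42, 'a_time': datetime.now()}

  def process(self, element):
    yield element


for item in DisplayData.create_from(AnnotatedDoFn()).items:
  print(item.namespace, item.key, item.type, item.value)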
Example 3: test_streaming_create_translation
# Required import: from apache_beam.runners import DataflowRunner [as alias]
# Or: from apache_beam.runners.DataflowRunner import job [as alias]
def test_streaming_create_translation(self):
  remote_runner = DataflowRunner()
  self.default_properties.append("--streaming")
  p = Pipeline(remote_runner, PipelineOptions(self.default_properties))
  p | ptransform.Create([1])  # pylint: disable=expression-not-assigned
  remote_runner.job = apiclient.Job(p._options)
  # Performing configured PTransform overrides here.
  p.replace_all(DataflowRunner._PTRANSFORM_OVERRIDES)
  super(DataflowRunner, remote_runner).run(p)
  job_dict = json.loads(str(remote_runner.job))
  self.assertEqual(len(job_dict[u'steps']), 2)
  self.assertEqual(job_dict[u'steps'][0][u'kind'], u'ParallelRead')
  self.assertEqual(
      job_dict[u'steps'][0][u'properties'][u'pubsub_subscription'],
      '_starting_signal/')
  self.assertEqual(job_dict[u'steps'][1][u'kind'], u'ParallelDo')
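The interesting part here is the override step: with --streaming set, DataflowRunner._PTRANSFORM_OVERRIDES swaps the bounded Create for an unbounded Pub/Sub read, which is why the first translated step is a ParallelRead carrying a pubsub_subscription property. The flag itself is an ordinary pipeline option; a minimal check of its effect (import path assumed, since PipelineOptions has lived in both apache_beam.utils and apache_beam.options across releases):

from apache_beam.options.pipeline_options import PipelineOptions, StandardOptions

options = PipelineOptions(['--streaming'])
assert options.view_as(StandardOptions).streaming  # True when --streaming is passed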