

Python pipeline.Pipeline Class Code Examples

This article collects typical usage examples of the Python class apache_beam.pipeline.Pipeline. If you are unsure what the Pipeline class does, or how and where to use it, the curated class examples below should help.


The sections below present 15 code examples of the Pipeline class, sorted by popularity by default.
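All of the examples follow the same basic pattern: construct a Pipeline with a runner and options, build a graph of transforms with the | operator, and execute it with run(). As a quick orientation before the real-world snippets, here is a minimal, self-contained sketch of that pattern; the import paths follow recent Beam releases and may differ slightly from the older snapshots the examples were taken from.

  import apache_beam as beam
  from apache_beam.options.pipeline_options import PipelineOptions
  from apache_beam.runners import DirectRunner

  # Build a pipeline against the local, in-process DirectRunner.
  p = beam.Pipeline(DirectRunner(), options=PipelineOptions([]))

  # Apply transforms with '|'; '>>' attaches a stable, unique label.
  (p
   | 'Create' >> beam.Create([1, 2, 3])
   | 'Double' >> beam.Map(lambda x: x * 2))

  # run() returns a PipelineResult; wait_until_finish() blocks until done.
  p.run().wait_until_finish()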

Example 1: test_full_completion

  def test_full_completion(self):
    # Create dummy file and close it.  Note that we need to do this because
    # Windows does not allow NamedTemporaryFiles to be reopened elsewhere
    # before the temporary file is closed.
    dummy_file = tempfile.NamedTemporaryFile(delete=False)
    dummy_file_name = dummy_file.name
    dummy_file.close()

    dummy_dir = tempfile.mkdtemp()

    remote_runner = DataflowRunner()
    pipeline = Pipeline(remote_runner,
                        options=PipelineOptions([
                            '--dataflow_endpoint=ignored',
                            '--sdk_location=' + dummy_file_name,
                            '--job_name=test-job',
                            '--project=test-project',
                            '--staging_location=' + dummy_dir,
                            '--temp_location=/dev/null',
                            '--template_location=' + dummy_file_name,
                            '--no_auth=True']))

    pipeline | beam.Create([1, 2, 3]) | beam.Map(lambda x: x) # pylint: disable=expression-not-assigned
    pipeline.run().wait_until_finish()
    with open(dummy_file_name) as template_file:
      saved_job_dict = json.load(template_file)
      self.assertEqual(
          saved_job_dict['environment']['sdkPipelineOptions']
          ['options']['project'], 'test-project')
      self.assertEqual(
          saved_job_dict['environment']['sdkPipelineOptions']
          ['options']['job_name'], 'test-job')
Author: amarouni, Project: incubator-beam, Lines: 32, Source: template_runner_test.py

Example 2: test_direct_runner_metrics

  def test_direct_runner_metrics(self):

    class MyDoFn(beam.DoFn):
      def start_bundle(self):
        count = Metrics.counter(self.__class__, 'bundles')
        count.inc()

      def finish_bundle(self):
        count = Metrics.counter(self.__class__, 'finished_bundles')
        count.inc()

      def process(self, element):
        gauge = Metrics.gauge(self.__class__, 'latest_element')
        gauge.set(element)
        count = Metrics.counter(self.__class__, 'elements')
        count.inc()
        distro = Metrics.distribution(self.__class__, 'element_dist')
        distro.update(element)
        return [element]

    runner = DirectRunner()
    p = Pipeline(runner,
                 options=PipelineOptions(self.default_properties))
    pcoll = (p | ptransform.Create([1, 2, 3, 4, 5])
             | 'Do' >> beam.ParDo(MyDoFn()))
    assert_that(pcoll, equal_to([1, 2, 3, 4, 5]))
    result = p.run()
    result.wait_until_finish()
    metrics = result.metrics().query()
    namespace = '{}.{}'.format(MyDoFn.__module__,
                               MyDoFn.__name__)

    hc.assert_that(
        metrics['counters'],
        hc.contains_inanyorder(
            MetricResult(
                MetricKey('Do', MetricName(namespace, 'elements')),
                5, 5),
            MetricResult(
                MetricKey('Do', MetricName(namespace, 'bundles')),
                1, 1),
            MetricResult(
                MetricKey('Do', MetricName(namespace, 'finished_bundles')),
                1, 1)))

    hc.assert_that(
        metrics['distributions'],
        hc.contains_inanyorder(
            MetricResult(
                MetricKey('Do', MetricName(namespace, 'element_dist')),
                DistributionResult(DistributionData(15, 5, 1, 5)),
                DistributionResult(DistributionData(15, 5, 1, 5)))))

    gauge_result = metrics['gauges'][0]
    hc.assert_that(
        gauge_result.key,
        hc.equal_to(MetricKey('Do', MetricName(namespace, 'latest_element'))))
    hc.assert_that(gauge_result.committed.value, hc.equal_to(5))
    hc.assert_that(gauge_result.attempted.value, hc.equal_to(5))
Author: JavierRoger, Project: beam, Lines: 59, Source: runner_test.py
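The query() call above returns every metric in the job. When only a subset is of interest, Beam also accepts a MetricsFilter; a minimal sketch, assuming a result object from a run like the one above:

  from apache_beam.metrics.metric import MetricsFilter

  # Narrow the query to the 'elements' counter inside the 'Do' step.
  filtered = result.metrics().query(
      MetricsFilter().with_name('elements').with_step('Do'))
  for counter in filtered['counters']:
    print(counter.key, counter.committed)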

Example 3: test_bigquery_read_streaming_fail

 def test_bigquery_read_streaming_fail(self):
   remote_runner = DataflowRunner()
   self.default_properties.append("--streaming")
   p = Pipeline(remote_runner, PipelineOptions(self.default_properties))
   _ = p | beam.io.Read(beam.io.BigQuerySource('some.table'))
   with self.assertRaisesRegexp(ValueError,
                                r'source is not currently available'):
     p.run()
Author: charlesccychen, Project: incubator-beam, Lines: 8, Source: dataflow_runner_test.py

Example 4: test_remote_runner_translation

  def test_remote_runner_translation(self):
    remote_runner = DataflowRunner()
    p = Pipeline(remote_runner,
                 options=PipelineOptions(self.default_properties))

    (p | ptransform.Create([1, 2, 3])  # pylint: disable=expression-not-assigned
     | 'Do' >> ptransform.FlatMap(lambda x: [(x, x)])
     | ptransform.GroupByKey())
    p.run()
Author: aaltay, Project: incubator-beam, Lines: 9, Source: dataflow_runner_test.py

Example 5: run

 def run(self, transform, options=None):
   """Run the given transform with this runner.
   """
   # Imported here to avoid circular dependencies.
   # pylint: disable=wrong-import-order, wrong-import-position
   from apache_beam.pipeline import Pipeline
   p = Pipeline(runner=self, options=options)
   p | transform
   return p.run()
Author: aaltay, Project: incubator-beam, Lines: 9, Source: runner.py
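A usage sketch for this convenience method: since PTransforms compose with '|' before being applied, a small chain can be executed in one call, assuming the run(transform, options) signature shown above:

  import apache_beam as beam
  from apache_beam.runners import DirectRunner

  # One-shot execution: the runner builds and runs a throwaway pipeline.
  result = DirectRunner().run(
      beam.Create([1, 2, 3]) | beam.Map(lambda x: x * x))
  result.wait_until_finish()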

Example 6: test_parent_pointer

  def test_parent_pointer(self):
    class MyPTransform(beam.PTransform):

      def expand(self, p):
        self.p = p
        return p | beam.Create([None])

    p = beam.Pipeline()
    p | MyPTransform()  # pylint: disable=expression-not-assigned
    p = Pipeline.from_runner_api(Pipeline.to_runner_api(p), None, None)
    self.assertIsNotNone(p.transforms_stack[0].parts[0].parent)
    self.assertEqual(p.transforms_stack[0].parts[0].parent,
                     p.transforms_stack[0])
Author: dpmills, Project: incubator-beam, Lines: 13, Source: pipeline_test.py

Example 7: test_remote_runner_display_data

  def test_remote_runner_display_data(self):
    remote_runner = DataflowRunner()
    p = Pipeline(remote_runner,
                 options=PipelineOptions(self.default_properties))

    # TODO: Should not subclass ParDo. Switch to PTransform as soon as
    # composite transforms support display data.
    class SpecialParDo(beam.ParDo):
      def __init__(self, fn, now):
        super(SpecialParDo, self).__init__(fn)
        self.fn = fn
        self.now = now

      # Report the wrapped fn, this class, and a timestamp as display data.
      def display_data(self):
        return {'asubcomponent': self.fn,
                'a_class': SpecialParDo,
                'a_time': self.now}

    class SpecialDoFn(beam.DoFn):
      def display_data(self):
        return {'dofn_value': 42}

      def process(self):
        pass

    now = datetime.now()
    # pylint: disable=expression-not-assigned
    (p | ptransform.Create([1, 2, 3, 4, 5])
     | 'Do' >> SpecialParDo(SpecialDoFn(), now))

    p.run()
    job_dict = json.loads(str(remote_runner.job))
    steps = [step
             for step in job_dict['steps']
             if len(step['properties'].get('display_data', [])) > 0]
    step = steps[1]
    disp_data = step['properties']['display_data']
    disp_data = sorted(disp_data, key=lambda x: x['namespace']+x['key'])
    nspace = SpecialParDo.__module__ + '.'
    expected_data = [{'type': 'TIMESTAMP', 'namespace': nspace+'SpecialParDo',
                      'value': DisplayDataItem._format_value(now, 'TIMESTAMP'),
                      'key': 'a_time'},
                     {'type': 'STRING', 'namespace': nspace+'SpecialParDo',
                      'value': nspace+'SpecialParDo', 'key': 'a_class',
                      'shortValue': 'SpecialParDo'},
                     {'type': 'INTEGER', 'namespace': nspace+'SpecialDoFn',
                      'value': 42, 'key': 'dofn_value'}]
    expected_data = sorted(expected_data, key=lambda x: x['namespace']+x['key'])
    self.assertEqual(len(disp_data), 3)
    self.assertEqual(disp_data, expected_data)
Author: aaltay, Project: incubator-beam, Lines: 51, Source: dataflow_runner_test.py

Example 8: run

 def run(self, transform, options=None):
   """Run the given transform or callable with this runner.
   """
   # Imported here to avoid circular dependencies.
   # pylint: disable=wrong-import-order, wrong-import-position
   from apache_beam import PTransform
   from apache_beam.pvalue import PBegin
   from apache_beam.pipeline import Pipeline
   p = Pipeline(runner=self, options=options)
   if isinstance(transform, PTransform):
     p | transform
   else:
     transform(PBegin(p))
   return p.run()
Author: aljoscha, Project: incubator-beam, Lines: 14, Source: runner.py

Example 9: test_reuse_custom_transform_instance

 def test_reuse_custom_transform_instance(self):
   pipeline = Pipeline()
   pcoll1 = pipeline | 'pcoll1' >> Create([1, 2, 3])
   pcoll2 = pipeline | 'pcoll2' >> Create([4, 5, 6])
   transform = PipelineTest.CustomTransform()
   pcoll1 | transform
   with self.assertRaises(RuntimeError) as cm:
     pipeline.apply(transform, pcoll2)
   self.assertEqual(
       cm.exception.args[0],
       'Transform "CustomTransform" does not have a stable unique label. '
       'This will prevent updating of pipelines. '
       'To apply a transform with a specified label write '
       'pvalue | "label" >> transform')
Author: dpmills, Project: incubator-beam, Lines: 14, Source: pipeline_test.py
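As the error message advises, reusing a transform instance is fine as long as each application carries its own stable label via '>>'; a minimal sketch using the same names as the test above:

  transform = PipelineTest.CustomTransform()
  # Distinct labels give each application a stable, unique name.
  pcoll1 | 'first use' >> transform
  pcoll2 | 'second use' >> transform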

Example 10: test_streaming_create_translation

  def test_streaming_create_translation(self):
    remote_runner = DataflowRunner()
    self.default_properties.append("--streaming")
    p = Pipeline(remote_runner, PipelineOptions(self.default_properties))
    p | ptransform.Create([1])  # pylint: disable=expression-not-assigned
    p.run()
    job_dict = json.loads(str(remote_runner.job))
    self.assertEqual(len(job_dict[u'steps']), 2)

    self.assertEqual(job_dict[u'steps'][0][u'kind'], u'ParallelRead')
    self.assertEqual(
        job_dict[u'steps'][0][u'properties'][u'pubsub_subscription'],
        '_starting_signal/')
    self.assertEqual(job_dict[u'steps'][1][u'kind'], u'ParallelDo')
Author: aaltay, Project: incubator-beam, Lines: 14, Source: dataflow_runner_test.py

Example 11: setUp

 def setUp(self):
   self.pipeline = Pipeline(DirectRunner())
   self.visitor = ConsumerTrackingPipelineVisitor()
   try:                    # Python 2
     self.assertCountEqual = self.assertItemsEqual
   except AttributeError:  # Python 3
     pass
Author: charlesccychen, Project: incubator-beam, Lines: 7, Source: consumer_tracking_pipeline_visitor_test.py

Example 12: run

 def run(self, request, context):
   job_id = uuid.uuid4().get_hex()  # Python 2 only; use uuid.uuid4().hex on Python 3
   pipeline_result = Pipeline.from_runner_api(
       request.pipeline,
       'DirectRunner',
       PipelineOptions()).run()
   self.jobs[job_id] = pipeline_result
   return beam_job_api_pb2.SubmitJobResponse(jobId=job_id)
Author: eljefe6a, Project: incubator-beam, Lines: 8, Source: server.py

Example 13: test_streaming_create_translation

  def test_streaming_create_translation(self):
    remote_runner = DataflowRunner()
    self.default_properties.append("--streaming")
    p = Pipeline(remote_runner, PipelineOptions(self.default_properties))
    p | ptransform.Create([1])  # pylint: disable=expression-not-assigned
    remote_runner.job = apiclient.Job(p._options)
    # Performing configured PTransform overrides here.
    p.replace_all(DataflowRunner._PTRANSFORM_OVERRIDES)
    super(DataflowRunner, remote_runner).run(p)
    job_dict = json.loads(str(remote_runner.job))
    self.assertEqual(len(job_dict[u'steps']), 2)

    self.assertEqual(job_dict[u'steps'][0][u'kind'], u'ParallelRead')
    self.assertEqual(
        job_dict[u'steps'][0][u'properties'][u'pubsub_subscription'],
        '_starting_signal/')
    self.assertEqual(job_dict[u'steps'][1][u'kind'], u'ParallelDo')
Author: amarouni, Project: incubator-beam, Lines: 17, Source: dataflow_runner_test.py

Example 14: test_bad_path

  def test_bad_path(self):
    dummy_sdk_file = tempfile.NamedTemporaryFile()
    remote_runner = DataflowRunner()
    pipeline = Pipeline(remote_runner,
                        options=PipelineOptions([
                            '--dataflow_endpoint=ignored',
                            '--sdk_location=' + dummy_sdk_file.name,
                            '--job_name=test-job',
                            '--project=test-project',
                            '--staging_location=ignored',
                            '--temp_location=/dev/null',
                            '--template_location=/bad/path',
                            '--no_auth=True']))
    remote_runner.job = apiclient.Job(pipeline._options)

    with self.assertRaises(IOError):
      pipeline.run().wait_until_finish()
Author: amarouni, Project: incubator-beam, Lines: 17, Source: template_runner_test.py

Example 15: test_visit_entire_graph

  def test_visit_entire_graph(self):
    pipeline = Pipeline()
    pcoll1 = pipeline | 'pcoll' >> Create([1, 2, 3])
    pcoll2 = pcoll1 | 'do1' >> FlatMap(lambda x: [x + 1])
    pcoll3 = pcoll2 | 'do2' >> FlatMap(lambda x: [x + 1])
    pcoll4 = pcoll2 | 'do3' >> FlatMap(lambda x: [x + 1])
    transform = PipelineTest.CustomTransform()
    pcoll5 = pcoll4 | transform

    visitor = PipelineTest.Visitor(visited=[])
    pipeline.visit(visitor)
    self.assertEqual(set([pcoll1, pcoll2, pcoll3, pcoll4, pcoll5]),
                     set(visitor.visited))
    self.assertEqual(set(visitor.enter_composite),
                     set(visitor.leave_composite))
    self.assertEqual(3, len(visitor.enter_composite))
    self.assertEqual(visitor.enter_composite[2].transform, transform)
    self.assertEqual(visitor.leave_composite[1].transform, transform)
Author: dpmills, Project: incubator-beam, Lines: 18, Source: pipeline_test.py
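Example 15 depends on a Visitor helper defined elsewhere in pipeline_test.py. For context, here is a minimal sketch of such a visitor against Beam's PipelineVisitor interface; the exact helper in the test may differ:

  from apache_beam.pipeline import PipelineVisitor

  class Visitor(PipelineVisitor):
    """Records every PCollection and composite transform visited."""

    def __init__(self, visited):
      self.visited = visited
      self.enter_composite = []
      self.leave_composite = []

    def visit_value(self, value, producer_node):
      self.visited.append(value)

    def enter_composite_transform(self, transform_node):
      self.enter_composite.append(transform_node)

    def leave_composite_transform(self, transform_node):
      self.leave_composite.append(transform_node)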


Note: The apache_beam.pipeline.Pipeline class examples above were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by their respective developers, and copyright remains with the original authors; consult each project's License before distributing or reusing the code, and do not republish without permission.