当前位置: 首页>>代码示例>>Python>>正文


Python TestPipeline.visit方法代码示例

本文整理汇总了Python中apache_beam.test_pipeline.TestPipeline.visit方法的典型用法代码示例。如果您正苦于以下问题:Python TestPipeline.visit方法的具体用法?Python TestPipeline.visit怎么用?Python TestPipeline.visit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在apache_beam.test_pipeline.TestPipeline的用法示例。


在下文中一共展示了TestPipeline.visit方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: model_textio_compressed

# 需要导入模块: from apache_beam.test_pipeline import TestPipeline [as 别名]
# 或者: from apache_beam.test_pipeline.TestPipeline import visit [as 别名]
def model_textio_compressed(renames, expected):
  """Using a Read Transform to read compressed text files.

  Args:
    renames: path renames handed to SnippetUtils.RenameFiles so the snippet's
      placeholder paths are rewritten to real test files — presumably a
      mapping; confirm against SnippetUtils.
    expected: the lines expected in the output, verified via beam.assert_that.
  """
  p = TestPipeline()

  # NOTE: the [START]/[END] markers delimit code extracted verbatim into the
  # published documentation; keep the markers and enclosed lines intact.
  # [START model_textio_write_compressed]
  lines = p | 'ReadFromText' >> beam.io.ReadFromText(
      '/path/to/input-*.csv.gz',
      compression_type=beam.io.filesystem.CompressionTypes.GZIP)
  # [END model_textio_write_compressed]

  beam.assert_that(lines, beam.equal_to(expected))
  # Rewrite the placeholder file paths before executing the pipeline.
  p.visit(SnippetUtils.RenameFiles(renames))
  p.run().wait_until_finish()
开发者ID:vikkyrk,项目名称:incubator-beam,代码行数:15,代码来源:snippets.py

示例2: construct_pipeline

# 需要导入模块: from apache_beam.test_pipeline import TestPipeline [as 别名]
# 或者: from apache_beam.test_pipeline.TestPipeline import visit [as 别名]
def construct_pipeline(renames):
  """A reverse words snippet as an example for constructing a pipeline.

  Args:
    renames: path renames handed to SnippetUtils.RenameFiles so the snippet's
      placeholder paths are rewritten for the test environment.
  """
  import re

  class ReverseWords(beam.PTransform):
    """A PTransform that reverses individual elements in a PCollection."""

    def expand(self, pcoll):
      return pcoll | beam.Map(lambda e: e[::-1])

  def filter_words(unused_x):
    """Pass through filter to select everything."""
    return True

  # NOTE: each [START]/[END] pair below is extracted verbatim into the
  # published documentation; keep the markers and enclosed lines intact.
  # [START pipelines_constructing_creating]
  from apache_beam.utils.pipeline_options import PipelineOptions

  p = beam.Pipeline(options=PipelineOptions())
  # [END pipelines_constructing_creating]

  # The documentation shows a real Pipeline above; the test actually runs on
  # a TestPipeline, so the doc pipeline is deliberately discarded here.
  p = TestPipeline() # Use TestPipeline for testing.

  # [START pipelines_constructing_reading]
  lines = p | 'ReadMyFile' >> beam.io.ReadFromText('gs://some/inputData.txt')
  # [END pipelines_constructing_reading]

  # [START pipelines_constructing_applying]
  words = lines | beam.FlatMap(lambda x: re.findall(r'[A-Za-z\']+', x))
  reversed_words = words | ReverseWords()
  # [END pipelines_constructing_applying]

  # [START pipelines_constructing_writing]
  filtered_words = reversed_words | 'FilterWords' >> beam.Filter(filter_words)
  filtered_words | 'WriteMyFile' >> beam.io.WriteToText(
      'gs://some/outputData.txt')
  # [END pipelines_constructing_writing]

  # Rewrite the placeholder file paths before executing the pipeline.
  p.visit(SnippetUtils.RenameFiles(renames))

  # [START pipelines_constructing_running]
  p.run()
开发者ID:vikkyrk,项目名称:incubator-beam,代码行数:43,代码来源:snippets.py

示例3: examples_wordcount_debugging

# 需要导入模块: from apache_beam.test_pipeline import TestPipeline [as 别名]
# 或者: from apache_beam.test_pipeline.TestPipeline import visit [as 别名]
def examples_wordcount_debugging(renames):
  """DebuggingWordCount example snippets.

  Builds a word-count pipeline that filters words against a regex, logging
  and counting matches/non-matches with custom metrics, then runs it on a
  TestPipeline.

  Args:
    renames: path renames handed to SnippetUtils.RenameFiles so the snippet's
      placeholder paths are rewritten for the test environment.
  """
  import re

  import apache_beam as beam

  # NOTE: the [START]/[END] markers delimit code extracted verbatim into the
  # published documentation; keep the markers and enclosed lines intact.
  # [START example_wordcount_debugging_logging]
  # [START example_wordcount_debugging_aggregators]
  import logging

  class FilterTextFn(beam.DoFn):
    """A DoFn that filters for a specific key based on a regular expression."""

    def __init__(self, pattern):
      self.pattern = pattern
      # A custom metric can track values in your pipeline as it runs. Create
      # custom metrics matched_words and unmatched_words.
      # BUGFIX: the counter was previously misspelled 'umatched_words'.
      self.matched_words = Metrics.counter(self.__class__, 'matched_words')
      self.unmatched_words = Metrics.counter(self.__class__, 'unmatched_words')

    def process(self, element):
      word, _ = element
      if re.match(self.pattern, word):
        # Log at INFO level each element we match. When executing this pipeline
        # using the Dataflow service, these log lines will appear in the Cloud
        # Logging UI.
        logging.info('Matched %s', word)

        # Add 1 to the custom metric counter matched_words
        self.matched_words.inc()
        yield element
      else:
        # Log at the "DEBUG" level each element that is not matched. Different
        # log levels can be used to control the verbosity of logging providing
        # an effective mechanism to filter less important information. Note
        # currently only "INFO" and higher level logs are emitted to the Cloud
        # Logger. This log message will not be visible in the Cloud Logger.
        logging.debug('Did not match %s', word)

        # Add 1 to the custom metric counter unmatched_words
        self.unmatched_words.inc()
  # [END example_wordcount_debugging_logging]
  # [END example_wordcount_debugging_aggregators]

  p = TestPipeline()  # Use TestPipeline for testing.
  filtered_words = (
      p
      | beam.io.ReadFromText(
          'gs://dataflow-samples/shakespeare/kinglear.txt')
      | 'ExtractWords' >> beam.FlatMap(lambda x: re.findall(r'[A-Za-z\']+', x))
      | beam.combiners.Count.PerElement()
      | 'FilterText' >> beam.ParDo(FilterTextFn('Flourish|stomach')))

  # [START example_wordcount_debugging_assert]
  beam.assert_that(
      filtered_words, beam.equal_to([('Flourish', 3), ('stomach', 1)]))
  # [END example_wordcount_debugging_assert]

  # Format each (word, count) pair and write the results. The lambda takes a
  # single tuple argument (Python-2-only tuple-parameter unpacking removed,
  # PEP 3113); '%s: %s' % word_count formats the 2-tuple directly.
  (filtered_words
   | 'format' >> beam.Map(lambda word_count: '%s: %s' % word_count)
   | 'Write' >> beam.io.WriteToText('gs://my-bucket/counts.txt'))

  # Rewrite the placeholder file paths before executing the pipeline.
  p.visit(SnippetUtils.RenameFiles(renames))
  p.run()
开发者ID:vikkyrk,项目名称:incubator-beam,代码行数:66,代码来源:snippets.py

示例4: pipeline_monitoring

# 需要导入模块: from apache_beam.test_pipeline import TestPipeline [as 别名]
# 或者: from apache_beam.test_pipeline.TestPipeline import visit [as 别名]
def pipeline_monitoring(renames):
  """Using monitoring interface snippets.

  Args:
    renames: path renames handed to SnippetUtils.RenameFiles so the snippet's
      placeholder paths are rewritten for the test environment.
  """

  import re
  import apache_beam as beam
  from apache_beam.utils.pipeline_options import PipelineOptions

  class WordCountOptions(PipelineOptions):
    # Custom pipeline options exposing --input/--output locations with
    # gs:// bucket defaults.

    @classmethod
    def _add_argparse_args(cls, parser):
      parser.add_argument('--input',
                          help='Input for the pipeline',
                          default='gs://my-bucket/input')
      parser.add_argument('--output',
                          help='output for the pipeline',
                          default='gs://my-bucket/output')

  class ExtractWordsFn(beam.DoFn):
    # Splits each input line into individual words, one output element each.

    def process(self, element):
      words = re.findall(r'[A-Za-z\']+', element)
      for word in words:
        yield word

  class FormatCountsFn(beam.DoFn):
    # Formats a (word, count) pair as a 'word: count' string.

    def process(self, element):
      word, count = element
      yield '%s: %s' % (word, count)

  # NOTE: each [START]/[END] pair below is extracted verbatim into the
  # published documentation; keep the markers and enclosed lines intact.
  # [START pipeline_monitoring_composite]
  # The CountWords Composite Transform inside the WordCount pipeline.
  class CountWords(beam.PTransform):

    def expand(self, pcoll):
      return (pcoll
              # Convert lines of text into individual words.
              | 'ExtractWords' >> beam.ParDo(ExtractWordsFn())
              # Count the number of times each word occurs.
              | beam.combiners.Count.PerElement()
              # Format each word and count into a printable string.
              | 'FormatCounts' >> beam.ParDo(FormatCountsFn()))
  # [END pipeline_monitoring_composite]

  pipeline_options = PipelineOptions()
  options = pipeline_options.view_as(WordCountOptions)
  p = TestPipeline()  # Use TestPipeline for testing.

  # [START pipeline_monitoring_execution]
  (p
   # Read the lines of the input text.
   | 'ReadLines' >> beam.io.ReadFromText(options.input)
   # Count the words.
   | CountWords()
   # Write the formatted word counts to output.
   | 'WriteCounts' >> beam.io.WriteToText(options.output))
  # [END pipeline_monitoring_execution]

  # Rewrite the placeholder file paths before executing the pipeline.
  p.visit(SnippetUtils.RenameFiles(renames))
  p.run()
开发者ID:vikkyrk,项目名称:incubator-beam,代码行数:63,代码来源:snippets.py


注:本文中的apache_beam.test_pipeline.TestPipeline.visit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。