本文整理汇总了Python中apache_beam.test_pipeline.TestPipeline.visit方法的典型用法代码示例。如果您正苦于以下问题:Python TestPipeline.visit方法的具体用法?Python TestPipeline.visit怎么用?Python TestPipeline.visit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类apache_beam.test_pipeline.TestPipeline
的用法示例。
在下文中一共展示了TestPipeline.visit方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: model_textio_compressed
# 需要导入模块: from apache_beam.test_pipeline import TestPipeline [as 别名]
# 或者: from apache_beam.test_pipeline.TestPipeline import visit [as 别名]
def model_textio_compressed(renames, expected):
"""Using a Read Transform to read compressed text files."""
p = TestPipeline()
# [START model_textio_write_compressed]
lines = p | 'ReadFromText' >> beam.io.ReadFromText(
'/path/to/input-*.csv.gz',
compression_type=beam.io.filesystem.CompressionTypes.GZIP)
# [END model_textio_write_compressed]
beam.assert_that(lines, beam.equal_to(expected))
p.visit(SnippetUtils.RenameFiles(renames))
p.run().wait_until_finish()
示例2: construct_pipeline
# 需要导入模块: from apache_beam.test_pipeline import TestPipeline [as 别名]
# 或者: from apache_beam.test_pipeline.TestPipeline import visit [as 别名]
def construct_pipeline(renames):
"""A reverse words snippet as an example for constructing a pipeline."""
import re
class ReverseWords(beam.PTransform):
"""A PTransform that reverses individual elements in a PCollection."""
def expand(self, pcoll):
return pcoll | beam.Map(lambda e: e[::-1])
def filter_words(unused_x):
"""Pass through filter to select everything."""
return True
# [START pipelines_constructing_creating]
from apache_beam.utils.pipeline_options import PipelineOptions
p = beam.Pipeline(options=PipelineOptions())
# [END pipelines_constructing_creating]
p = TestPipeline() # Use TestPipeline for testing.
# [START pipelines_constructing_reading]
lines = p | 'ReadMyFile' >> beam.io.ReadFromText('gs://some/inputData.txt')
# [END pipelines_constructing_reading]
# [START pipelines_constructing_applying]
words = lines | beam.FlatMap(lambda x: re.findall(r'[A-Za-z\']+', x))
reversed_words = words | ReverseWords()
# [END pipelines_constructing_applying]
# [START pipelines_constructing_writing]
filtered_words = reversed_words | 'FilterWords' >> beam.Filter(filter_words)
filtered_words | 'WriteMyFile' >> beam.io.WriteToText(
'gs://some/outputData.txt')
# [END pipelines_constructing_writing]
p.visit(SnippetUtils.RenameFiles(renames))
# [START pipelines_constructing_running]
p.run()
示例3: examples_wordcount_debugging
# 需要导入模块: from apache_beam.test_pipeline import TestPipeline [as 别名]
# 或者: from apache_beam.test_pipeline.TestPipeline import visit [as 别名]
def examples_wordcount_debugging(renames):
"""DebuggingWordCount example snippets."""
import re
import apache_beam as beam
# [START example_wordcount_debugging_logging]
# [START example_wordcount_debugging_aggregators]
import logging
class FilterTextFn(beam.DoFn):
"""A DoFn that filters for a specific key based on a regular expression."""
def __init__(self, pattern):
self.pattern = pattern
# A custom metric can track values in your pipeline as it runs. Create
# custom metrics matched_word and unmatched_words.
self.matched_words = Metrics.counter(self.__class__, 'matched_words')
self.umatched_words = Metrics.counter(self.__class__, 'umatched_words')
def process(self, element):
word, _ = element
if re.match(self.pattern, word):
# Log at INFO level each element we match. When executing this pipeline
# using the Dataflow service, these log lines will appear in the Cloud
# Logging UI.
logging.info('Matched %s', word)
# Add 1 to the custom metric counter matched_words
self.matched_words.inc()
yield element
else:
# Log at the "DEBUG" level each element that is not matched. Different
# log levels can be used to control the verbosity of logging providing
# an effective mechanism to filter less important information. Note
# currently only "INFO" and higher level logs are emitted to the Cloud
# Logger. This log message will not be visible in the Cloud Logger.
logging.debug('Did not match %s', word)
# Add 1 to the custom metric counter umatched_words
self.umatched_words.inc()
# [END example_wordcount_debugging_logging]
# [END example_wordcount_debugging_aggregators]
p = TestPipeline() # Use TestPipeline for testing.
filtered_words = (
p
| beam.io.ReadFromText(
'gs://dataflow-samples/shakespeare/kinglear.txt')
| 'ExtractWords' >> beam.FlatMap(lambda x: re.findall(r'[A-Za-z\']+', x))
| beam.combiners.Count.PerElement()
| 'FilterText' >> beam.ParDo(FilterTextFn('Flourish|stomach')))
# [START example_wordcount_debugging_assert]
beam.assert_that(
filtered_words, beam.equal_to([('Flourish', 3), ('stomach', 1)]))
# [END example_wordcount_debugging_assert]
output = (filtered_words
| 'format' >> beam.Map(lambda (word, c): '%s: %s' % (word, c))
| 'Write' >> beam.io.WriteToText('gs://my-bucket/counts.txt'))
p.visit(SnippetUtils.RenameFiles(renames))
p.run()
示例4: pipeline_monitoring
# 需要导入模块: from apache_beam.test_pipeline import TestPipeline [as 别名]
# 或者: from apache_beam.test_pipeline.TestPipeline import visit [as 别名]
def pipeline_monitoring(renames):
"""Using monitoring interface snippets."""
import re
import apache_beam as beam
from apache_beam.utils.pipeline_options import PipelineOptions
class WordCountOptions(PipelineOptions):
@classmethod
def _add_argparse_args(cls, parser):
parser.add_argument('--input',
help='Input for the pipeline',
default='gs://my-bucket/input')
parser.add_argument('--output',
help='output for the pipeline',
default='gs://my-bucket/output')
class ExtractWordsFn(beam.DoFn):
def process(self, element):
words = re.findall(r'[A-Za-z\']+', element)
for word in words:
yield word
class FormatCountsFn(beam.DoFn):
def process(self, element):
word, count = element
yield '%s: %s' % (word, count)
# [START pipeline_monitoring_composite]
# The CountWords Composite Transform inside the WordCount pipeline.
class CountWords(beam.PTransform):
def expand(self, pcoll):
return (pcoll
# Convert lines of text into individual words.
| 'ExtractWords' >> beam.ParDo(ExtractWordsFn())
# Count the number of times each word occurs.
| beam.combiners.Count.PerElement()
# Format each word and count into a printable string.
| 'FormatCounts' >> beam.ParDo(FormatCountsFn()))
# [END pipeline_monitoring_composite]
pipeline_options = PipelineOptions()
options = pipeline_options.view_as(WordCountOptions)
p = TestPipeline() # Use TestPipeline for testing.
# [START pipeline_monitoring_execution]
(p
# Read the lines of the input text.
| 'ReadLines' >> beam.io.ReadFromText(options.input)
# Count the words.
| CountWords()
# Write the formatted word counts to output.
| 'WriteCounts' >> beam.io.WriteToText(options.output))
# [END pipeline_monitoring_execution]
p.visit(SnippetUtils.RenameFiles(renames))
p.run()