

Python PipelineOptions.view_as Method Code Examples

This article collects typical code examples of the Python method apache_beam.utils.pipeline_options.PipelineOptions.view_as. If you are unsure what PipelineOptions.view_as does or how to call it, the curated examples below should help. You can also explore further usage examples of the containing class, apache_beam.utils.pipeline_options.PipelineOptions.


The following 15 code examples of the PipelineOptions.view_as method are presented below, sorted by popularity by default.
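
Before diving into the examples, here is a minimal sketch of what view_as does: it reinterprets the flags already held by a PipelineOptions object through another PipelineOptions subclass, so every view reads and writes the same shared underlying values. The MyCustomOptions class and the --greeting flag below are hypothetical, invented purely for illustration; the import path follows the Beam version used throughout this article (newer releases moved these classes to apache_beam.options.pipeline_options).

from apache_beam.utils.pipeline_options import PipelineOptions

class MyCustomOptions(PipelineOptions):
  """Hypothetical options subclass, for illustration only."""

  @classmethod
  def _add_argparse_args(cls, parser):
    # Flags registered here become attributes on this view.
    parser.add_argument('--greeting', default='hello',
                        help='Illustrative flag; not a real Beam option.')

options = PipelineOptions(['--greeting', 'hi'])
custom = options.view_as(MyCustomOptions)
print(custom.greeting)        # 'hi' -- parsed from the shared flag list
custom.greeting = 'hi again'  # assignments write through to the shared state
print(options.view_as(MyCustomOptions).greeting)  # 'hi again'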

Example 1: test_with_setup_file

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_with_setup_file(self):
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    self.create_temp_file(
        os.path.join(source_dir, 'setup.py'), 'notused')

    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).setup_file = os.path.join(
        source_dir, 'setup.py')

    self.assertEqual(
        [dependency.WORKFLOW_TARBALL_FILE],
        dependency.stage_job_resources(
            options,
            # We replace the build setup command because a realistic one would
            # require the setuptools package to be installed. Note that we can't
            # use "touch" here to create the expected output tarball file, since
            # touch is not available on Windows, so we invoke python to produce
            # equivalent behavior.
            build_setup_args=[
                'python', '-c', 'open(__import__("sys").argv[1], "a")',
                os.path.join(source_dir, dependency.WORKFLOW_TARBALL_FILE)],
            temp_dir=source_dir))
    self.assertTrue(
        os.path.isfile(
            os.path.join(staging_dir, dependency.WORKFLOW_TARBALL_FILE)))
Author: vikkyrk, Project: incubator-beam, Lines: 30, Source: dependency_test.py

Example 2: test_with_requirements_file

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_with_requirements_file(self):
    try:
      staging_dir = tempfile.mkdtemp()
      requirements_cache_dir = tempfile.mkdtemp()
      source_dir = tempfile.mkdtemp()

      options = PipelineOptions()
      options.view_as(GoogleCloudOptions).staging_location = staging_dir
      self.update_options(options)
      options.view_as(SetupOptions).requirements_cache = requirements_cache_dir
      options.view_as(SetupOptions).requirements_file = os.path.join(
          source_dir, dependency.REQUIREMENTS_FILE)
      self.create_temp_file(
          os.path.join(source_dir, dependency.REQUIREMENTS_FILE), 'nothing')
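      # self.populate_requirements_cache is a test stub; judging by the
      # assertion below, it stages 'abc.txt' and 'def.txt' into the cache.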
      self.assertEqual(
          sorted([dependency.REQUIREMENTS_FILE,
                  'abc.txt', 'def.txt']),
          sorted(dependency.stage_job_resources(
              options,
              populate_requirements_cache=self.populate_requirements_cache)))
      self.assertTrue(
          os.path.isfile(
              os.path.join(staging_dir, dependency.REQUIREMENTS_FILE)))
      self.assertTrue(os.path.isfile(os.path.join(staging_dir, 'abc.txt')))
      self.assertTrue(os.path.isfile(os.path.join(staging_dir, 'def.txt')))
    finally:
      shutil.rmtree(staging_dir)
      shutil.rmtree(requirements_cache_dir)
      shutil.rmtree(source_dir)
Author: vikkyrk, Project: incubator-beam, Lines: 31, Source: dependency_test.py

Example 3: run

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
def run():
    parser = argparse.ArgumentParser()
    parser.add_argument('--run_locally', dest='run_locally', default='', help='Run data subset and do not save.')
    known_args, pipeline_args = parser.parse_known_args()
    pipeline_options = PipelineOptions(pipeline_args)
    pipeline_options.view_as(SetupOptions).save_main_session = True
    gcloud_options = pipeline_options.view_as(GoogleCloudOptions)
    delete_from_datastore('dancedeets-hrd', gcloud_options, known_args.run_locally)
Author: mikelambert, Project: dancedeets-monorepo, Lines: 10, Source: delete_old.py

Example 4: examples_wordcount_minimal

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
def examples_wordcount_minimal(renames):
  """MinimalWordCount example snippets."""
  import re

  import apache_beam as beam

  from apache_beam.utils.pipeline_options import GoogleCloudOptions
  from apache_beam.utils.pipeline_options import StandardOptions
  from apache_beam.utils.pipeline_options import PipelineOptions

  # [START examples_wordcount_minimal_options]
  options = PipelineOptions()
  google_cloud_options = options.view_as(GoogleCloudOptions)
  google_cloud_options.project = 'my-project-id'
  google_cloud_options.job_name = 'myjob'
  google_cloud_options.staging_location = 'gs://your-bucket-name-here/staging'
  google_cloud_options.temp_location = 'gs://your-bucket-name-here/temp'
  options.view_as(StandardOptions).runner = 'DataflowRunner'
  # [END examples_wordcount_minimal_options]

  # Run it locally for testing.
  options = PipelineOptions()

  # [START examples_wordcount_minimal_create]
  p = beam.Pipeline(options=options)
  # [END examples_wordcount_minimal_create]

  (
      # [START examples_wordcount_minimal_read]
      p | beam.io.ReadFromText(
          'gs://dataflow-samples/shakespeare/kinglear.txt')
      # [END examples_wordcount_minimal_read]

      # [START examples_wordcount_minimal_pardo]
      | 'ExtractWords' >> beam.FlatMap(lambda x: re.findall(r'[A-Za-z\']+', x))
      # [END examples_wordcount_minimal_pardo]

      # [START examples_wordcount_minimal_count]
      | beam.combiners.Count.PerElement()
      # [END examples_wordcount_minimal_count]

      # [START examples_wordcount_minimal_map]
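      # Note: the tuple-parameter lambda below is Python 2-only syntax
      # (removed in Python 3 by PEP 3113).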
      | beam.Map(lambda (word, count): '%s: %s' % (word, count))
      # [END examples_wordcount_minimal_map]

      # [START examples_wordcount_minimal_write]
      | beam.io.WriteToText('gs://my-bucket/counts.txt')
      # [END examples_wordcount_minimal_write]
  )

  p.visit(SnippetUtils.RenameFiles(renames))

  # [START examples_wordcount_minimal_run]
  result = p.run()
  # [END examples_wordcount_minimal_run]
  result.wait_until_finish()
Author: vikkyrk, Project: incubator-beam, Lines: 58, Source: snippets.py

Example 5: test_get_all_options

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_get_all_options(self):
    for case in PipelineOptionsTest.TEST_CASES:
      options = PipelineOptions(flags=case['flags'])
      self.assertDictContainsSubset(case['expected'], options.get_all_options())
      self.assertEqual(options.view_as(
          PipelineOptionsTest.MockOptions).mock_flag,
                       case['expected']['mock_flag'])
      self.assertEqual(options.view_as(
          PipelineOptionsTest.MockOptions).mock_option,
                       case['expected']['mock_option'])
Author: vikkyrk, Project: incubator-beam, Lines: 12, Source: pipeline_options_test.py

Example 6: test_no_main_session

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_no_main_session(self):
    staging_dir = tempfile.mkdtemp()
    options = PipelineOptions()

    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    options.view_as(SetupOptions).save_main_session = False
    self.update_options(options)

    self.assertEqual(
        [],
        dependency.stage_job_resources(options))
Author: vikkyrk, Project: incubator-beam, Lines: 13, Source: dependency_test.py

Example 7: run

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
def run(argv=None):
  """Main entry point; defines and runs the wordcount pipeline."""

  parser = argparse.ArgumentParser()
  parser.add_argument('--input',
                      dest='input',
                      default='gs://dataflow-samples/shakespeare/kinglear.txt',
                      help='Input file to process.')
  parser.add_argument('--kind',
                      dest='kind',
                      required=True,
                      help='Datastore Kind')
  parser.add_argument('--namespace',
                      dest='namespace',
                      help='Datastore Namespace')
  parser.add_argument('--ancestor',
                      dest='ancestor',
                      default='root',
                      help='The ancestor key name for all entities.')
  parser.add_argument('--output',
                      dest='output',
                      required=True,
                      help='Output file to write results to.')
  parser.add_argument('--read_only',
                      action='store_true',
                      help='Read an existing dataset, do not write first')
  parser.add_argument('--num_shards',
                      dest='num_shards',
                      type=int,
                      # 0 means the system chooses the shard count automatically.
                      default=0,
                      help='Number of output shards')

  known_args, pipeline_args = parser.parse_known_args(argv)
  # We use the save_main_session option because one or more DoFn's in this
  # workflow rely on global context (e.g., a module imported at module level).
  pipeline_options = PipelineOptions(pipeline_args)
  pipeline_options.view_as(SetupOptions).save_main_session = True
  gcloud_options = pipeline_options.view_as(GoogleCloudOptions)

  # Write to Datastore if the `read_only` option is not specified.
  if not known_args.read_only:
    write_to_datastore(gcloud_options.project, known_args, pipeline_options)

  # Read entities from Datastore.
  result = read_from_datastore(gcloud_options.project, known_args,
                               pipeline_options)

  empty_lines_filter = MetricsFilter().with_name('empty_lines')
  query_result = result.metrics().query(empty_lines_filter)
  if query_result['counters']:
    empty_lines_counter = query_result['counters'][0]
    logging.info('number of empty lines: %d', empty_lines_counter.committed)
Author: amitsela, Project: incubator-beam, Lines: 55, Source: datastore_wordcount.py

Example 8: test_sdk_location_gcs

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_sdk_location_gcs(self):
    staging_dir = tempfile.mkdtemp()
    sdk_location = 'gs://my-gcs-bucket/tarball.tar.gz'
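    # Test helper: presumably stubs the gs:// file copy so no network access occurs.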
    self.override_file_copy(sdk_location, staging_dir)

    options = PipelineOptions()
    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    self.update_options(options)
    options.view_as(SetupOptions).sdk_location = sdk_location

    self.assertEqual(
        [names.DATAFLOW_SDK_TARBALL_FILE],
        dependency.stage_job_resources(options))
Author: vikkyrk, Project: incubator-beam, Lines: 15, Source: dependency_test.py

Example 9: test_requirements_file_not_present

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_requirements_file_not_present(self):
    staging_dir = tempfile.mkdtemp()
    with self.assertRaises(RuntimeError) as cm:
      options = PipelineOptions()
      options.view_as(GoogleCloudOptions).staging_location = staging_dir
      self.update_options(options)
      options.view_as(SetupOptions).requirements_file = 'nosuchfile'
      dependency.stage_job_resources(
          options, populate_requirements_cache=self.populate_requirements_cache)
    self.assertEqual(
        cm.exception.message,
        'The file %s cannot be found. It was specified in the '
        '--requirements_file command line option.' % 'nosuchfile')
Author: vikkyrk, Project: incubator-beam, Lines: 15, Source: dependency_test.py

Example 10: test_with_main_session

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_with_main_session(self):
    staging_dir = tempfile.mkdtemp()
    options = PipelineOptions()

    options.view_as(GoogleCloudOptions).staging_location = staging_dir
    options.view_as(SetupOptions).save_main_session = True
    self.update_options(options)

    self.assertEqual(
        [names.PICKLED_MAIN_SESSION_FILE],
        dependency.stage_job_resources(options))
    self.assertTrue(
        os.path.isfile(
            os.path.join(staging_dir, names.PICKLED_MAIN_SESSION_FILE)))
Author: vikkyrk, Project: incubator-beam, Lines: 16, Source: dependency_test.py

Example 11: test_with_extra_packages_missing_files

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_with_extra_packages_missing_files(self):
    staging_dir = tempfile.mkdtemp()
    with self.assertRaises(RuntimeError) as cm:

      options = PipelineOptions()
      options.view_as(GoogleCloudOptions).staging_location = staging_dir
      self.update_options(options)
      options.view_as(SetupOptions).extra_packages = ['nosuchfile.tar.gz']

      dependency.stage_job_resources(options)
    self.assertEqual(
        cm.exception.message,
        'The file %s cannot be found. It was specified in the '
        '--extra_packages command line option.' % 'nosuchfile.tar.gz')
Author: vikkyrk, Project: incubator-beam, Lines: 16, Source: dependency_test.py

Example 12: run

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
def run(argv=None):
  """Main entry point; defines and runs the wordcount pipeline."""
  class WordcountOptions(PipelineOptions):
    @classmethod
    def _add_argparse_args(cls, parser):
      parser.add_value_provider_argument(
          '--input',
          dest='input',
          default='gs://dataflow-samples/shakespeare/kinglear.txt',
          help='Input file to process.')
      parser.add_value_provider_argument(
          '--output',
          dest='output',
          required=True,
          help='Output file to write results to.')
  pipeline_options = PipelineOptions(argv)
  wordcount_options = pipeline_options.view_as(WordcountOptions)

  # We use the save_main_session option because one or more DoFn's in this
  # workflow rely on global context (e.g., a module imported at module level).
  pipeline_options.view_as(SetupOptions).save_main_session = True
  p = beam.Pipeline(options=pipeline_options)

  # Read the text file[pattern] into a PCollection.
  lines = p | 'read' >> ReadFromText(wordcount_options.input)

  # Count the occurrences of each word.
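  # Note: with_output_types(unicode) and the tuple-parameter lambdas below
  # are Python 2-only idioms.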
  counts = (lines
            | 'split' >> (beam.ParDo(WordExtractingDoFn())
                          .with_output_types(unicode))
            | 'pair_with_one' >> beam.Map(lambda x: (x, 1))
            | 'group' >> beam.GroupByKey()
            | 'count' >> beam.Map(lambda (word, ones): (word, sum(ones))))

  # Format the counts into a PCollection of strings.
  output = counts | 'format' >> beam.Map(lambda (word, c): '%s: %s' % (word, c))

  # Write the output using a "Write" transform that has side effects.
  # pylint: disable=expression-not-assigned
  output | 'write' >> WriteToText(wordcount_options.output)

  # Actually run the pipeline (all operations above are deferred).
  result = p.run()
  result.wait_until_finish()
  empty_lines_filter = MetricsFilter().with_name('empty_lines')
  query_result = result.metrics().query(empty_lines_filter)
  if query_result['counters']:
    empty_lines_counter = query_result['counters'][0]
    logging.info('number of empty lines: %d', empty_lines_counter.committed)
Author: amitsela, Project: incubator-beam, Lines: 51, Source: wordcount.py

Example 13: pipeline_options_remote

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
def pipeline_options_remote(argv):
  """Creating a Pipeline using a PipelineOptions object for remote execution."""

  from apache_beam import Pipeline
  from apache_beam.utils.pipeline_options import PipelineOptions

  # [START pipeline_options_create]
  options = PipelineOptions(flags=argv)
  # [END pipeline_options_create]

  # [START pipeline_options_define_custom]
  class MyOptions(PipelineOptions):

    @classmethod
    def _add_argparse_args(cls, parser):
      parser.add_argument('--input')
      parser.add_argument('--output')
  # [END pipeline_options_define_custom]

  from apache_beam.utils.pipeline_options import GoogleCloudOptions
  from apache_beam.utils.pipeline_options import StandardOptions

  # [START pipeline_options_dataflow_service]
  # Create and set your PipelineOptions.
  options = PipelineOptions(flags=argv)

  # For Cloud execution, set the Cloud Platform project, job_name,
  # staging location, temp_location and specify DataflowRunner.
  google_cloud_options = options.view_as(GoogleCloudOptions)
  google_cloud_options.project = 'my-project-id'
  google_cloud_options.job_name = 'myjob'
  google_cloud_options.staging_location = 'gs://my-bucket/binaries'
  google_cloud_options.temp_location = 'gs://my-bucket/temp'
  options.view_as(StandardOptions).runner = 'DataflowRunner'

  # Create the Pipeline with the specified options.
  p = Pipeline(options=options)
  # [END pipeline_options_dataflow_service]

  my_options = options.view_as(MyOptions)
  my_input = my_options.input
  my_output = my_options.output

  p = TestPipeline()  # Use TestPipeline for testing.

  lines = p | beam.io.ReadFromText(my_input)
  lines | beam.io.WriteToText(my_output)

  p.run()
Author: vikkyrk, Project: incubator-beam, Lines: 51, Source: snippets.py

Example 14: test_sdk_location_local_not_present

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_sdk_location_local_not_present(self):
    staging_dir = tempfile.mkdtemp()
    sdk_location = 'nosuchdir'
    with self.assertRaises(RuntimeError) as cm:
      options = PipelineOptions()
      options.view_as(GoogleCloudOptions).staging_location = staging_dir
      self.update_options(options)
      options.view_as(SetupOptions).sdk_location = sdk_location

      dependency.stage_job_resources(options)
    self.assertEqual(
        'The file "%s" cannot be found. Its '
        'location was specified by the --sdk_location command-line option.' %
        sdk_location,
        cm.exception.message)
Author: vikkyrk, Project: incubator-beam, Lines: 17, Source: dependency_test.py

Example 15: test_with_extra_packages_invalid_file_name

# Required import: from apache_beam.utils.pipeline_options import PipelineOptions [as alias]
# Or: from apache_beam.utils.pipeline_options.PipelineOptions import view_as [as alias]
  def test_with_extra_packages_invalid_file_name(self):
    staging_dir = tempfile.mkdtemp()
    source_dir = tempfile.mkdtemp()
    self.create_temp_file(
        os.path.join(source_dir, 'abc.tgz'), 'nothing')
    with self.assertRaises(RuntimeError) as cm:
      options = PipelineOptions()
      options.view_as(GoogleCloudOptions).staging_location = staging_dir
      self.update_options(options)
      options.view_as(SetupOptions).extra_packages = [
          os.path.join(source_dir, 'abc.tgz')]
      dependency.stage_job_resources(options)
    self.assertEqual(
        cm.exception.message,
        'The --extra_package option expects a full path ending with ".tar" or '
        '".tar.gz" instead of %s' % os.path.join(source_dir, 'abc.tgz'))
Author: vikkyrk, Project: incubator-beam, Lines: 18, Source: dependency_test.py


Note: The apache_beam.utils.pipeline_options.PipelineOptions.view_as examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by the community; copyright of the source code remains with the original authors. Refer to each project's License before distributing or using the code; do not repost without permission.