

Python apache_beam.FlatMap Method Code Examples

This article collects typical usage examples of the apache_beam.FlatMap method in Python. If you are wondering what apache_beam.FlatMap does, how to call it, or what real-world uses look like, the curated code examples below may help. You can also explore further usage examples for the apache_beam package that the method belongs to.


The sections below present 15 code examples of the apache_beam.FlatMap method, ordered by popularity by default.
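Before the project-sourced examples, here is a minimal, self-contained sketch (added for illustration, not taken from any of the projects below) of what beam.FlatMap does: the given callable runs once per input element and may return or yield zero, one, or many elements, all of which are flattened into the output PCollection.

# Minimal illustration: split lines into words; an empty line produces no output.
import apache_beam as beam

with beam.Pipeline() as p:
    _ = (
        p
        | 'CreateLines' >> beam.Create(['a b c', 'd e', ''])
        | 'SplitWords' >> beam.FlatMap(lambda line: line.split())
        | 'PrintWords' >> beam.Map(print))

Compare this with beam.Map, which always emits exactly one output element per input element.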

Example 1: expand

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def expand(self, inputs):
    pcoll, = inputs
    def extract_outputs(outputs, num_outputs):
      if len(outputs) != num_outputs:
        raise ValueError(
            'Analyzer has {} outputs but its implementation produced {} '
            'values'.format(num_outputs, len(outputs)))
      for i, output in enumerate(outputs):
        yield beam.pvalue.TaggedOutput(str(i), output)

    output_keys = [str(i) for i in range(self._num_outputs)]
    outputs_tuple = (
        pcoll |
        'ExtractOutputs' >> beam.FlatMap(
            extract_outputs, self._num_outputs).with_outputs(*output_keys))
    return tuple(outputs_tuple[key] for key in output_keys) 
Developer: tensorflow, Project: transform, Source file: analyzer_impls.py
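A minimal standalone sketch of the same tagged-output pattern may make this example easier to follow. The tag names ('even'/'odd') and the parity split below are invented for illustration and are not part of the transform project:

# Added sketch of FlatMap combined with TaggedOutput and with_outputs.
import apache_beam as beam

def split_by_parity(n):
    # Route each element to exactly one named output via TaggedOutput.
    yield beam.pvalue.TaggedOutput('even' if n % 2 == 0 else 'odd', n)

with beam.Pipeline() as p:
    numbers = p | 'CreateNumbers' >> beam.Create([1, 2, 3, 4, 5])
    results = (numbers
               | 'SplitByParity' >> beam.FlatMap(split_by_parity).with_outputs('even', 'odd'))
    _ = results['even'] | 'PrintEven' >> beam.Map(lambda n: print('even:', n))
    _ = results['odd'] | 'PrintOdd' >> beam.Map(lambda n: print('odd:', n))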

Example 2: _clear_shared_state_after_barrier

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def _clear_shared_state_after_barrier(pipeline, input_barrier):
  """Clears any shared state from within a pipeline context.

  This will only be cleared once input_barrier becomes available.

  Args:
    pipeline: A `beam.Pipeline` object.
    input_barrier: A `PCollection` which the pipeline should wait for.

  Returns:
    An empty `PCollection`.
  """
  empty_pcoll = input_barrier | 'MakeCheapBarrier' >> beam.FlatMap(
      lambda x: None)
  return (pipeline
          | 'PrepareToClearSharedKeepAlives' >> beam.Create([None])
          | 'WaitAndClearSharedKeepAlives' >> beam.Map(
              lambda x, empty_side_input: shared.Shared().acquire(lambda: None),
              beam.pvalue.AsIter(empty_pcoll))) 
Developer: tensorflow, Project: transform, Source file: impl.py

Example 3: expand

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def expand(self, pcollection):
        def get_dir_list(file_dir, suffix=""):
            file_list = []
            for file_name in os.listdir(file_dir):
                f = os.path.join(file_dir, file_name)
                if file_name.endswith(suffix):
                    file_list.append(f)

            return file_list

        def get_events(filename):
            catalog, wavename = read_nordic(filename, return_wavnames=True)
            for event in catalog.events:
                for pick in event.picks:
                    pick.waveform_id.wavename = wavename
                yield event

        return (
                pcollection
                | 'Create file directory' >> beam.Create(self.file_patterns)
                | 'List all files' >> beam.FlatMap(get_dir_list)
                | 'Get event' >> beam.FlatMap(get_events)
        ) 
Developer: SeisNN, Project: SeisNN, Source file: obspyio.py

Example 4: shuffle

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def shuffle(p):
  """Shuffles data from PCollection.

  Args:
    p: PCollection.

  Returns:
    PCollection of shuffled data.
  """

  class _AddRandomKey(beam.DoFn):

    def process(self, element):
      yield random.random(), element

  shuffled_data = (
      p
      | 'PairWithRandom' >> beam.ParDo(_AddRandomKey())
      | 'GroupByRandom' >> beam.GroupByKey()
      | 'DropRandom' >> beam.FlatMap(lambda kv: kv[1]))
  return shuffled_data 
Developer: GoogleCloudPlatform, Project: professional-services, Source file: preprocess.py

Example 5: shuffle_data

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def shuffle_data(p):
  """Shuffles data from PCollection.

  Args:
    p: PCollection.

  Returns:
    PCollection of shuffled data.
  """

  class _AddRandomKey(beam.DoFn):

    def process(self, element):
      yield (random.random(), element)

  shuffled_data = (
      p
      | 'PairWithRandom' >> beam.ParDo(_AddRandomKey())
      | 'GroupByRandom' >> beam.GroupByKey()
      | 'DropRandom' >> beam.FlatMap(lambda kv: kv[1]))
  return shuffled_data 
Developer: GoogleCloudPlatform, Project: professional-services, Source file: preprocess.py

Example 6: run

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def run(p, args):
  """Creates a pipeline to build and write train/val/test datasets."""
  # pylint: disable=no-value-for-parameter
  query = bq_query.query
  if not args.cloud:
    query = "{} LIMIT 10".format(query)

  raw_data = (p
              | "ReadBQ" >> ReadBQ(query)
              | "HandleNullUserTags" >> beam.Map(_handle_null_user_tags)
              | "NormalizeUserTags" >> beam.Map(_normalize_user_tags))
  data = _run_tft_fn(raw_data, _preprocess_tft, args.tft_dir,
                     args.user_min_count, args.item_min_count)
  data = (data
          | "FilterData" >> beam.FlatMap(_filter_data)
          | "CleanTags" >> beam.Map(_clean_tags))
  data = _split_data(data)
  for name, dataset in data:
    dataset | "Write{}Output".format(name) >> WriteOutput(
        name, args.output_dir, constants.TRAIN_SPEC, args.plain_text) 
Developer: GoogleCloudPlatform, Project: professional-services, Source file: preprocess.py

Example 7: _lint

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def _lint(self, examples):
    feature_val_w_counts = (
        examples
        | 'Tuplize' >> beam.FlatMap(
            utils.example_tuplizer(self._counted_features))
        | 'FlattenFeatureVals' >> beam.FlatMap(self._flatten_feature_vals)
        | 'CountFeatureVals' >> beam.combiners.Count.PerElement())

    if hasattr(self, '_count_transformer'):
      feature_val_w_counts |= 'TransformCounts' >> self._count_transformer

    return (
        feature_val_w_counts
        | 'PairValWithCount' >> beam.Map(self._shift_key)
        | 'GroupByFeature' >> beam.GroupByKey()
        | 'ValCountsToDict' >> beam.Map(self._val_counts_as_dict)
        | 'GenResults' >> beam.Map(self._check_feature)
        | 'DropUnwarned' >> beam.Filter(bool)
        | 'AsList' >> beam.combiners.ToList()
        | 'ToResult' >> beam.Map(self._to_result)) 
Developer: brain-research, Project: data-linter, Source file: linters.py

Example 8: expand

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def expand(self, pcoll):
    # Create an empty PCollection that depends on pcoll.
    empty = pcoll | beam.FlatMap(lambda x: ())
    return pcoll | beam.Map(lambda x, unused: x, beam.pvalue.AsIter(empty)) 
Developer: googlegenomics, Project: gcp-variant-transforms, Source file: fusion_break.py

Example 9: expand

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def expand(self, pcoll):
    return (pcoll
            | 'MapVariantsByKey' >> beam.FlatMap(self._map_by_variant_keys)
            | 'GroupVariantsByKey' >> beam.GroupByKey()
            | 'MergeVariantsByKey' >> beam.FlatMap(self._merge_variants_by_key)) 
Developer: googlegenomics, Project: gcp-variant-transforms, Source file: merge_variants.py

Example 10: expand

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def expand(self, estimates):
    return (estimates
            | 'MapSamplesToValueCount' >> beam.FlatMap(
                self._get_sample_ids)
            | 'GroupAllSamples' >> beam.GroupByKey()) 
Developer: googlegenomics, Project: gcp-variant-transforms, Source file: extract_input_size.py

Example 11: expand

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def expand(self, pcoll):
    if self._preserve_sample_order:
      return (pcoll
              | 'GetSampleIds' >> beam.Map(self._get_sample_ids)
              | 'RemoveDuplicates' >> beam.RemoveDuplicates()
              | 'Combine' >> beam.combiners.ToList()
              | 'ExtractUniqueSampleIds'
              >> beam.ParDo(self._extract_unique_sample_ids))
    else:
      return (pcoll
              | 'GetSampleIds' >> beam.FlatMap(self._get_sample_ids)
              | 'RemoveDuplicates' >> beam.RemoveDuplicates()
              | 'Combine' >> beam.combiners.ToList()
              | 'SortSampleIds' >> beam.ParDo(sorted)) 
Developer: googlegenomics, Project: gcp-variant-transforms, Source file: combine_sample_ids.py

Example 12: expand

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def expand(self, pcoll):
    return (pcoll
            | beam.ParDo(_RoundRobinKeyFn(self._count))
            | beam.GroupByKey()
            | beam.FlatMap(lambda kv: kv[1])) 
Developer: googlegenomics, Project: gcp-variant-transforms, Source file: limit_write.py

Example 13: expand

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def expand(self, pcollection):
    def parse_molecules(filename):
      with tf.gfile.Open(filename) as f:
        for json_molecule in sdf.parse_molecules(f):
          yield json_molecule

    return (
        pcollection
        | 'Create file patterns' >> beam.Create(self.file_patterns)
        | 'Expand file patterns' >> beam.FlatMap(tf.gfile.Glob)
        | 'Parse molecules' >> beam.ParDo(parse_molecules)
    ) 
Developer: GoogleCloudPlatform, Project: cloudml-samples, Source file: pipeline.py

Example 14: _Shuffle

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def _Shuffle(pcoll):  # pylint: disable=invalid-name
  import random
  return (pcoll
          | 'PairWithRandom' >> beam.Map(lambda x: (random.random(), x))
          | 'GroupByRandom' >> beam.GroupByKey()
          | 'DropRandom' >> beam.FlatMap(lambda kv: kv[1]))
Developer: GoogleCloudPlatform, Project: cloudml-samples, Source file: preprocess.py

Example 15: _Shuffle

# Required import: import apache_beam as beam
# Or: from apache_beam import FlatMap
def _Shuffle(pcoll):  # pylint: disable=invalid-name
  """Shuffles a PCollection."""
  import random
  return (pcoll
          | 'PairWithRand' >> beam.Map(lambda x: (random.random(), x))
          | 'GroupByRand' >> beam.GroupByKey()
          | 'DropRand' >> beam.FlatMap(lambda kv: kv[1]))
Developer: GoogleCloudPlatform, Project: cloudml-samples, Source file: preprocess.py
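As a usage note (an added sketch, not from cloudml-samples): since only the function body is excerpted here, the sketch below simply calls _Shuffle as a plain helper; in the original project it may also be wrapped with @beam.ptransform_fn and applied with the | operator, which this excerpt does not show.

# Hypothetical usage of the _Shuffle helper defined above.
import apache_beam as beam

with beam.Pipeline() as p:
    records = p | 'CreateRecords' >> beam.Create(list(range(10)))
    shuffled = _Shuffle(records)  # pair with random keys, group, then drop the keys
    _ = shuffled | 'PrintShuffled' >> beam.Map(print)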


Note: The apache_beam.FlatMap examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets come from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution or reuse should follow each project's license.