This article collects typical usage examples of the apache_beam.FlatMap method in Python. If you are unsure what apache_beam.FlatMap does or how to use it, the curated code samples below may help; you can also explore the containing apache_beam module further.
The following presents 15 code examples of apache_beam.FlatMap, drawn from open-source projects and ordered by popularity by default.
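As a primer before the examples, here is a minimal runnable sketch of what beam.FlatMap does: it applies a callable to each element and flattens the returned iterable into the output PCollection, so one input can produce zero, one, or many outputs. The input strings are illustrative only.

import apache_beam as beam

with beam.Pipeline() as p:
    (p
     | 'CreateLines' >> beam.Create(['a b', 'c'])
     # One line in, many words out: FlatMap flattens the returned list.
     | 'SplitWords' >> beam.FlatMap(lambda line: line.split())
     | 'Print' >> beam.Map(print))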

Example 1: expand
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def expand(self, inputs):
    pcoll, = inputs

    def extract_outputs(outputs, num_outputs):
        if len(outputs) != num_outputs:
            raise ValueError(
                'Analyzer has {} outputs but its implementation produced {} '
                'values'.format(num_outputs, len(outputs)))
        for i, output in enumerate(outputs):
            yield beam.pvalue.TaggedOutput(str(i), output)

    output_keys = [str(i) for i in range(self._num_outputs)]
    outputs_tuple = (
        pcoll |
        'ExtractOutputs' >> beam.FlatMap(
            extract_outputs, self._num_outputs).with_outputs(*output_keys))
    return tuple(outputs_tuple[key] for key in output_keys)
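Example 1 relies on beam.pvalue.TaggedOutput plus .with_outputs() to fan a single FlatMap out into several PCollections. A minimal self-contained sketch of that mechanism, with hypothetical 'even'/'odd' tags:

import apache_beam as beam

def split_parity(n):
    # Route each element to the 'even' or 'odd' tagged output.
    yield beam.pvalue.TaggedOutput('even' if n % 2 == 0 else 'odd', n)

with beam.Pipeline() as p:
    tagged = (p
              | beam.Create([1, 2, 3, 4])
              | beam.FlatMap(split_parity).with_outputs('even', 'odd'))
    # Each tag is now its own PCollection.
    tagged.even | 'PrintEven' >> beam.Map(print)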

Example 2: _clear_shared_state_after_barrier
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def _clear_shared_state_after_barrier(pipeline, input_barrier):
    """Clears any shared state from within a pipeline context.

    This will only be cleared once input_barrier becomes available.

    Args:
        pipeline: A `beam.Pipeline` object.
        input_barrier: A `PCollection` which the pipeline should wait for.

    Returns:
        An empty `PCollection`.
    """
    empty_pcoll = input_barrier | 'MakeCheapBarrier' >> beam.FlatMap(
        lambda x: None)
    return (pipeline
            | 'PrepareToClearSharedKeepAlives' >> beam.Create([None])
            | 'WaitAndClearSharedKeepAlives' >> beam.Map(
                lambda x, empty_side_input: shared.Shared().acquire(lambda: None),
                beam.pvalue.AsIter(empty_pcoll)))
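The "cheap barrier" above works because FlatMap treats a None return as producing no elements: empty_pcoll is empty, yet it still depends on input_barrier, and feeding it to a later stage as a side input delays that stage until the barrier is complete. A minimal sketch with illustrative data and stage names:

import apache_beam as beam

with beam.Pipeline() as p:
    barrier = (p
               | 'Upstream' >> beam.Create([1, 2, 3])
               | 'EmptyBarrier' >> beam.FlatMap(lambda x: None))  # emits nothing
    (p
     | 'Downstream' >> beam.Create(['go'])
     # The empty side input forces this stage to wait for 'Upstream' to finish.
     | 'AfterBarrier' >> beam.Map(lambda x, _barrier: x, beam.pvalue.AsIter(barrier))
     | 'Print' >> beam.Map(print))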

Example 3: expand
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def expand(self, pcollection):
    def get_dir_list(file_dir, suffix=""):
        file_list = []
        for file_name in os.listdir(file_dir):
            f = os.path.join(file_dir, file_name)
            if file_name.endswith(suffix):
                file_list.append(f)
        return file_list

    def get_events(filename):
        # read_nordic is ObsPy's Nordic-format reader (obspy.io.nordic.core).
        catalog, wavename = read_nordic(filename, return_wavnames=True)
        for event in catalog.events:
            for pick in event.picks:
                pick.waveform_id.wavename = wavename
            yield event

    return (
        pcollection
        | 'Create file directory' >> beam.Create(self.file_patterns)
        | 'List all files' >> beam.FlatMap(get_dir_list)
        | 'Get event' >> beam.FlatMap(get_events)
    )
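The design here is a two-stage fan-out: beam.Create seeds the pipeline with directory paths, the first FlatMap expands each directory into many file paths, and the second FlatMap expands each file into many ObsPy events, so each stage can be parallelized independently.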

Example 4: shuffle
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def shuffle(p):
    """Shuffles data from PCollection.

    Args:
        p: PCollection.

    Returns:
        PCollection of shuffled data.
    """

    class _AddRandomKey(beam.DoFn):
        def process(self, element):
            yield random.random(), element

    shuffled_data = (
        p
        | 'PairWithRandom' >> beam.ParDo(_AddRandomKey())
        | 'GroupByRandom' >> beam.GroupByKey()
        # Python 3 has no tuple-unpacking lambdas; index the (key, values) pair.
        | 'DropRandom' >> beam.FlatMap(lambda kv: kv[1]))
    return shuffled_data
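This pair-group-drop idiom is a common way to shuffle a PCollection (Examples 5, 14, and 15 repeat it): attaching a random key and grouping by it redistributes elements randomly. A minimal runnable version with illustrative data:

import random
import apache_beam as beam

with beam.Pipeline() as p:
    (p
     | 'Create' >> beam.Create(['a', 'b', 'c', 'd'])
     | 'PairWithRandom' >> beam.Map(lambda x: (random.random(), x))
     | 'GroupByRandom' >> beam.GroupByKey()
     | 'DropRandom' >> beam.FlatMap(lambda kv: kv[1])  # flatten groups back out
     | 'Print' >> beam.Map(print))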

Example 5: shuffle_data
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def shuffle_data(p):
    """Shuffles data from PCollection.

    Args:
        p: PCollection.

    Returns:
        PCollection of shuffled data.
    """

    class _AddRandomKey(beam.DoFn):
        def process(self, element):
            yield (random.random(), element)

    shuffled_data = (
        p
        | 'PairWithRandom' >> beam.ParDo(_AddRandomKey())
        | 'GroupByRandom' >> beam.GroupByKey()
        # Python 3 has no tuple-unpacking lambdas; index the (key, values) pair.
        | 'DropRandom' >> beam.FlatMap(lambda kv: kv[1]))
    return shuffled_data

Example 6: run
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def run(p, args):
    """Creates a pipeline to build and write train/val/test datasets."""
    # pylint: disable=no-value-for-parameter
    query = bq_query.query
    if not args.cloud:
        query = "{} LIMIT 10".format(query)
    raw_data = (p
                | "ReadBQ" >> ReadBQ(query)
                | "HandleNullUserTags" >> beam.Map(_handle_null_user_tags)
                | "NormalizeUserTags" >> beam.Map(_normalize_user_tags))
    data = _run_tft_fn(raw_data, _preprocess_tft, args.tft_dir,
                       args.user_min_count, args.item_min_count)
    data = (data
            | "FilterData" >> beam.FlatMap(_filter_data)
            | "CleanTags" >> beam.Map(_clean_tags))
    data = _split_data(data)
    for name, dataset in data:
        dataset | "Write{}Output".format(name) >> WriteOutput(
            name, args.output_dir, constants.TRAIN_SPEC, args.plain_text)

Example 7: _lint
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def _lint(self, examples):
    feature_val_w_counts = (
        examples
        | 'Tuplize' >> beam.FlatMap(
            utils.example_tuplizer(self._counted_features))
        | 'FlattenFeatureVals' >> beam.FlatMap(self._flatten_feature_vals)
        | 'CountFeatureVals' >> beam.combiners.Count.PerElement())

    if hasattr(self, '_count_transformer'):
        feature_val_w_counts |= 'TransformCounts' >> self._count_transformer

    return (
        feature_val_w_counts
        | 'PairValWithCount' >> beam.Map(self._shift_key)
        | 'GroupByFeature' >> beam.GroupByKey()
        | 'ValCountsToDict' >> beam.Map(self._val_counts_as_dict)
        | 'GenResults' >> beam.Map(self._check_feature)
        | 'DropUnwarned' >> beam.Filter(bool)
        | 'AsList' >> beam.combiners.ToList()
        | 'ToResult' >> beam.Map(self._to_result))

Example 8: expand
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def expand(self, pcoll):
    # Create an empty PCollection that depends on pcoll.
    empty = pcoll | beam.FlatMap(lambda x: ())
    return pcoll | beam.Map(lambda x, unused: x, beam.pvalue.AsIter(empty))
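This is the same barrier trick as Example 2 in reusable-PTransform form: FlatMap(lambda x: ()) yields an empty PCollection that still depends on pcoll, and passing it as an ignored side input means the transform's output cannot be produced before every element of pcoll has been processed.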

Example 9: expand
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def expand(self, pcoll):
    return (pcoll
            | 'MapVariantsByKey' >> beam.FlatMap(self._map_by_variant_keys)
            | 'GroupVariantsByKey' >> beam.GroupByKey()
            | 'MergeVariantsByKey' >> beam.FlatMap(self._merge_variants_by_key))

Example 10: expand
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def expand(self, estimates):
    return (estimates
            | 'MapSamplesToValueCount' >> beam.FlatMap(self._get_sample_ids)
            | 'GroupAllSamples' >> beam.GroupByKey())

Example 11: expand
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def expand(self, pcoll):
    if self._preserve_sample_order:
        return (pcoll
                | 'GetSampleIds' >> beam.Map(self._get_sample_ids)
                | 'RemoveDuplicates' >> beam.RemoveDuplicates()
                | 'Combine' >> beam.combiners.ToList()
                | 'ExtractUniqueSampleIds'
                >> beam.ParDo(self._extract_unique_sample_ids))
    else:
        return (pcoll
                | 'GetSampleIds' >> beam.FlatMap(self._get_sample_ids)
                | 'RemoveDuplicates' >> beam.RemoveDuplicates()
                | 'Combine' >> beam.combiners.ToList()
                | 'SortSampleIds' >> beam.ParDo(sorted))
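Note that beam.RemoveDuplicates has since been deprecated in favor of beam.Distinct in newer Beam releases; the dedupe-then-collect pattern itself is unchanged. A minimal sketch of the same pattern using the current name, with illustrative sample IDs:

import apache_beam as beam

with beam.Pipeline() as p:
    (p
     | beam.Create(['s2', 's1', 's2'])
     | 'Distinct' >> beam.Distinct()          # RemoveDuplicates in older releases
     | 'ToList' >> beam.combiners.ToList()
     | 'Sort' >> beam.Map(sorted)
     | 'Print' >> beam.Map(print))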

Example 12: expand
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def expand(self, pcoll):
    return (pcoll
            | beam.ParDo(_RoundRobinKeyFn(self._count))
            | beam.GroupByKey()
            | beam.FlatMap(lambda kv: kv[1]))
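_RoundRobinKeyFn is not shown in the snippet; a plausible sketch, assuming it simply cycles through keys 0..count-1 so the GroupByKey repartitions elements into count roughly even groups:

import apache_beam as beam

class _RoundRobinKeyFn(beam.DoFn):
    """Hypothetical reconstruction: assigns keys in round-robin order."""

    def __init__(self, count):
        self._count = count

    def start_bundle(self):
        # Reset the counter at the start of each bundle.
        self._counter = 0

    def process(self, element):
        self._counter = (self._counter + 1) % self._count
        yield self._counter, element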

Example 13: expand
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def expand(self, pcollection):
    def parse_molecules(filename):
        with tf.gfile.Open(filename) as f:
            for json_molecule in sdf.parse_molecules(f):
                yield json_molecule

    return (
        pcollection
        | 'Create file patterns' >> beam.Create(self.file_patterns)
        | 'Expand file patterns' >> beam.FlatMap(tf.gfile.Glob)
        | 'Parse molecules' >> beam.ParDo(parse_molecules)
    )
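tf.gfile.Glob expands each pattern against TensorFlow's filesystems (local paths as well as GCS and the like). For a dependency-free variant of the same Create-then-FlatMap expansion, the standard-library glob works for local paths; the pattern below is hypothetical:

import glob
import apache_beam as beam

with beam.Pipeline() as p:
    (p
     | 'Create file patterns' >> beam.Create(['/tmp/molecules/*.sdf'])  # hypothetical path
     | 'Expand file patterns' >> beam.FlatMap(glob.glob)  # one pattern -> many files
     | 'Print' >> beam.Map(print))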

Example 14: _Shuffle
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def _Shuffle(pcoll):  # pylint: disable=invalid-name
    import random
    return (pcoll
            | 'PairWithRandom' >> beam.Map(lambda x: (random.random(), x))
            | 'GroupByRandom' >> beam.GroupByKey()
            # Python 3 has no tuple-unpacking lambdas; index the (key, values) pair.
            | 'DropRandom' >> beam.FlatMap(lambda kv: kv[1]))

Example 15: _Shuffle
# Required module: import apache_beam [as alias]
# Or: from apache_beam import FlatMap [as alias]
def _Shuffle(pcoll):  # pylint: disable=invalid-name
    """Shuffles a PCollection."""
    import random
    return (pcoll
            | 'PairWithRand' >> beam.Map(lambda x: (random.random(), x))
            | 'GroupByRand' >> beam.GroupByKey()
            # Python 3 has no tuple-unpacking lambdas; index the (key, values) pair.
            | 'DropRand' >> beam.FlatMap(lambda kv: kv[1]))