This article collects typical usage examples of Python's apache_beam.Filter method. If you are wondering what apache_beam.Filter does, how to call it, or what it looks like in real code, the curated examples below should help; they also illustrate usage of the apache_beam module it belongs to.
Six code examples of apache_beam.Filter are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
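Before the examples, here is a minimal, self-contained sketch of the basic beam.Filter contract (not taken from the examples below): the callable you pass in is evaluated once per element, and only elements for which it returns a truthy value are kept.

import apache_beam as beam

with beam.Pipeline() as pipeline:  # DirectRunner by default
  _ = (
      pipeline
      | 'CreateNumbers' >> beam.Create([1, 2, 3, 4, 5])
      | 'KeepEven' >> beam.Filter(lambda x: x % 2 == 0)  # keeps 2 and 4
      | 'Print' >> beam.Map(print))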
Example 1: create_glyphazzn_dataset
# Required module: import apache_beam [as an alias]
# Or: from apache_beam import Filter [as an alias]
def create_glyphazzn_dataset(filepattern, output_path):
  """Creates a glyphazzn dataset, from raw Parquet to TFRecords."""
  def pipeline(root):
    """Pipeline for creating the glyphazzn dataset."""
    attrs = ['uni', 'width', 'vwidth', 'sfd', 'id', 'binary_fp']
    examples = root | 'Read' >> beam.io.parquetio.ReadFromParquet(
        file_pattern=filepattern, columns=attrs)
    examples = examples | 'FilterBadIcons' >> beam.Filter(_is_valid_glyph)
    examples = examples | 'ConvertToPath' >> beam.Map(_convert_to_path)
    examples = examples | 'FilterBadPathLengths' >> beam.Filter(_is_valid_path)
    examples = examples | 'ProcessAndConvert' >> beam.Map(_create_example)
    (examples | 'WriteToTFRecord' >> beam.io.tfrecordio.WriteToTFRecord(
        output_path, num_shards=90))
  return pipeline
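Note that the function above only builds a pipeline-construction callable; it does not run anything itself. A minimal sketch of how it might be executed, assuming a local DirectRunner and hypothetical input/output paths (neither appears in the original example):

import apache_beam as beam

# Hypothetical paths, for illustration only.
pipeline_fn = create_glyphazzn_dataset('/tmp/glyphs-*.parquet',
                                       '/tmp/glyphazzn.tfrecord')
with beam.Pipeline() as root:  # DirectRunner by default
  pipeline_fn(root)  # applies Read -> Filter -> Map -> ... -> WriteToTFRecord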
Example 2: expand
# Required module: import apache_beam [as an alias]
# Or: from apache_beam import Filter [as an alias]
def expand(self, examples):
  """Runs the linters on the data and writes out the results.

  The order in which the linters run is unspecified.

  Args:
    examples: A `PTransform` that yields a `PCollection` of `tf.Example`s.

  Returns:
    A pipeline containing the `DataLinter` `PTransform`s.
  """
  coders = (beam.coders.coders.StrUtf8Coder(),
            beam.coders.coders.ProtoCoder(lint_result_pb2.LintResult))
  return (
      [examples | linter for linter in self._linters if linter.should_run()]
      | 'MergeResults' >> beam.Flatten()
      # Keep only results that produced at least one warning.
      | 'DropEmpty' >> beam.Filter(lambda kv: kv[1] and len(kv[1].warnings))
      | 'ToDict' >> beam.combiners.ToDict()
      | 'WriteResults' >> beam.io.textio.WriteToText(
          self._results_path,
          coder=beam.coders.coders.PickleCoder(),
          shard_name_template=''))
Example 3: _lint
# Required module: import apache_beam [as an alias]
# Or: from apache_beam import Filter [as an alias]
def _lint(self, examples):
  feature_val_w_counts = (
      examples
      | 'Tuplize' >> beam.FlatMap(
          utils.example_tuplizer(self._counted_features))
      | 'FlattenFeatureVals' >> beam.FlatMap(self._flatten_feature_vals)
      | 'CountFeatureVals' >> beam.combiners.Count.PerElement())

  if hasattr(self, '_count_transformer'):
    feature_val_w_counts |= 'TransformCounts' >> self._count_transformer

  return (
      feature_val_w_counts
      | 'PairValWithCount' >> beam.Map(self._shift_key)
      | 'GroupByFeature' >> beam.GroupByKey()
      | 'ValCountsToDict' >> beam.Map(self._val_counts_as_dict)
      | 'GenResults' >> beam.Map(self._check_feature)
      | 'DropUnwarned' >> beam.Filter(bool)
      | 'AsList' >> beam.combiners.ToList()
      | 'ToResult' >> beam.Map(self._to_result))
Example 4: expand
# Required module: import apache_beam [as an alias]
# Or: from apache_beam import Filter [as an alias]
def expand(self, inputs):
  pcoll, = inputs
  # Create a PCollection of (count, element) pairs, then iterate over
  # this to create a single-element PCollection containing this list of
  # pairs in sorted order by decreasing counts (and by values for equal
  # counts).
  # TODO(b/112916494): Unify the graph in both cases once possible.
  if (self._vocab_ordering_type ==
      _VocabOrderingType.WEIGHTED_MUTUAL_INFORMATION):
    flatten_map_fn = _flatten_to_key_and_means_accumulator_list
    combine_transform = _MutualInformationTransformAccumulate()  # pylint: disable=no-value-for-parameter
  elif self._vocab_ordering_type == _VocabOrderingType.WEIGHTED_FREQUENCY:
    flatten_map_fn = _flatten_value_and_weights_to_list_of_tuples
    combine_transform = beam.CombinePerKey(sum)
  elif self._vocab_ordering_type == _VocabOrderingType.WEIGHTED_LABELS:
    flatten_map_fn = _flatten_value_and_labeled_weights_to_list_of_tuples
    combine_transform = beam.CombinePerKey(sum_labeled_weights)
  else:
    flatten_map_fn = _flatten_value_to_list
    combine_transform = beam.combiners.Count.PerElement()

  result = (
      pcoll
      | 'FlattenTokensAndMaybeWeightsLabels' >> beam.FlatMap(flatten_map_fn)
      | 'CountPerToken' >> combine_transform)

  if self._input_dtype == tf.string:
    # TODO(b/62379925): Filter empty strings or strings containing the \n or
    # \r tokens since index_table_from_file doesn't allow empty rows.
    def is_problematic_string(kv):
      string, _ = kv  # Ignore counts.
      return string and b'\n' not in string and b'\r' not in string

    result |= 'FilterProblematicStrings' >> beam.Filter(is_problematic_string)

  return result
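For intuition, the string filter at the end keeps only non-empty byte strings that contain no newline or carriage-return characters. A standalone sketch of that predicate applied to literal (token, count) pairs (illustrative data, not from the original code):

import apache_beam as beam

def is_problematic_string(kv):
  string, _ = kv  # Ignore counts.
  return string and b'\n' not in string and b'\r' not in string

with beam.Pipeline() as p:
  _ = (
      p
      | beam.Create([(b'cat', 3), (b'', 1), (b'a\nb', 2)])
      | 'FilterProblematicStrings' >> beam.Filter(is_problematic_string)
      | beam.Map(print))  # only (b'cat', 3) is kept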
Example 5: _count_transformer
# Required module: import apache_beam [as an alias]
# Or: from apache_beam import Filter [as an alias]
def _count_transformer(self):
  # Elements are ((feature, value), count) pairs: drop NaN values, then
  # re-key by whether the value is integral before summing counts.
  return (
      'DropNaN' >> beam.Filter(lambda kv: not np.isnan(kv[0][1]))
      | 'IsIntegral' >> beam.Map(
          lambda kv: ((kv[0][0], kv[0][1] % 1 == 0), kv[1]))
      | 'Count' >> beam.CombinePerKey(sum))
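A minimal sketch of the same chain applied to literal data, under the assumption (inferred from the lambdas) that elements are ((feature, value), count) pairs:

import apache_beam as beam
import numpy as np

count_transformer = (
    'DropNaN' >> beam.Filter(lambda kv: not np.isnan(kv[0][1]))
    | 'IsIntegral' >> beam.Map(
        lambda kv: ((kv[0][0], kv[0][1] % 1 == 0), kv[1]))
    | 'Count' >> beam.CombinePerKey(sum))

with beam.Pipeline() as p:
  _ = (
      p
      | beam.Create([(('age', 21.0), 2),
                     (('age', 21.5), 3),
                     (('age', float('nan')), 1)])
      | 'TransformCounts' >> count_transformer
      | beam.Map(print))  # prints (('age', True), 2) and (('age', False), 3)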
Example 6: expand
# Required module: import apache_beam [as an alias]
# Or: from apache_beam import Filter [as an alias]
def expand(self, sliced_record_batchs_and_ys: Tuple[types.SlicedRecordBatch,
                                                    _SlicedYKey]):
  sliced_record_batchs, y_keys = sliced_record_batchs_and_ys

  # _SlicedXYKey(slice, x_path, x, y), xy_count
  partial_copresence_counts = (
      sliced_record_batchs
      | 'ToPartialCopresenceCounts' >> beam.FlatMap(
          _to_partial_copresence_counts, self._y_path, self._x_paths,
          self._y_boundaries, self._weight_column_name))

  # Compute placeholder copresence counts.
  # partial_copresence_counts will only include x-y pairs that are present,
  # but we would also like to keep track of x-y pairs that never appear, as
  # long as x and y independently occur in the slice.

  # _SlicedXKey(slice, x_path, x), x_count
  x_counts = (
      sliced_record_batchs
      | 'ToPartialXCounts' >> beam.FlatMap(
          _to_partial_x_counts, self._x_paths, self._weight_column_name)
      | 'SumXCounts' >> beam.CombinePerKey(sum))
  if self._min_x_count:
    x_counts = x_counts | 'FilterXCounts' >> beam.Filter(
        lambda kv: kv[1] > self._min_x_count)

  # _SlicedXYKey(slice, x_path, x, y), 0
  placeholder_copresence_counts = (
      (x_counts, y_keys)
      | 'GetPlaceholderCopresenceCounts' >> _GetPlaceholderCopresenceCounts(
          self._x_paths, self._min_x_count))

  def move_y_to_value(key, xy_count):
    return _SlicedXKey(key.slice_key, key.x_path, key.x), (key.y, xy_count)

  # _SlicedXKey(slice, x_path, x), (y, xy_count)
  copresence_counts = (
      (placeholder_copresence_counts, partial_copresence_counts)
      | 'FlattenCopresenceCounts' >> beam.Flatten()
      | 'SumCopresencePairs' >> beam.CombinePerKey(sum)
      | 'MoveYToValue' >> beam.MapTuple(move_y_to_value))

  # _SlicedYKey(slice, y), _ConditionalYRate(x_path, x, xy_count, x_count)
  return ({
      'x_count': x_counts,
      'xy_counts': copresence_counts
  }
          | 'CoGroupByForConditionalYRates' >> beam.CoGroupByKey()
          | 'JoinXCounts' >> beam.FlatMap(_join_x_counts))