

Python apache_beam.CombineFn Code Examples

This article collects typical usage examples of the apache_beam.CombineFn method in Python. If you are unsure what apache_beam.CombineFn does or how to use it, the curated examples below should help. You can also explore further usage examples from the apache_beam package.


The following presents 9 code examples of the apache_beam.CombineFn method, drawn from open-source projects and sorted by popularity.
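Before the examples, here is a minimal sketch of a custom CombineFn written for this article (not taken from any project below). It shows the four-method interface (create_accumulator, add_input, merge_accumulators, extract_output) that the following examples implement or invoke, computing a global mean:

import apache_beam as beam


class MeanFn(beam.CombineFn):
  """Computes the mean of a PCollection of numbers."""

  def create_accumulator(self):
    # The accumulator is a (running sum, element count) pair.
    return 0.0, 0

  def add_input(self, accumulator, element):
    total, count = accumulator
    return total + element, count + 1

  def merge_accumulators(self, accumulators):
    totals, counts = zip(*accumulators)
    return sum(totals), sum(counts)

  def extract_output(self, accumulator):
    total, count = accumulator
    return total / count if count else float('NaN')


with beam.Pipeline() as p:
  _ = (p
       | beam.Create([1.0, 2.0, 3.0])
       | beam.CombineGlobally(MeanFn())
       | beam.Map(print))  # prints 2.0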

Example 1: __init__

# Required module: import apache_beam [as alias]
# Or: from apache_beam import CombineFn [as alias]
def __init__(
      self,
      num_jackknife_samples: int,
      skip_ci_metric_keys: Optional[Set[metric_types.MetricKey]] = None):
    """Initializes a _MergeJackknifeSamples CombineFn.

    Args:
      num_jackknife_samples: The number of samples computed per slice.
      skip_ci_metric_keys: Set of metric keys for which to skip confidence
        interval computation. For metric keys in this set, just the unsampled
        value will be returned.
    """
    self._num_jackknife_samples = num_jackknife_samples
    self._skip_ci_metric_keys = skip_ci_metric_keys
    self._num_slices_counter = beam.metrics.Metrics.counter(
        constants.METRICS_NAMESPACE, 'num_slices')
    self._missing_samples_counter = beam.metrics.Metrics.counter(
        constants.METRICS_NAMESPACE, 'num_slices_missing_jackknife_samples')
    self._small_samples_counter = beam.metrics.Metrics.counter(
        constants.METRICS_NAMESPACE, 'num_slices_with_small_jackknife_samples')
    self._sample_id_key = metric_types.MetricKey(_JACKKNIFE_SAMPLE_ID_KEY) 
Author: tensorflow, Project: model-analysis, Lines: 23, Source: jackknife.py

Example 2: __init__

# Required module: import apache_beam [as alias]
# Or: from apache_beam import CombineFn [as alias]
def __init__(self,
               combiner,
               tf_config,
               is_combining_accumulators,
               should_extract_output=None):
    """Init method for _CombinerWrapper.

    Args:
      combiner: An `analyzer_nodes.Combiner` object used to combine.
      tf_config: A `tf.ConfigProto`.
      is_combining_accumulators: A bool indicating whether this combines raw
        (single or batched) inputs or already accumulated objects.
      should_extract_output: A bool indicating whether this should call the
        combiner's extract_output method in extract_output. If not specified,
        it defaults to the value of `is_combining_accumulators`.
    """
    self._combiner = combiner
    self._tf_config = tf_config
    self._is_combining_accumulators = is_combining_accumulators
    if should_extract_output is None:
      should_extract_output = is_combining_accumulators
    self._should_extract_output = should_extract_output

    # TODO(b/135541366): Move this to CombineFn.setup once it exists.
    # That should help simplify several aspects of Quantiles state management.
    if isinstance(combiner, analyzers.QuantilesCombiner):
      combiner.initialize_local_state(tf_config) 
Author: tensorflow, Project: transform, Lines: 29, Source: analyzer_impls.py

Example 3: check_size

# Required module: import apache_beam [as alias]
# Or: from apache_beam import CombineFn [as alias]
def check_size(p, name, path):
  """Performs checks on the input pipeline and stores stats in specfied path.

  Checks performed: counts rows and derives class distribution.

  Args:
    p: PCollection, input pipeline.
    name: string, unique identifier for the beam step.
    path: string, path to store stats.

  Returns:
    PCollection
  """

  class _Combine(beam.CombineFn):
    """Counts and take the average of positive classes in the pipeline."""

    def create_accumulator(self):
      return (0.0, 0.0)

    def add_input(self, sum_count, inputs):
      (s, count) = sum_count
      return s + inputs, count + 1

    def merge_accumulators(self, accumulators):
      sums, counts = zip(*accumulators)
      return sum(sums), sum(counts)

    # We should not treat count == 0 as an error (the accumulator is
    # initialized with count == 0).
    def extract_output(self, sum_count):
      (s, count) = sum_count
      return count, (1.0 * s / count) if count else float('NaN')

  return (p
          | 'CheckMapTo_1_{}'.format(name) >>
          beam.Map(lambda x: x[constants.LABEL_COLUMN])
          | 'CheckSum_{}'.format(name) >> beam.CombineGlobally(_Combine())
          | 'CheckRecord_{}'.format(name) >> beam.io.WriteToText(
              '{}.txt'.format(path))) 
Author: GoogleCloudPlatform, Project: professional-services, Lines: 42, Source: preprocess.py
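As a usage illustration for check_size, here is a hypothetical driver written for this article. The input rows, the pipeline, and the assumption that constants.LABEL_COLUMN == 'label' are invented for the sketch and not part of the source project:

import apache_beam as beam

# Hypothetical 0/1-labeled rows; assumes constants.LABEL_COLUMN == 'label'.
rows = [{'label': 1.0}, {'label': 0.0}, {'label': 1.0}]

with beam.Pipeline() as p:
  examples = p | 'Create' >> beam.Create(rows)
  # Writes the (count, positive rate) tuple, here (3.0, 0.666...), to
  # text shards under /tmp/stats.txt.
  check_size(examples, name='train', path='/tmp/stats')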

Example 4: __init__

# Required module: import apache_beam [as alias]
# Or: from apache_beam import CombineFn [as alias]
def __init__(self, model_loaders: Dict[Text, types.ModelLoader]):
    """Initializes CombineFn using dict of loaders keyed by model location."""
    self._model_loaders = model_loaders
    self._loaded_models = None
    self._model_load_seconds = None
    self._model_load_seconds_distribution = beam.metrics.Metrics.distribution(
        constants.METRICS_NAMESPACE, 'model_load_seconds') 
Author: tensorflow, Project: model-analysis, Lines: 9, Source: model_util.py

Example 5: __new__

# Required module: import apache_beam [as alias]
# Or: from apache_beam import CombineFn [as alias]
def __new__(cls, keys: List[MetricKey], preprocessor: beam.DoFn,
              combiner: beam.CombineFn):
    return super(MetricComputation, cls).__new__(cls, keys, preprocessor,
                                                 combiner) 
Author: tensorflow, Project: model-analysis, Lines: 6, Source: metric_types.py

Example 6: QueryBasedMetricsEvaluator

# Required module: import apache_beam [as alias]
# Or: from apache_beam import CombineFn [as alias]
def QueryBasedMetricsEvaluator(  # pylint: disable=invalid-name
    query_id: Text,
    prediction_key: Text,
    combine_fns: List[beam.CombineFn],
    metrics_key: Text = constants.METRICS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    query_id: Key of query ID column in the features dictionary.
    prediction_key: Key in predictions dictionary to use as the prediction (for
      sorting examples within the query). Use the empty string if the Estimator
      returns a predictions Tensor (not a dictionary).
    combine_fns: List of query-based metric combine functions.
    metrics_key: Name to use for metrics key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for computing query-based metrics. The output will be stored under
    'metrics' and 'plots' keys.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateQueryBasedMetrics',
      run_after=run_after,
      ptransform=EvaluateQueryBasedMetrics(
          query_id=query_id,
          prediction_key=prediction_key,
          combine_fns=combine_fns,
          metrics_key=metrics_key))
  # pylint: enable=no-value-for-parameter 
Author: tensorflow, Project: model-analysis, Lines: 34, Source: query_based_metrics_evaluator.py

Example 7: EvaluateQueryBasedMetrics

# Required module: import apache_beam [as alias]
# Or: from apache_beam import CombineFn [as alias]
def EvaluateQueryBasedMetrics(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    prediction_key: Text,
    query_id: Text,
    combine_fns: List[beam.CombineFn],
    metrics_key: Text = constants.METRICS_KEY,
) -> evaluator.Evaluation:
  """Evaluates query-based metrics.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURES_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    prediction_key: Key in predictions dictionary to use as the prediction (for
      sorting examples within the query). Use the empty string if the Estimator
      returns a predictions Tensor (not a dictionary).
    query_id: Key of query ID column in the features dictionary.
    combine_fns: List of query-based metric combine functions.
    metrics_key: Name to use for metrics key in Evaluation output.

  Returns:
    Evaluation containing metrics dictionaries keyed by 'metrics'.
  """

  # pylint: disable=no-value-for-parameter
  metrics = (
      extracts
      | 'Filter' >> extractor.Filter(include=[
          constants.FEATURES_PREDICTIONS_LABELS_KEY,
          constants.SLICE_KEY_TYPES_KEY
      ])
      | 'ComputeQueryBasedMetrics' >> ComputeQueryBasedMetrics(
          query_id=query_id,
          combine_fns=combine_fns,
          prediction_key=prediction_key))
  # pylint: enable=no-value-for-parameter

  return {metrics_key: metrics} 
Author: tensorflow, Project: model-analysis, Lines: 42, Source: query_based_metrics_evaluator.py

Example 8: _make_loo_accumulators

# Required module: import apache_beam [as alias]
# Or: from apache_beam import CombineFn [as alias]
def _make_loo_accumulators(
    accumulators: List[_AccumulatorType],
    combiner: beam.CombineFn) -> Iterator[_AccumulatorType]:
  """Yields accumulators which each leave out one value in accumulators.

  Args:
    accumulators: List of accumulators for which to compute complements.
    combiner: A combiner to use for creating and merging accumulators.

  Yields:
    Leave-one-out accumulators for each element in `accumulators`. The ith
    accumulator will be the result of merging all accumulators but the ith,
    along with the accumulator passed as `complement`.
  """

  def make_loo_accumulators_rec(
      accumulators: List[_AccumulatorType], complement: _AccumulatorType,
      combiner: beam.CombineFn) -> Iterator[_AccumulatorType]:
    """Recursive helper to compute leave one out accumulators."""
    if len(accumulators) == 1:
      yield complement
    else:
      split_idx = int(len(accumulators) / 2)
      left, right = accumulators[:split_idx], accumulators[split_idx:]
      left_c = copy.deepcopy(complement)
      left_c = combiner.merge_accumulators([left_c] + right)
      for c in make_loo_accumulators_rec(left, left_c, combiner):
        yield c
      # reuse the complement accumulator on the right recursion.
      right_c = combiner.merge_accumulators([complement] + left)
      for c in make_loo_accumulators_rec(right, right_c, combiner):
        yield c

  # TODO(b/151445942) use `yield from` when we stop supporting python < 3.3
  for acc in make_loo_accumulators_rec(accumulators,
                                       combiner.create_accumulator(), combiner):
    yield acc


# TODO(b/152812821): Disable Beam annotations support due to failure in:
# //third_party/py/tensorflow_model_analysis/evaluators:jackknife_test.python3
# Output type hint violation at JackknifeCombinePerKey: expected Tuple[Union[
# Tuple[Tuple[str, Union[float, int, str]], ...], Tuple[]], Tuple[Dict[
# MetricKey, Any], ...]], got Tuple[Union[Tuple[Tuple[str, Union[float, int,
# str]], ...], Tuple[]], Dict[MetricKey, Any]]
#
# Since @beam.typehints.no_annotations is not available yet, part of the output
# type is put in quotes, which currently makes Beam ignore the hint. 
Author: tensorflow, Project: model-analysis, Lines: 50, Source: jackknife.py
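To make the leave-one-out pattern above concrete, here is a toy illustration written for this article (not from the source project). Using a trivial sum combiner whose accumulator is a plain number, _make_loo_accumulators([1, 2, 3], _SumFn()) yields 5, 4, and 3, i.e. the sum of all inputs except the i-th:

import apache_beam as beam


class _SumFn(beam.CombineFn):
  """Trivial combiner whose accumulator is a plain number."""

  def create_accumulator(self):
    return 0

  def add_input(self, accumulator, element):
    return accumulator + element

  def merge_accumulators(self, accumulators):
    return sum(accumulators)

  def extract_output(self, accumulator):
    return accumulator


# Each yielded value is the merge of all accumulators but one.
print(list(_make_loo_accumulators([1, 2, 3], _SumFn())))  # [5, 4, 3]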

Example 9: _make_jackknife_samples

# Required module: import apache_beam [as alias]
# Or: from apache_beam import CombineFn [as alias]
def _make_jackknife_samples(
    slice_partitions: Tuple[slicer.SliceKeyType,
                            Sequence[_PartitionInfo]], combiner: beam.CombineFn
) -> Iterator[Tuple[slicer.SliceKeyType, 'metric_types.MetricsDict']]:
  """Computes leave-one-out and unsampled ouputs for the combiner.

  This function creates leave-one-out combiner outputs by combining all but one
  accumulator and extracting the output. Second, it creates an unsampled output
  using all of the accumulators and extracts an unsampled output. The keys
  yielded by thus function are augmented versions of the input slice key in
  which the sample ID (or a special placeholder ID for the unsampled value) has
  been added.

  Args:
    slice_partitions: The result of GroupByKey in which the key is a slice_key,
      and the grouped stream consists of per-partition _PartitionInfo tuples in
      which the first element is an accumulator for that partition, the second
      element is the size of that partition, and the third element is the
      partition ID.
    combiner: The combiner to be used for converting accumulators to outputs.

  Yields:
    Tuples of the form (slice_key, metrics), for each jackknife sample and for
    the unsampled value.
  """
  slice_key, accumulators_sizes_and_ids = slice_partitions
  accumulators, sizes, partition_ids = zip(*accumulators_sizes_and_ids)
  unsampled_accumulator = None
  for i, loo_accumulator in enumerate(
      _make_loo_accumulators(list(accumulators), combiner)):
    # yield sampled output with sample_id of the left-out partition
    sample_id_key = (_JACKKNIFE_SAMPLE_ID_KEY, partition_ids[i])
    yield slice_key + (sample_id_key,), combiner.extract_output(loo_accumulator)
    if i == 0:
      # Create the unsampled accumulator from sample 0 and its complement.
      unsampled_accumulator = combiner.merge_accumulators(
          [loo_accumulator, accumulators[0]])

  # yield unsampled output along with total count as a special metric
  count_dict = {_JACKKNIFE_EXAMPLE_COUNT_METRIC_KEY: sum(sizes)}
  sample_id_key = ((_JACKKNIFE_SAMPLE_ID_KEY, _JACKKNIFE_FULL_SAMPLE_ID),)
  unsampled_output = combiner.extract_output(unsampled_accumulator)
  unsampled_key = slice_key + sample_id_key
  unsampled_val = unsampled_output + (count_dict,)
  yield unsampled_key, unsampled_val 
Author: tensorflow, Project: model-analysis, Lines: 47, Source: jackknife.py


Note: The apache_beam.CombineFn examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors; copyright remains with the original authors, and use or redistribution is subject to each project's License. Do not reproduce without permission.