Python tensorflow_transform.scale_to_z_score方法代码示例

本文整理汇总了Python中tensorflow_transform.scale_to_z_score函数的典型用法代码示例。如果您正苦于以下问题：Python tensorflow_transform.scale_to_z_score函数的具体用法？Python tensorflow_transform.scale_to_z_score怎么用？Python tensorflow_transform.scale_to_z_score使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该函数所在模块tensorflow_transform的用法示例。

在下文中一共展示了tensorflow_transform.scale_to_z_score方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: preprocessing_fn

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def preprocessing_fn(inputs):
  """Standardizes every feature column and copies the label through.

  This is the callback tf.transform invokes to preprocess inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  transformed = {}

  for feature_key in _FEATURE_KEYS:
    z_scored = tft.scale_to_z_score(inputs[feature_key])
    # The scaled feature is expected to be a plain tf.Tensor at this point.
    assert isinstance(z_scored, tf.Tensor)
    transformed[_transformed_name(feature_key)] = z_scored

  # The label is stored unchanged under its transformed name.
  label = inputs[_LABEL_KEY]
  transformed[_transformed_name(_LABEL_KEY)] = label

  return transformed


# TFX Trainer will call this function.

开发者ID:tensorflow，项目名称:tfx，代码行数:23，代码来源:iris_utils_sklearn.py

示例2: preprocessing_fn

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def preprocessing_fn(inputs):
  """Scales each feature to a z-score and forwards the label untouched.

  This is the callback tf.transform invokes to preprocess inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  transformed = {}

  for feature_key in _FEATURE_KEYS:
    z_scored = tft.scale_to_z_score(inputs[feature_key])
    transformed[_transformed_name(feature_key)] = z_scored

  # TODO(b/157064428): Support label transformation for Keras.
  # Do not apply label transformation as it will result in wrong evaluation.
  label = inputs[_LABEL_KEY]
  transformed[_transformed_name(_LABEL_KEY)] = label

  return transformed


# TFX Tuner will call this function.

开发者ID:tensorflow，项目名称:tfx，代码行数:23，代码来源:iris_utils_native_keras.py

示例3: preprocessing_fn

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def preprocessing_fn(inputs):
    """Builds the transformed feature map and the binary tip label.

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from transformed feature name to the transformed feature.
    """
    transformed = {}

    # Dense floats: pass through taxi.fill_in_missing, then scale to z-score.
    for name in taxi.DENSE_FLOAT_FEATURE_KEYS:
        dense = taxi.fill_in_missing(inputs[name])
        transformed[taxi.transformed_name(name)] = tft.scale_to_z_score(dense)

    # Vocabulary features: build and apply a vocabulary (top 10, 10 OOV buckets).
    for name in taxi.VOCAB_FEATURE_KEYS:
        filled = taxi.fill_in_missing(inputs[name])
        transformed[taxi.transformed_name(name)] = (
            tft.compute_and_apply_vocabulary(
                filled, top_k=10, num_oov_buckets=10))

    # Bucketized features: 10 buckets each.
    for name in taxi.BUCKET_FEATURE_KEYS:
        filled = taxi.fill_in_missing(inputs[name])
        transformed[taxi.transformed_name(name)] = tft.bucketize(
            filled, num_buckets=10)

    # Categorical features are only passed through taxi.fill_in_missing.
    for name in taxi.CATEGORICAL_FEATURE_KEYS:
        transformed[taxi.transformed_name(name)] = taxi.fill_in_missing(
            inputs[name])

    # Was this passenger a big tipper?
    fare = taxi.fill_in_missing(inputs[taxi.FARE_KEY])
    tip = taxi.fill_in_missing(inputs[taxi.LABEL_KEY])
    fare_is_nan = tf.is_nan(fare)
    # Label used wherever the fare is NaN.
    zero_label = tf.cast(tf.zeros_like(fare), tf.int64)
    # Test if the tip was > 20% of the fare.
    tip_threshold = tf.multiply(fare, tf.constant(0.2))
    big_tip = tf.cast(tf.greater(tip, tip_threshold), tf.int64)
    label = tf.where(fare_is_nan, zero_label, big_tip)
    transformed[taxi.transformed_name(taxi.LABEL_KEY)] = label

    return transformed

开发者ID:spotify，项目名称:spotify-tensorflow，代码行数:33，代码来源:main.py

示例4: dummy_preprocessing_fn

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def dummy_preprocessing_fn(inputs):
    """Returns a single z-score scaled feature derived from "test_feature"."""
    scaled = tft.scale_to_z_score(inputs["test_feature"])
    return {"test_feature_fx": scaled}

开发者ID:spotify，项目名称:spotify-tensorflow，代码行数:6，代码来源:tft_test.py

示例5: preprocess

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def preprocess(inputs):
    """Preprocessing callback for tf.transform.

    Args:
      inputs: map from feature keys to raw not-yet-transformed features.
    Returns:
      Map from string feature key to transformed feature operations.
    """
    transformed = {}

    # Dense float features are scaled to a z-score.
    for name in DENSE_FLOAT_FEATURE_KEYS:
        transformed[name] = transform.scale_to_z_score(inputs[name])

    # Vocabulary features: non-string inputs are converted to strings before
    # the vocabulary is built.
    for name in VOCAB_FEATURE_KEYS:
        vocab_input = (
            inputs[name] if inputs[name].dtype == tf.string
            else tf.as_string(inputs[name]))
        transformed[name] = transform.string_to_int(
            vocab_input,
            vocab_filename='vocab_' + name,
            top_k=VOCAB_SIZE,
            num_oov_buckets=OOV_SIZE)

    # Bucketized features.
    for name in BUCKET_FEATURE_KEYS:
        transformed[name] = transform.bucketize(
            inputs[name], FEATURE_BUCKET_COUNT)

    # Categorical features are converted to int64.
    for name in CATEGORICAL_FEATURE_KEYS:
        transformed[name] = tf.to_int64(inputs[name])

    fare = inputs[FARE_KEY]
    tip = inputs[LABEL_KEY]
    # Test if the tip was > 20% of the fare.
    threshold = tf.multiply(fare, tf.constant(0.2))
    # The label is true only when the fare is not NaN and the tip exceeds
    # the threshold.
    fare_is_number = tf.logical_not(tf.is_nan(fare))
    tip_over_threshold = tf.greater(tip, threshold)
    transformed[LABEL_KEY] = tf.logical_and(fare_is_number, tip_over_threshold)

    return transformed

开发者ID:kubeflow-kale，项目名称:kale，代码行数:39，代码来源:preprocessing.py

示例6: testScaleToZScore

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def testScaleToZScore(self, input_data, output_data, elementwise):
    """Checks values and dtype of tft.scale_to_z_score on fixed-length input."""
    self._SkipIfExternalEnvironmentAnd(
        self._UseTFXIO(), 'Skipping large test cases; b/147698868')

    def preprocessing_fn(inputs):
      # Cast to the dtype under test, scale, and verify the resulting dtype
      # before casting to float32 for the output.
      raw = inputs['x']
      typed = tf.cast(raw, tf.as_dtype(input_data.dtype))
      scaled = tft.scale_to_z_score(typed, elementwise=elementwise)
      self.assertEqual(scaled.dtype, tf.as_dtype(output_data.dtype))
      return {'x_scaled': tf.cast(scaled, tf.float32)}

    # One instance dict per row of the input / expected arrays.
    input_rows = [{'x': row} for row in input_data]
    expected_rows = [{'x_scaled': row} for row in output_data]

    input_spec = {
        'x':
            tf.io.FixedLenFeature(
                input_data.shape[1:],
                tft_unit.canonical_numeric_dtype(
                    tf.as_dtype(input_data.dtype))),
    }
    input_metadata = tft_unit.metadata_from_feature_spec(input_spec)

    expected_spec = {
        'x_scaled': tf.io.FixedLenFeature(output_data.shape[1:], tf.float32),
    }
    expected_metadata = tft_unit.metadata_from_feature_spec(expected_spec)

    self.assertAnalyzeAndTransformResults(
        input_rows, input_metadata, preprocessing_fn, expected_rows,
        expected_metadata)

开发者ID:tensorflow，项目名称:transform，代码行数:28，代码来源:impl_test.py

示例7: scale_to_z_score

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def scale_to_z_score(x, elementwise=False, name=None, output_dtype=None):
  """Standardizes a column to mean 0 and variance 1.

  The z-score is obtained by subtracting the mean and dividing by the standard
  deviation. The standard deviation used here comes from the biased variance
  (0 delta degrees of freedom), as computed by analyzers.var.

  Args:
    x: A numeric `Tensor` or `SparseTensor`.
    elementwise: If true, scales each element of the tensor independently;
        otherwise uses the mean and variance of the whole tensor.
    name: (Optional) A name for this operation.
    output_dtype: (Optional) If not None, casts the output tensor to this type.

  Returns:
    A `Tensor` or `SparseTensor` holding the input column scaled to mean 0 and
    variance 1 (standard deviation 1), i.e. (x - mean(x)) / std_dev(x).
    If `x` is floating point, the mean will have the same type as `x`. If `x` is
    integral, the output is cast to tf.float32.

    Note that TFLearn generally permits only tf.int64 and tf.float32, so casting
    this scaler's output may be necessary.
  """
  with tf.compat.v1.name_scope(name, 'scale_to_z_score'):
    # No key and no key vocabulary file: this is the non-per-key variant.
    standardized = _scale_to_z_score_internal(
        x=x,
        key=None,
        elementwise=elementwise,
        key_vocabulary_filename=None,
        output_dtype=output_dtype)
  return standardized

开发者ID:tensorflow，项目名称:transform，代码行数:32，代码来源:mappers.py

示例8: preprocessing_fn

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def preprocessing_fn(inputs):
  """Builds the transformed feature map and the binary tip label.

  This is the callback tf.transform invokes to preprocess inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  transformed = {}

  # Dense floats: pass through _fill_in_missing, then scale to a z-score.
  for name in _DENSE_FLOAT_FEATURE_KEYS:
    dense = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.scale_to_z_score(dense)

  # Vocabulary features: build and apply a vocabulary.
  for name in _VOCAB_FEATURE_KEYS:
    filled = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.compute_and_apply_vocabulary(
        filled, top_k=_VOCAB_SIZE, num_oov_buckets=_OOV_SIZE)

  # Bucketized features.
  for name in _BUCKET_FEATURE_KEYS:
    filled = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.bucketize(
        filled, _FEATURE_BUCKET_COUNT, always_return_num_quantiles=False)

  # Categorical features are only passed through _fill_in_missing.
  for name in _CATEGORICAL_FEATURE_KEYS:
    transformed[_transformed_name(name)] = _fill_in_missing(inputs[name])

  # Was this passenger a big tipper?
  fare = _fill_in_missing(inputs[_FARE_KEY])
  tip = _fill_in_missing(inputs[_LABEL_KEY])
  fare_is_nan = tf.is_nan(fare)
  # Label used wherever the fare is NaN.
  zero_label = tf.cast(tf.zeros_like(fare), tf.int64)
  # Test if the tip was > 20% of the fare.
  tip_threshold = tf.multiply(fare, tf.constant(0.2))
  big_tip = tf.cast(tf.greater(tip, tip_threshold), tf.int64)
  label = tf.where(fare_is_nan, zero_label, big_tip)
  transformed[_transformed_name(_LABEL_KEY)] = label

  return transformed

开发者ID:kubeflow，项目名称:pipelines，代码行数:43，代码来源:taxi_utils.py

示例9: preprocessing_fn

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def preprocessing_fn(inputs):
  """Builds the transformed feature map and the binary tip label.

  This is the callback tf.transform invokes to preprocess inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  transformed = {}

  # Dense floats: pass through _fill_in_missing, then scale to a z-score.
  for name in _DENSE_FLOAT_FEATURE_KEYS:
    dense = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.scale_to_z_score(dense)

  # Vocabulary features: build and apply a vocabulary.
  for name in _VOCAB_FEATURE_KEYS:
    filled = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.compute_and_apply_vocabulary(
        filled, top_k=_VOCAB_SIZE, num_oov_buckets=_OOV_SIZE)

  # Bucketized features.
  for name in _BUCKET_FEATURE_KEYS:
    filled = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.bucketize(
        filled, _FEATURE_BUCKET_COUNT)

  # Categorical features are only passed through _fill_in_missing.
  for name in _CATEGORICAL_FEATURE_KEYS:
    transformed[_transformed_name(name)] = _fill_in_missing(inputs[name])

  # Was this passenger a big tipper?
  fare = _fill_in_missing(inputs[_FARE_KEY])
  tip = _fill_in_missing(inputs[_LABEL_KEY])
  fare_is_nan = tf.math.is_nan(fare)
  # Label used wherever the fare is NaN.
  zero_label = tf.cast(tf.zeros_like(fare), tf.int64)
  # Test if the tip was > 20% of the fare.
  tip_threshold = tf.multiply(fare, tf.constant(0.2))
  big_tip = tf.cast(tf.greater(tip, tip_threshold), tf.int64)
  label = tf.compat.v1.where(fare_is_nan, zero_label, big_tip)
  transformed[_transformed_name(_LABEL_KEY)] = label

  return transformed

开发者ID:tensorflow，项目名称:tfx，代码行数:42，代码来源:taxi_utils_bqml.py

示例10: preprocessing_fn

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def preprocessing_fn(inputs):
  """Builds the transformed feature map; the label is forwarded untouched.

  This is the callback tf.transform invokes to preprocess inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  transformed = {}

  # Dense floats: pass through _fill_in_missing, then scale to a z-score.
  for name in features.DENSE_FLOAT_FEATURE_KEYS:
    dense = _fill_in_missing(inputs[name])
    transformed[features.transformed_name(name)] = tft.scale_to_z_score(dense)

  # Vocabulary features: build and apply a vocabulary.
  for name in features.VOCAB_FEATURE_KEYS:
    filled = _fill_in_missing(inputs[name])
    transformed[features.transformed_name(name)] = (
        tft.compute_and_apply_vocabulary(
            filled,
            top_k=features.VOCAB_SIZE,
            num_oov_buckets=features.OOV_SIZE))

  # Bucketized features, each paired with its own bucket count.
  bucket_specs = zip(features.BUCKET_FEATURE_KEYS,
                     features.BUCKET_FEATURE_BUCKET_COUNT)
  for name, bucket_count in bucket_specs:
    filled = _fill_in_missing(inputs[name])
    transformed[features.transformed_name(name)] = tft.bucketize(
        filled, bucket_count)

  # Categorical features are only passed through _fill_in_missing.
  for name in features.CATEGORICAL_FEATURE_KEYS:
    transformed[features.transformed_name(name)] = _fill_in_missing(
        inputs[name])

  # TODO(b/157064428): Support label transformation for Keras.
  # Do not apply label transformation as it will result in wrong evaluation.
  label = inputs[features.LABEL_KEY]
  transformed[features.transformed_name(features.LABEL_KEY)] = label

  return transformed

开发者ID:tensorflow，项目名称:tfx，代码行数:39，代码来源:preprocessing.py

示例11: preprocessing_fn

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def preprocessing_fn(inputs):
  """Builds the transformed feature map and the binary tip label.

  This is the callback tf.transform invokes to preprocess inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  transformed = {}

  # Dense floats: pass through _identity and _fill_in_missing, then scale to
  # a z-score.
  for name in _DENSE_FLOAT_FEATURE_KEYS:
    dense = _fill_in_missing(_identity(inputs[name]))
    transformed[_transformed_name(name)] = tft.scale_to_z_score(dense)

  # Vocabulary features: build and apply a vocabulary.
  for name in _VOCAB_FEATURE_KEYS:
    filled = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.compute_and_apply_vocabulary(
        filled, top_k=_VOCAB_SIZE, num_oov_buckets=_OOV_SIZE)

  # Bucketized features.
  for name in _BUCKET_FEATURE_KEYS:
    filled = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.bucketize(
        filled, _FEATURE_BUCKET_COUNT)

  # Categorical features are only passed through _fill_in_missing.
  for name in _CATEGORICAL_FEATURE_KEYS:
    transformed[_transformed_name(name)] = _fill_in_missing(inputs[name])

  # Was this passenger a big tipper?
  fare = _fill_in_missing(inputs[_FARE_KEY])
  tip = _fill_in_missing(inputs[_LABEL_KEY])
  fare_is_nan = tf.math.is_nan(fare)
  # Label used wherever the fare is NaN.
  zero_label = tf.cast(tf.zeros_like(fare), tf.int64)
  # Test if the tip was > 20% of the fare.
  tip_threshold = tf.multiply(fare, tf.constant(0.2))
  big_tip = tf.cast(tf.greater(tip, tip_threshold), tf.int64)
  label = tf.compat.v1.where(fare_is_nan, zero_label, big_tip)
  transformed[_transformed_name(_LABEL_KEY)] = label

  return transformed

开发者ID:tensorflow，项目名称:tfx，代码行数:43，代码来源:transform_module.py

示例12: testScaleToZScoreSparse

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def testScaleToZScoreSparse(self, input_dtype, elementwise):
    """Checks tft.scale_to_z_score on a sparse feature densified with NaNs."""
    self._SkipIfExternalEnvironmentAnd(
        self._UseTFXIO(), 'Skipping large test cases; b/147698868')

    def preprocessing_fn(inputs):
      # Scale the sparse input, then densify it; positions without a value
      # become NaN.
      typed = tf.cast(inputs['x'], input_dtype)
      scaled_sparse = tft.scale_to_z_score(typed, elementwise=elementwise)
      scaled_dense = tf.sparse.to_dense(scaled_sparse, default_value=np.nan)
      scaled_dense.set_shape([None, 4])
      self.assertEqual(scaled_dense.dtype, _mean_output_dtype(input_dtype))
      return {'x_scaled': tf.cast(scaled_dense, tf.float32)}

    # Two instances, each with values at indices 0 and 1 of a size-4 feature.
    sparse_rows = [
        {'idx': [0, 1], 'val': [-4, 10]},
        {'idx': [0, 1], 'val': [2, 4]},
    ]
    sparse_feature = tf.io.SparseFeature(
        'idx', 'val', tft_unit.canonical_numeric_dtype(input_dtype), 4)
    input_metadata = tft_unit.metadata_from_feature_spec({'x': sparse_feature})

    if not elementwise:
      # Statistics over all four values:
      # Mean = 3
      # Var = 25
      # Std Dev = 5
      first_row = [-1.4, 1.4, float('nan'),
                   float('nan')]  # [(-4 - 3) / 5, (10 - 3) / 5]
      second_row = [-.2, .2, float('nan'),
                    float('nan')]  # [(2 - 3) / 5, (4 - 3) / 5]
    else:
      # Statistics per index position:
      # Mean(x) = [-1, 7]
      # Var(x) = [9, 9]
      # StdDev(x) = [3, 3]
      first_row = [-1., 1., float('nan'),
                   float('nan')]  # [(-4 + 1) / 3, (10 - 7) / 3]
      second_row = [1., -1., float('nan'),
                    float('nan')]  # [(2 + 1) / 3, (4 - 7) / 3]
    expected_rows = [{'x_scaled': first_row}, {'x_scaled': second_row}]

    expected_metadata = tft_unit.metadata_from_feature_spec(
        {'x_scaled': tf.io.FixedLenFeature([4], tf.float32)})
    self.assertAnalyzeAndTransformResults(
        sparse_rows, input_metadata, preprocessing_fn, expected_rows,
        expected_metadata)

开发者ID:tensorflow，项目名称:transform，代码行数:62，代码来源:impl_test.py

示例13: scale_to_z_score_per_key

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def scale_to_z_score_per_key(x,
                             key,
                             elementwise=False,
                             key_vocabulary_filename=None,
                             name=None,
                             output_dtype=None):
  """Returns a standardized column with mean 0 and variance 1, grouped per key.

  Scaling to z-score subtracts out the mean and divides by standard deviation.
  Note that the standard deviation computed here is based on the biased variance
  (0 delta degrees of freedom), as computed by analyzers.var.

  Args:
    x: A numeric `Tensor` or `SparseTensor`.
    key: A Tensor or `SparseTensor` of dtype tf.string. Must not be None (use
        `tft.scale_to_z_score` for un-keyed scaling) and must meet one of the
        following conditions:
        1. Both x and key are dense,
        2. Both x and key are sparse and `key` must exactly match `x` in
        everything except values,
        3. The axis=1 index of each x matches its index of dense key.
    elementwise: If true, scales each element of the tensor independently;
        otherwise uses the mean and variance of the whole tensor.
        Currently, not supported for per-key operations.
    key_vocabulary_filename: (Optional) The file name for the per-key file.
      If None, this combiner will assume the keys fit in memory and will not
      store the analyzer result in a file. If '', a file name will be chosen
      based on the current TensorFlow scope. If not '', it should be unique
      within a given preprocessing function.
    name: (Optional) A name for this operation.
    output_dtype: (Optional) If not None, casts the output tensor to this type.

  Returns:
    A `Tensor` or `SparseTensor` containing the input column scaled to mean 0
    and variance 1 (standard deviation 1), grouped per key.

    That is, for all keys k: (x - mean(x)) / std_dev(x) for all x with key k.
    If `x` is floating point, the mean will have the same type as `x`. If `x` is
    integral, the output is cast to tf.float32.

    Note that TFLearn generally permits only tf.int64 and tf.float32, so casting
    this scaler's output may be necessary.

  Raises:
    ValueError: If `key` is None.
    NotImplementedError: If `elementwise` is true (raised by the shared
      implementation for per-key scaling).
  """
  # Reject a missing key up front. The previous code only constructed a
  # tf.errors.InvalidArgumentError (with too few constructor arguments) and
  # never raised it, so the intended error message never reached the caller.
  if key is None:
    raise ValueError('key is None, call `tft.scale_to_z_score` instead')
  with tf.compat.v1.name_scope(name, 'scale_to_z_score_per_key'):
    return _scale_to_z_score_internal(
        x=x,
        key=key,
        elementwise=elementwise,
        key_vocabulary_filename=key_vocabulary_filename,
        output_dtype=output_dtype)

开发者ID:tensorflow，项目名称:transform，代码行数:55，代码来源:mappers.py

示例14: _scale_to_z_score_internal

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def _scale_to_z_score_internal(
    x, key, elementwise, key_vocabulary_filename, output_dtype):
  """Shared implementation for scale_to_z_score and scale_to_z_score_per_key.

  Computes `(x - mean) / sqrt(var)`. Wherever `sqrt(var)` equals zero the
  un-divided difference `x - mean` is returned instead of dividing.

  Args:
    x: The input to scale. If it is a `tf.SparseTensor`, only its `values` are
      scaled.
    key: If None, mean and variance come from `analyzers._mean_and_var`;
      otherwise they come from `analyzers._mean_and_var_per_key`.
    elementwise: Negated and passed as `reduce_instance_dims` in the un-keyed
      case. Must be falsy when `key` is given.
    key_vocabulary_filename: Forwarded to `analyzers._mean_and_var_per_key`.
      When None, the per-key results are mapped onto `x` with
      `tf_utils.map_per_key_reductions`; otherwise with
      `tf_utils.apply_per_key_vocabulary`.
    output_dtype: Forwarded to the analyzers as `output_dtype`.

  Returns:
    The scaled values passed through the function returned by
    `_make_sparse_tensor_wrapper_if_sparse(x)` (presumably this re-wraps the
    values as a SparseTensor when `x` is sparse -- confirm against that helper).

  Raises:
    NotImplementedError: If `key` is not None and `elementwise` is true.
  """
  # x_mean will be float16, float32, or float64, depending on type of x
  if key is None:
    x_mean, x_var = analyzers._mean_and_var(  # pylint: disable=protected-access
        x,
        reduce_instance_dims=not elementwise,
        output_dtype=output_dtype)
  else:
    if elementwise:
      raise NotImplementedError('Per-key elementwise reduction not supported')

    mean_and_var_per_key_result = analyzers._mean_and_var_per_key(  # pylint: disable=protected-access
        x, key, key_vocabulary_filename=key_vocabulary_filename,
        output_dtype=output_dtype)

    if key_vocabulary_filename is None:
      # No vocabulary file: the analyzer result unpacks into the keys and their
      # per-key means and variances, which are then mapped onto x by key.
      key_vocab, key_means, key_vars = mean_and_var_per_key_result
      x_mean, x_var = tf_utils.map_per_key_reductions((key_means, key_vars),
                                                      key, key_vocab, x)
    else:
      # With a vocabulary file: look up the statistics for each key; column 0
      # of the result is used as the mean and column 1 as the variance.
      mean_var_for_key = tf_utils.apply_per_key_vocabulary(
          mean_and_var_per_key_result, key, target_ndims=x.get_shape().ndims)
      x_mean, x_var = (mean_var_for_key[:, 0], mean_var_for_key[:, 1])

  compose_result_fn = _make_sparse_tensor_wrapper_if_sparse(x)
  x_values = x

  if isinstance(x, tf.SparseTensor):
    # For sparse input only the stored values are scaled.
    x_values = x.values
    if elementwise:
      # Only supports SparseTensors with rank 2.
      x.get_shape().assert_has_rank(2)

      # Pick the mean/variance for each stored value using its second index
      # (x.indices[:, 1]).
      x_mean = tf.gather(x_mean, x.indices[:, 1])
      x_var = tf.gather(x_var, x.indices[:, 1])

  numerator = tf.cast(x_values, x_mean.dtype) - x_mean
  denominator = tf.sqrt(x_var)
  # True wherever the standard deviation is non-zero, i.e. division is safe.
  cond = tf.not_equal(denominator, 0)

  if cond.shape.as_list() != x_values.shape.as_list():
    # Repeats cond when necessary across the batch dimension for it to be
    # compatible with the shape of numerator.
    cond = tf.cast(
        tf.zeros_like(numerator) + tf.cast(cond, numerator.dtype),
        dtype=tf.bool)

  # Where the standard deviation is zero, fall back to the un-divided
  # difference (x - mean) instead of dividing by zero.
  deviation_values = tf.where(cond, tf.divide(numerator, denominator),
                              numerator)
  return compose_result_fn(deviation_values)

开发者ID:tensorflow，项目名称:transform，代码行数:54，代码来源:mappers.py

示例15: preprocessing_fn

# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import scale_to_z_score [as 别名]
def preprocessing_fn(inputs):
  """Builds the transformed feature map and the binary tip label.

  This is the callback tf.transform invokes to preprocess inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  transformed = {}

  # Dense floats: pass through _fill_in_missing, then scale to a z-score.
  for name in _DENSE_FLOAT_FEATURE_KEYS:
    dense = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.scale_to_z_score(dense)

  # Vocabulary features: build and apply a vocabulary.
  for name in _VOCAB_FEATURE_KEYS:
    filled = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.compute_and_apply_vocabulary(
        filled, top_k=_VOCAB_SIZE, num_oov_buckets=_OOV_SIZE)

  # Bucketized features.
  for name in _BUCKET_FEATURE_KEYS:
    filled = _fill_in_missing(inputs[name])
    transformed[_transformed_name(name)] = tft.bucketize(
        filled, _FEATURE_BUCKET_COUNT)

  # Categorical features are only passed through _fill_in_missing.
  for name in _CATEGORICAL_FEATURE_KEYS:
    transformed[_transformed_name(name)] = _fill_in_missing(inputs[name])

  # Was this passenger a big tipper?
  fare = _fill_in_missing(inputs[_FARE_KEY])
  tip = _fill_in_missing(inputs[_LABEL_KEY])
  fare_is_nan = tf.math.is_nan(fare)
  # Label used wherever the fare is NaN.
  zero_label = tf.cast(tf.zeros_like(fare), tf.int64)
  # Test if the tip was > 20% of the fare.
  tip_threshold = tf.multiply(fare, tf.constant(0.2))
  big_tip = tf.cast(tf.greater(tip, tip_threshold), tf.int64)
  label = tf.where(fare_is_nan, zero_label, big_tip)
  transformed[_transformed_name(_LABEL_KEY)] = label

  return transformed


# TFX Trainer will call this function.

开发者ID:tensorflow，项目名称:tfx，代码行数:46，代码来源:taxi_utils_solution.py

注：本文中的tensorflow_transform.scale_to_z_score方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。