本文整理汇总了 Python 中 tensorflow_transform.bucketize 方法的典型用法代码示例。如果您正苦于以下问题:Python tensorflow_transform.bucketize 方法的具体用法?Python tensorflow_transform.bucketize 怎么用?Python tensorflow_transform.bucketize 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 tensorflow_transform 的用法示例。
在下文中一共展示了tensorflow_transform.bucketize方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: preprocessing_fn
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def preprocessing_fn(inputs):
    """Transform raw taxi features into model-ready features.

    Args:
        inputs: mapping from raw feature key to the not-yet-transformed
            feature.

    Returns:
        Dict keyed by ``taxi.transformed_name(key)`` holding every
        transformed feature plus the binary "big tipper" label.
    """
    out = {}

    def emit(raw_key, tensor):
        # Store a transformed tensor under its transformed feature name.
        out[taxi.transformed_name(raw_key)] = tensor

    def filled(raw_key):
        # NOTE(review): the fill value is decided by taxi.fill_in_missing,
        # which is defined outside this snippet -- confirm before relying
        # on "missing values become the mean".
        return taxi.fill_in_missing(inputs[raw_key])

    # Dense float features are standardized to a z-score.
    for key in taxi.DENSE_FLOAT_FEATURE_KEYS:
        emit(key, tft.scale_to_z_score(filled(key)))

    # Vocabulary features: keep the 10 most frequent terms and hash the
    # rest into 10 out-of-vocabulary buckets.
    for key in taxi.VOCAB_FEATURE_KEYS:
        emit(key, tft.compute_and_apply_vocabulary(
            filled(key), top_k=10, num_oov_buckets=10))

    # Bucket features are split into 10 buckets.
    for key in taxi.BUCKET_FEATURE_KEYS:
        emit(key, tft.bucketize(filled(key), num_buckets=10))

    # Categorical features only get their missing values filled.
    for key in taxi.CATEGORICAL_FEATURE_KEYS:
        emit(key, filled(key))

    # Label: was this passenger a big tipper?
    taxi_fare = filled(taxi.FARE_KEY)
    tips = filled(taxi.LABEL_KEY)
    fare_is_nan = tf.is_nan(taxi_fare)
    zero_label = tf.cast(tf.zeros_like(taxi_fare), tf.int64)
    # 1 when the tip is strictly greater than 20% of the fare, else 0.
    fare_fraction = tf.multiply(taxi_fare, tf.constant(0.2))
    big_tipper = tf.cast(tf.greater(tips, fare_fraction), tf.int64)
    # A NaN fare always yields label 0.
    emit(taxi.LABEL_KEY, tf.where(fare_is_nan, zero_label, big_tipper))
    return out
示例2: preprocess
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def preprocess(inputs):
    """tf.transform callback that preprocesses the raw inputs.

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from string feature key to transformed feature operations.
    """
    outputs = {}

    # Dense float features are standardized to a z-score.
    # NOTE(review): no explicit missing-value handling happens here.
    for dense_key in DENSE_FLOAT_FEATURE_KEYS:
        outputs[dense_key] = transform.scale_to_z_score(inputs[dense_key])

    # Vocabulary features: non-string inputs are stringified first, then
    # mapped to integer ids through a per-feature vocabulary file.
    for vocab_key in VOCAB_FEATURE_KEYS:
        raw = inputs[vocab_key]
        vocab_tensor = raw if raw.dtype == tf.string else tf.as_string(raw)
        outputs[vocab_key] = transform.string_to_int(
            vocab_tensor,
            vocab_filename='vocab_' + vocab_key,
            top_k=VOCAB_SIZE,
            num_oov_buckets=OOV_SIZE)

    # Bucket features are split into FEATURE_BUCKET_COUNT buckets.
    for bucket_key in BUCKET_FEATURE_KEYS:
        outputs[bucket_key] = transform.bucketize(
            inputs[bucket_key], FEATURE_BUCKET_COUNT)

    # Categorical features are converted to int64.
    for cat_key in CATEGORICAL_FEATURE_KEYS:
        outputs[cat_key] = tf.to_int64(inputs[cat_key])

    taxi_fare = inputs[FARE_KEY]
    taxi_tip = inputs[LABEL_KEY]
    # Label is true only when the fare is not NaN and the tip is strictly
    # greater than 20% of the fare.
    tip_threshold = tf.multiply(taxi_fare, tf.constant(0.2))
    fare_is_valid = tf.logical_not(tf.is_nan(taxi_fare))
    tip_is_big = tf.greater(taxi_tip, tip_threshold)
    outputs[LABEL_KEY] = tf.logical_and(fare_is_valid, tip_is_big)
    return outputs
示例3: preprocessing_fn
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def preprocessing_fn(inputs):
    """tf.transform callback that turns raw features into model features.

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from string feature key to transformed feature operations.
    """
    transformed = {}

    # Dense floats: fill missing values, then standardize to a z-score.
    for feature in _DENSE_FLOAT_FEATURE_KEYS:
        dense = _fill_in_missing(inputs[feature])
        transformed[_transformed_name(feature)] = tft.scale_to_z_score(dense)

    # Vocabulary features: build a vocabulary and map values to ids.
    for feature in _VOCAB_FEATURE_KEYS:
        dense = _fill_in_missing(inputs[feature])
        transformed[_transformed_name(feature)] = (
            tft.compute_and_apply_vocabulary(
                dense, top_k=_VOCAB_SIZE, num_oov_buckets=_OOV_SIZE))

    # Bucket features: quantile-bucketize with the explicit
    # always_return_num_quantiles=False setting used by this example.
    for feature in _BUCKET_FEATURE_KEYS:
        dense = _fill_in_missing(inputs[feature])
        transformed[_transformed_name(feature)] = tft.bucketize(
            dense, _FEATURE_BUCKET_COUNT, always_return_num_quantiles=False)

    # Categorical features only get their missing values filled.
    for feature in _CATEGORICAL_FEATURE_KEYS:
        transformed[_transformed_name(feature)] = _fill_in_missing(
            inputs[feature])

    # Label: was this passenger a big tipper?
    taxi_fare = _fill_in_missing(inputs[_FARE_KEY])
    tips = _fill_in_missing(inputs[_LABEL_KEY])
    fare_is_nan = tf.is_nan(taxi_fare)
    zero_label = tf.cast(tf.zeros_like(taxi_fare), tf.int64)
    # 1 when the tip is strictly greater than 20% of the fare, else 0.
    fare_fraction = tf.multiply(taxi_fare, tf.constant(0.2))
    big_tipper = tf.cast(tf.greater(tips, fare_fraction), tf.int64)
    # A NaN fare always yields label 0.
    transformed[_transformed_name(_LABEL_KEY)] = tf.where(
        fare_is_nan, zero_label, big_tipper)
    return transformed
示例4: preprocessing_fn
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def preprocessing_fn(inputs):
    """tf.transform callback for preprocessing inputs.

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from string feature key to transformed feature operations.
    """
    outputs = {}

    def put(raw_key, tensor):
        # Register a tensor under the transformed name of its raw key.
        outputs[_transformed_name(raw_key)] = tensor

    def dense_input(raw_key):
        # Raw feature with missing values filled by the module helper.
        return _fill_in_missing(inputs[raw_key])

    # Dense float features are standardized to a z-score.
    for key in _DENSE_FLOAT_FEATURE_KEYS:
        put(key, tft.scale_to_z_score(dense_input(key)))

    # Vocabulary features are mapped to integer ids.
    for key in _VOCAB_FEATURE_KEYS:
        put(key, tft.compute_and_apply_vocabulary(
            dense_input(key), top_k=_VOCAB_SIZE, num_oov_buckets=_OOV_SIZE))

    # Bucket features are split into _FEATURE_BUCKET_COUNT buckets.
    for key in _BUCKET_FEATURE_KEYS:
        put(key, tft.bucketize(dense_input(key), _FEATURE_BUCKET_COUNT))

    # Categorical features only get their missing values filled.
    for key in _CATEGORICAL_FEATURE_KEYS:
        put(key, dense_input(key))

    # Label: was this passenger a big tipper?
    taxi_fare = dense_input(_FARE_KEY)
    tips = dense_input(_LABEL_KEY)
    fare_is_nan = tf.math.is_nan(taxi_fare)
    zero_label = tf.cast(tf.zeros_like(taxi_fare), tf.int64)
    # 1 when the tip is strictly greater than 20% of the fare, else 0.
    fare_fraction = tf.multiply(taxi_fare, tf.constant(0.2))
    big_tipper = tf.cast(tf.greater(tips, fare_fraction), tf.int64)
    # A NaN fare always yields label 0.
    put(_LABEL_KEY, tf.compat.v1.where(fare_is_nan, zero_label, big_tipper))
    return outputs
示例5: preprocessing_fn
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def preprocessing_fn(inputs):
    """tf.transform callback for preprocessing inputs (Keras variant).

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from string feature key to transformed feature operations.
        The label is passed through untransformed.
    """
    result = {}

    # Dense floats: fill missing values, then standardize to a z-score.
    for name in _DENSE_FLOAT_FEATURE_KEYS:
        filled = _fill_in_missing(inputs[name])
        result[_transformed_name(name)] = tft.scale_to_z_score(filled)

    # Vocabulary features are mapped to integer ids.
    for name in _VOCAB_FEATURE_KEYS:
        filled = _fill_in_missing(inputs[name])
        result[_transformed_name(name)] = tft.compute_and_apply_vocabulary(
            filled, top_k=_VOCAB_SIZE, num_oov_buckets=_OOV_SIZE)

    # Bucket features are split into _FEATURE_BUCKET_COUNT buckets.
    for name in _BUCKET_FEATURE_KEYS:
        filled = _fill_in_missing(inputs[name])
        result[_transformed_name(name)] = tft.bucketize(
            filled, _FEATURE_BUCKET_COUNT)

    # Categorical features only get their missing values filled.
    for name in _CATEGORICAL_FEATURE_KEYS:
        result[_transformed_name(name)] = _fill_in_missing(inputs[name])

    # TODO(b/157064428): Support label transformation for Keras.
    # The label is copied as-is: transforming it would result in wrong
    # evaluation.
    raw_label = inputs[_LABEL_KEY]
    result[_transformed_name(_LABEL_KEY)] = raw_label
    return result
# TFX Trainer will call this function.
示例6: preprocessing_fn
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def preprocessing_fn(inputs):
    """tf.transform callback for preprocessing inputs.

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from string feature key to transformed feature operations.
    """
    features = {}

    # Dense floats pass through _identity before missing values are filled
    # and the result is standardized to a z-score.
    for col in _DENSE_FLOAT_FEATURE_KEYS:
        passthrough = _identity(inputs[col])
        features[_transformed_name(col)] = tft.scale_to_z_score(
            _fill_in_missing(passthrough))

    # Vocabulary features are mapped to integer ids.
    for col in _VOCAB_FEATURE_KEYS:
        vocab_source = _fill_in_missing(inputs[col])
        features[_transformed_name(col)] = tft.compute_and_apply_vocabulary(
            vocab_source, top_k=_VOCAB_SIZE, num_oov_buckets=_OOV_SIZE)

    # Bucket features are split into _FEATURE_BUCKET_COUNT buckets.
    for col in _BUCKET_FEATURE_KEYS:
        bucket_source = _fill_in_missing(inputs[col])
        features[_transformed_name(col)] = tft.bucketize(
            bucket_source, _FEATURE_BUCKET_COUNT)

    # Categorical features only get their missing values filled.
    for col in _CATEGORICAL_FEATURE_KEYS:
        features[_transformed_name(col)] = _fill_in_missing(inputs[col])

    # Label: was this passenger a big tipper?
    taxi_fare = _fill_in_missing(inputs[_FARE_KEY])
    tips = _fill_in_missing(inputs[_LABEL_KEY])
    fare_is_nan = tf.math.is_nan(taxi_fare)
    zero_label = tf.cast(tf.zeros_like(taxi_fare), tf.int64)
    # 1 when the tip is strictly greater than 20% of the fare, else 0.
    fare_fraction = tf.multiply(taxi_fare, tf.constant(0.2))
    big_tipper = tf.cast(tf.greater(tips, fare_fraction), tf.int64)
    # A NaN fare always yields label 0.
    features[_transformed_name(_LABEL_KEY)] = tf.compat.v1.where(
        fare_is_nan, zero_label, big_tipper)
    return features
示例7: testSavedModelWithAnnotations
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def testSavedModelWithAnnotations(self):
    """Check that schema annotations survive a SavedModel round trip."""

    def preprocessing_fn(inputs):
        # Bucketization attaches annotations to the output schema.
        return {
            'x_bucketized': tft.bucketize(inputs['x'], num_buckets=4),
            'y_vocab': tft.compute_and_apply_vocabulary(inputs['y']),
        }

    input_data = [
        {'x': 1, 'y': 'foo'},
        {'x': 2, 'y': 'bar'},
        {'x': 3, 'y': 'foo'},
        {'x': 4, 'y': 'foo'},
    ]
    feature_spec = {
        'x': tf.io.FixedLenFeature([], tf.float32),
        'y': tf.io.FixedLenFeature([], tf.string),
    }
    input_metadata = tft_unit.metadata_from_feature_spec(feature_spec)
    temp_dir = self.get_temp_dir()

    # A batch size of 1 forces occurrences to be aggregated across batches
    # when the total vocabulary size is computed.
    with beam_impl.Context(temp_dir=temp_dir, desired_batch_size=1):
        input_data, input_metadata = self._MaybeConvertInputsToTFXIO(
            input_data, input_metadata)
        dataset = (input_data, input_metadata)
        transform_fn = dataset | beam_impl.AnalyzeDataset(preprocessing_fn)
        # Writing transform_fn serializes the annotation collections into
        # the SavedModel.
        _ = transform_fn | transform_fn_io.WriteTransformFn(temp_dir)

    # Read the schema back from the SavedModel to verify the round trip.
    tf_transform_output = tft.TFTransformOutput(temp_dir)
    savedmodel_dir = tf_transform_output.transform_savedmodel_dir
    inferred_metadata = beam_impl._infer_metadata_from_saved_model(
        savedmodel_dir)
    schema = inferred_metadata._schema

    self.assertLen(schema.feature, 2)
    for feature in schema.feature:
        if feature.name == 'x_bucketized':
            # The bucketized feature carries exactly one annotation holding
            # the bucket boundaries.
            self.assertLen(feature.annotation.extra_metadata, 1)
            for annotation in feature.annotation.extra_metadata:
                message = annotations_pb2.BucketBoundaries()
                annotation.Unpack(message)
                self.assertAllClose(list(message.boundaries), [2, 3, 4])
        elif feature.name == 'y_vocab':
            # The vocabulary feature has no feature-level annotation.
            self.assertLen(feature.annotation.extra_metadata, 0)
        else:
            raise ValueError('Unexpected feature with metadata: {}'.format(
                feature.name))

    # Vocabularies create a top-level schema annotation for each vocab file.
    self.assertLen(schema.annotation.extra_metadata, 1)
    message = annotations_pb2.VocabularyMetadata()
    annotation = schema.annotation.extra_metadata[0]
    annotation.Unpack(message)
    self.assertEqual(message.unfiltered_vocabulary_size, 2)
示例8: preprocessing_fn
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def preprocessing_fn(inputs):
    """tf.transform callback for preprocessing inputs.

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from string feature key to transformed feature operations.
    """
    outputs = {}

    def record(raw_key, tensor):
        # Register a tensor under the transformed name of its raw key.
        outputs[_transformed_name(raw_key)] = tensor

    # Dense floats: fill missing values, then standardize to a z-score.
    for key in _DENSE_FLOAT_FEATURE_KEYS:
        record(key, tft.scale_to_z_score(_fill_in_missing(inputs[key])))

    # Vocabulary features are mapped to integer ids.
    for key in _VOCAB_FEATURE_KEYS:
        record(key, tft.compute_and_apply_vocabulary(
            _fill_in_missing(inputs[key]),
            top_k=_VOCAB_SIZE,
            num_oov_buckets=_OOV_SIZE))

    # Bucket features are split into _FEATURE_BUCKET_COUNT buckets.
    for key in _BUCKET_FEATURE_KEYS:
        record(key, tft.bucketize(
            _fill_in_missing(inputs[key]), _FEATURE_BUCKET_COUNT))

    # Categorical features only get their missing values filled.
    for key in _CATEGORICAL_FEATURE_KEYS:
        record(key, _fill_in_missing(inputs[key]))

    # Label: was this passenger a big tipper?
    taxi_fare = _fill_in_missing(inputs[_FARE_KEY])
    tips = _fill_in_missing(inputs[_LABEL_KEY])
    fare_is_nan = tf.math.is_nan(taxi_fare)
    zero_label = tf.cast(tf.zeros_like(taxi_fare), tf.int64)
    # 1 when the tip is strictly greater than 20% of the fare, else 0.
    fare_fraction = tf.multiply(taxi_fare, tf.constant(0.2))
    big_tipper = tf.cast(tf.greater(tips, fare_fraction), tf.int64)
    # A NaN fare always yields label 0.
    record(_LABEL_KEY, tf.where(fare_is_nan, zero_label, big_tipper))
    return outputs
# TFX Trainer will call this function.
示例9: preprocessing_fn
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def preprocessing_fn(inputs):
    """tf.transform callback for preprocessing inputs.

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from string feature key to transformed feature operations.
    """
    outputs = {}

    # Dense floats: fill missing values, then standardize to a z-score.
    for key in taxi.DENSE_FLOAT_FEATURE_KEYS:
        dense = _fill_in_missing(inputs[key])
        outputs[taxi.transformed_name(key)] = transform.scale_to_z_score(
            dense)

    # Vocabulary features are mapped to integer ids.
    for key in taxi.VOCAB_FEATURE_KEYS:
        dense = _fill_in_missing(inputs[key])
        vocab_ids = transform.compute_and_apply_vocabulary(
            dense, top_k=taxi.VOCAB_SIZE, num_oov_buckets=taxi.OOV_SIZE)
        outputs[taxi.transformed_name(key)] = vocab_ids

    # Bucket features are split into taxi.FEATURE_BUCKET_COUNT buckets.
    for key in taxi.BUCKET_FEATURE_KEYS:
        dense = _fill_in_missing(inputs[key])
        outputs[taxi.transformed_name(key)] = transform.bucketize(
            dense, taxi.FEATURE_BUCKET_COUNT)

    # Categorical features only get their missing values filled.
    for key in taxi.CATEGORICAL_FEATURE_KEYS:
        outputs[taxi.transformed_name(key)] = _fill_in_missing(inputs[key])

    # Label: was this passenger a big tipper?
    taxi_fare = _fill_in_missing(inputs[taxi.FARE_KEY])
    tips = _fill_in_missing(inputs[taxi.LABEL_KEY])
    fare_is_nan = tf.is_nan(taxi_fare)
    zero_label = tf.cast(tf.zeros_like(taxi_fare), tf.int64)
    # 1 when the tip is strictly greater than 20% of the fare, else 0.
    fare_fraction = tf.multiply(taxi_fare, tf.constant(0.2))
    big_tipper = tf.cast(tf.greater(tips, fare_fraction), tf.int64)
    # A NaN fare always yields label 0.
    outputs[taxi.transformed_name(taxi.LABEL_KEY)] = tf.where(
        fare_is_nan, zero_label, big_tipper)
    return outputs
示例10: preprocessing_fn
# 需要导入模块: import tensorflow_transform [as 别名]
# 或者: from tensorflow_transform import bucketize [as 别名]
def preprocessing_fn(inputs):
    """tf.transform callback for preprocessing inputs (5% tip threshold).

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from string feature key to transformed feature operations.
    """
    outputs = {}

    def store(raw_key, tensor):
        # Register a tensor under the transformed name of its raw key.
        outputs[taxi.transformed_name(raw_key)] = tensor

    # Dense floats: fill missing values, then standardize to a z-score.
    for key in taxi.DENSE_FLOAT_FEATURE_KEYS:
        store(key, transform.scale_to_z_score(_fill_in_missing(inputs[key])))

    # Vocabulary features are mapped to integer ids.
    for key in taxi.VOCAB_FEATURE_KEYS:
        store(key, transform.compute_and_apply_vocabulary(
            _fill_in_missing(inputs[key]),
            top_k=taxi.VOCAB_SIZE,
            num_oov_buckets=taxi.OOV_SIZE))

    # Bucket features are split into taxi.FEATURE_BUCKET_COUNT buckets.
    for key in taxi.BUCKET_FEATURE_KEYS:
        store(key, transform.bucketize(
            _fill_in_missing(inputs[key]), taxi.FEATURE_BUCKET_COUNT))

    # Categorical features only get their missing values filled.
    for key in taxi.CATEGORICAL_FEATURE_KEYS:
        store(key, _fill_in_missing(inputs[key]))

    # Label: was this passenger a big tipper?
    taxi_fare = _fill_in_missing(inputs[taxi.FARE_KEY])
    tips = _fill_in_missing(inputs[taxi.LABEL_KEY])
    fare_is_nan = tf.is_nan(taxi_fare)
    zero_label = tf.cast(tf.zeros_like(taxi_fare), tf.int64)
    # 1 when the tip is strictly greater than 5% of the fare, else 0.
    fare_fraction = tf.multiply(taxi_fare, tf.constant(0.05))
    big_tipper = tf.cast(tf.greater(tips, fare_fraction), tf.int64)
    # A NaN fare always yields label 0.
    store(taxi.LABEL_KEY, tf.where(fare_is_nan, zero_label, big_tipper))
    return outputs