This article compiles typical usage examples of dataset_metadata.DatasetMetadata from the Python module tensorflow_transform.tf_metadata. If you are wondering what dataset_metadata.DatasetMetadata does, how to call it, or where to find working examples, the curated code samples below should help. You can also explore further usage examples of the containing module, tensorflow_transform.tf_metadata.dataset_metadata.
Below are 15 code examples of dataset_metadata.DatasetMetadata, sorted by popularity by default.
Example 1: _create_raw_metadata
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _create_raw_metadata():
  """Create a DatasetMetadata for the raw data."""
  column_schemas = {
      key: dataset_schema.ColumnSchema(
          tf.string, [], dataset_schema.FixedColumnRepresentation())
      for key in CATEGORICAL_FEATURE_KEYS
  }
  column_schemas.update({
      key: dataset_schema.ColumnSchema(
          tf.float32, [], dataset_schema.FixedColumnRepresentation())
      for key in NUMERIC_FEATURE_KEYS
  })
  column_schemas[LABEL_KEY] = dataset_schema.ColumnSchema(
      tf.string, [], dataset_schema.FixedColumnRepresentation())
  raw_data_metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema(
      column_schemas))
  return raw_data_metadata
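The helper above assumes module-level constants for the feature keys. A minimal usage sketch with hypothetical key values, plus the imports the example relies on (dataset_schema.ColumnSchema is the legacy schema API from older TFT releases):

import tensorflow as tf
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import dataset_schema

# Hypothetical feature keys; the original example defines these elsewhere.
CATEGORICAL_FEATURE_KEYS = ['workclass', 'education']
NUMERIC_FEATURE_KEYS = ['age', 'hours_per_week']
LABEL_KEY = 'label'

raw_metadata = _create_raw_metadata()
print(raw_metadata.schema)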
Example 2: convert_to_tfxio_api_inputs
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def convert_to_tfxio_api_inputs(
    self, legacy_input_data, legacy_input_metadata, label='input_data'):
  """Converts from the legacy TFT API inputs to TFXIO-based inputs.

  Args:
    legacy_input_data: a PCollection of instance dicts.
    legacy_input_metadata: a tft.DatasetMetadata.
    label: label for the PTransform that translates `legacy_input_data` into
      the TFXIO input data. Set to different values if this method is called
      multiple times in a beam Pipeline.

  Returns:
    A tuple of a PCollection of `pyarrow.RecordBatch` and a
    `tensor_adapter.TensorAdapterConfig`. This tuple can be fed directly to
    TFT's `{Analyze,Transform,AnalyzeAndTransform}Dataset` APIs.
  """
  tfxio_impl = _LegacyCompatibilityTFXIO(legacy_input_metadata.schema)
  input_data = (
      legacy_input_data |
      ('LegacyFormatToTfxio[%s]' % label >> tfxio_impl.BeamSource(
          beam_impl.Context.get_desired_batch_size())))
  return input_data, tfxio_impl.TensorAdapterConfig()
Example 3: read_metadata
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def read_metadata(path):
  """Load metadata from a path into a new DatasetMetadata."""
  schema_file = os.path.join(path, 'schema.pbtxt')
  legacy_schema_file = os.path.join(path, 'v1-json', 'schema.json')
  if file_io.file_exists(schema_file):
    text_proto = file_io.FileIO(schema_file, 'r').read()
    schema_proto = text_format.Parse(text_proto, schema_pb2.Schema(),
                                     allow_unknown_extension=True)
  elif file_io.file_exists(legacy_schema_file):
    schema_json = file_io.FileIO(legacy_schema_file, 'r').read()
    schema_proto = _parse_schema_json(schema_json)
  else:
    raise IOError(
        'Schema file {} does not exist and neither did legacy format file '
        '{}'.format(schema_file, legacy_schema_file))
  return dataset_metadata.DatasetMetadata(schema_proto)
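A hedged usage sketch: pointing read_metadata at a metadata directory reconstructs a DatasetMetadata from whichever of schema.pbtxt or the legacy v1-json/schema.json it finds there. The path below is hypothetical:

# Hypothetical directory produced by an earlier write_metadata() call
# (see Example 15); it must contain schema.pbtxt or v1-json/schema.json.
metadata = read_metadata('/tmp/transform_output/metadata')
print(metadata.schema)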
Example 4: main
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def main(_):
  # Define schema.
  raw_metadata = dataset_metadata.DatasetMetadata(
      dataset_schema.from_feature_spec({
          'text': tf.FixedLenFeature([], tf.string),
          'language_code': tf.FixedLenFeature([], tf.string),
      }))

  # Add in padding tokens.
  reserved_tokens = FLAGS.reserved_tokens
  if FLAGS.num_pad_tokens:
    padded_tokens = ['<pad>']
    padded_tokens += ['<pad%d>' % i for i in range(1, FLAGS.num_pad_tokens)]
    reserved_tokens = padded_tokens + reserved_tokens

  params = learner.Params(FLAGS.upper_thresh, FLAGS.lower_thresh,
                          FLAGS.num_iterations, FLAGS.max_input_tokens,
                          FLAGS.max_token_length, FLAGS.max_unique_chars,
                          FLAGS.vocab_size, FLAGS.slack_ratio,
                          FLAGS.include_joiner_token, FLAGS.joiner,
                          reserved_tokens)

  generate_vocab(FLAGS.data_file, FLAGS.vocab_file, FLAGS.metrics_file,
                 raw_metadata, params)
Example 5: __init__
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def __init__(self,
             dataset,
             tf_metadata_schema,
             preprocessing_fn,
             transform_input_dataset_metadata,
             generate_dataset=False):
  """Constructor.

  Args:
    dataset: BenchmarkDataset object.
    tf_metadata_schema: tf.Metadata schema.
    preprocessing_fn: preprocessing_fn.
    transform_input_dataset_metadata: dataset_metadata.DatasetMetadata.
    generate_dataset: If True, generates the raw dataset and appropriate
      intermediate outputs (just the TFT SavedModel for now) necessary for
      other benchmarks.
  """
  self._dataset = dataset
  self._tf_metadata_schema = tf_metadata_schema
  self._preprocessing_fn = preprocessing_fn
  self._transform_input_dataset_metadata = transform_input_dataset_metadata
  self._generate_dataset = generate_dataset
Example 6: _get_common_variables
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _get_common_variables(dataset):
  """Returns metadata schema, preprocessing fn, input dataset metadata."""
  tf_metadata_schema = benchmark_utils.read_schema(
      dataset.tf_metadata_schema_path())
  preprocessing_fn = dataset.tft_preprocessing_fn()
  feature_spec = schema_utils.schema_as_feature_spec(
      tf_metadata_schema).feature_spec
  transform_input_columns = (
      tft.get_transform_input_columns(preprocessing_fn, feature_spec))
  transform_input_dataset_metadata = dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec({
          feature: feature_spec[feature] for feature in transform_input_columns
      }))
  return CommonVariablesTuple(
      tf_metadata_schema=tf_metadata_schema,
      preprocessing_fn=preprocessing_fn,
      transform_input_dataset_metadata=transform_input_dataset_metadata)
Example 7: _GetSchemaProto
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _GetSchemaProto(
    metadata: dataset_metadata.DatasetMetadata) -> schema_pb2.Schema:
  """Gets the schema proto associated with a DatasetMetadata.

  This is needed because tensorflow_transform 0.13 and tensorflow_transform
  0.14 have a different API for DatasetMetadata.

  Args:
    metadata: A dataset_metadata.DatasetMetadata.

  Returns:
    A schema_pb2.Schema.
  """
  # `schema` is either a Schema proto or dataset_schema.Schema.
  schema = metadata.schema
  # In the case where it's a dataset_schema.Schema, fetch the schema proto.
  return getattr(schema, '_schema_proto', schema)
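The getattr fallback is a small version shim: in tensorflow_transform 0.13, metadata.schema is a dataset_schema.Schema wrapper that keeps the proto in a private _schema_proto attribute, while from 0.14 onward it is the schema_pb2.Schema proto itself, so getattr returns it unchanged. A hedged sketch of the 0.14+ path (assuming the DatasetMetadata constructor accepts a schema proto directly, as it does in those releases):

from tensorflow_metadata.proto.v0 import schema_pb2
from tensorflow_transform.tf_metadata import dataset_metadata

# With TFT 0.14+, DatasetMetadata wraps the proto directly, so the
# default branch of getattr() simply hands back metadata.schema.
metadata = dataset_metadata.DatasetMetadata(schema_pb2.Schema())
assert isinstance(_GetSchemaProto(metadata), schema_pb2.Schema)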
Example 8: _ReadExamples
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _ReadExamples(
    pipeline: beam.Pipeline, dataset: _Dataset,
    input_dataset_metadata: dataset_metadata.DatasetMetadata
) -> beam.pvalue.PCollection:
  """Reads examples from the given `dataset`.

  Args:
    pipeline: beam pipeline.
    dataset: A `_Dataset` object that represents the data to read.
    input_dataset_metadata: A `dataset_metadata.DatasetMetadata`. Not used.

  Returns:
    A PCollection containing KV pairs of bytes.
  """
  del input_dataset_metadata
  assert dataset.file_format == labels.FORMAT_TFRECORD, dataset.file_format

  return (
      pipeline
      | 'Read' >> beam.io.ReadFromTFRecord(
          dataset.file_pattern,
          coder=beam.coders.BytesCoder(),
          # TODO(b/114938612): Eventually remove this override.
          validate=False)
      | 'AddKey' >> beam.Map(lambda x: (None, x)))
Example 9: _ReadMetadata
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _ReadMetadata(self, data_format: Text,
                  schema_path: Text) -> dataset_metadata.DatasetMetadata:
  """Returns a dataset_metadata.DatasetMetadata for the input data.

  Args:
    data_format: name of the input data format.
    schema_path: path to schema file.

  Returns:
    A dataset_metadata.DatasetMetadata representing the provided set of
    columns.
  """
  if self._ShouldDecodeAsRawExample(data_format):
    return dataset_metadata.DatasetMetadata(_RAW_EXAMPLE_SCHEMA)
  schema_proto = self._GetSchema(schema_path)
  # For compatibility with tensorflow_transform 0.13 and 0.14, we create and
  # then update a DatasetMetadata.
  result = dataset_metadata.DatasetMetadata(dataset_schema.Schema({}))
  _GetSchemaProto(result).CopyFrom(schema_proto)
  return result
Example 10: store_transformed_data
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def store_transformed_data(data, schema, path, name=''):
  """Stores data from input pipeline into TFRecord in the specified path.

  Args:
    data: `PCollection`, input pipeline.
    schema: `DatasetMetadata` object, describes schema of the input pipeline.
    path: string, where to write output.
    name: string, name describing pipeline to be written.

  Returns:
    PCollection
  """
  p = (
      data
      | 'WriteData{}'.format(name) >> tfrecordio.WriteToTFRecord(
          path, coder=example_proto_coder.ExampleProtoCoder(schema.schema)))
  return p
Example 11: main
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def main():
  def preprocessing_fn(inputs):
    """Preprocess input columns into transformed columns."""
    x = inputs['x']
    y = inputs['y']
    s = inputs['s']
    x_centered = x - tft.mean(x)
    y_normalized = tft.scale_to_0_1(y)
    s_integerized = tft.compute_and_apply_vocabulary(s)
    x_centered_times_y_normalized = (x_centered * y_normalized)
    return {
        'x_centered': x_centered,
        'y_normalized': y_normalized,
        'x_centered_times_y_normalized': x_centered_times_y_normalized,
        's_integerized': s_integerized
    }

  raw_data = [
      {'x': 1, 'y': 1, 's': 'hello'},
      {'x': 2, 'y': 2, 's': 'world'},
      {'x': 3, 'y': 3, 's': 'hello'}
  ]

  raw_data_metadata = dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec({
          's': tf.io.FixedLenFeature([], tf.string),
          'y': tf.io.FixedLenFeature([], tf.float32),
          'x': tf.io.FixedLenFeature([], tf.float32),
      }))

  with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
    transformed_dataset, transform_fn = (  # pylint: disable=unused-variable
        (raw_data, raw_data_metadata) | tft_beam.AnalyzeAndTransformDataset(
            preprocessing_fn))

  transformed_data, transformed_metadata = transformed_dataset  # pylint: disable=unused-variable

  pprint.pprint(transformed_data)
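Since the pipeline is deterministic, the printed result can be checked by hand: tft.mean(x) is 2.0, so x_centered becomes [-1.0, 0.0, 1.0]; y spans [1, 3], so y_normalized becomes [0.0, 0.5, 1.0]; and the vocabulary maps 'hello' (frequency 2) to index 0 and 'world' to 1. The pprint output should therefore look roughly like:

[{'s_integerized': 0,
  'x_centered': -1.0,
  'x_centered_times_y_normalized': -0.0,
  'y_normalized': 0.0},
 {'s_integerized': 1,
  'x_centered': 0.0,
  'x_centered_times_y_normalized': 0.0,
  'y_normalized': 0.5},
 {'s_integerized': 0,
  'x_centered': 1.0,
  'x_centered_times_y_normalized': 1.0,
  'y_normalized': 1.0}]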
Example 12: metadata_from_feature_spec
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def metadata_from_feature_spec(feature_spec, domains=None):
  """Construct a DatasetMetadata from a feature spec.

  Args:
    feature_spec: A feature spec.
    domains: A dict containing domains of features.

  Returns:
    A `tft.tf_metadata.dataset_metadata.DatasetMetadata` object.
  """
  return dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec(feature_spec, domains))
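A minimal hedged usage sketch of this convenience wrapper, with a made-up feature spec:

import tensorflow as tf

# Hypothetical feature spec; any mix of tf.io feature spec entries works.
metadata = metadata_from_feature_spec({
    'age': tf.io.FixedLenFeature([], tf.float32),
    'query': tf.io.VarLenFeature(tf.string),
})
print(metadata.schema)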
Example 13: _infer_metadata_from_saved_model
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _infer_metadata_from_saved_model(saved_model_dir):
  """Infers a DatasetMetadata for outputs of a SavedModel."""
  with tf.compat.v1.Graph().as_default() as graph:
    with tf.compat.v1.Session(graph=graph) as session:
      _, outputs = (
          saved_transform_io.partially_apply_saved_transform_internal(
              saved_model_dir, {}))

      session.run(tf.compat.v1.global_variables_initializer())
      session.run(tf.compat.v1.tables_initializer())
      return dataset_metadata.DatasetMetadata(
          schema=schema_inference.infer_feature_schema(outputs, graph, session))
Example 14: _remove_columns_from_metadata
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _remove_columns_from_metadata(metadata, excluded_columns):
  """Remove columns from metadata without mutating original metadata."""
  feature_spec, domains = schema_utils.schema_as_feature_spec(metadata.schema)
  new_feature_spec = {name: spec for name, spec in feature_spec.items()
                      if name not in excluded_columns}
  new_domains = {name: spec for name, spec in domains.items()
                 if name not in excluded_columns}
  return dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec(new_feature_spec, new_domains))
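A hedged sketch of deriving a narrower metadata from a wider one; the metadata object and the excluded column name are hypothetical:

# Drop a hypothetical 'label' column, e.g. before building a serving schema;
# raw_metadata is any DatasetMetadata built as in the earlier examples.
serving_metadata = _remove_columns_from_metadata(
    raw_metadata, excluded_columns={'label'})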
Example 15: write_metadata
# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def write_metadata(metadata, path):
  """Write metadata to the given path, in text proto (pbtxt) format.

  Args:
    metadata: A `DatasetMetadata` to write.
    path: a path to a directory where metadata should be written.
  """
  if not file_io.file_exists(path):
    file_io.recursive_create_dir(path)
  schema_file = os.path.join(path, 'schema.pbtxt')
  ascii_proto = text_format.MessageToString(metadata.schema)
  file_io.atomic_write_string_to_file(schema_file, ascii_proto, overwrite=True)
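Examples 3 and 15 form the two halves of a round trip. A hedged sketch, assuming raw_metadata is a DatasetMetadata built as in the earlier examples:

import tempfile

metadata_dir = tempfile.mkdtemp()
write_metadata(raw_metadata, metadata_dir)  # writes <metadata_dir>/schema.pbtxt
restored = read_metadata(metadata_dir)      # Example 3 reads it back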