This article collects typical usage examples of the Python method tensorflow_transform.tf_metadata.dataset_schema.Schema. If you are wondering what dataset_schema.Schema does or how to use it, the curated examples below may help. You can also explore other members of the containing module, tensorflow_transform.tf_metadata.dataset_schema.
The following 15 code examples of dataset_schema.Schema are shown, sorted by popularity by default.
Example 1: _create_raw_metadata
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def _create_raw_metadata():
  """Create a DatasetMetadata for the raw data."""
  column_schemas = {
      key: dataset_schema.ColumnSchema(
          tf.string, [], dataset_schema.FixedColumnRepresentation())
      for key in CATEGORICAL_FEATURE_KEYS
  }
  column_schemas.update({
      key: dataset_schema.ColumnSchema(
          tf.float32, [], dataset_schema.FixedColumnRepresentation())
      for key in NUMERIC_FEATURE_KEYS
  })
  column_schemas[LABEL_KEY] = dataset_schema.ColumnSchema(
      tf.string, [], dataset_schema.FixedColumnRepresentation())
  raw_data_metadata = dataset_metadata.DatasetMetadata(
      dataset_schema.Schema(column_schemas))
  return raw_data_metadata
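A minimal usage sketch (not from the original source): the key lists below are hypothetical stand-ins for the module-level constants the example references, and as_feature_spec() is the legacy Schema accessor from tensorflow_transform 0.13 and earlier.

import tensorflow as tf
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import dataset_schema

# Hypothetical stand-ins for the constants referenced by _create_raw_metadata.
CATEGORICAL_FEATURE_KEYS = ['workclass', 'education']
NUMERIC_FEATURE_KEYS = ['age', 'hours_per_week']
LABEL_KEY = 'label'

raw_data_metadata = _create_raw_metadata()
# The legacy Schema object can be turned into a tf.train.Example feature spec.
feature_spec = raw_data_metadata.schema.as_feature_spec()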
Example 2: _GetSchemaProto
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def _GetSchemaProto(
    metadata: dataset_metadata.DatasetMetadata) -> schema_pb2.Schema:
  """Gets the schema proto associated with a DatasetMetadata.

  This is needed because tensorflow_transform 0.13 and tensorflow_transform
  0.14 have different APIs for DatasetMetadata.

  Args:
    metadata: A dataset_metadata.DatasetMetadata.

  Returns:
    A schema_pb2.Schema.
  """
  # `schema` is either a Schema proto or a dataset_schema.Schema.
  schema = metadata.schema
  # In the case where it's a dataset_schema.Schema, fetch the schema proto.
  return getattr(schema, '_schema_proto', schema)
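A hedged sketch of exercising this compatibility shim, assuming a tensorflow_transform version where dataset_schema.Schema wraps the proto in a private _schema_proto attribute; for newer versions where metadata.schema is already a proto, the getattr simply falls through.

from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import dataset_schema

metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema({}))
# Yields the underlying schema_pb2.Schema whichever form metadata.schema takes.
schema_proto = _GetSchemaProto(metadata)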
Example 3: _ReadMetadata
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def _ReadMetadata(self, data_format: Text,
                  schema_path: Text) -> dataset_metadata.DatasetMetadata:
  """Returns a dataset_metadata.DatasetMetadata for the input data.

  Args:
    data_format: Name of the input data format.
    schema_path: Path to the schema file.

  Returns:
    A dataset_metadata.DatasetMetadata representing the provided set of
    columns.
  """
  if self._ShouldDecodeAsRawExample(data_format):
    return dataset_metadata.DatasetMetadata(_RAW_EXAMPLE_SCHEMA)
  schema_proto = self._GetSchema(schema_path)
  # For compatibility with tensorflow_transform 0.13 and 0.14, we create and
  # then update a DatasetMetadata.
  result = dataset_metadata.DatasetMetadata(dataset_schema.Schema({}))
  _GetSchemaProto(result).CopyFrom(schema_proto)
  return result
Example 4: _GetDecodeFunction
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def _GetDecodeFunction(self, data_format: Union[Text, int],
                       schema: dataset_schema.Schema) -> Any:
  """Returns the decode function for `data_format`.

  Args:
    data_format: Name of the data format.
    schema: A dataset_schema.Schema for the data.

  Returns:
    Function for decoding examples.
  """
  if self._ShouldDecodeAsRawExample(data_format):
    if self._IsDataFormatSequenceExample(data_format):
      absl.logging.warning(
          'TFX Transform doesn\'t officially support tf.SequenceExample, '
          'follow b/38235367 to track official support progress. We do not '
          'guarantee not to break your pipeline if you use Transform with a '
          'tf.SequenceExample data type. Use at your own risk.')
    return lambda x: {RAW_EXAMPLE_KEY: x}
  else:
    return tft.coders.ExampleProtoCoder(schema, serialized=True).decode
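As a hedged usage sketch, the returned function maps one serialized tf.train.Example to an instance dict; serialized_example below is a hypothetical bytes input.

decode_fn = tft.coders.ExampleProtoCoder(schema, serialized=True).decode
instance = decode_fn(serialized_example)  # hypothetical serialized bytes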
Example 5: __init__
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def __init__(self, schema):
  if isinstance(schema, dict):
    schema = dataset_schema.Schema(schema)
  self._schema = schema
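A minimal sketch of constructing such a wrapper from a plain dict; the class name SchemaWrapper is an assumption, since only its __init__ is shown above.

import tensorflow as tf
from tensorflow_transform.tf_metadata import dataset_schema

columns = {
    'age': dataset_schema.ColumnSchema(
        tf.float32, [], dataset_schema.FixedColumnRepresentation()),
}
wrapper = SchemaWrapper(columns)  # the dict is promoted to a dataset_schema.Schema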
Example 6: test_feature_spec_unsupported_dtype
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def test_feature_spec_unsupported_dtype(self):
  with self.assertRaisesRegexp(ValueError, 'invalid dtype'):
    sch.Schema({
        'fixed_float': sch.ColumnSchema(
            tf.float64, [], sch.FixedColumnRepresentation())
    })
Example 7: test_schema_equality
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def test_schema_equality(self):
  schema1 = sch.Schema(column_schemas={
      'fixed_int': sch.ColumnSchema(
          tf.int64, [2], sch.FixedColumnRepresentation()),
      'var_float': sch.ColumnSchema(
          tf.float32, None, sch.ListColumnRepresentation())
  })
  schema2 = sch.Schema(column_schemas={
      'fixed_int': sch.ColumnSchema(
          tf.int64, [2], sch.FixedColumnRepresentation()),
      'var_float': sch.ColumnSchema(
          tf.float32, None, sch.ListColumnRepresentation())
  })
  schema3 = sch.Schema(column_schemas={
      'fixed_int': sch.ColumnSchema(
          tf.int64, [2], sch.FixedColumnRepresentation()),
      'var_float': sch.ColumnSchema(
          tf.string, None, sch.ListColumnRepresentation())
  })
  schema4 = sch.Schema(column_schemas={
      'fixed_int': sch.ColumnSchema(
          tf.int64, [2], sch.FixedColumnRepresentation())
  })
  self.assertEqual(schema1, schema2)
  self.assertNotEqual(schema1, schema3)
  self.assertNotEqual(schema1, schema4)
Example 8: get_manually_created_schema
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def get_manually_created_schema():
  """Provide a test schema built from scratch using the Schema classes."""
  return sch.Schema(_COLUMN_SCHEMAS)
Example 9: get_metadata
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def get_metadata():
  from tensorflow_transform.tf_metadata import dataset_schema
  from tensorflow_transform.tf_metadata import dataset_metadata

  metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema({
      'id': dataset_schema.ColumnSchema(
          tf.string, [], dataset_schema.FixedColumnRepresentation()),
      'text': dataset_schema.ColumnSchema(
          tf.string, [], dataset_schema.FixedColumnRepresentation())
  }))
  return metadata
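A brief sketch of feeding the returned metadata into a coder; this assumes the legacy tft.coders.ExampleProtoCoder accepts the legacy Schema object, as it did in the tensorflow_transform 0.x line.

import tensorflow_transform as tft

metadata = get_metadata()
coder = tft.coders.ExampleProtoCoder(metadata.schema)
serialized = coder.encode({'id': b'1', 'text': b'hello world'})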
Example 10: make_tft_input_metadata
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def make_tft_input_metadata(schema):
  """Create tf-transform metadata from the given schema."""
  tft_schema = {}
  for col_schema in schema:
    col_type = col_schema['type']
    col_name = col_schema['name']
    if col_type == 'NUMBER':
      tft_schema[col_name] = dataset_schema.ColumnSchema(
          tf.float32, [],
          dataset_schema.FixedColumnRepresentation(default_value=0.0))
    elif col_type in ['CATEGORY', 'TEXT', 'IMAGE_URL', 'KEY']:
      tft_schema[col_name] = dataset_schema.ColumnSchema(
          tf.string, [],
          dataset_schema.FixedColumnRepresentation(default_value=''))
  return dataset_metadata.DatasetMetadata(dataset_schema.Schema(tft_schema))
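A usage sketch with a hypothetical column list in the shape the helper expects (dicts carrying 'name' and 'type' keys):

schema = [
    {'name': 'age', 'type': 'NUMBER'},
    {'name': 'occupation', 'type': 'CATEGORY'},
    {'name': 'row_key', 'type': 'KEY'},
]
metadata = make_tft_input_metadata(schema)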
Example 11: _GetSchema
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def _GetSchema(self, schema_path: Text) -> schema_pb2.Schema:
  """Gets a tf.metadata schema.

  Args:
    schema_path: Path to the schema file.

  Returns:
    A tf.metadata schema.
  """
  schema_reader = io_utils.SchemaReader()
  return schema_reader.read(schema_path)
Example 12: _GenerateStats
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def _GenerateStats(
    pcoll: beam.pvalue.PCollection,
    stats_output_path: Text,
    schema: schema_pb2.Schema,
    stats_options: tfdv.StatsOptions,
) -> beam.pvalue.PDone:
  """Generates statistics.

  Args:
    pcoll: A PCollection of examples.
    stats_output_path: Path where statistics are written to.
    schema: A schema_pb2.Schema.
    stats_options: An instance of `tfdv.StatsOptions()` used when computing
      statistics.

  Returns:
    beam.pvalue.PDone.
  """

  def _FilterInternalColumn(record_batch):
    filtered_column_names = []
    filtered_columns = []
    for i, column_name in enumerate(record_batch.schema.names):
      if column_name != _TRANSFORM_INTERNAL_FEATURE_FOR_KEY:
        filtered_column_names.append(column_name)
        filtered_columns.append(record_batch.column(i))
    return pa.RecordBatch.from_arrays(filtered_columns, filtered_column_names)

  pcoll |= 'FilterInternalColumn' >> beam.Map(_FilterInternalColumn)
  stats_options.schema = schema
  # pylint: disable=no-value-for-parameter
  return (
      pcoll
      | 'GenerateStatistics' >> tfdv.GenerateStatistics(stats_options)
      | 'WriteStats' >> Executor._WriteStats(stats_output_path))

# TODO(b/150456345): Obviate this once TFXIO-in-Transform rollout is
# completed.
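A hedged sketch of wiring _GenerateStats into a pipeline; the helper below, its argument names, and the output path are assumptions, and record_batches stands for a PCollection of pyarrow.RecordBatch values produced upstream.

import apache_beam as beam
import tensorflow_data_validation as tfdv

def _AddStats(record_batches: beam.pvalue.PCollection,
              schema_proto) -> beam.pvalue.PDone:
  # record_batches: PCollection of pyarrow.RecordBatch; schema_proto: a
  # schema_pb2.Schema, e.g. as returned by _GetSchema above.
  return _GenerateStats(
      record_batches,
      stats_output_path='/tmp/stats',  # hypothetical output location
      schema=schema_proto,
      stats_options=tfdv.StatsOptions())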
Example 13: __init__
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def __init__(self, schema: Optional[schema_pb2.Schema]):
  self._serialized_schema = schema.SerializeToString() if schema else None
Example 14: process
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def process(self, element: Dict[Text, Any], schema: schema_pb2.Schema
           ) -> Generator[Tuple[Any, Any], None, None]:
  if self._coder is None:
    self._coder = tft.coders.ExampleProtoCoder(schema, serialized=True)
  # Make sure that the synthetic key feature doesn't get encoded.
  key = element.get(_TRANSFORM_INTERNAL_FEATURE_FOR_KEY, None)
  if key is not None:
    element = element.copy()
    del element[_TRANSFORM_INTERNAL_FEATURE_FOR_KEY]
  yield (key, self._coder.encode(element))
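A sketch of applying this DoFn in a pipeline; the enclosing class name _EncodeAsExamples is an assumption, since only its process method appears above, and instance_dicts stands for a PCollection of instance dicts. Beam forwards the extra schema argument to process.

import apache_beam as beam

# Produces (key, serialized_example) pairs from instance dicts.
keyed_examples = (
    instance_dicts
    | 'EncodeAndKey' >> beam.ParDo(_EncodeAsExamples(), schema))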
Example 15: _CreateTFXIO
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as alias]
def _CreateTFXIO(self, dataset: _Dataset,
                 schema: schema_pb2.Schema) -> tfxio.TFXIO:
  """Creates a TFXIO instance for `dataset`."""
  if self._ShouldDecodeAsRawExample(dataset.data_format):
    return raw_tf_record.RawTfRecordTFXIO(
        file_pattern=dataset.file_pattern,
        raw_record_column_name=RAW_EXAMPLE_KEY,
        telemetry_descriptors=[_TRANSFORM_COMPONENT_DESCRIPTOR])
  else:
    return tf_example_record.TFExampleRecord(
        file_pattern=dataset.file_pattern,
        validate=False,
        telemetry_descriptors=[_TRANSFORM_COMPONENT_DESCRIPTOR],
        schema=schema)