Python dataset_schema.Schema方法代码示例

本文整理汇总了Python中tensorflow_transform.tf_metadata.dataset_schema.Schema方法的典型用法代码示例。如果您正苦于以下问题：Python dataset_schema.Schema方法的具体用法？Python dataset_schema.Schema怎么用？Python dataset_schema.Schema使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块tensorflow_transform.tf_metadata.dataset_schema的用法示例。

在下文中一共展示了dataset_schema.Schema方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _create_raw_metadata

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def _create_raw_metadata():
  """Build the DatasetMetadata describing the raw input columns.

  Every key in CATEGORICAL_FEATURE_KEYS, plus LABEL_KEY, is declared as a
  tf.string column with shape []; every key in NUMERIC_FEATURE_KEYS is
  declared as a tf.float32 column with shape []. All columns use a
  FixedColumnRepresentation.

  Returns:
    A dataset_metadata.DatasetMetadata wrapping the resulting Schema.
  """

  def _scalar_column(dtype):
    # A fresh ColumnSchema (and representation object) per column.
    return dataset_schema.ColumnSchema(
        dtype, [], dataset_schema.FixedColumnRepresentation())

  column_schemas = {}
  for key in CATEGORICAL_FEATURE_KEYS:
    column_schemas[key] = _scalar_column(tf.string)
  for key in NUMERIC_FEATURE_KEYS:
    column_schemas[key] = _scalar_column(tf.float32)
  # The label is registered last, so it overrides any feature of the same name.
  column_schemas[LABEL_KEY] = _scalar_column(tf.string)

  return dataset_metadata.DatasetMetadata(
      dataset_schema.Schema(column_schemas))

开发者ID:GoogleCloudPlatform，项目名称:cloudml-samples，代码行数:20，代码来源:input_metadata.py

示例2: _GetSchemaProto

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def _GetSchemaProto(
    metadata: dataset_metadata.DatasetMetadata) -> schema_pb2.Schema:
  """Returns the schema proto held by a DatasetMetadata.

  tensorflow_transform 0.13 and 0.14 expose DatasetMetadata differently:
  `metadata.schema` may already be a Schema proto, or it may be a
  dataset_schema.Schema that carries the proto in `_schema_proto`.

  Args:
    metadata: A dataset_metadata.DatasetMetadata.

  Returns:
    A schema_pb2.Schema.
  """
  schema_or_proto = metadata.schema
  # Unwrap the proto when the attribute exists; otherwise the value itself is
  # returned unchanged.
  return getattr(schema_or_proto, '_schema_proto', schema_or_proto)

开发者ID:tensorflow，项目名称:tfx，代码行数:19，代码来源:executor.py

示例3: _ReadMetadata

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def _ReadMetadata(self, data_format: Text,
                  schema_path: Text) -> dataset_metadata.DatasetMetadata:
    """Builds the dataset_metadata.DatasetMetadata for the input data.

    Args:
      data_format: name of the input data format.
      schema_path: path to schema file.

    Returns:
      A DatasetMetadata wrapping _RAW_EXAMPLE_SCHEMA when the data format is
      decoded as a raw example; otherwise a DatasetMetadata whose schema proto
      is a copy of the schema read from `schema_path`.
    """
    if not self._ShouldDecodeAsRawExample(data_format):
      loaded_proto = self._GetSchema(schema_path)
      # To stay compatible with tensorflow_transform 0.13 and 0.14, start from
      # an empty DatasetMetadata and copy the loaded proto into it.
      metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema({}))
      _GetSchemaProto(metadata).CopyFrom(loaded_proto)
      return metadata

    return dataset_metadata.DatasetMetadata(_RAW_EXAMPLE_SCHEMA)

开发者ID:tensorflow，项目名称:tfx，代码行数:23，代码来源:executor.py

示例4: _GetDecodeFunction

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def _GetDecodeFunction(self, data_format: Union[Text, int],
                       schema: dataset_schema.Schema) -> Any:
    """Picks the function used to decode examples of `data_format`.

    Args:
      data_format: name of data format.
      schema: a dataset_schema.Schema for the data.

    Returns:
      Function for decoding examples. For formats decoded as raw examples this
      wraps the input in a dict under RAW_EXAMPLE_KEY; otherwise it is the
      `decode` method of an ExampleProtoCoder built from `schema`.
    """
    if not self._ShouldDecodeAsRawExample(data_format):
      return tft.coders.ExampleProtoCoder(schema, serialized=True).decode

    # Raw-example path: warn when the format is tf.SequenceExample.
    if self._IsDataFormatSequenceExample(data_format):
      absl.logging.warning(
          'TFX Transform doesn\'t officially support tf.SequenceExample, '
          'follow b/38235367 to track official support progress. We do not '
          'guarantee not to break your pipeline if you use Transform with a '
          'tf.SequenceExample data type. Use at your own risk.')
    return lambda x: {RAW_EXAMPLE_KEY: x}

开发者ID:tensorflow，项目名称:tfx，代码行数:23，代码来源:executor.py

示例5: __init__

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def __init__(self, schema):
    """Stores `schema`, wrapping a plain dict in a dataset_schema.Schema."""
    self._schema = (
        dataset_schema.Schema(schema) if isinstance(schema, dict) else schema)

开发者ID:tensorflow，项目名称:transform，代码行数:6，代码来源:dataset_metadata.py

示例6: test_feature_spec_unsupported_dtype

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def test_feature_spec_unsupported_dtype(self):
    """Building a Schema with a tf.float64 column raises 'invalid dtype'."""
    with self.assertRaisesRegexp(ValueError, 'invalid dtype'):
      # Both constructions stay inside the context manager so the error is
      # caught regardless of which call raises it.
      float64_column = sch.ColumnSchema(
          tf.float64, [], sch.FixedColumnRepresentation())
      sch.Schema({'fixed_float': float64_column})

开发者ID:tensorflow，项目名称:transform，代码行数:8，代码来源:dataset_schema_test.py

示例7: test_schema_equality

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def test_schema_equality(self):
    """Schemas compare equal only when their columns match exactly."""

    def fixed_int_column():
      return sch.ColumnSchema(tf.int64, [2], sch.FixedColumnRepresentation())

    def list_column(dtype):
      return sch.ColumnSchema(dtype, None, sch.ListColumnRepresentation())

    baseline = sch.Schema(column_schemas={
        'fixed_int': fixed_int_column(),
        'var_float': list_column(tf.float32),
    })
    identical = sch.Schema(column_schemas={
        'fixed_int': fixed_int_column(),
        'var_float': list_column(tf.float32),
    })
    # Same column names, but 'var_float' carries a different dtype.
    different_dtype = sch.Schema(column_schemas={
        'fixed_int': fixed_int_column(),
        'var_float': list_column(tf.string),
    })
    # The 'var_float' column is absent altogether.
    missing_column = sch.Schema(column_schemas={
        'fixed_int': fixed_int_column(),
    })

    self.assertEqual(baseline, identical)
    self.assertNotEqual(baseline, different_dtype)
    self.assertNotEqual(baseline, missing_column)

开发者ID:tensorflow，项目名称:transform，代码行数:29，代码来源:dataset_schema_test.py

示例8: get_manually_created_schema

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def get_manually_created_schema():
  """Return a test Schema constructed directly from _COLUMN_SCHEMAS."""
  manual_schema = sch.Schema(_COLUMN_SCHEMAS)
  return manual_schema

开发者ID:tensorflow，项目名称:transform，代码行数:5，代码来源:test_common.py

示例9: get_metadata

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def get_metadata():
  """Return DatasetMetadata with two tf.string columns, 'id' and 'text'.

  Both columns have shape [] and use a FixedColumnRepresentation.
  """
  from tensorflow_transform.tf_metadata import dataset_schema
  from tensorflow_transform.tf_metadata import dataset_metadata

  def string_column():
    return dataset_schema.ColumnSchema(
        tf.string, [], dataset_schema.FixedColumnRepresentation())

  schema = dataset_schema.Schema(
      {name: string_column() for name in ('id', 'text')})
  return dataset_metadata.DatasetMetadata(schema)

开发者ID:GoogleCloudPlatform，项目名称:realtime-embeddings-matching，代码行数:13，代码来源:pipeline.py

示例10: make_tft_input_metadata

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def make_tft_input_metadata(schema):
  """Create tf-transform metadata from given schema.

  Args:
    schema: an iterable of mappings, each providing a 'type' and a 'name'
      entry for one column.

  Returns:
    A dataset_metadata.DatasetMetadata. 'NUMBER' columns become tf.float32
    with default 0.0; 'CATEGORY', 'TEXT', 'IMAGE_URL' and 'KEY' columns become
    tf.string with default ''. Columns of any other type are left out.
  """
  string_kinds = ('CATEGORY', 'TEXT', 'IMAGE_URL', 'KEY')
  columns = {}

  for entry in schema:
    kind = entry['type']
    name = entry['name']
    if kind == 'NUMBER':
      representation = dataset_schema.FixedColumnRepresentation(
          default_value=0.0)
      columns[name] = dataset_schema.ColumnSchema(
          tf.float32, [], representation)
    elif kind in string_kinds:
      representation = dataset_schema.FixedColumnRepresentation(
          default_value='')
      columns[name] = dataset_schema.ColumnSchema(
          tf.string, [], representation)

  return dataset_metadata.DatasetMetadata(dataset_schema.Schema(columns))

开发者ID:kubeflow，项目名称:pipelines，代码行数:16，代码来源:task.py

示例11: _GetSchema

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def _GetSchema(self, schema_path: Text) -> schema_pb2.Schema:
    """Reads a tf.metadata schema from a file.

    Args:
      schema_path: Path to schema file.

    Returns:
      The tf.metadata schema returned by io_utils.SchemaReader for that path.
    """
    return io_utils.SchemaReader().read(schema_path)

开发者ID:tensorflow，项目名称:tfx，代码行数:13，代码来源:executor.py

示例12: _GenerateStats

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def _GenerateStats(
      pcoll: beam.pvalue.PCollection,
      stats_output_path: Text,
      schema: schema_pb2.Schema,
      stats_options: tfdv.StatsOptions,
  ) -> beam.pvalue.PDone:
    """Computes statistics over `pcoll` and writes them out.

    Args:
      pcoll: PCollection of examples.
      stats_output_path: path where statistics is written to.
      schema: schema; it is assigned onto `stats_options.schema`.
      stats_options: An instance of `tfdv.StatsOptions()` used when computing
        statistics. Its `schema` attribute is overwritten by this call.

    Returns:
      beam.pvalue.PDone.
    """
    def _FilterInternalColumn(record_batch):
      # Keep every column except the internal key feature, preserving order.
      kept = [
          (name, record_batch.column(index))
          for index, name in enumerate(record_batch.schema.names)
          if name != _TRANSFORM_INTERNAL_FEATURE_FOR_KEY
      ]
      kept_names = [name for name, _ in kept]
      kept_columns = [column for _, column in kept]
      return pa.RecordBatch.from_arrays(kept_columns, kept_names)

    pcoll |= 'FilterInternalColumn' >> beam.Map(_FilterInternalColumn)
    stats_options.schema = schema
    # pylint: disable=no-value-for-parameter
    generated_stats = (
        pcoll
        | 'GenerateStatistics' >> tfdv.GenerateStatistics(stats_options))
    return generated_stats | 'WriteStats' >> Executor._WriteStats(
        stats_output_path)

  # TODO(b/150456345): Obviate this once TFXIO-in-Transform rollout is
  # completed.

开发者ID:tensorflow，项目名称:tfx，代码行数:39，代码来源:executor.py

示例13: __init__

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def __init__(self, schema: Optional[schema_pb2.Schema]):
      """Keeps the schema in serialized form, or None when `schema` is falsy."""
      if schema:
        self._serialized_schema = schema.SerializeToString()
      else:
        self._serialized_schema = None

开发者ID:tensorflow，项目名称:tfx，代码行数:4，代码来源:executor.py

示例14: process

# 需要导入模块: from tensorflow_transform.tf_metadata import dataset_schema [as 别名]
# 或者: from tensorflow_transform.tf_metadata.dataset_schema import Schema [as 别名]
def process(self, element: Dict[Text, Any], schema: schema_pb2.Schema
               ) -> Generator[Tuple[Any, Any], None, None]:
      """Yields one `(key, encoded_element)` pair for `element`.

      The ExampleProtoCoder is built from `schema` on first use and then
      reused. `key` is the value stored under
      _TRANSFORM_INTERNAL_FEATURE_FOR_KEY, or None when that entry is absent.
      """
      if self._coder is None:
        self._coder = tft.coders.ExampleProtoCoder(schema, serialized=True)

      internal_key = element.get(_TRANSFORM_INTERNAL_FEATURE_FOR_KEY, None)
      if internal_key is not None:
        # Strip the synthetic key feature from a copy so that it is not
        # encoded and the caller's dict stays untouched.
        element = element.copy()
        del element[_TRANSFORM_INTERNAL_FEATURE_FOR_KEY]
      encoded = self._coder.encode(element)
      yield (internal_key, encoded)