

Python dataset_metadata.DatasetMetadata Method Code Examples

This article collects typical usage examples of the Python method tensorflow_transform.tf_metadata.dataset_metadata.DatasetMetadata. If you are wondering exactly what dataset_metadata.DatasetMetadata does, how to call it, or what its usage looks like in practice, the curated code examples below should help. You can also explore other usages in the tensorflow_transform.tf_metadata.dataset_metadata module.


The following shows 15 code examples of dataset_metadata.DatasetMetadata, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
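Before the examples, here is a minimal sketch of the most common construction pattern, assuming tensorflow_transform >= 0.14 (where schema_utils.schema_from_feature_spec is available); the feature name below is hypothetical:

import tensorflow as tf
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import schema_utils

# Build a DatasetMetadata directly from a TensorFlow feature spec.
metadata = dataset_metadata.DatasetMetadata(
    schema_utils.schema_from_feature_spec({
        'my_feature': tf.io.FixedLenFeature([], tf.float32),  # hypothetical feature name
    }))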

Example 1: _create_raw_metadata

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _create_raw_metadata():
  """Create a DatasetMetadata for the raw data."""
  column_schemas = {
      key: dataset_schema.ColumnSchema(
          tf.string, [], dataset_schema.FixedColumnRepresentation())
      for key in CATEGORICAL_FEATURE_KEYS
  }
  column_schemas.update({
      key: dataset_schema.ColumnSchema(
          tf.float32, [], dataset_schema.FixedColumnRepresentation())
      for key in NUMERIC_FEATURE_KEYS
  })
  column_schemas[LABEL_KEY] = dataset_schema.ColumnSchema(
      tf.string, [], dataset_schema.FixedColumnRepresentation())

  raw_data_metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema(
      column_schemas))
  return raw_data_metadata 
Developer: GoogleCloudPlatform, Project: cloudml-samples, Lines: 20, Source: input_metadata.py
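Example 1 uses the legacy ColumnSchema/Schema API from tensorflow_transform 0.13. A hypothetical modern equivalent, assuming tensorflow_transform >= 0.14 and the same CATEGORICAL_FEATURE_KEYS, NUMERIC_FEATURE_KEYS, and LABEL_KEY constants, is sketched below:

import tensorflow as tf
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import schema_utils

def _create_raw_metadata_v2():
  """Sketch of _create_raw_metadata on the schema_utils-based API."""
  feature_spec = {
      key: tf.io.FixedLenFeature([], tf.string)
      for key in CATEGORICAL_FEATURE_KEYS  # assumed to exist as in Example 1
  }
  feature_spec.update({
      key: tf.io.FixedLenFeature([], tf.float32)
      for key in NUMERIC_FEATURE_KEYS
  })
  feature_spec[LABEL_KEY] = tf.io.FixedLenFeature([], tf.string)
  return dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec(feature_spec))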

Example 2: convert_to_tfxio_api_inputs

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def convert_to_tfxio_api_inputs(
      self, legacy_input_data, legacy_input_metadata, label='input_data'):
    """Converts from the legacy TFT API inputs to TFXIO-based inputs.

    Args:
      legacy_input_data: a PCollection of instance dicts.
      legacy_input_metadata: a tft.DatasetMetadata.
      label: label for the PTransform that translates `legacy_input_data` into
        the TFXIO input data. Set to different values if this method is called
        multiple times in a Beam pipeline.

    Returns:
      A tuple of a PCollection of `pyarrow.RecordBatch` and a
      `tensor_adapter.TensorAdapterConfig`. This tuple can be fed directly to
      TFT's `{Analyze,Transform,AnalyzeAndTransform}Dataset` APIs.
    """
    tfxio_impl = _LegacyCompatibilityTFXIO(legacy_input_metadata.schema)
    input_data = (
        legacy_input_data |
        ('LegacyFormatToTfxio[%s]' % label >> tfxio_impl.BeamSource(
            beam_impl.Context.get_desired_batch_size())))
    return input_data, tfxio_impl.TensorAdapterConfig() 
Developer: tensorflow, Project: transform, Lines: 23, Source: tft_unit.py

Example 3: read_metadata

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def read_metadata(path):
  """Load metadata in JSON format from a path into a new DatasetMetadata."""
  schema_file = os.path.join(path, 'schema.pbtxt')
  legacy_schema_file = os.path.join(path, 'v1-json', 'schema.json')
  if file_io.file_exists(schema_file):
    text_proto = file_io.FileIO(schema_file, 'r').read()
    schema_proto = text_format.Parse(text_proto, schema_pb2.Schema(),
                                     allow_unknown_extension=True)
  elif file_io.file_exists(legacy_schema_file):
    schema_json = file_io.FileIO(legacy_schema_file, 'r').read()
    schema_proto = _parse_schema_json(schema_json)
  else:
    raise IOError(
        'Schema file {} does not exist and neither does the legacy format file '
        '{}'.format(schema_file, legacy_schema_file))
  return dataset_metadata.DatasetMetadata(schema_proto) 
Developer: tensorflow, Project: transform, Lines: 18, Source: metadata_io.py

Example 4: main

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def main(_):
  # Define schema.
  raw_metadata = dataset_metadata.DatasetMetadata(
      dataset_schema.from_feature_spec({
          'text': tf.FixedLenFeature([], tf.string),
          'language_code': tf.FixedLenFeature([], tf.string),
      }))

  # Add in padding tokens.
  reserved_tokens = FLAGS.reserved_tokens
  if FLAGS.num_pad_tokens:
    padded_tokens = ['<pad>']
    padded_tokens += ['<pad%d>' % i for i in range(1, FLAGS.num_pad_tokens)]
    reserved_tokens = padded_tokens + reserved_tokens

  params = learner.Params(FLAGS.upper_thresh, FLAGS.lower_thresh,
                          FLAGS.num_iterations, FLAGS.max_input_tokens,
                          FLAGS.max_token_length, FLAGS.max_unique_chars,
                          FLAGS.vocab_size, FLAGS.slack_ratio,
                          FLAGS.include_joiner_token, FLAGS.joiner,
                          reserved_tokens)

  generate_vocab(FLAGS.data_file, FLAGS.vocab_file, FLAGS.metrics_file,
                 raw_metadata, params) 
Developer: tensorflow, Project: text, Lines: 26, Source: generate_vocab.py

Example 5: __init__

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def __init__(self,
               dataset,
               tf_metadata_schema,
               preprocessing_fn,
               transform_input_dataset_metadata,
               generate_dataset=False):
    """Constructor.

    Args:
      dataset: BenchmarkDataset object.
      tf_metadata_schema: tf.Metadata schema.
      preprocessing_fn: preprocessing_fn.
      transform_input_dataset_metadata: dataset_metadata.DatasetMetadata.
      generate_dataset: If True, generates the raw dataset and appropriate
        intermediate outputs (just the TFT SavedModel for now) necessary for
        other benchmarks.
    """
    self._dataset = dataset
    self._tf_metadata_schema = tf_metadata_schema
    self._preprocessing_fn = preprocessing_fn
    self._transform_input_dataset_metadata = transform_input_dataset_metadata
    self._generate_dataset = generate_dataset 
Developer: tensorflow, Project: tfx, Lines: 24, Source: tft_benchmark_base.py

Example 6: _get_common_variables

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _get_common_variables(dataset):
  """Returns metadata schema, preprocessing fn, input dataset metadata."""

  tf_metadata_schema = benchmark_utils.read_schema(
      dataset.tf_metadata_schema_path())

  preprocessing_fn = dataset.tft_preprocessing_fn()

  feature_spec = schema_utils.schema_as_feature_spec(
      tf_metadata_schema).feature_spec
  transform_input_columns = (
      tft.get_transform_input_columns(preprocessing_fn, feature_spec))
  transform_input_dataset_metadata = dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec({
          feature: feature_spec[feature] for feature in transform_input_columns
      }))

  return CommonVariablesTuple(
      tf_metadata_schema=tf_metadata_schema,
      preprocessing_fn=preprocessing_fn,
      transform_input_dataset_metadata=transform_input_dataset_metadata) 
Developer: tensorflow, Project: tfx, Lines: 23, Source: tft_benchmark_base.py

Example 7: _GetSchemaProto

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _GetSchemaProto(
    metadata: dataset_metadata.DatasetMetadata) -> schema_pb2.Schema:
  """Gets the schema proto associated with a DatasetMetadata.

  This is needed because tensorflow_transform 0.13 and 0.14 have different
  APIs for DatasetMetadata.

  Args:
    metadata: A dataset_metadata.DatasetMetadata.

  Returns:
    A schema_pb2.Schema.
  """
  # `schema` is either a Schema proto or dataset_schema.Schema.
  schema = metadata.schema
  # In the case where it's a dataset_schema.Schema, fetch the schema proto.
  return getattr(schema, '_schema_proto', schema) 
Developer: tensorflow, Project: tfx, Lines: 19, Source: executor.py

Example 8: _ReadExamples

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _ReadExamples(
      pipeline: beam.Pipeline, dataset: _Dataset,
      input_dataset_metadata: dataset_metadata.DatasetMetadata
  ) -> beam.pvalue.PCollection:
    """Reads examples from the given `dataset`.

    Args:
      pipeline: beam pipeline.
      dataset: A `_Dataset` object that represents the data to read.
      input_dataset_metadata: A `dataset_metadata.DatasetMetadata`. Not used.

    Returns:
      A PCollection containing KV pairs of bytes.
    """
    del input_dataset_metadata
    assert dataset.file_format == labels.FORMAT_TFRECORD, dataset.file_format

    return (
        pipeline
        | 'Read' >> beam.io.ReadFromTFRecord(
            dataset.file_pattern,
            coder=beam.coders.BytesCoder(),
            # TODO(b/114938612): Eventually remove this override.
            validate=False)
        | 'AddKey' >> beam.Map(lambda x: (None, x))) 
Developer: tensorflow, Project: tfx, Lines: 27, Source: executor.py

Example 9: _ReadMetadata

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _ReadMetadata(self, data_format: Text,
                    schema_path: Text) -> dataset_metadata.DatasetMetadata:
    """Returns a dataset_metadata.DatasetMetadata for the input data.

    Args:
      data_format: name of the input data format.
      schema_path: path to schema file.

    Returns:
      A dataset_metadata.DatasetMetadata representing the provided set of
          columns.
    """

    if self._ShouldDecodeAsRawExample(data_format):
      return dataset_metadata.DatasetMetadata(_RAW_EXAMPLE_SCHEMA)
    schema_proto = self._GetSchema(schema_path)
    # For compatibility with tensorflow_transform 0.13 and 0.14, we create and
    # then update a DatasetMetadata.
    result = dataset_metadata.DatasetMetadata(dataset_schema.Schema({}))
    _GetSchemaProto(result).CopyFrom(schema_proto)
    return result 
Developer: tensorflow, Project: tfx, Lines: 23, Source: executor.py

Example 10: store_transformed_data

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def store_transformed_data(data, schema, path, name=''):
  """Stores data from input pipeline into TFRecord in the specified path.

  Args:
    data: `PCollection`, input pipeline.
    schema: `DatasetMetadata` object, describes the schema of the input pipeline.
    path: string, where to write output.
    name: string, name describing the pipeline to be written.

  Returns:
    PCollection
  """

  p = (
      data
      | 'WriteData{}'.format(name) >> tfrecordio.WriteToTFRecord(
          path, coder=example_proto_coder.ExampleProtoCoder(schema.schema)))
  return p 
Developer: GoogleCloudPlatform, Project: professional-services, Lines: 20, Source: preprocess.py

Example 11: main

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def main():
  def preprocessing_fn(inputs):
    """Preprocess input columns into transformed columns."""
    x = inputs['x']
    y = inputs['y']
    s = inputs['s']
    x_centered = x - tft.mean(x)
    y_normalized = tft.scale_to_0_1(y)
    s_integerized = tft.compute_and_apply_vocabulary(s)
    x_centered_times_y_normalized = (x_centered * y_normalized)
    return {
        'x_centered': x_centered,
        'y_normalized': y_normalized,
        'x_centered_times_y_normalized': x_centered_times_y_normalized,
        's_integerized': s_integerized
    }

  raw_data = [
      {'x': 1, 'y': 1, 's': 'hello'},
      {'x': 2, 'y': 2, 's': 'world'},
      {'x': 3, 'y': 3, 's': 'hello'}
  ]

  raw_data_metadata = dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec({
          's': tf.io.FixedLenFeature([], tf.string),
          'y': tf.io.FixedLenFeature([], tf.float32),
          'x': tf.io.FixedLenFeature([], tf.float32),
      }))

  with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
    transformed_dataset, transform_fn = (  # pylint: disable=unused-variable
        (raw_data, raw_data_metadata) | tft_beam.AnalyzeAndTransformDataset(
            preprocessing_fn))

  transformed_data, transformed_metadata = transformed_dataset  # pylint: disable=unused-variable

  pprint.pprint(transformed_data) 
Developer: tensorflow, Project: transform, Lines: 40, Source: simple_example.py
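Because the inputs in Example 11 are fixed, the transformed values can be checked by hand: x = [1, 2, 3] has mean 2, so x_centered is [-1, 0, 1]; y scales to [0.0, 0.5, 1.0]; the more frequent token 'hello' gets vocabulary index 0 and 'world' gets 1; and the product column is [-0.0, 0.0, 1.0]. The pprint output should therefore look roughly like this (key order and -0.0 vs 0.0 may vary):

[{'s_integerized': 0,
  'x_centered': -1.0,
  'x_centered_times_y_normalized': -0.0,
  'y_normalized': 0.0},
 {'s_integerized': 1,
  'x_centered': 0.0,
  'x_centered_times_y_normalized': 0.0,
  'y_normalized': 0.5},
 {'s_integerized': 0,
  'x_centered': 1.0,
  'x_centered_times_y_normalized': 1.0,
  'y_normalized': 1.0}]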

Example 12: metadata_from_feature_spec

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def metadata_from_feature_spec(feature_spec, domains=None):
  """Construct a DatasetMetadata from a feature spec.

  Args:
    feature_spec: A feature spec
    domains: A dict containing domains of features

  Returns:
    A `tft.tf_metadata.dataset_metadata.DatasetMetadata` object.
  """
  return dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec(feature_spec, domains)) 
Developer: tensorflow, Project: transform, Lines: 14, Source: tft_unit.py
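A hypothetical call with the optional domains argument, assuming schema_from_feature_spec accepts a mapping from feature name to a tensorflow_metadata domain proto; the 'age' feature and its bounds are illustrative only:

import tensorflow as tf
from tensorflow_metadata.proto.v0 import schema_pb2

metadata = metadata_from_feature_spec(
    {'age': tf.io.FixedLenFeature([], tf.int64)},
    domains={'age': schema_pb2.IntDomain(min=0, max=120)})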

Example 13: _infer_metadata_from_saved_model

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _infer_metadata_from_saved_model(saved_model_dir):
  """Infers a DatasetMetadata for outputs of a SavedModel."""
  with tf.compat.v1.Graph().as_default() as graph:
    with tf.compat.v1.Session(graph=graph) as session:
      _, outputs = (
          saved_transform_io.partially_apply_saved_transform_internal(
              saved_model_dir, {}))

      session.run(tf.compat.v1.global_variables_initializer())
      session.run(tf.compat.v1.tables_initializer())
      return dataset_metadata.DatasetMetadata(
          schema=schema_inference.infer_feature_schema(outputs, graph, session)) 
Developer: tensorflow, Project: transform, Lines: 14, Source: impl.py

Example 14: _remove_columns_from_metadata

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def _remove_columns_from_metadata(metadata, excluded_columns):
  """Remove columns from metadata without mutating original metadata."""
  feature_spec, domains = schema_utils.schema_as_feature_spec(metadata.schema)
  new_feature_spec = {name: spec for name, spec in feature_spec.items()
                      if name not in excluded_columns}
  new_domains = {name: spec for name, spec in domains.items()
                 if name not in excluded_columns}
  return dataset_metadata.DatasetMetadata(
      schema_utils.schema_from_feature_spec(new_feature_spec, new_domains)) 
Developer: tensorflow, Project: transform, Lines: 11, Source: impl.py
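A hypothetical usage sketch, assuming `metadata` was built from a feature spec containing features 'a', 'b', and 'c':

pruned = _remove_columns_from_metadata(metadata, excluded_columns={'b'})
# pruned.schema now covers only 'a' and 'c'; the original `metadata` is untouched.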

Example 15: write_metadata

# Required import: from tensorflow_transform.tf_metadata import dataset_metadata [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_metadata import DatasetMetadata [as alias]
def write_metadata(metadata, path):
  """Write metadata to given path, in JSON format.

  Args:
    metadata: A `DatasetMetadata` to write.
    path: a path to a directory where metadata should be written.
  """
  if not file_io.file_exists(path):
    file_io.recursive_create_dir(path)
  schema_file = os.path.join(path, 'schema.pbtxt')
  ascii_proto = text_format.MessageToString(metadata.schema)
  file_io.atomic_write_string_to_file(schema_file, ascii_proto, overwrite=True) 
Developer: tensorflow, Project: transform, Lines: 14, Source: metadata_io.py
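Examples 3 and 15 form a natural pair. A hypothetical round trip through the same directory, assuming both functions are exposed via tensorflow_transform.tf_metadata.metadata_io, might look like:

from tensorflow_transform.tf_metadata import metadata_io

metadata_io.write_metadata(raw_data_metadata, '/tmp/metadata_dir')  # writes schema.pbtxt
restored = metadata_io.read_metadata('/tmp/metadata_dir')  # returns a DatasetMetadata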


Note: The tensorflow_transform.tf_metadata.dataset_metadata.DatasetMetadata examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from community-contributed open-source projects; copyright belongs to the original authors, and use or redistribution must follow the corresponding project's license. Do not reproduce without permission.