

Python dataset_schema.from_feature_spec Method Code Examples

This article collects typical code examples of the Python method tensorflow_transform.tf_metadata.dataset_schema.from_feature_spec. If you have been wondering exactly what dataset_schema.from_feature_spec does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples for the containing module, tensorflow_transform.tf_metadata.dataset_schema.


Fifteen code examples of dataset_schema.from_feature_spec are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps surface better Python code examples.
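Before diving into the examples, here is a minimal, self-contained sketch of what the method does (written against the TF 1.x-era tensorflow_transform API used throughout these examples; the tiny feature spec is invented for illustration):

import tensorflow as tf
from tensorflow_transform.tf_metadata import dataset_schema

# A feature spec maps column names to the same tf.FixedLenFeature /
# tf.VarLenFeature parsing configurations accepted by tf.parse_example.
feature_spec = {
    'age': tf.FixedLenFeature(shape=[], dtype=tf.int64, default_value=-1),
    'name': tf.FixedLenFeature(shape=[], dtype=tf.string, default_value=''),
    'tags': tf.VarLenFeature(dtype=tf.string),
}

# from_feature_spec converts the spec into a `Schema` object, which
# tf.Transform's coders, metadata wrappers, and Beam pipelines consume.
schema = dataset_schema.from_feature_spec(feature_spec)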

Example 1: make_input_schema

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_input_schema(mode=tf.contrib.learn.ModeKeys.TRAIN):
  """Input schema definition.

  Args:
    mode: tf.contrib.learn.ModeKeys specifying if the schema is being used for
      train/eval or prediction.
  Returns:
    A `Schema` object.
  """
  result = ({} if mode == tf.contrib.learn.ModeKeys.INFER
            else {'clicked': tf.FixedLenFeature(shape=[], dtype=tf.int64)})
  for name in INTEGER_COLUMN_NAMES:
    result[name] = tf.FixedLenFeature(
        shape=[], dtype=tf.int64, default_value=-1)
  for name in CATEGORICAL_COLUMN_NAMES:
    result[name] = tf.FixedLenFeature(shape=[], dtype=tf.string,
                                      default_value='')

  return dataset_schema.from_feature_spec(result) 
Developer: GoogleCloudPlatform, Project: cloudml-samples, Lines: 21, Source: criteo.py

Example 2: _make_schema

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def _make_schema(columns, types, default_values):
  """Input schema definition.

  Args:
    columns: column names for fields appearing in input.
    types: column types for fields appearing in input.
    default_values: default values for fields appearing in input.
  Returns:
    A `Schema` object built from the assembled feature spec.
  """
  result = {}
  assert len(columns) == len(types)
  assert len(columns) == len(default_values)
  for c, t, v in zip(columns, types, default_values):
    if isinstance(t, list):
      result[c] = tf.VarLenFeature(dtype=t[0])
    else:
      result[c] = tf.FixedLenFeature(shape=[], dtype=t, default_value=v)
  return dataset_schema.from_feature_spec(result) 
Developer: GoogleCloudPlatform, Project: cloudml-samples, Lines: 21, Source: movielens.py
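For illustration, a hypothetical call (column names, types, and defaults invented): a list-valued type yields a tf.VarLenFeature, anything else a scalar tf.FixedLenFeature:

schema = _make_schema(
    columns=['user_id', 'genres'],
    types=[tf.int64, [tf.string]],   # a list marks a variable-length column
    default_values=[-1, None])       # default is ignored for VarLenFeature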

Example 3: make_input_schema

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_input_schema(mode=tf.contrib.learn.ModeKeys.TRAIN):
  """Input schema definition.

  Args:
    mode: tf.contrib.learn.ModeKeys specifying if the schema is being used for
      train/eval or prediction.
  Returns:
    A `Schema` object.
  """
  result = ({} if mode == tf.contrib.learn.ModeKeys.INFER else {
      'score': tf.FixedLenFeature(shape=[], dtype=tf.float32)
  })
  result.update({
      'subreddit': tf.FixedLenFeature(shape=[], dtype=tf.string),
      'author': tf.FixedLenFeature(shape=[], dtype=tf.string),
      'comment_body': tf.FixedLenFeature(shape=[], dtype=tf.string,
                                         default_value=''),
      'comment_parent_body': tf.FixedLenFeature(shape=[], dtype=tf.string,
                                                default_value=''),
      'toplevel': tf.FixedLenFeature(shape=[], dtype=tf.int64),
  })
  return dataset_schema.from_feature_spec(result) 
Developer: GoogleCloudPlatform, Project: cloudml-samples, Lines: 24, Source: reddit.py

Example 4: main

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def main(_):
  # Define schema.
  raw_metadata = dataset_metadata.DatasetMetadata(
      dataset_schema.from_feature_spec({
          'text': tf.FixedLenFeature([], tf.string),
          'language_code': tf.FixedLenFeature([], tf.string),
      }))

  # Add in padding tokens.
  reserved_tokens = FLAGS.reserved_tokens
  if FLAGS.num_pad_tokens:
    padded_tokens = ['<pad>']
    padded_tokens += ['<pad%d>' % i for i in range(1, FLAGS.num_pad_tokens)]
    reserved_tokens = padded_tokens + reserved_tokens

  params = learner.Params(FLAGS.upper_thresh, FLAGS.lower_thresh,
                          FLAGS.num_iterations, FLAGS.max_input_tokens,
                          FLAGS.max_token_length, FLAGS.max_unique_chars,
                          FLAGS.vocab_size, FLAGS.slack_ratio,
                          FLAGS.include_joiner_token, FLAGS.joiner,
                          reserved_tokens)

  generate_vocab(FLAGS.data_file, FLAGS.vocab_file, FLAGS.metrics_file,
                 raw_metadata, params) 
Developer: tensorflow, Project: text, Lines: 26, Source: generate_vocab.py

Example 5: make_input_schema

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_input_schema():
  """Builds the schema of the data read from BigQuery.

  Appends a key column to the schema for inference.

  Returns:
    A `Schema` object covering the feature, label, and key columns.
  """

  feature_spec = {}
  for c in constants.FEATURE_COLUMNS:
    feature_spec[c] = tf.FixedLenFeature(shape=[], dtype=tf.float32)
  feature_spec[constants.LABEL_COLUMN] = tf.FixedLenFeature(
      shape=[], dtype=tf.int64)
  feature_spec[constants.KEY_COLUMN] = tf.FixedLenFeature(
      shape=[], dtype=tf.int64)

  return dataset_schema.from_feature_spec(feature_spec) 
Developer: GoogleCloudPlatform, Project: professional-services, Lines: 20, Source: preprocess.py

Example 6: _run_tft_fn

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def _run_tft_fn(raw_data, tft_fn, transform_fn_path, user_freq, item_freq):
  """Applys the TensorFlow Transform function to the given data.

  Args:
    raw_data: a dict of shape {$user_key: $user_id, $item_key: ...}.
    tft_fn: a TensorFlow Transform function.
    transform_fn_path: the location to save transformation outputs to.
    user_freq: minimum frequency of a user to include it in the user vocab.
    item_freq: minimum frequency of an item to include it in the item vocab.

  Returns:
    A PCollection of dicts, where each dict is an element of raw_data with
      tft_fn applied to it:
      {$user_key: $user_id, $item_key: $item_id, $count_key: $count}.
  """
  raw_data_metadata = tft.tf_metadata.dataset_metadata.DatasetMetadata(
      tft.tf_metadata.dataset_schema.from_feature_spec(constants.TRAIN_SPEC))
  transformed_dataset, transform_fn = (
      (raw_data, raw_data_metadata)
      | beam_impl.AnalyzeAndTransformDataset(
          lambda x: tft_fn(x, user_freq, item_freq)))
  (transform_fn | "WriteTransformFn" >>
   tft.beam.tft_beam_io.transform_fn_io.WriteTransformFn(
       os.path.join(transform_fn_path, "transform_fn")))
  return transformed_dataset[0] 
Developer: GoogleCloudPlatform, Project: professional-services, Lines: 27, Source: preprocess.py

Example 7: WriteOutput

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def WriteOutput(p, prefix, output_dir, feature_spec, plain_text=False):
  """Writes the given pCollection as a TF-Record.

  Args:
    p: a pCollection.
    prefix: prefix for location tf-record will be written to.
    output_dir: the directory or bucket to write the json data.
    feature_spec: the feature spec of the tf-record to be written.
    plain_text: if true, write the output as plain text instead.
  """
  path = os.path.join(output_dir, prefix)
  shuffled = p | "ShuffleData" >> Shuffle()  # pylint: disable=no-value-for-parameter

  if plain_text:
    shuffled | "WriteToText" >> beam.io.WriteToText(
        path, file_name_suffix=".txt")
    return

  schema = dataset_schema.from_feature_spec(feature_spec)
  coder = coders.ExampleProtoCoder(schema)
  shuffled | "WriteTFRecord" >> beam.io.tfrecordio.WriteToTFRecord(
      path,
      coder=coder,
      file_name_suffix=".tfrecord") 
Developer: GoogleCloudPlatform, Project: professional-services, Lines: 26, Source: preprocess.py

Example 8: __init__

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def __init__(self, feature_spec):
    super(ExampleWithFeatureSpecDecoder, self).__init__()
    schema = dataset_schema.from_feature_spec(feature_spec)
    self._coder = example_proto_coder.ExampleProtoCoder(schema)
Developer: spotify, Project: spotify-tensorflow, Lines: 6, Source: example_decoders.py
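As a usage sketch (the feature spec here is invented for illustration, and the round-trip assumes the encode/decode API of tf.Transform's ExampleProtoCoder used above):

schema = dataset_schema.from_feature_spec({
    'text': tf.FixedLenFeature([], tf.string),
})
coder = example_proto_coder.ExampleProtoCoder(schema)
serialized = coder.encode({'text': 'hello'})  # dict -> serialized tf.Example
restored = coder.decode(serialized)           # serialized tf.Example -> dict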

Example 9: main

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def main(_):
  # Generate schema of input data.
  raw_metadata = dataset_metadata.DatasetMetadata(
      dataset_schema.from_feature_spec({
          'text': tf.FixedLenFeature([], tf.string),
          'language_code': tf.FixedLenFeature([], tf.string),
      }))

  pipeline = word_count(FLAGS.input_path, FLAGS.output_path, raw_metadata)
  pipeline.run().wait_until_finish() 
Developer: tensorflow, Project: text, Lines: 12, Source: generate_word_counts.py

Example 10: _make_proto_coder

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def _make_proto_coder(schema):
  raw_feature_spec = _get_raw_feature_spec(schema)
  raw_schema = dataset_schema.from_feature_spec(raw_feature_spec)
  return tft_coders.ExampleProtoCoder(raw_schema) 
Developer: tensorflow, Project: tfx, Lines: 6, Source: chicago_taxi_client.py

Example 11: _make_csv_coder

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def _make_csv_coder(schema, column_names):
  """Return a coder for tf.transform to read csv files."""
  raw_feature_spec = _get_raw_feature_spec(schema)
  parsing_schema = dataset_schema.from_feature_spec(raw_feature_spec)
  return tft_coders.CsvCoder(column_names, parsing_schema) 
Developer: tensorflow, Project: tfx, Lines: 7, Source: chicago_taxi_client.py
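A brief usage sketch of the resulting coder (column names and values invented; assumes the CsvCoder.decode API from the same tf.Transform era as these examples):

feature_spec = {
    'trip_miles': tf.FixedLenFeature([], tf.float32),
    'company': tf.FixedLenFeature([], tf.string, default_value=''),
}
parsing_schema = dataset_schema.from_feature_spec(feature_spec)
csv_coder = tft_coders.CsvCoder(['trip_miles', 'company'], parsing_schema)
# decode() parses one CSV line into a feature dict keyed by column name.
row = csv_coder.decode('1.5,Flash Cab')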

Example 12: run

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def run(p, params):
  """Defines Beam preprocessing pipeline.

  Performs the following:
    - Reads text files matching a pattern.
    - Splits the text files into train and validation sets.

  Args:
    p: PCollection, initial pipeline.
    params: Object holding a set of parameters as name-value pairs.
  """

  path_pattern = os.path.join(params.input_dir, '*', '*{}'.format(
      constants.FILE_EXTENSION))
  data = (
      p
      | 'ListFiles' >> beam.Create(gfile.Glob(path_pattern))
      | 'ReadFiles' >> beam.ParDo(ReadFile())
      | 'SplitData' >> beam.ParDo(
          _SplitData(),
          train_size=params.train_size,
          val_label=_DatasetType.VAL.name).with_outputs(
              _DatasetType.VAL.name, main=_DatasetType.TRAIN.name))

  schema = dataset_schema.from_feature_spec(utils.get_processed_data_schema())
  for dataset in _DatasetType:
    if not dataset.value:
      continue
    _ = (
        data[dataset.name]
        | 'Shuffle{}'.format(dataset.name) >> shuffle()  # pylint: disable=no-value-for-parameter
        | 'WriteFiles{}'.format(dataset.name) >> tfrecordio.WriteToTFRecord(
            os.path.join(params.output_dir, dataset.name + constants.TFRECORD),
            coder=example_proto_coder.ExampleProtoCoder(schema))) 
Developer: GoogleCloudPlatform, Project: professional-services, Lines: 36, Source: preprocess.py

Example 13: get_raw_dataset_metadata

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def get_raw_dataset_metadata():
    return dataset_metadata.DatasetMetadata(
        dataset_schema.from_feature_spec(RAW_FEATURE_SPEC)) 
Developer: GoogleCloudPlatform, Project: professional-services, Lines: 5, Source: features.py

Example 14: make_proto_coder

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_proto_coder(schema):
  raw_feature_spec = get_raw_feature_spec(schema)
  raw_schema = dataset_schema.from_feature_spec(raw_feature_spec)
  return tft_coders.ExampleProtoCoder(raw_schema) 
Developer: amygdala, Project: code-snippets, Lines: 6, Source: taxi.py

Example 15: make_csv_coder

# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_csv_coder(schema):
  """Return a coder for tf.transform to read csv files."""
  raw_feature_spec = get_raw_feature_spec(schema)
  parsing_schema = dataset_schema.from_feature_spec(raw_feature_spec)
  return tft_coders.CsvCoder(CSV_COLUMN_NAMES, parsing_schema) 
Developer: amygdala, Project: code-snippets, Lines: 7, Source: taxi.py


Note: The tensorflow_transform.tf_metadata.dataset_schema.from_feature_spec examples in this article were compiled by 纯净天空 from open-source code and documentation hosted on GitHub, MSDocs, and similar platforms. The snippets are selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce this article without permission.