This article collects typical usage examples of the Python method tensorflow_transform.tf_metadata.dataset_schema.from_feature_spec. If you have been wondering what dataset_schema.from_feature_spec does, how to call it, or what idiomatic usage looks like, the curated examples below should help. You can also explore further usage examples from the enclosing module, tensorflow_transform.tf_metadata.dataset_schema.
Below are 15 code examples of dataset_schema.from_feature_spec, sorted by popularity by default.
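Before the examples, a minimal sketch of the call itself may help (the feature names here are illustrative): from_feature_spec converts an ordinary tf.parse_example-style feature spec into a `Schema` object, which is usually wrapped in a `DatasetMetadata` for use in tf.Transform pipelines.

import tensorflow as tf
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import dataset_schema

# A hypothetical feature spec; any tf.parse_example-style spec works here.
feature_spec = {
    'age': tf.FixedLenFeature(shape=[], dtype=tf.int64),
    'tags': tf.VarLenFeature(dtype=tf.string),
}
schema = dataset_schema.from_feature_spec(feature_spec)  # -> Schema
metadata = dataset_metadata.DatasetMetadata(schema)

Note that these examples target the TF 1.x era of tf.Transform; later releases moved this functionality to tft.tf_metadata.schema_utils.schema_from_feature_spec.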
Example 1: make_input_schema
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_input_schema(mode=tf.contrib.learn.ModeKeys.TRAIN):
  """Input schema definition.

  Args:
    mode: tf.contrib.learn.ModeKeys specifying if the schema is being used for
      train/eval or prediction.
  Returns:
    A `Schema` object.
  """
  result = ({} if mode == tf.contrib.learn.ModeKeys.INFER
            else {'clicked': tf.FixedLenFeature(shape=[], dtype=tf.int64)})
  for name in INTEGER_COLUMN_NAMES:
    result[name] = tf.FixedLenFeature(
        shape=[], dtype=tf.int64, default_value=-1)
  for name in CATEGORICAL_COLUMN_NAMES:
    result[name] = tf.FixedLenFeature(shape=[], dtype=tf.string,
                                      default_value='')
  return dataset_schema.from_feature_spec(result)
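A brief usage sketch (INTEGER_COLUMN_NAMES and CATEGORICAL_COLUMN_NAMES are assumed to be defined in the original module): in INFER mode the label feature 'clicked' is omitted from the schema, since it is not available at prediction time.

# Hypothetical calls illustrating the mode switch:
train_schema = make_input_schema()                                        # includes 'clicked'
serving_schema = make_input_schema(mode=tf.contrib.learn.ModeKeys.INFER)  # label omitted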
Example 2: _make_schema
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def _make_schema(columns, types, default_values):
  """Input schema definition.

  Args:
    columns: column names for fields appearing in input.
    types: column types for fields appearing in input.
    default_values: default values for fields appearing in input.
  Returns:
    A `Schema` object built from the resulting feature spec.
  """
  result = {}
  assert len(columns) == len(types)
  assert len(columns) == len(default_values)
  for c, t, v in zip(columns, types, default_values):
    if isinstance(t, list):
      result[c] = tf.VarLenFeature(dtype=t[0])
    else:
      result[c] = tf.FixedLenFeature(shape=[], dtype=t, default_value=v)
  return dataset_schema.from_feature_spec(result)
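A hypothetical call to _make_schema, sketching how a list-valued type selects tf.VarLenFeature (note that the default value is unused in that branch, since VarLenFeature takes no default):

schema = _make_schema(
    columns=['user_id', 'tags', 'weight'],
    types=[tf.int64, [tf.string], tf.float32],  # [tf.string] -> VarLenFeature
    default_values=[-1, None, 0.0])             # None is ignored for 'tags'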
Example 3: make_input_schema
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_input_schema(mode=tf.contrib.learn.ModeKeys.TRAIN):
  """Input schema definition.

  Args:
    mode: tf.contrib.learn.ModeKeys specifying if the schema is being used for
      train/eval or prediction.
  Returns:
    A `Schema` object.
  """
  result = ({} if mode == tf.contrib.learn.ModeKeys.INFER else {
      'score': tf.FixedLenFeature(shape=[], dtype=tf.float32)
  })
  result.update({
      'subreddit': tf.FixedLenFeature(shape=[], dtype=tf.string),
      'author': tf.FixedLenFeature(shape=[], dtype=tf.string),
      'comment_body': tf.FixedLenFeature(shape=[], dtype=tf.string,
                                         default_value=''),
      'comment_parent_body': tf.FixedLenFeature(shape=[], dtype=tf.string,
                                                default_value=''),
      'toplevel': tf.FixedLenFeature(shape=[], dtype=tf.int64),
  })
  return dataset_schema.from_feature_spec(result)
Example 4: main
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def main(_):
  # Define schema.
  raw_metadata = dataset_metadata.DatasetMetadata(
      dataset_schema.from_feature_spec({
          'text': tf.FixedLenFeature([], tf.string),
          'language_code': tf.FixedLenFeature([], tf.string),
      }))

  # Add in padding tokens.
  reserved_tokens = FLAGS.reserved_tokens
  if FLAGS.num_pad_tokens:
    padded_tokens = ['<pad>']
    padded_tokens += ['<pad%d>' % i for i in range(1, FLAGS.num_pad_tokens)]
    reserved_tokens = padded_tokens + reserved_tokens

  params = learner.Params(FLAGS.upper_thresh, FLAGS.lower_thresh,
                          FLAGS.num_iterations, FLAGS.max_input_tokens,
                          FLAGS.max_token_length, FLAGS.max_unique_chars,
                          FLAGS.vocab_size, FLAGS.slack_ratio,
                          FLAGS.include_joiner_token, FLAGS.joiner,
                          reserved_tokens)

  generate_vocab(FLAGS.data_file, FLAGS.vocab_file, FLAGS.metrics_file,
                 raw_metadata, params)
Example 5: make_input_schema
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_input_schema():
  """Builds the schema of the data read from BigQuery.

  Appends a key column to the schema for inference.

  Returns:
    A `Schema` object built from a dictionary mapping column names to
    `tf.FixedLenFeature` instances.
  """
  feature_spec = {}
  for c in constants.FEATURE_COLUMNS:
    feature_spec[c] = tf.FixedLenFeature(shape=[], dtype=tf.float32)
  feature_spec[constants.LABEL_COLUMN] = tf.FixedLenFeature(
      shape=[], dtype=tf.int64)
  feature_spec[constants.KEY_COLUMN] = tf.FixedLenFeature(
      shape=[], dtype=tf.int64)
  return dataset_schema.from_feature_spec(feature_spec)
Example 6: _run_tft_fn
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def _run_tft_fn(raw_data, tft_fn, transform_fn_path, user_freq, item_freq):
  """Applies the TensorFlow Transform function to the given data.

  Args:
    raw_data: a PCollection of dicts of the form
      {$user_key: $user_id, $item_key: ...}.
    tft_fn: a TensorFlow Transform function.
    transform_fn_path: the location to save transformation outputs to.
    user_freq: minimum frequency of a user to include it in the user vocab.
    item_freq: minimum frequency of an item to include it in the item vocab.
  Returns:
    A PCollection of dicts, where each dict is an element of raw_data with the
    preprocess_fn applied to it:
    {$user_key: $user_id, $item_key: $item_id, $count_key: $count}.
  """
  raw_data_metadata = tft.tf_metadata.dataset_metadata.DatasetMetadata(
      tft.tf_metadata.dataset_schema.from_feature_spec(constants.TRAIN_SPEC))
  transformed_dataset, transform_fn = (
      (raw_data, raw_data_metadata)
      | beam_impl.AnalyzeAndTransformDataset(
          lambda x: tft_fn(x, user_freq, item_freq)))
  (transform_fn | "WriteTransformFn" >>
   tft.beam.tft_beam_io.transform_fn_io.WriteTransformFn(
       os.path.join(transform_fn_path, "transform_fn")))
  return transformed_dataset[0]
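The AnalyzeAndTransformDataset step above must run inside a tf.Transform Beam context that provides a temporary directory. A sketch of how _run_tft_fn might be driven (raw_dicts and my_preprocessing_fn are hypothetical):

import apache_beam as beam

with beam.Pipeline() as p:
  with beam_impl.Context(temp_dir='/tmp/tft_tmp'):
    raw_data = p | 'CreateInput' >> beam.Create(raw_dicts)  # list of feature dicts
    transformed = _run_tft_fn(raw_data, my_preprocessing_fn,
                              '/tmp/transform_output',
                              user_freq=5, item_freq=5)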
Example 7: WriteOutput
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def WriteOutput(p, prefix, output_dir, feature_spec, plain_text=False):
  """Writes the given PCollection as a TFRecord.

  Args:
    p: a PCollection.
    prefix: prefix for the location the TFRecord will be written to.
    output_dir: the directory or bucket to write the output data to.
    feature_spec: the feature spec of the TFRecord to be written.
    plain_text: if true, write the output as plain text instead.
  """
  path = os.path.join(output_dir, prefix)
  shuffled = p | "ShuffleData" >> Shuffle()  # pylint: disable=no-value-for-parameter

  if plain_text:
    shuffled | "WriteToText" >> beam.io.WriteToText(
        path, file_name_suffix=".txt")
    return

  schema = dataset_schema.from_feature_spec(feature_spec)
  coder = coders.ExampleProtoCoder(schema)
  shuffled | "WriteTFRecord" >> beam.io.tfrecordio.WriteToTFRecord(
      path,
      coder=coder,
      file_name_suffix=".tfrecord")
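A hypothetical invocation, assuming examples is a PCollection of dicts that match the feature spec:

WriteOutput(examples, 'train', 'gs://my-bucket/output',
            {'text': tf.FixedLenFeature([], tf.string)})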
Example 8: __init__
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def __init__(self, feature_spec):
  super(ExampleWithFeatureSpecDecoder, self).__init__()
  schema = dataset_schema.from_feature_spec(feature_spec)
  self._coder = example_proto_coder.ExampleProtoCoder(schema)
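Since this class delegates to ExampleProtoCoder, a minimal round-trip sketch of the underlying coder may help (the feature name and value are illustrative; in the tf.Transform versions these examples target, ExampleProtoCoder provides both encode and decode):

schema = dataset_schema.from_feature_spec(
    {'text': tf.FixedLenFeature([], tf.string)})
coder = example_proto_coder.ExampleProtoCoder(schema)
serialized = coder.encode({'text': b'hello'})  # feature dict -> serialized tf.Example
features = coder.decode(serialized)            # serialized tf.Example -> feature dict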
Example 9: main
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def main(_):
  # Generate schema of input data.
  raw_metadata = dataset_metadata.DatasetMetadata(
      dataset_schema.from_feature_spec({
          'text': tf.FixedLenFeature([], tf.string),
          'language_code': tf.FixedLenFeature([], tf.string),
      }))

  pipeline = word_count(FLAGS.input_path, FLAGS.output_path, raw_metadata)
  pipeline.run().wait_until_finish()
Example 10: _make_proto_coder
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def _make_proto_coder(schema):
  raw_feature_spec = _get_raw_feature_spec(schema)
  raw_schema = dataset_schema.from_feature_spec(raw_feature_spec)
  return tft_coders.ExampleProtoCoder(raw_schema)
Example 11: _make_csv_coder
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def _make_csv_coder(schema, column_names):
  """Return a coder for tf.transform to read CSV files."""
  raw_feature_spec = _get_raw_feature_spec(schema)
  parsing_schema = dataset_schema.from_feature_spec(raw_feature_spec)
  return tft_coders.CsvCoder(column_names, parsing_schema)
Example 12: run
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def run(p, params):
  """Defines the Beam preprocessing pipeline.

  Performs the following:
    - Reads text files from the input pattern.
    - Splits the text files into train and validation sets.

  Args:
    p: PCollection, initial pipeline.
    params: Object holding a set of parameters as name-value pairs.
  """
  path_pattern = os.path.join(params.input_dir, '*', '*{}'.format(
      constants.FILE_EXTENSION))
  data = (
      p
      | 'ListFiles' >> beam.Create(gfile.Glob(path_pattern))
      | 'ReadFiles' >> beam.ParDo(ReadFile())
      | 'SplitData' >> beam.ParDo(
          _SplitData(),
          train_size=params.train_size,
          val_label=_DatasetType.VAL.name).with_outputs(
              _DatasetType.VAL.name, main=_DatasetType.TRAIN.name))

  schema = dataset_schema.from_feature_spec(utils.get_processed_data_schema())
  for dataset in _DatasetType:
    if not dataset.value:
      continue
    _ = (
        data[dataset.name]
        | 'Shuffle{}'.format(dataset.name) >> shuffle()  # pylint: disable=no-value-for-parameter
        | 'WriteFiles{}'.format(dataset.name) >> tfrecordio.WriteToTFRecord(
            os.path.join(params.output_dir, dataset.name + constants.TFRECORD),
            coder=example_proto_coder.ExampleProtoCoder(schema)))
Example 13: get_raw_dataset_metadata
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def get_raw_dataset_metadata():
  return dataset_metadata.DatasetMetadata(
      dataset_schema.from_feature_spec(RAW_FEATURE_SPEC))
Example 14: make_proto_coder
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_proto_coder(schema):
  raw_feature_spec = get_raw_feature_spec(schema)
  raw_schema = dataset_schema.from_feature_spec(raw_feature_spec)
  return tft_coders.ExampleProtoCoder(raw_schema)
Example 15: make_csv_coder
# Required import: from tensorflow_transform.tf_metadata import dataset_schema [as alias]
# Or: from tensorflow_transform.tf_metadata.dataset_schema import from_feature_spec [as alias]
def make_csv_coder(schema):
  """Return a coder for tf.transform to read CSV files."""
  raw_feature_spec = get_raw_feature_spec(schema)
  parsing_schema = dataset_schema.from_feature_spec(raw_feature_spec)
  return tft_coders.CsvCoder(CSV_COLUMN_NAMES, parsing_schema)
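A hypothetical decode call showing what the returned coder does (the CSV row and its column order are illustrative and must match CSV_COLUMN_NAMES):

csv_coder = make_csv_coder(schema)
features = csv_coder.decode('5.1,3.5,1.4,0.2,setosa')  # one CSV row -> feature dict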