This article collects typical usage examples of the Python function tensorflow.contrib.layers.python.layers.feature_column.create_feature_spec_for_parsing. If you are unsure what create_feature_spec_for_parsing does, how to call it, or simply want to see real usage, the curated code samples below should help.
The following presents 15 code examples of create_feature_spec_for_parsing, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
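Before the examples, here is a minimal sketch of what the function produces: given an iterable of `FeatureColumn`s, it returns a dict mapping feature names to `FixedLenFeature`/`VarLenFeature` specs suitable for `tf.parse_example`. This is an illustrative snippet of mine, assuming a TensorFlow 1.x environment where `tf.contrib` is still available; the column names are made up.

import tensorflow as tf
from tensorflow.contrib import layers

# Two illustrative columns: a dense float feature and a sparse string feature.
age = layers.real_valued_column("age")
country = layers.sparse_column_with_keys("country", keys=["US", "BRA"])

spec = layers.create_feature_spec_for_parsing([age, country])
# spec == {"age": FixedLenFeature([1], tf.float32),
#          "country": VarLenFeature(tf.string)}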
Example 1: make_parsing_export_strategy
def make_parsing_export_strategy(feature_columns,
default_output_alternative_key=None,
assets_extra=None,
as_text=False,
exports_to_keep=5,
target_core=False,
strip_default_attrs=False):
# pylint: disable=line-too-long
"""Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.
Creates a SavedModel export that expects to be fed with a single string
Tensor containing serialized tf.Examples. At serving time, incoming
tf.Examples will be parsed according to the provided `FeatureColumn`s.
Args:
feature_columns: An iterable of `FeatureColumn`s representing the features
that must be provided at serving time (excluding labels!).
default_output_alternative_key: the name of the head to serve when an
incoming serving request does not explicitly request a specific head.
Must be `None` if the estimator inherits from `tf.estimator.Estimator`
or for single-headed models.
assets_extra: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
The corresponding value gives the full path of the source file to be
copied. For example, the simple case of copying a single file without
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
as_text: whether to write the SavedModel proto in text format.
exports_to_keep: Number of exports to keep. Older exports will be
garbage-collected. Defaults to 5. Set to None to disable garbage
collection.
target_core: If True, prepare an ExportStrategy for use with
tensorflow.python.estimator.*. If False (default), prepare an
ExportStrategy for use with tensorflow.contrib.learn.python.learn.*.
strip_default_attrs: Boolean. If `True`, default-valued attributes will be
removed from the NodeDefs. For a detailed guide, see
[Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
Returns:
An ExportStrategy that can be passed to the Experiment constructor.
"""
# pylint: enable=line-too-long
feature_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
if target_core:
serving_input_fn = (
core_export.build_parsing_serving_input_receiver_fn(feature_spec))
else:
serving_input_fn = (
input_fn_utils.build_parsing_serving_input_fn(feature_spec))
return make_export_strategy(
serving_input_fn,
default_output_alternative_key=default_output_alternative_key,
assets_extra=assets_extra,
as_text=as_text,
exports_to_keep=exports_to_keep,
strip_default_attrs=strip_default_attrs)
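A hedged usage sketch for the strategy above, assuming a TF 1.x setup with tf.contrib.learn available; the tiny estimator and input_fn are illustrative stand-ins of mine, not part of the original example.

import numpy as np
import tensorflow as tf

feature_columns = [tf.contrib.layers.real_valued_column('feature', dimension=4)]

def _toy_input_fn():
  # Eight random rows of four features with zero labels, just to make fit() run.
  features = {'feature': tf.constant(np.random.rand(8, 4), dtype=tf.float32)}
  labels = tf.constant(np.zeros([8, 1]), dtype=tf.float32)
  return features, labels

estimator = tf.contrib.learn.LinearRegressor(feature_columns=feature_columns)
strategy = make_parsing_export_strategy(feature_columns, exports_to_keep=3)
experiment = tf.contrib.learn.Experiment(
    estimator=estimator,
    train_input_fn=_toy_input_fn,
    eval_input_fn=_toy_input_fn,
    train_steps=1,
    eval_steps=1,
    export_strategies=[strategy])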
Example 2: parse_feature_columns_from_examples
def parse_feature_columns_from_examples(serialized,
feature_columns,
name=None,
example_names=None):
"""Parses tf.Examples to extract tensors for given feature_columns.
This is a wrapper around `tf.parse_example`. A typical usage is as follows:
```python
columns_to_tensor = parse_feature_columns_from_examples(
serialized=my_data,
feature_columns=my_features)
# Where my_features are:
# Define features and transformations
country = sparse_column_with_keys(column_name="native_country",
keys=["US", "BRA", ...])
country_emb = embedding_column(sparse_id_column=country, dimension=3,
combiner="sum")
occupation = sparse_column_with_hash_bucket(column_name="occupation",
hash_bucket_size=1000)
occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
combiner="sum")
occupation_x_country = crossed_column(columns=[occupation, country],
hash_bucket_size=10000)
age = real_valued_column("age")
age_buckets = bucketized_column(
source_column=age,
boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
my_features = [occupation_emb, age_buckets, country_emb]
```
Args:
serialized: A vector (1-D Tensor) of strings, a batch of binary
serialized `Example` protos.
feature_columns: An iterable containing all the feature columns. All items
should be instances of classes derived from _FeatureColumn.
name: A name for this operation (optional).
example_names: A vector (1-D Tensor) of strings (optional), the names of
the serialized protos in the batch.
Returns:
A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
"""
check_feature_columns(feature_columns)
columns_to_tensors = parsing_ops.parse_example(
serialized=serialized,
features=fc.create_feature_spec_for_parsing(feature_columns),
name=name,
example_names=example_names)
transformer = _Transformer(columns_to_tensors)
for column in sorted(set(feature_columns), key=lambda x: x.key):
transformer.transform(column)
return columns_to_tensors
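A small round-trip sketch for the function above (my assumption of a typical call, in TF 1.x graph mode): serialize one tf.Example by hand, then recover the column's tensor.

import tensorflow as tf
from tensorflow.contrib import layers

age = layers.real_valued_column("age")
example = tf.train.Example(features=tf.train.Features(feature={
    "age": tf.train.Feature(float_list=tf.train.FloatList(value=[42.0])),
}))
serialized = tf.constant([example.SerializeToString()])  # batch of one

columns_to_tensors = layers.parse_feature_columns_from_examples(
    serialized=serialized, feature_columns=[age])
with tf.Session() as sess:
  print(sess.run(columns_to_tensors[age]))  # [[42.]]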
Example 3: parse_feature_columns_from_sequence_examples
def parse_feature_columns_from_sequence_examples(
serialized,
context_feature_columns,
sequence_feature_columns,
name=None,
example_name=None):
"""Parses tf.SequenceExamples to extract tensors for given `FeatureColumn`s.
Args:
serialized: A scalar (0-D Tensor) of type string, a single serialized
`SequenceExample` proto.
context_feature_columns: An iterable containing the feature columns for
context features. All items should be instances of classes derived from
`_FeatureColumn`. Can be `None`.
sequence_feature_columns: An iterable containing the feature columns for
sequence features. All items should be instances of classes derived from
`_FeatureColumn`. Can be `None`.
name: A name for this operation (optional).
example_name: A scalar (0-D Tensor) of type string (optional), the name of
the serialized proto.
Returns:
A tuple consisting of (context_features, sequence_features)
* context_features: a dict mapping `FeatureColumns` from
`context_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
* sequence_features: a dict mapping `FeatureColumns` from
`sequence_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
"""
# Sequence example parsing requires a single (scalar) example.
try:
serialized = array_ops.reshape(serialized, [])
except ValueError as e:
raise ValueError(
'serialized must contain a single sequence example. Batching must be '
'done after parsing for sequence examples. Error: {}'.format(e))
if context_feature_columns is None:
context_feature_columns = []
if sequence_feature_columns is None:
sequence_feature_columns = []
check_feature_columns(context_feature_columns)
context_feature_spec = fc.create_feature_spec_for_parsing(
context_feature_columns)
check_feature_columns(sequence_feature_columns)
sequence_feature_spec = fc._create_sequence_feature_spec_for_parsing( # pylint: disable=protected-access
sequence_feature_columns, allow_missing_by_default=False)
return parsing_ops.parse_single_sequence_example(serialized,
context_feature_spec,
sequence_feature_spec,
example_name,
name)
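A hedged companion sketch (feature names and values are made up; TF 1.x assumed): build one SequenceExample with a context feature and a sequence feature, then parse it with the function above. The returned dicts are keyed by feature name.

import tensorflow as tf
from tensorflow.contrib import layers

seq_example = tf.train.SequenceExample(
    context=tf.train.Features(feature={
        "user_id": tf.train.Feature(int64_list=tf.train.Int64List(value=[7]))}),
    feature_lists=tf.train.FeatureLists(feature_list={
        "click": tf.train.FeatureList(feature=[
            tf.train.Feature(float_list=tf.train.FloatList(value=[1.0])),
            tf.train.Feature(float_list=tf.train.FloatList(value=[0.0]))])}))

context_features, sequence_features = (
    layers.parse_feature_columns_from_sequence_examples(
        serialized=tf.constant(seq_example.SerializeToString()),
        context_feature_columns=[
            layers.real_valued_column("user_id", dtype=tf.int64)],
        sequence_feature_columns=[layers.real_valued_column("click")]))
# context_features["user_id"] has shape [1];
# sequence_features["click"] has shape [2, 1] (two steps of one value each).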
Example 4: parse_feature_columns_from_examples
def parse_feature_columns_from_examples(serialized,
feature_columns,
name=None,
example_names=None):
"""Parses tf.Examples to extract tensors for given feature_columns.
This is a wrapper around `tf.parse_example`. A typical usage is as follows:
```
columns_to_tensor = tf.contrib.layers.parse_feature_columns_from_examples(
serialized=my_data,
feature_columns=my_features)
# Where my_features are:
# Define features and transformations
country = sparse_column_with_keys("country", ["US", "BRA", ...])
country_embedding = embedding_column(country, dimension=3, combiner="sum")
query_word = sparse_column_with_hash_bucket(
"query_word", hash_bucket_size=int(1e6))
query_embedding = embedding_column(query_word, dimension=16, combiner="sum")
age_bucket = bucketized_column(real_valued_column("age"),
boundaries=[18+i*5 for i in range(10)])
my_features = [query_embedding, age_bucket, country_embedding]
```
Args:
serialized: A vector (1-D Tensor) of strings, a batch of binary
serialized `Example` protos.
feature_columns: An iterable containing all the feature columns. All items
should be instances of classes derived from _FeatureColumn.
name: A name for this operation (optional).
example_names: A vector (1-D Tensor) of strings (optional), the names of
the serialized protos in the batch.
Returns:
A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
"""
columns_to_tensors = parsing_ops.parse_example(
serialized=serialized,
features=fc.create_feature_spec_for_parsing(feature_columns),
name=name,
example_names=example_names)
transformer = _Transformer(columns_to_tensors)
for column in sorted(set(feature_columns), key=lambda x: x.key):
transformer.transform(column)
return columns_to_tensors
Example 5: testCreateFeatureSpec_ExperimentalColumns
def testCreateFeatureSpec_ExperimentalColumns(self):
real_valued_col0 = fc._real_valued_var_len_column(
"real_valued_column0", is_sparse=True)
real_valued_col1 = fc._real_valued_var_len_column(
"real_valued_column1", dtype=dtypes.int64, default_value=0,
is_sparse=False)
feature_columns = set([real_valued_col0, real_valued_col1])
expected_config = {
"real_valued_column0": parsing_ops.VarLenFeature(dtype=dtypes.float32),
"real_valued_column1":
parsing_ops.FixedLenSequenceFeature(
[], dtype=dtypes.int64, allow_missing=True, default_value=0),
}
config = fc.create_feature_spec_for_parsing(feature_columns)
self.assertDictEqual(expected_config, config)
Example 6: testCreateFeatureSpec_RealValuedColumnWithDefaultValue
def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
real_valued_col1 = fc.real_valued_column(
"real_valued_column1", default_value=2)
real_valued_col2 = fc.real_valued_column(
"real_valued_column2", 5, default_value=4)
real_valued_col3 = fc.real_valued_column(
"real_valued_column3", default_value=[8])
real_valued_col4 = fc.real_valued_column(
"real_valued_column4", 3, default_value=[1, 0, 6])
real_valued_col5 = fc._real_valued_var_len_column(
"real_valued_column5", default_value=2, is_sparse=True)
real_valued_col6 = fc._real_valued_var_len_column(
"real_valued_column6",
dtype=dtypes.int64,
default_value=1,
is_sparse=False)
feature_columns = [
real_valued_col1, real_valued_col2, real_valued_col3, real_valued_col4,
real_valued_col5, real_valued_col6
]
config = fc.create_feature_spec_for_parsing(feature_columns)
self.assertEqual(6, len(config))
self.assertDictEqual(
{
"real_valued_column1":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32, default_value=[2.]),
"real_valued_column2":
parsing_ops.FixedLenFeature(
[5],
dtype=dtypes.float32,
default_value=[4., 4., 4., 4., 4.]),
"real_valued_column3":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32, default_value=[8.]),
"real_valued_column4":
parsing_ops.FixedLenFeature(
[3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
"real_valued_column5":
parsing_ops.VarLenFeature(dtype=dtypes.float32),
"real_valued_column6":
parsing_ops.FixedLenSequenceFeature(
[], dtype=dtypes.int64, allow_missing=True, default_value=1)
},
config)
Example 7: _build_estimator_for_resource_export_test
def _build_estimator_for_resource_export_test():
def _input_fn():
iris = base.load_iris()
return {
'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
}, constant_op.constant(
iris.target, shape=[150], dtype=dtypes.int32)
feature_columns = [
feature_column_lib.real_valued_column('feature', dimension=4)
]
def resource_constant_model_fn(unused_features, unused_labels, mode):
"""A model_fn that loads a constant from a resource and serves it."""
assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
model_fn.ModeKeys.INFER)
const = constant_op.constant(-1, dtype=dtypes.int64)
table = lookup.MutableHashTable(
dtypes.string, dtypes.int64, const, name='LookupTableModel')
update_global_step = variables.get_global_step().assign_add(1)
if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
key = constant_op.constant(['key'])
value = constant_op.constant([42], dtype=dtypes.int64)
train_op_1 = table.insert(key, value)
training_state = lookup.MutableHashTable(
dtypes.string, dtypes.int64, const, name='LookupTableTrainingState')
training_op_2 = training_state.insert(key, value)
return (const, const,
control_flow_ops.group(train_op_1, training_op_2,
update_global_step))
if mode == model_fn.ModeKeys.INFER:
key = constant_op.constant(['key'])
prediction = table.lookup(key)
return prediction, const, update_global_step
est = estimator.Estimator(model_fn=resource_constant_model_fn)
est.fit(input_fn=_input_fn, steps=1)
feature_spec = feature_column_lib.create_feature_spec_for_parsing(
feature_columns)
serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
return est, serving_input_fn
Example 8: make_parsing_export_strategy
def make_parsing_export_strategy(feature_columns, exports_to_keep=5):
"""Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.
Creates a SavedModel export that expects to be fed with a single string
Tensor containing serialized tf.Examples. At serving time, incoming
tf.Examples will be parsed according to the provided `FeatureColumn`s.
Args:
feature_columns: An iterable of `FeatureColumn`s representing the features
that must be provided at serving time (excluding labels!).
exports_to_keep: Number of exports to keep. Older exports will be
garbage-collected. Defaults to 5. Set to None to disable garbage
collection.
Returns:
An ExportStrategy that can be passed to the Experiment constructor.
"""
feature_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
return make_export_strategy(serving_input_fn, exports_to_keep=exports_to_keep)
Example 9: make_parsing_export_strategy
def make_parsing_export_strategy(feature_columns,
default_output_alternative_key=None,
assets_extra=None,
as_text=False,
exports_to_keep=5):
"""Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.
Creates a SavedModel export that expects to be fed with a single string
Tensor containing serialized tf.Examples. At serving time, incoming
tf.Examples will be parsed according to the provided `FeatureColumn`s.
Args:
feature_columns: An iterable of `FeatureColumn`s representing the features
that must be provided at serving time (excluding labels!).
default_output_alternative_key: the name of the head to serve when an
incoming serving request does not explicitly request a specific head.
Must be `None` if the estimator inherits from `tf.estimator.Estimator`
or for single-headed models.
assets_extra: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
The corresponding value gives the full path of the source file to be
copied. For example, the simple case of copying a single file without
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
as_text: whether to write the SavedModel proto in text format.
exports_to_keep: Number of exports to keep. Older exports will be
garbage-collected. Defaults to 5. Set to None to disable garbage
collection.
Returns:
An ExportStrategy that can be passed to the Experiment constructor.
"""
feature_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
return make_export_strategy(
serving_input_fn,
default_output_alternative_key=default_output_alternative_key,
assets_extra=assets_extra,
as_text=as_text,
exports_to_keep=exports_to_keep)
Example 10: _build_estimator_for_export_tests
def _build_estimator_for_export_tests(tmpdir):
def _input_fn():
iris = base.load_iris()
return {
'feature': constant_op.constant(
iris.data, dtype=dtypes.float32)
}, constant_op.constant(
iris.target, shape=[150], dtype=dtypes.int32)
feature_columns = [
feature_column_lib.real_valued_column(
'feature', dimension=4)
]
est = linear.LinearRegressor(feature_columns)
est.fit(input_fn=_input_fn, steps=20)
feature_spec = feature_column_lib.create_feature_spec_for_parsing(
feature_columns)
serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
# Hack in an op that uses an asset, in order to test asset export.
# This is not actually valid, of course.
def serving_input_fn_with_asset():
features, labels, inputs = serving_input_fn()
vocab_file_name = os.path.join(tmpdir, 'my_vocab_file')
vocab_file = gfile.GFile(vocab_file_name, mode='w')
vocab_file.write(VOCAB_FILE_CONTENT)
vocab_file.close()
hashtable = lookup.HashTable(
lookup.TextFileStringTableInitializer(vocab_file_name), 'x')
features['bogus_lookup'] = hashtable.lookup(
math_ops.to_int64(features['feature']))
return input_fn_utils.InputFnOps(features, labels, inputs)
return est, serving_input_fn_with_asset
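A hedged follow-up showing how the helper above is typically consumed (my assumption; `VOCAB_FILE_CONTENT` is a module-level constant defined elsewhere in the original test file, so it must exist before this runs):

import tempfile

est, serving_input_fn = _build_estimator_for_export_tests(tempfile.mkdtemp())
# Writes a SavedModel under a timestamped subdirectory of the given base path.
export_dir = est.export_savedmodel(tempfile.mkdtemp(), serving_input_fn)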
Example 11: testCreateFeatureSpec_RealValuedColumnWithDefaultValue
def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
real_valued_col1 = fc.real_valued_column(
"real_valued_column1", default_value=2)
real_valued_col2 = fc.real_valued_column(
"real_valued_column2", 5, default_value=4)
real_valued_col3 = fc.real_valued_column(
"real_valued_column3", default_value=[8])
real_valued_col4 = fc.real_valued_column(
"real_valued_column4", 3, default_value=[1, 0, 6])
real_valued_col5 = fc.real_valued_column(
"real_valued_column5", dimension=None, default_value=2)
feature_columns = [
real_valued_col1, real_valued_col2, real_valued_col3, real_valued_col4,
real_valued_col5
]
config = fc.create_feature_spec_for_parsing(feature_columns)
self.assertEqual(5, len(config))
self.assertDictEqual(
{
"real_valued_column1":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32, default_value=[2.]),
"real_valued_column2":
parsing_ops.FixedLenFeature(
[5],
dtype=dtypes.float32,
default_value=[4., 4., 4., 4., 4.]),
"real_valued_column3":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32, default_value=[8.]),
"real_valued_column4":
parsing_ops.FixedLenFeature(
[3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
"real_valued_column5":
parsing_ops.VarLenFeature(dtype=dtypes.float32)
},
config)
Example 12: parse_feature_columns_from_examples
def parse_feature_columns_from_examples(serialized,
feature_columns,
name=None,
example_names=None):
"""Parses tf.Examples to extract tensors for given feature_columns.
This is a wrapper around `tf.parse_example`.
Example:
```python
columns_to_tensor = parse_feature_columns_from_examples(
serialized=my_data,
feature_columns=my_features)
# Where my_features are:
# Define features and transformations
sparse_feature_a = sparse_column_with_keys(
column_name="sparse_feature_a", keys=["AB", "CD", ...])
embedding_feature_a = embedding_column(
sparse_id_column=sparse_feature_a, dimension=3, combiner="sum")
sparse_feature_b = sparse_column_with_hash_bucket(
column_name="sparse_feature_b", hash_bucket_size=1000)
embedding_feature_b = embedding_column(
sparse_id_column=sparse_feature_b, dimension=16, combiner="sum")
crossed_feature_a_x_b = crossed_column(
columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000)
real_feature = real_valued_column("real_feature")
real_feature_buckets = bucketized_column(
source_column=real_feature, boundaries=[...])
my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a]
```
Args:
serialized: A vector (1-D Tensor) of strings, a batch of binary
serialized `Example` protos.
feature_columns: An iterable containing all the feature columns. All items
should be instances of classes derived from _FeatureColumn.
name: A name for this operation (optional).
example_names: A vector (1-D Tensor) of strings (optional), the names of
the serialized protos in the batch.
Returns:
A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
"""
check_feature_columns(feature_columns)
columns_to_tensors = parsing_ops.parse_example(
serialized=serialized,
features=fc.create_feature_spec_for_parsing(feature_columns),
name=name,
example_names=example_names)
transformer = _Transformer(columns_to_tensors)
for column in sorted(set(feature_columns), key=lambda x: x.key):
transformer.transform(column)
return columns_to_tensors
Example 13: testCreateFeatureSpec
def testCreateFeatureSpec(self):
sparse_col = fc.sparse_column_with_hash_bucket(
"sparse_column", hash_bucket_size=100)
embedding_col = fc.embedding_column(
fc.sparse_column_with_hash_bucket(
"sparse_column_for_embedding", hash_bucket_size=10),
dimension=4)
sparse_id_col = fc.sparse_column_with_keys("id_column",
["marlo", "omar", "stringer"])
weighted_id_col = fc.weighted_sparse_column(sparse_id_col,
"id_weights_column")
real_valued_col1 = fc.real_valued_column("real_valued_column1")
real_valued_col2 = fc.real_valued_column("real_valued_column2", 5)
real_valued_col3 = fc.real_valued_column(
"real_valued_column3", dimension=None)
bucketized_col1 = fc.bucketized_column(
fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
bucketized_col2 = fc.bucketized_column(
fc.real_valued_column("real_valued_column_for_bucketization2", 4),
[0, 4])
a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100)
b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100)
cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000)
feature_columns = set([
sparse_col, embedding_col, weighted_id_col, real_valued_col1,
real_valued_col2, real_valued_col3, bucketized_col1, bucketized_col2,
cross_col
])
expected_config = {
"sparse_column":
parsing_ops.VarLenFeature(dtypes.string),
"sparse_column_for_embedding":
parsing_ops.VarLenFeature(dtypes.string),
"id_column":
parsing_ops.VarLenFeature(dtypes.string),
"id_weights_column":
parsing_ops.VarLenFeature(dtypes.float32),
"real_valued_column1":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32),
"real_valued_column2":
parsing_ops.FixedLenFeature(
[5], dtype=dtypes.float32),
"real_valued_column3":
parsing_ops.VarLenFeature(dtype=dtypes.float32),
"real_valued_column_for_bucketization1":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32),
"real_valued_column_for_bucketization2":
parsing_ops.FixedLenFeature(
[4], dtype=dtypes.float32),
"cross_aaa":
parsing_ops.VarLenFeature(dtypes.string),
"cross_bbb":
parsing_ops.VarLenFeature(dtypes.string)
}
config = fc.create_feature_spec_for_parsing(feature_columns)
self.assertDictEqual(expected_config, config)
# Test that the same config is parsed out if we pass a dictionary.
feature_columns_dict = {
str(i): val
for i, val in enumerate(feature_columns)
}
config = fc.create_feature_spec_for_parsing(feature_columns_dict)
self.assertDictEqual(expected_config, config)
Example 14: testCreateFeatureSpec
def testCreateFeatureSpec(self):
sparse_col = fc.sparse_column_with_hash_bucket(
"sparse_column", hash_bucket_size=100)
embedding_col = fc.embedding_column(
fc.sparse_column_with_hash_bucket(
"sparse_column_for_embedding", hash_bucket_size=10),
dimension=4)
str_sparse_id_col = fc.sparse_column_with_keys(
"str_id_column", ["marlo", "omar", "stringer"])
int32_sparse_id_col = fc.sparse_column_with_keys(
"int32_id_column", [42, 1, -1000], dtype=dtypes.int32)
int64_sparse_id_col = fc.sparse_column_with_keys(
"int64_id_column", [42, 1, -1000], dtype=dtypes.int64)
weighted_id_col = fc.weighted_sparse_column(str_sparse_id_col,
"str_id_weights_column")
real_valued_col1 = fc.real_valued_column("real_valued_column1")
real_valued_col2 = fc.real_valued_column("real_valued_column2", 5)
real_valued_col3 = fc._real_valued_var_len_column(
"real_valued_column3", is_sparse=True)
real_valued_col4 = fc._real_valued_var_len_column(
"real_valued_column4", dtype=dtypes.int64, default_value=0,
is_sparse=False)
bucketized_col1 = fc.bucketized_column(
fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
bucketized_col2 = fc.bucketized_column(
fc.real_valued_column("real_valued_column_for_bucketization2", 4),
[0, 4])
a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100)
b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100)
cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000)
one_hot_col = fc.one_hot_column(fc.sparse_column_with_hash_bucket(
"sparse_column_for_one_hot", hash_bucket_size=100))
scattered_embedding_col = fc.scattered_embedding_column(
"scattered_embedding_column", size=100, dimension=10, hash_key=1)
feature_columns = set([
sparse_col, embedding_col, weighted_id_col, int32_sparse_id_col,
int64_sparse_id_col, real_valued_col1, real_valued_col2,
real_valued_col3, real_valued_col4, bucketized_col1, bucketized_col2,
cross_col, one_hot_col, scattered_embedding_col
])
expected_config = {
"sparse_column":
parsing_ops.VarLenFeature(dtypes.string),
"sparse_column_for_embedding":
parsing_ops.VarLenFeature(dtypes.string),
"str_id_column":
parsing_ops.VarLenFeature(dtypes.string),
"int32_id_column":
parsing_ops.VarLenFeature(dtypes.int32),
"int64_id_column":
parsing_ops.VarLenFeature(dtypes.int64),
"str_id_weights_column":
parsing_ops.VarLenFeature(dtypes.float32),
"real_valued_column1":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32),
"real_valued_column2":
parsing_ops.FixedLenFeature(
[5], dtype=dtypes.float32),
"real_valued_column3":
parsing_ops.VarLenFeature(dtype=dtypes.float32),
"real_valued_column4":
parsing_ops.FixedLenSequenceFeature(
[], dtype=dtypes.int64, allow_missing=True, default_value=0),
"real_valued_column_for_bucketization1":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32),
"real_valued_column_for_bucketization2":
parsing_ops.FixedLenFeature(
[4], dtype=dtypes.float32),
"cross_aaa":
parsing_ops.VarLenFeature(dtypes.string),
"cross_bbb":
parsing_ops.VarLenFeature(dtypes.string),
"sparse_column_for_one_hot":
parsing_ops.VarLenFeature(dtypes.string),
"scattered_embedding_column":
parsing_ops.VarLenFeature(dtypes.string),
}
config = fc.create_feature_spec_for_parsing(feature_columns)
self.assertDictEqual(expected_config, config)
# Test that the same config is parsed out if we pass a dictionary.
feature_columns_dict = {
str(i): val
for i, val in enumerate(feature_columns)
}
config = fc.create_feature_spec_for_parsing(feature_columns_dict)
self.assertDictEqual(expected_config, config)
Example 15: testCreateFeatureSpec
def testCreateFeatureSpec(self):
sparse_col = fc.sparse_column_with_hash_bucket(
"sparse_column", hash_bucket_size=100)
embedding_col = fc.embedding_column(
fc.sparse_column_with_hash_bucket(
"sparse_column_for_embedding", hash_bucket_size=10),
dimension=4)
str_sparse_id_col = fc.sparse_column_with_keys(
"str_id_column", ["marlo", "omar", "stringer"])
int32_sparse_id_col = fc.sparse_column_with_keys(
"int32_id_column", [42, 1, -1000], dtype=dtypes.int32)
int64_sparse_id_col = fc.sparse_column_with_keys(
"int64_id_column", [42, 1, -1000], dtype=dtypes.int64)
weighted_id_col = fc.weighted_sparse_column(str_sparse_id_col,
"str_id_weights_column")
real_valued_col1 = fc.real_valued_column("real_valued_column1")
real_valued_col2 = fc.real_valued_column("real_valued_column2", 5)
bucketized_col1 = fc.bucketized_column(
fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
bucketized_col2 = fc.bucketized_column(
fc.real_valued_column("real_valued_column_for_bucketization2", 4),
[0, 4])
a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100)
b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100)
cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000)
one_hot_col = fc.one_hot_column(fc.sparse_column_with_hash_bucket(
"sparse_column_for_one_hot", hash_bucket_size=100))
scattered_embedding_col = fc.scattered_embedding_column(
"scattered_embedding_column", size=100, dimension=10, hash_key=1)
feature_columns = set([
sparse_col, embedding_col, weighted_id_col, int32_sparse_id_col,
int64_sparse_id_col, real_valued_col1, real_valued_col2,
bucketized_col1, bucketized_col2, cross_col, one_hot_col,
scattered_embedding_col
])
expected_config = {
"sparse_column":
parsing_ops.VarLenFeature(dtypes.string),
"sparse_column_for_embedding":
parsing_ops.VarLenFeature(dtypes.string),
"str_id_column":
parsing_ops.VarLenFeature(dtypes.string),
"int32_id_column":
parsing_ops.VarLenFeature(dtypes.int32),
"int64_id_column":
parsing_ops.VarLenFeature(dtypes.int64),
"str_id_weights_column":
parsing_ops.VarLenFeature(dtypes.float32),
"real_valued_column1":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32),
"real_valued_column2":
parsing_ops.FixedLenFeature(
[5], dtype=dtypes.float32),
"real_valued_column_for_bucketization1":
parsing_ops.FixedLenFeature(
[1], dtype=dtypes.float32),
"real_valued_column_for_bucketization2":
parsing_ops.FixedLenFeature(
[4], dtype=dtypes.float32),
"cross_aaa":
parsing_ops.VarLenFeature(dtypes.string),
"cross_bbb":
parsing_ops.VarLenFeature(dtypes.string),
"sparse_column_for_one_hot":
parsing_ops.VarLenFeature(dtypes.string),
"scattered_embedding_column":
parsing_ops.VarLenFeature(dtypes.string),
}
config = fc.create_feature_spec_for_parsing(feature_columns)
self.assertDictEqual(expected_config, config)
# Tests that contrib feature columns work with core library:
config_core = fc_core.make_parse_example_spec(feature_columns)
self.assertDictEqual(expected_config, config_core)
# Test that the same config is parsed out if we pass a dictionary.
feature_columns_dict = {
str(i): val
for i, val in enumerate(feature_columns)
}
config = fc.create_feature_spec_for_parsing(feature_columns_dict)
self.assertDictEqual(expected_config, config)