This article collects typical usage examples of the Python function tensorflow.python.feature_column.feature_column.embedding_column. If you have been wondering what embedding_column does, how to use it, or what it looks like in real code, the curated examples here should help.
Fifteen code examples of embedding_column are shown below, ordered by popularity.
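Before turning to the examples, a minimal sketch may help orient readers: embedding_column wraps a categorical column and maps each category id to a trainable dense vector. The sketch below assumes TensorFlow 1.x; the 'color' feature and its vocabulary are invented for illustration.

import tensorflow as tf  # assumes TensorFlow 1.x

# A categorical column over a small, made-up vocabulary.
colors = tf.feature_column.categorical_column_with_vocabulary_list(
    'color', vocabulary_list=['red', 'green', 'blue'])
# Map each category id to a trainable 4-dimensional dense vector.
color_embedding = tf.feature_column.embedding_column(colors, dimension=4)

features = {'color': tf.constant([['red'], ['blue']])}
dense = tf.feature_column.input_layer(features, [color_embedding])
# dense has shape [2, 4]: one embedding vector per input example.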
Example 1: testFromScratchWithCustomRNNCellFn
def testFromScratchWithCustomRNNCellFn(self):
  def train_input_fn():
    return {
        'tokens':
            sparse_tensor.SparseTensor(
                values=['the', 'cat', 'sat'],
                indices=[[0, 0], [0, 1], [0, 2]],
                dense_shape=[1, 3]),
    }, [[1]]

  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  input_units = 2
  cell_units = [4, 2]
  n_classes = 2

  def rnn_cell_fn(mode):
    del mode  # unused
    cells = [rnn_cell.BasicRNNCell(num_units=n) for n in cell_units]
    return rnn_cell.MultiRNNCell(cells)

  est = rnn.RNNClassifier(
      sequence_feature_columns=[embed],
      rnn_cell_fn=rnn_cell_fn,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # Train for a few steps, and validate final checkpoint.
  num_steps = 10
  est.train(input_fn=train_input_fn, steps=num_steps)
  self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
Example 2: _build_feature_columns
def _build_feature_columns(self):
  col = fc.categorical_column_with_identity(
      'int_ctx', num_buckets=100)
  ctx_cols = [
      fc.embedding_column(col, dimension=10),
      fc.numeric_column('float_ctx')]

  identity_col = sfc.sequence_categorical_column_with_identity(
      'int_list', num_buckets=10)
  bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
      'bytes_list', hash_bucket_size=100)
  seq_cols = [
      fc.embedding_column(identity_col, dimension=10),
      fc.embedding_column(bucket_col, dimension=20)]

  return ctx_cols, seq_cols
Example 3: _testExampleWeight
def _testExampleWeight(self, n_classes):
  def train_input_fn():
    return {
        'tokens':
            sparse_tensor.SparseTensor(
                values=['the', 'cat', 'sat', 'dog', 'barked'],
                indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                dense_shape=[2, 3]),
        'w': [[1], [2]],
    }, [[1], [0]]

  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  input_units = 2
  cell_units = [4, 2]

  est = rnn.RNNClassifier(
      num_units=cell_units,
      sequence_feature_columns=[embed],
      n_classes=n_classes,
      weight_column='w',
      model_dir=self._model_dir)

  # Train for a few steps, and validate final checkpoint.
  num_steps = 10
  est.train(input_fn=train_input_fn, steps=num_steps)
  self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
Example 4: test_dnn_classifier
def test_dnn_classifier(self):
  embedding = feature_column_lib.embedding_column(
      feature_column_lib.categorical_column_with_vocabulary_list(
          'wire_cast', ['kima', 'omar', 'stringer']), 8)
  dnn = estimator_lib.DNNClassifier(
      feature_columns=[embedding], hidden_units=[3, 1])

  def train_input_fn():
    return dataset_ops.Dataset.from_tensors(({
        'wire_cast': [['omar'], ['kima']]
    }, [[0], [1]])).repeat(3)

  def eval_input_fn():
    return dataset_ops.Dataset.from_tensors(({
        'wire_cast': [['stringer'], ['kima']]
    }, [[0], [1]])).repeat(2)

  evaluator = hooks_lib.InMemoryEvaluatorHook(
      dnn, eval_input_fn, name='in-memory')
  dnn.train(train_input_fn, hooks=[evaluator])
  self.assertTrue(os.path.isdir(dnn.eval_dir('in-memory')))
  step_keyword_to_value = summary_step_keyword_to_value_mapping(
      dnn.eval_dir('in-memory'))

  final_metrics = dnn.evaluate(eval_input_fn)
  step = final_metrics[ops.GraphKeys.GLOBAL_STEP]
  for summary_tag in final_metrics:
    if summary_tag == ops.GraphKeys.GLOBAL_STEP:
      continue
    self.assertEqual(final_metrics[summary_tag],
                     step_keyword_to_value[step][summary_tag])
Example 5: test_sequence_length_with_empty_rows
def test_sequence_length_with_empty_rows(self):
  """Tests _sequence_length when some examples do not have ids."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids []
      # example 1, ids [2]
      # example 2, ids [0, 1]
      # example 3, ids []
      # example 4, ids [1]
      # example 5, ids []
      indices=((1, 0), (2, 0), (2, 1), (4, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(6, 2))
  expected_sequence_length = [0, 1, 2, 0, 1, 0]

  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column = fc.embedding_column(
      categorical_column, dimension=2)

  _, sequence_length = embedding_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(
        expected_sequence_length, sequence_length.eval(session=sess))
Example 6: testWarmStartInputLayerEmbeddingColumn
def testWarmStartInputLayerEmbeddingColumn(self):
  # Create old and new vocabs for embedding column "sc_vocab".
  prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                      "old_vocab")
  new_vocab_path = self._write_vocab(
      ["orange", "guava", "banana", "apple", "raspberry", "blueberry"],
      "new_vocab")

  # Save checkpoint from which to warm-start.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      _ = variable_scope.get_variable(
          "input_layer/sc_vocab_embedding/embedding_weights",
          initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
      self._write_checkpoint(sess)

  def _partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Partition each var into 2 equal slices.
    partitions = [1] * len(shape)
    partitions[0] = min(2, shape[0].value)
    return partitions

  # Create feature columns.
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
  emb_vocab = fc.embedding_column(
      categorical_column=sc_vocab,
      dimension=2,
      # Can't use constant_initializer with load_and_remap. In practice,
      # use a truncated normal initializer.
      initializer=init_ops.random_uniform_initializer(
          minval=0.42, maxval=0.42))
  all_deep_cols = [emb_vocab]

  # New graph, new session with warmstarting.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      cols_to_vars = {}
      with variable_scope.variable_scope("", partitioner=_partitioner):
        # Create the variables.
        fc.input_layer(
            features=self._create_dummy_inputs(),
            feature_columns=all_deep_cols,
            cols_to_vars=cols_to_vars)
      ws_settings = ws_util._WarmStartSettings(
          self.get_temp_dir(), col_to_prev_vocab={
              emb_vocab: prev_vocab_path
          })
      ws_util._warmstart_input_layer(cols_to_vars, ws_settings)
      sess.run(variables.global_variables_initializer())
      # Verify weights were correctly warmstarted. Var corresponding to
      # emb_vocab should be correctly warmstarted after vocab remapping.
      # Missing values are filled in with the EmbeddingColumn's initializer.
      self._assert_cols_to_vars(
          cols_to_vars, {
              emb_vocab: [
                  np.array([[3., 3.3], [2., 2.2], [1., 1.1]]),
                  np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]])
              ]
          }, sess)
Example 7: _sequence_embedding_column
def _sequence_embedding_column(
    categorical_column, dimension, initializer=None, ckpt_to_load_from=None,
    tensor_name_in_ckpt=None, max_norm=None, trainable=True):
  """Returns a feature column that represents sequences of embeddings.

  Use this to convert sequence categorical data into dense representation for
  input to sequence NN, such as RNN.

  Example:

  ```python
  watches = sequence_categorical_column_with_identity(
      'watches', num_buckets=1000)
  watches_embedding = _sequence_embedding_column(watches, dimension=10)
  columns = [watches_embedding]

  features = tf.parse_example(..., features=make_parse_example_spec(columns))
  input_layer, sequence_length = sequence_input_layer(features, columns)

  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
  outputs, state = tf.nn.dynamic_rnn(
      rnn_cell, inputs=input_layer, sequence_length=sequence_length)
  ```

  Args:
    categorical_column: A `_SequenceCategoricalColumn` created with a
      `sequence_categorical_column_with_*` function.
    dimension: Integer dimension of the embedding.
    initializer: Initializer function used to initialize the embeddings.
    ckpt_to_load_from: String representing checkpoint name/pattern from which
      to restore column weights. Required if `tensor_name_in_ckpt` is not
      `None`.
    tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from
      which to restore the column weights. Required if `ckpt_to_load_from` is
      not `None`.
    max_norm: If not `None`, embedding values are l2-normalized to this value.
    trainable: Whether or not the embedding is trainable. Default is True.

  Returns:
    A `_SequenceCategoricalToDenseColumn`.

  Raises:
    ValueError: If `categorical_column` is not the right type.
  """
  if not isinstance(categorical_column, _SequenceCategoricalColumn):
    raise ValueError(
        'categorical_column must be of type _SequenceCategoricalColumn. '
        'Given (type {}): {}'.format(
            type(categorical_column), categorical_column))
  return _SequenceCategoricalToDenseColumn(
      fc.embedding_column(
          categorical_column,
          dimension=dimension,
          initializer=initializer,
          ckpt_to_load_from=ckpt_to_load_from,
          tensor_name_in_ckpt=tensor_name_in_ckpt,
          max_norm=max_norm,
          trainable=trainable))
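The docstring example above is abbreviated (the tf.parse_example call is elided, and _sequence_embedding_column itself is private). For orientation, here is a self-contained sketch of the same flow through the public contrib API; it assumes TensorFlow 1.x with tf.contrib available, and the 'watches' feature data is invented for illustration.

import tensorflow as tf  # assumes TensorFlow 1.x (tf.contrib available)

# Made-up batch of two examples with variable-length sequences of ids.
features = {
    'watches': tf.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[3, 7, 42],
        dense_shape=[2, 2]),
}
watches = tf.contrib.feature_column.sequence_categorical_column_with_identity(
    'watches', num_buckets=1000)
watches_embedding = tf.feature_column.embedding_column(watches, dimension=10)
# input_layer has shape [batch, max_sequence_length, dimension];
# sequence_length holds each example's true sequence length.
input_layer, sequence_length = tf.contrib.feature_column.sequence_input_layer(
    features, [watches_embedding])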
Example 8: testParseExampleInputFn
def testParseExampleInputFn(self):
  """Tests complete flow with input_fn constructed from parse_example."""
  n_classes = 3
  batch_size = 10
  words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

  _, examples_file = tempfile.mkstemp()
  writer = python_io.TFRecordWriter(examples_file)
  for _ in range(batch_size):
    sequence_length = random.randint(1, len(words))
    sentence = random.sample(words, sequence_length)
    label = random.randint(0, n_classes - 1)
    example = example_pb2.Example(features=feature_pb2.Features(
        feature={
            'tokens':
                feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                    value=sentence)),
            'label':
                feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                    value=[label])),
        }))
    writer.write(example.SerializeToString())
  writer.close()

  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  feature_columns = [embed]
  feature_spec = parsing_utils.classifier_parse_example_spec(
      feature_columns,
      label_key='label',
      label_dtype=dtypes.int64)

  def _train_input_fn():
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec)
    return dataset.map(lambda features: (features, features.pop('label')))

  def _eval_input_fn():
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec, num_epochs=1)
    return dataset.map(lambda features: (features, features.pop('label')))

  def _predict_input_fn():
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec, num_epochs=1)
    def features_fn(features):
      features.pop('label')
      return features
    return dataset.map(features_fn)

  self._test_complete_flow(
      feature_columns=feature_columns,
      train_input_fn=_train_input_fn,
      eval_input_fn=_eval_input_fn,
      predict_input_fn=_predict_input_fn,
      n_classes=n_classes,
      batch_size=batch_size)
Example 9: test_warm_starting_selective_variables
def test_warm_starting_selective_variables(self):
  """Tests selecting variables to warm-start."""
  age = feature_column.numeric_column('age')
  city = feature_column.embedding_column(
      feature_column.categorical_column_with_vocabulary_list(
          'city', vocabulary_list=['Mountain View', 'Palo Alto']),
      dimension=5)

  # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
  dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
      linear_feature_columns=[age],
      dnn_feature_columns=[city],
      dnn_hidden_units=[256, 128],
      model_dir=self._ckpt_and_vocab_dir,
      n_classes=4,
      linear_optimizer='SGD',
      dnn_optimizer='SGD')
  dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)

  # Create a second DNNLinearCombinedClassifier, warm-started from the first.
  # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
  # have accumulator values that change).
  warm_started_dnn_lc_classifier = (
      dnn_linear_combined.DNNLinearCombinedClassifier(
          linear_feature_columns=[age],
          dnn_feature_columns=[city],
          dnn_hidden_units=[256, 128],
          n_classes=4,
          linear_optimizer=gradient_descent.GradientDescentOptimizer(
              learning_rate=0.0),
          dnn_optimizer=gradient_descent.GradientDescentOptimizer(
              learning_rate=0.0),
          # The provided regular expression will only warm-start the deep
          # portion of the model.
          warm_start_from=estimator.WarmStartSettings(
              ckpt_to_initialize_from=dnn_lc_classifier.model_dir,
              vars_to_warm_start='.*(dnn).*')))
  warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)

  for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
    if 'dnn' in variable_name:
      self.assertAllClose(
          dnn_lc_classifier.get_variable_value(variable_name),
          warm_started_dnn_lc_classifier.get_variable_value(variable_name))
    elif 'linear' in variable_name:
      linear_values = warm_started_dnn_lc_classifier.get_variable_value(
          variable_name)
      # Since they're not warm-started, the linear weights will be
      # zero-initialized.
      self.assertAllClose(np.zeros_like(linear_values), linear_values)
Example 10: test_get_sequence_dense_tensor
def test_get_sequence_dense_tensor(self):
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      # example 2, ids []
      # example 3, ids [1]
      indices=((0, 0), (1, 0), (1, 1), (3, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(4, 2))

  embedding_dimension = 2
  embedding_values = (
      (1., 2.),  # id 0
      (3., 5.),  # id 1
      (7., 11.)  # id 2
  )
  def _initializer(shape, dtype, partition_info):
    self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
    self.assertEqual(dtypes.float32, dtype)
    self.assertIsNone(partition_info)
    return embedding_values

  expected_lookups = [
      # example 0, ids [2]
      [[7., 11.], [0., 0.]],
      # example 1, ids [0, 1]
      [[1., 2.], [3., 5.]],
      # example 2, ids []
      [[0., 0.], [0., 0.]],
      # example 3, ids [1]
      [[3., 5.], [0., 0.]],
  ]

  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column = fc.embedding_column(
      categorical_column, dimension=embedding_dimension,
      initializer=_initializer)

  embedding_lookup, _ = embedding_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  self.assertItemsEqual(
      ('embedding_weights:0',), tuple([v.name for v in global_vars]))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
    self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess))
Example 11: _sequence_embedding_column
def _sequence_embedding_column(
    categorical_column, dimension, initializer=None, ckpt_to_load_from=None,
    tensor_name_in_ckpt=None, max_norm=None, trainable=True):
  if not isinstance(categorical_column, _SequenceCategoricalColumn):
    raise ValueError(
        'categorical_column must be of type _SequenceCategoricalColumn. '
        'Given (type {}): {}'.format(
            type(categorical_column), categorical_column))
  return _SequenceEmbeddingColumn(
      fc.embedding_column(
          categorical_column,
          dimension=dimension,
          initializer=initializer,
          ckpt_to_load_from=ckpt_to_load_from,
          tensor_name_in_ckpt=tensor_name_in_ckpt,
          max_norm=max_norm,
          trainable=trainable))
Example 12: testNumpyInputFn
def testNumpyInputFn(self):
  """Tests complete flow with numpy_input_fn."""
  n_classes = 3
  batch_size = 10
  words = ['dog', 'cat', 'bird', 'the', 'a', 'sat', 'flew', 'slept']
  # Numpy only supports dense input, so all examples will have same length.
  # TODO(b/73160931): Update test when support for prepadded data exists.
  sequence_length = 3

  features = []
  for _ in range(batch_size):
    sentence = random.sample(words, sequence_length)
    features.append(sentence)

  x_data = np.array(features)
  y_data = np.random.randint(n_classes, size=batch_size)

  train_input_fn = numpy_io.numpy_input_fn(
      x={'tokens': x_data},
      y=y_data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'tokens': x_data},
      y=y_data,
      batch_size=batch_size,
      shuffle=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'tokens': x_data},
      batch_size=batch_size,
      shuffle=False)

  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  feature_columns = [embed]

  self._test_complete_flow(
      feature_columns=feature_columns,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      predict_input_fn=predict_input_fn,
      n_classes=n_classes,
      batch_size=batch_size)
Example 13: _test_complete_flow
def _test_complete_flow(
    self, train_input_fn, eval_input_fn, predict_input_fn, n_classes,
    batch_size):
  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  feature_columns = [embed]

  cell_units = [4, 2]
  est = rnn.RNNClassifier(
      num_units=cell_units,
      sequence_feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predicted_proba = np.array([
      x[prediction_keys.PredictionKeys.PROBABILITIES]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  # EXPORT
  feature_spec = {
      'tokens': parsing_ops.VarLenFeature(dtypes.string),
      'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
  }
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
Example 14: testConflictingRNNCellFn
def testConflictingRNNCellFn(self):
  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  cell_units = [4, 2]

  with self.assertRaisesRegexp(
      ValueError,
      'num_units and cell_type must not be specified when using rnn_cell_fn'):
    rnn.RNNClassifier(
        sequence_feature_columns=[embed],
        rnn_cell_fn=lambda x: x,
        num_units=cell_units)

  with self.assertRaisesRegexp(
      ValueError,
      'num_units and cell_type must not be specified when using rnn_cell_fn'):
    rnn.RNNClassifier(
        sequence_feature_columns=[embed],
        rnn_cell_fn=lambda x: x,
        cell_type='lstm')
Example 15: test_sequence_length
def test_sequence_length(self):
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  expected_sequence_length = [1, 2]

  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column = fc.embedding_column(
      categorical_column, dimension=2)

  _, sequence_length = embedding_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  with monitored_session.MonitoredSession() as sess:
    sequence_length = sess.run(sequence_length)
    self.assertAllEqual(expected_sequence_length, sequence_length)
    self.assertEqual(np.int64, sequence_length.dtype)