本文整理汇总了Python中tensorflow.python.feature_column.feature_column.categorical_column_with_vocabulary_file函数的典型用法代码示例。如果您正苦于以下问题:Python categorical_column_with_vocabulary_file函数的具体用法?Python categorical_column_with_vocabulary_file怎么用?Python categorical_column_with_vocabulary_file使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了categorical_column_with_vocabulary_file函数的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testWarmStartMoreSettingsNoPartitioning
def testWarmStartMoreSettingsNoPartitioning(self):
  """Checks selective warm-starting of an unpartitioned linear model.

  Only variables matching `.*(sc_keys|sc_vocab).*` are warm-started: the
  sc_keys weights are mapped to the checkpoint variable "some_other_name" and
  the sc_vocab weights are remapped from the old vocabulary onto the new one.
  The sc_hash weights are expected to remain all zeros.
  """
  # The new vocabulary lists the four old terms in reverse order and then
  # appends two terms that the old vocabulary does not contain.
  old_terms = ["apple", "banana", "guava", "orange"]
  new_terms = ["orange", "guava", "banana", "apple", "raspberry", "blueberry"]
  prev_vocab_path = self._write_vocab(old_terms, "old_vocab")
  new_vocab_path = self._write_vocab(new_terms, "new_vocab")

  # Feature columns of the linear model under test.
  sc_hash = fc.categorical_column_with_hash_bucket(
      "sc_hash", hash_bucket_size=15)
  sc_keys = fc.categorical_column_with_vocabulary_list(
      "sc_keys", vocabulary_list=["a", "b", "c", "e"])
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
  linear_columns = [sc_hash, sc_keys, sc_vocab]

  # Write the checkpoint that the second graph warm-starts from.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    variable_scope.get_variable(
        "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms())
    keys_var = variable_scope.get_variable(
        "some_other_name", shape=[4, 1], initializer=rand())
    variable_scope.get_variable(
        "linear_model/sc_vocab/weights",
        initializer=[[0.5], [1.], [2.], [3.]])
    self._write_checkpoint(sess)
    prev_keys_val = sess.run(keys_var)

  # Rebuild the model in a fresh graph and session, then warm-start it.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = self._create_linear_model(linear_columns, partitioner=None)
    vocab_info = ws_util._VocabInfo(
        new_vocab=sc_vocab.vocabulary_file,
        new_vocab_size=sc_vocab.vocabulary_size,
        num_oov_buckets=sc_vocab.num_oov_buckets,
        old_vocab=prev_vocab_path)
    vocab_var_name = ws_util._infer_var_name(cols_to_vars[sc_vocab])
    keys_var_name = ws_util._infer_var_name(cols_to_vars[sc_keys])
    ws_settings = ws_util._WarmStartSettings(
        self.get_temp_dir(),
        vars_to_warmstart=".*(sc_keys|sc_vocab).*",
        var_name_to_vocab_info={vocab_var_name: vocab_info},
        var_name_to_prev_var_name={keys_var_name: "some_other_name"})
    ws_util._warmstart(ws_settings)
    sess.run(variables.global_variables_initializer())

    # sc_hash is not warm-started, so it is expected to be all zeros. The
    # sc_vocab rows follow the new term order: the four shared terms take their
    # old weights (3, 2, 1, 0.5) and the two new terms are expected to be 0.
    expected = {
        sc_keys: [prev_keys_val],
        sc_hash: [np.zeros([15, 1])],
        sc_vocab: [np.array([[3.], [2.], [1.], [0.5], [0.], [0.]])],
    }
    self._assert_cols_to_vars(cols_to_vars, expected, sess)
示例2: sequence_categorical_column_with_vocabulary_file
def sequence_categorical_column_with_vocabulary_file(
    key, vocabulary_file, vocabulary_size=None, num_oov_buckets=0,
    default_value=None, dtype=dtypes.string):
  """Builds a sequence categorical column whose ids come from a vocabulary file.

  The result is meant to be passed to `embedding_column` or `indicator_column`
  so that sequence categorical data is turned into a dense representation
  suitable as input to a sequence network such as an RNN.

  Example:

  ```python
  states = sequence_categorical_column_with_vocabulary_file(
      key='states', vocabulary_file='/us/states.txt', vocabulary_size=50,
      num_oov_buckets=5)
  states_embedding = embedding_column(states, dimension=10)
  columns = [states_embedding]

  features = tf.parse_example(..., features=make_parse_example_spec(columns))
  input_layer, sequence_length = sequence_input_layer(features, columns)

  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
  outputs, state = tf.nn.dynamic_rnn(
      rnn_cell, inputs=input_layer, sequence_length=sequence_length)
  ```

  Args:
    key: Unique string that identifies the input feature.
    vocabulary_file: Name of the vocabulary file.
    vocabulary_size: Number of vocabulary elements to use. It must not exceed
      the length of `vocabulary_file`; when it is smaller, the later entries
      are ignored. When None, the length of `vocabulary_file` is used.
    num_oov_buckets: Non-negative integer giving the number of
      out-of-vocabulary buckets. Out-of-vocabulary inputs are hashed into ids
      in `[vocabulary_size, vocabulary_size+num_oov_buckets)`. A positive
      value cannot be combined with `default_value`.
    default_value: Integer id returned for out-of-vocabulary feature values;
      defaults to `-1`. Cannot be combined with a positive `num_oov_buckets`.
    dtype: Type of the features. Only string and integer types are supported.

  Returns:
    A `_SequenceCategoricalColumn`.

  Raises:
    ValueError: `vocabulary_file` is missing or cannot be opened.
    ValueError: `vocabulary_size` is missing or < 1.
    ValueError: `num_oov_buckets` is a negative integer.
    ValueError: `num_oov_buckets` and `default_value` are both specified.
    ValueError: `dtype` is neither string nor integer.
  """
  # Build the plain (non-sequence) vocabulary-file column first; every
  # argument is forwarded unchanged.
  vocabulary_column = fc_old.categorical_column_with_vocabulary_file(
      key=key,
      vocabulary_file=vocabulary_file,
      vocabulary_size=vocabulary_size,
      num_oov_buckets=num_oov_buckets,
      default_value=default_value,
      dtype=dtype)
  # Wrap it so that it is treated as a sequence column.
  return fc_old._SequenceCategoricalColumn(vocabulary_column)
示例3: testWarmStart_SparseColumnVocabulary
def testWarmStart_SparseColumnVocabulary(self):
  """Checks warm-starting a vocabulary-file column with an unchanged vocab.

  The model is built twice: once without warm-starting, where the weights are
  expected to be zeros, and once with `ws_util.warm_start`, where they are
  expected to equal the values stored in the checkpoint.
  """
  # Vocabulary file backing the sparse column "sc_vocab".
  vocab_terms = ["apple", "banana", "guava", "orange"]
  vocab_path = self._write_vocab(vocab_terms, "vocab")

  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=vocab_path, vocabulary_size=4)

  # Checkpoint to warm-start from; only the stored value is needed later.
  _, prev_vocab_val = self._create_prev_run_var(
      "linear_model/sc_vocab/weights", shape=[4, 1], initializer=ones())

  def _no_split(shape, dtype):  # pylint:disable=unused-argument
    # One partition along every axis.
    return [1] * len(shape)

  # First pass: fresh graph and session, no warm-starting.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = self._create_linear_model([sc_vocab], _no_split)
    sess.run(variables.global_variables_initializer())
    # The default initializer (init_ops.zeros_initializer) applies here.
    untouched = {sc_vocab: [np.zeros([4, 1])]}
    self._assert_cols_to_vars(cols_to_vars, untouched, sess)

  # Second pass: fresh graph and session, this time warm-started.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = self._create_linear_model([sc_vocab], _no_split)
    # No old vocabulary is given, so it is assumed to match the new one.
    ws_util.warm_start(
        self.get_temp_dir(), vars_to_warm_start=".*sc_vocab.*")
    sess.run(variables.global_variables_initializer())
    # The weights must now equal what was stored in the checkpoint.
    restored = {sc_vocab: [prev_vocab_val]}
    self._assert_cols_to_vars(cols_to_vars, restored, sess)
示例4: testWarmStartInputLayerEmbeddingColumn
def testWarmStartInputLayerEmbeddingColumn(self):
  """Checks vocab-remapped warm-starting of a partitioned embedding column.

  The embedding variable is created through `fc.input_layer` under a
  partitioner that splits the first axis in two, and is warm-started with the
  old vocabulary supplied via `col_to_prev_vocab`.
  """
  # The new vocabulary reverses the four old terms and adds two unseen ones.
  old_terms = ["apple", "banana", "guava", "orange"]
  new_terms = ["orange", "guava", "banana", "apple", "raspberry", "blueberry"]
  prev_vocab_path = self._write_vocab(old_terms, "old_vocab")
  new_vocab_path = self._write_vocab(new_terms, "new_vocab")

  # Write the checkpoint holding the old embedding weights.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    variable_scope.get_variable(
        "input_layer/sc_vocab_embedding/embedding_weights",
        initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
    self._write_checkpoint(sess)

  def _partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Split the first axis into (at most) 2 slices; leave other axes whole.
    rank = len(shape)
    first_axis_parts = min(2, shape[0].value)
    return [first_axis_parts] + [1] * (rank - 1)

  # Feature columns.
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
  # constant_initializer can't be used with load_and_remap, hence a uniform
  # initializer with minval == maxval. In practice, use a truncated normal
  # initializer.
  fill_initializer = init_ops.random_uniform_initializer(
      minval=0.42, maxval=0.42)
  emb_vocab = fc.embedding_column(
      categorical_column=sc_vocab,
      dimension=2,
      initializer=fill_initializer)
  deep_columns = [emb_vocab]

  # Fresh graph and session, with warm-starting.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = {}
    with variable_scope.variable_scope("", partitioner=_partitioner):
      # Creating the input layer creates the (partitioned) variables.
      fc.input_layer(
          features=self._create_dummy_inputs(),
          feature_columns=deep_columns,
          cols_to_vars=cols_to_vars)
    ws_settings = ws_util._WarmStartSettings(
        self.get_temp_dir(), col_to_prev_vocab={emb_vocab: prev_vocab_path})
    ws_util._warmstart_input_layer(cols_to_vars, ws_settings)
    sess.run(variables.global_variables_initializer())

    # Rows follow the new term order and are split into two 3-row partitions.
    # Terms missing from the old vocabulary are filled by the embedding
    # column's initializer (0.42).
    first_partition = np.array([[3., 3.3], [2., 2.2], [1., 1.1]])
    second_partition = np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]])
    self._assert_cols_to_vars(
        cols_to_vars, {emb_vocab: [first_partition, second_partition]}, sess)
示例5: testWarmStartInputLayerMoreSettings
def testWarmStartInputLayerMoreSettings(self):
  """Checks `_warmstart_input_layer` with per-column settings and partitioning.

  sc_vocab is warm-started with a vocabulary remap, sc_keys is warm-started
  from the checkpoint tensor "some_other_name", and sc_hash is excluded, so it
  is expected to remain all zeros.
  """
  # The new vocabulary reverses the four old terms and adds two unseen ones.
  old_terms = ["apple", "banana", "guava", "orange"]
  new_terms = ["orange", "guava", "banana", "apple", "raspberry", "blueberry"]
  prev_vocab_path = self._write_vocab(old_terms, "old_vocab")
  new_vocab_path = self._write_vocab(new_terms, "new_vocab")

  # Feature columns of the linear model under test.
  sc_hash = fc.categorical_column_with_hash_bucket(
      "sc_hash", hash_bucket_size=15)
  sc_keys = fc.categorical_column_with_vocabulary_list(
      "sc_keys", vocabulary_list=["a", "b", "c", "e"])
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
  linear_columns = [sc_hash, sc_keys, sc_vocab]

  # Write the checkpoint that the second graph warm-starts from.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    variable_scope.get_variable(
        "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms())
    keys_var = variable_scope.get_variable(
        "some_other_name", shape=[4, 1], initializer=rand())
    variable_scope.get_variable(
        "linear_model/sc_vocab/weights",
        initializer=[[0.5], [1.], [2.], [3.]])
    self._write_checkpoint(sess)
    prev_keys_val = sess.run(keys_var)

  def _partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Split the first axis into (at most) 2 slices; leave other axes whole.
    rank = len(shape)
    first_axis_parts = min(2, shape[0].value)
    return [first_axis_parts] + [1] * (rank - 1)

  # Fresh graph and session, with warm-starting.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = self._create_linear_model(linear_columns, _partitioner)
    ws_settings = ws_util._WarmStartSettings(
        self.get_temp_dir(),
        col_to_prev_vocab={sc_vocab: prev_vocab_path},
        col_to_prev_tensor={sc_keys: "some_other_name"},
        exclude_columns=[sc_hash])
    ws_util._warmstart_input_layer(cols_to_vars, ws_settings)
    sess.run(variables.global_variables_initializer())

    # Every variable is split in two. sc_hash is expected to be all zeros
    # (8 + 7 rows); sc_vocab rows follow the new term order, with the two new
    # terms expected to be 0.
    expected = {
        sc_keys: np.split(prev_keys_val, 2),
        sc_hash: [np.zeros([8, 1]), np.zeros([7, 1])],
        sc_vocab: [
            np.array([[3.], [2.], [1.]]),
            np.array([[0.5], [0.], [0.]]),
        ],
    }
    self._assert_cols_to_vars(cols_to_vars, expected, sess)
示例6: testWarmStart_SparseColumnVocabularyConstrainedVocabSizes
def testWarmStart_SparseColumnVocabularyConstrainedVocabSizes(self):
  """Checks warm-starting when old and new vocab sizes truncate their files.

  Both vocabularies are limited to their first two entries, so only "apple"
  is shared between the old and the new vocabulary.
  """
  # Old vocabulary file has three entries, but only the first two are used.
  old_vocab_path = self._write_vocab(["apple", "guava", "banana"],
                                     "old_vocab")
  old_vocab_size = 2  # ['apple', 'guava']

  # New vocabulary file for the sparse column "sc_vocab".
  current_terms = ["apple", "banana", "guava", "orange"]
  current_vocab_path = self._write_vocab(current_terms, "current_vocab")

  # vocabulary_size=2 keeps only ['apple', 'banana'] of the new vocabulary.
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=current_vocab_path, vocabulary_size=2)

  # Checkpoint to warm-start from.
  self._create_prev_run_var(
      "linear_model/sc_vocab/weights", shape=[2, 1], initializer=ones())

  def _no_split(shape, dtype):  # pylint:disable=unused-argument
    # One partition along every axis.
    return [1] * len(shape)

  # First pass: fresh graph and session, no warm-starting.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = self._create_linear_model([sc_vocab], _no_split)
    sess.run(variables.global_variables_initializer())
    # The default initializer (init_ops.zeros_initializer) applies here.
    untouched = {sc_vocab: [np.zeros([2, 1])]}
    self._assert_cols_to_vars(cols_to_vars, untouched, sess)

  # Second pass: fresh graph and session, this time warm-started.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = self._create_linear_model([sc_vocab], _no_split)
    vocab_info = ws_util._VocabInfo(
        new_vocab=sc_vocab.vocabulary_file,
        new_vocab_size=sc_vocab.vocabulary_size,
        num_oov_buckets=sc_vocab.num_oov_buckets,
        old_vocab=old_vocab_path,
        old_vocab_size=old_vocab_size)
    vocab_info_by_var = {"linear_model/sc_vocab/weights": vocab_info}
    warmstart_settings = ws_util._WarmStartSettings(
        ckpt_to_initialize_from=self.get_temp_dir(),
        vars_to_warmstart=".*sc_vocab.*",
        var_name_to_vocab_info=vocab_info_by_var)
    ws_util._warmstart(warmstart_settings)
    sess.run(variables.global_variables_initializer())
    # 'banana' is not among the first two old entries, so its weight is newly
    # initialized (0), while 'apple' takes the checkpoint value (1).
    self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [[[1], [0]]]}, sess)
示例7: testWarmStartVarsToWarmstartIsNone
def testWarmStartVarsToWarmstartIsNone(self):
  """Checks warm-starting when `vars_to_warm_start` is None.

  Only the sc_vocab variable, which has an entry in `var_name_to_vocab_info`,
  is expected to be warm-started. sc_keys and sc_hash are expected to remain
  all zeros even though a previous variable name is supplied for sc_keys.
  """
  # The new vocabulary reverses the four old terms and adds two unseen ones.
  old_terms = ["apple", "banana", "guava", "orange"]
  new_terms = ["orange", "guava", "banana", "apple", "raspberry", "blueberry"]
  prev_vocab_path = self._write_vocab(old_terms, "old_vocab")
  new_vocab_path = self._write_vocab(new_terms, "new_vocab")

  # Feature columns of the linear model under test.
  sc_hash = fc.categorical_column_with_hash_bucket(
      "sc_hash", hash_bucket_size=15)
  sc_keys = fc.categorical_column_with_vocabulary_list(
      "sc_keys", vocabulary_list=["a", "b", "c", "e"])
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
  linear_columns = [sc_hash, sc_keys, sc_vocab]

  # Write the checkpoint that the second graph warm-starts from.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    variable_scope.get_variable(
        "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms())
    variable_scope.get_variable(
        "some_other_name", shape=[4, 1], initializer=rand())
    variable_scope.get_variable(
        "linear_model/sc_vocab/weights",
        initializer=[[0.5], [1.], [2.], [3.]])
    self._write_checkpoint(sess)

  def _partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Split the first axis into (at most) 2 slices; leave other axes whole.
    rank = len(shape)
    first_axis_parts = min(2, shape[0].value)
    return [first_axis_parts] + [1] * (rank - 1)

  # Fresh graph and session, with warm-starting.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = self._create_linear_model(linear_columns, _partitioner)
    vocab_info = ws_util.VocabInfo(
        new_vocab=sc_vocab.vocabulary_file,
        new_vocab_size=sc_vocab.vocabulary_size,
        num_oov_buckets=sc_vocab.num_oov_buckets,
        old_vocab=prev_vocab_path)
    vocab_var_name = ws_util._infer_var_name(cols_to_vars[sc_vocab])
    keys_var_name = ws_util._infer_var_name(cols_to_vars[sc_keys])
    ws_settings = ws_util.WarmStartSettings(
        self.get_temp_dir(),
        # The special value None ensures that only the variable named in
        # var_name_to_vocab_info (sc_vocab) is warm-started.
        vars_to_warm_start=None,
        var_name_to_vocab_info={vocab_var_name: vocab_info},
        # This mapping is provided, but the None value for vars_to_warm_start
        # overrides it, so sc_keys will not be warm-started.
        var_name_to_prev_var_name={keys_var_name: "some_other_name"})
    ws_util._warm_start(ws_settings)
    sess.run(variables.global_variables_initializer())

    # Every variable is split in two. Only sc_vocab carries checkpoint values
    # (remapped to the new term order); the other two are expected to be zeros.
    expected = {
        sc_keys: [np.zeros([2, 1]), np.zeros([2, 1])],
        sc_hash: [np.zeros([8, 1]), np.zeros([7, 1])],
        sc_vocab: [
            np.array([[3.], [2.], [1.]]),
            np.array([[0.5], [0.], [0.]]),
        ],
    }
    self._assert_cols_to_vars(cols_to_vars, expected, sess)
示例8: testWarmStart_MultipleCols
def testWarmStart_MultipleCols(self):
  """Checks warm-starting a linear model with several column types and a bias.

  The model is built twice: once without warm-starting, where every column
  weight is expected to be zeros, and once warm-started, where every column
  weight and the bias are expected to equal the checkpoint values.
  """
  # Vocabulary file backing the sparse column "sc_vocab".
  vocab_terms = ["apple", "banana", "guava", "orange"]
  vocab_path = self._write_vocab(vocab_terms, "vocab")

  # Feature columns of the linear model under test.
  sc_int = fc.categorical_column_with_identity("sc_int", num_buckets=10)
  sc_hash = fc.categorical_column_with_hash_bucket(
      "sc_hash", hash_bucket_size=15)
  sc_keys = fc.categorical_column_with_vocabulary_list(
      "sc_keys", vocabulary_list=["a", "b", "c", "e"])
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=vocab_path, vocabulary_size=4)
  real = fc.numeric_column("real")
  real_bucket = fc.bucketized_column(real, boundaries=[0., 1., 2., 3.])
  cross = fc.crossed_column([sc_keys, sc_vocab], hash_bucket_size=20)
  linear_columns = [sc_int, sc_hash, sc_keys, sc_vocab, real_bucket, cross]

  # (variable name, shape, initializer factory) for each checkpoint variable,
  # in creation order. The bias is included so that its warm-starting can be
  # verified as well.
  checkpoint_specs = [
      ("linear_model/sc_int/weights", [10, 1], ones),
      ("linear_model/sc_hash/weights", [15, 1], norms),
      ("linear_model/sc_keys/weights", [4, 1], rand),
      ("linear_model/sc_vocab/weights", [4, 1], ones),
      ("linear_model/real_bucketized/weights", [5, 1], norms),
      ("linear_model/sc_keys_X_sc_vocab/weights", [20, 1], rand),
      ("linear_model/bias_weights", [1], rand),
  ]

  # Write the checkpoint that the warm-started graph loads from.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    checkpoint_vars = [
        variable_scope.get_variable(
            var_name, shape=var_shape, initializer=make_initializer())
        for var_name, var_shape, make_initializer in checkpoint_specs
    ]
    self._write_checkpoint(sess)
    # Fetch all stored values in one run, in the same order as the specs.
    (prev_int_val, prev_hash_val, prev_keys_val, prev_vocab_val,
     prev_bucket_val, prev_cross_val,
     prev_bias_val) = sess.run(checkpoint_vars)

  def _no_split(shape, dtype):  # pylint:disable=unused-argument
    # One partition along every axis.
    return [1] * len(shape)

  # First pass: fresh graph and session, no warm-starting.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = self._create_linear_model(linear_columns, _no_split)
    sess.run(variables.global_variables_initializer())
    # The default initializer (init_ops.zeros_initializer) applies to all.
    untouched = {
        sc_int: [np.zeros([10, 1])],
        sc_hash: [np.zeros([15, 1])],
        sc_keys: [np.zeros([4, 1])],
        sc_vocab: [np.zeros([4, 1])],
        real_bucket: [np.zeros([5, 1])],
        cross: [np.zeros([20, 1])],
    }
    self._assert_cols_to_vars(cols_to_vars, untouched, sess)

  # Second pass: fresh graph and session, this time warm-started.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = self._create_linear_model(linear_columns, _no_split)
    vocab_info = ws_util.VocabInfo(
        new_vocab=sc_vocab.vocabulary_file,
        new_vocab_size=sc_vocab.vocabulary_size,
        num_oov_buckets=sc_vocab.num_oov_buckets,
        old_vocab=vocab_path)
    ws_settings = ws_util.WarmStartSettings(
        self.get_temp_dir(),
        var_name_to_vocab_info={"linear_model/sc_vocab/weights": vocab_info})
    ws_util._warm_start(ws_settings)
    sess.run(variables.global_variables_initializer())
    # Every column weight and the bias must equal its checkpoint value.
    restored = {
        sc_int: [prev_int_val],
        sc_hash: [prev_hash_val],
        sc_keys: [prev_keys_val],
        sc_vocab: [prev_vocab_val],
        real_bucket: [prev_bucket_val],
        cross: [prev_cross_val],
        "bias": [prev_bias_val],
    }
    self._assert_cols_to_vars(cols_to_vars, restored, sess)
示例9: testWarmStartEmbeddingColumnLinearModel
def testWarmStartEmbeddingColumnLinearModel(self):
  """Checks warm-starting an embedding column used inside `fc.linear_model`.

  Both the linear weights and the embedding weights of the column are
  warm-started; the embedding weights are additionally remapped from the old
  vocabulary onto the new one. Variables are split in two by the partitioner.
  """
  # The new vocabulary reverses the four old terms and adds two unseen ones.
  old_terms = ["apple", "banana", "guava", "orange"]
  new_terms = ["orange", "guava", "banana", "apple", "raspberry", "blueberry"]
  prev_vocab_path = self._write_vocab(old_terms, "old_vocab")
  new_vocab_path = self._write_vocab(new_terms, "new_vocab")

  # Write the checkpoint with the old embedding and linear weights.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    variable_scope.get_variable(
        "linear_model/sc_vocab_embedding/embedding_weights",
        initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
    variable_scope.get_variable(
        "linear_model/sc_vocab_embedding/weights",
        initializer=[[0.69], [0.71]])
    self._write_checkpoint(sess)

  def _partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Split the first axis into (at most) 2 slices; leave other axes whole.
    rank = len(shape)
    first_axis_parts = min(2, shape[0].value)
    return [first_axis_parts] + [1] * (rank - 1)

  # Feature columns.
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
  emb_vocab = fc.embedding_column(categorical_column=sc_vocab, dimension=2)
  deep_columns = [emb_vocab]

  # Fresh graph and session, with warm-starting.
  with ops.Graph().as_default() as graph, self.test_session(
      graph=graph) as sess:
    cols_to_vars = {}
    with variable_scope.variable_scope("", partitioner=_partitioner):
      # Building the linear model creates the (partitioned) variables.
      fc.linear_model(
          features=self._create_dummy_inputs(),
          feature_columns=deep_columns,
          cols_to_vars=cols_to_vars)

    # constant_initializer can't be used with load_and_remap, hence a uniform
    # initializer with minval == maxval. In practice, use a truncated normal
    # initializer.
    fill_initializer = init_ops.random_uniform_initializer(
        minval=0.42, maxval=0.42)
    # Vocabulary remapping info for the embedding weights.
    vocab_info = ws_util.VocabInfo(
        new_vocab=sc_vocab.vocabulary_file,
        new_vocab_size=sc_vocab.vocabulary_size,
        num_oov_buckets=sc_vocab.num_oov_buckets,
        old_vocab=prev_vocab_path,
        backup_initializer=fill_initializer)
    vocab_info_by_var = {
        "linear_model/sc_vocab_embedding/embedding_weights": vocab_info
    }
    ws_util.warm_start(
        self.get_temp_dir(),
        vars_to_warm_start=".*sc_vocab.*",
        var_name_to_vocab_info=vocab_info_by_var)
    sess.run(variables.global_variables_initializer())

    # Expected values, in order: linear weights part 0 and part 1, then
    # embedding weights part 0 and part 1. Embedding rows follow the new term
    # order; terms missing from the old vocabulary are filled with 0.42 by the
    # backup initializer.
    expected_parts = [
        np.array([[0.69]]),
        np.array([[0.71]]),
        np.array([[3., 3.3], [2., 2.2], [1., 1.1]]),
        np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]]),
    ]
    self._assert_cols_to_vars(cols_to_vars, {emb_vocab: expected_parts}, sess)