This article collects typical usage examples of the Python function tensorflow.models.rnn.seq2seq.sequence_loss_by_example. If you are unsure how to call sequence_loss_by_example, what its arguments mean, or what real code that uses it looks like, the curated examples below should help.
The following sections show 15 code examples of sequence_loss_by_example, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps surface better Python code examples.
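Before the examples, here is a minimal, self-contained sketch of the call pattern they all share. It is an illustration written for this page, not taken from any of the examples below, and it assumes the old tensorflow.models.rnn import path used throughout (TF 0.x), where the function takes per-timestep lists of logits, targets and weights plus the number of classes and returns a 1-D tensor of per-example cross-entropy losses. Later releases moved the function to tf.nn.seq2seq and dropped the explicit class-count argument, which is why a few of the examples below pass only three arguments.

import tensorflow as tf
from tensorflow.models.rnn import seq2seq

batch_size, seq_length, vocab_size = 32, 20, 10000
# Most examples below flatten the whole batch into a single list entry:
#   logits  -> [batch_size * seq_length, vocab_size]
#   targets -> [batch_size * seq_length]  (gold token ids)
#   weights -> [batch_size * seq_length]  (1.0 per position, 0.0 to mask padding)
logits = tf.placeholder(tf.float32, [batch_size * seq_length, vocab_size])
targets = tf.placeholder(tf.int32, [batch_size * seq_length])
weights = tf.ones([batch_size * seq_length])
# One cross-entropy value per flattened position.
loss = seq2seq.sequence_loss_by_example([logits], [targets], [weights], vocab_size)
# The examples below usually reduce this to a scalar training cost:
cost = tf.reduce_sum(loss) / batch_size / seq_length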
Example 1: __init__
def __init__(self, vocabularySize, config_param):
self.vocabularySize = vocabularySize
self.config = config_param
self._inputX = tf.placeholder(tf.int32, [self.config.batch_size, self.config.sequence_size], "InputsX")
self._inputTargetsY = tf.placeholder(tf.int32, [self.config.batch_size, self.config.sequence_size], "InputTargetsY")
# Convert the input into its embedded form
with tf.device("/cpu:0"):  # Pin the embedding variable and lookup to the CPU
embedding = tf.get_variable("embedding", [self.vocabularySize, self.config.embeddingSize])
embeddingLookedUp = tf.nn.embedding_lookup(embedding, self._inputX)
inputs = tf.split(1, self.config.sequence_size, embeddingLookedUp)
inputTensorsAsList = [tf.squeeze(input_, [1]) for input_ in inputs]
#Define Tensor RNN
singleRNNCell = rnn_cell.BasicRNNCell(self.config.hidden_size)
self.multilayerRNN = rnn_cell.MultiRNNCell([singleRNNCell] * self.config.num_layers)
self._initial_state = self.multilayerRNN.zero_state(self.config.batch_size, tf.float32)
#Defining Logits
hidden_layer_output, states = rnn.rnn(self.multilayerRNN, inputTensorsAsList, initial_state=self._initial_state)
hidden_layer_output = tf.reshape(tf.concat(1, hidden_layer_output), [-1, self.config.hidden_size])
self._logits = tf.nn.xw_plus_b(hidden_layer_output, tf.get_variable("softmax_w", [self.config.hidden_size, self.vocabularySize]), tf.get_variable("softmax_b", [self.vocabularySize]))
self._predictionSoftmax = tf.nn.softmax(self._logits)
#Define the loss
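# The logits above are already flattened to [batch_size*sequence_size, vocabularySize];
# the target ids and the all-ones weights are flattened to the same length, so the
# whole batch is passed to sequence_loss_by_example as a single list entry.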
loss = seq2seq.sequence_loss_by_example([self._logits], [tf.reshape(self._inputTargetsY, [-1])], [tf.ones([self.config.batch_size * self.config.sequence_size])], self.vocabularySize)
self._cost = tf.div(tf.reduce_sum(loss), self.config.batch_size)
self._final_state = states[-1]
Example 2: __init__
def __init__(self, args, infer=False):
self.args = args
if infer:
args.batch_size = 1
args.seq_length = 1
additional_cell_args = {}
if args.model == 'rnn':
cell_fn = rnn_cell.BasicRNNCell
elif args.model == 'gru':
cell_fn = rnn_cell.GRUCell
elif args.model == 'lstm':
cell_fn = rnn_cell.BasicLSTMCell
elif args.model == 'gridlstm':
cell_fn = grid_rnn.Grid2LSTMCell
additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0})
elif args.model == 'gridgru':
cell_fn = grid_rnn.Grid2GRUCell
else:
raise Exception("model type not supported: {}".format(args.model))
cell = cell_fn(args.rnn_size, **additional_cell_args)
self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
self.initial_state = cell.zero_state(args.batch_size, tf.float32)
with tf.variable_scope('rnnlm'):
softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
with tf.device("/cpu:0"):
embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
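# At inference time (infer=True), feed the embedding of the previously predicted
# symbol back in as the next decoder input.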
def loop(prev, _):
prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
return tf.nn.embedding_lookup(embedding, prev_symbol)
outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
loop_function=loop if infer else None, scope='rnnlm')
output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
self.probs = tf.nn.softmax(self.logits)
loss = seq2seq.sequence_loss_by_example([self.logits],
[tf.reshape(self.targets, [-1])],
[tf.ones([args.batch_size * args.seq_length])],
args.vocab_size)
self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
self.final_state = last_state
self.lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
args.grad_clip)
optimizer = tf.train.AdamOptimizer(self.lr)
self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 3: __init__
def __init__(self, is_training, config):
self.batch_size = batch_size = config.batch_size # mini-batch size
self.num_steps = num_steps = config.num_steps # number of unrolled time steps per batch
size = config.hidden_size # state size
feature_size = config.feature_size
self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps, feature_size])
self._targets = tf.placeholder(tf.int32, [batch_size, num_steps, feature_size])
basic_cell = rnn_cell.BasicLSTMCell(size)
if is_training and config.keep_prob < 1: # use dropout
basic_cell = rnn_cell.DropoutWrapper(basic_cell, output_keep_prob=config.keep_prob)
cell = rnn_cell.MultiRNNCell([basic_cell] * config.num_layers) # multiple layers
self._initial_state = cell.zero_state(batch_size, tf.float32)
inputs = self._input_data
print(inputs)
print("haha")
if is_training and config.keep_prob < 1:
inputs = tf.nn.dropout(inputs, config.keep_prob)
# inputs = [tf.squeeze(input_, [1])
# for input_ in tf.split(1, num_steps, inputs)]
# outputs, states = rnn.rnn(
# cell, inputs, initial_state=self._initial_state)
#
outputs = []
states = []
state = self._initial_state
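# Manually unroll the RNN over num_steps, sharing the cell's variables across time steps after the first.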
with tf.variable_scope("RNN"):
for time_step in range(num_steps):
if time_step > 0:
tf.get_variable_scope().reuse_variables()
(cell_output, state) = cell(inputs[:, time_step, :], state)
outputs.append(cell_output)
states.append(state)
print(outputs)
output = tf.reshape(tf.concat(1, outputs), [-1, size])
print(output)
logits = tf.nn.xw_plus_b(
output, tf.get_variable("softmax_w", [size, feature_size]), tf.get_variable("softmax_b", [feature_size])
)
loss = seq2seq.sequence_loss_by_example(
[logits], [tf.reshape(self._targets, [-1])], [tf.ones([batch_size * num_steps])], feature_size
)
self._cost = cost = tf.reduce_sum(loss) / batch_size
self._final_state = states[-1]
if not is_training:
return
self._lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
optimizer = tf.train.GradientDescentOptimizer(self.lr)
self._train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 4: testSequenceLossByExample
def testSequenceLossByExample(self):
with self.test_session() as sess:
output_classes = 5
logits = [tf.constant(i + 0.5, shape=[2, output_classes])
for i in xrange(3)]
targets = [tf.constant(i, tf.int32, shape=[2]) for i in xrange(3)]
weights = [tf.constant(1.0, shape=[2]) for i in xrange(3)]
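# All logits at a given step are equal, so the softmax is uniform over the 5 classes
# and the per-step cross-entropy is log(5) ~= 1.609438.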
average_loss_per_example = seq2seq.sequence_loss_by_example(
logits, targets, weights, output_classes,
average_across_timesteps=True)
res = sess.run(average_loss_per_example)
self.assertAllClose(res, np.asarray([1.609438, 1.609438]))
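# Without averaging across timesteps, the three steps are summed: 3 * log(5) ~= 4.828314.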
loss_per_sequence = seq2seq.sequence_loss_by_example(
logits, targets, weights, output_classes,
average_across_timesteps=False)
res = sess.run(loss_per_sequence)
self.assertAllClose(res, np.asarray([4.828314, 4.828314]))
Example 5: __init__
def __init__(self, is_training, config):
self.batch_size = batch_size = config.batch_size
self.num_steps = num_steps = config.num_steps
size = config.hidden_size
output_size = config.output_size
self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps, size])
self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])
lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
if is_training and config.keep_prob < 1:
lstm_cell = rnn_cell.DropoutWrapper(
lstm_cell, output_keep_prob=config.keep_prob)
cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
self._initial_state = cell.zero_state(batch_size, tf.float32)
inputs = self._input_data
outputs = []
states = []
state = self._initial_state
with tf.variable_scope("RNN"):
for time_step in range(num_steps):
if time_step > 0: tf.get_variable_scope().reuse_variables()
(cell_output, state) = cell(inputs[:, time_step, :], state)
outputs.append(cell_output)
states.append(state)
output = tf.reshape(tf.concat(1, outputs), [-1, size])
logits = tf.nn.xw_plus_b(output,
tf.get_variable("softmax_w", [size, output_size]),
tf.get_variable("softmax_b", [output_size]))
loss = seq2seq.sequence_loss_by_example([logits],
[tf.reshape(self._targets, [-1])],
[tf.ones([batch_size * num_steps])],
output_size)
self._cost = cost = tf.reduce_sum(loss) / batch_size
self._final_state = states[-1]
self._output = output
self._logits = logits
if not is_training:
return
self._lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
config.max_grad_norm)
optimizer = tf.train.GradientDescentOptimizer(self.lr)
self._train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 6: __init__
def __init__(self, args, sampling=False):
self.args = args
if sampling:
args.batch_size = 1
args.seq_length = 1
basic_cell = rnn_cell.BasicLSTMCell(args.rnn_size)
self.cell = rnn_cell.MultiRNNCell([basic_cell] * args.num_layers)
self.input_data = tf.placeholder(tf.int32,
[args.batch_size, args.seq_length])
self.targets = tf.placeholder(tf.int32,
[args.batch_size, args.seq_length])
self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)
with tf.variable_scope('rnnlm'):
softmax_w = tf.get_variable("softmax_w",
[args.rnn_size, args.vocab_size])
softmax_b = tf.get_variable("softmax_b",
[args.vocab_size])
with tf.device("/cpu:0"):
embedding = tf.get_variable("embedding",
[args.vocab_size, args.rnn_size])
inputs = tf.split(1, args.seq_length,
tf.nn.embedding_lookup(embedding, self.input_data))
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
def loop(prev, _):
prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
return tf.nn.embedding_lookup(embedding, prev_symbol)
outputs, last_state = seq2seq.rnn_decoder(
inputs, self.initial_state, self.cell,
loop_function=loop if sampling else None, scope='rnnlm')
output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
self.probs = tf.nn.softmax(self.logits)
loss = seq2seq.sequence_loss_by_example(
[self.logits],
[tf.reshape(self.targets, [-1])],
[tf.ones([args.batch_size * args.seq_length])],
args.vocab_size)
self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
self.final_state = last_state
self.lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
args.grad_clip)
optimizer = tf.train.AdamOptimizer(self.lr)
self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 7: __init__
def __init__(self, conf):
self.conf = conf
cell_fw = BasicLSTMCell(self.conf.rnn_size)
cell_bw = BasicLSTMCell(self.conf.rnn_size)
if conf.keep_prob < 1.0 and not conf.infer:
cell_fw = DropoutWrapper(cell_fw, output_keep_prob=conf.keep_prob)
cell_bw = DropoutWrapper(cell_bw, output_keep_prob=conf.keep_prob)
self.cell_fw = cell_fw = MultiRNNCell([cell_fw] * self.conf.num_layers)
self.cell_bw = cell_bw = MultiRNNCell([cell_bw] * self.conf.num_layers)
self.input_data = tf.placeholder(tf.int32, [self.conf.batch_size, self.conf.seq_length])
self.targets = tf.placeholder(tf.int32, [self.conf.batch_size, self.conf.seq_length])
self.initial_state_fw = cell_fw.zero_state(self.conf.batch_size, tf.float32)
self.initial_state_bw = cell_bw.zero_state(self.conf.batch_size, tf.float32)
with tf.variable_scope('rnn'):
softmax_w = tf.get_variable("softmax_w", [self.conf.rnn_size*2, self.conf.output_size])
softmax_b = tf.get_variable("softmax_b", [self.conf.output_size])
embedding = tf.get_variable("embedding", [self.conf.nerloader.vocab_size, self.conf.rnn_size])
_inputs = tf.nn.embedding_lookup(embedding, self.input_data)
if conf.keep_prob < 1.0 and not conf.infer:
_inputs = tf.nn.dropout(_inputs,conf.keep_prob)
inputs = tf.split(1, conf.seq_length, _inputs)
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
outputs_bi = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs, initial_state_fw=self.initial_state_fw, initial_state_bw=self.initial_state_bw, scope='rnn')
output = tf.reshape(tf.concat(1, outputs_bi), [-1, self.conf.rnn_size*2])
self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
self.probs = tf.nn.softmax(self.logits)
self.loss_weights = [tf.ones([self.conf.batch_size * self.conf.seq_length])]
loss = seq2seq.sequence_loss_by_example([self.logits],
[tf.reshape(self.targets, [-1])],
self.loss_weights)
self.cost = (tf.reduce_sum(loss) / self.conf.batch_size / self.conf.seq_length)
tf.scalar_summary("loss",self.cost)
self.out = output
self.lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
self.conf.grad_clip)
optimizer = tf.train.AdamOptimizer(self.lr)
self.train_op = optimizer.apply_gradients(zip(grads, tvars))
self.merged_summary_op = tf.merge_all_summaries()
Example 8: __init__
#......... part of the code has been omitted here .........
out_proj = hidden_size
if hidden_proj > 0:
out_proj = hidden_proj
with tf.device("/cpu:0"):
w = tf.get_variable("proj_w", [out_proj, vocab_size])
w_t = tf.transpose(w)
b = tf.get_variable("proj_b", [vocab_size])
self.output_projection = (w, b)
sampled_softmax = False
# Sampled softmax only makes sense if we sample less than vocabulary size.
if 0 < num_samples < vocab_size:
sampled_softmax = True
def sampled_loss(logits, labels):
with tf.device("/cpu:0"):
labels = tf.reshape(labels, [-1, 1])
losses = tf.nn.sampled_softmax_loss(w_t, b, logits, labels, num_samples, vocab_size)
return losses
loss_function = sampled_loss
with tf.device("/cpu:0"):
# inputs arrive as one big tensor, so we split them into a list of tensors before running the RNN cell
embedding = tf.Variable(
tf.random_uniform(
[vocab_size, proj_size],
minval=-init_scale, maxval=init_scale
),
name="embedding"
)
# embedding = tf.get_variable("embedding", [vocab_size, proj_size])
inputs_train = [tf.nn.embedding_lookup(embedding, i) for i in self.input_data_train]
inputs_valid = [tf.nn.embedding_lookup(embedding, i) for i in self.input_data_valid]
with tf.variable_scope("RNN", initializer=initializer):
if attentive:
outputs_train, state_train, _ = lm_ops.apply_attentive_lm(
self.cell, inputs_train, sequence_length=array_ops.squeeze(math_ops.add_n(self.mask_train)),
projection_attention_f=projection_attention_f, output_form=output_form,
dropout=self.dropout_feed, initializer=initializer, dtype=tf.float32
)
outputs_valid, state_valid, _ = lm_ops.apply_attentive_lm(
self.cell, inputs_valid, sequence_length=array_ops.squeeze(math_ops.add_n(self.mask_valid)),
projection_attention_f=projection_attention_f, output_form=output_form,
dropout=self.dropout_feed, initializer=initializer, dtype=tf.float32
)
else:
outputs_train, state_train = lm_ops.apply_lm(
self.cell, inputs_train, sequence_length=math_ops.add_n(self.mask_train),
dropout=self.dropout_feed, dtype=tf.float32
)
outputs_valid, state_valid = lm_ops.apply_lm(
self.cell, inputs_valid, sequence_length=math_ops.add_n(self.mask_valid),
dropout=self.dropout_feed, dtype=tf.float32
)
if sampled_softmax is False:
logits_train = [tf.nn.xw_plus_b(o, self.output_projection[0], self.output_projection[1])
for o in outputs_train]
logits_valid = [tf.nn.xw_plus_b(o, self.output_projection[0], self.output_projection[1])
for o in outputs_valid]
else:
logits_train = outputs_train
logits_valid = outputs_valid
loss_train = seq2seq.sequence_loss_by_example(
logits_train, self.targets_train, self.mask_train, average_across_timesteps=True
)
loss_valid = seq2seq.sequence_loss_by_example(
logits_valid, self.targets_valid, self.mask_valid, average_across_timesteps=True
)
self._cost_train = cost = tf.reduce_sum(loss_train) / float(batch_size)
self._final_state_train = state_train
self._cost_valid = tf.reduce_sum(loss_valid) / float(batch_size)
self._final_state_valid = state_valid
if not is_training:
return
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
max_grad_norm)
opt = optimization_ops.get_optimizer(optimizer, learning_rate)
self._train_op = opt.apply_gradients(zip(grads, tvars), global_step=self.global_step)
self._valid_op = tf.no_op()
self.saver = tf.train.Saver(tf.all_variables())
self.saver_best = tf.train.Saver(tf.all_variables())
Example 9: __init__
def __init__(self, is_training, config):
"""constructs a graph"""
self.batch_size = batch_size = config.batch_size
self.num_steps = num_steps = config.num_steps
size = config.hidden_size
vocab_size = config.vocab_size
self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps],
name="input_data")
self._targets = tf.placeholder(tf.int32, [batch_size, num_steps],
name="targets")
# the recurrent cell itself
lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=1.0)
if is_training and config.keep_prob < 1:
lstm_cell = rnn_cell.DropoutWrapper(
lstm_cell, output_keep_prob=config.keep_prob)
cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
self._initial_state = cell.zero_state(batch_size, tf.float32)
# do an embedding (always on cpu)
with tf.device("/cpu:0"):
embedding = tf.get_variable("embedding", [vocab_size, size])
inputs = tf.split(
1, num_steps, tf.nn.embedding_lookup(embedding, self._input_data))
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
if is_training and config.keep_prob < 1:
inputs = [tf.nn.dropout(input_, config.keep_prob) for input_ in inputs]
from tensorflow.models.rnn import rnn
outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
# reshape
outputs = tf.reshape(tf.concat(1, outputs), [-1, size])
logits = tf.nn.xw_plus_b(outputs,
tf.get_variable("softmax_W", [size,vocab_size]),
tf.get_variable("softmax_b", [vocab_size]))
self._softmax_out = tf.nn.softmax(logits) # this is just used for sampling
loss = seq2seq.sequence_loss_by_example([logits],
[tf.reshape(self._targets,[-1])],
[tf.ones([batch_size * num_steps])],
vocab_size)
self._cost = cost = tf.div(tf.reduce_sum(loss),
tf.constant(batch_size, dtype=tf.float32))
self._final_state = states[-1]
if not is_training:
return # no optimisation ops are needed outside training
self._lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
config.max_grad_norm)
# plain gradient descent actually does well here,
# given the gradient clipping and the learning-rate schedule
# ftrl?
# optimizer = tf.train.GradientDescentOptimizer(self.lr)
optimizer = tf.train.FtrlOptimizer(self.lr)
self._train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 10: enumerate
inputs_dis = [tf.matmul(tf.squeeze(i, [1]), embedding) for i in inputs_dis]
state = initial_state_dis
outputs = []
for i, inp in enumerate(inputs_dis):
if i > 0:
tf.get_variable_scope().reuse_variables()
output, state = cell_dis(inp, state)
outputs.append(output)
last_state = state
output_tf = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
logits = tf.nn.xw_plus_b(output_tf, softmax_w, softmax_b)
probs = tf.nn.softmax(logits)
loss = seq2seq.sequence_loss_by_example(
[logits],
[tf.reshape(targets, [-1])],
[tf.ones([args.batch_size * args.seq_length])],
2)
cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
final_state = last_state
lr = tf.Variable(0.0, trainable = False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars, aggregation_method = 2), args.grad_clip)
optimizer = tf.train.AdamOptimizer(lr)
train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 11: __init__
#......... part of the code has been omitted here .........
# outputs_forward = tf.reshape(tf.concat(1, outputs), [-1, self.lstm_size],
# name="reshape-outputs_forward")
# outputs_backward = tf.reshape(tf.concat(1, outputs_rev), [-1, self.lstm_size],
# name="reshape-outputs_backward")
# forward_w = tf.get_variable("forward_w", [self.lstm_size, self.lstm_size])
# backward_w = tf.get_variable("backward_w", [self.lstm_size, self.lstm_size])
# non_linearity_bias = tf.get_variable("non_linearity_b", [self.lstm_size])
outputs_bidi = [tf.concat(1, [o1, o2]) for o1, o2 in zip(outputs, reversed(outputs_rev))]
# output = tf.tanh(tf.matmul(outputs_forward, forward_w) + tf.matmul(outputs_backward, backward_w) + non_linearity_bias)
output = tf.reshape(tf.concat(1, outputs_bidi), [-1, 2 * self.lstm_size], name="reshape-outputs_bidi")
output_dropped = tf.nn.dropout(output, self.dropout_prob[1])
# We compute only the logits, not the actual softmax: the loss is computed by
# sequence_loss_by_example directly from the logits, and at classification time
# taking the argmax over the logits is enough.
softmax_w = tf.get_variable("softmax_w", [2 * self.lstm_size, len(tagset)])
logits_flatten = tf.nn.xw_plus_b(output_dropped, softmax_w, tf.get_variable("softmax_b", [len(tagset)]))
# tf.get_variable_scope().reuse_variables()
regularize.append(softmax_w)
self.logits = tf.reshape(logits_flatten, [-1, num_steps, len(tagset)], name="reshape-logits")
estimated_tags_flat = tf.to_int32(tf.argmax(logits_flatten, dimension=1))
self.last_state = last_state
# output mask: compute the loss only if it isn't a padded word (i.e. zero index)
output_mask = tf.reshape(tf.to_float(tf.not_equal(self.tags, 0)), [-1])
gt_tags_flat = tf.reshape(self.tags, [-1])
tagging_loss = seq2seq.sequence_loss_by_example(
logits=[logits_flatten], targets=[gt_tags_flat], weights=[output_mask]
)
tagging_accuracy = tf.reduce_sum(
tf.to_float(tf.equal(estimated_tags_flat, gt_tags_flat)) * output_mask
) / tf.reduce_sum(output_mask)
tf.scalar_summary("train_accuracy", tagging_accuracy, collections=["train"])
tf.scalar_summary("dev_accuracy", tagging_accuracy, collections=["dev"])
self.cost = tf.reduce_mean(tagging_loss)
tf.scalar_summary("train_tagging_loss", tf.reduce_mean(tagging_loss), collections=["train"])
tf.scalar_summary("dev_tagging_loss", tf.reduce_mean(tagging_loss), collections=["dev"])
if generate_lemmas:
with tf.variable_scope("decoder"):
self.lemma_chars = tf.placeholder(tf.int32, [None, num_steps, num_chars + 2], name="lemma_chars")
lemma_state_size = self.lstm_size
lemma_w = tf.Variable(tf.random_uniform([lemma_state_size, len(alphabet)], 0.5), name="state_to_char_w")
lemma_b = tf.Variable(tf.fill([len(alphabet)], -math.log(len(alphabet))), name="state_to_char_b")
lemma_char_embeddings = tf.Variable(
tf.random_uniform(
[len(alphabet), lemma_state_size / (2 if supply_form_characters_to_lemma else 1)], -0.5, 0.5
),
name="char_embeddings",
)
lemma_char_inputs = [
tf.squeeze(input_, [1])
for input_ in tf.split(
Example 12: __init__
def __init__(self, CellType, is_training, config):
self.batch_size = batch_size = config.batch_size
self.num_steps = num_steps = config.num_steps
size = config.hidden_size
vocab_size = config.vocab_size
self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps], name="input_data")
self.targets = tf.placeholder(tf.int32, [batch_size, num_steps], name="targets")
lstm_cell = CellType(size)
if is_training and config.keep_prob < 1:
lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=config.keep_prob)
cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
self.initial_state = cell.zero_state(batch_size, tf.float32)
# initializer shared by all variables created via `get_variable` (so they can be reused)
initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
with tf.device("/cpu:0"):
embedding = tf.get_variable("embedding", [vocab_size, size], initializer=initializer)
inputs = tf.nn.embedding_lookup(embedding, self.input_data)
if is_training and config.keep_prob < 1:
inputs = tf.nn.dropout(inputs, config.keep_prob)
outputs = []
states = []
state = self.initial_state
with tf.variable_scope("RNN", initializer=initializer):
for time_step in range(num_steps):
if time_step > 0:
tf.get_variable_scope().reuse_variables()
inputs_slice = inputs[:,time_step,:]
(cell_output, state) = cell(inputs_slice, state)
outputs.append(cell_output)
states.append(state)
self.final_state = states[-1]
output = tf.reshape(tf.concat(1, outputs), [-1, size])
w = tf.get_variable("softmax_w",
[size, vocab_size],
initializer=initializer)
b = tf.get_variable("softmax_b", [vocab_size], initializer=initializer)
logits = tf.nn.xw_plus_b(output, w, b) # compute logits for loss
targets = tf.reshape(self.targets, [-1]) # reshape our target outputs
weights = tf.ones([batch_size * num_steps]) # used to scale the loss average
# computes loss and performs softmax on our fully-connected output layer
loss = sequence_loss_by_example([logits], [targets], [weights], vocab_size)
self.cost = cost = tf.div(tf.reduce_sum(loss), batch_size, name="cost")
if is_training:
# setup learning rate variable to decay
self.lr = tf.Variable(1.0, trainable=False)
# define training operation and clip the gradients
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
optimizer = tf.train.GradientDescentOptimizer(self.lr)
self.train_op = optimizer.apply_gradients(zip(grads, tvars), name="train")
else:
# if this model isn't for training (i.e. testing/validation) then we don't do anything here
self.train_op = tf.no_op()
Example 13: __init__
def __init__(self, args, infer=False):
self.args = args
if infer:
args.batch_size = 1
args.seq_length = 1
if args.model == 'rnn': cell_fn = jzRNNCell
elif args.model == 'gru': cell_fn = jzGRUCell
elif args.model == 'lstm': cell_fn = jzLSTMCell
else: raise Exception("model type not supported: {}".format(args.model))
if args.activation == 'tanh': cell_af = tf.tanh
elif args.activation == 'sigmoid': cell_af = tf.sigmoid
elif args.activation == 'relu': cell_af = tf.nn.relu
else: raise Exception("activation function not supported: {}".format(args.activation))
self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
with tf.variable_scope('rnnlm'):
if not args.bidirectional:
softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
else:
softmax_w = tf.get_variable("softmax_w", [args.rnn_size*2, args.vocab_size])
softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
with tf.device("/cpu:0"):
embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
inputs = [tf.nn.dropout(tf.squeeze(input_, [1]),args.dropout) for input_ in inputs]
# one-directional RNN (nothing changed here..)
if not args.bidirectional:
cell = cell_fn(args.rnn_size,activation=cell_af)
self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
self.initial_state = cell.zero_state(args.batch_size, tf.float32)
def loop(prev, _):
prev = tf.matmul(prev, softmax_w) + softmax_b
prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
return tf.nn.embedding_lookup(embedding, prev_symbol)
outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
# bi-directional RNN
else:
lstm_fw = cell_fn(args.rnn_size,activation=cell_af)
lstm_bw = cell_fn(args.rnn_size,activation=cell_af)
self.lstm_fw = lstm_fw = rnn_cell.MultiRNNCell([lstm_fw]*args.num_layers)
self.lstm_bw = lstm_bw = rnn_cell.MultiRNNCell([lstm_bw]*args.num_layers)
self.initial_state_fw = lstm_fw.zero_state(args.batch_size,tf.float32)
self.initial_state_bw = lstm_bw.zero_state(args.batch_size,tf.float32)
outputs,_,_ = rnn.bidirectional_rnn(lstm_fw, lstm_bw, inputs,
initial_state_fw=self.initial_state_fw,
initial_state_bw=self.initial_state_bw,
sequence_length=args.batch_size)
output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size*2])
self.logits = tf.matmul(tf.nn.dropout(output,args.dropout), softmax_w) + softmax_b
self.probs = tf.nn.softmax(self.logits)
loss = seq2seq.sequence_loss_by_example([self.logits],
[tf.reshape(self.targets, [-1])],
[tf.ones([args.batch_size * args.seq_length])],
args.vocab_size)
self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
self.final_state = last_state
self.lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
args.grad_clip)
optimizer = tf.train.AdamOptimizer(self.lr)
self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 14: __init__
def __init__(self, args, infer=False):
self.args = args
# if infer:
# args.batch_size = 1
# args.seq_length = 1
if args.model == 'rnn':
cell_fn = rnn_cell.BasicRNNCell
elif args.model == 'gru':
cell_fn = rnn_cell.GRUCell
elif args.model == 'lstm':
cell_fn = rnn_cell.BasicLSTMCell
else:
raise Exception("model type not supported: {}".format(args.model))
cell = cell_fn(args.rnn_size)
self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
#self.seq_length = tf.placeholder(tf.int32)
#args.seq_length = self.seq_length
self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
self.targets = tf.placeholder(tf.int32, [args.batch_size])
self.initial_state = cell.zero_state(args.batch_size, tf.float32)
with tf.variable_scope('rnnlm'):
softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
with tf.device("/cpu:0"):
embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
# len(inputs)==args.seq_length, shape(inputs[0])==(args.batch_size, args.rnn_size)
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
def loop(prev, _):
return None # TODO
prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
return tf.nn.embedding_lookup(embedding, prev_symbol)
# len(outputs)==args.seq_length, shape(outputs[0])==(args.batch_size, args.rnn_size)
outputs, states = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
# # shape(output) = (batch_size*seq_length, rnn_size)
# output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
def handle_outputs(use_lastone=True):
""" Shape of return is [batch_size, rnn_size].
"""
if use_lastone:
return outputs[-1]
output = tf.add_n(outputs)
output = tf.div(output, len(outputs))
return output
output = handle_outputs(use_lastone=False)
# shape(logits) = (batch_size, vocab_size)
self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
self.probs = tf.nn.softmax(self.logits)
loss = seq2seq.sequence_loss_by_example([self.logits],
[tf.reshape(self.targets, [-1])],
[tf.ones([args.batch_size])],
args.vocab_size)
self.cost = tf.reduce_sum(loss) / args.batch_size
_ = tf.scalar_summary('cost', self.cost)
# Evaluate accuracy
correct_pred = tf.equal(tf.cast(tf.argmax(self.logits, 1), tf.int32), tf.reshape(self.targets, [-1]))
self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
_ = tf.scalar_summary('accuracy', self.accuracy)
self.final_state = states
self.lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
args.grad_clip)
optimizer = tf.train.AdamOptimizer(self.lr)
self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 15: loop
inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(embedding, input_data))
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
# Loop function for seq2seq
def loop(prev, _):
prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
return tf.nn.embedding_lookup(embedding, prev_symbol)
# Output of RNN
outputs, last_state = seq2seq.rnn_decoder(inputs, initial_state, cell, loop_function=None, scope='rnnlm')
output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
# Next word probability
probs = tf.nn.softmax(logits)
# Define LOSS
loss = seq2seq.sequence_loss_by_example([logits], # Input
[tf.reshape(targets, [-1])], # Target
[tf.ones([batch_size * seq_length])], # Weight
vocab_size)
# Define Optimizer
cost = tf.reduce_sum(loss) / batch_size / seq_length
final_state = last_state
lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
_optm = tf.train.AdamOptimizer(lr)
optm = _optm.apply_gradients(zip(grads, tvars))
print ("Network Ready")