This article collects typical usage examples of the Python function tensorflow.python.ops.math_ops.tanh. If you are wondering what the Python tanh function does, how to call it, or what real-world uses look like, the hand-picked code examples below should help.
Fifteen code examples of the tanh function are shown below, sorted by popularity by default.
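Before the examples, here is a minimal sketch of the op itself. This assumes a TensorFlow 1.x graph-mode environment; in public code the same op is exposed as tf.tanh / tf.math.tanh.

import tensorflow as tf
from tensorflow.python.ops import math_ops

x = tf.constant([-2.0, 0.0, 2.0])
y = math_ops.tanh(x)  # element-wise hyperbolic tangent, output in (-1, 1)

with tf.Session() as sess:
    print(sess.run(y))  # approx [-0.964  0.     0.964]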
Example 1: body
def body(i, prev_c, prev_h, actions, log_probs):
  # pylint: disable=g-long-lambda
  signal = control_flow_ops.cond(
      math_ops.equal(i, 0),
      lambda: array_ops.tile(device_go_embedding,
                             [self.hparams.num_children, 1]),
      lambda: embedding_ops.embedding_lookup(device_embeddings,
                                             actions.read(i - 1))
  )
  if self.hparams.keep_prob is not None:
    signal = nn_ops.dropout(signal, self.hparams.keep_prob)
  next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)
  query = math_ops.matmul(next_h, attn_w_2)
  query = array_ops.reshape(
      query, [self.hparams.num_children, 1, self.hparams.hidden_size])
  query = math_ops.tanh(query + attn_mem)
  query = array_ops.reshape(query, [
      self.hparams.num_children * self.num_groups, self.hparams.hidden_size
  ])
  query = math_ops.matmul(query, attn_v)
  query = array_ops.reshape(query,
                            [self.hparams.num_children, self.num_groups])
  query = nn_ops.softmax(query)
  query = array_ops.reshape(query,
                            [self.hparams.num_children, self.num_groups, 1])
  query = math_ops.reduce_sum(attn_mem * query, axis=1)
  query = array_ops.concat([next_h, query], axis=1)
  logits = math_ops.matmul(query, device_softmax)
  logits /= self.hparams.temperature
  if self.hparams.tanh_constant > 0:
    logits = math_ops.tanh(logits) * self.hparams.tanh_constant
  if self.hparams.logits_std_noise > 0:
    num_in_logits = math_ops.cast(
        array_ops.size(logits), dtype=dtypes.float32)
    avg_norm = math_ops.divide(
        linalg_ops.norm(logits), math_ops.sqrt(num_in_logits))
    logits_noise = random_ops.random_normal(
        array_ops.shape(logits),
        stddev=self.hparams.logits_std_noise * avg_norm)
    logits = control_flow_ops.cond(
        self.global_step > self.hparams.stop_noise_step, lambda: logits,
        lambda: logits + logits_noise)
  if mode == "sample":
    next_y = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
  elif mode == "greedy":
    next_y = math_ops.argmax(logits, 1)
  elif mode == "target":
    next_y = array_ops.slice(y, [0, i], [-1, 1])
  else:
    raise NotImplementedError
  next_y = math_ops.to_int32(next_y)
  next_y = array_ops.reshape(next_y, [self.hparams.num_children])
  actions = actions.write(i, next_y)
  log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=next_y)
  return i + 1, next_c, next_h, actions, log_probs
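The `tanh_constant` branch above is a common trick in RL-style controllers: squashing logits into (-C, C) bounds how peaked the sampling distribution can become. A standalone NumPy sketch, with the value of C purely illustrative:

import numpy as np

def squash_logits(logits, C=2.5):
    # Bound logits to (-C, C) before sampling, as in the controller above.
    # C is an illustrative constant, not a value from the original code.
    return np.tanh(logits) * C

logits = np.array([10.0, -3.0, 0.5])
print(squash_logits(logits))  # approx [ 2.5   -2.488  1.155]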
Example 2: LSTMCell
def LSTMCell(cls, x, mprev, cprev, weights):
  xm = array_ops.concat([x, mprev], 1)
  i_i, i_g, f_g, o_g = array_ops.split(
      value=math_ops.matmul(xm, weights), num_or_size_splits=4, axis=1)
  new_c = math_ops.sigmoid(f_g) * cprev + math_ops.sigmoid(
      i_g) * math_ops.tanh(i_i)
  new_c = clip_ops.clip_by_value(new_c, -50.0, 50.0)
  new_m = math_ops.sigmoid(o_g) * math_ops.tanh(new_c)
  return new_m, new_c
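A hedged usage sketch for the cell above. The enclosing class is not shown in the excerpt, so `Net` below is hypothetical; the key point is that the single weight matrix must have shape [input_dim + num_units, 4 * num_units] so one matmul yields all four gate pre-activations.

import tensorflow as tf

batch, input_dim, num_units = 4, 8, 16
x = tf.zeros([batch, input_dim])
mprev = tf.zeros([batch, num_units])
cprev = tf.zeros([batch, num_units])
weights = tf.get_variable("lstm_w", [input_dim + num_units, 4 * num_units])

# Net stands in for whatever class defines the classmethod above.
new_m, new_c = Net.LSTMCell(x, mprev, cprev, weights)  # each [batch, num_units]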
Example 3: _bahdanau_score
def _bahdanau_score(processed_query, keys, normalize):
  """Implements Bahdanau-style (additive) scoring function.

  This attention has two forms. The first is Bahdanau attention,
  as described in:

  Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio.
  "Neural Machine Translation by Jointly Learning to Align and Translate."
  ICLR 2015. https://arxiv.org/abs/1409.0473

  The second is the normalized form. This form is inspired by the
  weight normalization article:

  Tim Salimans, Diederik P. Kingma.
  "Weight Normalization: A Simple Reparameterization to Accelerate
   Training of Deep Neural Networks."
  https://arxiv.org/abs/1602.07868

  To enable the second form, set `normalize=True`.

  Args:
    processed_query: Tensor, shape `[batch_size, num_units]` to compare to
      keys.
    keys: Processed memory, shape `[batch_size, max_time, num_units]`.
    normalize: Whether to normalize the score function.

  Returns:
    A `[batch_size, max_time]` tensor of unnormalized score values.
  """
  dtype = processed_query.dtype
  # Get the number of hidden units from the trailing dimension of keys.
  num_units = keys.shape[2].value or array_ops.shape(keys)[2]
  # Reshape from [batch_size, ...] to [batch_size, 1, ...] for broadcasting.
  processed_query = array_ops.expand_dims(processed_query, 1)
  v = variable_scope.get_variable(
      "attention_v", [num_units], dtype=dtype)
  if normalize:
    # Scalar used in weight normalization.
    g = variable_scope.get_variable(
        "attention_g", dtype=dtype,
        initializer=math.sqrt((1. / num_units)))
    # Bias added prior to the nonlinearity.
    b = variable_scope.get_variable(
        "attention_b", [num_units], dtype=dtype,
        initializer=init_ops.zeros_initializer())
    # normed_v = g * v / ||v||
    normed_v = g * v * math_ops.rsqrt(
        math_ops.reduce_sum(math_ops.square(v)))
    return math_ops.reduce_sum(
        normed_v * math_ops.tanh(keys + processed_query + b), [2])
  else:
    return math_ops.reduce_sum(v * math_ops.tanh(keys + processed_query), [2])
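A self-contained restatement of the unnormalized branch, assuming plain tf ops and illustrative shapes that follow the docstring (the query and keys are assumed already projected to num_units):

import tensorflow as tf

batch, max_time, num_units = 2, 5, 8
keys = tf.random_normal([batch, max_time, num_units])    # already-projected memory
processed_query = tf.random_normal([batch, num_units])   # already-projected query
v = tf.random_normal([num_units])

q = tf.expand_dims(processed_query, 1)                   # [batch, 1, num_units]
score = tf.reduce_sum(v * tf.tanh(keys + q), axis=2)     # [batch, max_time]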
Example 4: attention
def attention(decoder_state, coverage=None):
  """Calculate the context vector and attention distribution from the decoder state.

  Args:
    decoder_state: state of the decoder
    coverage: Optional. Previous timestep's coverage vector, shape
      (batch_size, attn_len, 1, 1).

  Returns:
    context_vector: weighted sum of encoder_states
    attn_dist: attention distribution
    coverage: new coverage vector, shape (batch_size, attn_len, 1, 1)
  """
  with variable_scope.variable_scope("Attention"):
    # Pass the decoder state through a linear layer
    # (this is W_s s_t + b_attn in the paper).
    decoder_features = linear(decoder_state, attention_vec_size, True)  # shape (batch_size, attention_vec_size)
    decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1)  # reshape to (batch_size, 1, 1, attention_vec_size)

    def masked_attention(e):
      """Take softmax of e, then apply enc_padding_mask and re-normalize."""
      attn_dist = nn_ops.softmax(e)  # take softmax, shape (batch_size, attn_length)
      attn_dist *= enc_padding_mask  # apply mask
      masked_sums = tf.reduce_sum(attn_dist, axis=1)  # shape (batch_size)
      return attn_dist / tf.reshape(masked_sums, [-1, 1])  # re-normalize

    if use_coverage and coverage is not None:  # non-first step of coverage
      # Multiply coverage vector by w_c to get coverage_features.
      coverage_features = nn_ops.conv2d(coverage, w_c, [1, 1, 1, 1], "SAME")  # shape (batch_size, attn_length, 1, attention_vec_size)
      # Calculate v^T tanh(W_h h_i + W_s s_t + w_c c_i^t + b_attn)
      e = math_ops.reduce_sum(
          v * math_ops.tanh(encoder_features + decoder_features + coverage_features),
          [2, 3])  # shape (batch_size, attn_length)
      # Calculate attention distribution.
      attn_dist = masked_attention(e)
      # Update coverage vector.
      coverage += array_ops.reshape(attn_dist, [batch_size, -1, 1, 1])
    else:
      # Calculate v^T tanh(W_h h_i + W_s s_t + b_attn)
      e = math_ops.reduce_sum(
          v * math_ops.tanh(encoder_features + decoder_features), [2, 3])
      # Calculate attention distribution.
      attn_dist = masked_attention(e)
      if use_coverage:  # first step of training
        coverage = tf.expand_dims(tf.expand_dims(attn_dist, 2), 2)  # initialize coverage

    # Calculate the context vector from attn_dist and encoder_states.
    context_vector = math_ops.reduce_sum(
        array_ops.reshape(attn_dist, [batch_size, -1, 1, 1]) * encoder_states,
        [1, 2])  # shape (batch_size, attn_size)
    context_vector = array_ops.reshape(context_vector, [-1, attn_size])

  return context_vector, attn_dist, coverage
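Why masked_attention re-normalizes: a plain softmax assigns probability mass to padded positions, so the mask is applied after the softmax and the distribution is rescaled to sum to one. A toy NumPy sketch:

import numpy as np

e = np.array([[2.0, 1.0, 0.5]])     # scores for one batch row
mask = np.array([[1.0, 1.0, 0.0]])  # last position is padding

attn = np.exp(e) / np.exp(e).sum(axis=1, keepdims=True)
attn *= mask                        # zero out padded positions
attn /= attn.sum(axis=1, keepdims=True)  # re-normalize over real tokens
print(attn)  # approx [[0.731 0.269 0.   ]], i.e. softmax over unmasked scores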
Example 5: __call__
def __call__(self, inputs, state, scope=None):
  """Long short-term memory cell (LSTM)."""
  with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
    # Parameters of gates are concatenated into one multiply for efficiency.
    c, h = array_ops.split(1, 2, state)
    concat = linear([inputs, h], 4 * self._num_units, True)
    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(1, 4, concat)
    new_c = c * sigmoid(f + self._forget_bias) + sigmoid(i) * tanh(j)
    new_h = tanh(new_c) * sigmoid(o)
    return new_h, array_ops.concat(1, [new_c, new_h])
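Note that this example uses the pre-TF-1.0 argument order, where `array_ops.split(1, 2, state)` means "split along axis 1 into 2 pieces". A self-contained sketch of the same step with TF >= 1.0 signatures, with the kernel and bias passed in explicitly rather than created by a `linear` helper:

import tensorflow as tf

def basic_lstm_step(inputs, state, kernel, bias, forget_bias=1.0):
    # state is [batch, 2 * num_units]; kernel is
    # [input_dim + num_units, 4 * num_units]; bias is [4 * num_units].
    c, h = tf.split(state, 2, axis=1)
    gates = tf.matmul(tf.concat([inputs, h], axis=1), kernel) + bias
    i, j, f, o = tf.split(gates, 4, axis=1)
    new_c = c * tf.sigmoid(f + forget_bias) + tf.sigmoid(i) * tf.tanh(j)
    new_h = tf.tanh(new_c) * tf.sigmoid(o)
    return new_h, tf.concat([new_c, new_h], axis=1)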
Example 6: attention
def attention(query):
  """Put attention masks on hidden using hidden_features and query."""
  ds = []  # Results of attention reads will be stored here.
  if nest.is_sequence(query):  # If the query is a tuple, flatten it.
    query_list = nest.flatten(query)
    for q in query_list:  # Check that ndims == 2 if specified.
      ndims = q.get_shape().ndims
      if ndims:
        assert ndims == 2
    query = array_ops.concat(1, query_list)
  for i in xrange(num_heads):
    with variable_scope.variable_scope("Attention_%d" % i):
      y = linear(query, attention_vec_size, True)
      y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
      # Attention mask is a softmax of v^T * tanh(...).
      s = math_ops.reduce_sum(
          v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
      # Multiply with the source mask, then do softmax.
      if src_mask is not None:
        s = s * src_mask
      a = nn_ops.softmax(s)
      # Now calculate the attention-weighted vector d.
      d = math_ops.reduce_sum(
          array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
          [1, 2])
      ds.append(array_ops.reshape(d, [-1, attn_size]))
  return ds
Example 7: embed
def embed(self, func, embedding_classes, embedding_size, inputs, dtype=None,
          scope=None, keep_prob=1.0, initializer=None):
  embedder_cell = func(self._cell, embedding_classes, embedding_size,
                       initializer=initializer)
  # Like rnn(..) in rnn.py, but we call only the Embedder, not the RNN cell.
  outputs = []
  with vs.variable_scope(scope or "Embedder") as varscope:
    if varscope.caching_device is None:
      varscope.set_caching_device(lambda op: op.device)
    for time, input_ in enumerate(inputs):
      if time > 0:
        vs.get_variable_scope().reuse_variables()
      embedding = embedder_cell.__call__(input_, scope)
      if keep_prob < 1:
        embedding = tf.nn.dropout(embedding, keep_prob)
      # annotation = C~_t = tanh(E(x_t) + b_c)
      b_c = tf.get_variable("annotation_b", [embedding_size])
      annotation = tanh(tf.nn.bias_add(embedding, b_c))
      # weighted annotation = i_t * C~_t,
      # where i = sigmoid(E(x_t) + b_i)
      b_i = tf.get_variable("input_b", [embedding_size])
      i = sigmoid(tf.nn.bias_add(embedding, b_i))
      w_annotation = i * annotation
      outputs.append(w_annotation)
  # Return an empty state; it will be initialized by the decoder.
  batch_size = array_ops.shape(inputs[0])[0]
  state = self._cell.zero_state(batch_size, dtype)
  return (outputs, state)
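The annotation/gate pair above is just two elementwise nonlinearities over the same embedding: a tanh "content" term multiplied by a sigmoid "input gate". A NumPy sketch with toy sizes and zero biases for simplicity:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

embedding = np.random.randn(4, 16)  # E(x_t) for a batch of 4, size 16
b_c = np.zeros(16)
b_i = np.zeros(16)

annotation = np.tanh(embedding + b_c)            # C~_t in (-1, 1)
w_annotation = sigmoid(embedding + b_i) * annotation  # gated annotation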
Example 8: attention
def attention(query, use_attention=False):
  """Put attention masks on hidden using hidden_features and query."""
  attn_weights = []
  ds = []  # Results of attention reads will be stored here.
  for i in xrange(num_heads):
    with variable_scope.variable_scope("Attention_%d" % i):
      y = rnn_cell._linear(query, attention_vec_size, True)
      y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
      # Attention mask is a softmax of v^T * tanh(...).
      s = math_ops.reduce_sum(
          v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
      if use_attention is False:  # apply mean pooling
        weights = tf.tile(sequence_length, tf.stack([attn_length]))
        weights = array_ops.reshape(weights, tf.shape(s))
        a = array_ops.ones(tf.shape(s), dtype=dtype) / math_ops.to_float(weights)
        # a = array_ops.ones(tf.shape(s), dtype=dtype) / math_ops.to_float(tf.shape(s)[1])
      else:
        a = nn_ops.softmax(s)
      attn_weights.append(a)
      # Now calculate the attention-weighted vector d.
      d = math_ops.reduce_sum(
          array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
          [1, 2])
      ds.append(array_ops.reshape(d, [-1, attn_size]))
  return attn_weights, ds
Example 9: call
def call(self, inputs, state):
  """Run one step of the factorized LSTM cell."""
  (c_prev, m_prev) = state
  self._batch_size = inputs.shape[0].value or array_ops.shape(inputs)[0]
  scope = vs.get_variable_scope()
  with vs.variable_scope(scope, initializer=self._initializer):
    x = array_ops.concat([inputs, m_prev], axis=1)
    with vs.variable_scope("first_gemm"):
      if self._linear1 is None:
        # No bias for the bottleneck projection.
        self._linear1 = _Linear(x, self._fact_size, False)
      R_fact = self._linear1(x)
    with vs.variable_scope("second_gemm"):
      if self._linear2 is None:
        self._linear2 = _Linear(R_fact, 4 * self._num_units, True)
      R = self._linear2(R_fact)
    i, j, f, o = array_ops.split(R, 4, 1)
    c = (math_ops.sigmoid(f + self._forget_bias) * c_prev +
         math_ops.sigmoid(i) * math_ops.tanh(j))
    m = math_ops.sigmoid(o) * self._activation(c)
    if self._num_proj is not None:
      with vs.variable_scope("projection"):
        if self._linear3 is None:
          self._linear3 = _Linear(m, self._num_proj, False)
        m = self._linear3(m)
  new_state = rnn_cell_impl.LSTMStateTuple(c, m)
  return m, new_state
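The two-GEMM bottleneck above factorizes one large [D, 4H] projection into [D, F] x [F, 4H] with F much smaller than 4H, cutting parameters and compute at a small cost in expressiveness. A NumPy shape sketch with purely illustrative sizes:

import numpy as np

D, H, F = 512 + 256, 256, 64  # input+state dim, num_units, fact_size
x = np.random.randn(32, D).astype(np.float32)
W1 = np.random.randn(D, F).astype(np.float32)      # "first_gemm", no bias
W2 = np.random.randn(F, 4 * H).astype(np.float32)  # "second_gemm"

R = x @ W1 @ W2  # same shape as a direct [D, 4H] GEMM would give
print(R.shape)   # (32, 1024); ~114k params here vs ~786k unfactorized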
Example 10: _logits_cumulative
def _logits_cumulative(self, inputs, stop_gradient):
  """Evaluate logits of the cumulative densities.

  Args:
    inputs: The values at which to evaluate the cumulative densities,
      expected to be a `Tensor` of shape `(channels, 1, batch)`.
    stop_gradient: Boolean. Whether to add `array_ops.stop_gradient` calls so
      that the gradient of the output with respect to the density model
      parameters is disconnected (the gradient with respect to `inputs` is
      left untouched).

  Returns:
    A `Tensor` of the same shape as `inputs`, containing the logits of the
    cumulative densities evaluated at the given inputs.
  """
  logits = inputs
  for i in range(len(self.filters) + 1):
    matrix = self._matrices[i]
    if stop_gradient:
      matrix = array_ops.stop_gradient(matrix)
    logits = math_ops.matmul(matrix, logits)

    bias = self._biases[i]
    if stop_gradient:
      bias = array_ops.stop_gradient(bias)
    logits += bias

    if i < len(self._factors):
      factor = self._factors[i]
      if stop_gradient:
        factor = array_ops.stop_gradient(factor)
      logits += factor * math_ops.tanh(logits)
  return logits
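A toy NumPy sketch of one iteration of the chain above: a per-channel affine map followed by a gated tanh residual. Mathematically, x + a*tanh(x) is strictly increasing whenever |a| < 1 (its derivative is at least 1 - |a|), and a positive matrix keeps the affine step increasing too; this is why such chains can model a valid cumulative density. How `self._matrices` and `self._factors` are constrained happens outside this excerpt, so the positivity/squashing below is an assumption for illustration.

import numpy as np

channels, batch = 3, 5
logits = np.random.randn(channels, 1, batch)

matrix = np.abs(np.random.randn(channels, 1, 1))   # positive slope -> monotone
bias = np.random.randn(channels, 1, 1)
factor = np.tanh(np.random.randn(channels, 1, 1))  # squashed into (-1, 1)

logits = matrix @ logits + bias     # affine step, shape (channels, 1, batch)
logits += factor * np.tanh(logits)  # gated tanh residual, still monotone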
Example 11: testOptimizerInit
def testOptimizerInit(self):
  with ops.Graph().as_default():
    layer_collection = lc.LayerCollection()

    inputs = array_ops.ones((2, 1)) * 2
    weights_val = np.ones((1, 1), dtype=np.float32) * 3.
    weights = variable_scope.get_variable(
        'w', initializer=array_ops.constant(weights_val))
    bias = variable_scope.get_variable(
        'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
    output = math_ops.matmul(inputs, weights) + bias
    layer_collection.register_fully_connected((weights, bias), inputs, output)

    logits = math_ops.tanh(output)
    targets = array_ops.constant([[0.], [1.]])
    output = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets))
    layer_collection.register_categorical_predictive_distribution(logits)

    optimizer.KfacOptimizer(
        0.1,
        0.2,
        0.3,
        layer_collection,
        momentum=0.5,
        momentum_type='regular')
Example 12: __call__
def __call__(self, inputs, state, scope=None):
  with _checked_scope(self, scope or "rwa_cell", reuse=self._reuse):
    h, n, d, a_max = state

    with vs.variable_scope("u"):
      u = _linear(inputs, self._num_units, True)
    with vs.variable_scope("g"):
      g = _linear([inputs, h], self._num_units, True)
    with vs.variable_scope("a"):
      # The bias term, when factored out of the numerator and
      # denominator, cancels and is unnecessary.
      a = _linear([inputs, h], self._num_units, False)

    z = tf.multiply(u, tanh(g))

    a_newmax = tf.maximum(a_max, a)
    exp_diff = tf.exp(a_max - a_newmax)
    exp_scaled = tf.exp(a - a_newmax)
    n = tf.multiply(n, exp_diff) + tf.multiply(z, exp_scaled)  # numerically stable update of numerator
    d = tf.multiply(d, exp_diff) + exp_scaled  # numerically stable update of denominator
    h_new = self._activation(tf.div(n, d))

    new_state = RWACellTuple(h_new, n, d, a_newmax)
  return h_new, new_state
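The exp_diff/exp_scaled rescaling above is the standard running-max trick: it computes a softmax-weighted running average of z without ever evaluating exp(a) at full magnitude. A scalar NumPy sketch with toy values:

import numpy as np

n = d = 0.0
a_max = -np.inf
for z, a in [(1.0, 50.0), (2.0, 100.0), (3.0, 99.0)]:
    a_new = max(a_max, a)
    n = n * np.exp(a_max - a_new) + z * np.exp(a - a_new)  # rescaled numerator
    d = d * np.exp(a_max - a_new) + np.exp(a - a_new)      # rescaled denominator
    a_max = a_new
print(n / d)  # approx 2.27: dominated by the a=100 term, some mass on a=99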
Example 13: downscale
def downscale(self, inp):
  with vs.variable_scope("Downscale"):
    inp2d = tf.reshape(tf.transpose(inp, perm=[1, 0, 2]), [-1, 2 * self.size])
    out2d = rnn_cell.linear(inp2d, self.size, True, 1.0)
    out3d = tf.reshape(out2d, [self.batch_size, -1, self.size])
    out3d = tf.transpose(out3d, perm=[1, 0, 2])
    out = tanh(out3d)
  return out
Example 14: __init__
def __init__(self, num_units, encoder_output, scope=None):
  self.hs = encoder_output
  with vs.variable_scope(scope or type(self).__name__):
    with vs.variable_scope("Attn1"):
      hs2d = tf.reshape(self.hs, [-1, num_units])
      phi_hs2d = tanh(rnn_cell.linear(hs2d, num_units, True, 1.0))
      self.phi_hs = tf.reshape(phi_hs2d, tf.shape(self.hs))
  super(GRUCellAttn, self).__init__(num_units)
Example 15: attention
def attention(query):
  """Point on hidden using hidden_features and query."""
  with vs.variable_scope("Attention"):
    y = rnn_cell.linear(query, attention_vec_size, True)
    y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
    # Attention mask is a softmax of v^T * tanh(...).
    s = math_ops.reduce_sum(
        v * math_ops.tanh(hidden_features + y), [2, 3])
    return s