This article collects typical usage examples of the tensorflow.rsqrt function in Python: what rsqrt computes, how to call it, and what real code that uses it looks like. The selected examples below may help answer those questions.
Fifteen code examples of the rsqrt function are shown below, sorted by popularity by default.
Example 1: _apply_noisy_update
def _apply_noisy_update(self, mom, grad, var):
    # Compute and apply the gradient update following
    # preconditioned Langevin dynamics.
    stddev = tf.where(
        tf.squeeze(self._counter > self._burnin),
        tf.cast(tf.rsqrt(self._learning_rate), grad.dtype),
        tf.zeros([], grad.dtype))
    # Keep an exponentially weighted moving average of squared gradients.
    # Not thread safe.
    decay_tensor = tf.cast(self._decay_tensor, grad.dtype)
    new_mom = decay_tensor * mom + (1. - decay_tensor) * tf.square(grad)
    preconditioner = tf.rsqrt(
        new_mom + tf.cast(self._diagonal_bias, grad.dtype))
    # Compute gradients of the preconditioner.
    _, preconditioner_grads = diag_jacobian(
        xs=var,
        ys=preconditioner,
        parallel_iterations=self._parallel_iterations)
    mean = 0.5 * (preconditioner * grad *
                  tf.cast(self._data_size, grad.dtype)
                  - preconditioner_grads[0])
    stddev *= tf.sqrt(preconditioner)
    result_shape = tf.broadcast_dynamic_shape(tf.shape(mean),
                                              tf.shape(stddev))
    with tf.control_dependencies([tf.assign(mom, new_mom)]):
        return tf.random_normal(shape=result_shape,
                                mean=mean,
                                stddev=stddev,
                                dtype=grad.dtype)
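The core of this update is an RMSProp-style moving average of squared gradients turned into a diagonal preconditioner via rsqrt. Below is a minimal NumPy sketch of just that arithmetic; the decay and diagonal_bias values and the shapes are assumptions, and the Jacobian correction and noise terms from the snippet are omitted.

import numpy as np

def precondition(mom, grad, decay=0.9, diagonal_bias=1e-8):
    # Exponentially weighted moving average of squared gradients.
    new_mom = decay * mom + (1.0 - decay) * np.square(grad)
    # Diagonal preconditioner: 1 / sqrt(new_mom + bias), i.e. tf.rsqrt(...).
    preconditioner = 1.0 / np.sqrt(new_mom + diagonal_bias)
    return new_mom, preconditioner

mom = np.zeros(3)
grad = np.array([0.5, -1.0, 2.0])
mom, pre = precondition(mom, grad)
print(pre)  # larger values where squared gradients are small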
Example 2: batch_normalized_linear_layer
def batch_normalized_linear_layer(state_below, scope_name, n_inputs, n_outputs, stddev, wd, eps=.00001, test=False):
    """
    A linear layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        weight = _variable_with_weight_decay(
            "weights", shape=[n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        act = tf.matmul(state_below, weight)
        # get moments
        act_mean, act_variance = tf.nn.moments(act, [0])
        # get mean and variance variables
        mean = _variable_on_cpu('bn_mean', [n_outputs], tf.constant_initializer(0.0), trainable=False)
        variance = _variable_on_cpu('bn_variance', [n_outputs], tf.constant_initializer(1.0), trainable=False)
        # assign the moments
        if not test:
            assign_mean = mean.assign(act_mean)
            assign_variance = variance.assign(act_variance)
            act_bn = tf.mul((act - act_mean), tf.rsqrt(act_variance + eps), name=scope.name + "_bn")
        else:
            act_bn = tf.mul((act - mean), tf.rsqrt(variance + eps), name=scope.name + "_bn")
        beta = _variable_on_cpu("beta", [n_outputs], tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs], tf.constant_initializer(1.0))
        bn = tf.add(tf.mul(act_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn, .1, name=scope.name, is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(dependencies=[assign_mean, assign_variance], output_tensor=output)
        _activation_summary(output)
        return output
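The normalization itself is the standard batch-norm transform, (act - mean) * rsqrt(var + eps) * gamma + beta, with moments taken over the batch axis. A NumPy sketch of that transform under assumed shapes (this is only the arithmetic, not the layer above with its variables and summaries):

import numpy as np

def batch_norm(act, gamma, beta, eps=1e-5):
    # One mean/variance per output unit, computed over the batch axis.
    mean = act.mean(axis=0)
    var = act.var(axis=0)
    # 1 / sqrt(var + eps) plays the role of tf.rsqrt(act_variance + eps).
    act_bn = (act - mean) / np.sqrt(var + eps)
    return act_bn * gamma + beta

act = np.random.randn(32, 4)                      # [batch, n_outputs]
out = batch_norm(act, np.ones(4), np.zeros(4))
print(out.mean(axis=0).round(6), out.std(axis=0).round(3))  # ~0 and ~1 per unit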
Example 3: cosine_distances
def cosine_distances(test, support):
    """Computes pairwise cosine distances between provided tensors

    Parameters
    ----------
    test: tf.Tensor
        Of shape (n_test, n_feat)
    support: tf.Tensor
        Of shape (n_support, n_feat)

    Returns
    -------
    tf.Tensor:
        Of shape (n_test, n_support)
    """
    rnorm_test = tf.rsqrt(
        tf.reduce_sum(tf.square(test), 1, keep_dims=True)) + 1e-7
    rnorm_support = tf.rsqrt(
        tf.reduce_sum(tf.square(support), 1, keep_dims=True)) + 1e-7
    test_normalized = test * rnorm_test
    support_normalized = support * rnorm_support
    # Transpose for mul
    support_normalized_t = tf.transpose(support_normalized, perm=[1, 0])
    g = tf.matmul(test_normalized, support_normalized_t)  # Gram matrix
    return g
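Despite the name, the function returns the Gram matrix of row-normalized inputs, i.e. pairwise cosine similarities. A NumPy re-derivation of the same computation, kept as an illustration only (the 1e-7 stabilizer and shapes follow the snippet):

import numpy as np

def cosine_similarities(test, support, eps=1e-7):
    # Reciprocal row norms, mirroring tf.rsqrt(reduce_sum(square(x), 1)) + eps.
    rnorm_test = 1.0 / np.sqrt(np.sum(test**2, axis=1, keepdims=True)) + eps
    rnorm_support = 1.0 / np.sqrt(np.sum(support**2, axis=1, keepdims=True)) + eps
    return (test * rnorm_test) @ (support * rnorm_support).T

test = np.random.randn(5, 8)       # (n_test, n_feat)
support = np.random.randn(3, 8)    # (n_support, n_feat)
print(cosine_similarities(test, support).shape)  # (5, 3)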
Example 4: batch_normalized_conv_layer
def batch_normalized_conv_layer(state_below, scope_name, n_inputs, n_outputs, filter_shape, stddev, wd, eps=.00001, test=False):
    """
    Convolutional layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        kernel = _variable_with_weight_decay(
            "weights", shape=[filter_shape[0], filter_shape[1], n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        conv = tf.nn.conv2d(state_below, kernel, [1, 1, 1, 1], padding='SAME')
        # get moments
        conv_mean, conv_variance = tf.nn.moments(conv, [0, 1, 2])
        # get mean and variance variables
        mean = _variable_on_cpu("bn_mean", [n_outputs], tf.constant_initializer(0.0), False)
        variance = _variable_on_cpu("bn_variance", [n_outputs], tf.constant_initializer(1.0), False)
        # assign the moments
        if not test:
            assign_mean = mean.assign(conv_mean)
            assign_variance = variance.assign(conv_variance)
            conv_bn = tf.mul((conv - conv_mean), tf.rsqrt(conv_variance + eps), name=scope.name + "_bn")
        else:
            conv_bn = tf.mul((conv - mean), tf.rsqrt(variance + eps), name=scope.name + "_bn")
        beta = _variable_on_cpu("beta", [n_outputs], tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs], tf.constant_initializer(1.0))
        bn = tf.add(tf.mul(conv_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn, .1, name=scope.name, is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(dependencies=[assign_mean, assign_variance], output_tensor=output)
        _activation_summary(output)
        return output
Example 5: _resource_apply_dense
def _resource_apply_dense(self, grad, var):
    grad_squared = tf.square(grad) + 1e-30
    grad_squared_mean = tf.reduce_mean(grad_squared)
    decay_rate = self._decay_rate
    update_scale = self._learning_rate
    if self._multiply_by_parameter_scale:
        update_scale *= self._parameter_scale(var)
    # HACK: Make things dependent on grad.
    # This confounds the XLA rewriter and keeps it from fusing computations
    # across different variables. This fusion is bad for HBM usage, since
    # it causes the gradients to persist in memory.
    decay_rate += grad_squared_mean * 1e-30
    update_scale += grad_squared_mean * 1e-30
    # END HACK
    mixing_rate = 1.0 - decay_rate
    shape = var.get_shape().as_list()
    updates = []
    if self._should_use_factored_second_moment_estimate(shape):
        grad_squared_row_mean = tf.reduce_mean(grad_squared, 1)
        grad_squared_col_mean = tf.reduce_mean(grad_squared, 0)
        vr = self.get_slot(var, "vr")
        new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
        vc = self.get_slot(var, "vc")
        new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
        vr_update = tf.assign(vr, new_vr, use_locking=self._use_locking)
        vc_update = tf.assign(vc, new_vc, use_locking=self._use_locking)
        updates = [vr_update, vc_update]
        long_term_mean = tf.reduce_mean(new_vr)
        r_factor = tf.rsqrt(new_vr / long_term_mean)
        c_factor = tf.rsqrt(new_vc)
        x = grad * tf.expand_dims(r_factor, 1) * tf.expand_dims(c_factor, 0)
    else:
        v = self.get_slot(var, "v")
        new_v = decay_rate * v + mixing_rate * grad_squared
        v_update = tf.assign(v, new_v, use_locking=self._use_locking)
        updates = [v_update]
        x = grad * tf.rsqrt(new_v)
    if self._clipping_threshold is not None:
        clipping_denom = tf.maximum(1.0, reduce_rms(x) / self._clipping_threshold)
        x /= clipping_denom
    subtrahend = update_scale * x
    if self._beta1:
        m = self.get_slot(var, "m")
        new_m = self._beta1 * m + (1.0 - self._beta1) * subtrahend
        updates.append(tf.assign(m, new_m, use_locking=self._use_locking))
        subtrahend = new_m
    var_update = tf.assign_sub(var, subtrahend, use_locking=self._use_locking)
    updates = [var_update] + updates
    return tf.group(*updates)
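For matrix-shaped variables, this optimizer keeps a factored second moment: a per-row mean vr and a per-column mean vc, recombined through rsqrt so the full second-moment matrix is never stored. A NumPy sketch of that recombination only; the decay value, initial slots, and shapes are assumptions, and the clipping, momentum, and variable-update bookkeeping are omitted.

import numpy as np

def factored_rsqrt_precondition(grad, vr, vc, decay=0.8, eps=1e-30):
    g2 = np.square(grad) + eps
    new_vr = decay * vr + (1 - decay) * g2.mean(axis=1)   # per-row second moment
    new_vc = decay * vc + (1 - decay) * g2.mean(axis=0)   # per-column second moment
    r_factor = 1.0 / np.sqrt(new_vr / new_vr.mean())      # tf.rsqrt(new_vr / long_term_mean)
    c_factor = 1.0 / np.sqrt(new_vc)                      # tf.rsqrt(new_vc)
    update = grad * r_factor[:, None] * c_factor[None, :]
    return update, new_vr, new_vc

grad = np.random.randn(4, 6)
update, vr, vc = factored_rsqrt_precondition(grad, np.ones(4), np.ones(6))
print(update.shape)  # (4, 6), same shape as the variable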
Example 6: l2_normalize
def l2_normalize(incoming, dim, epsilon=1e-12, name="l2_normalize"):
    """ L2 Normalization.

    Normalizes along dimension `dim` using an L2 norm.

    For a 1-D tensor with `dim = 0`, computes
    ```
    output = x / sqrt(max(sum(x**2), epsilon))
    ```
    For `x` with more dimensions, independently normalizes each 1-D slice along
    dimension `dim`.

    Arguments:
        incoming: `Tensor`. Incoming Tensor.
        dim: `int`. Dimension along which to normalize.
        epsilon: `float`. A lower bound value for the norm. Will use
            `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`.
        name: `str`. A name for this layer (optional).

    Returns:
        A `Tensor` with the same shape as `x`.
    """
    with tf.variable_op_scope([incoming], name) as name:
        x = tf.convert_to_tensor(incoming, name="x")
        square_sum = tf.reduce_sum(tf.square(x), [dim], keep_dims=True)
        x_inv_norm = tf.rsqrt(tf.maximum(square_sum, epsilon))
        return tf.mul(x, x_inv_norm, name=name)
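The body is exactly the formula in the docstring: divide by sqrt(max(sum(x**2), epsilon)), written as a multiply by tf.rsqrt. A NumPy equivalent for the 1-D case (illustrative only; TensorFlow's built-in tf.nn.l2_normalize implements the same formula):

import numpy as np

def l2_normalize(x, epsilon=1e-12):
    square_sum = np.sum(np.square(x))
    return x / np.sqrt(np.maximum(square_sum, epsilon))

v = np.array([3.0, 4.0])
print(l2_normalize(v))                  # [0.6 0.8]
print(np.linalg.norm(l2_normalize(v)))  # 1.0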
Example 7: full_batchnorm
def full_batchnorm(pre_activations, batch, epsilon=1e-8, train=True,
                   beta_init=tf.constant_initializer(0),
                   gamma_init=tf.constant_initializer(1)):
    """Does full batch normalisation of pre-activations.

    Expects to be given something pre-nonlinearity.
    This is only set up for feed-forward nets; to work properly for
    recurrent nets we would need to know what step we are up to, as in the
    paper they calculate population statistics at every time step.

    Args:
        pre_activations: the logits that will be normalised. We assume this is
            of shape [batch_size, num_units].
        batch: the data which generated the logits, which we need to calculate
            statistics used to normalise.
        train: if true, the statistics will be recalculated for each batch. If not,
            then the average from the training phase will be used.

    Returns:
        batch normalised activations.
    """
    # get beta and gamma
    num_units = pre_activations.get_shape()[0]
    beta = tf.get_variable('beta', [num_units])
    gamma = tf.get_variable('gamma', [num_units])
    mean, variance = tf.nn.moments(pre_activations, [0])
    isqr = tf.rsqrt(variance + epsilon)
    centered = pre_activations - mean
    return beta + gamma * centered * isqr
Example 8: ae_latent_softmax
def ae_latent_softmax(latents_pred, latents_discrete_hot, vocab_size, hparams):
    """Latent prediction and loss.

    Args:
        latents_pred: Tensor of shape [..., depth].
        latents_discrete_hot: Tensor of shape [..., vocab_size].
        vocab_size: an int representing the vocab size.
        hparams: tf.contrib.training.HParams.

    Returns:
        sample: Tensor of shape [...], a sample from a multinomial distribution.
        loss: Tensor of shape [...], the softmax cross-entropy.
    """
    with tf.variable_scope("latent_logits"):
        latents_logits = tf.layers.dense(latents_pred, vocab_size,
                                         name="logits_dense")
        if hparams.logit_normalization:
            latents_logits *= tf.rsqrt(1e-8 +
                                       tf.reduce_mean(tf.square(latents_logits)))
        loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=latents_discrete_hot, logits=latents_logits)

        # TODO(trandustin): tease this out from ae_latent_softmax.
        # we use just the loss portion to anchor prior / encoder on text.
        sample = multinomial_sample(latents_logits,
                                    vocab_size,
                                    hparams.sampling_method,
                                    hparams.sampling_temp)
        return sample, loss
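With logit_normalization enabled, the logits are rescaled by the reciprocal of their root-mean-square value, so the softmax sees inputs with RMS close to 1. A NumPy sketch of that single step (the 1e-8 stabilizer follows the snippet; everything else here is illustrative):

import numpy as np

def rms_normalize_logits(logits, eps=1e-8):
    # tf.rsqrt(eps + mean(square(logits))) written as a plain reciprocal square root.
    return logits * (1.0 / np.sqrt(eps + np.mean(np.square(logits))))

logits = np.random.randn(2, 10) * 5.0
normed = rms_normalize_logits(logits)
print(np.sqrt(np.mean(normed**2)))  # ~1.0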
Example 9: BatchClipByL2norm
def BatchClipByL2norm(t, upper_bound, name=None):
    """Clip an array of tensors by L2 norm.

    Shrink each dimension-0 slice of tensor (for a matrix it is each row) such
    that the l2 norm is at most upper_bound. Here we clip each row as it
    corresponds to each example in the batch.

    Args:
        t: the input tensor.
        upper_bound: the upper bound of the L2 norm.
        name: optional name.

    Returns:
        the clipped tensor.
    """
    assert upper_bound > 0
    with tf.op_scope([t, upper_bound], name, "batch_clip_by_l2norm") as name:
        saved_shape = tf.shape(t)
        batch_size = tf.slice(saved_shape, [0], [1])
        t2 = tf.reshape(t, tf.concat(0, [batch_size, [-1]]))
        upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),
                                  tf.constant(1.0 / upper_bound))
        # Add a small number to avoid divide by 0
        l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001)
        scale = tf.minimum(l2norm_inv, upper_bound_inv) * upper_bound
        clipped_t = tf.matmul(tf.diag(scale), t2)
        clipped_t = tf.reshape(clipped_t, saved_shape, name=name)
        return clipped_t
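Each row ends up scaled by min(1/||row||, 1/upper_bound) * upper_bound, which leaves rows already below the bound (almost) untouched and shrinks the rest onto the bound. A NumPy sketch of the per-row scaling (the 1e-6 stabilizer follows the snippet; the reshape and diag plumbing is omitted):

import numpy as np

def batch_clip_by_l2norm(t, upper_bound):
    l2norm_inv = 1.0 / np.sqrt(np.sum(t * t, axis=1) + 1e-6)  # tf.rsqrt(...)
    scale = np.minimum(l2norm_inv, 1.0 / upper_bound) * upper_bound
    return t * scale[:, None]

t = np.array([[3.0, 4.0],    # norm 5   -> clipped to ~1
              [0.3, 0.4]])   # norm 0.5 -> left (almost) unchanged
print(np.linalg.norm(batch_clip_by_l2norm(t, 1.0), axis=1))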
Example 10: compute_next_h_d
def compute_next_h_d(self, meta_opt, w_bot, w_top, bias, x, z, d, backward_w):
    """Propagate error back down the network while computing hidden state."""
    if z is None:
        z = x
    h = meta_opt.compute_h(x, z, d, bias, w_bot,
                           w_top)  # [bs x 60 x h_channels]

    # compute the next d
    delta = meta_opt.next_delta(z, h, d)

    if backward_w is not None:

        def delta_matmul(w, delta):
            d = tf.transpose(delta, [0, 2, 1])  # [bs x delta_channels x n_units]
            d = snt.BatchApply(lambda x: tf.matmul(x, w, transpose_b=True))(d)
            d = tf.transpose(d, [0, 2, 1])
            return d

        # replace the "backward pass" with a random matrix.
        d = delta_matmul(backward_w, delta)  # [bs x 60 x delta_channels]
        var = tf.reduce_mean(tf.square(d), [2], keepdims=True)
        d = d * tf.rsqrt(1e-6 + var)

    return h, d
Example 11: _norm
def _norm(x, g=None, b=None, e=1e-5, axis=[1]):
    u = tf.reduce_mean(x, axis=axis, keepdims=True)
    s = tf.reduce_mean(tf.square(x - u), axis=axis, keepdims=True)
    x = (x - u) * tf.rsqrt(s + e)
    if g is not None and b is not None:
        x = x * g + b
    return x
Example 12: multihead_attn
def multihead_attn(q, k, v):
    # q, k, v have shape [batch, heads, sequence, features]
    w = tf.matmul(q, k, transpose_b=True)
    w = w * tf.rsqrt(tf.cast(v.shape[-1].value, w.dtype))
    w = tf.nn.softmax(w)
    a = tf.matmul(w, v)
    return a
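This is scaled dot-product attention, with the usual 1/sqrt(d) scaling written as tf.rsqrt of the feature dimension. A NumPy sketch of the same computation (the shapes here are assumptions, chosen to match the comment in the snippet):

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def attn(q, k, v):
    # w = q k^T / sqrt(d_features), then softmax over the key axis.
    w = (q @ k.transpose(0, 1, 3, 2)) * (1.0 / np.sqrt(v.shape[-1]))
    return softmax(w) @ v

q = k = v = np.random.randn(2, 4, 7, 16)  # [batch, heads, sequence, features]
print(attn(q, k, v).shape)                # (2, 4, 7, 16)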
Example 13: simple_attention
def simple_attention(target, source, bias=None, summaries=True):
    """A simple attention function.

    Args:
        target: a `Tensor` with shape `[batch, target_timesteps, depth]` or
            `[batch, target_timesteps_1, target_timesteps_2, depth]`
        source: a `Tensor` with shape `[batch, source_timesteps, depth]` or
            `[batch, source_timesteps_1, source_timesteps_2, depth]`
        bias: an optional `Tensor` with shape `[batch, timesteps, 1, 1]` used
            to mask the attention to not attend to padding of input.
        summaries: Boolean, whether to output summaries.

    Returns:
        a `Tensor` with same shape as `target`
    """
    with tf.name_scope("simple_attention", [target, source]):
        target_shape = tf.shape(target)
        source_shape = tf.shape(source)
        target = tf.reshape(target, [
            target_shape[0], target_shape[1] * target_shape[2], target_shape[3]
        ])
        source = tf.reshape(source, [
            source_shape[0], source_shape[1] * source_shape[2], source_shape[3]
        ])
        attention = tf.matmul(target, source, transpose_b=True)
        attention *= tf.rsqrt(tf.to_float(tf.shape(target)[2]))
        if bias is not None:
            attention += tf.expand_dims(tf.squeeze(bias, axis=[2, 3]), axis=1)
        attention = tf.nn.softmax(attention)
        if summaries and not tf.get_variable_scope().reuse:
            tf.summary.image("attention", tf.expand_dims(attention, 3), max_outputs=5)
        attended = tf.matmul(attention, source)
        return tf.reshape(attended, target_shape)
Example 14: layer_norm
def layer_norm(x: tf.Tensor, epsilon: float = 1e-6) -> tf.Tensor:
    """Layer normalize the tensor x, averaging over the last dimension.

    Implementation based on tensor2tensor.

    Arguments:
        x: The ``Tensor`` to normalize.
        epsilon: The smoothing parameter of the normalization.

    Returns:
        The normalized tensor.
    """
    with tf.variable_scope("LayerNorm"):
        gamma = get_variable(
            name="gamma",
            shape=[x.get_shape()[-1]],
            dtype=tf.float32,
            initializer=tf.ones_initializer())
        beta = get_variable(
            name="beta",
            shape=[x.get_shape()[-1]],
            dtype=tf.float32,
            initializer=tf.zeros_initializer())

        mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
        variance = tf.reduce_mean(
            tf.square(x - mean),
            axis=[-1],
            keepdims=True)

        norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
        return norm_x * gamma + beta
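Unlike the batch-norm examples earlier, the moments here are taken over the last (feature) axis of each example, so no population statistics need to be tracked between training and test. A NumPy sketch of the normalization step only (the gamma/beta handling mirrors the snippet; the shapes are assumptions):

import numpy as np

def layer_norm(x, gamma, beta, epsilon=1e-6):
    mean = x.mean(axis=-1, keepdims=True)
    variance = ((x - mean) ** 2).mean(axis=-1, keepdims=True)
    # (x - mean) * rsqrt(variance + epsilon), then scale and shift.
    return (x - mean) / np.sqrt(variance + epsilon) * gamma + beta

x = np.random.randn(2, 5, 8)                  # [batch, time, features]
out = layer_norm(x, np.ones(8), np.zeros(8))
print(out.mean(-1).round(6).max(), out.std(-1).round(3).max())  # ~0 and ~1 per position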
Example 15: _opsBatchNorm
def _opsBatchNorm(self, x, m, v, beta, gamma, epsilon,
                  scale_after_normalization):
    y = (x - m) * tf.rsqrt(v + epsilon)
    if scale_after_normalization:
        y = gamma * y
    y += beta
    return y