This article collects typical usage examples of the tensorflow.rsqrt function in Python: what rsqrt computes, how to call it, and what real code that uses it looks like. The selected examples below may help answer those questions.
Fifteen code examples of the rsqrt function are shown below, sorted by popularity by default.
Example 1: _apply_noisy_update
def _apply_noisy_update(self, mom, grad, var):
    # Compute and apply the gradient update following
    # preconditioned Langevin dynamics.
    stddev = tf.where(
        tf.squeeze(self._counter > self._burnin),
        tf.cast(tf.rsqrt(self._learning_rate), grad.dtype),
        tf.zeros([], grad.dtype))
    # Keep an exponentially weighted moving average of squared gradients.
    # Not thread safe.
    decay_tensor = tf.cast(self._decay_tensor, grad.dtype)
    new_mom = decay_tensor * mom + (1. - decay_tensor) * tf.square(grad)
    preconditioner = tf.rsqrt(
        new_mom + tf.cast(self._diagonal_bias, grad.dtype))
    # Compute gradients of the preconditioner.
    _, preconditioner_grads = diag_jacobian(
        xs=var,
        ys=preconditioner,
        parallel_iterations=self._parallel_iterations)
    mean = 0.5 * (preconditioner * grad *
                  tf.cast(self._data_size, grad.dtype)
                  - preconditioner_grads[0])
    stddev *= tf.sqrt(preconditioner)
    result_shape = tf.broadcast_dynamic_shape(tf.shape(mean),
                                              tf.shape(stddev))
    with tf.control_dependencies([tf.assign(mom, new_mom)]):
        return tf.random_normal(shape=result_shape,
                                mean=mean,
                                stddev=stddev,
                                dtype=grad.dtype)
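The core of this update is an RMSProp-style moving average of squared gradients turned into a diagonal preconditioner via rsqrt. Below is a minimal NumPy sketch of just that arithmetic; the decay and diagonal_bias values and the shapes are assumptions, and the Jacobian correction and noise terms from the snippet are omitted.

import numpy as np

def precondition(mom, grad, decay=0.9, diagonal_bias=1e-8):
    # Exponentially weighted moving average of squared gradients.
    new_mom = decay * mom + (1.0 - decay) * np.square(grad)
    # Diagonal preconditioner: 1 / sqrt(new_mom + bias), i.e. tf.rsqrt(...).
    preconditioner = 1.0 / np.sqrt(new_mom + diagonal_bias)
    return new_mom, preconditioner

mom = np.zeros(3)
grad = np.array([0.5, -1.0, 2.0])
mom, pre = precondition(mom, grad)
print(pre)  # larger values where squared gradients are small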
Example 2: batch_normalized_linear_layer
def batch_normalized_linear_layer(state_below, scope_name, n_inputs, n_outputs, stddev, wd, eps=.00001, test=False):
    """
    A linear layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        weight = _variable_with_weight_decay(
            "weights", shape=[n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        act = tf.matmul(state_below, weight)
        # get moments
        act_mean, act_variance = tf.nn.moments(act, [0])
        # get mean and variance variables
        mean = _variable_on_cpu('bn_mean', [n_outputs], tf.constant_initializer(0.0), trainable=False)
        variance = _variable_on_cpu('bn_variance', [n_outputs], tf.constant_initializer(1.0), trainable=False)
        # assign the moments
        if not test:
            assign_mean = mean.assign(act_mean)
            assign_variance = variance.assign(act_variance)
            act_bn = tf.mul((act - act_mean), tf.rsqrt(act_variance + eps), name=scope.name + "_bn")
        else:
            act_bn = tf.mul((act - mean), tf.rsqrt(variance + eps), name=scope.name + "_bn")
        beta = _variable_on_cpu("beta", [n_outputs], tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs], tf.constant_initializer(1.0))
        bn = tf.add(tf.mul(act_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn, .1, name=scope.name, is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(dependencies=[assign_mean, assign_variance], output_tensor=output)
        _activation_summary(output)
        return output
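The normalization itself is the standard batch-norm transform, (act - mean) * rsqrt(var + eps) * gamma + beta, with moments taken over the batch axis. A NumPy sketch of that transform under assumed shapes (this is only the arithmetic, not the layer above with its variables and summaries):

import numpy as np

def batch_norm(act, gamma, beta, eps=1e-5):
    # One mean/variance per output unit, computed over the batch axis.
    mean = act.mean(axis=0)
    var = act.var(axis=0)
    # 1 / sqrt(var + eps) plays the role of tf.rsqrt(act_variance + eps).
    act_bn = (act - mean) / np.sqrt(var + eps)
    return act_bn * gamma + beta

act = np.random.randn(32, 4)                      # [batch, n_outputs]
out = batch_norm(act, np.ones(4), np.zeros(4))
print(out.mean(axis=0).round(6), out.std(axis=0).round(3))  # ~0 and ~1 per unit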
Example 3: cosine_distances
def cosine_distances(test, support):
    """Computes pairwise cosine distances between provided tensors

    Parameters
    ----------
    test: tf.Tensor
        Of shape (n_test, n_feat)
    support: tf.Tensor
        Of shape (n_support, n_feat)

    Returns
    -------
    tf.Tensor:
        Of shape (n_test, n_support)
    """
    rnorm_test = tf.rsqrt(
        tf.reduce_sum(tf.square(test), 1, keep_dims=True)) + 1e-7
    rnorm_support = tf.rsqrt(
        tf.reduce_sum(tf.square(support), 1, keep_dims=True)) + 1e-7
    test_normalized = test * rnorm_test
    support_normalized = support * rnorm_support
    # Transpose for mul
    support_normalized_t = tf.transpose(support_normalized, perm=[1, 0])
    g = tf.matmul(test_normalized, support_normalized_t)  # Gram matrix
    return g
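Despite the name, the function returns the Gram matrix of row-normalized inputs, i.e. pairwise cosine similarities. A NumPy re-derivation of the same computation, kept as an illustration only (the 1e-7 stabilizer and shapes follow the snippet):

import numpy as np

def cosine_similarities(test, support, eps=1e-7):
    # Reciprocal row norms, mirroring tf.rsqrt(reduce_sum(square(x), 1)) + eps.
    rnorm_test = 1.0 / np.sqrt(np.sum(test**2, axis=1, keepdims=True)) + eps
    rnorm_support = 1.0 / np.sqrt(np.sum(support**2, axis=1, keepdims=True)) + eps
    return (test * rnorm_test) @ (support * rnorm_support).T

test = np.random.randn(5, 8)       # (n_test, n_feat)
support = np.random.randn(3, 8)    # (n_support, n_feat)
print(cosine_similarities(test, support).shape)  # (5, 3)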
Example 4: batch_normalized_conv_layer
def batch_normalized_conv_layer(state_below, scope_name, n_inputs, n_outputs, filter_shape, stddev, wd, eps=.00001, test=False):
    """
    Convolutional layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        kernel = _variable_with_weight_decay(
            "weights", shape=[filter_shape[0], filter_shape[1], n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        conv = tf.nn.conv2d(state_below, kernel, [1, 1, 1, 1], padding='SAME')
        # get moments
        conv_mean, conv_variance = tf.nn.moments(conv, [0, 1, 2])
        # get mean and variance variables
        mean = _variable_on_cpu("bn_mean", [n_outputs], tf.constant_initializer(0.0), False)
        variance = _variable_on_cpu("bn_variance", [n_outputs], tf.constant_initializer(1.0), False)
        # assign the moments
        if not test:
            assign_mean = mean.assign(conv_mean)
            assign_variance = variance.assign(conv_variance)
            conv_bn = tf.mul((conv - conv_mean), tf.rsqrt(conv_variance + eps), name=scope.name + "_bn")
        else:
            conv_bn = tf.mul((conv - mean), tf.rsqrt(variance + eps), name=scope.name + "_bn")
        beta = _variable_on_cpu("beta", [n_outputs], tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs], tf.constant_initializer(1.0))
        bn = tf.add(tf.mul(conv_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn, .1, name=scope.name, is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(dependencies=[assign_mean, assign_variance], output_tensor=output)
        _activation_summary(output)
        return output
Example 5: _resource_apply_dense
def _resource_apply_dense(self, grad, var):
    grad_squared = tf.square(grad) + 1e-30
    grad_squared_mean = tf.reduce_mean(grad_squared)
    decay_rate = self._decay_rate
    update_scale = self._learning_rate
    if self._multiply_by_parameter_scale:
        update_scale *= self._parameter_scale(var)
    # HACK: Make things dependent on grad.
    # This confounds the XLA rewriter and keeps it from fusing computations
    # across different variables. This fusion is bad for HBM usage, since
    # it causes the gradients to persist in memory.
    decay_rate += grad_squared_mean * 1e-30
    update_scale += grad_squared_mean * 1e-30
    # END HACK
    mixing_rate = 1.0 - decay_rate
    shape = var.get_shape().as_list()
    updates = []
    if self._should_use_factored_second_moment_estimate(shape):
        grad_squared_row_mean = tf.reduce_mean(grad_squared, 1)
        grad_squared_col_mean = tf.reduce_mean(grad_squared, 0)
        vr = self.get_slot(var, "vr")
        new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
        vc = self.get_slot(var, "vc")
        new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
        vr_update = tf.assign(vr, new_vr, use_locking=self._use_locking)
        vc_update = tf.assign(vc, new_vc, use_locking=self._use_locking)
        updates = [vr_update, vc_update]
        long_term_mean = tf.reduce_mean(new_vr)
        r_factor = tf.rsqrt(new_vr / long_term_mean)
        c_factor = tf.rsqrt(new_vc)
        x = grad * tf.expand_dims(r_factor, 1) * tf.expand_dims(c_factor, 0)
    else:
        v = self.get_slot(var, "v")
        new_v = decay_rate * v + mixing_rate * grad_squared
        v_update = tf.assign(v, new_v, use_locking=self._use_locking)
        updates = [v_update]
        x = grad * tf.rsqrt(new_v)
    if self._clipping_threshold is not None:
        clipping_denom = tf.maximum(1.0, reduce_rms(x) / self._clipping_threshold)
        x /= clipping_denom
    subtrahend = update_scale * x
    if self._beta1:
        m = self.get_slot(var, "m")
        new_m = self._beta1 * m + (1.0 - self._beta1) * subtrahend
        updates.append(tf.assign(m, new_m, use_locking=self._use_locking))
        subtrahend = new_m
    var_update = tf.assign_sub(var, subtrahend, use_locking=self._use_locking)
    updates = [var_update] + updates
    return tf.group(*updates)
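For matrix-shaped variables, this optimizer keeps a factored second moment: a per-row mean vr and a per-column mean vc, recombined through rsqrt so the full second-moment matrix is never stored. A NumPy sketch of that recombination only; the decay value, initial slots, and shapes are assumptions, and the clipping, momentum, and variable-update bookkeeping are omitted.

import numpy as np

def factored_rsqrt_precondition(grad, vr, vc, decay=0.8, eps=1e-30):
    g2 = np.square(grad) + eps
    new_vr = decay * vr + (1 - decay) * g2.mean(axis=1)   # per-row second moment
    new_vc = decay * vc + (1 - decay) * g2.mean(axis=0)   # per-column second moment
    r_factor = 1.0 / np.sqrt(new_vr / new_vr.mean())      # tf.rsqrt(new_vr / long_term_mean)
    c_factor = 1.0 / np.sqrt(new_vc)                      # tf.rsqrt(new_vc)
    update = grad * r_factor[:, None] * c_factor[None, :]
    return update, new_vr, new_vc

grad = np.random.randn(4, 6)
update, vr, vc = factored_rsqrt_precondition(grad, np.ones(4), np.ones(6))
print(update.shape)  # (4, 6), same shape as the variable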
Example 6: l2_normalize
def l2_normalize(incoming, dim, epsilon=1e-12, name="l2_normalize"):
    """ L2 Normalization.

    Normalizes along dimension `dim` using an L2 norm.

    For a 1-D tensor with `dim = 0`, computes
    ```
    output = x / sqrt(max(sum(x**2), epsilon))
    ```
    For `x` with more dimensions, independently normalizes each 1-D slice along
    dimension `dim`.

    Arguments:
        incoming: `Tensor`. Incoming Tensor.
        dim: `int`. Dimension along which to normalize.
        epsilon: `float`. A lower bound value for the norm. Will use
            `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`.
        name: `str`. A name for this layer (optional).

    Returns:
        A `Tensor` with the same shape as `x`.
    """
    with tf.variable_op_scope([incoming], name) as name:
        x = tf.convert_to_tensor(incoming, name="x")
        square_sum = tf.reduce_sum(tf.square(x), [dim], keep_dims=True)
        x_inv_norm = tf.rsqrt(tf.maximum(square_sum, epsilon))
        return tf.mul(x, x_inv_norm, name=name)
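The body is exactly the formula in the docstring: divide by sqrt(max(sum(x**2), epsilon)), written as a multiply by tf.rsqrt. A NumPy equivalent for the 1-D case (illustrative only; TensorFlow's built-in tf.nn.l2_normalize implements the same formula):

import numpy as np

def l2_normalize(x, epsilon=1e-12):
    square_sum = np.sum(np.square(x))
    return x / np.sqrt(np.maximum(square_sum, epsilon))

v = np.array([3.0, 4.0])
print(l2_normalize(v))                  # [0.6 0.8]
print(np.linalg.norm(l2_normalize(v)))  # 1.0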
Example 7: full_batchnorm
def full_batchnorm(pre_activations, batch, epsilon=1e-8, train=True,
                   beta_init=tf.constant_initializer(0),
                   gamma_init=tf.constant_initializer(1)):
    """Does full batch normalisation of pre-activations.

    Expects to be given something pre-nonlinearity.
    This is only set up for feed-forward nets; to work properly for
    recurrent nets we would need to know what step we are up to, as in the
    paper they calculate population statistics at every time step.

    Args:
        pre_activations: the logits that will be normalised. We assume this is
            of shape [batch_size, num_units].
        batch: the data which generated the logits, which we need to calculate
            statistics used to normalise.
        train: if true, the statistics will be recalculated for each batch. If not,
            then the average from the training phase will be used.

    Returns:
        batch normalised activations.
    """
    # get beta and gamma
    num_units = pre_activations.get_shape()[0]
    beta = tf.get_variable('beta', [num_units])
    gamma = tf.get_variable('gamma', [num_units])
    mean, variance = tf.nn.moments(pre_activations, [0])
    isqr = tf.rsqrt(variance + epsilon)
    centered = pre_activations - mean
    return beta + gamma * centered * isqr
Example 8: ae_latent_softmax
def ae_latent_softmax(latents_pred, latents_discrete_hot, vocab_size, hparams):
    """Latent prediction and loss.

    Args:
        latents_pred: Tensor of shape [..., depth].
        latents_discrete_hot: Tensor of shape [..., vocab_size].
        vocab_size: an int representing the vocab size.
        hparams: tf.contrib.training.HParams.

    Returns:
        sample: Tensor of shape [...], a sample from a multinomial distribution.
        loss: Tensor of shape [...], the softmax cross-entropy.
    """
    with tf.variable_scope("latent_logits"):
        latents_logits = tf.layers.dense(latents_pred, vocab_size,
                                         name="logits_dense")
        if hparams.logit_normalization:
            latents_logits *= tf.rsqrt(1e-8 +
                                       tf.reduce_mean(tf.square(latents_logits)))
        loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=latents_discrete_hot, logits=latents_logits)

        # TODO(trandustin): tease this out from ae_latent_softmax.
        # we use just the loss portion to anchor prior / encoder on text.
        sample = multinomial_sample(latents_logits,
                                    vocab_size,
                                    hparams.sampling_method,
                                    hparams.sampling_temp)
        return sample, loss
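With logit_normalization enabled, the logits are rescaled by the reciprocal of their root-mean-square value, so the softmax sees inputs with RMS close to 1. A NumPy sketch of that single step (the 1e-8 stabilizer follows the snippet; everything else here is illustrative):

import numpy as np

def rms_normalize_logits(logits, eps=1e-8):
    # tf.rsqrt(eps + mean(square(logits))) written as a plain reciprocal square root.
    return logits * (1.0 / np.sqrt(eps + np.mean(np.square(logits))))

logits = np.random.randn(2, 10) * 5.0
normed = rms_normalize_logits(logits)
print(np.sqrt(np.mean(normed**2)))  # ~1.0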
Example 9: BatchClipByL2norm
def BatchClipByL2norm(t, upper_bound, name=None):
    """Clip an array of tensors by L2 norm.

    Shrink each dimension-0 slice of tensor (for a matrix it is each row) such
    that the l2 norm is at most upper_bound. Here we clip each row as it
    corresponds to each example in the batch.

    Args:
        t: the input tensor.
        upper_bound: the upper bound of the L2 norm.
        name: optional name.

    Returns:
        the clipped tensor.
    """
    assert upper_bound > 0
    with tf.op_scope([t, upper_bound], name, "batch_clip_by_l2norm") as name:
        saved_shape = tf.shape(t)
        batch_size = tf.slice(saved_shape, [0], [1])
        t2 = tf.reshape(t, tf.concat(0, [batch_size, [-1]]))
        upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),
                                  tf.constant(1.0 / upper_bound))
        # Add a small number to avoid divide by 0
        l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001)
        scale = tf.minimum(l2norm_inv, upper_bound_inv) * upper_bound
        clipped_t = tf.matmul(tf.diag(scale), t2)
        clipped_t = tf.reshape(clipped_t, saved_shape, name=name)
        return clipped_t
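Each row ends up scaled by min(1/||row||, 1/upper_bound) * upper_bound, which leaves rows already below the bound (almost) untouched and shrinks the rest onto the bound. A NumPy sketch of the per-row scaling (the 1e-6 stabilizer follows the snippet; the reshape and diag plumbing is omitted):

import numpy as np

def batch_clip_by_l2norm(t, upper_bound):
    l2norm_inv = 1.0 / np.sqrt(np.sum(t * t, axis=1) + 1e-6)  # tf.rsqrt(...)
    scale = np.minimum(l2norm_inv, 1.0 / upper_bound) * upper_bound
    return t * scale[:, None]

t = np.array([[3.0, 4.0],    # norm 5   -> clipped to ~1
              [0.3, 0.4]])   # norm 0.5 -> left (almost) unchanged
print(np.linalg.norm(batch_clip_by_l2norm(t, 1.0), axis=1))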
Example 10: compute_next_h_d
def compute_next_h_d(self, meta_opt, w_bot, w_top, bias, x, z, d, backward_w):
    """Propagate error back down the network while computing hidden state."""
    if z is None:
        z = x
    h = meta_opt.compute_h(x, z, d, bias, w_bot,
                           w_top)  # [bs x 60 x h_channels]

    # compute the next d
    delta = meta_opt.next_delta(z, h, d)

    if backward_w is not None:

        def delta_matmul(w, delta):
            d = tf.transpose(delta, [0, 2, 1])  # [bs x delta_channels x n_units]
            d = snt.BatchApply(lambda x: tf.matmul(x, w, transpose_b=True))(d)
            d = tf.transpose(d, [0, 2, 1])
            return d

        # replace the "backward pass" with a random matrix.
        d = delta_matmul(backward_w, delta)  # [bs x 60 x delta_channels]
        var = tf.reduce_mean(tf.square(d), [2], keepdims=True)
        d = d * tf.rsqrt(1e-6 + var)

    return h, d
Example 11: _norm
def _norm(x, g=None, b=None, e=1e-5, axis=[1]):
    u = tf.reduce_mean(x, axis=axis, keepdims=True)
    s = tf.reduce_mean(tf.square(x - u), axis=axis, keepdims=True)
    x = (x - u) * tf.rsqrt(s + e)
    if g is not None and b is not None:
        x = x * g + b
    return x
Example 12: multihead_attn
def multihead_attn(q, k, v):
    # q, k, v have shape [batch, heads, sequence, features]
    w = tf.matmul(q, k, transpose_b=True)
    w = w * tf.rsqrt(tf.cast(v.shape[-1].value, w.dtype))
    w = tf.nn.softmax(w)
    a = tf.matmul(w, v)
    return a
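This is scaled dot-product attention, with the usual 1/sqrt(d) scaling written as tf.rsqrt of the feature dimension. A NumPy sketch of the same computation (the shapes here are assumptions, chosen to match the comment in the snippet):

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def attn(q, k, v):
    # w = q k^T / sqrt(d_features), then softmax over the key axis.
    w = (q @ k.transpose(0, 1, 3, 2)) * (1.0 / np.sqrt(v.shape[-1]))
    return softmax(w) @ v

q = k = v = np.random.randn(2, 4, 7, 16)  # [batch, heads, sequence, features]
print(attn(q, k, v).shape)                # (2, 4, 7, 16)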
Example 13: simple_attention
def simple_attention(target, source, bias=None, summaries=True):
    """A simple attention function.

    Args:
        target: a `Tensor` with shape `[batch, target_timesteps, depth]` or
            `[batch, target_timesteps_1, target_timesteps_2, depth]`
        source: a `Tensor` with shape `[batch, source_timesteps, depth]` or
            `[batch, source_timesteps_1, source_timesteps_2, depth]`
        bias: an optional `Tensor` with shape `[batch, timesteps, 1, 1]` used
            to mask the attention to not attend to padding of input.
        summaries: Boolean, whether to output summaries.

    Returns:
        a `Tensor` with same shape as `target`
    """
    with tf.name_scope("simple_attention", [target, source]):
        target_shape = tf.shape(target)
        source_shape = tf.shape(source)
        target = tf.reshape(target, [
            target_shape[0], target_shape[1] * target_shape[2], target_shape[3]
        ])
        source = tf.reshape(source, [
            source_shape[0], source_shape[1] * source_shape[2], source_shape[3]
        ])
        attention = tf.matmul(target, source, transpose_b=True)
        attention *= tf.rsqrt(tf.to_float(tf.shape(target)[2]))
        if bias is not None:
            attention += tf.expand_dims(tf.squeeze(bias, axis=[2, 3]), axis=1)
        attention = tf.nn.softmax(attention)
        if summaries and not tf.get_variable_scope().reuse:
            tf.summary.image("attention", tf.expand_dims(attention, 3), max_outputs=5)
        attended = tf.matmul(attention, source)
        return tf.reshape(attended, target_shape)
Example 14: layer_norm
def layer_norm(x: tf.Tensor, epsilon: float = 1e-6) -> tf.Tensor:
    """Layer normalize the tensor x, averaging over the last dimension.

    Implementation based on tensor2tensor.

    Arguments:
        x: The ``Tensor`` to normalize.
        epsilon: The smoothing parameter of the normalization.

    Returns:
        The normalized tensor.
    """
    with tf.variable_scope("LayerNorm"):
        gamma = get_variable(
            name="gamma",
            shape=[x.get_shape()[-1]],
            dtype=tf.float32,
            initializer=tf.ones_initializer())
        beta = get_variable(
            name="beta",
            shape=[x.get_shape()[-1]],
            dtype=tf.float32,
            initializer=tf.zeros_initializer())

        mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
        variance = tf.reduce_mean(
            tf.square(x - mean),
            axis=[-1],
            keepdims=True)

        norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
        return norm_x * gamma + beta
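Unlike the batch-norm examples earlier, the moments here are taken over the last (feature) axis of each example, so no population statistics need to be tracked between training and test. A NumPy sketch of the normalization step only (the gamma/beta handling mirrors the snippet; the shapes are assumptions):

import numpy as np

def layer_norm(x, gamma, beta, epsilon=1e-6):
    mean = x.mean(axis=-1, keepdims=True)
    variance = ((x - mean) ** 2).mean(axis=-1, keepdims=True)
    # (x - mean) * rsqrt(variance + epsilon), then scale and shift.
    return (x - mean) / np.sqrt(variance + epsilon) * gamma + beta

x = np.random.randn(2, 5, 8)                  # [batch, time, features]
out = layer_norm(x, np.ones(8), np.zeros(8))
print(out.mean(-1).round(6).max(), out.std(-1).round(3).max())  # ~0 and ~1 per position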
Example 15: _opsBatchNorm
def _opsBatchNorm(self, x, m, v, beta, gamma, epsilon,
                  scale_after_normalization):
    y = (x - m) * tf.rsqrt(v + epsilon)
    if scale_after_normalization:
        y = gamma * y
    y += beta
    return y