

Python math_ops.minimum Function Code Examples

This article collects typical usage examples of the tensorflow.python.ops.math_ops.minimum function in Python. If you are wondering how to use minimum, or what it looks like in real code, the curated examples below may help.


The following presents 15 code examples of the minimum function, ordered by popularity.
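
As a quick orientation before the examples, here is a minimal sketch of the op itself. It uses the public tf.minimum alias (which is backed by math_ops.minimum) and assumes TensorFlow 2.x with eager execution:

import tensorflow as tf  # assumes TensorFlow 2.x with eager execution

# Element-wise minimum of two tensors; scalars broadcast against tensors.
a = tf.constant([1.0, 5.0, 3.0])
b = tf.constant([4.0, 2.0, 3.0])
print(tf.minimum(a, b).numpy())    # [1. 2. 3.]
print(tf.minimum(a, 2.5).numpy())  # [1.  2.5 2.5]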

Example 1: weighted_resample

def weighted_resample(inputs, weights, overall_rate, scope=None,
                      mean_decay=0.999, warmup=10, seed=None):
  """Performs an approximate weighted resampling of `inputs`.

  This method chooses elements from `inputs` where each item's rate of
  selection is proportional to its value in `weights`, and the average
  rate of selection across all inputs (and many invocations!) is
  `overall_rate`.

  Args:
    inputs: A list of tensors whose first dimension is `batch_size`.
    weights: A `[batch_size]`-shaped tensor with each batch member's weight.
    overall_rate: Desired overall rate of resampling.
    scope: Scope to use for the op.
    mean_decay: How quickly to decay the running estimate of the mean weight.
    warmup: Until the resulting tensor has been evaluated `warmup`
      times, the resampling method uses the true mean over all calls
      as its weight estimate, rather than a decayed mean.
    seed: Random seed.

  Returns:
    A list of tensors exactly like `inputs`, but with an unknown (and
      possibly zero) first dimension.
    A tensor containing the effective resampling rate used for each output.

  """
  # Algorithm: Just compute rates as weights/mean_weight *
  # overall_rate. This way the average weight corresponds to the
  # overall rate, and a weight twice the average has twice the rate,
  # etc.
  with ops.name_scope(scope, 'weighted_resample', inputs) as opscope:
    # First: Maintain a running estimated mean weight, with decay
    # adjusted (by also maintaining an invocation count) during the
    # warmup period so that at the beginning, there aren't too many
    # zeros mixed in, throwing the average off.

    with variable_scope.variable_scope(scope, 'estimate_mean', inputs):
      count_so_far = variable_scope.get_local_variable(
          'resample_count', initializer=0)

      estimated_mean = variable_scope.get_local_variable(
          'estimated_mean', initializer=0.0)

      count = count_so_far.assign_add(1)
      real_decay = math_ops.minimum(
          math_ops.truediv((count - 1), math_ops.minimum(count, warmup)),
          mean_decay)

      batch_mean = math_ops.reduce_mean(weights)
      mean = moving_averages.assign_moving_average(
          estimated_mean, batch_mean, real_decay, zero_debias=False)

    # Then, normalize the weights into rates using the mean weight and
    # overall target rate:
    rates = weights * overall_rate / mean

    results = resample_at_rate([rates] + inputs, rates,
                               scope=opscope, seed=seed, back_prop=False)

    return (results[1:], results[0])
Developer: ComeOnGetMe; Project: tensorflow; Lines: 60; Source: resample.py
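
The interesting minimum usage here is the warm-up decay schedule. A small plain-Python sketch (with hypothetical counts) shows how real_decay behaves during and after warmup:

# Hypothetical illustration of the warm-up decay schedule used above:
# real_decay = min((count - 1) / min(count, warmup), mean_decay)
mean_decay, warmup = 0.999, 10
for count in (1, 2, 5, 10, 100):
    real_decay = min((count - 1) / min(count, warmup), mean_decay)
    print(count, real_decay)
# count=1 gives 0.0 (the batch mean is used directly); the decay then ramps
# toward (warmup - 1) / warmup and is finally capped at mean_decay.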

Example 2: _renorm_correction_and_moments

  def _renorm_correction_and_moments(self, mean, variance, training):
    """Returns the correction and update values for renorm."""
    stddev = math_ops.sqrt(variance + self.epsilon)
    # Compute the average mean and standard deviation, as if they were
    # initialized with this batch's moments.
    mixed_renorm_mean = (self.renorm_mean +
                         (1. - self.renorm_mean_weight) * mean)
    mixed_renorm_stddev = (self.renorm_stddev +
                           (1. - self.renorm_stddev_weight) * stddev)
    # Compute the corrections for batch renorm.
    r = stddev / mixed_renorm_stddev
    d = (mean - mixed_renorm_mean) / mixed_renorm_stddev
    # Ensure the corrections use pre-update moving averages.
    with ops.control_dependencies([r, d]):
      mean = array_ops.identity(mean)
      stddev = array_ops.identity(stddev)
    rmin, rmax, dmax = [self.renorm_clipping.get(key)
                        for key in ['rmin', 'rmax', 'dmax']]
    if rmin is not None:
      r = math_ops.maximum(r, rmin)
    if rmax is not None:
      r = math_ops.minimum(r, rmax)
    if dmax is not None:
      d = math_ops.maximum(d, -dmax)
      d = math_ops.minimum(d, dmax)
    # When not training, use r=1, d=0, and decay=1 meaning no updates.
    r = _smart_select(training, lambda: r, lambda: array_ops.ones_like(r))
    d = _smart_select(training, lambda: d, lambda: array_ops.zeros_like(d))
    decay = _smart_select(training, lambda: self.renorm_momentum, lambda: 1.)

    def _update_renorm_variable(var, weight, value):
      """Updates a moving average and weight, returns the unbiased value."""
      # Update the variables without zero debiasing. The debiasing will be
      # accomplished by dividing the exponential moving average by the weight.
      # For example, after a single update, the moving average would be
      # (1-decay) * value, and the weight will be 1-decay, with their ratio
      # giving value.
      # Make sure the weight is not updated until after r and d are computed.
      value = array_ops.identity(value)
      with ops.control_dependencies([value]):
        weight_value = array_ops.constant(1., dtype=weight.dtype)
      new_var = moving_averages.assign_moving_average(
          var, value, decay, zero_debias=False)
      new_weight = moving_averages.assign_moving_average(
          weight, weight_value, decay, zero_debias=False)
      return new_var / new_weight

    with ops.colocate_with(self.moving_mean):
      new_mean = _update_renorm_variable(self.renorm_mean,
                                         self.renorm_mean_weight,
                                         mean)
    with ops.colocate_with(self.moving_variance):
      new_stddev = _update_renorm_variable(self.renorm_stddev,
                                           self.renorm_stddev_weight,
                                           stddev)
      # Make sqrt(moving_variance + epsilon) = new_stddev.
      new_variance = math_ops.square(new_stddev) - self.epsilon

    return (r, d, new_mean, new_variance)
Developer: ilya-edrenkin; Project: tensorflow; Lines: 59; Source: normalization.py
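
The minimum/maximum pair here simply clamps the renorm corrections. A standalone sketch with hypothetical clipping bounds, using the public tf.minimum/tf.maximum (TF 2.x eager):

import tensorflow as tf  # hypothetical clipping values for illustration

r = tf.constant([0.5, 1.0, 4.0])
d = tf.constant([-3.0, 0.2, 3.0])
rmin, rmax, dmax = 1.0 / 3.0, 3.0, 5.0  # example renorm_clipping bounds

r = tf.minimum(tf.maximum(r, rmin), rmax)   # clamp r to [rmin, rmax]
d = tf.minimum(tf.maximum(d, -dmax), dmax)  # clamp d to [-dmax, dmax]
print(r.numpy(), d.numpy())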

Example 3: huber_loss

def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
  """Adds a Huber Loss term to the training procedure.

  For each value x in `error=labels-predictions`, the following is calculated:

  ```
    0.5 * x^2                  if |x| <= d
    0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

  See: https://en.wikipedia.org/wiki/Huber_loss

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the huber loss function
      changes from a quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    quadratic = math_ops.minimum(abs_error, delta)
    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already a
    # nonzero contribution to the gradient from the quadratic term.
    linear = (abs_error - quadratic)
    losses = 0.5 * quadratic**2 + delta * linear
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
Developer: LUTAN; Project: tensorflow; Lines: 60; Source: losses_impl.py
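
A small numeric sketch (hypothetical errors, public tf.minimum) shows how minimum splits the error into the quadratic and linear parts of the Huber loss:

import tensorflow as tf  # TF 2.x eager; hypothetical error values

delta = 1.0
abs_error = tf.constant([0.3, 1.0, 2.5])
quadratic = tf.minimum(abs_error, delta)  # capped at delta
linear = abs_error - quadratic            # the part beyond delta
losses = 0.5 * quadratic**2 + delta * linear
print(losses.numpy())  # [0.045, 0.5, 2.0]: quadratic below delta, linear above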

Example 4: saturate_cast

def saturate_cast(image, dtype):
  """Performs a safe cast of image data to `dtype`.

  This function casts the data in image to `dtype`, without applying any
  scaling. If there is a danger that image data would over or underflow in the
  cast, this op applies the appropriate clamping before the cast.

  Args:
    image: An image to cast to a different data type.
    dtype: A `DType` to cast `image` to.

  Returns:
    `image`, safely cast to `dtype`.
  """
  clamped = image

  # When casting to a type with smaller representable range, clamp.
  # Note that this covers casting to unsigned types as well.
  if image.dtype.min < dtype.min and image.dtype.max > dtype.max:
    clamped = clip_ops.clip_by_value(clamped,
                                     math_ops.cast(dtype.min, image.dtype),
                                     math_ops.cast(dtype.max, image.dtype))
  elif image.dtype.min < dtype.min:
    clamped = math_ops.maximum(clamped, math_ops.cast(dtype.min, image.dtype))
  elif image.dtype.max > dtype.max:
    clamped = math_ops.minimum(clamped, math_ops.cast(dtype.max, image.dtype))

  return math_ops.cast(clamped, dtype)
Developer: DapengLan; Project: tensorflow; Lines: 28; Source: image_ops.py
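
For completeness, the same clamp-then-cast behavior is available through the public tf.dtypes.saturate_cast wrapper (TF 2.x); a minimal sketch:

import tensorflow as tf  # TF 2.x eager

x = tf.constant([-5.0, 100.0, 300.0])
# A plain cast to uint8 would wrap or overflow; saturate_cast clamps first.
print(tf.dtypes.saturate_cast(x, tf.uint8).numpy())  # [  0 100 255]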

Example 5: __call__

  def __call__(self, shape, dtype=None, partition_info=None):
    if dtype is None:
      dtype = self.dtype
    # Check the shape
    if len(shape) < 2:
      raise ValueError("The tensor to initialize must be "
                       "at least two-dimensional")
    # Flatten the input shape with the last dimension remaining
    # its original shape so it works for conv2d
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (num_rows, num_cols)

    # Generate a random matrix
    a = random_ops.random_normal(flat_shape, dtype=dtype, seed=self.seed)
    # Compute the qr factorization
    q, r = linalg_ops.qr(a, full_matrices=False)
    # Make Q uniform
    square_len = math_ops.minimum(num_rows, num_cols)
    d = array_ops.diag_part(r[:square_len, :square_len])
    ph = d / math_ops.abs(d)
    q *= ph
    # Pad zeros to Q (if rows smaller than cols)
    if num_rows < num_cols:
      padding = array_ops.zeros([num_rows, num_cols - num_rows], dtype=dtype)
      q = array_ops.concat([q, padding], 1)
    return self.gain * array_ops.reshape(q, shape)
Developer: finardi; Project: tensorflow; Lines: 29; Source: init_ops.py
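
Here minimum just picks the smaller of the two flattened dimensions so the QR factorization yields an orthonormal block. A quick sketch with the public Keras Orthogonal initializer checks the property (assuming TF 2.x eager):

import tensorflow as tf  # TF 2.x eager

init = tf.keras.initializers.Orthogonal(gain=1.0, seed=0)
w = init(shape=(4, 3))
# With more rows than columns, the columns are orthonormal,
# so w^T w is (approximately) the 3x3 identity.
print(tf.matmul(w, w, transpose_a=True).numpy())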

Example 6: _adaptive_max_norm

def _adaptive_max_norm(norm, std_factor, decay, global_step, epsilon, name):
  """Find max_norm given norm and previous average."""
  with vs.variable_scope(name, "AdaptiveMaxNorm", [norm]):
    log_norm = math_ops.log(norm + epsilon)

    def moving_average(name, value, decay):
      moving_average_variable = vs.get_variable(
          name,
          shape=value.get_shape(),
          dtype=value.dtype,
          initializer=init_ops.zeros_initializer(),
          trainable=False)
      return moving_averages.assign_moving_average(
          moving_average_variable, value, decay, zero_debias=False)

    # quicker adaptation at the beginning
    if global_step is not None:
      n = math_ops.to_float(global_step)
      decay = math_ops.minimum(decay, n / (n + 1.))

    # update averages
    mean = moving_average("mean", log_norm, decay)
    sq_mean = moving_average("sq_mean", math_ops.square(log_norm), decay)

    variance = sq_mean - math_ops.square(mean)
    std = math_ops.sqrt(math_ops.maximum(epsilon, variance))
    max_norms = math_ops.exp(mean + std_factor * std)
    return max_norms, mean
Developer: AlbertXiebnu; Project: tensorflow; Lines: 28; Source: optimizers.py
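
The minimum(decay, n / (n + 1)) trick is worth a closer look; a plain-Python sketch with hypothetical step counts:

# Early on, n / (n + 1) is small, so the moving averages track the data
# quickly; once n / (n + 1) exceeds the configured decay, the cap takes over.
decay = 0.95
for n in (0, 1, 4, 19, 100):
    print(n, min(decay, n / (n + 1.0)))
# 0 -> 0.0, 1 -> 0.5, 4 -> 0.8, 19 -> 0.95 (cap reached), 100 -> 0.95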

Example 7: clip_by_norm

def clip_by_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its L2-norm is less than or equal to `clip_norm`.
  Specifically, if the L2-norm is already less than or equal to `clip_norm`,
  then `t` is not modified. If the L2-norm is greater than `clip_norm`, then
  this operation returns a tensor of the same type and shape as `t` with its
  values set to:

  `t * clip_norm / l2norm(t)`

  In this case, the L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.op_scope([t, clip_norm], name, "clip_by_norm") as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(t * clip_norm * math_ops.minimum(
        l2norm_inv, constant_op.constant(1.0 / clip_norm)), name=name)

  return tclip
Developer: niclar; Project: tensorflow; Lines: 35; Source: clip_ops.py
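
A usage sketch with the public tf.clip_by_norm (TF 2.x eager), showing the two branches of the behavior described in the docstring:

import tensorflow as tf  # TF 2.x eager

t = tf.constant([3.0, 4.0])              # L2 norm = 5
print(tf.clip_by_norm(t, 2.0).numpy())   # rescaled to norm 2 -> [1.2 1.6]
print(tf.clip_by_norm(t, 10.0).numpy())  # norm already <= 10 -> unchanged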

Example 8: _update_clip_coeff

  def _update_clip_coeff(self, grads_and_vars, precon_grads_and_vars):
    """Computes the scale factor for the update to satisfy the norm constraint.

    Defined as min(1, sqrt(c / r^T F r)), where c is the norm constraint,
    F is the approximate Fisher matrix, and r is the update vector, i.e.
    -alpha * v, where alpha is the learning rate, and v is the preconditioned
    gradient.

    This is based on Section 5 of Ba et al., Distributed Second-Order
    Optimization using Kronecker-Factored Approximations. Note that they
    absorb the learning rate alpha (which they denote eta_max) into the formula
    for the coefficient, while in our implementation, the rescaling is done
    before multiplying by alpha. Hence, our formula differs from theirs by a
    factor of alpha.

    Args:
      grads_and_vars: List of (gradient, variable) pairs.
      precon_grads_and_vars: List of (preconditioned gradient, variable) pairs.
        Must be the result of calling `self._fisher_est.multiply_inverse`
        on `grads_and_vars`.

    Returns:
      Scalar representing the coefficient which should be applied to the
      preconditioned gradients to satisfy the norm constraint.
    """
    sq_norm_grad = self._squared_fisher_norm(grads_and_vars,
                                             precon_grads_and_vars)
    sq_norm_up = sq_norm_grad * self._learning_rate**2
    return math_ops.minimum(1.,
                            math_ops.sqrt(self._norm_constraint / sq_norm_up))
Developer: ebrevdo; Project: tensorflow; Lines: 30; Source: optimizer.py
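
A toy illustration of the coefficient formula in plain Python, with hypothetical values for the norm constraint and learning rate:

import math

# coeff = min(1, sqrt(norm_constraint / (learning_rate**2 * r^T F r)))
norm_constraint, learning_rate = 1e-3, 0.01
for sq_fisher_norm in (1.0, 50.0):
    sq_norm_up = sq_fisher_norm * learning_rate**2
    print(min(1.0, math.sqrt(norm_constraint / sq_norm_up)))
# 1.0 when the update already satisfies the constraint, < 1.0 otherwise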

Example 9: gradient_clipping

  def gradient_clipping(grads_and_vars):
    """Internal function for adaptive clipping."""
    grads, variables = zip(*grads_and_vars)

    norm = clip_ops.global_norm(grads)

    max_norm, log_mean = _adaptive_max_norm(norm, std_factor, decay,
                                            global_step, epsilon, name)

    # reports the max gradient norm for debugging
    if report_summary:
      summary.scalar("global_norm/adaptive_max_gradient_norm", max_norm)

    # factor will be 1. if norm is smaller than max_norm
    factor = array_ops.where(norm < max_norm,
                             array_ops.ones_like(norm),
                             math_ops.exp(log_mean) / norm)

    if static_max_norm is not None:
      factor = math_ops.minimum(static_max_norm / norm, factor)

    # apply factor
    clipped_grads = []
    for grad in grads:
      if grad is None:
        clipped_grads.append(None)
      elif isinstance(grad, ops.IndexedSlices):
        clipped_grads.append(
            ops.IndexedSlices(grad.values * factor, grad.indices,
                              grad.dense_shape))
      else:
        clipped_grads.append(grad * factor)

    return list(zip(clipped_grads, variables))
Developer: AlbertXiebnu; Project: tensorflow; Lines: 34; Source: optimizers.py

Example 10: decayed_lr

  def decayed_lr(learning_rate, global_step, decay_steps, initial_variance,
                 variance_decay, num_periods, alpha, beta, name):
    """Helper to recompute learning rate; most helpful in eager-mode."""
    with ops.name_scope(name, "NoisyLinearCosineDecay",
                        [learning_rate, global_step]) as name:
      learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
      dtype = learning_rate.dtype
      decay_steps = math_ops.cast(decay_steps, dtype)
      initial_variance = math_ops.cast(initial_variance, dtype)
      variance_decay = math_ops.cast(variance_decay, dtype)
      num_periods = math_ops.cast(num_periods, dtype)
      alpha = math_ops.cast(alpha, dtype)
      beta = math_ops.cast(beta, dtype)

      global_step_recomp = math_ops.cast(global_step, dtype)
      global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
      linear_decayed = (decay_steps - global_step_recomp) / decay_steps
      variance = initial_variance / (
          math_ops.pow(1.0 + global_step_recomp, variance_decay))
      std = math_ops.sqrt(variance)
      noisy_linear_decayed = (
          linear_decayed + random_ops.random_normal(
              linear_decayed.shape, stddev=std))

      completed_fraction = global_step_recomp / decay_steps
      fraction = 2.0 * num_periods * completed_fraction
      cosine_decayed = 0.5 * (
          1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
      noisy_linear_cosine_decayed = (
          (alpha + noisy_linear_decayed) * cosine_decayed + beta)

      return math_ops.multiply(
          learning_rate, noisy_linear_cosine_decayed, name=name)
Developer: AnishShah; Project: tensorflow; Lines: 33; Source: learning_rate_decay_v2.py
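
The formula is easier to follow outside the graph-building code. A plain-Python sketch of the same schedule with a hypothetical helper and hypothetical hyperparameter values (random.gauss stands in for random_ops.random_normal):

import math
import random

def noisy_linear_cosine(lr, step, decay_steps, initial_variance=1.0,
                        variance_decay=0.55, num_periods=0.5,
                        alpha=0.0, beta=0.001):
    step = min(step, decay_steps)                 # the minimum() clamp above
    linear = (decay_steps - step) / decay_steps
    std = math.sqrt(initial_variance / (1.0 + step) ** variance_decay)
    noisy_linear = linear + random.gauss(0.0, std)
    fraction = 2.0 * num_periods * step / decay_steps
    cosine = 0.5 * (1.0 + math.cos(math.pi * fraction))
    return lr * ((alpha + noisy_linear) * cosine + beta)

print(noisy_linear_cosine(0.1, step=500, decay_steps=1000))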

Example 11: huber_loss

def huber_loss(y_true, y_pred, delta=1.0):
  """Computes Huber loss value.

  For each value x in `error=y_true-y_pred`, the following is calculated:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Args:
    y_true: tensor of true targets.
    y_pred: tensor of predicted targets.
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.

  Returns:
    Tensor with one scalar loss entry per sample.
  """
  y_pred = math_ops.cast(y_pred, dtype=K.floatx())
  y_true = math_ops.cast(y_true, dtype=K.floatx())
  error = math_ops.subtract(y_pred, y_true)
  abs_error = math_ops.abs(error)
  quadratic = math_ops.minimum(abs_error, delta)
  linear = math_ops.subtract(abs_error, quadratic)
  return math_ops.add(
      math_ops.multiply(
          ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
          math_ops.multiply(quadratic, quadratic)),
      math_ops.multiply(delta, linear))
Developer: adit-chandra; Project: tensorflow; Lines: 31; Source: losses.py

Example 12: get_best

  def get_best(self, n):
    """Return the indices and values of the n highest scores in the TopN."""

    def refresh_shortlist():
      """Update the shortlist with the highest scores in id_to_score."""
      new_scores, new_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
      smallest_new_score = math_ops.reduce_min(new_scores)
      new_length = math_ops.reduce_sum(
          math_ops.to_int32(math_ops.greater(new_scores, dtypes.float32.min)))
      u1 = self.sl_ids.assign(
          math_ops.to_int64(array_ops.concat([[new_length], new_ids], 0)))
      u2 = self.sl_scores.assign(
          array_ops.concat([[smallest_new_score], new_scores], 0))
      self.last_ops = [u1, u2]
      return control_flow_ops.group(u1, u2)

    # We only need to refresh the shortlist if n is greater than the
    # current shortlist size (which is stored in sl_ids[0]).
    with ops.control_dependencies(self.last_ops):
      cond_op = control_flow_ops.cond(n > self.sl_ids[0], refresh_shortlist,
                                      control_flow_ops.no_op)
      with ops.control_dependencies([cond_op]):
        topk_values, topk_indices = nn_ops.top_k(
            self.sl_scores,
            math_ops.minimum(n, math_ops.to_int32(self.sl_ids[0])))
        # topk_indices are the indices into the shortlist, we want to return
        # the indices into id_to_score
        gathered_indices = array_ops.gather(self.sl_ids, topk_indices)
        return gathered_indices, topk_values
Developer: AliMiraftab; Project: tensorflow; Lines: 29; Source: topn.py

Example 13: __call__

  def __call__(self, step):
    with ops.name_scope(
        self.name, "PolynomialDecay",
        [self.initial_learning_rate, step, self.decay_steps,
         self.end_learning_rate, self.power]
    ) as name:
      initial_learning_rate = ops.convert_to_tensor(
          self.initial_learning_rate, name="initial_learning_rate")
      dtype = initial_learning_rate.dtype
      end_learning_rate = math_ops.cast(self.end_learning_rate, dtype)
      power = math_ops.cast(self.power, dtype)

      global_step_recomp = math_ops.cast(step, dtype)
      decay_steps_recomp = math_ops.cast(self.decay_steps, dtype)
      if self.cycle:
        # Find the first multiple of decay_steps that is bigger than
        # global_step. If global_step is zero, set the multiplier to 1.
        multiplier = control_flow_ops.cond(
            math_ops.equal(global_step_recomp, 0), lambda: 1.0,
            lambda: math_ops.ceil(global_step_recomp / self.decay_steps))
        decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
      else:
        # Make sure that the global_step used is not bigger than decay_steps.
        global_step_recomp = math_ops.minimum(global_step_recomp,
                                              self.decay_steps)

      p = math_ops.div(global_step_recomp, decay_steps_recomp)
      return math_ops.add(
          math_ops.multiply(initial_learning_rate - end_learning_rate,
                            math_ops.pow(1 - p, power)),
          end_learning_rate,
          name=name)
Developer: terrytangyuan; Project: tensorflow; Lines: 32; Source: learning_rate_schedule.py
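
A usage sketch with the public tf.keras.optimizers.schedules.PolynomialDecay (TF 2.x), showing the effect of the minimum() clamp once step exceeds decay_steps:

import tensorflow as tf  # TF 2.x eager

schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.1, decay_steps=1000,
    end_learning_rate=0.01, power=1.0)
for step in (0, 500, 1000, 2000):
    print(step, float(schedule(step)))
# 0.1 at step 0, 0.055 halfway, 0.01 at decay_steps, then held at 0.01
# because steps beyond decay_steps are clamped when cycle=False.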

Example 14: clip_by_value

def clip_by_value(t, clip_value_min, clip_value_max,
                  name=None):
  """Clips tensor values to a specified min and max.

  Given a tensor `t`, this operation returns a tensor of the same type and
  shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`.
  Any values less than `clip_value_min` are set to `clip_value_min`. Any values
  greater than `clip_value_max` are set to `clip_value_max`.

  Args:
    t: A `Tensor`.
    clip_value_min: A 0-D (scalar) `Tensor`. The minimum value to clip by.
    clip_value_max: A 0-D (scalar) `Tensor`. The maximum value to clip by.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_value",
                      [t, clip_value_min, clip_value_max]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Go through list of tensors, for each value in each tensor clip
    t_min = math_ops.minimum(t, clip_value_max)
    t_max = math_ops.maximum(t_min, clip_value_min, name=name)

  return t_max
Developer: 821760408-sp; Project: tensorflow; Lines: 27; Source: clip_ops.py
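
A usage sketch with the public tf.clip_by_value (TF 2.x eager), alongside the equivalent minimum/maximum pair used internally:

import tensorflow as tf  # TF 2.x eager

t = tf.constant([-2.0, 0.5, 3.0])
print(tf.clip_by_value(t, 0.0, 1.0).numpy())       # [0.  0.5 1. ]
# Equivalent to the minimum-then-maximum pair shown above:
print(tf.maximum(tf.minimum(t, 1.0), 0.0).numpy())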

Example 15: _get_scores

def _get_scores(log_probs, sequence_lengths, length_penalty_weight,
                coverage_penalty_weight, finished, accumulated_attention_probs):
  """Calculates scores for beam search hypotheses.

  Args:
    log_probs: The log probabilities with shape
      `[batch_size, beam_width, vocab_size]`.
    sequence_lengths: The array of sequence lengths.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
    coverage_penalty_weight: Float weight to penalize the coverage of source
      sentence. Disabled with 0.0.
    finished: A boolean tensor of shape `[batch_size, beam_width]` that
      specifies which elements in the beam are finished already.
    accumulated_attention_probs: Accumulated attention probabilities up to the
      current time step, with shape `[batch_size, beam_width, max_time]` if
      coverage_penalty_weight is not 0.0.

  Returns:
    The scores normalized by the length_penalty and coverage_penalty.

  Raises:
    ValueError: accumulated_attention_probs is None when coverage penalty is
      enabled.
  """
  length_penalty_ = _length_penalty(
      sequence_lengths=sequence_lengths, penalty_factor=length_penalty_weight)
  length_penalty_ = math_ops.cast(length_penalty_, dtype=log_probs.dtype)
  scores = log_probs / length_penalty_

  coverage_penalty_weight = ops.convert_to_tensor(
      coverage_penalty_weight, name="coverage_penalty_weight")
  if coverage_penalty_weight.shape.ndims != 0:
    raise ValueError("coverage_penalty_weight should be a scalar, "
                     "but saw shape: %s" % coverage_penalty_weight.shape)

  if tensor_util.constant_value(coverage_penalty_weight) == 0.0:
    return scores

  if accumulated_attention_probs is None:
    raise ValueError(
        "accumulated_attention_probs can be None only if coverage penalty is "
        "disabled.")

  # Add source sequence length mask before computing coverage penalty.
  accumulated_attention_probs = array_ops.where(
      math_ops.equal(accumulated_attention_probs, 0.0),
      array_ops.ones_like(accumulated_attention_probs),
      accumulated_attention_probs)

  # coverage penalty =
  #     sum over `max_time` {log(min(accumulated_attention_probs, 1.0))}
  coverage_penalty = math_ops.reduce_sum(
      math_ops.log(math_ops.minimum(accumulated_attention_probs, 1.0)), 2)
  # Apply coverage penalty to finished predictions.
  coverage_penalty *= math_ops.to_float(finished)
  weighted_coverage_penalty = coverage_penalty * coverage_penalty_weight
  # Reshape from [batch_size, beam_width] to [batch_size, beam_width, 1]
  weighted_coverage_penalty = array_ops.expand_dims(
      weighted_coverage_penalty, 2)
  return scores + weighted_coverage_penalty
Developer: Wajih-O; Project: tensorflow; Lines: 60; Source: beam_search_decoder.py
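
A toy sketch of just the coverage-penalty term, with hypothetical accumulated attention probabilities and public TF 2.x ops:

import tensorflow as tf  # TF 2.x eager

# Accumulated attention over 4 source positions for one hypothesis.
acc_attn = tf.constant([[0.2, 0.9, 1.4, 0.5]])
coverage_penalty = tf.reduce_sum(
    tf.math.log(tf.minimum(acc_attn, 1.0)), axis=-1)
print(coverage_penalty.numpy())
# Positions attended more than once are capped at 1.0 (zero log contribution);
# under-attended positions contribute negative log terms.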


Note: The tensorflow.python.ops.math_ops.minimum examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective authors, and copyright remains with the original authors; for distribution and use, please refer to each project's license. Do not reproduce without permission.