This article collects typical usage examples of the Python method tensor2tensor.layers.common_attention.attention_bias_ignore_padding. If you are unsure exactly how to use common_attention.attention_bias_ignore_padding in Python, the curated code examples below may help; you can also explore further usages of the containing module, tensor2tensor.layers.common_attention.
The following shows 8 code examples of common_attention.attention_bias_ignore_padding, listed by popularity by default.
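Before the examples, here is a minimal sketch of the basic call pattern (the tensor shapes and values below are illustrative assumptions, not taken from any of the examples): attention_bias_ignore_padding expects a float tensor with 1.0 at padding positions, typically produced by common_attention.embedding_to_padding, and returns a broadcastable bias holding large negative values at those positions, which is then passed as the bias argument of an attention layer.
import tensorflow as tf
from tensor2tensor.layers import common_attention

# Hypothetical embedded inputs [batch, length, hidden]; the last two steps are all-zero padding.
embeddings = tf.concat(
    [tf.random.normal([2, 3, 8]), tf.zeros([2, 2, 8])], axis=1)

# 1.0 at padding positions, 0.0 elsewhere: shape [batch, length].
padding = common_attention.embedding_to_padding(embeddings)

# Large negative values at padding positions, broadcastable against attention
# logits: shape [batch, 1, 1, length].
bias = common_attention.attention_bias_ignore_padding(padding)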
Example 1: prepare_question_encoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def prepare_question_encoder(inputs, hparams):
  """Prepare question encoder.

  Args:
    inputs: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
  """
  encoder_input = inputs
  # Usual case - not a packed dataset.
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  ignore_padding = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  encoder_self_attention_bias = ignore_padding
  if hparams.pos == "timing":
    encoder_input = common_attention.add_timing_signal_1d(encoder_input)
  elif hparams.pos == "emb":
    encoder_input = common_attention.add_positional_embedding(
        encoder_input, hparams.max_length, "inputs_positional_embedding",
        None)
  return (encoder_input, encoder_self_attention_bias)
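A hedged usage sketch for Example 1 follows. The function only reads hparams.pos (and hparams.max_length on the "emb" branch), so a lightweight stand-in object is enough to exercise it; the SimpleNamespace and the tensor shape below are assumptions for illustration, not part of the original code.
import types
import tensorflow as tf

# Hypothetical question embeddings: [batch, length, hidden].
question = tf.random.normal([2, 6, 16])

# Minimal stand-in for the T2T hparams object (assumed attributes only).
hparams = types.SimpleNamespace(pos="timing", max_length=64)

encoder_input, encoder_self_attention_bias = prepare_question_encoder(
    question, hparams)
# encoder_input: [2, 6, 16] with the timing signal added.
# encoder_self_attention_bias: [2, 1, 1, 6].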
Example 2: sample_p
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def sample_p(
    self, targets_length, temp, check_invertibility=False, targets_mask=None,
    **kwargs):
  hparams = self._hparams
  if targets_mask is None:
    targets_mask = ops.sequence_mask(targets_length, hparams)
  decoder_self_attention_bias = (
      common_attention.attention_bias_ignore_padding(1.0 - targets_mask))
  batch_size, targets_max_length = (
      common_layers.shape_list(targets_mask)[:2])
  prior_shape = [batch_size, targets_max_length, hparams.latent_size]
  noise = tf.random.normal(prior_shape, stddev=temp)
  p_dist = None
  if hparams.prior_type == "standard_normal":
    z_p = noise
  elif hparams.prior_type == "diagonal_normal":
    diag_prior_params = ops.cond_prior(
        "diag_prior", hparams, tf.zeros(prior_shape), targets_mask,
        hparams.latent_size*2, decoder_self_attention_bias, **kwargs)
    p_dist = gops.diagonal_normal(diag_prior_params, "diag_prior")
    z_p = p_dist.loc + p_dist.scale * noise
  elif hparams.prior_type in ["affine", "additive", "rq"]:
    n_levels = len(hparams.depths.split("/"))
    divi = max(1, hparams.factor**(n_levels-1))
    flow_prior_shape = [
        batch_size, targets_max_length//divi, hparams.latent_size]
    noise = tf.random_normal(flow_prior_shape, stddev=temp)
    z_p, _, _, _ = glow.glow(
        "glow", noise, targets_mask, decoder_self_attention_bias,
        inverse=True, init=False, hparams=self._fparams,
        disable_dropout=True, temp=temp, **kwargs)
    if self.is_evaluating and check_invertibility:
      noise_inv, _, _, _ = glow.glow(
          "glow", z_p, targets_mask, decoder_self_attention_bias,
          inverse=False, init=False, hparams=self._fparams,
          disable_dropout=True, **kwargs)
      z_diff = noise - noise_inv
      tf.summary.scalar("flow_recon_inverse", tf.reduce_max(tf.abs(z_diff)))
  return z_p, p_dist
Example 3: prepare_image_question_encoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def prepare_image_question_encoder(image_feat, question, hparams):
  """Prepare encoder.

  Args:
    image_feat: a Tensor.
    question: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
  """
  encoder_input = tf.concat([image_feat, question], axis=1)
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  ignore_padding = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  encoder_self_attention_bias = ignore_padding
  encoder_decoder_attention_bias = ignore_padding
  # Usual case - not a packed dataset.
  if hparams.pos == "timing":
    question = common_attention.add_timing_signal_1d(question)
  elif hparams.pos == "emb":
    question = common_attention.add_positional_embedding(
        question, hparams.max_length, "inputs_positional_embedding",
        None)
  encoder_input = tf.concat([image_feat, question], axis=1)
  return (encoder_input, encoder_self_attention_bias,
          encoder_decoder_attention_bias)
Example 4: get_attention_bias
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def get_attention_bias(sequence_length, maxlen=None):
  """Create attention bias so attention is not applied at padding position."""
  # attention_bias: [batch, 1, 1, memory_length]
  mask = tf.sequence_mask(sequence_length, maxlen=maxlen)
  nonpadding = tf.to_float(mask)
  invert_sequence_mask = tf.to_float(tf.logical_not(mask))
  attention_bias = common_attention.attention_bias_ignore_padding(
      invert_sequence_mask)
  return nonpadding, attention_bias
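A hedged usage sketch for Example 4: tf.sequence_mask builds the mask from explicit lengths, and the inverted mask becomes the padding indicator that attention_bias_ignore_padding expects. The concrete lengths are illustrative, and tf.to_float assumes a TF1-style environment, matching the snippet above.
import tensorflow as tf

# Two sequences padded to a common length of 5 (illustrative values).
sequence_length = tf.constant([3, 5])
nonpadding, attention_bias = get_attention_bias(sequence_length, maxlen=5)
# nonpadding: [2, 5] with 1.0 at real tokens and 0.0 at padding.
# attention_bias: [2, 1, 1, 5] with large negative values at padding positions.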
Example 5: transformer_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def transformer_prepare_decoder(targets, hparams, features=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well.
      This is needed now for "packed" datasets.

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))
  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)
  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias)
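The central branch in transformer_prepare_decoder is the choice between a causal bias and a padding-only bias. A minimal sketch of the two bias shapes, using a hand-written padding vector that is an assumption for illustration:
import tensorflow as tf
from tensor2tensor.layers import common_attention

length = 4
# Causal branch: a [1, 1, length, length] bias that masks future positions.
causal_bias = common_attention.attention_bias_lower_triangle(length)

# Full-attention branch: a [batch, 1, 1, length] bias that only masks padding.
# Illustrative padding indicator for one sequence whose last step is padding.
padding = tf.constant([[0.0, 0.0, 0.0, 1.0]])
padding_bias = common_attention.attention_bias_ignore_padding(padding)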
Example 6: transformer_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well. This is
      needed now for "packed" datasets.
    pad: vector to use for padding when shifting targets right

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))
  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets, pad)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "timing_from_features":
    decoder_input = common_attention.add_timing_signals_from_features(
        decoder_input, features, hparams.position_features)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)
  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias)
Example 7: compute_iw_marginal
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def compute_iw_marginal(
    self, targets, targets_mask, decoder_self_attention_bias, features,
    n_samples, reduce_mean=True, **kwargs):
  hparams = self._hparams
  z_q, log_q_z, _ = self.sample_q(
      targets, targets_mask, decoder_self_attention_bias,
      n_samples=n_samples, temp=1.0, **kwargs)  # [K*B, L, C]
  iw_kwargs = {key: ops.prepare_for_iw(value, n_samples) for (
      key, value) in kwargs.items()}
  iw_targets_mask = ops.prepare_for_iw(targets_mask, n_samples)
  iw_decoder_self_attention_bias = (
      common_attention.attention_bias_ignore_padding(1.0 - iw_targets_mask))
  iw_features = copy.copy(features)
  iw_features["targets"] = ops.prepare_for_iw(
      features["targets"], n_samples)
  log_p_z_base, log_abs_det = self.compute_prior_log_prob(
      z_q, iw_targets_mask, iw_decoder_self_attention_bias,
      check_invertibility=False, **iw_kwargs)
  log_p_z = log_p_z_base + log_abs_det
  body_output = ops.decoder(
      "decoder", z_q, hparams, iw_decoder_self_attention_bias, **iw_kwargs)
  logits = self.top(body_output, iw_features)
  numerator, denominator = self.loss_iw(logits, iw_features)
  numerator = tf.reduce_sum(numerator[..., 0, 0], 1)  # [K*B]
  denominator = tf.reduce_sum(denominator[..., 0, 0], 1)  # [K*B]
  log_p_x = -1 * numerator / denominator
  log_q_z = gops.reduce_mean_over_l_sum_over_c(log_q_z, iw_targets_mask)
  log_p_z = log_p_z / tf.reduce_sum(iw_targets_mask, 1)
  log_p_x, log_q_z, log_p_z = [ops.unprepare_for_iw(ii, n_samples) for ii in [
      log_p_x, log_q_z, log_p_z]]
  log_w_n = log_p_z - log_q_z
  log_w_n = tf.nn.log_softmax(log_w_n, axis=0)  # [K, B]
  iw_marginal = log_p_x + log_w_n
  iw_marginal = tf.reduce_logsumexp(iw_marginal, 0)  # [B]
  if reduce_mean:
    iw_marginal = tf.cast(tf.reduce_mean(iw_marginal, 0), tf.float32)  # [1]
  else:
    iw_marginal = tf.cast(iw_marginal, tf.float32)  # [1]
  return iw_marginal
Example 8: transformer_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well. This is
      needed now for "packed" datasets.
    pad: vector to use for padding when shifting targets right

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))
  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets, pad)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)
  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias)