

Python common_attention.add_timing_signal_1d Method Code Examples

This article collects typical code examples of the Python method tensor2tensor.layers.common_attention.add_timing_signal_1d. If you are unsure how to use common_attention.add_timing_signal_1d or what it does in practice, the examples selected here may help. You can also explore further usage examples from the tensor2tensor.layers.common_attention module.


The following presents 13 code examples of the common_attention.add_timing_signal_1d method, sorted by popularity by default.
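Before turning to the examples, here is a minimal sketch (not taken from any of the projects below) of what the call does: add_timing_signal_1d adds a sinusoidal positional ("timing") signal to a [batch, length, channels] tensor, so that position information is available to otherwise position-agnostic attention layers. The keyword defaults shown are assumptions based on the tensor2tensor API.

import tensorflow as tf
from tensor2tensor.layers import common_attention

# A [batch_size, length, hidden_size] tensor, e.g. embedded tokens.
x = tf.zeros([8, 50, 512])
# Add the sinusoidal timing signal in place of explicit position embeddings.
x = common_attention.add_timing_signal_1d(
    x, min_timescale=1.0, max_timescale=1.0e4)
# The result has the same shape as the input, [8, 50, 512].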

Example 1: attention_lm_prepare_decoder

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def attention_lm_prepare_decoder(targets, hparams):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a Tensor, containing large negative values
    to implement masked attention and possibly biases for diagonal alignments
  """
  if hparams.prepend_mode == "prepend_inputs_full_attention":
    decoder_self_attention_bias = (
        common_attention.attention_bias_prepend_inputs_full_attention(
            common_attention.embedding_to_padding(targets)))
  else:
    decoder_self_attention_bias = (
        common_attention.attention_bias_lower_triangle(
            common_layers.shape_list(targets)[1]))
  decoder_input = common_layers.shift_right_3d(targets)
  if hparams.pos == "timing":
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias) 
Author: akzaidi, Project: fine-lm, Lines: 26, Source: attention_lm.py

Example 2: prepare_question_encoder

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def prepare_question_encoder(inputs, hparams):
  """Prepare question encoder.

  Args:
    inputs: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
  """
  encoder_input = inputs
  # Usual case - not a packed dataset.
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  ignore_padding = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  encoder_self_attention_bias = ignore_padding
  if hparams.pos == "timing":
    encoder_input = common_attention.add_timing_signal_1d(encoder_input)
  elif hparams.pos == "emb":
    encoder_input = common_attention.add_positional_embedding(
        encoder_input, hparams.max_length, "inputs_positional_embedding",
        None)
  return (encoder_input, encoder_self_attention_bias) 
Author: tensorflow, Project: tensor2tensor, Lines: 26, Source: vqa_self_attention.py

Example 3: posterior

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def posterior(
    name, hparams, targets, targets_mask, decoder_self_attention_bias,
    **kwargs):
  """Compute mu and sigma for diagonal normal posterior q(z|x,y)."""
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    decoder_input = drop_2d(targets, hparams.mode, hparams.posterior_2d_dropout)
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
    decoder_input = tf.nn.dropout(decoder_input,
                                  rate=hparams.layer_prepostprocess_dropout)
    decoder_output = transformer_decoder_layers(
        "block",
        n_layers=hparams.n_posterior_layers,
        decoder_input=decoder_input,
        hparams=hparams,
        decoder_self_attention_bias=decoder_self_attention_bias,
        **kwargs)
    decoder_output = gops.dense_weightnorm(
        "h2o_out", decoder_output, hparams.latent_size * 2, targets_mask,
        init_scale=0.0, init=False)
    return decoder_output 
Author: tensorflow, Project: tensor2tensor, Lines: 22, Source: transformer_vae_flow_prior_ops.py

Example 4: cond_prior

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def cond_prior(
    name, hparams, decoder_input, targets_mask, output_size,
    decoder_self_attention_bias, init_scale=0.0, **kwargs):
  """Compute hidden states for parameters for conditional prior."""
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
    decoder_input = tf.nn.dropout(decoder_input,
                                  rate=hparams.layer_prepostprocess_dropout)
    decoder_output = transformer_decoder_layers(
        "block",
        n_layers=hparams.n_posterior_layers,
        decoder_input=decoder_input,
        hparams=hparams,
        decoder_self_attention_bias=decoder_self_attention_bias,
        **kwargs)
    decoder_output = gops.dense_weightnorm(
        "h2o_out", decoder_output, output_size, targets_mask,
        init_scale=init_scale, init=False)
    return decoder_output 
Author: tensorflow, Project: tensor2tensor, Lines: 21, Source: transformer_vae_flow_prior_ops.py

Example 5: decoder

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def decoder(name, latents, hparams, decoder_self_attention_bias, **kwargs):
  """Compute final hidden states for p(y|z,x)."""
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    decoder_input = drop_2d(latents, hparams.mode, hparams.decoder_2d_dropout)
    if hparams.pos_attn:
      decoder_input = gops.positional_attention(
          "pos_attn", decoder_input, decoder_self_attention_bias, hparams)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
    if common_layers.shape_list(latents)[-1] != hparams.hidden_size:
      decoder_input = gops.dense("lat2hid", latents, hparams.hidden_size)
    decoder_output = transformer_decoder_layers(
        "block",
        n_layers=hparams.n_decoder_layers,
        decoder_input=decoder_input,
        hparams=hparams,
        decoder_self_attention_bias=decoder_self_attention_bias,
        **kwargs)
    batch_size, targets_length = common_layers.shape_list(decoder_output)[:2]
    decoder_output = tf.reshape(
        decoder_output, [batch_size, targets_length, 1, hparams.hidden_size])
    # Expand since t2t expects 4d tensors.
    return decoder_output 
Author: tensorflow, Project: tensor2tensor, Lines: 25, Source: transformer_vae_flow_prior_ops.py

Example 6: attend

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def attend(x, source, hparams, name):
  """Self-attention layer with source as memory antecedent."""
  with tf.variable_scope(name):
    x = tf.squeeze(x, axis=2)
    if len(source.get_shape()) > 3:
      source = tf.squeeze(source, axis=2)
    source = common_attention.add_timing_signal_1d(source)
    y = common_attention.multihead_attention(
        common_layers.layer_preprocess(x, hparams), source, None,
        hparams.attention_key_channels or hparams.hidden_size,
        hparams.attention_value_channels or hparams.hidden_size,
        hparams.hidden_size, hparams.num_heads,
        hparams.attention_dropout)
    res = common_layers.layer_postprocess(x, y, hparams)
    return tf.expand_dims(res, axis=2) 
Author: akzaidi, Project: fine-lm, Lines: 17, Source: transformer_vae.py

Example 7: prepare_decoder

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def prepare_decoder(targets, target_space_emb):
  """Prepare decoder."""
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
  target_space_emb = tf.reshape(target_space_emb, [1, 1, -1])
  target_space_emb = tf.tile(target_space_emb, [tf.shape(targets)[0], 1, 1])
  decoder_input = common_layers.shift_right_3d(
      targets, pad_value=target_space_emb)
  decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias) 
Author: akzaidi, Project: fine-lm, Lines: 12, Source: multimodel.py
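A hypothetical usage sketch for the prepare_decoder function above; the shapes and values are assumptions for illustration, not taken from multimodel.py.

# Hypothetical inputs (shapes assumed for illustration only).
targets = tf.zeros([4, 20, 512])       # [batch, length, hidden_size]
target_space_emb = tf.zeros([512])     # learned target-space embedding
decoder_input, bias = prepare_decoder(targets, target_space_emb)
# decoder_input: targets shifted right, padded with the target-space
# embedding and with the 1-D timing signal added; bias: lower-triangular mask.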

Example 8: attention_lm_moe_prepare_decoder

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def attention_lm_moe_prepare_decoder(targets, hparams):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a Tensor, containing large negative values
    to implement masked attention and possibly biases for diagonal alignments
    pad_remover (expert_utils.PadRemover): an util object to remove padding
  """
  targets_pad_mask = common_attention.embedding_to_padding(targets)
  with tf.name_scope("pad_remover"):
    # Because of the shift_right, the <eos> token will be considered as
    # padding. In practice, it doesn't really matter, due to the triangular
    # mask, this token should never be attended.
    pad_remover = expert_utils.PadRemover(targets_pad_mask)

  if hparams.prepend_mode == "prepend_inputs_full_attention":
    decoder_self_attention_bias = (
        common_attention.attention_bias_prepend_inputs_full_attention(
            targets_pad_mask))
  else:
    decoder_self_attention_bias = (
        common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
  decoder_input = common_layers.shift_right_3d(targets)
  if hparams.pos == "timing":
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias, pad_remover) 
Author: akzaidi, Project: fine-lm, Lines: 33, Source: attention_lm_moe.py

Example 9: prepare_image_question_encoder

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def prepare_image_question_encoder(image_feat, question, hparams):
  """Prepare encoder.

  Args:
    image_feat: a Tensor.
    question: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
  """

  encoder_input = tf.concat([image_feat, question], axis=1)
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  ignore_padding = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  encoder_self_attention_bias = ignore_padding
  encoder_decoder_attention_bias = ignore_padding
  # Usual case - not a packed dataset.
  if hparams.pos == "timing":
    question = common_attention.add_timing_signal_1d(question)
  elif hparams.pos == "emb":
    question = common_attention.add_positional_embedding(
        question, hparams.max_length, "inputs_positional_embedding",
        None)
  encoder_input = tf.concat([image_feat, question], axis=1)

  return (encoder_input, encoder_self_attention_bias,
          encoder_decoder_attention_bias) 
Author: tensorflow, Project: tensor2tensor, Lines: 32, Source: vqa_recurrent_self_attention.py

Example 10: transformer_prepare_decoder

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def transformer_prepare_decoder(targets, hparams, features=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well.
      This is needed now for "packed" datasets.

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))

  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)

  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias) 
Author: akzaidi, Project: fine-lm, Lines: 58, Source: transformer.py

Example 11: transformer_prepare_decoder

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well. This is
      needed now for "packed" datasets.
    pad: vector to use for padding when shifting targets right

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))

  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets, pad)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "timing_from_features":
    decoder_input = common_attention.add_timing_signals_from_features(
        decoder_input, features, hparams.position_features)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)

  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias) 
Author: tensorflow, Project: tensor2tensor, Lines: 62, Source: transformer.py

Example 12: transformer_decoder_block

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def transformer_decoder_block(name,
                              n_layers,
                              x,
                              x_mask,
                              output_size,
                              init,
                              **kwargs):
  """A transformation block composed of transformer decoder layers.

  Args:
    name: variable scope.
    n_layers: number of transformer layers.
    x: input to transformation.
    x_mask: mask.
    output_size: output dimensionality.
    init: data-dependent init for weightnorm parameters.
    **kwargs: Constains hparams, encoder_output,
      encoder_decoder_attention_bias and decoder_self_attention_bias

  Returns:
    outputs: Tensor of shape [batch_size, length, output_size].
  """
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    hparams = kwargs.pop("hparams")
    disable_dropout = kwargs.pop("disable_dropout")
    if disable_dropout:
      hparams = copy.deepcopy(hparams)
      hparams.attention_dropout = 0.0
      hparams.layer_prepostprocess_dropout = 0.0
      hparams.relu_dropout = 0.0
    n_channels = common_layers.shape_list(x)[-1]
    if n_channels != hparams.hidden_size:
      hparams = copy.deepcopy(hparams)
      hparams.hidden_size = n_channels

    outputs = common_attention.add_timing_signal_1d(x)
    with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):
      for layer_idx in range(n_layers):
        outputs = transformer_decoder_layer(
            decoder_input=outputs,
            layer_idx=layer_idx,
            hparams=hparams,
            **kwargs)
    outputs = common_layers.layer_preprocess(outputs, hparams)
    outputs = dense_weightnorm(
        "h2o", outputs, output_size, x_mask, init_scale=0.0, init=init)
    return outputs 
Author: tensorflow, Project: tensor2tensor, Lines: 49, Source: transformer_glow_layers_ops.py

Example 13: transformer_prepare_decoder

# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well. This is
      needed now for "packed" datasets.
    pad: vector to use for padding when shifting targets right

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))

  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets, pad)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)

  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias) 
Author: yyht, Project: BERT, Lines: 59, Source: transformer.py


Note: The tensor2tensor.layers.common_attention.add_timing_signal_1d method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their respective developers; copyright remains with the original authors, and distribution and use should follow each project's license. Do not reproduce without permission.