This page collects typical usage examples of the Python method tensor2tensor.layers.common_attention.add_timing_signal_1d. If you are unsure what common_attention.add_timing_signal_1d does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage of its containing module, tensor2tensor.layers.common_attention.
Below are 13 code examples of common_attention.add_timing_signal_1d, sorted by popularity by default.
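Before the examples, here is a minimal sketch of the method's basic contract (this snippet is not taken from any of the examples below; it assumes a TF1-style setup with tensor2tensor installed, and the shapes are purely illustrative): the input is a Tensor of shape [batch, length, channels], and the output has the same shape with a sinusoidal positional (timing) signal added at every position.
import tensorflow as tf
from tensor2tensor.layers import common_attention

x = tf.zeros([2, 16, 64])                      # [batch, length, channels]
y = common_attention.add_timing_signal_1d(x)   # same shape, timing signal added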
Example 1: attention_lm_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def attention_lm_prepare_decoder(targets, hparams):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a Tensor, containing large negative values
      to implement masked attention and possibly biases for diagonal alignments
  """
  if hparams.prepend_mode == "prepend_inputs_full_attention":
    decoder_self_attention_bias = (
        common_attention.attention_bias_prepend_inputs_full_attention(
            common_attention.embedding_to_padding(targets)))
  else:
    decoder_self_attention_bias = (
        common_attention.attention_bias_lower_triangle(
            common_layers.shape_list(targets)[1]))
  decoder_input = common_layers.shift_right_3d(targets)
  if hparams.pos == "timing":
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias)
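For intuition, here is a rough sketch of the causal bias built above (added here as an illustration, not part of the original example): attention_bias_lower_triangle(length) returns a [1, 1, length, length] tensor that is 0 on and below the diagonal and a large negative value (about -1e9) above it, so softmax attention gives future positions essentially zero weight.
from tensor2tensor.layers import common_attention

bias = common_attention.attention_bias_lower_triangle(4)
# bias[0, 0] is approximately:
# [[ 0, -1e9, -1e9, -1e9],
#  [ 0,    0, -1e9, -1e9],
#  [ 0,    0,    0, -1e9],
#  [ 0,    0,    0,    0]]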
Example 2: prepare_question_encoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def prepare_question_encoder(inputs, hparams):
  """Prepare question encoder.

  Args:
    inputs: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
  """
  encoder_input = inputs
  # Usual case - not a packed dataset.
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  ignore_padding = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  encoder_self_attention_bias = ignore_padding
  if hparams.pos == "timing":
    encoder_input = common_attention.add_timing_signal_1d(encoder_input)
  elif hparams.pos == "emb":
    encoder_input = common_attention.add_positional_embedding(
        encoder_input, hparams.max_length, "inputs_positional_embedding",
        None)
  return (encoder_input, encoder_self_attention_bias)
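A small illustration of how the padding bias above is derived (an assumption-labeled sketch added here, not part of the original example): embedding_to_padding marks positions whose embedding vector is all zeros as padding, and attention_bias_ignore_padding turns that mask into a [batch, 1, 1, length] bias of large negative values so encoder self-attention ignores those positions.
import tensorflow as tf
from tensor2tensor.layers import common_attention

emb = tf.constant([[[0.5, 0.5], [0.0, 0.0]]])          # second position is all-zero padding
padding = common_attention.embedding_to_padding(emb)   # -> [[0., 1.]]
bias = common_attention.attention_bias_ignore_padding(padding)
# bias has shape [batch, 1, 1, length]; padded positions get roughly -1e9.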
Example 3: posterior
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def posterior(
    name, hparams, targets, targets_mask, decoder_self_attention_bias,
    **kwargs):
  """Compute mu and sigma for diagonal normal posterior q(z|x,y)."""
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    decoder_input = drop_2d(targets, hparams.mode, hparams.posterior_2d_dropout)
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
    decoder_input = tf.nn.dropout(decoder_input,
                                  rate=hparams.layer_prepostprocess_dropout)
    decoder_output = transformer_decoder_layers(
        "block",
        n_layers=hparams.n_posterior_layers,
        decoder_input=decoder_input,
        hparams=hparams,
        decoder_self_attention_bias=decoder_self_attention_bias,
        **kwargs)
    decoder_output = gops.dense_weightnorm(
        "h2o_out", decoder_output, hparams.latent_size * 2, targets_mask,
        init_scale=0.0, init=False)
    return decoder_output
Example 4: cond_prior
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def cond_prior(
    name, hparams, decoder_input, targets_mask, output_size,
    decoder_self_attention_bias, init_scale=0.0, **kwargs):
  """Compute hidden states for parameters for conditional prior."""
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
    decoder_input = tf.nn.dropout(decoder_input,
                                  rate=hparams.layer_prepostprocess_dropout)
    decoder_output = transformer_decoder_layers(
        "block",
        n_layers=hparams.n_posterior_layers,
        decoder_input=decoder_input,
        hparams=hparams,
        decoder_self_attention_bias=decoder_self_attention_bias,
        **kwargs)
    decoder_output = gops.dense_weightnorm(
        "h2o_out", decoder_output, output_size, targets_mask,
        init_scale=init_scale, init=False)
    return decoder_output
Example 5: decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def decoder(name, latents, hparams, decoder_self_attention_bias, **kwargs):
  """Compute final hidden states for p(y|z,x)."""
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    decoder_input = drop_2d(latents, hparams.mode, hparams.decoder_2d_dropout)
    if hparams.pos_attn:
      decoder_input = gops.positional_attention(
          "pos_attn", decoder_input, decoder_self_attention_bias, hparams)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
    if common_layers.shape_list(latents)[-1] != hparams.hidden_size:
      decoder_input = gops.dense("lat2hid", latents, hparams.hidden_size)
    decoder_output = transformer_decoder_layers(
        "block",
        n_layers=hparams.n_decoder_layers,
        decoder_input=decoder_input,
        hparams=hparams,
        decoder_self_attention_bias=decoder_self_attention_bias,
        **kwargs)
    batch_size, targets_length = common_layers.shape_list(decoder_output)[:2]
    # Expand since t2t expects 4d tensors.
    decoder_output = tf.reshape(
        decoder_output, [batch_size, targets_length, 1, hparams.hidden_size])
    return decoder_output
Example 6: attend
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def attend(x, source, hparams, name):
  """Self-attention layer with source as memory antecedent."""
  with tf.variable_scope(name):
    x = tf.squeeze(x, axis=2)
    if len(source.get_shape()) > 3:
      source = tf.squeeze(source, axis=2)
    source = common_attention.add_timing_signal_1d(source)
    y = common_attention.multihead_attention(
        common_layers.layer_preprocess(x, hparams), source, None,
        hparams.attention_key_channels or hparams.hidden_size,
        hparams.attention_value_channels or hparams.hidden_size,
        hparams.hidden_size, hparams.num_heads,
        hparams.attention_dropout)
    res = common_layers.layer_postprocess(x, y, hparams)
    return tf.expand_dims(res, axis=2)
Example 7: prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def prepare_decoder(targets, target_space_emb):
  """Prepare decoder."""
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
  target_space_emb = tf.reshape(target_space_emb, [1, 1, -1])
  target_space_emb = tf.tile(target_space_emb, [tf.shape(targets)[0], 1, 1])
  decoder_input = common_layers.shift_right_3d(
      targets, pad_value=target_space_emb)
  decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias)
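A quick note on the shift used above (a minimal sketch added here, not from the original example): shift_right_3d moves every position one step to the right along the length axis, so position t of the decoder input holds target t-1, and the first slot is filled with the pad value (zeros by default, or the tiled target-space embedding as in this example).
import tensorflow as tf
from tensor2tensor.layers import common_layers

x = tf.constant([[[1.], [2.], [3.]]])        # [batch=1, length=3, depth=1]
shifted = common_layers.shift_right_3d(x)    # -> [[[0.], [1.], [2.]]]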
Example 8: attention_lm_moe_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def attention_lm_moe_prepare_decoder(targets, hparams):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a Tensor, containing large negative values
      to implement masked attention and possibly biases for diagonal alignments
    pad_remover (expert_utils.PadRemover): a utility object to remove padding
  """
  targets_pad_mask = common_attention.embedding_to_padding(targets)
  with tf.name_scope("pad_remover"):
    # Because of the shift_right, the <eos> token will be considered as
    # padding. In practice it doesn't really matter: due to the triangular
    # mask, this token is never attended to.
    pad_remover = expert_utils.PadRemover(targets_pad_mask)

  if hparams.prepend_mode == "prepend_inputs_full_attention":
    decoder_self_attention_bias = (
        common_attention.attention_bias_prepend_inputs_full_attention(
            targets_pad_mask))
  else:
    decoder_self_attention_bias = (
        common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
  decoder_input = common_layers.shift_right_3d(targets)
  if hparams.pos == "timing":
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias, pad_remover)
Example 9: prepare_image_question_encoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def prepare_image_question_encoder(image_feat, question, hparams):
  """Prepare encoder.

  Args:
    image_feat: a Tensor.
    question: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
    encoder_decoder_attention_bias: a bias tensor for use in encoder-decoder attention
  """
  encoder_input = tf.concat([image_feat, question], axis=1)
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  ignore_padding = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  encoder_self_attention_bias = ignore_padding
  encoder_decoder_attention_bias = ignore_padding
  # Usual case - not a packed dataset.
  if hparams.pos == "timing":
    question = common_attention.add_timing_signal_1d(question)
  elif hparams.pos == "emb":
    question = common_attention.add_positional_embedding(
        question, hparams.max_length, "inputs_positional_embedding",
        None)
  encoder_input = tf.concat([image_feat, question], axis=1)
  return (encoder_input, encoder_self_attention_bias,
          encoder_decoder_attention_bias)
Example 10: transformer_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def transformer_prepare_decoder(targets, hparams, features=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well.
      This is needed now for "packed" datasets.

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))

  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)
  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias)
Example 11: transformer_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well. This is
      needed now for "packed" datasets.
    pad: vector to use for padding when shifting targets right

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))

  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets, pad)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "timing_from_features":
    decoder_input = common_attention.add_timing_signals_from_features(
        decoder_input, features, hparams.position_features)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)
  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias)
Example 12: transformer_decoder_block
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def transformer_decoder_block(name,
                              n_layers,
                              x,
                              x_mask,
                              output_size,
                              init,
                              **kwargs):
  """A transformation block composed of transformer decoder layers.

  Args:
    name: variable scope.
    n_layers: number of transformer layers.
    x: input to transformation.
    x_mask: mask.
    output_size: output dimensionality.
    init: data-dependent init for weightnorm parameters.
    **kwargs: Contains hparams, encoder_output,
      encoder_decoder_attention_bias and decoder_self_attention_bias

  Returns:
    outputs: Tensor of shape [batch_size, length, output_size].
  """
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    hparams = kwargs.pop("hparams")
    disable_dropout = kwargs.pop("disable_dropout")
    if disable_dropout:
      hparams = copy.deepcopy(hparams)
      hparams.attention_dropout = 0.0
      hparams.layer_prepostprocess_dropout = 0.0
      hparams.relu_dropout = 0.0
    n_channels = common_layers.shape_list(x)[-1]
    if n_channels != hparams.hidden_size:
      hparams = copy.deepcopy(hparams)
      hparams.hidden_size = n_channels

    outputs = common_attention.add_timing_signal_1d(x)
    with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):
      for layer_idx in range(n_layers):
        outputs = transformer_decoder_layer(
            decoder_input=outputs,
            layer_idx=layer_idx,
            hparams=hparams,
            **kwargs)
      outputs = common_layers.layer_preprocess(outputs, hparams)
    outputs = dense_weightnorm(
        "h2o", outputs, output_size, x_mask, init_scale=0.0, init=init)
    return outputs
Example 13: transformer_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import add_timing_signal_1d [as alias]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well. This is
      needed now for "packed" datasets.
    pad: vector to use for padding when shifting targets right

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))

  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets, pad)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)
  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias)