本文整理汇总了Python中tensor2tensor.layers.common_layers.shift_right_3d方法的典型用法代码示例。如果您正苦于以下问题:Python common_layers.shift_right_3d方法的具体用法?Python common_layers.shift_right_3d怎么用?Python common_layers.shift_right_3d使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类tensor2tensor.layers.common_layers
的用法示例。
在下文中一共展示了common_layers.shift_right_3d方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: attention_lm_prepare_decoder
# 需要导入模块: from tensor2tensor.layers import common_layers [as 别名]
# 或者: from tensor2tensor.layers.common_layers import shift_right_3d [as 别名]
def attention_lm_prepare_decoder(targets, hparams):
"""Prepare one shard of the model for the decoder.
Args:
targets: a Tensor.
hparams: run hyperparameters
Returns:
decoder_input: a Tensor, bottom of decoder stack
decoder_self_attention_bias: a Tensor, containing large negative values
to implement masked attention and possibly biases for diagonal alignments
"""
if hparams.prepend_mode == "prepend_inputs_full_attention":
decoder_self_attention_bias = (
common_attention.attention_bias_prepend_inputs_full_attention(
common_attention.embedding_to_padding(targets)))
else:
decoder_self_attention_bias = (
common_attention.attention_bias_lower_triangle(
common_layers.shape_list(targets)[1]))
decoder_input = common_layers.shift_right_3d(targets)
if hparams.pos == "timing":
decoder_input = common_attention.add_timing_signal_1d(decoder_input)
return (decoder_input, decoder_self_attention_bias)
示例2: prepare_decoder
# 需要导入模块: from tensor2tensor.layers import common_layers [as 别名]
# 或者: from tensor2tensor.layers.common_layers import shift_right_3d [as 别名]
def prepare_decoder(targets, target_space_emb):
"""Prepare decoder."""
decoder_self_attention_bias = (
common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
target_space_emb = tf.reshape(target_space_emb, [1, 1, -1])
target_space_emb = tf.tile(target_space_emb, [tf.shape(targets)[0], 1, 1])
decoder_input = common_layers.shift_right_3d(
targets, pad_value=target_space_emb)
decoder_input = common_attention.add_timing_signal_1d(decoder_input)
return (decoder_input, decoder_self_attention_bias)
示例3: attention_lm_moe_prepare_decoder
# 需要导入模块: from tensor2tensor.layers import common_layers [as 别名]
# 或者: from tensor2tensor.layers.common_layers import shift_right_3d [as 别名]
def attention_lm_moe_prepare_decoder(targets, hparams):
"""Prepare one shard of the model for the decoder.
Args:
targets: a Tensor.
hparams: run hyperparameters
Returns:
decoder_input: a Tensor, bottom of decoder stack
decoder_self_attention_bias: a Tensor, containing large negative values
to implement masked attention and possibly biases for diagonal alignments
pad_remover (expert_utils.PadRemover): an util object to remove padding
"""
targets_pad_mask = common_attention.embedding_to_padding(targets)
with tf.name_scope("pad_remover"):
# Because of the shift_right, the <eos> token will be considered as
# padding. In practice, it doesn't really matter, due to the triangular
# mask, this token should never be attended.
pad_remover = expert_utils.PadRemover(targets_pad_mask)
if hparams.prepend_mode == "prepend_inputs_full_attention":
decoder_self_attention_bias = (
common_attention.attention_bias_prepend_inputs_full_attention(
targets_pad_mask))
else:
decoder_self_attention_bias = (
common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
decoder_input = common_layers.shift_right_3d(targets)
if hparams.pos == "timing":
decoder_input = common_attention.add_timing_signal_1d(decoder_input)
return (decoder_input, decoder_self_attention_bias, pad_remover)
示例4: transformer_prepare_decoder
# 需要导入模块: from tensor2tensor.layers import common_layers [as 别名]
# 或者: from tensor2tensor.layers.common_layers import shift_right_3d [as 别名]
def transformer_prepare_decoder(targets, hparams, features=None):
"""Prepare one shard of the model for the decoder.
Args:
targets: a Tensor.
hparams: run hyperparameters
features: optionally pass the entire features dictionary as well.
This is needed now for "packed" datasets.
Returns:
decoder_input: a Tensor, bottom of decoder stack
decoder_self_attention_bias: a bias tensor for use in decoder self-attention
"""
if hparams.causal_decoder_self_attention:
# Causal attention.
if hparams.prepend_mode == "prepend_inputs_full_attention":
decoder_self_attention_bias = (
common_attention.attention_bias_prepend_inputs_full_attention(
common_attention.embedding_to_padding(targets)))
else:
decoder_self_attention_bias = (
common_attention.attention_bias_lower_triangle(
common_layers.shape_list(targets)[1]))
else:
# Full attention.
decoder_padding = common_attention.embedding_to_padding(targets)
decoder_self_attention_bias = (
common_attention.attention_bias_ignore_padding(decoder_padding))
if features and "targets_segmentation" in features:
# "Packed" dataset - keep the examples from seeing each other.
targets_segmentation = features["targets_segmentation"]
targets_position = features["targets_position"]
decoder_self_attention_bias += common_attention.attention_bias_same_segment(
targets_segmentation, targets_segmentation)
else:
targets_position = None
if hparams.proximity_bias:
decoder_self_attention_bias += common_attention.attention_bias_proximal(
common_layers.shape_list(targets)[1])
decoder_input = common_layers.shift_right_3d(targets)
if hparams.pos == "timing":
if targets_position is not None:
decoder_input = common_attention.add_timing_signal_1d_given_position(
decoder_input, targets_position)
else:
decoder_input = common_attention.add_timing_signal_1d(decoder_input)
elif hparams.pos == "emb":
decoder_input = common_attention.add_positional_embedding(
decoder_input, hparams.max_length, "targets_positional_embedding",
targets_position)
if hparams.activation_dtype == "bfloat16":
decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
tf.bfloat16)
return (decoder_input, decoder_self_attention_bias)
示例5: prepare_decoder
# 需要导入模块: from tensor2tensor.layers import common_layers [as 别名]
# 或者: from tensor2tensor.layers.common_layers import shift_right_3d [as 别名]
def prepare_decoder(targets, hparams):
"""Prepare decoder for images."""
targets_shape = common_layers.shape_list(targets)
channels = hparams.num_channels
curr_infer_length = None
# during training, images are [batch, IMG_LEN, IMG_LEN, 3].
# At inference, they are [batch, curr_infer_length, 1, 1]
if hparams.mode == tf.contrib.learn.ModeKeys.INFER:
curr_infer_length = targets_shape[1]
if hparams.block_raster_scan:
assert hparams.img_len*channels % hparams.query_shape[1] == 0
assert hparams.img_len % hparams.query_shape[0] == 0
total_block_width = hparams.img_len*channels
# Decoding is in block raster scan order. We divide the image into
# hparams.query_shape blocks and then decode each block in raster scan.
# To make that compatible with our inference pipeline, pad the target so
# that rows is a multiple of query_shape and columns is a multiple of
# hparams.img_len*channels
curr_infer_length = targets_shape[1]
block_padding_factor = total_block_width * hparams.query_shape[0]
targets = tf.pad(targets, [
[0, 0], [0, -curr_infer_length % block_padding_factor],
[0, 0], [0, 0]])
num_blocks = total_block_width // hparams.query_shape[1]
# Reshape the image to represent blocks
target_blocks = tf.reshape(
targets, [targets_shape[0], -1, num_blocks, hparams.query_shape[0],
hparams.query_shape[1]])
# Transpose to read the image in 2D fashion.
targets = tf.transpose(target_blocks, [0, 1, 3, 2, 4])
else:
# add padding to make sure the size of targets is a multiple of img_height
# times number of channels. This is needed for positional encodings and
# for doing the RGB lookup.
padding_factor = channels * hparams.img_len
targets = tf.pad(targets, [
[0, 0], [0, -curr_infer_length % padding_factor], [0, 0], [0, 0]])
targets = tf.reshape(targets,
[targets_shape[0], -1, hparams.img_len, channels])
# Preprocess image
x = prepare_image(targets, hparams, name="dec_channels")
x_shape = common_layers.shape_list(x)
if (hparams.dec_attention_type == AttentionType.LOCAL_2D or
hparams.dec_attention_type == AttentionType.LOCAL_BLOCK):
x = common_attention.right_shift_blockwise(x, hparams.query_shape)
x = add_pos_signals(x, hparams, "dec_pos")
else:
# Add position signals
x = tf.reshape(x, [targets_shape[0],
x_shape[1]*x_shape[2], hparams.hidden_size])
x = common_layers.shift_right_3d(x)
x = tf.reshape(x, [targets_shape[0],
x_shape[1], x_shape[2], hparams.hidden_size])
x = add_pos_signals(x, hparams, "dec_pos")
x = common_layers.cast_like(x, targets)
return x, x_shape[1], x_shape[2]
示例6: transformer_prepare_decoder
# 需要导入模块: from tensor2tensor.layers import common_layers [as 别名]
# 或者: from tensor2tensor.layers.common_layers import shift_right_3d [as 别名]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
"""Prepare one shard of the model for the decoder.
Args:
targets: a Tensor.
hparams: run hyperparameters
features: optionally pass the entire features dictionary as well. This is
needed now for "packed" datasets.
pad: vector to use for padding when shifting targets right
Returns:
decoder_input: a Tensor, bottom of decoder stack
decoder_self_attention_bias: a bias tensor for use in decoder self-attention
"""
if hparams.causal_decoder_self_attention:
# Causal attention.
if hparams.prepend_mode == "prepend_inputs_full_attention":
decoder_self_attention_bias = (
common_attention.attention_bias_prepend_inputs_full_attention(
common_attention.embedding_to_padding(targets)))
else:
decoder_self_attention_bias = (
common_attention.attention_bias_lower_triangle(
common_layers.shape_list(targets)[1]))
else:
# Full attention.
decoder_padding = common_attention.embedding_to_padding(targets)
decoder_self_attention_bias = (
common_attention.attention_bias_ignore_padding(decoder_padding))
if features and "targets_segmentation" in features:
# "Packed" dataset - keep the examples from seeing each other.
targets_segmentation = features["targets_segmentation"]
targets_position = features["targets_position"]
decoder_self_attention_bias += common_attention.attention_bias_same_segment(
targets_segmentation, targets_segmentation)
else:
targets_position = None
if hparams.proximity_bias:
decoder_self_attention_bias += common_attention.attention_bias_proximal(
common_layers.shape_list(targets)[1])
decoder_input = common_layers.shift_right_3d(targets, pad)
if hparams.pos == "timing":
if targets_position is not None:
decoder_input = common_attention.add_timing_signal_1d_given_position(
decoder_input, targets_position)
else:
decoder_input = common_attention.add_timing_signal_1d(decoder_input)
elif hparams.pos == "timing_from_features":
decoder_input = common_attention.add_timing_signals_from_features(
decoder_input, features, hparams.position_features)
elif hparams.pos == "emb":
decoder_input = common_attention.add_positional_embedding(
decoder_input, hparams.max_length, "targets_positional_embedding",
targets_position)
if hparams.activation_dtype == "bfloat16":
decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
tf.bfloat16)
return (decoder_input, decoder_self_attention_bias)
示例7: prepare_decoder
# 需要导入模块: from tensor2tensor.layers import common_layers [as 别名]
# 或者: from tensor2tensor.layers.common_layers import shift_right_3d [as 别名]
def prepare_decoder(targets, hparams):
"""Prepare decoder for images."""
targets_shape = common_layers.shape_list(targets)
channels = hparams.num_channels
curr_infer_length = None
# during training, images are [batch, IMG_LEN, IMG_LEN, 3].
# At inference, they are [batch, curr_infer_length, 1, 1]
if hparams.mode == tf.estimator.ModeKeys.PREDICT:
curr_infer_length = targets_shape[1]
if hparams.block_raster_scan:
assert hparams.img_len*channels % hparams.query_shape[1] == 0
assert hparams.img_len % hparams.query_shape[0] == 0
total_block_width = hparams.img_len*channels
# Decoding is in block raster scan order. We divide the image into
# hparams.query_shape blocks and then decode each block in raster scan.
# To make that compatible with our inference pipeline, pad the target so
# that rows is a multiple of query_shape and columns is a multiple of
# hparams.img_len*channels
curr_infer_length = targets_shape[1]
block_padding_factor = total_block_width * hparams.query_shape[0]
targets = tf.pad(targets, [
[0, 0], [0, -curr_infer_length % block_padding_factor],
[0, 0], [0, 0]])
num_blocks = total_block_width // hparams.query_shape[1]
# Reshape the image to represent blocks
target_blocks = tf.reshape(
targets, [targets_shape[0], -1, num_blocks, hparams.query_shape[0],
hparams.query_shape[1]])
# Transpose to read the image in 2D fashion.
targets = tf.transpose(target_blocks, [0, 1, 3, 2, 4])
else:
# add padding to make sure the size of targets is a multiple of img_height
# times number of channels. This is needed for positional encodings and
# for doing the RGB lookup.
padding_factor = channels * hparams.img_len
targets = tf.pad(targets, [
[0, 0], [0, -curr_infer_length % padding_factor], [0, 0], [0, 0]])
targets = tf.reshape(targets,
[targets_shape[0], -1, hparams.img_len, channels])
# Preprocess image
x = prepare_image(targets, hparams, name="dec_channels")
x_shape = common_layers.shape_list(x)
if (hparams.dec_attention_type == AttentionType.LOCAL_2D or
hparams.dec_attention_type == AttentionType.LOCAL_BLOCK):
x = common_attention.right_shift_blockwise(x, hparams.query_shape)
x = add_pos_signals(x, hparams, "dec_pos")
else:
# Add position signals
x = tf.reshape(x, [targets_shape[0],
x_shape[1]*x_shape[2], hparams.hidden_size])
x = common_layers.shift_right_3d(x)
x = tf.reshape(x, [targets_shape[0],
x_shape[1], x_shape[2], hparams.hidden_size])
x = add_pos_signals(x, hparams, "dec_pos")
x = common_layers.cast_like(x, targets)
return x, x_shape[1], x_shape[2]
示例8: transformer_prepare_decoder
# 需要导入模块: from tensor2tensor.layers import common_layers [as 别名]
# 或者: from tensor2tensor.layers.common_layers import shift_right_3d [as 别名]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
"""Prepare one shard of the model for the decoder.
Args:
targets: a Tensor.
hparams: run hyperparameters
features: optionally pass the entire features dictionary as well. This is
needed now for "packed" datasets.
pad: vector to use for padding when shifting targets right
Returns:
decoder_input: a Tensor, bottom of decoder stack
decoder_self_attention_bias: a bias tensor for use in decoder self-attention
"""
if hparams.causal_decoder_self_attention:
# Causal attention.
if hparams.prepend_mode == "prepend_inputs_full_attention":
decoder_self_attention_bias = (
common_attention.attention_bias_prepend_inputs_full_attention(
common_attention.embedding_to_padding(targets)))
else:
decoder_self_attention_bias = (
common_attention.attention_bias_lower_triangle(
common_layers.shape_list(targets)[1]))
else:
# Full attention.
decoder_padding = common_attention.embedding_to_padding(targets)
decoder_self_attention_bias = (
common_attention.attention_bias_ignore_padding(decoder_padding))
if features and "targets_segmentation" in features:
# "Packed" dataset - keep the examples from seeing each other.
targets_segmentation = features["targets_segmentation"]
targets_position = features["targets_position"]
decoder_self_attention_bias += common_attention.attention_bias_same_segment(
targets_segmentation, targets_segmentation)
else:
targets_position = None
if hparams.proximity_bias:
decoder_self_attention_bias += common_attention.attention_bias_proximal(
common_layers.shape_list(targets)[1])
decoder_input = common_layers.shift_right_3d(targets, pad)
if hparams.pos == "timing":
if targets_position is not None:
decoder_input = common_attention.add_timing_signal_1d_given_position(
decoder_input, targets_position)
else:
decoder_input = common_attention.add_timing_signal_1d(decoder_input)
elif hparams.pos == "emb":
decoder_input = common_attention.add_positional_embedding(
decoder_input, hparams.max_length, "targets_positional_embedding",
targets_position)
if hparams.activation_dtype == "bfloat16":
decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
tf.bfloat16)
return (decoder_input, decoder_self_attention_bias)