This article collects typical usage examples of the Python method tensor2tensor.layers.common_attention.multihead_attention. If you are wondering what exactly common_attention.multihead_attention does or how to call it, the curated code examples below may help. You can also explore the module that defines the method, tensor2tensor.layers.common_attention, for further usage examples.
Eleven code examples of common_attention.multihead_attention are shown below, ordered roughly by popularity.
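Before diving into the examples, here is a minimal self-attention call that spells out the positional arguments every example below passes in the same order (query antecedent, memory antecedent, bias, key depth, value depth, output depth, number of heads, dropout rate). It mirrors the unit test in Example 11; the concrete depths and the toy input shape are illustrative only, and it assumes a TF1-style graph, which is what tensor2tensor targets.

import tensorflow as tf
from tensor2tensor.layers import common_attention

# A toy input: a batch of 3 sequences, each of length 4 with 5 channels.
x = tf.zeros([3, 4, 5], tf.float32)

y = common_attention.multihead_attention(
    x,     # query_antecedent
    None,  # memory_antecedent: None means attend over x itself (self-attention)
    None,  # bias: e.g. a padding or causal mask; None applies no masking
    10,    # total_key_depth
    10,    # total_value_depth
    10,    # output_depth
    2,     # num_heads
    0.2)   # dropout_rate
# y keeps the batch and length dimensions and has output_depth channels,
# i.e. shape [3, 4, 10].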
Example 1: encdec_attention_1d
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def encdec_attention_1d(x,
                        encoder_output,
                        encoder_decoder_attention_bias,
                        hparams):
  """Encoder-decoder attention over a 1d sequence."""
  x, x_shape, is_4d = maybe_reshape_4d_to_3d(x)
  encoder_output, _, _ = maybe_reshape_4d_to_3d(encoder_output)
  with tf.variable_scope("encdec_attention"):
    # Encoder-decoder attention.
    y = common_attention.multihead_attention(
        x,
        encoder_output,
        encoder_decoder_attention_bias,
        hparams.attention_key_channels or hparams.hidden_size,
        hparams.attention_value_channels or hparams.hidden_size,
        hparams.hidden_size,
        hparams.num_heads,
        hparams.attention_dropout,
        name="encdec_attention")
  if is_4d:
    y = tf.reshape(y, x_shape)
    y.set_shape([None, None, None, hparams.hidden_size])
  return y
Example 2: attend
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def attend(x, source, hparams, name):
  """Self-attention layer with source as memory antecedent."""
  with tf.variable_scope(name):
    x = tf.squeeze(x, axis=2)
    if len(source.get_shape()) > 3:
      source = tf.squeeze(source, axis=2)
    source = common_attention.add_timing_signal_1d(source)
    y = common_attention.multihead_attention(
        common_layers.layer_preprocess(x, hparams), source, None,
        hparams.attention_key_channels or hparams.hidden_size,
        hparams.attention_value_channels or hparams.hidden_size,
        hparams.hidden_size, hparams.num_heads,
        hparams.attention_dropout)
    res = common_layers.layer_postprocess(x, y, hparams)
    return tf.expand_dims(res, axis=2)
Example 3: transformer_encoder_attention_unit
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def transformer_encoder_attention_unit(x,
                                       hparams,
                                       encoder_self_attention_bias,
                                       attention_dropout_broadcast_dims,
                                       save_weights_to=None,
                                       make_image_summary=True):
  """Applies multihead attention function which is parametrised for encoding.

  Args:
    x: input
    hparams: model hyper-parameters
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
    attention_dropout_broadcast_dims: For noise broadcasting in the dropout
      layers to save memory during training
    save_weights_to: an optional dictionary to capture attention weights for
      visualization; the weights tensor will be appended there under a string
      key created from the variable scope (including name).
    make_image_summary: Whether to make an attention image summary.

  Returns:
    the output tensor
  """
  with tf.variable_scope("self_attention"):
    y = common_attention.multihead_attention(
        common_layers.layer_preprocess(x, hparams),
        None,
        encoder_self_attention_bias,
        hparams.attention_key_channels or hparams.hidden_size,
        hparams.attention_value_channels or hparams.hidden_size,
        hparams.hidden_size,
        hparams.num_heads,
        hparams.attention_dropout,
        attention_type=hparams.self_attention_type,
        save_weights_to=save_weights_to,
        max_relative_position=hparams.max_relative_position,
        make_image_summary=make_image_summary,
        dropout_broadcast_dims=attention_dropout_broadcast_dims)
    x = common_layers.layer_postprocess(x, y, hparams)
  return x
Example 4: attention_lm_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def attention_lm_decoder(decoder_input,
                         decoder_self_attention_bias,
                         hparams,
                         name="decoder"):
  """A stack of attention_lm layers.

  Args:
    decoder_input: a Tensor
    decoder_self_attention_bias: bias Tensor for self-attention
      (see common_attention.attention_bias())
    hparams: hyperparameters for model
    name: a string

  Returns:
    y: a Tensor
  """
  x = decoder_input
  with tf.variable_scope(name):
    for layer in range(hparams.num_hidden_layers):
      with tf.variable_scope("layer_%d" % layer):
        with tf.variable_scope("self_attention"):
          y = common_attention.multihead_attention(
              common_layers.layer_preprocess(
                  x, hparams), None, decoder_self_attention_bias,
              hparams.attention_key_channels or hparams.hidden_size,
              hparams.attention_value_channels or hparams.hidden_size,
              hparams.hidden_size, hparams.num_heads, hparams.attention_dropout)
          x = common_layers.layer_postprocess(x, y, hparams)
        with tf.variable_scope("ffn"):
          y = common_layers.conv_hidden_relu(
              common_layers.layer_preprocess(x, hparams),
              hparams.filter_size,
              hparams.hidden_size,
              dropout=hparams.relu_dropout)
          x = common_layers.layer_postprocess(x, y, hparams)
  return common_layers.layer_preprocess(x, hparams)
Example 5: local_within_block_attention
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def local_within_block_attention(x,
                                 self_attention_bias,
                                 hparams,
                                 attention_type="local_within_block_mask_right",
                                 q_padding="VALID",
                                 kv_padding="VALID"):
  """Local within block self attention."""
  x_new, x_shape, is_4d = maybe_reshape_4d_to_3d(x)
  with tf.variable_scope("local_within_block"):
    y = common_attention.multihead_attention(
        common_layers.layer_preprocess(x_new, hparams),
        None,
        self_attention_bias,
        hparams.attention_key_channels or hparams.hidden_size,
        hparams.attention_value_channels or hparams.hidden_size,
        hparams.hidden_size,
        hparams.num_heads,
        hparams.attention_dropout,
        attention_type=attention_type,
        block_width=hparams.block_width,
        block_length=hparams.block_length,
        q_padding=q_padding,
        kv_padding=kv_padding,
        q_filter_width=hparams.q_filter_width,
        kv_filter_width=hparams.kv_filter_width,
        name="local_within_block")
  if is_4d:
    y = tf.reshape(y, x_shape)
  return y
Example 6: local_attention_1d
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def local_attention_1d(x,
                       hparams,
                       attention_type="local_unmasked",
                       q_padding="VALID",
                       kv_padding="VALID"):
  """Local 1d self attention."""
  # self-attention
  x, x_shape, is_4d = maybe_reshape_4d_to_3d(x)
  with tf.variable_scope("local_1d_self_att"):
    y = common_attention.multihead_attention(
        x,
        None,
        None,
        hparams.attention_key_channels or hparams.hidden_size,
        hparams.attention_value_channels or hparams.hidden_size,
        hparams.hidden_size,
        hparams.num_heads,
        hparams.attention_dropout,
        attention_type=attention_type,
        shared_rel=hparams.shared_rel,
        block_width=hparams.block_width,
        block_length=hparams.block_length,
        q_padding=q_padding,
        kv_padding=kv_padding,
        q_filter_width=hparams.q_filter_width,
        kv_filter_width=hparams.kv_filter_width,
        make_image_summary=False,
        name="self_attention")
  if is_4d:
    y = tf.reshape(y, x_shape)
  return y
Example 7: dilated_attention_1d
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def dilated_attention_1d(x,
                         hparams,
                         attention_type="masked_dilated_1d",
                         q_padding="VALID",
                         kv_padding="VALID",
                         gap_size=2):
  """Dilated 1d self attention."""
  # self-attention
  x, x_shape, is_4d = maybe_reshape_4d_to_3d(x)
  with tf.variable_scope("masked_dilated_1d"):
    y = common_attention.multihead_attention(
        x,
        None,
        None,
        hparams.attention_key_channels or hparams.hidden_size,
        hparams.attention_value_channels or hparams.hidden_size,
        hparams.hidden_size,
        hparams.num_heads,
        hparams.attention_dropout,
        attention_type=attention_type,
        block_width=hparams.block_width,
        block_length=hparams.block_length,
        q_padding=q_padding,
        kv_padding=kv_padding,
        q_filter_width=hparams.q_filter_width,
        kv_filter_width=hparams.kv_filter_width,
        gap_size=gap_size,
        num_memory_blocks=hparams.num_memory_blocks,
        name="self_attention")
  if is_4d:
    y = tf.reshape(y, x_shape)
    y.set_shape([None, None, None, hparams.hidden_size])
  return y
Example 8: full_self_attention
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def full_self_attention(x,
                        self_attention_bias,
                        hparams,
                        q_padding="LEFT",
                        kv_padding="LEFT"):
  """Full self-attention layer."""
  x, x_shape, is_4d = maybe_reshape_4d_to_3d(x)
  if self_attention_bias is not None:
    self_attention_bias = get_self_attention_bias(x)
  with tf.variable_scope("self_att"):
    y = common_attention.multihead_attention(
        x,
        None,
        self_attention_bias,
        hparams.attention_key_channels or hparams.hidden_size,
        hparams.attention_value_channels or hparams.hidden_size,
        hparams.hidden_size,
        hparams.num_heads,
        hparams.attention_dropout,
        q_filter_width=hparams.q_filter_width,
        kv_filter_width=hparams.kv_filter_width,
        q_padding=q_padding,
        kv_padding=kv_padding,
        name="self_att")
  if is_4d:
    y = tf.reshape(y, [x_shape[0], x_shape[1], x_shape[2], x_shape[3]])
    y.set_shape([None, None, None, hparams.hidden_size])
  return y
Example 9: _add_attend_to_encoder_cache
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def _add_attend_to_encoder_cache(cache, attention_name, hparams, num_layers,
                                 key_channels, value_channels,
                                 vars_3d_num_heads, scope_prefix,
                                 encoder_output):
  """Add attend-to-encoder layers to cache."""
  for layer in range(num_layers):
    layer_name = "layer_%d" % layer
    with tf.variable_scope("%sdecoder/%s/%s/multihead_attention" %
                           (scope_prefix, layer_name, attention_name)):
      k_encdec = common_attention.compute_attention_component(
          encoder_output,
          key_channels,
          name="k",
          vars_3d_num_heads=vars_3d_num_heads)
      k_encdec = common_attention.split_heads(k_encdec, hparams.num_heads)
      v_encdec = common_attention.compute_attention_component(
          encoder_output,
          value_channels,
          name="v",
          vars_3d_num_heads=vars_3d_num_heads)
      v_encdec = common_attention.split_heads(v_encdec, hparams.num_heads)
      cache[layer_name][attention_name] = {
          "k_encdec": k_encdec,
          "v_encdec": v_encdec
      }
  return cache
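For context (an inference from how tensor2tensor's fast decoding consumes such a cache, not part of the original example): multihead_attention accepts a cache argument, and when a memory antecedent is given it reads the precomputed "k_encdec"/"v_encdec" tensors from that dict instead of reprojecting encoder_output at every decoding step. A rough sketch of the consuming call, reusing names from the surrounding examples (x, encoder_output, encoder_decoder_attention_bias, hparams, cache, layer_name, attention_name) purely for illustration:

# Hypothetical decoding-step call that reuses the cached encoder projections.
y = common_attention.multihead_attention(
    x,                                  # current decoder queries
    encoder_output,                     # memory antecedent (encoder-decoder attention)
    encoder_decoder_attention_bias,     # masks encoder padding positions
    hparams.attention_key_channels or hparams.hidden_size,
    hparams.attention_value_channels or hparams.hidden_size,
    hparams.hidden_size,
    hparams.num_heads,
    hparams.attention_dropout,
    cache=cache[layer_name][attention_name])  # supplies k_encdec / v_encdec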
Example 10: _apply_logic
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def _apply_logic(self,
                 input_tensor,
                 output_depth,
                 hparams,
                 var_scope_suffix,
                 nonpadding,
                 mask_future,
                 decoder_self_attention_bias=None,
                 attention_dropout_broadcast_dims=None,
                 **kwargs):
  """Applies attention logic to `input_tensor`."""
  with tf.variable_scope("standard_attention_layer_" + var_scope_suffix):
    hidden_depth = int(
        input_tensor.shape.as_list()[-1] * self._hidden_dim_multiplier)
    attention_bias = decoder_self_attention_bias
    # TODO(davidso): This dropout rate differs from the other layers. This
    #                should be fixed so that they all use the same dropout
    #                rate.
    num_heads = self._num_heads
    if num_heads is None:
      num_heads = hparams.num_heads
    logic_output = common_attention.multihead_attention(
        input_tensor,
        None,
        attention_bias,
        hidden_depth,
        hidden_depth,
        output_depth,
        num_heads,
        hparams.attention_dropout,
        attention_type=hparams.self_attention_type,
        max_relative_position=hparams.max_relative_position,
        dropout_broadcast_dims=attention_dropout_broadcast_dims)
  return logic_output
Example 11: testMultiheadAttentionWithLayerCollection
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import multihead_attention [as alias]
def testMultiheadAttentionWithLayerCollection(self):
  """Testing multihead attention with layer collection for kfac."""
  x = tf.zeros([3, 4, 5], tf.float32)
  layer_collection = kfac.LayerCollection()
  common_attention.multihead_attention(
      x, None, None, 10, 10, 10, 2, 0.2,
      layer_collection=layer_collection)
  self.assertLen(layer_collection.get_blocks(), 4)