This article collects typical usage examples of the Python method tensor2tensor.layers.common_attention.attention_bias_ignore_padding. If you are unsure exactly how to use common_attention.attention_bias_ignore_padding in Python, the curated code examples below may help; you can also explore further usages of the containing module, tensor2tensor.layers.common_attention.
The following shows 8 code examples of common_attention.attention_bias_ignore_padding, listed by popularity by default.
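Before the examples, here is a minimal sketch of the basic call pattern (the tensor shapes and values below are illustrative assumptions, not taken from any of the examples): attention_bias_ignore_padding expects a float tensor with 1.0 at padding positions, typically produced by common_attention.embedding_to_padding, and returns a broadcastable bias holding large negative values at those positions, which is then passed as the bias argument of an attention layer.
import tensorflow as tf
from tensor2tensor.layers import common_attention

# Hypothetical embedded inputs [batch, length, hidden]; the last two steps are all-zero padding.
embeddings = tf.concat(
    [tf.random.normal([2, 3, 8]), tf.zeros([2, 2, 8])], axis=1)

# 1.0 at padding positions, 0.0 elsewhere: shape [batch, length].
padding = common_attention.embedding_to_padding(embeddings)

# Large negative values at padding positions, broadcastable against attention
# logits: shape [batch, 1, 1, length].
bias = common_attention.attention_bias_ignore_padding(padding)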
Example 1: prepare_question_encoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def prepare_question_encoder(inputs, hparams):
  """Prepare question encoder.

  Args:
    inputs: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
  """
  encoder_input = inputs
  # Usual case - not a packed dataset.
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  ignore_padding = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  encoder_self_attention_bias = ignore_padding
  if hparams.pos == "timing":
    encoder_input = common_attention.add_timing_signal_1d(encoder_input)
  elif hparams.pos == "emb":
    encoder_input = common_attention.add_positional_embedding(
        encoder_input, hparams.max_length, "inputs_positional_embedding",
        None)
  return (encoder_input, encoder_self_attention_bias)
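A hedged usage sketch for Example 1 follows. The function only reads hparams.pos (and hparams.max_length on the "emb" branch), so a lightweight stand-in object is enough to exercise it; the SimpleNamespace and the tensor shape below are assumptions for illustration, not part of the original code.
import types
import tensorflow as tf

# Hypothetical question embeddings: [batch, length, hidden].
question = tf.random.normal([2, 6, 16])

# Minimal stand-in for the T2T hparams object (assumed attributes only).
hparams = types.SimpleNamespace(pos="timing", max_length=64)

encoder_input, encoder_self_attention_bias = prepare_question_encoder(
    question, hparams)
# encoder_input: [2, 6, 16] with the timing signal added.
# encoder_self_attention_bias: [2, 1, 1, 6].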
Example 2: sample_p
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def sample_p(
    self, targets_length, temp, check_invertibility=False, targets_mask=None,
    **kwargs):
  hparams = self._hparams
  if targets_mask is None:
    targets_mask = ops.sequence_mask(targets_length, hparams)
  decoder_self_attention_bias = (
      common_attention.attention_bias_ignore_padding(1.0 - targets_mask))
  batch_size, targets_max_length = (
      common_layers.shape_list(targets_mask)[:2])
  prior_shape = [batch_size, targets_max_length, hparams.latent_size]
  noise = tf.random.normal(prior_shape, stddev=temp)
  p_dist = None
  if hparams.prior_type == "standard_normal":
    z_p = noise
  elif hparams.prior_type == "diagonal_normal":
    diag_prior_params = ops.cond_prior(
        "diag_prior", hparams, tf.zeros(prior_shape), targets_mask,
        hparams.latent_size*2, decoder_self_attention_bias, **kwargs)
    p_dist = gops.diagonal_normal(diag_prior_params, "diag_prior")
    z_p = p_dist.loc + p_dist.scale * noise
  elif hparams.prior_type in ["affine", "additive", "rq"]:
    n_levels = len(hparams.depths.split("/"))
    divi = max(1, hparams.factor**(n_levels-1))
    flow_prior_shape = [
        batch_size, targets_max_length//divi, hparams.latent_size]
    noise = tf.random_normal(flow_prior_shape, stddev=temp)
    z_p, _, _, _ = glow.glow(
        "glow", noise, targets_mask, decoder_self_attention_bias,
        inverse=True, init=False, hparams=self._fparams,
        disable_dropout=True, temp=temp, **kwargs)
    if self.is_evaluating and check_invertibility:
      noise_inv, _, _, _ = glow.glow(
          "glow", z_p, targets_mask, decoder_self_attention_bias,
          inverse=False, init=False, hparams=self._fparams,
          disable_dropout=True, **kwargs)
      z_diff = noise - noise_inv
      tf.summary.scalar("flow_recon_inverse", tf.reduce_max(tf.abs(z_diff)))
  return z_p, p_dist
Example 3: prepare_image_question_encoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def prepare_image_question_encoder(image_feat, question, hparams):
  """Prepare encoder.

  Args:
    image_feat: a Tensor.
    question: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
  """
  encoder_input = tf.concat([image_feat, question], axis=1)
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  ignore_padding = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  encoder_self_attention_bias = ignore_padding
  encoder_decoder_attention_bias = ignore_padding
  # Usual case - not a packed dataset.
  if hparams.pos == "timing":
    question = common_attention.add_timing_signal_1d(question)
  elif hparams.pos == "emb":
    question = common_attention.add_positional_embedding(
        question, hparams.max_length, "inputs_positional_embedding",
        None)
  encoder_input = tf.concat([image_feat, question], axis=1)
  return (encoder_input, encoder_self_attention_bias,
          encoder_decoder_attention_bias)
Example 4: get_attention_bias
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def get_attention_bias(sequence_length, maxlen=None):
  """Create attention bias so attention is not applied at padding position."""
  # attention_bias: [batch, 1, 1, memory_length]
  mask = tf.sequence_mask(sequence_length, maxlen=maxlen)
  nonpadding = tf.to_float(mask)
  invert_sequence_mask = tf.to_float(tf.logical_not(mask))
  attention_bias = common_attention.attention_bias_ignore_padding(
      invert_sequence_mask)
  return nonpadding, attention_bias
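A hedged usage sketch for Example 4: tf.sequence_mask builds the mask from explicit lengths, and the inverted mask becomes the padding indicator that attention_bias_ignore_padding expects. The concrete lengths are illustrative, and tf.to_float assumes a TF1-style environment, matching the snippet above.
import tensorflow as tf

# Two sequences padded to a common length of 5 (illustrative values).
sequence_length = tf.constant([3, 5])
nonpadding, attention_bias = get_attention_bias(sequence_length, maxlen=5)
# nonpadding: [2, 5] with 1.0 at real tokens and 0.0 at padding.
# attention_bias: [2, 1, 1, 5] with large negative values at padding positions.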
Example 5: transformer_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def transformer_prepare_decoder(targets, hparams, features=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well.
      This is needed now for "packed" datasets.

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))
  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)
  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias)
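The central branch in transformer_prepare_decoder is the choice between a causal bias and a padding-only bias. A minimal sketch of the two bias shapes, using a hand-written padding vector that is an assumption for illustration:
import tensorflow as tf
from tensor2tensor.layers import common_attention

length = 4
# Causal branch: a [1, 1, length, length] bias that masks future positions.
causal_bias = common_attention.attention_bias_lower_triangle(length)

# Full-attention branch: a [batch, 1, 1, length] bias that only masks padding.
# Illustrative padding indicator for one sequence whose last step is padding.
padding = tf.constant([[0.0, 0.0, 0.0, 1.0]])
padding_bias = common_attention.attention_bias_ignore_padding(padding)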
Example 6: transformer_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well. This is
      needed now for "packed" datasets.
    pad: vector to use for padding when shifting targets right

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))
  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets, pad)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "timing_from_features":
    decoder_input = common_attention.add_timing_signals_from_features(
        decoder_input, features, hparams.position_features)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)
  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias)
Example 7: compute_iw_marginal
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def compute_iw_marginal(
    self, targets, targets_mask, decoder_self_attention_bias, features,
    n_samples, reduce_mean=True, **kwargs):
  hparams = self._hparams
  z_q, log_q_z, _ = self.sample_q(
      targets, targets_mask, decoder_self_attention_bias,
      n_samples=n_samples, temp=1.0, **kwargs)  # [K*B, L, C]
  iw_kwargs = {key: ops.prepare_for_iw(value, n_samples) for (
      key, value) in kwargs.items()}
  iw_targets_mask = ops.prepare_for_iw(targets_mask, n_samples)
  iw_decoder_self_attention_bias = (
      common_attention.attention_bias_ignore_padding(1.0 - iw_targets_mask))
  iw_features = copy.copy(features)
  iw_features["targets"] = ops.prepare_for_iw(
      features["targets"], n_samples)
  log_p_z_base, log_abs_det = self.compute_prior_log_prob(
      z_q, iw_targets_mask, iw_decoder_self_attention_bias,
      check_invertibility=False, **iw_kwargs)
  log_p_z = log_p_z_base + log_abs_det
  body_output = ops.decoder(
      "decoder", z_q, hparams, iw_decoder_self_attention_bias, **iw_kwargs)
  logits = self.top(body_output, iw_features)
  numerator, denominator = self.loss_iw(logits, iw_features)
  numerator = tf.reduce_sum(numerator[..., 0, 0], 1)  # [K*B]
  denominator = tf.reduce_sum(denominator[..., 0, 0], 1)  # [K*B]
  log_p_x = -1 * numerator / denominator
  log_q_z = gops.reduce_mean_over_l_sum_over_c(log_q_z, iw_targets_mask)
  log_p_z = log_p_z / tf.reduce_sum(iw_targets_mask, 1)
  log_p_x, log_q_z, log_p_z = [ops.unprepare_for_iw(ii, n_samples) for ii in [
      log_p_x, log_q_z, log_p_z]]
  log_w_n = log_p_z - log_q_z
  log_w_n = tf.nn.log_softmax(log_w_n, axis=0)  # [K, B]
  iw_marginal = log_p_x + log_w_n
  iw_marginal = tf.reduce_logsumexp(iw_marginal, 0)  # [B]
  if reduce_mean:
    iw_marginal = tf.cast(tf.reduce_mean(iw_marginal, 0), tf.float32)  # [1]
  else:
    iw_marginal = tf.cast(iw_marginal, tf.float32)  # [1]
  return iw_marginal
Example 8: transformer_prepare_decoder
# Required import: from tensor2tensor.layers import common_attention [as alias]
# Or: from tensor2tensor.layers.common_attention import attention_bias_ignore_padding [as alias]
def transformer_prepare_decoder(targets, hparams, features=None, pad=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well. This is
      needed now for "packed" datasets.
    pad: vector to use for padding when shifting targets right

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  if hparams.causal_decoder_self_attention:
    # Causal attention.
    if hparams.prepend_mode == "prepend_inputs_full_attention":
      decoder_self_attention_bias = (
          common_attention.attention_bias_prepend_inputs_full_attention(
              common_attention.embedding_to_padding(targets)))
    else:
      decoder_self_attention_bias = (
          common_attention.attention_bias_lower_triangle(
              common_layers.shape_list(targets)[1]))
  else:
    # Full attention.
    decoder_padding = common_attention.embedding_to_padding(targets)
    decoder_self_attention_bias = (
        common_attention.attention_bias_ignore_padding(decoder_padding))
  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets, pad)
  if hparams.pos == "timing":
    if targets_position is not None:
      decoder_input = common_attention.add_timing_signal_1d_given_position(
          decoder_input, targets_position)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  elif hparams.pos == "emb":
    decoder_input = common_attention.add_positional_embedding(
        decoder_input, hparams.max_length, "targets_positional_embedding",
        targets_position)
  if hparams.activation_dtype == "bfloat16":
    decoder_self_attention_bias = tf.cast(decoder_self_attention_bias,
                                          tf.bfloat16)
  return (decoder_input, decoder_self_attention_bias)