本文整理汇总了Python中tensor2tensor.models.transformer.transformer_prepare_decoder方法的典型用法代码示例。如果您正苦于以下问题：Python transformer.transformer_prepare_decoder方法的具体用法？Python transformer.transformer_prepare_decoder怎么用？Python transformer.transformer_prepare_decoder使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块tensor2tensor.models.transformer的用法示例。
在下文中一共展示了transformer.transformer_prepare_decoder方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: decode_transformer
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import transformer_prepare_decoder [as 别名]
def decode_transformer(encoder_output, encoder_decoder_attention_bias, targets,
                       hparams, name):
    """Run the original Transformer decoder inside its own variable scope.

    Args:
      encoder_output: forwarded unchanged to `transformer.transformer_decoder`.
      encoder_decoder_attention_bias: forwarded unchanged to
        `transformer.transformer_decoder`.
      targets: decoder targets; passed through `common_layers.flatten4d3d`
        before any other processing.
      hparams: hyperparameters; this function reads
        `layer_prepostprocess_dropout` and `hidden_size` and forwards the
        object to the transformer helpers.
      name: name of the `tf.variable_scope` wrapping all created ops.

    Returns:
      The decoder output reshaped to `[batch, -1, 1, hparams.hidden_size]`,
      where `batch` is the leading dimension of the decoder output.
    """
    with tf.variable_scope(name):
        flat_targets = common_layers.flatten4d3d(targets)

        prepared, self_attention_bias = (
            transformer.transformer_prepare_decoder(flat_targets, hparams))
        prepared = tf.nn.dropout(
            prepared, 1.0 - hparams.layer_prepostprocess_dropout)

        decoded = transformer.transformer_decoder(
            prepared, encoder_output, self_attention_bias,
            encoder_decoder_attention_bias, hparams)

        # Expand since t2t expects 4d tensors: insert an axis at position 2,
        # then pin the trailing dimensions to (1, hidden_size).
        decoded = tf.expand_dims(decoded, axis=2)
        batch_dim = common_layers.shape_list(decoded)[0]
        return tf.reshape(decoded, [batch_dim, -1, 1, hparams.hidden_size])
示例2: body
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import transformer_prepare_decoder [as 别名]
def body(self, features):
    """Encode `features["inputs"]` and decode `features["targets"]`.

    Args:
      features: mapping that must provide the keys "targets", "inputs" and
        "target_space_id".

    Returns:
      The output of `transformer_revnet_decoder` with an extra axis inserted
      at index 2.
    """
    hp = self._hparams
    raw_targets = features["targets"]
    raw_inputs = features["inputs"]
    space_id = features["target_space_id"]

    # Both sequences are flattened before being handed to the prepare helpers.
    flat_inputs = common_layers.flatten4d3d(raw_inputs)
    flat_targets = common_layers.flatten4d3d(raw_targets)

    enc_in, enc_self_bias, enc_dec_bias = (
        transformer.transformer_prepare_encoder(flat_inputs, space_id, hp))
    dec_in, dec_self_bias = transformer.transformer_prepare_decoder(
        flat_targets, hp)

    # Same keep probability is applied to the encoder and decoder inputs.
    enc_in = tf.nn.dropout(enc_in, 1.0 - hp.layer_prepostprocess_dropout)
    dec_in = tf.nn.dropout(dec_in, 1.0 - hp.layer_prepostprocess_dropout)

    enc_out = transformer_revnet_encoder(enc_in, enc_self_bias, hp)
    dec_out = transformer_revnet_decoder(
        dec_in, enc_out, dec_self_bias, enc_dec_bias, hp)

    return tf.expand_dims(dec_out, 2)
示例3: _prepare_decoder
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import transformer_prepare_decoder [as 别名]
def _prepare_decoder(self, targets):
    """Prepare the transformer decoder input from `targets`.

    Args:
      targets: decoder targets; flattened with `common_layers.flatten4d3d`
        before being passed to `transformer.transformer_prepare_decoder`.

    Returns:
      A `(decoder_input, decoder_self_attention_bias)` pair, where dropout
      with keep probability `1 - layer_prepostprocess_dropout` has been
      applied to the decoder input.
    """
    flat_targets = common_layers.flatten4d3d(targets)
    prepared_input, self_attention_bias = (
        transformer.transformer_prepare_decoder(
            flat_targets, self._hparams, features=None))
    keep_prob = 1.0 - self._hparams.layer_prepostprocess_dropout
    return tf.nn.dropout(prepared_input, keep_prob), self_attention_bias
示例4: transformer_moe_base
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import transformer_prepare_decoder [as 别名]
def transformer_moe_base():
    """Return the base set of hyperparameters.

    Starts from `common_hparams.basic_params1()`, overrides a number of its
    values, and registers the extra hyperparameters listed below.

    Returns:
      The configured hparams object.
    """
    hparams = common_hparams.basic_params1()

    # Plain attribute overrides, applied in the listed order.
    overrides = (
        ("norm_type", "layer"),
        ("hidden_size", 512),
        ("batch_size", 4096),
        ("max_length", 2001),
        ("max_input_seq_length", 2000),
        ("max_target_seq_length", 2000),
        ("dropout", 0.0),
        ("clip_grad_norm", 0.0),  # i.e. no gradient clipping
        ("optimizer_adam_epsilon", 1e-9),
        ("learning_rate_decay_scheme", "noam"),
        ("learning_rate", 0.1),
        ("learning_rate_warmup_steps", 2000),
        ("initializer_gain", 1.0),
        ("num_hidden_layers", 5),
        ("initializer", "uniform_unit_scaling"),
        ("weight_decay", 0.0),
        ("optimizer_adam_beta1", 0.9),
        ("optimizer_adam_beta2", 0.98),
        ("num_sampled_classes", 0),
        ("label_smoothing", 0.0),
        ("shared_embedding_and_softmax_weights", True),
        # According to noam, ("n", "da") seems better for harder-to-learn
        # models.
        ("layer_preprocess_sequence", "n"),
        ("layer_postprocess_sequence", "da"),
    )
    for attr_name, attr_value in overrides:
        setattr(hparams, attr_name, attr_value)

    # Hparams used by transformer_prepare_decoder() function.
    decoder_prepare_hparams = (
        ("pos", "timing"),  # timing, none
        ("proximity_bias", False),
        ("causal_decoder_self_attention", True),
    )
    for hparam_name, hparam_value in decoder_prepare_hparams:
        hparams.add_hparam(hparam_name, hparam_value)

    hparams = common_attention.add_standard_attention_hparams(hparams)

    layer_hparams = (
        # Decoder layers type. If set, num_decoder_layers parameter will be
        # ignored and the number of decoder layer will be deduced from the
        # string. See top file comment for example of usage.
        ("layer_types", ""),
        # Default attention type (ex: a, loc, red,...) and feed-forward type
        # (ex: fc, sep, moe,...).
        ("default_att", "a"),
        ("default_ff", "fc"),
    )
    for hparam_name, hparam_value in layer_hparams:
        hparams.add_hparam(hparam_name, hparam_value)

    return hparams
示例5: __init__
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import transformer_prepare_decoder [as 别名]
def __init__(self, features_info=None, input_names=None, target_names=None,
             hidden_size=512, filter_size=2048):
    """Create the embedding, encoder/decoder function layers and logits layer.

    Args:
      features_info: object indexable by feature name. The entry for the
        first target name must expose either `num_classes` or
        `encoder.vocab_size`; that value is used as the target vocab size.
      input_names: optional sequence of input feature names; only the first
        entry is used. When it is `None` or empty the model is built without
        an encoder layer and `_input_name` is set to `None`.
      target_names: sequence of target feature names; only the first entry
        is used. Must be non-empty.
      hidden_size: written to `hparams.hidden_size` and used as the
        embedding output dimension.
      filter_size: written to `hparams.filter_size`.
    """
    super(Transformer, self).__init__()
    # TODO(lukaszkaiser): gin'ify and split into encoder/decoder classes.
    self._has_input = bool(input_names)
    # `input_names` defaults to None; indexing it unconditionally would raise
    # before the no-input configuration guarded by `_has_input` could be used.
    self._input_name = input_names[0] if self._has_input else None
    self._target_name = target_names[0]
    target_info = features_info[self._target_name]
    try:
        target_vocab_size = target_info.num_classes
    except AttributeError:
        # Fall back to the vocabulary size of the feature's encoder.
        target_vocab_size = target_info.encoder.vocab_size
    hparams = transformer.transformer_base()
    hparams.hidden_size = hidden_size
    hparams.filter_size = filter_size

    # Now the model.
    self._embedding = tf.keras.layers.Embedding(
        target_vocab_size, hidden_size, mask_zero=True)

    # The closures below capture `hparams` so they can be wrapped as layers
    # that only receive tensors/features at call time.
    def transformer_encoder(inputs, features):
        return transformer.transformer_encode(
            transformer_layers.transformer_encoder, inputs, None,
            hparams, features=features)

    def transformer_prepare_decoder(targets, features):
        return transformer.transformer_prepare_decoder(
            targets, hparams, features)

    def transformer_decoder(decoder_input, encoder_output,
                            encoder_decoder_attention_bias,
                            decoder_self_attention_bias,
                            features):
        return transformer.transformer_decode(
            transformer.transformer_decoder,
            decoder_input,
            encoder_output,
            encoder_decoder_attention_bias,
            decoder_self_attention_bias,
            hparams,
            nonpadding=transformer.features_to_nonpadding(features, "targets"))

    # The encoder layer only exists when the model has an input feature.
    if self._has_input:
        self._encoder = keras_utils.FunctionLayer(transformer_encoder)
    self._prepare_decoder = keras_utils.FunctionLayer(
        transformer_prepare_decoder)
    self._decoder = keras_utils.FunctionLayer(transformer_decoder)
    self._logits = tf.keras.layers.Dense(
        target_vocab_size, activation=None)