本文整理汇总了Python中tensor2tensor.models.transformer.Transformer类的典型用法代码示例。如果您正苦于以下问题：Python transformer.Transformer的具体用法？Python transformer.Transformer怎么用？Python transformer.Transformer使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块tensor2tensor.models.transformer的用法示例。
在下文中一共展示了transformer.Transformer的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _beam_decode
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha):
"""Beam search decoding.
Args:
features: an map of string to `Tensor`
decode_length: an integer. How many additional timesteps to decode.
beam_size: number of beams.
top_beams: an integer. How many of the beams to return.
alpha: Float that controls the length penalty. larger the alpha, stronger
the preference for longer translations.
Returns:
A dict of decoding results {
"outputs": integer `Tensor` of decoded ids of shape
[batch_size, <= decode_length] if beam_size == 1 or
[batch_size, top_beams, <= decode_length]
"scores": decoding log probs from the beam search,
None if using greedy decoding (beam_size=1)
}
"""
# Caching is not ebabled in Universal Transformer
# TODO(dehghani): Support fast decoding for Universal Transformer
return self._beam_decode_slow(features, decode_length, beam_size,
top_beams, alpha)
示例2: testTransformerWithoutProblem
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def testTransformerWithoutProblem(self):
    """Check the Transformer output shape on pre-embedded float features.

    Builds random float "inputs" and "targets" tensors whose last dimension
    is `hparams.hidden_size`, calls a `transformer.Transformer` built from
    `transformer.transformer_test()` hparams on them, and asserts that the
    first returned value has the same static shape as the targets tensor.
    """
    hparams = transformer.transformer_test()

    embedded_inputs = np.random.random_sample(
        (BATCH_SIZE, INPUT_LENGTH, 1, hparams.hidden_size))
    embedded_targets = np.random.random_sample(
        (BATCH_SIZE, TARGET_LENGTH, 1, hparams.hidden_size))

    transformed_features = {
        "inputs": tf.constant(embedded_inputs, dtype=tf.float32),
        "targets": tf.constant(embedded_targets, dtype=tf.float32)
    }

    model = transformer.Transformer(hparams)
    # The second element of the returned pair is not inspected by this test.
    body_out, _ = model(transformed_features)

    self.assertAllEqual(
        body_out.get_shape().as_list(),
        [BATCH_SIZE, TARGET_LENGTH, 1, hparams.hidden_size])
示例3: testVarNames
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def testVarNames(self):
    """Check that TransformerScorer and Transformer create the same variables.

    Three models are built, each in its own fresh graph so that
    `tf.global_variables()` only reports that model's variables:
      * `TransformerScorer` in PREDICT mode (via `model.infer`),
      * `TransformerScorer` in EVAL mode,
      * `Transformer` in EVAL mode.
    The sorted variable-name lists of both scorer graphs must equal the
    sorted list from the plain Transformer graph.
    """
    with tf.Graph().as_default():
        model, features = get_model(
            mode=tf.estimator.ModeKeys.PREDICT,
            model_cls=transformer.TransformerScorer)
        _ = model.infer(features)
        # Must be collected while this graph is still the default graph.
        scorer_vars = [v.name for v in tf.global_variables()]

    with tf.Graph().as_default():
        model, features = get_model(
            mode=tf.estimator.ModeKeys.EVAL,
            model_cls=transformer.TransformerScorer)
        _ = model(features)
        scorer_eval_vars = [v.name for v in tf.global_variables()]

    with tf.Graph().as_default():
        model, features = get_model(
            mode=tf.estimator.ModeKeys.EVAL,
            model_cls=transformer.Transformer)
        _ = model(features)
        transformer_vars = [v.name for v in tf.global_variables()]

    self.assertEqual(sorted(scorer_vars), sorted(transformer_vars))
    self.assertEqual(sorted(scorer_eval_vars), sorted(transformer_vars))
示例4: add_evolved_transformer_hparams
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def add_evolved_transformer_hparams(hparams):
    """Add Evolved Transformer hparams.

    Note: These are for the Adam optimizer, not the Adafactor optimizer used in
    the paper.

    The passed-in object is modified in place and also returned.

    Args:
      hparams: Current hparams. Must already define `learning_rate_constant`
        and `learning_rate_warmup_steps`.

    Returns:
      hparams updated with Evolved Transformer values.
    """
    # Evolved Transformer "layers" are twice as deep as Transformer, so roughly
    # halve the number that we use. These numbers are taken from
    # arxiv.org/abs/1901.11117 .
    hparams.num_encoder_layers = 3
    hparams.num_decoder_layers = 4

    # Learning rate and decay scheme that mimics the transformer Adam config,
    # but with cosine decay instead of rsqrt.
    hparams.learning_rate_constant /= hparams.learning_rate_warmup_steps ** 0.5
    hparams.learning_rate_schedule = (
        "constant*linear_warmup*single_cycle_cos_decay*rsqrt_hidden_size")
    return hparams
示例5: transformer_revnet_big
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def transformer_revnet_big():
    """Big hparams for TransformerRevnet.

    Starts from `transformer_revnet_base()` and doubles the batch size,
    hidden size and number of heads, and adds one hidden layer.

    Returns:
      The scaled-up hparams object.
    """
    hparams = transformer_revnet_base()

    # The TransformerRevnet uses significantly less memory than the Transformer.
    # Increase batch size and model size.
    hparams.batch_size *= 2
    hparams.hidden_size *= 2
    hparams.num_heads *= 2
    hparams.num_hidden_layers += 1
    return hparams
示例6: encode
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def encode(self, inputs, target_space, hparams, features=None, losses=None):
    """Encode transformer inputs.

    Args:
      inputs: Transformer inputs [batch_size, input_length, input_height,
        hidden_dim] which will be flattened along the two spatial dimensions.
      target_space: scalar, target space ID.
      hparams: hyperparameters for model.
      features: optionally pass the entire features dictionary as well.
        This is needed now for "packed" datasets.
      losses: Unused.

    Returns:
      Tuple of:
          encoder_output: Encoder representation.
              [batch_size, input_length, hidden_dim]
          encoder_extra_output: which is extra encoder output used in some
            variants of the model (e.g. in ACT, to pass the ponder-time to body)
    """
    del losses  # Unused.

    inputs = common_layers.flatten4d3d(inputs)

    # The third value returned by transformer_prepare_encoder is not needed
    # here.
    (encoder_input, self_attention_bias, _) = (
        transformer.transformer_prepare_encoder(inputs, target_space, hparams))

    # NOTE(review): the second positional argument is passed as
    # (1 - dropout), i.e. it is treated as a keep probability -- confirm this
    # matches the tf.nn.dropout signature of the TensorFlow version in use.
    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)

    (encoder_output, encoder_extra_output) = (
        universal_transformer_util.universal_transformer_encoder(
            encoder_input,
            self_attention_bias,
            hparams,
            nonpadding=transformer.features_to_nonpadding(features, "inputs"),
            save_weights_to=self.attention_weights))

    return encoder_output, encoder_extra_output
示例7: get_model
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def get_model(hparams=None, mode=tf.estimator.ModeKeys.TRAIN,
              has_input=True, model_cls=transformer.Transformer):
    """Build a small test model together with random integer features.

    Args:
      hparams: hparams to use; `transformer.transformer_tiny()` when None.
        The object is modified in place (size fields and `problem_hparams`).
      mode: mode passed to the `model_cls` constructor.
      has_input: whether to add an "inputs" feature. When False, the problem
        hparams' `input_modality` is also replaced with an empty dict.
      model_cls: model class to instantiate.

    Returns:
      A `(model, features)` tuple: `model` is
      `model_cls(hparams, mode, p_hparams)` and `features` is a dict holding
      int32 "targets", a scalar "target_space_id" and, if `has_input`,
      int32 "inputs".
    """
    if hparams is None:
        hparams = transformer.transformer_tiny()
    # NOTE(review): indentation was lost in the source this was recovered
    # from; the overrides below are assumed to apply to caller-supplied
    # hparams as well, not only to the default -- confirm.
    hparams.hidden_size = 8
    hparams.filter_size = 32
    hparams.num_heads = 1
    hparams.layer_prepostprocess_dropout = 0.0

    p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE, VOCAB_SIZE)
    if not has_input:
        p_hparams.input_modality = {}
    hparams.problem_hparams = p_hparams

    # Ids are drawn uniformly from [0, VOCAB_SIZE). np.random.randint replaces
    # the deprecated `-1 + np.random.random_integers(VOCAB_SIZE, ...)`, which
    # produced the same range.
    inputs = np.random.randint(
        VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
    targets = np.random.randint(
        VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))

    features = {
        "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
        "target_space_id": tf.constant(1, dtype=tf.int32)
    }
    if has_input:
        features["inputs"] = tf.constant(inputs, dtype=tf.int32, name="inputs")

    return model_cls(hparams, mode, p_hparams), features
示例8: body
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def body(self, features):
    """Transformer main model_fn.

    Runs the parent class's `body`, then adds an "auxiliary" entry to the
    losses: the sum of `loss_num / loss_den` over every shift value returned
    by `self._extract_shift_values()`.

    Args:
      features: Map of features to the model. Should contain the following:
          "inputs": Transformer inputs.
              [batch_size, input_length, 1, hidden_dim].
          "targets": Target decoder outputs.
              [batch_size, target_length, 1, hidden_dim]
          "target_space_id": A scalar int from data_generators.problem.SpaceID.

    Returns:
      A 2-tuple containing:
          Logit tensor. [batch_size, decoder_length, vocab_size]
          Map of keys to loss tensors. Should contain the following:
              "training": Training loss (shift == 0).
              "auxiliary": Auxiliary loss (shift != 0).
    """
    output = super(TransformerAux, self).body(features)
    output, losses = self._normalize_body_output(output)

    aux = 0.0
    for shift in self._extract_shift_values():
        loss_num, loss_den = self.auxiliary_loss(output, features, shift)
        aux += loss_num / loss_den
    # Assigned after the loop so the key exists (as 0.0) even when there are
    # no shift values.
    losses["auxiliary"] = aux

    return output, losses
示例9: _beam_decode
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha,
use_tpu=False):
"""Beam search decoding.
Args:
features: an map of string to `Tensor`
decode_length: an integer. How many additional timesteps to decode.
beam_size: number of beams.
top_beams: an integer. How many of the beams to return.
alpha: Float that controls the length penalty. larger the alpha, stronger
the preference for longer translations.
use_tpu: Whether we should use TPU or not.
Returns:
A dict of decoding results {
"outputs": integer `Tensor` of decoded ids of shape
[batch_size, <= decode_length] if beam_size == 1 or
[batch_size, top_beams, <= decode_length]
"scores": decoding log probs from the beam search,
None if using greedy decoding (beam_size=1)
}
"""
# Caching is not ebabled in Universal Transformer
# TODO(dehghani): Support fast decoding for Universal Transformer
return self._beam_decode_slow(features, decode_length, beam_size,
top_beams, alpha, use_tpu)
示例10: universal_transformer_base
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def universal_transformer_base():
    """Base parameters for Universal Transformer.

    Starts from `transformer.transformer_base()`, enlarges the layer, sets the
    pre/post-process dropout, and then applies
    `update_hparams_for_universal_transformer`.

    Returns:
      The resulting hparams object.
    """
    hparams = transformer.transformer_base()

    # To have a similar capacity to the transformer_base with 6 layers,
    # we need to increase the size of the UT's layer
    # since, in fact, UT has a single layer repeating multiple times.
    hparams.hidden_size = 1024
    hparams.filter_size = 4096
    hparams.num_heads = 16
    hparams.layer_prepostprocess_dropout = 0.3

    hparams = update_hparams_for_universal_transformer(hparams)
    return hparams
示例11: adaptive_universal_transformer_multilayer_tpu
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def adaptive_universal_transformer_multilayer_tpu():
    """Multi-layer config for adaptive Transformer on TPU.

    Starts from `adaptive_universal_transformer_base_tpu()` and overrides the
    in-recurrence layer count, the transformer mix-in settings and the
    feed-forward type.

    Returns:
      The resulting hparams object.
    """
    hparams = adaptive_universal_transformer_base_tpu()
    hparams.num_inrecurrence_layers = 2
    hparams.mix_with_transformer = "before_ut,after_ut"
    hparams.num_mixedin_layers = 1
    hparams.transformer_ffn_type = "sepconv"
    # TODO(lukaszkaiser): the options below don't work on TPU yet, make them work.
    # hparams.add_step_timing_signal = True
    # hparams.add_sru = True
    # hparams.self_attention_type = "dot_product_relative_v2"
    # hparams.max_relative_position = 256
    return hparams
示例12: adaptive_universal_transformer_multilayer_hard
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def adaptive_universal_transformer_multilayer_hard():
    """Multi-layer config for adaptive Transformer with hard attention.

    Starts from `adaptive_universal_transformer_multilayer_tpu()` and sets the
    batch size, `hard_attention_k`, the step timing signal and relative
    self-attention settings.

    Returns:
      The resulting hparams object.
    """
    hparams = adaptive_universal_transformer_multilayer_tpu()
    hparams.batch_size = 256
    hparams.hard_attention_k = 8
    hparams.add_step_timing_signal = True
    # hparams.add_sru = True  # This is very slow on GPUs, does it help?
    hparams.self_attention_type = "dot_product_relative_v2"
    hparams.max_relative_position = 256
    return hparams
示例13: evolved_transformer_tiny
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def evolved_transformer_tiny():
    """Tiny parameters for Evolved Transformer model.

    Applies `add_evolved_transformer_hparams` to
    `transformer.transformer_tiny()` and then replaces the learning rate
    schedule with "constant*single_cycle_cos_decay".

    Returns:
      The resulting hparams object.
    """
    hparams = add_evolved_transformer_hparams(transformer.transformer_tiny())
    # Overrides the schedule set by add_evolved_transformer_hparams.
    hparams.learning_rate_schedule = (
        "constant*single_cycle_cos_decay")
    return hparams
示例14: evolved_transformer_big
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def evolved_transformer_big():
    """Big parameters for Evolved Transformer model on WMT.

    Returns:
      `transformer.transformer_big()` hparams after
      `add_evolved_transformer_hparams` has been applied to them.
    """
    return add_evolved_transformer_hparams(transformer.transformer_big())
示例15: evolved_transformer_deep
# 需要导入模块: from tensor2tensor.models import transformer [as 别名]
# 或者: from tensor2tensor.models.transformer import Transformer [as 别名]
def evolved_transformer_deep():
    """Deep parameters for Evolved Transformer model on WMT.

    Applies `add_evolved_transformer_hparams` to
    `transformer.transformer_big()` and then overrides the encoder/decoder
    layer counts and the hidden size.

    Returns:
      The resulting hparams object.
    """
    hparams = add_evolved_transformer_hparams(transformer.transformer_big())
    # These replace the layer counts set by add_evolved_transformer_hparams.
    hparams.num_encoder_layers = 9
    hparams.num_decoder_layers = 10
    hparams.hidden_size = 640
    return hparams