This article collects typical usage examples of the Python method tensor2tensor.layers.common_layers.dense_relu_dense. If you are wondering what common_layers.dense_relu_dense does, how to call it, or what real uses of it look like, the selected code examples below should help. You can also explore further usage examples from the module that contains it, tensor2tensor.layers.common_layers.
The following shows 6 code examples of common_layers.dense_relu_dense, sorted by popularity by default.
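Before the examples, here is a minimal, hedged sketch of a direct call to dense_relu_dense on a dummy tensor. The shapes, filter size, and dropout value are illustrative placeholders, and a TF1-style graph environment (as used throughout tensor2tensor) is assumed.

import tensorflow as tf
from tensor2tensor.layers import common_layers

# Dummy activations: [batch, length, hidden_size].
x = tf.zeros([8, 50, 512])

# Project up to a wider "filter" dimension with ReLU, then back down.
# Positional arguments: inputs, filter_size, output_size.
y = common_layers.dense_relu_dense(
    x,
    2048,          # filter_size: inner (expanded) dimension
    512,           # output_size: projected back to hidden_size
    dropout=0.1)   # dropout applied after the ReLU, between the two dense layers

# y keeps the leading dimensions of x: [8, 50, 512].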
Example 1: body
# Required import: from tensor2tensor.layers import common_layers [as alias]
# Or: from tensor2tensor.layers.common_layers import dense_relu_dense [as alias]
def body(self, features):
  assert self._hparams.block_size > 0
  assert not common_layers.is_xla_compiled()
  assert "targets_segmentation" not in features

  decoder_output = super(TransformerBlockParallel, self).body(features)
  assert not isinstance(decoder_output, tuple)
  assert len(decoder_output.shape) == 4

  relu_dropout_broadcast_dims = (
      common_layers.comma_separated_string_to_integer_list(
          getattr(self._hparams, "relu_dropout_broadcast_dims", "")))

  with tf.variable_scope("block_size_%d" % self._hparams.block_size):
    block_output = common_layers.dense_relu_dense(
        decoder_output,
        self._hparams.block_size * self._hparams.filter_size,
        self._hparams.block_size * self._hparams.hidden_size,
        dropout=self._hparams.relu_dropout,
        dropout_broadcast_dims=relu_dropout_broadcast_dims)

  batch_size, length = common_layers.shape_list(decoder_output)[:2]
  block_output = tf.reshape(block_output, [
      batch_size,
      length,
      self._hparams.block_size,
      self._hparams.hidden_size
  ])

  block_output = common_layers.layer_postprocess(
      decoder_output, block_output, self._hparams)

  return block_output
Example 2: transformer_base_v1
# Required import: from tensor2tensor.layers import common_layers [as alias]
# Or: from tensor2tensor.layers.common_layers import dense_relu_dense [as alias]
def transformer_base_v1():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.norm_type = "layer"
  hparams.hidden_size = 512
  hparams.batch_size = 4096
  hparams.max_length = 256
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer_adam_epsilon = 1e-9
  hparams.learning_rate_schedule = "legacy"
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate = 0.1
  hparams.learning_rate_warmup_steps = 4000
  hparams.initializer_gain = 1.0
  hparams.num_hidden_layers = 6
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.98
  hparams.num_sampled_classes = 0
  hparams.label_smoothing = 0.1
  hparams.shared_embedding_and_softmax_weights = True
  hparams.symbol_modality_num_shards = 16
  # Add new ones like this.
  hparams.add_hparam("filter_size", 2048)
  # Layer-related flags. If zero, these fall back on hparams.num_hidden_layers.
  hparams.add_hparam("num_encoder_layers", 0)
  hparams.add_hparam("num_decoder_layers", 0)
  # Attention-related flags.
  hparams.add_hparam("num_heads", 8)
  hparams.add_hparam("attention_key_channels", 0)
  hparams.add_hparam("attention_value_channels", 0)
  hparams.add_hparam("ffn_layer", "dense_relu_dense")
  hparams.add_hparam("parameter_attention_key_channels", 0)
  hparams.add_hparam("parameter_attention_value_channels", 0)
  # All hyperparameters ending in "dropout" are automatically set to 0.0
  # when not in training mode.
  hparams.add_hparam("attention_dropout", 0.0)
  hparams.add_hparam("attention_dropout_broadcast_dims", "")
  hparams.add_hparam("relu_dropout", 0.0)
  hparams.add_hparam("relu_dropout_broadcast_dims", "")
  hparams.add_hparam("pos", "timing")  # timing, none
  hparams.add_hparam("nbr_decoder_problems", 1)
  hparams.add_hparam("proximity_bias", False)
  hparams.add_hparam("causal_decoder_self_attention", True)
  hparams.add_hparam("use_pad_remover", True)
  hparams.add_hparam("self_attention_type", "dot_product")
  hparams.add_hparam("max_relative_position", 0)
  hparams.add_hparam("conv_first_kernel", 3)
  hparams.add_hparam("attention_variables_3d", False)
  hparams.add_hparam("use_target_space_embedding", True)
  # These parameters are only used when ffn_layer=="local_moe_tpu"
  hparams.add_hparam("moe_overhead_train", 1.0)
  hparams.add_hparam("moe_overhead_eval", 2.0)
  hparams.moe_num_experts = 16
  hparams.moe_loss_coef = 1e-3
  return hparams
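Hyperparameter sets like the one above are usually extended by a small function that starts from transformer_base_v1() and overrides individual fields. The sketch below is a hypothetical variant (not part of this listing) that widens the feed-forward layer while keeping ffn_layer at "dense_relu_dense".

def transformer_wide_ffn():
  """Hypothetical variant of transformer_base_v1 with a wider FFN."""
  hparams = transformer_base_v1()
  hparams.filter_size = 4096   # inner width passed to dense_relu_dense
  hparams.relu_dropout = 0.1   # dropout applied inside dense_relu_dense
  # ffn_layer stays "dense_relu_dense", so the transformer FFN keeps using it.
  return hparams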
Example 3: ffn_layer
# Required import: from tensor2tensor.layers import common_layers [as alias]
# Or: from tensor2tensor.layers.common_layers import dense_relu_dense [as alias]
def ffn_layer(x, hparams, losses=None):
  """ffn layer transformer."""
  with tf.variable_scope("ffn"):
    if hparams.ffn_layer == "none":
      return x
    if hparams.ffn_layer == "conv_hidden_relu":
      y = common_layers.dense_relu_dense(
          x,
          hparams.filter_size,
          hparams.hidden_size,
          dropout=hparams.relu_dropout)
    elif hparams.ffn_layer == "normed_conv_hidden_relu":
      y = common_layers.normed_conv_hidden_relu(
          x,
          hparams.norm_type,
          hparams.layer_norm_epsilon,
          hparams.filter_size,
          hparams.hidden_size,
          dropout=hparams.relu_dropout,
          norm_name="convnorm")
    elif hparams.ffn_layer == "self_attention_ffn":
      x_shape = tf.shape(x)
      x = tf.reshape(x, [x_shape[0], -1, hparams.hidden_size])
      y = common_attention.ffn_self_attention_layer(
          x, hparams.filter_size, hparams.hidden_size, hparams.num_parts,
          hparams.attention_dropout, hparams.share_kv)
      y = tf.reshape(y, x_shape)
    elif hparams.ffn_layer == "local_moe_tpu":
      overhead = (hparams.moe_overhead_train
                  if hparams.mode == tf.estimator.ModeKeys.TRAIN
                  else hparams.moe_overhead_eval)
      x, x_shape, is_4d = maybe_reshape_4d_to_3d(x)
      y, loss = expert_utils.local_moe_tpu(
          x, hparams.filter_size // 2,
          hparams.hidden_size,
          hparams.moe_num_experts, overhead=overhead,
          loss_coef=hparams.moe_loss_coef)
      if is_4d:
        y = tf.reshape(y, x_shape)
      if losses is None:
        raise ValueError(
            "transformer_ffn_layer with type local_moe_tpu must pass in "
            "a losses list")
      losses.append(loss)
    else:
      assert hparams.ffn_layer == "glu_ffn"
      y = common_layers.gated_linear_unit_layer(x)
    return y
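As a hedged usage sketch (assuming the transformer_base_v1 hparams from Example 2 and the ffn_layer function above are both in scope): setting ffn_layer to "conv_hidden_relu" makes the first branch fire, which routes through common_layers.dense_relu_dense.

# Hypothetical call sketch; shapes are illustrative.
hparams = transformer_base_v1()
hparams.ffn_layer = "conv_hidden_relu"   # selects the dense_relu_dense branch
hparams.relu_dropout = 0.1

x = tf.zeros([8, 50, hparams.hidden_size])  # [batch, length, hidden_size]
y = ffn_layer(x, hparams)                   # same shape as x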
Example 4: image_encoder
# Required import: from tensor2tensor.layers import common_layers [as alias]
# Or: from tensor2tensor.layers.common_layers import dense_relu_dense [as alias]
def image_encoder(image_feat,
                  hparams,
                  name="image_encoder",
                  save_weights_to=None,
                  make_image_summary=True):
  """A stack of self attention layers."""
  x = image_feat
  with tf.variable_scope(name):
    for layer in range(hparams.num_encoder_layers or
                       hparams.num_hidden_layers):
      with tf.variable_scope("layer_%d" % layer):
        with tf.variable_scope("self_attention"):
          y = vqa_layers.multihead_attention(
              common_layers.layer_preprocess(x, hparams),
              None,
              None,
              hparams.attention_key_channels or hparams.image_hidden_size,
              hparams.attention_value_channels or hparams.image_hidden_size,
              hparams.image_hidden_size,
              hparams.num_heads,
              hparams.attention_dropout,
              attention_type=hparams.self_attention_type,
              save_weights_to=save_weights_to,
              max_relative_position=None,
              make_image_summary=make_image_summary,
              dropout_broadcast_dims=None,
              max_length=None,
              vars_3d=False,
              scale_dotproduct=hparams.scale_dotproduct)
          utils.collect_named_outputs("norms", "image_feat_self_attention",
                                      tf.norm(y, axis=-1))
          x = common_layers.layer_postprocess(x, y, hparams)
          utils.collect_named_outputs(
              "norms", "image_feat_self_attention_zero_add",
              tf.norm(x, axis=-1))
        with tf.variable_scope("ffn"):
          y = common_layers.dense_relu_dense(
              common_layers.layer_preprocess(x, hparams),
              hparams.image_filter_size,
              hparams.image_hidden_size,
              dropout=hparams.relu_dropout,
              dropout_broadcast_dims=None)
          utils.collect_named_outputs("norms", "image_feat_ffn",
                                      tf.norm(y, axis=-1))
          x = common_layers.layer_postprocess(x, y, hparams)
          utils.collect_named_outputs("norms", "image_feat_ffn_zero_add",
                                      tf.norm(x, axis=-1))
  # if normalization is done in layer_preprocess, then it should also be done
  # on the output, since the output can grow very large, being the sum of
  # a whole stack of unnormalized layer outputs.
  return common_layers.layer_preprocess(x, hparams)
Example 5: image_encoder
# Required import: from tensor2tensor.layers import common_layers [as alias]
# Or: from tensor2tensor.layers.common_layers import dense_relu_dense [as alias]
def image_encoder(image_feat,
                  hparams,
                  name="image_encoder",
                  save_weights_to=None,
                  make_image_summary=True):
  """A stack of self attention layers."""
  x = image_feat
  image_hidden_size = hparams.image_hidden_size or hparams.hidden_size
  image_filter_size = hparams.image_filter_size or hparams.filter_size
  with tf.variable_scope(name):
    for layer in range(hparams.num_encoder_layers or
                       hparams.num_hidden_layers):
      with tf.variable_scope("layer_%d" % layer):
        with tf.variable_scope("self_attention"):
          y = vqa_layers.multihead_attention(
              common_layers.layer_preprocess(x, hparams),
              None,
              None,
              hparams.attention_key_channels or image_hidden_size,
              hparams.attention_value_channels or image_hidden_size,
              image_hidden_size,
              hparams.num_heads,
              hparams.attention_dropout,
              attention_type=hparams.image_self_attention_type,
              save_weights_to=save_weights_to,
              make_image_summary=make_image_summary,
              scale_dotproduct=hparams.scale_dotproduct,
          )
          utils.collect_named_outputs(
              "norms", "image_feat_self_attention_%d" % (layer),
              tf.norm(y, axis=-1))
          x = common_layers.layer_postprocess(x, y, hparams)
          utils.collect_named_outputs(
              "norms", "image_feat_self_attention_postprocess_%d" % (layer),
              tf.norm(x, axis=-1))
        with tf.variable_scope("ffn"):
          y = common_layers.dense_relu_dense(
              common_layers.layer_preprocess(x, hparams),
              image_filter_size,
              image_hidden_size,
              dropout=hparams.relu_dropout,
          )
          utils.collect_named_outputs(
              "norms", "image_feat_ffn_%d" % (layer), tf.norm(y, axis=-1))
          x = common_layers.layer_postprocess(x, y, hparams)
          utils.collect_named_outputs(
              "norms", "image_feat_ffn_postprocess_%d" % (layer),
              tf.norm(x, axis=-1))
  # if normalization is done in layer_preprocess, then it should also be done
  # on the output, since the output can grow very large, being the sum of
  # a whole stack of unnormalized layer outputs.
  return common_layers.layer_preprocess(x, hparams)
Example 6: question_encoder
# Required import: from tensor2tensor.layers import common_layers [as alias]
# Or: from tensor2tensor.layers.common_layers import dense_relu_dense [as alias]
def question_encoder(question,
                     question_self_attention_bias,
                     hparams,
                     name="question_encoder",
                     save_weights_to=None,
                     make_image_summary=True):
  """A stack of self attention layers."""
  x = question
  with tf.variable_scope(name):
    for layer in range(hparams.num_encoder_layers or
                       hparams.num_hidden_layers):
      with tf.variable_scope("layer_%d" % layer):
        with tf.variable_scope("self_attention"):
          y = vqa_layers.multihead_attention(
              common_layers.layer_preprocess(x, hparams),
              None,
              question_self_attention_bias,
              hparams.attention_key_channels or hparams.hidden_size,
              hparams.attention_value_channels or hparams.hidden_size,
              hparams.hidden_size,
              hparams.num_heads,
              hparams.attention_dropout,
              attention_type=hparams.question_self_attention_type,
              block_length=hparams.block_length,
              save_weights_to=save_weights_to,
              make_image_summary=make_image_summary,
              scale_dotproduct=hparams.scale_dotproduct,
          )
          utils.collect_named_outputs(
              "norms", "query_self_attention_%d" % (layer),
              tf.norm(y, axis=-1))
          x = common_layers.layer_postprocess(x, y, hparams)
          utils.collect_named_outputs(
              "norms", "query_self_attention_postprocess_%d" % (layer),
              tf.norm(x, axis=-1))
        with tf.variable_scope("ffn"):
          y = common_layers.dense_relu_dense(
              common_layers.layer_preprocess(x, hparams),
              hparams.filter_size,
              hparams.hidden_size,
              dropout=hparams.relu_dropout,
          )
          utils.collect_named_outputs(
              "norms", "query_ffn_%d" % (layer), tf.norm(y, axis=-1))
          x = common_layers.layer_postprocess(x, y, hparams)
          utils.collect_named_outputs(
              "norms", "query_ffn_postprocess_%d" % (layer),
              tf.norm(x, axis=-1))
  # if normalization is done in layer_preprocess, then it should also be done
  # on the output, since the output can grow very large, being the sum of
  # a whole stack of unnormalized layer outputs.
  return common_layers.layer_preprocess(x, hparams)