This page collects typical usage examples of the Python method keras.layers.Softmax. If you are unsure what layers.Softmax does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore the other members of the containing module, keras.layers.
Eight code examples of layers.Softmax are shown below, sorted by popularity by default.
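Before diving into the examples, here is a minimal, self-contained sketch of the layer itself. The shapes and layer sizes are illustrative assumptions, not values taken from any of the examples below:

import numpy as np
from keras.layers import Input, Dense, Softmax
from keras.models import Model

# Project 10 input features to 5 class logits, then normalize them with Softmax.
inputs = Input(shape=(10,))
logits = Dense(5)(inputs)         # unnormalized scores
probs = Softmax()(logits)         # probabilities along the last axis (the default axis=-1)
model = Model(inputs, probs)

x = np.random.rand(2, 10).astype('float32')
print(model.predict(x).sum(axis=-1))  # each row sums to ~1.0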
Example 1: __output
# Required import: from keras import layers
# or: from keras.layers import Softmax
def __output(self, dec_output):
    output_dropout_layer = Dropout(self.output_dropout)
    output_layer = Conv1D(self.tgt_vocab_size + 1,
                          kernel_size=1,
                          activation=gelu,
                          kernel_regularizer=regularizers.l2(self.l2_reg_penalty),
                          name='output_layer')
    output_softmax_layer = Softmax(name="word_predictions")

    if self.use_crf:
        return output_layer(output_dropout_layer(dec_output))
    else:
        return output_softmax_layer(output_layer(output_dropout_layer(dec_output)))
Example 2: create_model
# Required import: from keras import layers
# or: from keras.layers import Softmax
def create_model(input_shape: tuple, nb_classes: int, init_with_imagenet: bool = False, learning_rate: float = 0.01):
    weights = None
    if init_with_imagenet:
        weights = "imagenet"
    model = VGG16(input_shape=input_shape,
                  classes=nb_classes,
                  weights=weights,
                  include_top=False)

    # "Shallow" VGG for Cifar10
    x = model.get_layer('block3_pool').output
    x = layers.Flatten(name='Flatten')(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dense(nb_classes)(x)
    x = layers.Softmax()(x)
    model = models.Model(model.input, x)

    loss = losses.categorical_crossentropy
    optimizer = optimizers.SGD(lr=learning_rate, decay=0.99)
    model.compile(optimizer, loss, metrics=["accuracy"])
    return model
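A hedged usage sketch for create_model, assuming CIFAR-10-sized inputs (32x32 RGB, 10 classes) as suggested by the "Shallow VGG for Cifar10" comment; the call itself is hypothetical:

# Assumes create_model and its Keras imports (VGG16, layers, models, ...) are in scope.
model = create_model(input_shape=(32, 32, 3), nb_classes=10,
                     init_with_imagenet=False, learning_rate=0.01)
model.summary()  # VGG truncated at block3_pool, followed by Dense + Softmax head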
Example 3: build_resnet_generator
# Required import: from keras import layers
# or: from keras.layers import Softmax
def build_resnet_generator(input_shape, n_filters, n_residual_blocks,
                           seq_len, vocabulary_size):
    inputs = Input(shape=input_shape)

    # Dense 1: 1 x seq_len x n_filters
    x = Dense(1 * seq_len * n_filters, input_shape=input_shape)(inputs)
    x = Reshape((1, seq_len, n_filters))(x)

    # ResNet blocks
    x = resnet_block(x, n_residual_blocks, n_filters)

    # Output layer
    x = Conv2D(filters=vocabulary_size, kernel_size=1, padding='same')(x)
    x = Softmax(axis=3)(x)

    # create model graph
    model = Model(inputs=inputs, outputs=x, name='Generator')

    print("\nGenerator ResNet")
    model.summary()
    return model
Author: PacktPublishing, project: Hands-On-Generative-Adversarial-Networks-with-Keras, lines of code: 23, source file: models.py
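The detail worth noting above is Softmax(axis=3): the generator emits a 4-D tensor of shape (batch, 1, seq_len, vocabulary_size), and the softmax runs over the vocabulary channels so each sequence position gets its own probability distribution. A self-contained sketch of just that output head, with assumed sizes (resnet_block from the original models.py is not needed for this part):

from keras.layers import Input, Conv2D, Softmax
from keras.models import Model

seq_len, n_filters, vocabulary_size = 32, 64, 96   # assumed sizes for illustration
features = Input(shape=(1, seq_len, n_filters))
logits = Conv2D(filters=vocabulary_size, kernel_size=1, padding='same')(features)
probs = Softmax(axis=3)(logits)   # distribution over the vocabulary at every position
head = Model(features, probs)     # output shape: (batch, 1, seq_len, vocabulary_size)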
Example 4: __call__
# Required import: from keras import layers
# or: from keras.layers import Softmax
def __call__(self, q, k, v, attn_mask=None, scale=1.0):
    """
    :param q: queries tensor of shape [N, T_q, D_q]
    :param k: keys tensor of shape [N, T_k, D_k]
    :param v: values tensor of shape [N, T_v, D_v]
    :param attn_mask: attention mask of shape [N, T_q, T_k]
    :param scale: scaling factor, a float scalar
    :return: the context tensor and the attention tensor
    """
    attention = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 2)) * scale)([q, k])  # [N, T_q, T_k]
    if attn_mask is not None:
        # Add a large negative value wherever the mask is 0, so softmax drives those weights towards 0
        attention = Lambda(lambda x: (-1e+10) * (1 - x[0]) + x[1])([attn_mask, attention])
    attention = Softmax(axis=-1)(attention)
    attention = Dropout(self.attention_dropout)(attention)  # [N, T_q, T_k]
    context = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 1)))([attention, v])  # [N, T_q, D_q]
    return context, attention
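The method above comes from a scaled dot-product attention class, so self.attention_dropout is defined elsewhere. The standalone sketch below reproduces the same mask-then-softmax wiring with assumed shapes and an assumed 1/sqrt(d_k) scale; it illustrates the pattern rather than the original class:

from keras import backend as K
from keras.layers import Input, Lambda, Softmax, Dropout
from keras.models import Model

T_q = T_k = 8                      # assumed sequence lengths
d = 16                             # assumed head dimension
attention_dropout = 0.1            # assumed dropout rate
scale = d ** -0.5                  # the usual 1/sqrt(d_k) scaling

q = Input(shape=(T_q, d))
k = Input(shape=(T_k, d))
v = Input(shape=(T_k, d))
mask = Input(shape=(T_q, T_k))     # 1 = attend, 0 = masked out

scores = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 2)) * scale)([q, k])
scores = Lambda(lambda x: (-1e+10) * (1 - x[0]) + x[1])([mask, scores])
weights = Dropout(attention_dropout)(Softmax(axis=-1)(scores))
context = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 1)))([weights, v])
attn = Model([q, k, v, mask], [context, weights])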
Example 5: test_softmax
# Required import: from keras import layers
# or: from keras.layers import Softmax
def test_softmax():
    for axis in [1, -1]:
        layer_test(layers.Softmax, kwargs={'axis': axis},
                   input_shape=(2, 3, 4))
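What layer_test checks above can also be verified by hand: the axis the layer normalizes over should sum to 1. A small hedged sketch with an arbitrary (2, 3, 4) input:

import numpy as np
from keras.layers import Input, Softmax
from keras.models import Model

x = np.random.rand(2, 3, 4).astype('float32')
for axis in [1, -1]:
    inp = Input(shape=(3, 4))
    out = Softmax(axis=axis)(inp)
    y = Model(inp, out).predict(x)
    print(axis, np.allclose(y.sum(axis=axis), 1.0))  # True for both axes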
Example 6: __init__
# Required import: from keras import layers
# or: from keras.layers import Softmax
def __init__(self,
             src_vocab_size,
             src_max_len,
             tgt_vocab_size,
             tgt_max_len,
             optimizer=Adam(lr=1e-3),
             num_layers=6,
             model_dim=512,
             num_heads=8,
             ffn_dim=2048,
             dropout=0.2,
             src_tokenizer=None,
             tgt_tokenizer=None,
             weights_path=None):
    self.optimizer = optimizer
    self.src_max_len = src_max_len
    self.tgt_max_len = tgt_max_len
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.model_dim = model_dim
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.ffn_dim = ffn_dim
    self.dropout = dropout
    self.decode_model = None  # used in beam_search
    self.encode_model = None  # used in beam_search
    self.src_tokenizer = src_tokenizer
    self.tgt_tokenizer = tgt_tokenizer

    self.encoder = Encoder(src_vocab_size, src_max_len, num_layers, model_dim,
                           num_heads, ffn_dim, dropout)
    self.decoder = Decoder(tgt_vocab_size, tgt_max_len, num_layers, model_dim,
                           num_heads, ffn_dim, dropout)
    self.linear = Dense(tgt_vocab_size + 1, use_bias=False)
    self.softmax = Softmax(axis=2)

    self.pred_model, self.model = self.__build_model()

    if weights_path is not None:
        self.model.load_weights(weights_path)
Example 7: universal_transformer_gpt_model
# Required import: from keras import layers
# or: from keras.layers import Softmax
def universal_transformer_gpt_model(
        max_seq_length: int, vocabulary_size: int,
        word_embedding_size: int, transformer_depth: int,
        num_heads: int, transformer_dropout: float = 0.1,
        embedding_dropout: float = 0.6,
        l2_reg_penalty: float = 1e-6,
        confidence_penalty_weight: float = 0.1):
    """
    A model which is similar to the one described by OpenAI in the paper
    "Improving Language Understanding by Generative Pre-Training", except
    that it relies on L2 regularization of the word embedding matrix
    (instead of the dropout), and uses the Universal Transformer architecture.
    """
    word_ids = Input(shape=(max_seq_length,), dtype='int32', name='word_ids')
    l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty
                      else None)
    embedding_layer = ReusableEmbedding(
        vocabulary_size, word_embedding_size,
        input_length=max_seq_length,
        name='bpe_embeddings',
        # Regularization is based on paper "A Comparative Study on
        # Regularization Strategies for Embedding-based Neural Networks"
        # https://arxiv.org/pdf/1508.03721.pdf
        embeddings_regularizer=l2_regularizer)
    output_layer = TiedOutputEmbedding(
        projection_regularizer=l2_regularizer,
        projection_dropout=embedding_dropout,
        name='word_prediction_logits')
    coordinate_embedding_layer = TransformerCoordinateEmbedding(
        transformer_depth,
        name='coordinate_embedding')
    transformer_act_layer = TransformerACT(name='adaptive_computation_time')
    transformer_block = TransformerBlock(
        name='transformer', num_heads=num_heads,
        residual_dropout=transformer_dropout,
        attention_dropout=transformer_dropout,
        use_masking=True, vanilla_wiring=False)
    output_softmax_layer = Softmax(name='word_predictions')

    next_step_input, embedding_matrix = embedding_layer(word_ids)
    act_output = next_step_input

    for i in range(transformer_depth):
        next_step_input = coordinate_embedding_layer(next_step_input, step=i)
        next_step_input = transformer_block(next_step_input)
        next_step_input, act_output = transformer_act_layer(next_step_input)

    transformer_act_layer.finalize()
    next_step_input = act_output
    word_predictions = output_softmax_layer(
        output_layer([next_step_input, embedding_matrix]))
    model = Model(inputs=[word_ids], outputs=[word_predictions])

    # Penalty for confidence of the output distribution, as described in
    # "Regularizing Neural Networks by Penalizing Confident
    # Output Distributions" (https://arxiv.org/abs/1701.06548)
    confidence_penalty = K.mean(
        confidence_penalty_weight *
        K.sum(word_predictions * K.log(word_predictions), axis=-1))
    model.add_loss(confidence_penalty)
    return model
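A hedged construction sketch for the factory above. It assumes the layers it depends on (ReusableEmbedding, TiedOutputEmbedding, TransformerCoordinateEmbedding, TransformerACT, TransformerBlock) are importable from the same codebase; the hyperparameters are illustrative, not values from the source:

# Hypothetical hyperparameters for a small BPE language model.
model = universal_transformer_gpt_model(
    max_seq_length=128, vocabulary_size=16000,
    word_embedding_size=256, transformer_depth=5, num_heads=8)
model.summary()
# The output 'word_predictions' holds per-token probability distributions
# of shape (batch, max_seq_length, vocabulary_size).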
Example 8: vanilla_transformer_gpt_model
# Required import: from keras import layers
# or: from keras.layers import Softmax
def vanilla_transformer_gpt_model(
        max_seq_length: int, vocabulary_size: int,
        word_embedding_size: int, transformer_depth: int,
        num_heads: int, transformer_dropout: float = 0.1,
        embedding_dropout: float = 0.6,
        l2_reg_penalty: float = 1e-6,
        confidence_penalty_weight: float = 0.1):
    """
    A model which is almost identical to the one described by OpenAI in the
    paper "Improving Language Understanding by Generative Pre-Training", except
    that it uses L2 regularization of the word embedding matrix
    instead of the dropout.
    """
    word_ids = Input(shape=(max_seq_length,), dtype='int32', name='word_ids')
    l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty
                      else None)
    embedding_layer = ReusableEmbedding(
        vocabulary_size, word_embedding_size,
        input_length=max_seq_length,
        name='bpe_embeddings',
        # Regularization is based on paper "A Comparative Study on
        # Regularization Strategies for Embedding-based Neural Networks"
        # https://arxiv.org/pdf/1508.03721.pdf
        embeddings_regularizer=l2_regularizer)
    output_layer = TiedOutputEmbedding(
        projection_regularizer=l2_regularizer,
        projection_dropout=embedding_dropout,
        name='word_prediction_logits')
    coordinate_embedding_layer = TransformerCoordinateEmbedding(
        1,
        name='coordinate_embedding')
    output_softmax_layer = Softmax(name='word_predictions')

    next_step_input, embedding_matrix = embedding_layer(word_ids)
    next_step_input = coordinate_embedding_layer(next_step_input, step=0)

    for i in range(transformer_depth):
        next_step_input = (
            TransformerBlock(
                name='transformer' + str(i), num_heads=num_heads,
                residual_dropout=transformer_dropout,
                attention_dropout=transformer_dropout,
                use_masking=True,
                vanilla_wiring=True)
            (next_step_input))

    word_predictions = output_softmax_layer(
        output_layer([next_step_input, embedding_matrix]))
    model = Model(inputs=[word_ids], outputs=[word_predictions])

    # Penalty for confidence of the output distribution, as described in
    # "Regularizing Neural Networks by Penalizing Confident
    # Output Distributions" (https://arxiv.org/abs/1701.06548)
    confidence_penalty = K.mean(
        confidence_penalty_weight *
        K.sum(word_predictions * K.log(word_predictions), axis=-1))
    model.add_loss(confidence_penalty)
    return model
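Since both GPT-style factories output per-token probabilities rather than logits, and the confidence penalty is already attached via add_loss, one plausible training setup is sparse categorical cross-entropy against next-token ids. A sketch under those assumptions (optimizer, learning rate, and shapes are illustrative, not from the source):

from keras import optimizers, losses

model = vanilla_transformer_gpt_model(
    max_seq_length=128, vocabulary_size=16000,
    word_embedding_size=256, transformer_depth=4, num_heads=8)
model.compile(optimizers.Adam(lr=1e-4),
              loss=losses.sparse_categorical_crossentropy)
# x: int32 word ids, shape (batch, 128)
# y: ids of the token at the next position, shape (batch, 128, 1)
# model.fit(x, y, batch_size=32, epochs=1)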