This article collects typical usage examples of the torch.nn.functional.gelu method in Python. If you are wondering what functional.gelu does, how to call it, or what real-world usage looks like, the hand-picked code examples below may help. You can also browse further usage examples for the module it belongs to, torch.nn.functional.
Seven code examples of the functional.gelu method are shown below, sorted by popularity by default.
Example 1: __init__
# Required import: from torch.nn import functional [as alias]
# Or: from torch.nn.functional import gelu [as alias]
def __init__(self, dim, dim_hidden, relu_dropout=0, activation='relu'):
    super(TransformerFFN, self).__init__()
    self.relu_dropout = nn.Dropout(p=relu_dropout)
    if activation == 'relu':
        self.nonlinear = F.relu
    elif activation == 'gelu':
        self.nonlinear = F.gelu
    else:
        raise ValueError(
            "Don't know how to handle --activation {}".format(activation)
        )
    self.lin1 = nn.Linear(dim, dim_hidden)
    self.lin2 = nn.Linear(dim_hidden, dim)
    nn.init.xavier_uniform_(self.lin1.weight)
    nn.init.xavier_uniform_(self.lin2.weight)
    # TODO: initialize biases to 0
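
The forward pass is not included in this example. As a rough sketch only, the stored nonlinearity would typically be applied between the two linear layers (the method below is an assumption made here for illustration, not part of the original snippet):

def forward(self, x):
    x = self.nonlinear(self.lin1(x))   # F.gelu when activation == 'gelu', F.relu otherwise
    x = self.relu_dropout(x)           # dropout applied after the activation
    return self.lin2(x)                # project back to the model dimension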
Example 2: __init__
# Required import: from torch.nn import functional [as alias]
# Or: from torch.nn.functional import gelu [as alias]
def __init__(self, config: BartConfig):
    super().__init__()
    self.embed_dim = config.d_model
    self.self_attn = SelfAttention(
        embed_dim=self.embed_dim, num_heads=config.decoder_attention_heads, dropout=config.attention_dropout,
    )
    self.dropout = config.dropout
    self.activation_fn = F.gelu
    self.activation_dropout = config.activation_dropout
    self.self_attn_layer_norm = LayerNorm(self.embed_dim)
    self.encoder_attn = SelfAttention(
        self.embed_dim,
        config.decoder_attention_heads,
        dropout=config.attention_dropout,
        encoder_decoder_attention=True,
    )
    self.encoder_attn_layer_norm = LayerNorm(self.embed_dim)
    self.fc1 = nn.Linear(self.embed_dim, config.decoder_ffn_dim)
    self.fc2 = nn.Linear(config.decoder_ffn_dim, self.embed_dim)
    self.final_layer_norm = LayerNorm(self.embed_dim)
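
Only the constructor is shown here; the feed-forward sub-block of such a decoder layer usually applies self.activation_fn between fc1 and fc2. The fragment below is an illustrative sketch of that pattern (assuming a hypothetical forward(self, x, ...) with post-norm residual connections), not the library's exact implementation:

# inside a hypothetical forward(self, x, ...):
residual = x
x = self.activation_fn(self.fc1(x))                                   # F.gelu on the expanded hidden state
x = F.dropout(x, p=self.activation_dropout, training=self.training)
x = self.fc2(x)
x = F.dropout(x, p=self.dropout, training=self.training)
x = self.final_layer_norm(residual + x)                               # residual connection + layer norm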
Example 3: __init__
# Required import: from torch.nn import functional [as alias]
# Or: from torch.nn.functional import gelu [as alias]
def __init__(self, num_classes, embedding_dim):
    """Get logits for elements by conditioning on the utterance embedding.

    Args:
        num_classes (int): The number of classes for which logits are to be generated.
        embedding_dim (int): Hidden size of the BERT encoder.

    Returns:
        A tensor of shape (batch_size, num_elements, num_classes) containing the logits.
    """
    super().__init__()
    self.num_classes = num_classes
    self.utterance_proj = nn.Linear(embedding_dim, embedding_dim)
    self.activation = F.gelu
    self.layer1 = nn.Linear(2 * embedding_dim, embedding_dim)
    self.layer2 = nn.Linear(embedding_dim, num_classes)
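
The forward method is omitted in this example. Based on the docstring, a plausible sketch looks like the following (the argument names encoded_utterance and element_embeddings are assumptions made here for illustration):

def forward(self, encoded_utterance, element_embeddings):
    # encoded_utterance: (batch_size, embedding_dim)
    # element_embeddings: (batch_size, num_elements, embedding_dim)
    utterance = self.utterance_proj(encoded_utterance)
    utterance = utterance.unsqueeze(1).expand(-1, element_embeddings.size(1), -1)
    hidden = torch.cat([utterance, element_embeddings], dim=-1)  # (batch_size, num_elements, 2 * embedding_dim)
    hidden = self.activation(self.layer1(hidden))                # F.gelu
    return self.layer2(hidden)                                   # (batch_size, num_elements, num_classes)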
Example 4: _gelu_python
# Required import: from torch.nn import functional [as alias]
# Or: from torch.nn.functional import gelu [as alias]
def _gelu_python(x):
    """Original implementation of the gelu activation function in the Google BERT repo when initially created.

    For reference: OpenAI GPT's gelu is slightly different (and gives slightly different results):
        0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
    This is now written in C in torch.nn.functional.
    Also see https://arxiv.org/abs/1606.08415
    """
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
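
As the docstring notes, this erf-based form is the exact GELU that torch.nn.functional.gelu now computes natively, so the two should agree numerically. A minimal sanity-check sketch:

import math
import torch
import torch.nn.functional as F

x = torch.randn(4, 8)
# _gelu_python and the built-in F.gelu both compute the exact erf-based GELU,
# so they should only differ by floating-point rounding.
assert torch.allclose(_gelu_python(x), F.gelu(x), atol=1e-6)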
Example 5: gelu_new
# Required import: from torch.nn import functional [as alias]
# Or: from torch.nn.functional import gelu [as alias]
def gelu_new(x):
    """Implementation of the gelu activation function currently used in the Google BERT repo (identical to OpenAI GPT).

    Also see https://arxiv.org/abs/1606.08415
    """
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
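
This is the tanh approximation from the GELU paper, so it is close to, but not identical to, the exact erf form in Example 4. In recent PyTorch releases (roughly 1.12 and later) the same approximation is exposed as F.gelu(x, approximate='tanh'); the comparison below is a sketch that assumes such a version is installed:

import math
import torch
import torch.nn.functional as F

x = torch.randn(4, 8)
print((gelu_new(x) - F.gelu(x)).abs().max())      # small but nonzero: tanh approximation vs. exact GELU
# The built-in tanh variant should match gelu_new up to rounding (requires a PyTorch
# version that supports the `approximate` argument).
assert torch.allclose(gelu_new(x), F.gelu(x, approximate='tanh'), atol=1e-6)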
Example 6: gelu_new
# Required import: from torch.nn import functional [as alias]
# Or: from torch.nn.functional import gelu [as alias]
def gelu_new(x):
    """Implementation of the gelu activation function currently used in the Google BERT repo (identical to OpenAI GPT).

    Also see https://arxiv.org/abs/1606.08415
    """
    return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
Example 7: forward
# Required import: from torch.nn import functional [as alias]
# Or: from torch.nn.functional import gelu [as alias]
def forward(self, x):
    return gelu(x)
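
This forward simply wraps the functional call, so the surrounding module behaves like nn.GELU(). A minimal usage sketch (the enclosing class name is not shown in the example, so GeLU below is a stand-in):

import torch
import torch.nn as nn
from torch.nn.functional import gelu

class GeLU(nn.Module):
    def forward(self, x):
        return gelu(x)

out = GeLU()(torch.randn(2, 3))   # same result as nn.GELU()(torch.randn(2, 3))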