This page collects typical usage examples of the Python attribute torch.nn.GLU. If you are wondering what nn.GLU does, how to call it, or simply want to see it used in real code, the curated examples below may help. You can also explore further usage examples of the containing module, torch.nn.
The following presents 14 code examples of the nn.GLU attribute, ordered by popularity by default.
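As a quick primer before the examples: nn.GLU splits its input in half along one dimension (the last one by default) and returns first_half * sigmoid(second_half), so the output is half as wide along that dimension. A minimal, self-contained sketch (the tensor shapes here are arbitrary):

import torch
from torch import nn

glu = nn.GLU()                      # splits the last dimension by default
x = torch.randn(8, 16, 512)         # (batch, time, 2 * channels)
y = glu(x)
print(y.shape)                      # torch.Size([8, 16, 256]) -- last dim is halved

# Equivalent to gating one half of the features with the other half:
a, b = x.chunk(2, dim=-1)
assert torch.allclose(y, a * torch.sigmoid(b))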
Example 1: get_activation
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def get_activation(self, act):
    if act == 'tanh':
        act = nn.Tanh()
    elif act == 'relu':
        act = nn.ReLU()
    elif act == 'softplus':
        act = nn.Softplus()
    elif act == 'rrelu':
        act = nn.RReLU()
    elif act == 'leakyrelu':
        act = nn.LeakyReLU()
    elif act == 'elu':
        act = nn.ELU()
    elif act == 'selu':
        act = nn.SELU()
    elif act == 'glu':
        act = nn.GLU()
    else:
        print('Defaulting to tanh activations...')
        act = nn.Tanh()
    return act
Example 2: GatedLinear
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def GatedLinear(in_features, out_features, dropout=0., bias=True):
    """Weight-normalized Linear layer (input: B x T x C) with interspersed GLU units"""
    return nn.Sequential(
        Linear(in_features, out_features*4, dropout, bias),
        nn.GLU(),
        Linear(out_features*2, out_features*2, dropout, bias),
        nn.GLU(),
        Linear(out_features, out_features, dropout, bias)
    )
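Each nn.GLU above halves the feature dimension, which is why the widths step down between the Linear calls. The following shape trace uses a plain nn.Linear as a stand-in for the weight-normalized Linear helper from the example (that helper is not shown on this page), so it is only a sketch of the dimension flow:

import torch
from torch import nn

in_features, out_features = 128, 64
stack = nn.Sequential(
    nn.Linear(in_features, out_features * 4),       # 128 -> 256
    nn.GLU(),                                       # 256 -> 128
    nn.Linear(out_features * 2, out_features * 2),  # 128 -> 128
    nn.GLU(),                                       # 128 -> 64
    nn.Linear(out_features, out_features),          # 64 -> 64
)
x = torch.randn(2, 10, in_features)                 # B x T x C
print(stack(x).shape)                               # torch.Size([2, 10, 64])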
Example 3: add_args
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def add_args(parser):
    """Add model-specific arguments to the parser."""
    parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
                        help='dropout probability')
    parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                        help='encoder embedding dimension')
    parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                        help='encoder layers [(dim, kernel_size), ...]')
    parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                        help='decoder embedding dimension')
    parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                        help='decoder layers [(dim, kernel_size), ...]')
    parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                        help='decoder output embedding dimension')
    parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                        help='decoder attention [True, ...]')
    parser.add_argument('--self-attention', default='False', type=str, metavar='EXPR',
                        help='decoder self-attention layers, ex: [True] + [False]*5')
    parser.add_argument('--multihead-attention-nheads', default=1, type=int,
                        help='Number of heads to use in attention')
    parser.add_argument('--multihead-self-attention-nheads', default=1, type=int,
                        help='Number of heads to use in self-attention')
    parser.add_argument('--encoder-attention', type=str, metavar='EXPR', default='False',
                        help='encoder attention [True, ...]')
    parser.add_argument('--encoder-attention-nheads', default=1, type=int,
                        help='Number of heads to use in encoder attention')
    parser.add_argument('--project-input', type=str, metavar='EXPR', default='False',
                        help='Use projections in self-attention [True, ...]')
    parser.add_argument('--gated-attention', type=str, metavar='EXPR', default='False',
                        help='Use GLU layers in self-attention projections [True, ...]')
    parser.add_argument('--downsample', type=str, metavar='EXPR', default='False',
                        help='Use downsampling in self-attention [True, ...]')
    parser.add_argument('--pretrained-checkpoint', metavar='DIR', default='',
                        help='path to load checkpoint from pretrained model')
    parser.add_argument('--pretrained', type=str, metavar='EXPR', default='False',
                        help='use pretrained model when training [True, ...]')
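For context, a hedged sketch of how such an add_args hook is typically exercised. In the source repositories this is usually a static method on a model class; here it is called as the plain function defined above, and the command-line values are made up:

import argparse

parser = argparse.ArgumentParser()
add_args(parser)   # in fairseq-style code this would be SomeModel.add_args(parser)

args = parser.parse_args([
    '--encoder-embed-dim', '512',
    '--self-attention', '[True] + [False]*5',
])
# The EXPR-style string options are meant to be eval'd into Python lists/booleans.
self_attention = eval(args.self_attention)   # [True, False, False, False, False, False]
print(args.encoder_embed_dim, args.dropout, self_attention)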
Example 4: add_args
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def add_args(parser):
    """Add model-specific arguments to the parser."""
    # fmt: off
    parser.add_argument('--dropout', type=float, metavar='D',
                        help='dropout probability')
    parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                        help='encoder embedding dimension')
    parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                        help='encoder layers [(dim, kernel_size), ...]')
    parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                        help='decoder embedding dimension')
    parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                        help='decoder layers [(dim, kernel_size), ...]')
    parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                        help='decoder output embedding dimension')
    parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                        help='decoder attention [True, ...]')
    parser.add_argument('--self-attention', type=str, metavar='EXPR',
                        help='decoder self-attention layers, ex: [True] + [False]*5')
    parser.add_argument('--multihead-attention-nheads', type=int,
                        help='Number of heads to use in attention')
    parser.add_argument('--multihead-self-attention-nheads', type=int,
                        help='Number of heads to use in self-attention')
    parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                        help='encoder attention [True, ...]')
    parser.add_argument('--encoder-attention-nheads', type=int,
                        help='Number of heads to use in encoder attention')
    parser.add_argument('--project-input', type=str, metavar='EXPR',
                        help='Use projections in self-attention [True, ...]')
    parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                        help='Use GLU layers in self-attention projections [True, ...]')
    parser.add_argument('--downsample', type=str, metavar='EXPR',
                        help='Use downsampling in self-attention [True, ...]')
    parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                        help='path to load checkpoint from pretrained model')
    parser.add_argument('--pretrained', type=str, metavar='EXPR',
                        help='use pretrained model when training [True, ...]')
    # fmt: on
Example 5: __init__
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, args, kernel_size=0):
    super().__init__()
    self.embed_dim = args.encoder_embed_dim
    self.conv_dim = args.encoder_conv_dim
    padding_l = kernel_size // 2 if kernel_size % 2 == 1 else ((kernel_size - 1) // 2, kernel_size // 2)
    if args.encoder_glu:
        self.linear1 = Linear(self.embed_dim, 2*self.conv_dim)
        self.act = nn.GLU()
    else:
        self.linear1 = Linear(self.embed_dim, self.conv_dim)
        self.act = None
    if args.encoder_conv_type == 'lightweight':
        self.conv = LightweightConv(self.conv_dim, kernel_size, padding_l=padding_l,
                                    weight_softmax=args.weight_softmax,
                                    num_heads=args.encoder_attention_heads,
                                    weight_dropout=args.weight_dropout)
    elif args.encoder_conv_type == 'dynamic':
        self.conv = DynamicConv(self.conv_dim, kernel_size, padding_l=padding_l,
                                weight_softmax=args.weight_softmax,
                                num_heads=args.encoder_attention_heads,
                                weight_dropout=args.weight_dropout)
    else:
        raise NotImplementedError
    self.linear2 = Linear(self.conv_dim, self.embed_dim)
    self.dropout = args.dropout
    self.relu_dropout = args.relu_dropout
    self.input_dropout = args.input_dropout
    self.normalize_before = args.encoder_normalize_before
    self.fc1 = Linear(self.embed_dim, args.encoder_ffn_embed_dim)
    self.fc2 = Linear(args.encoder_ffn_embed_dim, self.embed_dim)
    self.layer_norms = nn.ModuleList([LayerNorm(self.embed_dim) for _ in range(2)])
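When args.encoder_glu is set, linear1 projects to twice conv_dim precisely so that nn.GLU can gate the features back down to conv_dim before the lightweight/dynamic convolution. A self-contained sketch of just that projection-and-gate step, with made-up dimensions and a plain nn.Linear standing in for the example's Linear helper:

import torch
from torch import nn

embed_dim, conv_dim = 512, 512
linear1 = nn.Linear(embed_dim, 2 * conv_dim)   # widen to 2 * conv_dim
act = nn.GLU()                                 # gate back down to conv_dim

x = torch.randn(10, 4, embed_dim)              # (time, batch, embed_dim)
h = act(linear1(x))
print(h.shape)                                 # torch.Size([10, 4, 512]); ready for the convolution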
Example 6: __init__
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, h, d_model, dropout=0.1, scale=1, project_k_v=1, use_output_layer=1, do_aoa=0, norm_q=0, dropout_aoa=0.3):
    super(MultiHeadedDotAttention, self).__init__()
    assert d_model * scale % h == 0
    # We assume d_v always equals d_k
    self.d_k = d_model * scale // h
    self.h = h
    # Do we need to do linear projections on K and V?
    self.project_k_v = project_k_v
    # normalize the query?
    if norm_q:
        self.norm = LayerNorm(d_model)
    else:
        self.norm = lambda x: x
    self.linears = clones(nn.Linear(d_model, d_model * scale), 1 + 2 * project_k_v)
    # output linear layer after the multi-head attention?
    self.output_layer = nn.Linear(d_model * scale, d_model)
    # apply aoa after attention?
    self.use_aoa = do_aoa
    if self.use_aoa:
        self.aoa_layer = nn.Sequential(nn.Linear((1 + scale) * d_model, 2 * d_model), nn.GLU())
        # dropout to the input of AoA layer
        if dropout_aoa > 0:
            self.dropout_aoa = nn.Dropout(p=dropout_aoa)
        else:
            self.dropout_aoa = lambda x: x
    if self.use_aoa or not use_output_layer:
        # AoA doesn't need the output linear layer
        del self.output_layer
        self.output_layer = lambda x: x
    self.attn = None
    self.dropout = nn.Dropout(p=dropout)
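The attention-on-attention (AoA) layer above is again the Linear-plus-GLU pattern: the query and the attended vector are concatenated, projected to 2 * d_model, and gated back to d_model. A small shape check of that layer in isolation (the dimensions and the concatenation order are illustrative, not taken from the model's forward pass):

import torch
from torch import nn

d_model, scale = 512, 1
aoa_layer = nn.Sequential(
    nn.Linear((1 + scale) * d_model, 2 * d_model),  # concat(attended, query) -> 2 * d_model
    nn.GLU(),                                       # gate -> d_model
)
query = torch.randn(2, 7, d_model)
attended = torch.randn(2, 7, scale * d_model)
out = aoa_layer(torch.cat([attended, query], dim=-1))
print(out.shape)                                    # torch.Size([2, 7, 512])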
Example 7: add_args
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def add_args(parser):
    """Add model-specific arguments to the parser."""
    parser.add_argument('--dropout', type=float, metavar='D',
                        help='dropout probability')
    parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                        help='encoder embedding dimension')
    parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                        help='encoder layers [(dim, kernel_size), ...]')
    parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                        help='decoder embedding dimension')
    parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                        help='decoder layers [(dim, kernel_size), ...]')
    parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                        help='decoder output embedding dimension')
    parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                        help='decoder attention [True, ...]')
    parser.add_argument('--self-attention', type=str, metavar='EXPR',
                        help='decoder self-attention layers, ex: [True] + [False]*5')
    parser.add_argument('--multihead-attention-nheads', type=int,
                        help='Number of heads to use in attention')
    parser.add_argument('--multihead-self-attention-nheads', type=int,
                        help='Number of heads to use in self-attention')
    parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                        help='encoder attention [True, ...]')
    parser.add_argument('--encoder-attention-nheads', type=int,
                        help='Number of heads to use in encoder attention')
    parser.add_argument('--project-input', type=str, metavar='EXPR',
                        help='Use projections in self-attention [True, ...]')
    parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                        help='Use GLU layers in self-attention projections [True, ...]')
    parser.add_argument('--downsample', type=str, metavar='EXPR',
                        help='Use downsampling in self-attention [True, ...]')
    parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                        help='path to load checkpoint from pretrained model')
    parser.add_argument('--pretrained', type=str, metavar='EXPR',
                        help='use pretrained model when training [True, ...]')
Example 8: __init__
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(
    self,
    wshare,
    n_feat,
    dropout_rate,
    kernel_size_str,
    lnum,
    use_kernel_mask=False,
    use_bias=False,
):
    """Construct Lightweight 2-Dimensional Convolution layer."""
    super(LightweightConvolution2D, self).__init__()
    assert n_feat % wshare == 0
    self.wshare = wshare
    self.use_kernel_mask = use_kernel_mask
    self.dropout_rate = dropout_rate
    self.kernel_size = int(kernel_size_str.split("_")[lnum])
    self.padding_size = int(self.kernel_size / 2)
    # linear -> GLU -> lightconv -> linear
    self.linear1 = nn.Linear(n_feat, n_feat * 2)
    self.linear2 = nn.Linear(n_feat * 2, n_feat)
    self.act = nn.GLU()
    # lightconv related
    self.weight = nn.Parameter(
        torch.Tensor(self.wshare, 1, self.kernel_size).uniform_(0, 1)
    )
    self.weight_f = nn.Parameter(
        torch.Tensor(1, 1, self.kernel_size).uniform_(0, 1)
    )
    self.use_bias = use_bias
    if self.use_bias:
        self.bias = nn.Parameter(torch.Tensor(n_feat))
    # mask of kernel
    kernel_mask0 = torch.zeros(self.wshare, int(self.kernel_size / 2))
    kernel_mask1 = torch.ones(self.wshare, int(self.kernel_size / 2 + 1))
    self.kernel_mask = torch.cat((kernel_mask1, kernel_mask0), dim=-1).unsqueeze(1)
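The kernel_mask built at the end is what makes the lightweight convolution causal when use_kernel_mask is enabled: the taps that would look at future frames are filled with -inf before the softmax over the kernel, so their weights become exactly zero. A standalone illustration with arbitrary wshare and kernel_size:

import torch
import torch.nn.functional as F

wshare, kernel_size = 2, 5
kernel_mask0 = torch.zeros(wshare, kernel_size // 2)          # future taps
kernel_mask1 = torch.ones(wshare, kernel_size // 2 + 1)       # current + past taps
kernel_mask = torch.cat((kernel_mask1, kernel_mask0), dim=-1).unsqueeze(1)  # (wshare, 1, kernel_size)

weight = torch.rand(wshare, 1, kernel_size)
weight = weight.masked_fill(kernel_mask == 0.0, float('-inf'))
weight = F.softmax(weight, dim=-1)
print(weight)   # the last kernel_size // 2 taps of every kernel are exactly 0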
Example 9: __init__
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(
    self,
    wshare,
    n_feat,
    dropout_rate,
    kernel_size_str,
    lnum,
    use_kernel_mask=False,
    use_bias=False,
):
    """Construct Lightweight Convolution layer."""
    super(LightweightConvolution, self).__init__()
    assert n_feat % wshare == 0
    self.wshare = wshare
    self.use_kernel_mask = use_kernel_mask
    self.dropout_rate = dropout_rate
    self.kernel_size = int(kernel_size_str.split("_")[lnum])
    self.padding_size = int(self.kernel_size / 2)
    # linear -> GLU -> lightconv -> linear
    self.linear1 = nn.Linear(n_feat, n_feat * 2)
    self.linear2 = nn.Linear(n_feat, n_feat)
    self.act = nn.GLU()
    # lightconv related
    self.weight = nn.Parameter(
        torch.Tensor(self.wshare, 1, self.kernel_size).uniform_(0, 1)
    )
    self.use_bias = use_bias
    if self.use_bias:
        self.bias = nn.Parameter(torch.Tensor(n_feat))
    # mask of kernel
    kernel_mask0 = torch.zeros(self.wshare, int(self.kernel_size / 2))
    kernel_mask1 = torch.ones(self.wshare, int(self.kernel_size / 2 + 1))
    self.kernel_mask = torch.cat((kernel_mask1, kernel_mask0), dim=-1).unsqueeze(1)
Example 10: __init__
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(
    self,
    wshare,
    n_feat,
    dropout_rate,
    kernel_size_str,
    lnum,
    use_kernel_mask=False,
    use_bias=False,
):
    """Construct Dynamic 2-Dimensional Convolution layer."""
    super(DynamicConvolution2D, self).__init__()
    assert n_feat % wshare == 0
    self.wshare = wshare
    self.use_kernel_mask = use_kernel_mask
    self.dropout_rate = dropout_rate
    self.kernel_size = int(kernel_size_str.split("_")[lnum])
    self.padding_size = int(self.kernel_size / 2)
    self.attn_t = None
    self.attn_f = None
    # linear -> GLU -- -> lightconv -> linear
    #               \        /
    #                 Linear
    self.linear1 = nn.Linear(n_feat, n_feat * 2)
    self.linear2 = nn.Linear(n_feat * 2, n_feat)
    self.linear_weight = nn.Linear(n_feat, self.wshare * 1 * self.kernel_size)
    nn.init.xavier_uniform(self.linear_weight.weight)
    self.linear_weight_f = nn.Linear(n_feat, self.kernel_size)
    nn.init.xavier_uniform(self.linear_weight_f.weight)
    self.act = nn.GLU()
    # dynamic conv related
    self.use_bias = use_bias
    if self.use_bias:
        self.bias = nn.Parameter(torch.Tensor(n_feat))
Example 11: __init__
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, config, embedding=None):
    super(rnn_encoder, self).__init__()
    self.embedding = embedding if embedding is not None else nn.Embedding(config.src_vocab_size, config.emb_size)
    self.hidden_size = config.hidden_size
    self.config = config
    if config.swish:
        self.sw1 = nn.Sequential(nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0), nn.BatchNorm1d(config.hidden_size), nn.ReLU())
        self.sw3 = nn.Sequential(nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0), nn.ReLU(), nn.BatchNorm1d(config.hidden_size),
                                 nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1), nn.ReLU(), nn.BatchNorm1d(config.hidden_size))
        self.sw33 = nn.Sequential(nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0), nn.ReLU(), nn.BatchNorm1d(config.hidden_size),
                                  nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1), nn.ReLU(), nn.BatchNorm1d(config.hidden_size),
                                  nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1), nn.ReLU(), nn.BatchNorm1d(config.hidden_size))
        self.linear = nn.Sequential(nn.Linear(2*config.hidden_size, 2*config.hidden_size), nn.GLU(), nn.Dropout(config.dropout))
        self.filter_linear = nn.Linear(3*config.hidden_size, config.hidden_size)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
    if config.selfatt:
        if config.attention == 'None':
            self.attention = None
        elif config.attention == 'bahdanau':
            self.attention = models.bahdanau_attention(config.hidden_size, config.emb_size, config.pool_size)
        elif config.attention == 'luong':
            self.attention = models.luong_attention(config.hidden_size, config.emb_size, config.pool_size)
        elif config.attention == 'luong_gate':
            self.attention = models.luong_gate_attention(config.hidden_size, config.emb_size)
    if config.cell == 'gru':
        self.rnn = nn.GRU(input_size=config.emb_size, hidden_size=config.hidden_size,
                          num_layers=config.enc_num_layers, dropout=config.dropout,
                          bidirectional=config.bidirectional)
    else:
        self.rnn = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                           num_layers=config.enc_num_layers, dropout=config.dropout,
                           bidirectional=config.bidirectional)
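The forward pass of this encoder is not shown on this page, but the layer sizes suggest the intent: the three convolutional branches each keep hidden_size channels and their concatenation (3 * hidden_size) is reduced by filter_linear, while self.linear is the familiar Linear-plus-GLU gate applied to the 2 * hidden_size output of the bidirectional RNN. A shape sketch of that gate alone, with made-up sizes:

import torch
from torch import nn

hidden_size, dropout = 256, 0.1
gate = nn.Sequential(nn.Linear(2 * hidden_size, 2 * hidden_size),
                     nn.GLU(),
                     nn.Dropout(dropout))
rnn_out = torch.randn(30, 4, 2 * hidden_size)   # (time, batch, 2 * hidden) from a bidirectional RNN
print(gate(rnn_out).shape)                      # torch.Size([30, 4, 256])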
Example 12: __init__
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, args, kernel_size=0):
    super().__init__()
    self.embed_dim = args.encoder_embed_dim
    self.conv_dim = args.encoder_conv_dim
    padding_l = kernel_size // 2 if kernel_size % 2 == 1 else ((kernel_size - 1) // 2, kernel_size // 2)
    if args.encoder_glu:
        self.linear1 = Linear(self.embed_dim, 2*self.conv_dim)
        self.act = nn.GLU()
    else:
        self.linear1 = Linear(self.embed_dim, self.conv_dim)
        self.act = None
    if args.encoder_conv_type == 'lightweight':
        self.conv = LightweightConv1dTBC(self.conv_dim, kernel_size, padding_l=padding_l,
                                         weight_softmax=args.weight_softmax,
                                         num_heads=args.encoder_attention_heads,
                                         weight_dropout=args.weight_dropout)
    elif args.encoder_conv_type == 'dynamic':
        self.conv = DynamicConv1dTBC(self.conv_dim, kernel_size, padding_l=padding_l,
                                     weight_softmax=args.weight_softmax,
                                     num_heads=args.encoder_attention_heads,
                                     weight_dropout=args.weight_dropout)
    else:
        raise NotImplementedError
    self.linear2 = Linear(self.conv_dim, self.embed_dim)
    self.dropout = args.dropout
    self.relu_dropout = args.relu_dropout
    self.input_dropout = args.input_dropout
    self.normalize_before = args.encoder_normalize_before
    self.fc1 = Linear(self.embed_dim, args.encoder_ffn_embed_dim)
    self.fc2 = Linear(args.encoder_ffn_embed_dim, self.embed_dim)
    self.layer_norms = nn.ModuleList([LayerNorm(self.embed_dim) for _ in range(2)])
Example 13: __init__
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, kernel_size, in_ch, out_ch, bottlececk_dim=0, dropout=0.):
    super().__init__()
    self.conv_residual = None
    if in_ch != out_ch:
        self.conv_residual = nn.utils.weight_norm(
            nn.Conv2d(in_channels=in_ch,
                      out_channels=out_ch,
                      kernel_size=(1, 1)), name='weight', dim=0)
        self.dropout_residual = nn.Dropout(p=dropout)
    self.pad_left = nn.ConstantPad2d((0, 0, kernel_size - 1, 0), 0)
    layers = OrderedDict()
    if bottlececk_dim == 0:
        layers['conv'] = nn.utils.weight_norm(
            nn.Conv2d(in_channels=in_ch,
                      out_channels=out_ch * 2,
                      kernel_size=(kernel_size, 1)), name='weight', dim=0)
        # TODO(hirofumi0810): padding?
        layers['dropout'] = nn.Dropout(p=dropout)
        layers['glu'] = nn.GLU()
    elif bottlececk_dim > 0:
        layers['conv_in'] = nn.utils.weight_norm(
            nn.Conv2d(in_channels=in_ch,
                      out_channels=bottlececk_dim,
                      kernel_size=(1, 1)), name='weight', dim=0)
        layers['dropout_in'] = nn.Dropout(p=dropout)
        layers['conv_bottleneck'] = nn.utils.weight_norm(
            nn.Conv2d(in_channels=bottlececk_dim,
                      out_channels=bottlececk_dim,
                      kernel_size=(kernel_size, 1)), name='weight', dim=0)
        layers['dropout'] = nn.Dropout(p=dropout)
        layers['glu'] = nn.GLU()
        layers['conv_out'] = nn.utils.weight_norm(
            nn.Conv2d(in_channels=bottlececk_dim,
                      out_channels=out_ch * 2,
                      kernel_size=(1, 1)), name='weight', dim=0)
        layers['dropout_out'] = nn.Dropout(p=dropout)
    self.layers = nn.Sequential(layers)
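One detail worth remembering with convolutional GLU blocks: nn.GLU splits the last dimension by default, so to gate the channel axis of a conv feature map you pass the dimension explicitly. The sketch below shows that general pattern with made-up sizes; it illustrates a GLU-gated, left-padded convolution rather than the exact wiring of the block above:

import torch
from torch import nn

in_ch, out_ch, kernel_size = 32, 64, 3
conv = nn.utils.weight_norm(
    nn.Conv2d(in_ch, out_ch * 2, kernel_size=(kernel_size, 1)), name='weight', dim=0)
glu = nn.GLU(dim=1)                  # gate along the channel axis

x = torch.randn(4, in_ch, 100, 1)    # (batch, channels, time, 1)
x = nn.ConstantPad2d((0, 0, kernel_size - 1, 0), 0)(x)   # left-pad the time axis (causal)
y = glu(conv(x))
print(y.shape)                       # torch.Size([4, 64, 100, 1])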
Example 14: forward
# Required module import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def forward(self, query, key, value, mask):
    """Forward of 'Lightweight 2-Dimensional Convolution'.

    This function takes query, key and value but uses only query.
    This is just for compatibility with the self-attention layer (attention.py).

    Args:
        query (torch.Tensor): (batch, time1, d_model) input tensor
        key (torch.Tensor): (batch, time2, d_model) NOT USED
        value (torch.Tensor): (batch, time2, d_model) NOT USED
        mask (torch.Tensor): (batch, time1, time2) mask

    Return:
        x (torch.Tensor): (batch, time1, d_model) output

    """
    # linear -> GLU -> lightconv -> linear
    x = query
    B, T, C = x.size()
    H = self.wshare
    # first linear layer
    x = self.linear1(x)
    # GLU activation
    x = self.act(x)
    # convolution along frequency axis
    weight_f = F.softmax(self.weight_f, dim=-1)
    weight_f = F.dropout(weight_f, self.dropout_rate, training=self.training)
    weight_new = torch.zeros(
        B * T, 1, self.kernel_size, device=x.device, dtype=x.dtype
    ).copy_(weight_f)
    xf = F.conv1d(
        x.view(1, B * T, C), weight_new, padding=self.padding_size, groups=B * T
    ).view(B, T, C)
    # lightconv
    x = x.transpose(1, 2).contiguous().view(-1, H, T)  # B x C x T
    weight = F.dropout(self.weight, self.dropout_rate, training=self.training)
    if self.use_kernel_mask:
        self.kernel_mask = self.kernel_mask.to(x.device)
        weight = weight.masked_fill(self.kernel_mask == 0.0, float("-inf"))
    weight = F.softmax(weight, dim=-1)
    x = F.conv1d(x, weight, padding=self.padding_size, groups=self.wshare).view(
        B, C, T
    )
    if self.use_bias:
        x = x + self.bias.view(1, -1, 1)
    x = x.transpose(1, 2)  # B x T x C
    x = torch.cat((x, xf), -1)  # B x T x 2*C
    if mask is not None and not self.use_kernel_mask:
        mask = mask.transpose(-1, -2)
        x = x.masked_fill(mask == 0, 0.0)
    # second linear layer
    x = self.linear2(x)
    return x