

Python nn.GLU Code Examples

This article collects typical usage examples of the torch.nn.GLU module in Python. If you are wondering what nn.GLU does, how to use it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from the torch.nn package.


The following shows 14 code examples of nn.GLU, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
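As background for all of the examples below: nn.GLU splits its input into two halves a and b along a given dimension (dim defaults to -1) and returns a * sigmoid(b), so the output is half the input size along that dimension. A minimal standalone check:

import torch
from torch import nn

glu = nn.GLU()                 # gates over the last dimension by default
x = torch.randn(2, 5, 8)       # the gated dimension must be even
y = glu(x)                     # y = a * sigmoid(b), where x = [a, b]
print(y.shape)                 # torch.Size([2, 5, 4])

a, b = x.chunk(2, dim=-1)
assert torch.allclose(y, a * torch.sigmoid(b))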

Example 1: get_activation

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def get_activation(self, act):
        if act == 'tanh':
            act = nn.Tanh()
        elif act == 'relu':
            act = nn.ReLU()
        elif act == 'softplus':
            act = nn.Softplus()
        elif act == 'rrelu':
            act = nn.RReLU()
        elif act == 'leakyrelu':
            act = nn.LeakyReLU()
        elif act == 'elu':
            act = nn.ELU()
        elif act == 'selu':
            act = nn.SELU()
        elif act == 'glu':
            act = nn.GLU()
        else:
            print('Defaulting to tanh activations...')
            act = nn.Tanh()
        return act 
Developer: blei-lab, Project: causal-text-embeddings, Lines: 23, Source: supervised_topic_model.py
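One caveat when reading this helper: unlike the element-wise activations returned by the other branches, nn.GLU halves the last dimension, so any layer feeding it must produce twice the desired width. A minimal sketch (the layer sizes here are hypothetical, not from the original project):

import torch
from torch import nn

hidden = 64
act = nn.GLU()                          # what get_activation('glu') returns
layer = nn.Linear(32, hidden * 2)       # must emit 2x the target width
out = act(layer(torch.randn(4, 32)))
print(out.shape)                        # torch.Size([4, 64])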

Example 2: GatedLinear

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def GatedLinear(in_features, out_features, dropout=0., bias=True):
    """Weight-normalized Linear layer (input: B x T x C) with interspersed GLU units"""
    return nn.Sequential(
        Linear(in_features, out_features*4, dropout, bias),
        nn.GLU(),
        Linear(out_features*2, out_features*2, dropout, bias),
        nn.GLU(),
        Linear(out_features, out_features, dropout, bias)
    ) 
Developer: nusnlp, Project: crosentgec, Lines: 11, Source: downsampled_multihead_attention.py
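The dimensions line up because each nn.GLU halves the feature dimension: in_features -> 4*out -> (GLU) -> 2*out -> 2*out -> (GLU) -> out -> out. A shape check with plain nn.Linear standing in for the project's weight-normalized Linear helper (an assumption; the real helper also applies weight normalization and dropout-scaled initialization):

import torch
from torch import nn

def gated_linear_sketch(in_features, out_features):
    # plain nn.Linear stands in for the weight-normalized helper
    return nn.Sequential(
        nn.Linear(in_features, out_features * 4),
        nn.GLU(),                                  # 4*out -> 2*out
        nn.Linear(out_features * 2, out_features * 2),
        nn.GLU(),                                  # 2*out -> out
        nn.Linear(out_features, out_features),
    )

m = gated_linear_sketch(16, 8)
print(m(torch.randn(3, 10, 16)).shape)  # torch.Size([3, 10, 8])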

Example 3: add_args

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def add_args(parser):
        """Add model-specific arguments to the parser."""
        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
                            help='dropout probability')
        parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                            help='encoder embedding dimension')
        parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                            help='encoder layers [(dim, kernel_size), ...]')
        parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                            help='decoder embedding dimension')
        parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                            help='decoder layers [(dim, kernel_size), ...]')
        parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                            help='decoder output embedding dimension')
        parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                            help='decoder attention [True, ...]')
        parser.add_argument('--self-attention', default='False', type=str, metavar='EXPR',
                            help='decoder self-attention layers, ex: [True] + [False]*5')
        parser.add_argument('--multihead-attention-nheads', default=1, type=int,
                            help='Number of heads to use in attention')
        parser.add_argument('--multihead-self-attention-nheads', default=1, type=int,
                            help='Number of heads to use in self-attention')
        parser.add_argument('--encoder-attention', type=str, metavar='EXPR', default='False',
                            help='encoder attention [True, ...]')
        parser.add_argument('--encoder-attention-nheads', default=1, type=int,
                            help='Number of heads to use in encoder attention')
        parser.add_argument('--project-input', type=str, metavar='EXPR', default='False',
                            help='Use projections in self-attention [True, ...]')
        parser.add_argument('--gated-attention', type=str, metavar='EXPR', default='False',
                            help='Use GLU layers in self-attention projections [True, ...]')
        parser.add_argument('--downsample', type=str, metavar='EXPR', default='False',
                            help='Use downsampling in self-attention [True, ...]')
        parser.add_argument('--pretrained-checkpoint', metavar='DIR', default='',
                            help='path to load checkpoint from pretrained model')
        parser.add_argument('--pretrained', type=str, metavar='EXPR', default='False',
                            help='use pretrained model when training [True, ...]') 
Developer: nusnlp, Project: crosentgec, Lines: 38, Source: fconv_self_att.py
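These flags plug into a standard argparse.ArgumentParser; in fairseq-style code, add_args is a static method registered per model architecture. A minimal sketch of how the resulting parser behaves, assuming the function above has been extracted as a plain standalone function:

import argparse

parser = argparse.ArgumentParser()
add_args(parser)  # assumption: callable outside its class
args = parser.parse_args(['--dropout', '0.2',
                          '--multihead-attention-nheads', '4'])
print(args.dropout, args.multihead_attention_nheads)  # 0.2 4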

Example 4: add_args

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def add_args(parser):
        """Add model-specific arguments to the parser."""
        # fmt: off
        parser.add_argument('--dropout', type=float, metavar='D',
                            help='dropout probability')
        parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                            help='encoder embedding dimension')
        parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                            help='encoder layers [(dim, kernel_size), ...]')
        parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                            help='decoder embedding dimension')
        parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                            help='decoder layers [(dim, kernel_size), ...]')
        parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                            help='decoder output embedding dimension')
        parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                            help='decoder attention [True, ...]')
        parser.add_argument('--self-attention', type=str, metavar='EXPR',
                            help='decoder self-attention layers, ex: [True] + [False]*5')
        parser.add_argument('--multihead-attention-nheads', type=int,
                            help='Number of heads to use in attention')
        parser.add_argument('--multihead-self-attention-nheads', type=int,
                            help='Number of heads to use in self-attention')
        parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                            help='encoder attention [True, ...]')
        parser.add_argument('--encoder-attention-nheads', type=int,
                            help='Number of heads to use in encoder attention')
        parser.add_argument('--project-input', type=str, metavar='EXPR',
                            help='Use projections in self-attention [True, ...]')
        parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                            help='Use GLU layers in self-attention projections [True, ...]')
        parser.add_argument('--downsample', type=str, metavar='EXPR',
                            help='Use downsampling in self-attention [True, ...]')
        parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                            help='path to load checkpoint from pretrained model')
        parser.add_argument('--pretrained', type=str, metavar='EXPR',
                            help='use pretrained model when training [True, ...]')
        # fmt: on 
Developer: pytorch, Project: fairseq, Lines: 40, Source: fconv_self_att.py

Example 5: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, args, kernel_size=0):
        super().__init__()
        self.embed_dim = args.encoder_embed_dim
        self.conv_dim = args.encoder_conv_dim
        padding_l = kernel_size // 2 if kernel_size % 2 == 1 else ((kernel_size - 1) // 2, kernel_size // 2)

        if args.encoder_glu:
            self.linear1 = Linear(self.embed_dim, 2*self.conv_dim)
            self.act = nn.GLU()
        else:
            self.linear1 = Linear(self.embed_dim, self.conv_dim)
            self.act = None
        if args.encoder_conv_type == 'lightweight':
            self.conv = LightweightConv(self.conv_dim, kernel_size, padding_l=padding_l,
                                        weight_softmax=args.weight_softmax,
                                        num_heads=args.encoder_attention_heads,
                                        weight_dropout=args.weight_dropout)
        elif args.encoder_conv_type == 'dynamic':
            self.conv = DynamicConv(self.conv_dim, kernel_size, padding_l=padding_l,
                                    weight_softmax=args.weight_softmax,
                                    num_heads=args.encoder_attention_heads,
                                    weight_dropout=args.weight_dropout)
        else:
            raise NotImplementedError
        self.linear2 = Linear(self.conv_dim, self.embed_dim)

        self.dropout = args.dropout
        self.relu_dropout = args.relu_dropout
        self.input_dropout = args.input_dropout
        self.normalize_before = args.encoder_normalize_before
        self.fc1 = Linear(self.embed_dim, args.encoder_ffn_embed_dim)
        self.fc2 = Linear(args.encoder_ffn_embed_dim, self.embed_dim)
        self.layer_norms = nn.ModuleList([LayerNorm(self.embed_dim) for _ in range(2)]) 
Developer: pytorch, Project: fairseq, Lines: 35, Source: lightconv.py
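The args.encoder_glu branch implements the input projection used before lightweight/dynamic convolutions: linear1 doubles the width and nn.GLU gates it back down to conv_dim. The gating pattern in isolation (hypothetical sizes; the full layer needs the surrounding fairseq args):

import torch
from torch import nn

embed_dim, conv_dim = 64, 64
linear1 = nn.Linear(embed_dim, 2 * conv_dim)
act = nn.GLU()

x = torch.randn(10, 2, embed_dim)   # (T, B, C), the layout fairseq encoders use
x = act(linear1(x))                 # gated down to (10, 2, conv_dim)
print(x.shape)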

Example 6: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, h, d_model, dropout=0.1, scale=1, project_k_v=1, use_output_layer=1, do_aoa=0, norm_q=0, dropout_aoa=0.3):
        super(MultiHeadedDotAttention, self).__init__()
        assert d_model * scale % h == 0
        # We assume d_v always equals d_k
        self.d_k = d_model * scale // h
        self.h = h

        # Do we need to do linear projections on K and V?
        self.project_k_v = project_k_v

        # normalize the query?
        if norm_q:
            self.norm = LayerNorm(d_model)
        else:
            self.norm = lambda x:x
        self.linears = clones(nn.Linear(d_model, d_model * scale), 1 + 2 * project_k_v)

        # output linear layer after the multi-head attention?
        self.output_layer = nn.Linear(d_model * scale, d_model)

        # apply aoa after attention?
        self.use_aoa = do_aoa
        if self.use_aoa:
            self.aoa_layer =  nn.Sequential(nn.Linear((1 + scale) * d_model, 2 * d_model), nn.GLU())
            # dropout to the input of AoA layer
            if dropout_aoa > 0:
                self.dropout_aoa = nn.Dropout(p=dropout_aoa)
            else:
                self.dropout_aoa = lambda x:x

        if self.use_aoa or not use_output_layer:
            # AoA doesn't need the output linear layer
            del self.output_layer
            self.output_layer = lambda x:x

        self.attn = None
        self.dropout = nn.Dropout(p=dropout) 
Developer: husthuaan, Project: AAT, Lines: 39, Source: AoAModel.py
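The aoa_layer is the "Attention on Attention" gate: the attention result is concatenated with the query, projected to 2*d_model, and nn.GLU gates it down to d_model. In isolation (hypothetical sizes; the concatenation order here is a sketch):

import torch
from torch import nn

d_model, scale = 512, 1
aoa = nn.Sequential(nn.Linear((1 + scale) * d_model, 2 * d_model), nn.GLU())

query = torch.randn(4, 20, d_model)
attended = torch.randn(4, 20, scale * d_model)   # stand-in for the attention output
out = aoa(torch.cat([attended, query], dim=-1))
print(out.shape)                                 # torch.Size([4, 20, 512])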

Example 7: add_args

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def add_args(parser):
        """Add model-specific arguments to the parser."""
        parser.add_argument('--dropout', type=float, metavar='D',
                            help='dropout probability')
        parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                            help='encoder embedding dimension')
        parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                            help='encoder layers [(dim, kernel_size), ...]')
        parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                            help='decoder embedding dimension')
        parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                            help='decoder layers [(dim, kernel_size), ...]')
        parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                            help='decoder output embedding dimension')
        parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                            help='decoder attention [True, ...]')
        parser.add_argument('--self-attention', type=str, metavar='EXPR',
                            help='decoder self-attention layers, ex: [True] + [False]*5')
        parser.add_argument('--multihead-attention-nheads', type=int,
                            help='Number of heads to use in attention')
        parser.add_argument('--multihead-self-attention-nheads', type=int,
                            help='Number of heads to use in self-attention')
        parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                            help='encoder attention [True, ...]')
        parser.add_argument('--encoder-attention-nheads', type=int,
                            help='Number of heads to use in encoder attention')
        parser.add_argument('--project-input', type=str, metavar='EXPR',
                            help='Use projections in self-attention [True, ...]')
        parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                            help='Use GLU layers in self-attention projections [True, ...]')
        parser.add_argument('--downsample', type=str, metavar='EXPR',
                            help='Use downsampling in self-attention [True, ...]')
        parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                            help='path to load checkpoint from pretrained model')
        parser.add_argument('--pretrained', type=str, metavar='EXPR',
                            help='use pretrained model when training [True, ...]') 
Developer: mlperf, Project: training_results_v0.5, Lines: 38, Source: fconv_self_att.py

Example 8: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(
        self,
        wshare,
        n_feat,
        dropout_rate,
        kernel_size_str,
        lnum,
        use_kernel_mask=False,
        use_bias=False,
    ):
        """Construct Lightweight 2-Dimentional Convolution layer."""
        super(LightweightConvolution2D, self).__init__()

        assert n_feat % wshare == 0
        self.wshare = wshare
        self.use_kernel_mask = use_kernel_mask
        self.dropout_rate = dropout_rate
        self.kernel_size = int(kernel_size_str.split("_")[lnum])
        self.padding_size = int(self.kernel_size / 2)

        # linear -> GLU -> lightconv -> linear
        self.linear1 = nn.Linear(n_feat, n_feat * 2)
        self.linear2 = nn.Linear(n_feat * 2, n_feat)
        self.act = nn.GLU()

        # lightconv related
        self.weight = nn.Parameter(
            torch.Tensor(self.wshare, 1, self.kernel_size).uniform_(0, 1)
        )
        self.weight_f = nn.Parameter(
            torch.Tensor(1, 1, self.kernel_size).uniform_(0, 1)
        )
        self.use_bias = use_bias
        if self.use_bias:
            self.bias = nn.Parameter(torch.Tensor(n_feat))

        # mask of kernel
        kernel_mask0 = torch.zeros(self.wshare, int(self.kernel_size / 2))
        kernel_mask1 = torch.ones(self.wshare, int(self.kernel_size / 2 + 1))
        self.kernel_mask = torch.cat((kernel_mask1, kernel_mask0), dim=-1).unsqueeze(1) 
Developer: espnet, Project: espnet, Lines: 42, Source: lightconv2d.py

Example 9: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(
        self,
        wshare,
        n_feat,
        dropout_rate,
        kernel_size_str,
        lnum,
        use_kernel_mask=False,
        use_bias=False,
    ):
        """Construct Lightweight Convolution layer."""
        super(LightweightConvolution, self).__init__()

        assert n_feat % wshare == 0
        self.wshare = wshare
        self.use_kernel_mask = use_kernel_mask
        self.dropout_rate = dropout_rate
        self.kernel_size = int(kernel_size_str.split("_")[lnum])
        self.padding_size = int(self.kernel_size / 2)

        # linear -> GLU -> lightconv -> linear
        self.linear1 = nn.Linear(n_feat, n_feat * 2)
        self.linear2 = nn.Linear(n_feat, n_feat)
        self.act = nn.GLU()

        # lightconv related
        self.weight = nn.Parameter(
            torch.Tensor(self.wshare, 1, self.kernel_size).uniform_(0, 1)
        )
        self.use_bias = use_bias
        if self.use_bias:
            self.bias = nn.Parameter(torch.Tensor(n_feat))

        # mask of kernel
        kernel_mask0 = torch.zeros(self.wshare, int(self.kernel_size / 2))
        kernel_mask1 = torch.ones(self.wshare, int(self.kernel_size / 2 + 1))
        self.kernel_mask = torch.cat((kernel_mask1, kernel_mask0), dim=-1).unsqueeze(1) 
Developer: espnet, Project: espnet, Lines: 39, Source: lightconv.py
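The kernel mask built at the end is what makes the convolution causal when use_kernel_mask=True: ones cover the current and past kernel taps, zeros the future taps, and the forward pass fills masked positions with -inf before the softmax over the kernel. Reproducing just the mask for a width-5 kernel:

import torch

wshare, kernel_size = 4, 5
kernel_mask0 = torch.zeros(wshare, kernel_size // 2)
kernel_mask1 = torch.ones(wshare, kernel_size // 2 + 1)
kernel_mask = torch.cat((kernel_mask1, kernel_mask0), dim=-1).unsqueeze(1)
print(kernel_mask[0])  # tensor([[1., 1., 1., 0., 0.]]) -- future taps masked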

Example 10: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(
        self,
        wshare,
        n_feat,
        dropout_rate,
        kernel_size_str,
        lnum,
        use_kernel_mask=False,
        use_bias=False,
    ):
        """Construct Dynamic 2-Dimentional Convolution layer."""
        super(DynamicConvolution2D, self).__init__()

        assert n_feat % wshare == 0
        self.wshare = wshare
        self.use_kernel_mask = use_kernel_mask
        self.dropout_rate = dropout_rate
        self.kernel_size = int(kernel_size_str.split("_")[lnum])
        self.padding_size = int(self.kernel_size / 2)
        self.attn_t = None
        self.attn_f = None

        # linear -> GLU -- -> lightconv -> linear
        #               \        /
        #                 Linear
        self.linear1 = nn.Linear(n_feat, n_feat * 2)
        self.linear2 = nn.Linear(n_feat * 2, n_feat)
        self.linear_weight = nn.Linear(n_feat, self.wshare * 1 * self.kernel_size)
        nn.init.xavier_uniform_(self.linear_weight.weight)
        self.linear_weight_f = nn.Linear(n_feat, self.kernel_size)
        nn.init.xavier_uniform_(self.linear_weight_f.weight)
        self.act = nn.GLU()

        # dynamic conv related
        self.use_bias = use_bias
        if self.use_bias:
            self.bias = nn.Parameter(torch.Tensor(n_feat)) 
Developer: espnet, Project: espnet, Lines: 39, Source: dynamic_conv2d.py
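What distinguishes the dynamic variant is that the convolution kernel is predicted from the input itself: linear_weight maps each (batch, time) position to wshare * kernel_size weights, which the forward pass (not shown here) normalizes over the kernel axis, as in the lightweight variant. A sketch of the weight-generation step alone, with hypothetical sizes:

import torch
import torch.nn.functional as F
from torch import nn

n_feat, wshare, kernel_size = 64, 4, 3
linear_weight = nn.Linear(n_feat, wshare * kernel_size)

x = torch.randn(2, 50, n_feat)                         # (batch, time, n_feat)
w = linear_weight(x).view(2, 50, wshare, kernel_size)  # one kernel per position
w = F.softmax(w, dim=-1)                               # normalize over kernel taps
print(w.shape)                                         # torch.Size([2, 50, 4, 3])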

Example 11: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, config, embedding=None):
        super(rnn_encoder, self).__init__()

        self.embedding = embedding if embedding is not None else nn.Embedding(config.src_vocab_size, config.emb_size)
        self.hidden_size = config.hidden_size
        self.config = config

        if config.swish:
            self.sw1 = nn.Sequential(nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0), nn.BatchNorm1d(config.hidden_size), nn.ReLU())
            self.sw3 = nn.Sequential(nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0), nn.ReLU(), nn.BatchNorm1d(config.hidden_size),
                                     nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1), nn.ReLU(), nn.BatchNorm1d(config.hidden_size))
            self.sw33 = nn.Sequential(nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0), nn.ReLU(), nn.BatchNorm1d(config.hidden_size),
                                      nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1), nn.ReLU(), nn.BatchNorm1d(config.hidden_size),
                                      nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1), nn.ReLU(), nn.BatchNorm1d(config.hidden_size))
            self.linear = nn.Sequential(nn.Linear(2*config.hidden_size, 2*config.hidden_size), nn.GLU(), nn.Dropout(config.dropout))
            self.filter_linear = nn.Linear(3*config.hidden_size, config.hidden_size)
            self.tanh = nn.Tanh()
            self.sigmoid = nn.Sigmoid()

        if config.selfatt:
            if config.attention == 'None':
                self.attention = None
            elif config.attention == 'bahdanau':
                self.attention = models.bahdanau_attention(config.hidden_size, config.emb_size, config.pool_size)
            elif config.attention == 'luong':
                self.attention = models.luong_attention(config.hidden_size, config.emb_size, config.pool_size)
            elif config.attention == 'luong_gate':
                self.attention = models.luong_gate_attention(config.hidden_size, config.emb_size)

        if config.cell == 'gru':
            self.rnn = nn.GRU(input_size=config.emb_size, hidden_size=config.hidden_size,
                              num_layers=config.enc_num_layers, dropout=config.dropout,
                              bidirectional=config.bidirectional)
        else:
            self.rnn = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                               num_layers=config.enc_num_layers, dropout=config.dropout,
                               bidirectional=config.bidirectional) 
Developer: lancopku, Project: Global-Encoding, Lines: 39, Source: rnn.py
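Note the widths in self.linear: its input is 2*hidden_size (presumably the concatenated bidirectional RNN features), the inner Linear keeps that width, and nn.GLU then halves it to hidden_size before dropout. In isolation:

import torch
from torch import nn

hidden_size, dropout = 256, 0.1
linear = nn.Sequential(nn.Linear(2 * hidden_size, 2 * hidden_size),
                       nn.GLU(), nn.Dropout(dropout))
h = torch.randn(8, 2 * hidden_size)   # e.g. concatenated forward/backward states
print(linear(h).shape)                # torch.Size([8, 256])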

Example 12: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, args, kernel_size=0):
        super().__init__()
        self.embed_dim = args.encoder_embed_dim
        self.conv_dim = args.encoder_conv_dim
        padding_l = kernel_size // 2 if kernel_size % 2 == 1 else ((kernel_size - 1) // 2, kernel_size // 2)

        if args.encoder_glu:
            self.linear1 = Linear(self.embed_dim, 2*self.conv_dim)
            self.act = nn.GLU()
        else:
            self.linear1 = Linear(self.embed_dim, self.conv_dim)
            self.act = None
        if args.encoder_conv_type == 'lightweight':
            self.conv = LightweightConv1dTBC(self.conv_dim, kernel_size, padding_l=padding_l,
                                             weight_softmax=args.weight_softmax,
                                             num_heads=args.encoder_attention_heads,
                                             weight_dropout=args.weight_dropout)
        elif args.encoder_conv_type == 'dynamic':
            self.conv = DynamicConv1dTBC(self.conv_dim, kernel_size, padding_l=padding_l,
                                         weight_softmax=args.weight_softmax,
                                         num_heads=args.encoder_attention_heads,
                                         weight_dropout=args.weight_dropout)
        else:
            raise NotImplementedError
        self.linear2 = Linear(self.conv_dim, self.embed_dim)

        self.dropout = args.dropout
        self.relu_dropout = args.relu_dropout
        self.input_dropout = args.input_dropout
        self.normalize_before = args.encoder_normalize_before
        self.fc1 = Linear(self.embed_dim, args.encoder_ffn_embed_dim)
        self.fc2 = Linear(args.encoder_ffn_embed_dim, self.embed_dim)
        self.layer_norms = nn.ModuleList([LayerNorm(self.embed_dim) for _ in range(2)]) 
Developer: kakaobrain, Project: helo_word, Lines: 35, Source: lightconv.py

Example 13: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def __init__(self, kernel_size, in_ch, out_ch, bottlececk_dim=0, dropout=0.):
        super().__init__()

        self.conv_residual = None
        if in_ch != out_ch:
            self.conv_residual = nn.utils.weight_norm(
                nn.Conv2d(in_channels=in_ch,
                          out_channels=out_ch,
                          kernel_size=(1, 1)), name='weight', dim=0)
            self.dropout_residual = nn.Dropout(p=dropout)

        self.pad_left = nn.ConstantPad2d((0, 0, kernel_size - 1, 0), 0)

        layers = OrderedDict()
        if bottlececk_dim == 0:
            layers['conv'] = nn.utils.weight_norm(
                nn.Conv2d(in_channels=in_ch,
                          out_channels=out_ch * 2,
                          kernel_size=(kernel_size, 1)), name='weight', dim=0)
            # TODO(hirofumi0810): padding?
            layers['dropout'] = nn.Dropout(p=dropout)
            layers['glu'] = nn.GLU()

        elif bottlececk_dim > 0:
            layers['conv_in'] = nn.utils.weight_norm(
                nn.Conv2d(in_channels=in_ch,
                          out_channels=bottlececk_dim,
                          kernel_size=(1, 1)), name='weight', dim=0)
            layers['dropout_in'] = nn.Dropout(p=dropout)
            layers['conv_bottleneck'] = nn.utils.weight_norm(
                nn.Conv2d(in_channels=bottlececk_dim,
                          out_channels=bottlececk_dim,
                          kernel_size=(kernel_size, 1)), name='weight', dim=0)
            layers['dropout'] = nn.Dropout(p=dropout)
            layers['glu'] = nn.GLU()
            layers['conv_out'] = nn.utils.weight_norm(
                nn.Conv2d(in_channels=bottlececk_dim,
                          out_channels=out_ch * 2,
                          kernel_size=(1, 1)), name='weight', dim=0)
            layers['dropout_out'] = nn.Dropout(p=dropout)

        self.layers = nn.Sequential(layers) 
Developer: hirofumi0810, Project: neural_sp, Lines: 44, Source: glu.py
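The pattern here is the gated convolution of Dauphin et al.: each Conv2d emits out_ch * 2 channels and a GLU gates them down to out_ch, while the left-only padding keeps the convolution causal in time. A minimal standalone sketch; note that for an (N, C, H, W) conv output, halving the channel axis requires nn.GLU(dim=1), whereas the module above keeps the default dim:

import torch
from torch import nn

in_ch, out_ch, kernel_size = 8, 8, 3
conv = nn.utils.weight_norm(
    nn.Conv2d(in_ch, out_ch * 2, kernel_size=(kernel_size, 1)),
    name='weight', dim=0)
pad_left = nn.ConstantPad2d((0, 0, kernel_size - 1, 0), 0)  # causal in time
glu = nn.GLU(dim=1)   # gate over the channel axis

x = torch.randn(2, in_ch, 20, 1)      # (batch, channels, time, 1)
y = glu(conv(pad_left(x)))            # (2, out_ch, 20, 1), gated
print(y.shape)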

Example 14: forward

# Required import: from torch import nn [as alias]
# Or: from torch.nn import GLU [as alias]
def forward(self, query, key, value, mask):
        """Forward of 'Lightweight 2-Dimentional Convolution'.

        This function takes query, key, and value but uses only the query.
        This is just for compatibility with the self-attention layer (attention.py).

        Args:
            query (torch.Tensor): (batch, time1, d_model) input tensor
            key (torch.Tensor): (batch, time2, d_model) NOT USED
            value (torch.Tensor): (batch, time2, d_model) NOT USED
            mask (torch.Tensor): (batch, time1, time2) mask

        Return:
            x (torch.Tensor): (batch, time1, d_model) output

        """
        # linear -> GLU -> lightconv -> linear
        x = query
        B, T, C = x.size()
        H = self.wshare

        # first linear layer
        x = self.linear1(x)

        # GLU activation
        x = self.act(x)

        # convolution along frequency axis
        weight_f = F.softmax(self.weight_f, dim=-1)
        weight_f = F.dropout(weight_f, self.dropout_rate, training=self.training)
        weight_new = torch.zeros(
            B * T, 1, self.kernel_size, device=x.device, dtype=x.dtype
        ).copy_(weight_f)
        xf = F.conv1d(
            x.view(1, B * T, C), weight_new, padding=self.padding_size, groups=B * T
        ).view(B, T, C)

        # lightconv
        x = x.transpose(1, 2).contiguous().view(-1, H, T)  # B x C x T
        weight = F.dropout(self.weight, self.dropout_rate, training=self.training)
        if self.use_kernel_mask:
            self.kernel_mask = self.kernel_mask.to(x.device)
            weight = weight.masked_fill(self.kernel_mask == 0.0, float("-inf"))
        weight = F.softmax(weight, dim=-1)
        x = F.conv1d(x, weight, padding=self.padding_size, groups=self.wshare).view(
            B, C, T
        )
        if self.use_bias:
            x = x + self.bias.view(1, -1, 1)
        x = x.transpose(1, 2)  # B x T x C
        x = torch.cat((x, xf), -1)  # B x T x 2C

        if mask is not None and not self.use_kernel_mask:
            mask = mask.transpose(-1, -2)
            x = x.masked_fill(mask == 0, 0.0)

        # second linear layer
        x = self.linear2(x)
        return x 
Developer: espnet, Project: espnet, Lines: 61, Source: lightconv2d.py
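Putting examples 8 and 14 together, the module is driven like a self-attention layer; a usage sketch assuming the LightweightConvolution2D constructor from example 8 is importable (kernel_size_str "3" with lnum 0 selects a width-3 kernel):

import torch

conv = LightweightConvolution2D(wshare=4, n_feat=64, dropout_rate=0.1,
                                kernel_size_str="3", lnum=0)
q = torch.randn(2, 50, 64)          # (batch, time, d_model)
y = conv(q, q, q, mask=None)        # key and value are ignored
print(y.shape)                      # torch.Size([2, 50, 64])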


Note: The torch.nn.GLU examples in this article were compiled by 純淨天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from open-source projects contributed by their original authors, who retain copyright over the source code; consult each project's license before using or redistributing it, and do not repost without permission.