

Python nn.MultiheadAttention Code Examples

This article collects typical usage examples of the torch.nn.MultiheadAttention class in Python. If you are wondering how exactly nn.MultiheadAttention is used, or are looking for real-world examples of it, the selected code samples below may help. You can also explore further usage examples from its containing module, torch.nn.


The following presents 12 code examples of nn.MultiheadAttention, sorted by popularity by default.
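
Before the project examples, here is a minimal standalone usage sketch (not taken from any of the projects below; the sizes are made up for illustration):

import torch
from torch import nn

# Hypothetical sizes for illustration only.
embed_dim, num_heads, seq_len, batch_size = 64, 8, 10, 2

mha = nn.MultiheadAttention(embed_dim, num_heads, dropout=0.1)

# By default nn.MultiheadAttention expects inputs of shape (seq_len, batch, embed_dim).
query = torch.randn(seq_len, batch_size, embed_dim)
key = value = query  # self-attention: query, key and value are the same tensor

attn_output, attn_weights = mha(query, key, value)
print(attn_output.shape)   # torch.Size([10, 2, 64])
print(attn_weights.shape)  # torch.Size([2, 10, 10]), averaged over the heads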

Example 1: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def __init__(self, embed_dim, num_heads, keep_prob_attention, keep_prob_residual, keep_prob_mlp, n_ctx=512,
                 scale=False, use_builtin_mha=False):
        if use_builtin_mha:
            self.attention = nn.MultiheadAttention(embed_dim=embed_dim,
                                                   num_heads=num_heads,
                                                   dropout=keep_prob_attention)
        else:
            self.attention = ModifiedMultiHeadedAttention(num_state=embed_dim,
                                                          n_ctx=n_ctx,
                                                          num_heads=num_heads,
                                                          keep_prob_attention=keep_prob_attention,
                                                          keep_prob_residual=keep_prob_residual,
                                                          scale=scale)
        self.layer_norm1 = LayerNorm(embed_dim)
        self.mlp = MultiLayerPerceptron(4 * embed_dim, embed_dim, keep_prob_mlp)
        self.layer_norm2 = LayerNorm(embed_dim) 
Developer: hbahadirsahin, Project: nlp-experiments-in-pytorch, Lines: 18, Source: Transformer_OpenAI.py
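
Note (not part of the original project code): the `dropout` argument of nn.MultiheadAttention is a drop probability, not a keep probability, so passing a variable named keep_prob_attention is only correct if it already stores the drop rate. A hypothetical construction with an explicit drop rate:

from torch import nn

attn = nn.MultiheadAttention(embed_dim=768, num_heads=12, dropout=0.1)  # drops 10% of attention weights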

Example 2: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def __init__(self, embed_size, hidden_size, output_size, n_layer=2, dropout=0.5, pretrained=None):
        super(Decoder, self).__init__()
        self.embed_size, self.hidden_size = embed_size, hidden_size
        self.output_size = output_size
        self.n_layer = n_layer

        self.embed = nn.Embedding(output_size, embed_size)
        self.rnn = nn.GRU(hidden_size + embed_size, hidden_size, 
                          num_layers=n_layer, dropout=(0 if n_layer == 1 else dropout))
        self.out = nn.Linear(hidden_size, output_size)
        self.pos_emb = PositionEmbedding(embed_size, dropout=dropout)
        self.self_attention_context1 = nn.MultiheadAttention(embed_size, 8)
        self.layer_norm1 = nn.LayerNorm(embed_size)
        self.droput1 = nn.Dropout(p=dropout)
        self.self_attention_context2 = nn.MultiheadAttention(embed_size, 8)
        self.layer_norm2 = nn.LayerNorm(embed_size)
        self.droput2 = nn.Dropout(p=dropout)
        # self.self_attention_context3 = nn.MultiheadAttention(embed_size, 8)
        # self.layer_norm3 = nn.LayerNorm(embed_size)
        # self.droput3 = nn.Dropout(p=dropout)
        
        self.self_attention = nn.MultiheadAttention(hidden_size, 8)
        self.word_level_attn = Attention(embed_size)
        self.init_weight() 
Developer: gmftbyGMFTBY, Project: MultiTurnDialogZoo, Lines: 26, Source: MReCoSa_RA.py
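
The attention / layer-norm / dropout triples defined above are typically chained with a residual connection in the forward pass. A minimal sketch of that pattern (assumed, not taken from MReCoSa_RA.py; the sizes are hypothetical):

import torch
from torch import nn

embed_size = 512                                # hypothetical size
context = torch.randn(5, 2, embed_size)         # (turns, batch, embed_size)

attn1 = nn.MultiheadAttention(embed_size, 8)
norm1 = nn.LayerNorm(embed_size)
drop1 = nn.Dropout(p=0.5)

attended, _ = attn1(context, context, context)  # self-attention over the context
context = norm1(context + drop1(attended))      # residual connection + post-norm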

Example 3: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def __init__(self, input_size, embed_size, output_size, utter_hidden, 
                 decoder_hidden, teach_force=0.5, pad=1, sos=1, dropout=0.5, 
                 utter_n_layer=1, pretrained=None):
        super(MReCoSa, self).__init__()
        self.encoder = Encoder(input_size, embed_size, utter_hidden, n_layers=utter_n_layer,
                               dropout=dropout, pretrained=pretrained)
        self.decoder = Decoder(embed_size, decoder_hidden, output_size, n_layer=utter_n_layer,
                               dropout=dropout, pretrained=pretrained)
        self.teach_force = teach_force
        self.pad, self.sos = pad, sos
        self.output_size = output_size
        self.pos_emb = PositionEmbedding(embed_size, dropout=dropout)
        self.self_attention_context1 = nn.MultiheadAttention(embed_size, 8)
        self.layer_norm1 = nn.LayerNorm(embed_size)
        self.self_attention_context2 = nn.MultiheadAttention(embed_size, 8)
        self.layer_norm2 = nn.LayerNorm(embed_size)
        self.self_attention_context3 = nn.MultiheadAttention(embed_size, 8)
        self.layer_norm3 = nn.LayerNorm(embed_size) 
Developer: gmftbyGMFTBY, Project: MultiTurnDialogZoo, Lines: 20, Source: MReCoSa.py

Example 4: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def __init__(self, embed_dim, hidden_dim, num_embeddings, num_max_positions, num_heads, num_layers, dropout, causal):
        super().__init__()
        self.causal = causal
        self.tokens_embeddings = nn.Embedding(num_embeddings, embed_dim)
        self.position_embeddings = nn.Embedding(num_max_positions, embed_dim)
        self.dropout = nn.Dropout(dropout)

        self.attentions, self.feed_forwards = nn.ModuleList(), nn.ModuleList()
        self.layer_norms_1, self.layer_norms_2 = nn.ModuleList(), nn.ModuleList()
        for _ in range(num_layers):
            self.attentions.append(nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout))
            self.feed_forwards.append(nn.Sequential(nn.Linear(embed_dim, hidden_dim),
                                                    nn.ReLU(),
                                                    nn.Linear(hidden_dim, embed_dim)))
            self.layer_norms_1.append(nn.LayerNorm(embed_dim, eps=1e-12))
            self.layer_norms_2.append(nn.LayerNorm(embed_dim, eps=1e-12)) 
Developer: prrao87, Project: fine-grained-sentiment, Lines: 18, Source: model.py
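
A hedged sketch of how a forward pass over the modules defined above could look (the actual model.py may differ; the post-norm arrangement and helper name are assumptions). It mainly illustrates how a causal attn_mask is built for nn.MultiheadAttention:

import torch

def transformer_forward(model, h, padding_mask=None):
    # Hypothetical helper, not from model.py. `model` is an instance of the class
    # above; `h` is an already-embedded tensor of shape (seq_len, batch, embed_dim).
    attn_mask = None
    if model.causal:
        # Upper-triangular -inf mask: position i cannot attend to positions j > i.
        seq_len = h.size(0)
        attn_mask = torch.full((seq_len, seq_len), float('-inf'), device=h.device)
        attn_mask = torch.triu(attn_mask, diagonal=1)

    for attn, ff, ln1, ln2 in zip(model.attentions, model.feed_forwards,
                                  model.layer_norms_1, model.layer_norms_2):
        a, _ = attn(h, h, h, attn_mask=attn_mask, key_padding_mask=padding_mask)
        h = ln1(h + model.dropout(a))       # residual around attention
        h = ln2(h + model.dropout(ff(h)))   # residual around feed-forward
    return h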

Example 5: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def __init__(self, d_model, nhead, dropout=0.1):
        super(SAN, self).__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        self.dropout = nn.Dropout(p=dropout)
        self.norm = nn.LayerNorm(d_model) 
Developer: lixin4ever, Project: BERT-E2E-ABSA, Lines: 9, Source: absa_layer.py

Example 6: _init_weights

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def _init_weights(module):
        r"""Initialize weights like BERT - N(0.0, 0.02), bias = 0."""

        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=0.02)
        elif isinstance(module, nn.MultiheadAttention):
            module.in_proj_weight.data.normal_(mean=0.0, std=0.02)
            module.out_proj.weight.data.normal_(mean=0.0, std=0.02)
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=0.02)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_() 
Developer: kdexd, Project: virtex, Lines: 14, Source: textual_heads.py
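
A possible way to apply the initializer above (assumed usage, not from virtex, and assuming _init_weights is available as a standalone function): nn.Module.apply() visits every submodule, so the packed in_proj_weight and the out_proj layer of each nn.MultiheadAttention get the BERT-style N(0, 0.02) init alongside Linear and Embedding weights.

import torch
from torch import nn

encoder = nn.TransformerEncoder(
    nn.TransformerEncoderLayer(d_model=256, nhead=8), num_layers=2)
encoder.apply(_init_weights)  # walks every submodule, including MultiheadAttention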

Example 7: dummy_attention

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def dummy_attention(key  : torch.Tensor, 
                    query: torch.Tensor, 
                    value: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""function for dummy in jit-compile features of torch, which have the same inputs and 
    outputs to nn.MultiheadAttention().__call__()
    
    Args:
        key (T): inputs to be passed as output
        query (T): dummy inputs
        value (T): dummy inputs
    
    Returns:
        Tuple[T, T]: values = (key, dummy outputs = torch.Tensor([]))
    """
    return key, torch.Tensor([]) 
Developer: p768lwy3, Project: torecsys, Lines: 17, Source: operations.py
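
A short illustration (hypothetical sizes) of the interchangeable call signature: both calls return an (output, attention-weights) pair, so dummy_attention can stand in for nn.MultiheadAttention when a scripted model needs a no-op branch.

import torch
from torch import nn

q = k = v = torch.randn(4, 2, 16)                     # (seq_len, batch, embed_dim)
real_out, real_weights = nn.MultiheadAttention(16, 4)(q, k, v)
dummy_out, dummy_weights = dummy_attention(k, q, v)   # simply returns (key, empty tensor)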

Example 8: show_attention

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def show_attention(attentions : np.ndarray, 
                   xaxis      : Union[list, str] = None, 
                   yaxis      : Union[list, str] = None, 
                   savedir    : str = None):
    r"""Show attention of MultiheadAttention in a mpl heatmap
    
    Args:
        attentions (np.ndarray), shape = (sequence length, sequence length), dtype = np.float32: Attentions Weights of output of nn.MultiheadAttention
        xaxis (str, optional): string or list of xaxis. Defaults to None.
        yaxis (str, optional): string or list of yaxis. Defaults to None.
        savedir (str, optional): string of directory to save the attention png. Defaults to None.
    """
    # set up figure with colorbar
    fig = plt.figure()
    ax  = fig.add_subplot(111)
    cax = ax.matshow(attentions)
    fig.colorbar(cax)

    # set up axes
    if xaxis is not None:
        if isinstance(xaxis, str):
            xaxis = [""] + xaxis.split(",")
        elif isinstance(xaxis, list):
            xaxis = [""] + xaxis
        ax.set_xticklabels(xaxis, rotation=90)
    
    if yaxis is not None:
        if isinstance(yaxis, str):
            yaxis = [""] + yaxis.split(",")
        elif isinstance(yaxis, list):
            yaxis = [""] + yaxis
        ax.set_yticklabels(yaxis)
    
    # show label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
    
    if savedir is None:
        plt.show()
    else:
        plt.savefig(savedir) 
Developer: p768lwy3, Project: torecsys, Lines: 43, Source: operations.py
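
One possible way to produce inputs for show_attention (assumed usage, not from torecsys): take the averaged attention weights returned by nn.MultiheadAttention for a single sample and pass them in as a NumPy array.

import torch
from torch import nn

tokens = ["the", "cat", "sat"]
x = torch.randn(len(tokens), 1, 32)                 # (seq_len, batch=1, embed_dim)
_, weights = nn.MultiheadAttention(32, 4)(x, x, x)  # weights: (1, seq_len, seq_len)
show_attention(weights[0].detach().numpy(), xaxis=tokens, yaxis=tokens)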

Example 9: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def __init__(self, hidden_size, nhead=8, dropout=0.3):
        super(Multi_head_attention_trs, self).__init__()
        self.nhead = nhead
        self.hidden_size = hidden_size
        
        if hidden_size % nhead != 0:
            raise Exception(f'hidden_size must be divisible by nhead, but got {hidden_size}/{nhead}.')
        
        self.multi_head_attention = nn.MultiheadAttention(hidden_size, nhead)
        self.layer_norm = nn.LayerNorm(hidden_size)
        self.final_attn = Attention(hidden_size) 
Developer: gmftbyGMFTBY, Project: MultiTurnDialogZoo, Lines: 13, Source: layers.py
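
For reference, nn.MultiheadAttention enforces the same divisibility constraint internally (embed_dim must be divisible by num_heads), so the explicit check above only fails earlier and with a project-specific message. A hypothetical failing construction:

from torch import nn

nn.MultiheadAttention(100, 8)  # raises an error at construction: 100 is not divisible by 8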

Example 10: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def __init__(self,
                 d_model: int,
                 nhead: int,
                 dim_feedforward: int = 2048,
                 dropout: float = 0.1) -> None:
        """Initialize a TransformerEncoderLayer.

        Parameters
        ----------
        d_model : int
            The number of expected features in the input.
        nhead : int
            The number of heads in the multi-head attention models.
        dim_feedforward : int, optional
            The dimension of the feedforward network (default=2048).
        dropout : float, optional
            The dropout value (default=0.1).

        """
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        self.dropout = nn.Dropout(dropout)
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout) 
Developer: asappresearch, Project: flambe, Lines: 32, Source: transformer.py
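
For comparison (an observation, not a claim about flambe's implementation), torch.nn ships a built-in layer with the same constructor arguments and a matching arrangement of attention, feed-forward, norm and dropout modules:

import torch
from torch import nn

layer = nn.TransformerEncoderLayer(d_model=512, nhead=8,
                                   dim_feedforward=2048, dropout=0.1)
out = layer(torch.randn(10, 2, 512))   # input/output shape: (seq_len, batch, d_model)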

Example 11: __init__

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def __init__(self,
                 d_model: int,
                 nhead: int,
                 dim_feedforward: int = 2048,
                 dropout: float = 0.1,
                 sru_dropout: Optional[float] = None,
                 bidirectional: bool = False,
                 **kwargs: Dict[str, Any]) -> None:
        """Initialize a TransformerSRUEncoderLayer.

        Parameters
        ----------
        d_model : int
            The number of expected features in the input.
        nhead : int
            The number of heads in the multi-head attention models.
        dim_feedforward : int, optional
            The dimension of the feedforward network (default=2048).
        dropout : float, optional
            The dropout value (default=0.1).
        sru_dropout: float, optional
            Dropout for the SRU cell. If not given, uses the same
            dropout value as the rest of the transformer.
        bidirectional: bool
            Whether the SRU module should be bidirectional.
            Default ``False``.

        Extra keyword arguments are passed to the SRUCell.

        """
        super().__init__()

        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        self.sru = SRUCell(d_model,
                           dim_feedforward,
                           dropout,
                           sru_dropout or dropout,
                           bidirectional=bidirectional,
                           has_skip_term=False, **kwargs)

        self.linear2 = nn.Linear(dim_feedforward, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout) 
Developer: asappresearch, Project: flambe, Lines: 47, Source: transformer_sru.py

Example 12: load_state_dict

# Required import: from torch import nn [as alias]
# Or: from torch.nn import MultiheadAttention [as alias]
def load_state_dict(self, state_dict):
        """ Loads module from previously saved state.
        Supports loading from both DPMultiheadAttention
        and nn.MultiheadAttention modules
        """
        if "in_proj_weight" in state_dict:
            qweight, kweight, vweight = state_dict["in_proj_weight"].chunk(3, dim=0)

            state_dict["qlinear.weight"] = qweight
            state_dict["klinear.weight"] = kweight
            state_dict["vlinear.weight"] = vweight
            del state_dict["in_proj_weight"]

        if "in_proj_bias" in state_dict:
            qbias, kbias, vbias = state_dict["in_proj_bias"].chunk(3, dim=0)

            state_dict["qlinear.bias"] = qbias
            state_dict["klinear.bias"] = kbias
            state_dict["vlinear.bias"] = vbias
            del state_dict["in_proj_bias"]

        if "bias_k" in state_dict:
            state_dict["seq_bias_k.bias"] = state_dict["bias_k"].squeeze()
            del state_dict["bias_k"]

        if "bias_v" in state_dict:
            state_dict["seq_bias_v.bias"] = state_dict["bias_v"].squeeze()
            del state_dict["bias_v"]

        if "q_proj_weight" in state_dict:
            state_dict["qlinear.weight"] = state_dict["q_proj_weight"]
            del state_dict["q_proj_weight"]

        if "k_proj_weight" in state_dict:
            state_dict["klinear.weight"] = state_dict["k_proj_weight"]
            del state_dict["k_proj_weight"]

        if "v_proj_weight" in state_dict:
            state_dict["vlinear.weight"] = state_dict["v_proj_weight"]
            del state_dict["v_proj_weight"]

        super(DPMultiheadAttention, self).load_state_dict(state_dict) 
Developer: facebookresearch, Project: pytorch-dp, Lines: 44, Source: dp_multihead_attention.py
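
A brief illustration of the key remapping handled above (hypothetical sizes): a stock nn.MultiheadAttention packs the query/key/value projections into a single in_proj_weight of shape (3 * embed_dim, embed_dim), which chunk(3, dim=0) splits back into per-projection matrices.

import torch
from torch import nn

mha = nn.MultiheadAttention(embed_dim=32, num_heads=4)
sd = mha.state_dict()
print(sd["in_proj_weight"].shape)           # torch.Size([96, 32])

q_w, k_w, v_w = sd["in_proj_weight"].chunk(3, dim=0)
print(q_w.shape, k_w.shape, v_w.shape)      # three (32, 32) projection matrices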


Note: the torch.nn.MultiheadAttention examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective authors; copyright of the source code belongs to the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.