This page collects typical usage examples of the torch.bmm function in Python. If you are wondering what exactly torch.bmm does, how to call it, or want to see it used in real code, the curated examples below should help.
Fifteen code examples of the bmm function are shown below, sorted by popularity by default.
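Before the examples, here is a minimal, self-contained sketch (not taken from the examples below) of what torch.bmm itself computes: a batched matrix product over two 3-D tensors of shapes (b, n, m) and (b, m, p), returning a tensor of shape (b, n, p).

import torch

a = torch.randn(4, 3, 5)   # batch of 4 matrices, each 3 x 5
b = torch.randn(4, 5, 2)   # batch of 4 matrices, each 5 x 2
out = torch.bmm(a, b)      # batched matrix product, no broadcasting
print(out.shape)           # torch.Size([4, 3, 2])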
Example 1: forward
def forward(self, inputs):
    x, u = inputs
    x = self.bn0(x)
    x = F.tanh(self.linear1(x))
    x = F.tanh(self.linear2(x))

    V = self.V(x)
    mu = F.tanh(self.mu(x))

    Q = None
    if u is not None:
        num_outputs = mu.size(1)
        L = self.L(x).view(-1, num_outputs, num_outputs)
        # lower-triangular L with an exponentiated diagonal
        L = L * self.tril_mask.expand_as(L) + torch.exp(L) * self.diag_mask.expand_as(L)
        # P = L L^T is positive-definite by construction
        P = torch.bmm(L, L.transpose(2, 1))

        u_mu = (u - mu).unsqueeze(2)
        # quadratic advantage term A(x, u) = -1/2 (u - mu)^T P (u - mu)
        A = -0.5 * torch.bmm(torch.bmm(u_mu.transpose(2, 1), P), u_mu)[:, :, 0]

        Q = A + V

    return mu, Q, V
Example 2: predict
def predict(self, x_de, x_en):
    bs = x_de.size(0)
    emb_de = self.embedding_de(x_de)  # bs,n_de,word_dim
    emb_en = self.embedding_en(x_en)  # bs,n_en,word_dim
    h_enc = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
    c_enc = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
    h_dec = Variable(torch.zeros(self.n_layers, bs, self.hidden_dim).cuda())
    c_dec = Variable(torch.zeros(self.n_layers, bs, self.hidden_dim).cuda())
    enc_h, _ = self.encoder(emb_de, (h_enc, c_enc))  # (bs,n_de,hiddensz*2)
    dec_h, _ = self.decoder(emb_en, (h_dec, c_dec))  # (bs,n_en,hiddensz)
    # enc_h is bs,n_de,hiddensz*n_directions. h and c are both n_layers*n_directions,bs,hiddensz
    if self.directions == 2:
        scores = torch.bmm(self.dim_reduce(enc_h), dec_h.transpose(1, 2))
    else:
        scores = torch.bmm(enc_h, dec_h.transpose(1, 2))
    # (bs,n_de,hiddensz) * (bs,hiddensz,n_en) = (bs,n_de,n_en)
    scores[(x_de == pad_token).unsqueeze(2).expand(scores.size())] = -math.inf  # binary mask
    attn_dist = F.softmax(scores, dim=1)  # bs,n_de,n_en
    context = torch.bmm(attn_dist.transpose(2, 1), enc_h)
    # (bs,n_en,n_de) * (bs,n_de,hiddensz*ndirections) = (bs,n_en,hiddensz*ndirections)
    pred = self.vocab_layer(torch.cat([dec_h, context], 2))  # bs,n_en,len(EN.vocab)
    pred = pred[:, :-1, :]  # alignment
    _, tokens = pred.max(2)  # bs,n_en-1
    sauce = Variable(torch.cuda.LongTensor([[sos_token]]*bs))  # bs
    return torch.cat([sauce, tokens], 1), attn_dist
Example 3: forward
def forward(self, feat, right, wrong, batch_wrong, fake=None, fake_diff_mask=None):
    num_wrong = wrong.size(1)
    batch_size = feat.size(0)

    feat = feat.view(-1, self.ninp, 1)
    right_dis = torch.bmm(right.view(-1, 1, self.ninp), feat)
    wrong_dis = torch.bmm(wrong, feat)
    batch_wrong_dis = torch.bmm(batch_wrong, feat)

    wrong_score = torch.sum(torch.exp(wrong_dis - right_dis.expand_as(wrong_dis)), 1) \
        + torch.sum(torch.exp(batch_wrong_dis - right_dis.expand_as(batch_wrong_dis)), 1)

    loss_dis = torch.sum(torch.log(wrong_score + 1))
    loss_norm = right.norm() + feat.norm() + wrong.norm() + batch_wrong.norm()

    if fake is not None:
        fake_dis = torch.bmm(fake.view(-1, 1, self.ninp), feat)
        fake_score = torch.masked_select(torch.exp(fake_dis - right_dis), fake_diff_mask)

        margin_score = F.relu(torch.log(fake_score + 1) - self.margin)
        loss_fake = torch.sum(margin_score)
        loss_dis += loss_fake
        loss_norm += fake.norm()

    loss = (loss_dis + 0.1 * loss_norm) / batch_size

    if fake is not None:
        return loss, loss_fake.data[0] / batch_size
    else:
        return loss
Example 4: forward
def forward(self, vocab):
    with torch.no_grad():
        batch_shape = vocab['sentence'].shape
        s_embedding = self.embedding(vocab['sentence'].cuda())
        a_embedding = self.embedding(vocab['aspect'].cuda())
        packed_s = pack_padded_sequence(s_embedding, vocab['sent_len'], batch_first=True)

    out_s, (h_s, c1) = self.lstm_s(packed_s)  # packed output
    out_a, (h_a, c2) = self.lstm_a(a_embedding)

    with torch.no_grad():
        unpacked_out_s, _ = pad_packed_sequence(out_s, batch_first=True)

    # Pair-wise interaction matrix
    I_matrix = torch.bmm(unpacked_out_s, out_a.permute(0, 2, 1))

    # Column-wise softmax
    a2s_attn = F.softmax(I_matrix, dim=1)

    # Row-wise softmax => Column-wise average => aspect attention
    s2a_attn = F.softmax(I_matrix, dim=2)
    a_attn = torch.mean(s2a_attn, dim=1)

    # Final sentence attn => weighted sum of each individual a2s_attn
    s_attn = torch.bmm(a2s_attn, a_attn.unsqueeze(-1))

    final_rep = torch.bmm(unpacked_out_s.permute(0, 2, 1), s_attn).squeeze(-1)
    pred = self.fc(final_rep)
    return pred
Example 5: forward
def forward(self, ht, hs, mask, weighted_ctx=True):
    '''
    ht: batch x ht_dim
    hs: (seq_len x batch x hs_dim, seq_len x batch x ht_dim)
    mask: seq_len x batch
    '''
    hs, hs_ = hs
    # seq_len, batch, _ = hs.size()
    hs = hs.transpose(0, 1)
    hs_ = hs_.transpose(0, 1)
    # hs: batch x seq_len x hs_dim
    # hs_: batch x seq_len x ht_dim
    # hs_ = self.hs2ht(hs)

    # Alignment/Attention Function
    # batch x ht_dim x 1
    ht = ht.unsqueeze(2)
    # batch x seq_len
    score = torch.bmm(hs_, ht).squeeze(2)
    # attn = F.softmax(score, dim=-1)
    attn = F.softmax(score, dim=-1) * mask.transpose(0, 1) + EPSILON
    attn = attn / attn.sum(-1, keepdim=True)

    # Compute weighted sum of hs by attention.
    # batch x 1 x seq_len
    attn = attn.unsqueeze(1)
    if weighted_ctx:
        # batch x hs_dim
        weight_hs = torch.bmm(attn, hs).squeeze(1)
    else:
        weight_hs = None

    return weight_hs, attn
Example 6: forward_dot
def forward_dot(self, hid, ctx, ctx_mask):
    r"""Computes Luong-style dot attention probabilities between
    decoder's hidden state and source annotations.

    Arguments:
        hid(Variable): A set of decoder hidden states of shape `T*B*H`
            where `T` == 1, `B` is batch dim and `H` is hidden state dim.
        ctx(Variable): A set of annotations of shape `S*B*C` where `S`
            is the source timestep dim, `B` is batch dim and `C`
            is annotation dim.
        ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes
            in the padded timesteps.

    Returns:
        scores(Variable): A variable of shape `S*B` containing normalized
            attention scores for each position and sample.
        z_t(Variable): A variable of shape `B*H` containing the final
            attended context vector for this target decoding timestep.
    """
    # Apply transformations first to make last dims both C and then
    # shuffle dims to prepare for batch mat-mult
    ctx_ = self.ctx2ctx(ctx).permute(1, 2, 0)  # S*B*C -> S*B*C -> B*C*S
    hid_ = self.hid2ctx(hid).permute(1, 0, 2)  # T*B*H -> T*B*C -> B*T*C

    # 'dot' scores of B*T*S
    scores = F.softmax(torch.bmm(hid_, ctx_), dim=-1)

    # Transform back to hidden_dim for further decoders
    # B*T*S x B*S*C -> B*T*C -> B*T*H
    z_t = self.ctx2hid(torch.bmm(scores, ctx.transpose(0, 1)))

    return scores.transpose(0, 1), z_t.transpose(0, 1)
Example 7: forward
def forward(self, output, context):
    batch_size = output.size(0)
    hidden_size = output.size(2)
    input_size = context.size(1)
    # (batch, out_len, dim) * (batch, in_len, dim) -> (batch, out_len, in_len)
    attn = torch.bmm(output, context.transpose(1, 2))
    mask = torch.eq(attn, 0).data.byte()
    attn.data.masked_fill_(mask, -float('inf'))
    attn = F.softmax(attn.view(-1, input_size), dim=1).view(batch_size, -1, input_size)

    # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim)
    mix = torch.bmm(attn, context)

    # concat -> (batch, out_len, 2*dim)
    combined = torch.cat((mix, output), dim=2)
    # output -> (batch, out_len, dim)
    output = F.tanh(self.linear_out(combined.view(-1, 2 * hidden_size))).view(batch_size, -1, hidden_size)

    if not output.is_contiguous():
        output = output.contiguous()

    return output, attn
Example 8: backward
def backward(ctx, grad_output):
    batch1, batch2 = ctx.saved_variables
    grad_add_matrix = grad_batch1 = grad_batch2 = None

    if ctx.needs_input_grad[0]:
        grad_add_matrix = maybe_unexpand(grad_output, ctx.add_matrix_size)
        if ctx.alpha != 1:
            grad_add_matrix = grad_add_matrix.mul(ctx.alpha)

    if any(ctx.needs_input_grad[1:]):
        batch_grad_output = (grad_output
                             .unsqueeze(0)
                             .expand(batch1.size(0), batch1.size(1), batch2.size(2)))

    if ctx.needs_input_grad[1]:
        grad_batch1 = torch.bmm(batch_grad_output, batch2.transpose(1, 2))
        if ctx.beta != 1:
            grad_batch1 *= ctx.beta

    if ctx.needs_input_grad[2]:
        grad_batch2 = torch.bmm(batch1.transpose(1, 2), batch_grad_output)
        if ctx.beta != 1:
            grad_batch2 *= ctx.beta

    return grad_add_matrix, grad_batch1, grad_batch2, None, None, None
Example 9: forward
def forward(self, q, k, v):
    b_q, t_q, dim_q = list(q.size())
    b_k, t_k, dim_k = list(k.size())
    b_v, t_v, dim_v = list(v.size())
    assert(b_q == b_k and b_k == b_v)  # batch size should be equal
    assert(dim_q == dim_k)  # dims should be equal
    assert(t_k == t_v)  # times should be equal
    b = b_q
    qk = torch.bmm(q, k.transpose(1, 2))  # b x t_q x t_k
    qk.div_(dim_k ** 0.5)
    mask = None
    if self.causal and t_q > 1:
        causal_mask = q.data.new(t_q, t_k).byte().fill_(1).triu_(1)
        mask = Variable(causal_mask.unsqueeze(0).expand(b, t_q, t_k),
                        requires_grad=False)
    if self.mask_k is not None:
        mask_k = self.mask_k.unsqueeze(1).expand(b, t_q, t_k)
        mask = mask_k if mask is None else mask | mask_k
    if self.mask_q is not None:
        mask_q = self.mask_q.unsqueeze(2).expand(b, t_q, t_k)
        mask = mask_q if mask is None else mask | mask_q
    if mask is not None:
        qk.masked_fill_(mask, -1e9)

    sm_qk = F.softmax(qk, dim=2)
    sm_qk = self.dropout(sm_qk)
    return torch.bmm(sm_qk, v), sm_qk  # b x t_q x dim_v
Example 10: lstsq
def lstsq(b, y, alpha=0.01):
    """
    Batched linear least-squares for pytorch with optional L2 (ridge) regularization.

    Parameters
    ----------
    b : shape(L, M, N)
    y : shape(L, M)

    Returns
    -------
    tuple of (coefficients, model, residuals)
    """
    bT = b.transpose(-1, -2)
    AA = torch.bmm(bT, b)
    if alpha != 0:
        # Tikhonov/ridge regularization: add alpha to the diagonal of b^T b
        diag = torch.diagonal(AA, dim1=1, dim2=2)
        diag += alpha
    RHS = torch.bmm(bT, y[:, :, None])
    # torch.gesv solves AA @ X = RHS (older PyTorch API; newer releases use torch.linalg.solve)
    X, LU = torch.gesv(RHS, AA)
    fit = torch.bmm(b, X)[..., 0]
    res = y - fit
    return X[..., 0], fit, res
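A hedged usage sketch for the lstsq helper above; the tensor shapes follow its docstring, and it assumes an older PyTorch release in which torch.gesv is still available.

import torch

L_batches, M, N = 8, 100, 3              # 8 independent problems, 100 samples, 3 features
b = torch.randn(L_batches, M, N)         # batched design matrices
true_coef = torch.randn(L_batches, N, 1)
y = torch.bmm(b, true_coef)[..., 0] + 0.01 * torch.randn(L_batches, M)

coef, fit, res = lstsq(b, y, alpha=0.01)
print(coef.shape, fit.shape, res.shape)  # (8, 3), (8, 100), (8, 100)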
Example 11: bnorm
def bnorm(x, U):
    # U acts as a batched averaging operator: mx is the U-weighted mean of x
    mx = torch.bmm(U, x)
    subs = x - mx
    subs2 = subs * subs
    # U-weighted second moment of the residuals (variance estimate)
    vx = torch.bmm(U, subs2)
    out = subs / (vx.clamp(min=1e-10).sqrt() + 1e-5)
    return out
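A small usage sketch for bnorm, assuming (as the code suggests) that U is a batched averaging operator; the uniform row-stochastic U below is purely illustrative.

import torch

B, N, F_dim = 2, 10, 4
x = torch.randn(B, N, F_dim)
U = torch.full((B, N, N), 1.0 / N)   # each row averages over all N entries
out = bnorm(x, U)                    # x centred and scaled by its U-weighted mean/variance
print(out.shape)                     # torch.Size([2, 10, 4])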
Example 12: forward
def forward(self, q, k, v, attn_mask=None):
    d_k, d_v = self.d_k, self.d_v
    n_head = self.n_head
    residual = q

    mb_size, len_q, q_hidden_size = q.size()
    mb_size, len_k, k_hidden_size = k.size()
    mb_size, len_v, v_hidden_size = v.size()

    # treat as a (n_head) size batch
    q_s = q.repeat(n_head, 1, 1).view(n_head, -1, q_hidden_size)  # n_head x (mb_size*len_q) x d_model
    k_s = k.repeat(n_head, 1, 1).view(n_head, -1, k_hidden_size)  # n_head x (mb_size*len_k) x d_model
    v_s = v.repeat(n_head, 1, 1).view(n_head, -1, v_hidden_size)  # n_head x (mb_size*len_v) x d_model

    # treat the result as a (n_head * mb_size) size batch
    q_s = torch.bmm(q_s, self.w_qs).view(-1, len_q, d_k)  # (n_head*mb_size) x len_q x d_k
    k_s = torch.bmm(k_s, self.w_ks).view(-1, len_k, d_k)  # (n_head*mb_size) x len_k x d_k
    v_s = torch.bmm(v_s, self.w_vs).view(-1, len_v, d_v)  # (n_head*mb_size) x len_v x d_v

    # perform attention, result size = (n_head * mb_size) x len_q x d_v
    outputs, attns = self.attention.forward(q_s, k_s, v_s, attn_mask=attn_mask.repeat(n_head, 1, 1))

    # back to original mb_size batch, result size = mb_size x len_q x (n_head*d_v)
    outputs = torch.cat(torch.split(outputs, mb_size, dim=0), dim=-1)

    # project back to residual size
    outputs = self.proj.forward(outputs)
    outputs = self.dropout(outputs)

    return self.layer_norm(outputs + residual), attns
Example 13: predict2
def predict2(self, x_de, beamsz, gen_len):
    emb_de = self.embedding_de(x_de)  # "batch size",n_de,word_dim, but "batch size" is 1 in this case!
    h0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
    c0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
    enc_h, _ = self.encoder(emb_de, (h0, c0))
    # since enc batch size=1, enc_h is 1,n_de,hiddensz*n_directions
    if self.directions == 2:
        enc_h = self.dim_reduce(enc_h)  # 1,n_de,hiddensz
    masterheap = CandList(self.n_layers, self.hidden_dim, enc_h.size(1), beamsz)
    # in the following loop, the beam has length 1 on the first iteration and the true beamsz (100) afterward
    for i in range(gen_len):
        prev = masterheap.get_prev()  # beamsz
        emb_t = self.embedding_en(prev)  # embed the last thing we generated. beamsz,word_dim
        enc_h_expand = enc_h.expand(prev.size(0), -1, -1)  # beamsz,n_de,hiddensz
        h, c = masterheap.get_hiddens()  # (n_layers,beamsz,hiddensz),(n_layers,beamsz,hiddensz)
        dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))  # dec_h is beamsz,1,hiddensz (batch_first=True)
        scores = torch.bmm(enc_h_expand, dec_h.transpose(1, 2)).squeeze(2)
        # (beamsz,n_de,hiddensz) * (beamsz,hiddensz,1) = (beamsz,n_de,1). squeeze to beamsz,n_de
        attn_dist = F.softmax(scores, dim=1)
        if self.attn_type == "hard":
            _, argmax = attn_dist.max(1)  # beamsz. for each beam, select the most likely German word to attend to
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1).cuda())
            context = torch.bmm(one_hot.unsqueeze(1), enc_h_expand).squeeze(1)
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h_expand).squeeze(1)
        # the difference between hard and soft is just whether we use a one-hot vector or a distribution
        # context is beamsz,hiddensz*n_directions
        pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))  # beamsz,len(EN.vocab)
        # TODO: set the columns corresponding to <pad>,<unk>,</s>,etc to 0
        masterheap.update_beam(pred)
        masterheap.update_hiddens(h, c)
        masterheap.update_attentions(attn_dist)
        masterheap.firstloop = False
    return masterheap.probs, masterheap.wordlist, masterheap.attentions
Example 14: predict
def predict(self, x_de, x_en):
    bs = x_de.size(0)
    emb_de = self.embedding_de(x_de)  # bs,n_de,word_dim
    emb_en = self.embedding_en(x_en)  # bs,n_en,word_dim
    h = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
    c = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
    enc_h, _ = self.encoder(emb_de, (h, c))
    dec_h, _ = self.decoder(emb_en, (h, c))
    # enc_h is bs,n_de,hiddensz*n_directions. h and c are both n_layers*n_directions,bs,hiddensz
    if self.directions == 2:
        enc_h = self.dim_reduce(enc_h)  # bs,n_de,hiddensz
    scores = torch.bmm(enc_h, dec_h.transpose(1, 2))
    # (bs,n_de,hiddensz) * (bs,hiddensz,n_en) = (bs,n_de,n_en)
    y = [Variable(torch.cuda.LongTensor([sos_token]*bs))]  # bs
    self.attn = []
    for t in range(x_en.size(1)-1):  # iterate over English words, with teacher forcing
        attn_dist = F.softmax(scores[:, :, t], dim=1)  # bs,n_de
        self.attn.append(attn_dist.data)
        if self.attn_type == "hard":
            _, argmax = attn_dist.max(1)  # bs. for each batch, select the most likely German word to attend to
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1).cuda())
            context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
        # the difference between hard and soft is just whether we use a one-hot vector or a distribution
        # context is bs,hiddensz
        pred = self.vocab_layer(torch.cat([dec_h[:, t, :], context], 1))  # bs,len(EN.vocab)
        _, next_token = pred.max(1)  # bs
        y.append(next_token)
    self.attn = torch.stack(self.attn, 0).transpose(0, 1)  # bs,n_en,n_de (for visualization!)
    y = torch.stack(y, 0).transpose(0, 1)  # bs,n_en
    return y, self.attn
Example 15: predict
def predict(self, x, attn_type="hard"):
    # predict with greedy decoding
    emb = self.embedding(x)
    h = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
    c = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
    enc_h, _ = self.encoder(emb, (h, c))
    y = [Variable(torch.zeros(x.size(0)).long())]
    self.attn = []
    for t in range(x.size(1)):
        emb_t = self.embedding(y[-1])
        dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))
        scores = torch.bmm(enc_h, dec_h.transpose(1, 2)).squeeze(2)
        attn_dist = F.softmax(scores, dim=1)
        self.attn.append(attn_dist.data)
        if attn_type == "hard":
            _, argmax = attn_dist.max(1)
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1))
            context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
        pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))
        _, next_token = pred.max(1)
        y.append(next_token)
    self.attn = torch.stack(self.attn, 0).transpose(0, 1)
    return torch.stack(y, 0).transpose(0, 1)