Python functional.softmax函数代码示例

本文整理汇总了Python中torch.nn.functional.softmax函数的典型用法代码示例。如果您正苦于以下问题：Python softmax函数的具体用法？Python softmax怎么用？Python softmax使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了softmax函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: forward

    def forward(self, vocab):
        """Score a batch with attention-over-attention pooling of two LSTMs.

        Args:
            vocab: Mapping that provides the ``'sentence'`` and ``'aspect'``
                index tensors (both are moved to the GPU here) and
                ``'sent_len'``, the per-sample lengths handed to
                ``pack_padded_sequence``.  The ``batch_first=True`` arguments
                imply a batch-first layout -- TODO confirm against the loader.

        Returns:
            The output of ``self.fc`` applied to the attention-weighted sum of
            the sentence LSTM outputs.
        """
        # Embedding look-ups stay under no_grad, so the embedding table is not
        # updated through this forward pass.
        with torch.no_grad():
            s_embedding = self.embedding(vocab['sentence'].cuda())
            a_embedding = self.embedding(vocab['aspect'].cuda())

            packed_s = pack_padded_sequence(s_embedding, vocab['sent_len'], batch_first=True)

        out_s, _ = self.lstm_s(packed_s)  # packed output
        out_a, _ = self.lstm_a(a_embedding)

        # Unpack with autograd enabled.  Doing this under torch.no_grad()
        # detaches the result from the graph, so no gradient would ever reach
        # the parameters of self.lstm_s.
        unpacked_out_s, _ = pad_packed_sequence(out_s, batch_first=True)

        # Pair-wise interaction matrix
        I_matrix = torch.bmm(unpacked_out_s, out_a.permute(0, 2, 1))

        # Column-wise softmax
        a2s_attn = F.softmax(I_matrix, dim=1)

        # Row-wise softmax => Column-wise average => aspect attention
        s2a_attn = F.softmax(I_matrix, dim=2)
        a_attn = torch.mean(s2a_attn, dim=1)

        # Final sentence attn => weighted sum of each individual a2s_attn
        s_attn = torch.bmm(a2s_attn, a_attn.unsqueeze(-1))

        final_rep = torch.bmm(unpacked_out_s.permute(0, 2, 1), s_attn).squeeze(-1)
        return self.fc(final_rep)

开发者ID:bearcave9，项目名称:Weekend-Projects，代码行数:30，代码来源:AOA_LSTM.py

示例2: softmax

def softmax(tensor):
    r"""
    Apply softmax over the last dimension of a Tensor or a Variable.

    A Variable input is returned as the Variable produced by ``F.softmax``;
    any other input is wrapped in a Variable first and the ``.data`` of the
    result is handed back.
    TODO: Remove once https://github.com/pytorch/pytorch/issues/2633 is resolved.
    """
    if isinstance(tensor, Variable):
        return F.softmax(tensor, -1)
    wrapped = Variable(tensor)
    normalized = F.softmax(wrapped, -1)
    return normalized.data

开发者ID:Jsmilemsj，项目名称:pytorch，代码行数:8，代码来源:utils.py

示例3: train

def _to_cuda(data):
    """Move a tensor, or every element of a list (in place), to the GPU."""
    if isinstance(data, list):
        for idx, item in enumerate(data):
            data[idx] = item.cuda()
        return data
    return data.cuda()


def train(model, trainLoader, criterion, optimizer, evalData=None,
          epoch=1, echoStep=100, evalStep=1000, saveStep=5000, savePath="./"):
    """Train ``model`` and periodically log, evaluate and checkpoint it.

    The step counter ``i`` starts at 1 and keeps increasing across epochs.
    Logging, evaluation and checkpointing for a step happen *before* that
    step's optimizer update, so a checkpoint holds the weights that produced
    the reported loss.

    Args:
        model: Callable returning class scores for a batch ``x``.
        trainLoader: Sized iterable of ``(x, y)`` batches; ``x`` is a tensor
            or a list of tensors.
        criterion: Loss callable ``criterion(out, y)``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        evalData: Optional ``(evalX, evalY)`` pair evaluated every
            ``evalStep`` steps.  A list ``evalX`` is moved to the GPU in place.
        epoch: Number of passes over ``trainLoader``.
        echoStep: Print the training loss/accuracy every this many steps.
        evalStep: Evaluate on ``evalData`` every this many steps.
        saveStep: Save ``model.state_dict()`` every this many steps.
        savePath: Directory that receives ``m_<step>_<epoch>.pt`` and
            ``final.pt``.
    """
    use_cuda = torch.cuda.is_available()

    has_eval = evalData is not None
    if has_eval:
        evalX, evalY = evalData
        if use_cuda:
            evalY = evalY.cuda()
            evalX = _to_cuda(evalX)

    batchLen = len(trainLoader)
    for epochIdx in range(epoch):
        for i, batch in enumerate(trainLoader, batchLen * epochIdx + 1):
            x, y = batch
            if use_cuda:
                y = y.cuda()
                x = _to_cuda(x)
            out = model(x)
            loss = criterion(out, y)

            # argmax of the raw scores equals argmax of their softmax.
            pred = torch.argmax(out, dim=1)
            correct = pred.eq(y).sum()
            acc = float(correct) / len(y)

            # print loss
            if i % echoStep == 0:
                print("Step %d/%d/%d : Loss %.4f , Acc %.4f " % (i, batchLen * epoch, epochIdx + 1, float(loss), acc))
            # evaluate
            if has_eval and i % evalStep == 0:
                # No graph is needed for evaluation; this only saves memory.
                with torch.no_grad():
                    evalOut = model(evalX)
                    evalLoss = criterion(evalOut, evalY)
                    correct = torch.argmax(evalOut, dim=1).eq(evalY).sum()
                evalAcc = float(correct) / len(evalY)
                print("------------------------------------------------")
                print("Evaluate %d Sample : Loss %.4f , Acc %.4f " % (evalY.size(0), float(evalLoss), evalAcc))
                print("")
            # save model
            if i % saveStep == 0:
                outFile = "%s/m_%d_%d.pt" % (savePath, i, epochIdx + 1)
                torch.save(model.state_dict(), outFile)
                print("Save model : %s" % (outFile))

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    outFile = "%s/final.pt" % (savePath)
    torch.save(model.state_dict(), outFile)
    print("Save model : %s" % (outFile))

开发者ID:quanwei888，项目名称:myspace，代码行数:58，代码来源:SimLSTMModel.py

示例4: validate

def validate(eval_loader, model, log, global_step, epoch):
    """Evaluate ``model`` on ``eval_loader`` and return the average top-1 score.

    Args:
        eval_loader: Sized iterable of ``(input, target)`` batches.  Targets
            equal to ``NO_LABEL`` are ignored by the loss.
        model: Callable returning two outputs; only the first one is scored.
        log: Object whose ``record(epoch, dict)`` receives the meter values,
            averages and sums together with ``global_step``.
        global_step: Stored in the log record under ``'step'``.
        epoch: Passed as the first argument of ``log.record``.

    Returns:
        ``meters['top1'].avg``.

    Raises:
        AssertionError: If a batch contains no labeled target.
    """
    class_criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=NO_LABEL).cuda()
    meters = AverageMeterSet()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # torch.no_grad() replaces the removed ``volatile=True`` Variable flag.
    with torch.no_grad():
        for i, (inputs, target) in enumerate(eval_loader):
            meters.update('data_time', time.time() - end)

            # ``async`` is a reserved word since Python 3.7; the keyword
            # argument is called ``non_blocking`` in PyTorch >= 0.4.
            target = target.cuda(non_blocking=True)

            minibatch_size = len(target)
            labeled_minibatch_size = int(target.ne(NO_LABEL).sum())
            assert labeled_minibatch_size > 0
            meters.update('labeled_minibatch_size', labeled_minibatch_size)

            # compute output
            output1, _ = model(inputs)
            # Summed loss divided by the full batch size, labeled or not.
            class_loss = class_criterion(output1, target) / minibatch_size

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output1.data, target.data, topk=(1, 5))
            meters.update('class_loss', class_loss.item(), labeled_minibatch_size)
            meters.update('top1', prec1[0], labeled_minibatch_size)
            meters.update('error1', 100.0 - prec1[0], labeled_minibatch_size)
            meters.update('top5', prec5[0], labeled_minibatch_size)
            meters.update('error5', 100.0 - prec5[0], labeled_minibatch_size)

            # measure elapsed time
            meters.update('batch_time', time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                LOG.info(
                    'Test: [{0}/{1}]\t'
                    'Time {meters[batch_time]:.3f}\t'
                    'Data {meters[data_time]:.3f}\t'
                    'Class {meters[class_loss]:.4f}\t'
                    'Prec@1 {meters[top1]:.3f}\t'
                    'Prec@5 {meters[top5]:.3f}'.format(
                        i, len(eval_loader), meters=meters))

    LOG.info(' * Prec@1 {top1.avg:.3f}\tPrec@5 {top5.avg:.3f}'
          .format(top1=meters['top1'], top5=meters['top5']))
    log.record(epoch, {
        'step': global_step,
        **meters.values(),
        **meters.averages(),
        **meters.sums()
    })

    return meters['top1'].avg

开发者ID:ys2899，项目名称:mean-teacher，代码行数:56，代码来源:main.py

示例5: forward

 def forward(self, x):
     """Return two categorical distributions, each over 9 atoms.

     The head output is viewed as ``(batch, 2, 9)`` and split along dim 1
     into two ``(batch, 1, 9)`` chunks.  Each chunk is normalised over its
     last axis so that every sample's 9 values sum to 1.

     Args:
         x: Input batch accepted by ``self.lin1``.

     Returns:
         Tuple of two tensors of shape ``(batch, 1, 9)``.
     """
     x = F.relu(self.lin1(x))
     out = self.head(x)
     first, second = out.view(x.size()[0], 2, 9).chunk(2, 1)
     # An explicit dim is required: the implicit choice for a 3-D tensor is
     # dim 0, which would normalise across the batch instead of the atoms.
     return F.softmax(first, dim=-1), F.softmax(second, dim=-1)

开发者ID:ziebalp，项目名称:distributional_dqn，代码行数:10，代码来源:cat_dqn_3.py

示例6: softmax_mse_loss

def softmax_mse_loss(input_logits, target_logits):
    """Takes softmax on both sides and returns MSE loss

    Note:
    - Returns the sum over all examples, divided by the number of classes
      (``input_logits.size()[1]``). Divide by the batch size afterwards
      if you want the mean.
    - Sends gradients to inputs but not the targets.

    Raises:
        AssertionError: If the two logit tensors differ in size.
    """
    assert input_logits.size() == target_logits.size()
    input_softmax = F.softmax(input_logits, dim=1)
    # Detach explicitly: current F.mse_loss back-propagates into its target,
    # which would break the "no gradient to targets" contract above.
    target_softmax = F.softmax(target_logits, dim=1).detach()
    num_classes = input_logits.size()[1]
    # reduction='sum' is the replacement for the deprecated size_average=False.
    return F.mse_loss(input_softmax, target_softmax, reduction='sum') / num_classes

开发者ID:ys2899，项目名称:mean-teacher，代码行数:13，代码来源:losses.py

示例7: _region_proposal

    def _region_proposal(self, net_conv_level1, net_conv_level2, net_conv_level3):
        """Run the RPN heads of every enabled level and feed the proposal layer.

        A level is enabled when its ``cfg.NUM_ANCHORS_LEVEL<k>`` is non-zero.
        For each enabled level the class scores, class probabilities and box
        predictions are stored in ``self._predictions``; disabled levels
        store nothing and are passed on as ``None``.

        Args:
            net_conv_level1: Feature map fed to the level-1 RPN.
            net_conv_level2: Feature map fed to the level-2 RPN.
            net_conv_level3: Feature map fed to the level-3 RPN.
        """
        levels = (
            (1, cfg.NUM_ANCHORS_LEVEL1, net_conv_level1),
            (2, cfg.NUM_ANCHORS_LEVEL2, net_conv_level2),
            (3, cfg.NUM_ANCHORS_LEVEL3, net_conv_level3),
        )

        cls_scores, cls_probs, bbox_preds = [], [], []
        for level, num_anchors, net_conv in levels:
            if num_anchors != 0:
                score, prob, bbox = self._rpn_single_level(level, num_anchors, net_conv)
            else:
                score = prob = bbox = None
            cls_scores.append(score)
            cls_probs.append(prob)
            bbox_preds.append(bbox)

        if self._mode == 'TRAIN':
            # Only dims 2..4 of each score tensor are handed over.
            self._anchor_target_layer(
                *[None if score is None else [*score.shape[2:5]] for score in cls_scores])

        self._proposal_layer(cls_probs[0], bbox_preds[0],
                             cls_probs[1], bbox_preds[1],
                             cls_probs[2], bbox_preds[2])

    def _rpn_single_level(self, level, num_anchors, net_conv):
        """Compute and record RPN scores, probabilities and boxes for one level."""
        suffix = 'level%d' % level
        rpn = F.relu(getattr(self, 'rpn_net_' + suffix)(net_conv))
        # batch x w x h x l x (num_anchors x 6)
        bbox_pred = getattr(self, 'rpn_bbox_pred_net_' + suffix)(rpn).permute(0, 2, 3, 4, 1).contiguous()
        # batch x 2 x w x h x l x num_anchors
        cls_score = getattr(self, 'rpn_cls_score_net_' + suffix)(rpn).view(
            self.batch_size, 2, num_anchors,
            bbox_pred.size(1), bbox_pred.size(2), bbox_pred.size(3),
        ).permute(0, 1, 3, 4, 5, 2).contiguous()

        # Explicit dim=1 (the size-2 class axis).  This is what the implicit
        # default picks for a 6-D tensor, without the deprecation warning.
        cls_prob = F.softmax(cls_score, dim=1)

        self._predictions['rpn_cls_score_' + suffix] = cls_score
        self._predictions['rpn_cls_prob_' + suffix] = cls_prob
        self._predictions['rpn_bbox_pred_' + suffix] = bbox_pred
        return cls_score, cls_prob, bbox_pred

开发者ID:caskeep，项目名称:3D-SIS，代码行数:51，代码来源:network.py

示例8: train_a2c

def train_a2c(net, mb_obs, mb_rewards, mb_actions, mb_values, optimizer, tb_tracker, step_idx, device="cpu"):
    """Perform one A2C optimisation step on a mini-batch and log its statistics.

    The total loss is ``ENTROPY_BETA * entropy + VALUE_LOSS_COEF * value +
    policy``.  Gradients are clipped to ``CLIP_GRAD`` before the optimizer
    step.

    Args:
        net: Callable returning ``(logits, values)`` for the observations.
        mb_obs, mb_rewards, mb_actions, mb_values: Mini-batch arrays; the
            advantage is ``mb_rewards - mb_values``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        tb_tracker: Object whose ``track(name, value, step)`` records metrics.
        step_idx: Step index forwarded to ``tb_tracker.track``.
        device: Device the tensors are created on.

    Returns:
        The observation tensor that was fed to ``net``.
    """
    optimizer.zero_grad()

    mb_adv = mb_rewards - mb_values
    adv_v = torch.FloatTensor(mb_adv).to(device)
    obs_v = torch.FloatTensor(mb_obs).to(device)
    rewards_v = torch.FloatTensor(mb_rewards).to(device)
    actions_t = torch.LongTensor(mb_actions).to(device)

    logits_v, values_v = net(obs_v)
    log_prob_v = F.log_softmax(logits_v, dim=1)
    prob_v = F.softmax(logits_v, dim=1)

    # Policy term: advantage-weighted log-probability of the taken actions.
    batch_rows = range(len(mb_actions))
    taken_log_prob_v = log_prob_v[batch_rows, actions_t]
    loss_policy_v = -(adv_v * taken_log_prob_v).mean()

    # Value term.
    value_pred_v = values_v.squeeze(-1)
    loss_value_v = F.mse_loss(value_pred_v, rewards_v)

    # Entropy term: sum(p * log p), i.e. the negative entropy.
    entropy_loss_v = (prob_v * log_prob_v).sum(dim=1).mean()

    loss_v = ENTROPY_BETA * entropy_loss_v + VALUE_LOSS_COEF * loss_value_v + loss_policy_v
    loss_v.backward()
    nn_utils.clip_grad_norm_(net.parameters(), CLIP_GRAD)
    optimizer.step()

    tracked = (
        ("advantage", mb_adv),
        ("values", values_v),
        ("batch_rewards", rewards_v),
        ("loss_entropy", entropy_loss_v),
        ("loss_policy", loss_policy_v),
        ("loss_value", loss_value_v),
        ("loss_total", loss_v),
    )
    for metric_name, metric_value in tracked:
        tb_tracker.track(metric_name, metric_value, step_idx)
    return obs_v

开发者ID:dhaopku，项目名称:Deep-Reinforcement-Learning-Hands-On，代码行数:29，代码来源:common.py

示例9: iterate_batches

def iterate_batches(envs, net, device="cpu"):
    """Endlessly roll out ``envs`` with ``net`` and yield flattened mini-batches.

    Every iteration steps each environment ``REWARD_STEPS`` times with actions
    chosen by a ``ProbabilityActionSelector`` from the softmax of the policy
    logits.  The rewards of each environment are then passed through
    ``discount_with_dones``, with the value estimate of the last observation
    appended when the environment's last recorded step was not terminal.

    Args:
        envs: Environments exposing ``reset()``, ``step(action)`` and
            ``action_space.n``.  NOTE(review): the buffers are sized with
            ``NUM_ENVS``, so presumably ``len(envs) == NUM_ENVS`` -- confirm.
        net: Callable returning ``(logits, values)`` for a batch of
            observations.
        device: Device the preprocessed observations are moved to.

    Yields:
        Tuple ``(obs, rewards, actions, values, probs, done_rewards,
        done_steps)``.  The first five are the rollout buffers reshaped or
        flattened over (env, step); the last two are arrays with the total
        reward and step count of the episodes that finished in this rollout.
    """
    n_actions = envs[0].action_space.n
    act_selector = ptan.actions.ProbabilityActionSelector()
    obs = [e.reset() for e in envs]
    # Per-env list of done flags; slot 0 carries the flag of the previous rollout.
    batch_dones = [[False] for _ in range(NUM_ENVS)]
    total_reward = [0.0] * NUM_ENVS
    total_steps = [0] * NUM_ENVS
    # Rollout buffers are allocated once and overwritten on every iteration.
    mb_obs = np.zeros((NUM_ENVS, REWARD_STEPS) + IMG_SHAPE, dtype=np.uint8)
    mb_rewards = np.zeros((NUM_ENVS, REWARD_STEPS), dtype=np.float32)
    mb_values = np.zeros((NUM_ENVS, REWARD_STEPS), dtype=np.float32)
    mb_actions = np.zeros((NUM_ENVS, REWARD_STEPS), dtype=np.int32)
    mb_probs = np.zeros((NUM_ENVS, REWARD_STEPS, n_actions), dtype=np.float32)

    while True:
        # Keep only the most recent done flag of each env as the new slot 0.
        batch_dones = [[dones[-1]] for dones in batch_dones]
        done_rewards = []
        done_steps = []
        for n in range(REWARD_STEPS):
            obs_v = ptan.agent.default_states_preprocessor(obs).to(device)
            mb_obs[:, n] = obs_v.data.cpu().numpy()
            logits_v, values_v = net(obs_v)
            probs_v = F.softmax(logits_v, dim=1)
            probs = probs_v.data.cpu().numpy()
            actions = act_selector(probs)
            mb_probs[:, n] = probs
            mb_actions[:, n] = actions
            mb_values[:, n] = values_v.squeeze().data.cpu().numpy()
            for e_idx, e in enumerate(envs):
                o, r, done, _ = e.step(actions[e_idx])
                total_reward[e_idx] += r
                total_steps[e_idx] += 1
                if done:
                    # Episode finished: restart the env and report its totals.
                    o = e.reset()
                    done_rewards.append(total_reward[e_idx])
                    done_steps.append(total_steps[e_idx])
                    total_reward[e_idx] = 0.0
                    total_steps[e_idx] = 0
                obs[e_idx] = o
                mb_rewards[e_idx, n] = r
                batch_dones[e_idx].append(done)
        # obtain values for the last observation
        obs_v = ptan.agent.default_states_preprocessor(obs).to(device)
        _, values_v = net(obs_v)
        values_last = values_v.squeeze().data.cpu().numpy()

        for e_idx, (rewards, dones, value) in enumerate(zip(mb_rewards, batch_dones, values_last)):
            rewards = rewards.tolist()
            if not dones[-1]:
                # Bootstrap: append the value estimate as an extra reward, then
                # drop that extra entry from the result.
                rewards = discount_with_dones(rewards + [value], dones[1:] + [False], GAMMA)[:-1]
            else:
                rewards = discount_with_dones(rewards, dones[1:], GAMMA)
            mb_rewards[e_idx] = rewards

        # NOTE(review): mb_obs is reused by the next iteration and reshape may
        # return a view of it, so consumers presumably must finish with the
        # observations before advancing the generator -- confirm.
        out_mb_obs = mb_obs.reshape((-1,) + IMG_SHAPE)
        out_mb_rewards = mb_rewards.flatten()
        out_mb_actions = mb_actions.flatten()
        out_mb_values = mb_values.flatten()
        out_mb_probs = mb_probs.flatten()
        yield out_mb_obs, out_mb_rewards, out_mb_actions, out_mb_values, out_mb_probs, \
              np.array(done_rewards), np.array(done_steps)

开发者ID:dhaopku，项目名称:Deep-Reinforcement-Learning-Hands-On，代码行数:60，代码来源:common.py

示例10: routing

    def routing(self, x, b_IJ, W,batch_size,routing_iter):
        """Run ``routing_iter`` routing rounds and return the capsules ``V_J``.

        Args:
            x: Input viewed here as ``(batch_size, 256, 1, 6, 6)``.
            b_IJ: Routing logits; softmax is taken over dim 2.  They are
                updated IN PLACE (``b_IJ.data +=``) on every round except the
                last one.
            W: Weights, repeated ``batch_size`` times along dim 0 and viewed
                as ``(batch_size, 1152, 10, 16, 8)``.
            batch_size: Batch size used for the views and reshapes.
            routing_iter: Number of routing rounds.

        Returns:
            ``V_J``, the squashed weighted sum computed in the last round.

        Note:
            ``V_J`` is assigned only when ``r_iter == routing_iter - 1``, so
            ``routing_iter <= 0`` raises ``UnboundLocalError`` at the return.
        """
        x1 = x.view(batch_size, 256, 1, 6, 6)
        x_tile = x1.repeat(1, 1, 10, 1, 1)
        x_view = x_tile.view(batch_size, 1152, 10, 8, 1)
        stride_i = W.repeat(batch_size, 1, 1, 1, 1)
        stride_j = stride_i.view(batch_size, 1152, 10, 16, 8)
        dot_op = torch.matmul(stride_j, x_view)
        # Copy of the predictions that is excluded from autograd; used for the
        # intermediate rounds only.
        dot_op_stopped = Variable(dot_op.data.clone(), requires_grad=False)

        for r_iter in range(routing_iter):
            id_capsule = F.softmax(b_IJ, dim=2)
            if r_iter == routing_iter - 1:
                # Last round: computed with torch tensors on the original dot_op.
                route_I = torch.mul(id_capsule, dot_op)
                route_I_sum = torch.sum(route_I, dim=1, keepdim=True) + self.bias
                V_J = squash(route_I_sum,self.epsilon)
            if r_iter < routing_iter - 1:

                # Intermediate rounds: computed in NumPy on the stopped copy.
                # NOTE(review): .numpy() presumably requires CPU tensors -- confirm.
                dot_op_stopped_tmp = dot_op_stopped.data.numpy()
                dot_op_stopped_tmp = np.reshape(dot_op_stopped_tmp, (batch_size, 1152, 10, 16, 1))
                id_capsule_tmp = id_capsule.data.numpy()
                route_I_tmp = id_capsule_tmp * dot_op_stopped_tmp
                route_I_tmp_sum = np.sum(route_I_tmp, axis=1, keepdims=True) + self.bias.data.numpy()
                V_J_tmp = squash(torch.Tensor(route_I_tmp_sum),self.epsilon)

                V_J_tmp_tiled = np.tile(V_J_tmp.numpy(), (1, 1152, 1, 1, 1))
                dot_op_stopped_tmp = np.reshape(dot_op_stopped_tmp, (batch_size, 1152, 10, 1, 16))

                # (.., 1, 16) @ (.., 16, 1): one scalar per (input, output) pair.
                u_produce_v = np.matmul(dot_op_stopped_tmp, V_J_tmp_tiled)

                # In-place update of the caller's routing logits.
                b_IJ.data += torch.Tensor(u_produce_v)

        return V_J

开发者ID:VibAltekar，项目名称:DeepLearning_CapNet，代码行数:32，代码来源:model.py

示例11: probs

    def probs(self, generator, outputs, vocab_pointer_switches, context_question_switches,
        context_attention, question_attention,
        context_indices, question_indices,
        oov_to_limited_idx):
        """Mix generator, context-pointer and question-pointer distributions.

        The generator softmax is scaled by ``vocab_pointer_switches`` and
        padded with ``EPSILON`` entries, one per item of
        ``oov_to_limited_idx``.  Context and question attention are scattered
        by index into ``EPSILON``-filled tensors of the same size and scaled
        by their switch products.  The three scaled tensors are summed.

        Returns:
            The summed tensor; its last dimension is
            ``self.generative_vocab_size + len(oov_to_limited_idx)``.
        """
        def eps_filled(reference, shape):
            # New tensor of `shape`, same type as `reference`, filled with EPSILON.
            return Variable(reference.data.new(*shape).fill_(EPSILON))

        vocab_size = self.generative_vocab_size

        # Generator distribution over the fixed vocabulary.
        out_shape = list(outputs.size())
        out_shape[-1] = vocab_size
        flat_outputs = outputs.view(-1, outputs.size(-1))
        scores = generator(flat_outputs).view(out_shape)
        p_vocab = F.softmax(scores, dim=scores.dim()-1)
        scaled_p_vocab = vocab_pointer_switches.expand_as(p_vocab) * p_vocab

        # Extend the last dimension with EPSILON for every OOV entry.
        num_oov = len(oov_to_limited_idx)
        if num_oov > 0:
            out_shape[-1] = num_oov
            oov_pad = eps_filled(scaled_p_vocab, out_shape)
            scaled_p_vocab = torch.cat([scaled_p_vocab, oov_pad], dim=oov_pad.dim()-1)

        full_shape = scaled_p_vocab.size()
        pointer_switches = 1 - vocab_pointer_switches

        # Context pointer.
        p_context_ptr = eps_filled(scaled_p_vocab, full_shape)
        context_slots = context_indices.unsqueeze(1).expand_as(context_attention)
        p_context_ptr.scatter_add_(p_context_ptr.dim()-1, context_slots, context_attention)
        context_weight = context_question_switches * pointer_switches
        scaled_p_context_ptr = context_weight.expand_as(p_context_ptr) * p_context_ptr

        # Question pointer.
        p_question_ptr = eps_filled(scaled_p_vocab, full_shape)
        question_slots = question_indices.unsqueeze(1).expand_as(question_attention)
        p_question_ptr.scatter_add_(p_question_ptr.dim()-1, question_slots, question_attention)
        question_weight = (1 - context_question_switches) * pointer_switches
        scaled_p_question_ptr = question_weight.expand_as(p_question_ptr) * p_question_ptr

        return scaled_p_vocab + scaled_p_context_ptr + scaled_p_question_ptr

开发者ID:AhlamMD，项目名称:decaNLP，代码行数:28，代码来源:multitask_question_answering_network.py

示例12: forward

 def forward(self, image_feat, question_embedding):
     """Return attention weights expanded to the shape of ``image_feat``.

     The raw scores of ``self.att1`` and ``self.att2`` are added, normalised
     with a softmax over dim 1 and expanded to match ``image_feat``.
     """
     raw_scores = [
         branch.compute_raw_att(image_feat, question_embedding)
         for branch in (self.att1, self.att2)
     ]
     combined = raw_scores[0] + raw_scores[1]
     # softmax across locations
     weights = F.softmax(combined, dim=1)
     return weights.expand_as(image_feat)

开发者ID:xiaojie18，项目名称:pythia，代码行数:7，代码来源:image_attention.py

示例13: forward

 def forward(self, x):
     """Run the shared feature extractor followed by the three output heads.

     Returns:
         Tuple ``(conv6_3 output, conv6_2 output, softmax over dim 1 of the
         conv6_1 output)``.
     """
     shared = self.features(x)
     head_1 = self.conv6_1(shared)
     head_2 = self.conv6_2(shared)
     head_3 = self.conv6_3(shared)
     return head_3, head_2, F.softmax(head_1, dim=1)

开发者ID:g0josh，项目名称:mtcnn，代码行数:7，代码来源:model.py

示例14: probs

    def probs(self, generator, outputs, vocab_pointer_switches, context_question_switches,
        context_attention, question_attention,
        context_indices, question_indices,
        oov_to_limited_idx):
        """Mix generator and pointer distributions into a single tensor.

        The generator softmax is scaled by ``vocab_pointer_switches`` and
        padded with ``EPSILON`` entries, one per item of
        ``oov_to_limited_idx``.  The switch-weighted context and question
        attention are then scatter-added in place at ``context_indices`` and
        ``question_indices``.

        Returns:
            The accumulated tensor; its last dimension is
            ``self.generative_vocab_size + len(oov_to_limited_idx)``.
        """
        # Generator distribution over the fixed vocabulary.
        target_shape = list(outputs.size())
        target_shape[-1] = self.generative_vocab_size
        flat_outputs = outputs.view(-1, outputs.size(-1))
        scores = generator(flat_outputs).view(target_shape)
        last_dim = scores.dim() - 1
        p_vocab = F.softmax(scores, dim=last_dim)
        mixture = vocab_pointer_switches.expand_as(p_vocab) * p_vocab

        # Extend the last dimension with EPSILON for every OOV entry.
        num_oov = len(oov_to_limited_idx)
        if num_oov > 0:
            target_shape[-1] = num_oov
            oov_pad = mixture.new_full(target_shape, EPSILON)
            mixture = torch.cat([mixture, oov_pad], dim=last_dim)

        pointer_switches = 1 - vocab_pointer_switches

        # p_context_ptr
        context_slots = context_indices.unsqueeze(1).expand_as(context_attention)
        context_weight = context_question_switches * pointer_switches
        context_mass = context_weight.expand_as(context_attention) * context_attention
        mixture.scatter_add_(last_dim, context_slots, context_mass)

        # p_question_ptr
        question_slots = question_indices.unsqueeze(1).expand_as(question_attention)
        question_weight = (1 - context_question_switches) * pointer_switches
        question_mass = question_weight.expand_as(question_attention) * question_attention
        mixture.scatter_add_(last_dim, question_slots, question_mass)

        return mixture

开发者ID:shaogx，项目名称:decaNLP，代码行数:27，代码来源:multitask_question_answering_network.py

示例15: forward_dot

    def forward_dot(self, hid, ctx, ctx_mask):
        r"""Computes Luong-style dot attention probabilities between
        decoder's hidden state and source annotations.

        Arguments:
            hid(Variable): A set of decoder hidden states of shape `T*B*H`
                where `T` == 1, `B` is batch dim and `H` is hidden state dim.
            ctx(Variable): A set of annotations of shape `S*B*C` where `S`
                is the source timestep dim, `B` is batch dim and `C`
                is annotation dim.
            ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes
                in the padded timesteps, or ``None`` to attend to every
                position.

        Returns:
            scores(Variable): A variable of shape `T*B*S` containing normalized
                attention scores for each position and sample. Positions
                whose mask entry is zero get a score of 0.
            z_t(Variable): A variable of shape `T*B*H` containing the final
                attended context vector for this target decoding timestep.
        """
        # Apply transformations first to make last dims both C and then
        # shuffle dims to prepare for batch mat-mult
        ctx_ = self.ctx2ctx(ctx).permute(1, 2, 0)   # S*B*C -> S*B*C -> B*C*S
        hid_ = self.hid2ctx(hid).permute(1, 0, 2)   # T*B*H -> T*B*C -> B*T*C

        # 'dot' logits of B*T*S
        logits = torch.bmm(hid_, ctx_)

        if ctx_mask is not None:
            # S*B -> B*1*S so the mask broadcasts over the T axis.  A large
            # finite negative (not -inf) keeps a fully padded row free of NaNs.
            padded = ctx_mask.t().unsqueeze(1) == 0
            logits = logits.masked_fill(padded, torch.finfo(logits.dtype).min)

        scores = F.softmax(logits, dim=-1)

        # Transform back to hidden_dim for further decoders
        # B*T*S x B*S*C -> B*T*C -> B*T*H
        z_t = self.ctx2hid(torch.bmm(scores, ctx.transpose(0, 1)))

        return scores.transpose(0, 1), z_t.transpose(0, 1)

开发者ID:bardetadrien，项目名称:nmtpytorch，代码行数:32，代码来源:attention.py

注：本文中的torch.nn.functional.softmax函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。