本文整理汇总了 Python 中 torch.nn.functional.softmax 函数的典型用法代码示例。如果您正苦于以下问题：Python softmax 函数的具体用法？Python softmax 怎么用？Python softmax 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 softmax 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: forward
def forward(self, vocab):
    """Score a batch of (sentence, aspect) pairs via an interaction-matrix attention.

    The sentence and the aspect are embedded, run through ``self.lstm_s`` and
    ``self.lstm_a``, combined into a pair-wise interaction matrix, and the
    attention-weighted sentence representation is passed to ``self.fc``.

    NOTE(review): this snippet reached us with its indentation stripped. The
    extent of the two ``torch.no_grad()`` blocks below is a reconstruction and
    must be confirmed against the upstream file.

    Args:
        vocab: mapping read at the keys ``'sentence'``, ``'aspect'`` and
            ``'sent_len'``. Presumably padded index tensors plus the sentence
            lengths expected by ``pack_padded_sequence`` -- TODO confirm.

    Returns:
        The output of ``self.fc`` for the attended sentence representation.
    """
    # Embedding look-ups run without gradient tracking; the inputs are moved
    # to the GPU unconditionally.
    with torch.no_grad():
        batch_shape = vocab['sentence'].shape  # NOTE(review): not read again in this block
        s_embedding = self.embedding(vocab['sentence'].cuda())
        a_embedding = self.embedding(vocab['aspect'].cuda())
    packed_s = pack_padded_sequence(s_embedding, vocab['sent_len'], batch_first=True)
    out_s, (h_s, c1) = self.lstm_s(packed_s) # packed output
    out_a, (h_a, c2) = self.lstm_a(a_embedding)
    # NOTE(review): if un-packing really happens under no_grad, the result is
    # cut off from autograd and lstm_s would receive no gradient -- confirm
    # the intended scope of this block.
    with torch.no_grad():
        unpacked_out_s, _ = pad_packed_sequence(out_s, batch_first=True)
    # Pair-wise interaction matrix
    # (bmm implies 3-D operands; presumably batch x sent_len x aspect_len -- TODO confirm)
    I_matrix = torch.bmm(unpacked_out_s, out_a.permute(0,2,1))
    # Column-wise softmax
    a2s_attn = F.softmax(I_matrix, dim=1)
    # Row-wise softmax => Column-wise average => aspect attention
    s2a_attn = F.softmax(I_matrix, dim=2)
    a_attn = torch.mean(s2a_attn, dim=1)
    # Final sentence attn => weighted sum of each individual a2s_attn
    s_attn = torch.bmm(a2s_attn, a_attn.unsqueeze(-1))
    final_rep = torch.bmm(unpacked_out_s.permute(0,2,1), s_attn).squeeze(-1)
    pred = self.fc(final_rep)
    return pred
示例2: softmax
def softmax(tensor):
    r"""
    Apply softmax over the last dimension of either a Tensor or a Variable.

    A ``Variable`` is handed to ``F.softmax`` directly and the result is
    returned unchanged.  Anything else is wrapped in a ``Variable`` for the
    call and the ``.data`` of the result is returned.
    TODO: Remove once https://github.com/pytorch/pytorch/issues/2633 is resolved.
    """
    if isinstance(tensor, Variable):
        return F.softmax(tensor, -1)
    wrapped = Variable(tensor)
    return F.softmax(wrapped, -1).data
示例3: train
def _to_cuda(data):
    """Move a tensor to the GPU; a list is updated in place, element by element."""
    if isinstance(data, list):
        for idx, item in enumerate(data):
            data[idx] = item.cuda()
        return data
    return data.cuda()


def train(model,trainLoader,criterion, optimizer,evalData = None,
          epoch=1,echoStep=100,evalStep=1000,saveStep=5000,savePath="./"):
    """Run a simple supervised training loop with periodic logging, evaluation and checkpoints.

    Steps are numbered globally, starting at 1 and continuing across epochs.
    For every step the order is: forward pass, optional progress print,
    optional evaluation, optional checkpoint, then the optimizer update.
    After the last epoch the weights are saved to ``<savePath>/final.pt``.

    Args:
        model: callable mapping a batch ``x`` to class logits.
        trainLoader: sized iterable yielding ``(x, y)`` batches; ``x`` may be
            a tensor or a list of tensors.
        criterion: loss callable ``criterion(out, y)``.
        optimizer: optimizer exposing ``zero_grad()`` and ``step()``.
        evalData: optional ``(evalX, evalY)`` pair evaluated every
            ``evalStep`` steps.  When CUDA is available a list ``evalX`` is
            moved to the GPU in place.
        epoch: number of passes over ``trainLoader``.
        echoStep: print loss/accuracy every this many steps.
        evalStep: evaluate every this many steps.
        saveStep: write ``m_<step>_<epoch>.pt`` every this many steps.
        savePath: directory the checkpoints are written to.
    """
    use_cuda = torch.cuda.is_available()
    if evalData is not None:
        evalX, evalY = evalData
        if use_cuda:
            evalY = evalY.cuda()
            evalX = _to_cuda(evalX)

    batchLen = len(trainLoader)
    # range() and single-argument print() behave the same on Python 2 and 3.
    for epochIdx in range(epoch):
        for i, (x, y) in enumerate(trainLoader, batchLen * epochIdx + 1):
            if use_cuda:
                y = y.cuda()
                x = _to_cuda(x)

            out = model(x)
            loss = criterion(out, y)

            # softmax is monotonic, so the arg-max of the logits is the prediction.
            pred = torch.argmax(out, dim=1)
            correct = pred.eq(y).sum()
            acc = float(correct) / len(y)

            if i % echoStep == 0:
                print("Step %d/%d/%d : Loss %.4f , Acc %.4f " %(i,batchLen*epoch,epochIdx+1,float(loss),acc))

            # evaluate
            if i % evalStep == 0 and evalData is not None:
                # No backward pass follows, so skip building the autograd graph.
                with torch.no_grad():
                    evalOut = model(evalX)
                    evalLoss = criterion(evalOut, evalY)
                correct = torch.argmax(evalOut, dim=1).eq(evalY).sum()
                evalAcc = float(correct) / len(evalY)
                print("------------------------------------------------")
                print("Evaluate %d Sample : Loss %.4f , Acc %.4f " %(evalY.size(0),float(evalLoss),evalAcc))
                print("")

            # save model (weights as they are *before* this step's update)
            if i % saveStep == 0:
                outFile = "%s/m_%d_%d.pt" %(savePath,i,epochIdx+1)
                torch.save(model.state_dict(),outFile)
                print("Save model : %s" %(outFile))

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    outFile = "%s/final.pt" %(savePath)
    torch.save(model.state_dict(),outFile)
    print("Save model : %s" %(outFile))
示例4: validate
def validate(eval_loader, model, log, global_step, epoch):
    """Evaluate ``model`` on ``eval_loader`` and return the average top-1 precision.

    Per batch, the summed cross-entropy over labeled targets (divided by the
    full minibatch size) and the top-1/top-5 precision of the model's first
    output are accumulated in an ``AverageMeterSet``.  Progress is written to
    the module-level ``LOG`` every ``args.print_freq`` batches and the final
    meter values are stored through ``log.record``.

    Requires torch >= 0.4.1 (``torch.no_grad``, ``non_blocking``, ``.item()``,
    ``reduction=``); the former ``async=True`` keyword is a syntax error on
    Python 3.7+.

    Args:
        eval_loader: sized iterable of ``(input, target)`` batches.
        model: callable returning two outputs; only the first one is scored.
        log: object whose ``record(epoch, dict)`` stores the results.
        global_step: step counter stored under the ``'step'`` key.
        epoch: epoch index passed to ``log.record``.

    Returns:
        ``meters['top1'].avg``.
    """
    class_criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=NO_LABEL).cuda()
    meters = AverageMeterSet()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # Replaces the removed ``volatile=True`` flag: no autograd graph is built.
    with torch.no_grad():
        for i, (inputs, target) in enumerate(eval_loader):
            meters.update('data_time', time.time() - end)

            target = target.cuda(non_blocking=True)
            minibatch_size = len(target)
            labeled_minibatch_size = target.ne(NO_LABEL).sum().item()
            assert labeled_minibatch_size > 0
            meters.update('labeled_minibatch_size', labeled_minibatch_size)

            # compute output; the second model output is not used here
            output1, _ = model(inputs)
            class_loss = class_criterion(output1, target) / minibatch_size

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output1, target, topk=(1, 5))
            meters.update('class_loss', class_loss.item(), labeled_minibatch_size)
            meters.update('top1', prec1[0], labeled_minibatch_size)
            meters.update('error1', 100.0 - prec1[0], labeled_minibatch_size)
            meters.update('top5', prec5[0], labeled_minibatch_size)
            meters.update('error5', 100.0 - prec5[0], labeled_minibatch_size)

            # measure elapsed time
            meters.update('batch_time', time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                LOG.info(
                    'Test: [{0}/{1}]\t'
                    'Time {meters[batch_time]:.3f}\t'
                    'Data {meters[data_time]:.3f}\t'
                    'Class {meters[class_loss]:.4f}\t'
                    'Prec@1 {meters[top1]:.3f}\t'
                    'Prec@5 {meters[top5]:.3f}'.format(
                        i, len(eval_loader), meters=meters))

    LOG.info(' * Prec@1 {top1.avg:.3f}\tPrec@5 {top5.avg:.3f}'
             .format(top1=meters['top1'], top5=meters['top5']))
    log.record(epoch, {
        'step': global_step,
        **meters.values(),
        **meters.averages(),
        **meters.sums()
    })

    return meters['top1'].avg
示例5: forward
def forward(self, x):
    """Return two 9-way probability distributions computed from ``x``.

    The output of ``self.head`` is viewed as ``(batch, 2, 9)`` and split into
    two chunks along dimension 1; each chunk is normalised over its 9 logits.

    The softmax dimension is given explicitly.  Without ``dim`` PyTorch picks
    dimension 0 for 3-D inputs, which would normalise across the batch
    instead of across the 9 logits.

    Args:
        x: input accepted by ``self.lin1``.

    Returns:
        Tuple of two tensors of shape ``(batch, 1, 9)``; each sums to 1 along
        the last dimension.
    """
    hidden = F.relu(self.lin1(x))
    out = self.head(hidden)
    first, second = out.view(hidden.size(0), 2, 9).chunk(2, 1)
    return F.softmax(first, dim=-1), F.softmax(second, dim=-1)
示例6: softmax_mse_loss
def softmax_mse_loss(input_logits, target_logits):
    """Takes softmax on both sides and returns MSE loss

    Note:
    - Returns the sum over all examples, divided by the number of classes.
      Divide by the batch size afterwards if you want the mean.
    - The target is NOT detached here: pass ``target_logits`` that do not
      require grad (e.g. ``logits.detach()``) if gradients must reach the
      inputs only.

    Args:
        input_logits: tensor whose dimension 1 indexes the classes.
        target_logits: tensor of exactly the same size.

    Returns:
        Scalar tensor: summed squared difference of the two softmax outputs
        divided by ``input_logits.size(1)``.
    """
    assert input_logits.size() == target_logits.size()
    input_softmax = F.softmax(input_logits, dim=1)
    target_softmax = F.softmax(target_logits, dim=1)
    num_classes = input_logits.size()[1]
    # reduction='sum' is the supported spelling of the deprecated size_average=False.
    return F.mse_loss(input_softmax, target_softmax, reduction='sum') / num_classes
示例7: _region_proposal
def _rpn_level_predictions(self, level, net_conv, num_anchors):
    """Run the RPN head of one level and store its outputs in ``self._predictions``.

    Returns ``(rpn_cls_score, rpn_cls_prob, rpn_bbox_pred)`` for that level.
    """
    suffix = 'level%d' % level
    rpn = F.relu(getattr(self, 'rpn_net_' + suffix)(net_conv))
    # batch x w x h x l x (num_anchors x 6)
    rpn_bbox_pred = getattr(self, 'rpn_bbox_pred_net_' + suffix)(rpn).permute(0, 2, 3, 4, 1).contiguous()
    # batch x 2 x w x h x l x num_anchors
    rpn_cls_score = getattr(self, 'rpn_cls_score_net_' + suffix)(rpn).view(
        self.batch_size, 2, num_anchors,
        rpn_bbox_pred.size(1), rpn_bbox_pred.size(2), rpn_bbox_pred.size(3),
    ).permute(0, 1, 3, 4, 5, 2).contiguous()
    # batch x 2 x w x h x l x num_anchors; dim=1 is the 2-way score axis, which
    # is also what the deprecated implicit-dim call selected for a 6-D tensor.
    rpn_cls_prob = F.softmax(rpn_cls_score, dim=1)

    self._predictions['rpn_cls_score_' + suffix] = rpn_cls_score
    self._predictions['rpn_cls_prob_' + suffix] = rpn_cls_prob
    self._predictions['rpn_bbox_pred_' + suffix] = rpn_bbox_pred
    return rpn_cls_score, rpn_cls_prob, rpn_bbox_pred


def _region_proposal(self, net_conv_level1, net_conv_level2, net_conv_level3):
    """Compute RPN scores, probabilities and box predictions for up to three levels.

    A level is skipped when its ``cfg.NUM_ANCHORS_LEVEL<k>`` is 0; ``None`` is
    then passed in its place to the anchor-target and proposal layers.  In
    ``'TRAIN'`` mode ``self._anchor_target_layer`` receives the three spatial
    sizes (dimensions 2..4 of each score tensor) before
    ``self._proposal_layer`` is called with the probabilities and box
    predictions.

    NOTE(review): the snippet arrived without indentation; calling
    ``_proposal_layer`` outside the ``'TRAIN'`` branch is a reconstruction --
    confirm against the upstream file.

    Args:
        net_conv_level1: feature map fed to the level-1 RPN head.
        net_conv_level2: feature map fed to the level-2 RPN head.
        net_conv_level3: feature map fed to the level-3 RPN head.
    """
    anchor_counts = (cfg.NUM_ANCHORS_LEVEL1, cfg.NUM_ANCHORS_LEVEL2, cfg.NUM_ANCHORS_LEVEL3)
    feature_maps = (net_conv_level1, net_conv_level2, net_conv_level3)

    score_shapes = []    # one [w, h, l] list (or None) per level
    proposal_args = []   # cls_prob, bbox_pred (or None, None) per level
    for level, (num_anchors, net_conv) in enumerate(zip(anchor_counts, feature_maps), start=1):
        if num_anchors != 0:
            cls_score, cls_prob, bbox_pred = self._rpn_level_predictions(level, net_conv, num_anchors)
            score_shapes.append(list(cls_score.shape[2:5]))
            proposal_args.extend((cls_prob, bbox_pred))
        else:
            score_shapes.append(None)
            proposal_args.extend((None, None))

    if self._mode == 'TRAIN':
        self._anchor_target_layer(*score_shapes)

    self._proposal_layer(*proposal_args)
示例8: train_a2c
def train_a2c(net, mb_obs, mb_rewards, mb_actions, mb_values, optimizer, tb_tracker, step_idx, device="cpu"):
    """Perform one actor-critic optimisation step on a minibatch and log its statistics.

    The advantage is ``mb_rewards - mb_values``.  The total loss is
    ``ENTROPY_BETA * entropy + VALUE_LOSS_COEF * value_loss + policy_loss``;
    gradients are clipped to ``CLIP_GRAD`` before the optimizer step.

    Args:
        net: callable returning ``(logits, values)`` for an observation batch.
        mb_obs, mb_rewards, mb_actions, mb_values: minibatch arrays.
        optimizer: optimizer stepped once.
        tb_tracker: object whose ``track(name, value, step)`` records metrics.
        step_idx: step index passed to ``tb_tracker.track``.
        device: device the minibatch tensors are moved to.

    Returns:
        The observation tensor that was fed to ``net``.
    """
    optimizer.zero_grad()
    mb_adv = mb_rewards - mb_values

    # Minibatch arrays -> tensors on the requested device.
    adv_v = torch.FloatTensor(mb_adv).to(device)
    obs_v = torch.FloatTensor(mb_obs).to(device)
    rewards_v = torch.FloatTensor(mb_rewards).to(device)
    actions_t = torch.LongTensor(mb_actions).to(device)

    logits_v, values_v = net(obs_v)
    log_prob_v = F.log_softmax(logits_v, dim=1)

    # Policy term: advantage-weighted log-probability of the taken actions.
    batch_rows = range(len(mb_actions))
    taken_log_prob_v = log_prob_v[batch_rows, actions_t]
    loss_policy_v = -(adv_v * taken_log_prob_v).mean()

    # Value term.
    loss_value_v = F.mse_loss(values_v.squeeze(-1), rewards_v)

    # Entropy term: sum(p * log p), i.e. the negative entropy.
    prob_v = F.softmax(logits_v, dim=1)
    entropy_loss_v = (prob_v * log_prob_v).sum(dim=1).mean()

    loss_v = ENTROPY_BETA * entropy_loss_v + VALUE_LOSS_COEF * loss_value_v + loss_policy_v
    loss_v.backward()
    nn_utils.clip_grad_norm_(net.parameters(), CLIP_GRAD)
    optimizer.step()

    tracked = (
        ("advantage", mb_adv),
        ("values", values_v),
        ("batch_rewards", rewards_v),
        ("loss_entropy", entropy_loss_v),
        ("loss_policy", loss_policy_v),
        ("loss_value", loss_value_v),
        ("loss_total", loss_v),
    )
    for tag, value in tracked:
        tb_tracker.track(tag, value, step_idx)
    return obs_v
示例9: iterate_batches
def iterate_batches(envs, net, device="cpu"):
    """Endlessly generate rollout minibatches by stepping all environments.

    Each pass of the outer loop steps every environment ``REWARD_STEPS``
    times, choosing actions with ``act_selector`` from the softmax of the
    network's logits, then replaces the collected rewards with the result of
    ``discount_with_dones`` and yields the flattened buffers.

    NOTE(review): the snippet arrived with its indentation stripped; the
    nesting below is reconstructed from the data flow -- confirm upstream.

    Args:
        envs: environments exposing ``reset()``, ``step(action)`` and
            ``action_space.n``.  The buffers are sized with ``NUM_ENVS``, so
            presumably ``len(envs) == NUM_ENVS`` -- TODO confirm.
        net: callable returning ``(logits, values)`` for an observation batch.
        device: device the preprocessed observations are moved to.

    Yields:
        ``(obs, rewards, actions, values, probs, done_rewards, done_steps)``:
        the first five are the flattened/reshaped per-step buffers, the last
        two are arrays with the total reward and step count of every episode
        that finished during this pass.
    """
    n_actions = envs[0].action_space.n
    act_selector = ptan.actions.ProbabilityActionSelector()
    obs = [e.reset() for e in envs]
    # Per-environment list of done flags; element 0 carries over the last
    # flag of the previous pass.
    batch_dones = [[False] for _ in range(NUM_ENVS)]
    total_reward = [0.0] * NUM_ENVS
    total_steps = [0] * NUM_ENVS
    # Buffers are allocated once and overwritten on every pass.
    mb_obs = np.zeros((NUM_ENVS, REWARD_STEPS) + IMG_SHAPE, dtype=np.uint8)
    mb_rewards = np.zeros((NUM_ENVS, REWARD_STEPS), dtype=np.float32)
    mb_values = np.zeros((NUM_ENVS, REWARD_STEPS), dtype=np.float32)
    mb_actions = np.zeros((NUM_ENVS, REWARD_STEPS), dtype=np.int32)
    mb_probs = np.zeros((NUM_ENVS, REWARD_STEPS, n_actions), dtype=np.float32)

    while True:
        batch_dones = [[dones[-1]] for dones in batch_dones]
        done_rewards = []
        done_steps = []
        for n in range(REWARD_STEPS):
            obs_v = ptan.agent.default_states_preprocessor(obs).to(device)
            mb_obs[:, n] = obs_v.data.cpu().numpy()
            logits_v, values_v = net(obs_v)
            probs_v = F.softmax(logits_v, dim=1)
            probs = probs_v.data.cpu().numpy()
            actions = act_selector(probs)
            mb_probs[:, n] = probs
            mb_actions[:, n] = actions
            mb_values[:, n] = values_v.squeeze().data.cpu().numpy()
            for e_idx, e in enumerate(envs):
                o, r, done, _ = e.step(actions[e_idx])
                total_reward[e_idx] += r
                total_steps[e_idx] += 1
                if done:
                    # Episode finished: restart it and report its totals.
                    o = e.reset()
                    done_rewards.append(total_reward[e_idx])
                    done_steps.append(total_steps[e_idx])
                    total_reward[e_idx] = 0.0
                    total_steps[e_idx] = 0
                obs[e_idx] = o
                mb_rewards[e_idx, n] = r
                batch_dones[e_idx].append(done)
        # obtain values for the last observation
        obs_v = ptan.agent.default_states_preprocessor(obs).to(device)
        _, values_v = net(obs_v)
        values_last = values_v.squeeze().data.cpu().numpy()

        for e_idx, (rewards, dones, value) in enumerate(zip(mb_rewards, batch_dones, values_last)):
            rewards = rewards.tolist()
            if not dones[-1]:
                # Episode still running: append the value of the last
                # observation as an extra entry and drop it again afterwards.
                rewards = discount_with_dones(rewards + [value], dones[1:] + [False], GAMMA)[:-1]
            else:
                rewards = discount_with_dones(rewards, dones[1:], GAMMA)
            mb_rewards[e_idx] = rewards

        # NOTE(review): reshape() may return a view of the reused mb_obs
        # buffer, whereas flatten() copies -- consumers that keep the
        # observations across passes should copy them.
        out_mb_obs = mb_obs.reshape((-1,) + IMG_SHAPE)
        out_mb_rewards = mb_rewards.flatten()
        out_mb_actions = mb_actions.flatten()
        out_mb_values = mb_values.flatten()
        out_mb_probs = mb_probs.flatten()
        yield out_mb_obs, out_mb_rewards, out_mb_actions, out_mb_values, out_mb_probs, \
              np.array(done_rewards), np.array(done_steps)
示例10: routing
def routing(self, x, b_IJ, W,batch_size,routing_iter):
    """Run ``routing_iter`` rounds of routing and return the output capsules.

    On every round the routing logits ``b_IJ`` are soft-maxed over dimension
    2.  On all rounds but the last, a gradient-free NumPy copy of the
    predictions is used to update ``b_IJ.data`` in place; on the last round
    the result ``V_J`` is computed with the autograd-tracked predictions.

    Args:
        x: tensor viewable as ``(batch_size, 256, 1, 6, 6)``.
        b_IJ: routing logits, updated in place through ``.data``.
        W: weights; repeated ``batch_size`` times along dimension 0 and
            viewed as ``(batch_size, 1152, 10, 16, 8)``.
        batch_size: number of samples in ``x``.
        routing_iter: number of routing rounds.

    Returns:
        ``V_J``, the squashed weighted sum produced on the final round.
    """
    capsules_in = (
        x.view(batch_size, 256, 1, 6, 6)
        .repeat(1, 1, 10, 1, 1)
        .view(batch_size, 1152, 10, 8, 1)
    )
    weights = W.repeat(batch_size, 1, 1, 1, 1).view(batch_size, 1152, 10, 16, 8)
    u_hat = torch.matmul(weights, capsules_in)
    # Detached copy used for the intermediate, gradient-free rounds.
    u_hat_stopped = Variable(u_hat.data.clone(), requires_grad=False)

    final_round = routing_iter - 1
    for r_iter in range(routing_iter):
        coupling = F.softmax(b_IJ, dim=2)
        if r_iter == final_round:
            weighted = torch.mul(coupling, u_hat)
            s_J = torch.sum(weighted, dim=1, keepdim=True) + self.bias
            V_J = squash(s_J, self.epsilon)
        else:
            u_hat_np = np.reshape(u_hat_stopped.data.numpy(), (batch_size, 1152, 10, 16, 1))
            weighted_np = coupling.data.numpy() * u_hat_np
            s_J_np = np.sum(weighted_np, axis=1, keepdims=True) + self.bias.data.numpy()
            v_J = squash(torch.Tensor(s_J_np), self.epsilon)
            v_J_tiled = np.tile(v_J.numpy(), (1, 1152, 1, 1, 1))
            u_hat_row = np.reshape(u_hat_np, (batch_size, 1152, 10, 1, 16))
            agreement = np.matmul(u_hat_row, v_J_tiled)
            b_IJ.data += torch.Tensor(agreement)
    return V_J
示例11: probs
def probs(self, generator, outputs, vocab_pointer_switches, context_question_switches,
          context_attention, question_attention,
          context_indices, question_indices,
          oov_to_limited_idx):
    """Mix the generative vocabulary distribution with two pointer distributions.

    The result is the sum of three terms over the (possibly OOV-extended)
    vocabulary: the softmax of ``generator(outputs)`` scaled by
    ``vocab_pointer_switches``, and the context and question attentions
    scattered onto their token indices and scaled by the remaining switch
    mass.  The OOV padding and both pointer buffers start out filled with
    ``EPSILON``.

    Args:
        generator: callable mapping flattened ``outputs`` to vocabulary scores.
        outputs: decoder outputs; the last dimension is the feature size.
        vocab_pointer_switches: weight of the generative distribution.
        context_question_switches: share of the pointer mass given to the context.
        context_attention, question_attention: attention weights to scatter.
        context_indices, question_indices: vocabulary indices of the
            attended tokens.
        oov_to_limited_idx: sized collection; its length is the number of
            extra vocabulary slots.

    Returns:
        Tensor of mixed probabilities over the extended vocabulary.
    """
    out_shape = list(outputs.size())
    out_shape[-1] = self.generative_vocab_size
    flat_outputs = outputs.view(-1, outputs.size(-1))
    scores = generator(flat_outputs).view(out_shape)
    p_vocab = F.softmax(scores, dim=scores.dim()-1)
    scaled_p_vocab = vocab_pointer_switches.expand_as(p_vocab) * p_vocab

    # Extend the vocabulary axis by one EPSILON slot per OOV entry.
    num_oov = len(oov_to_limited_idx)
    if num_oov > 0:
        out_shape[-1] = num_oov
        oov_pad = Variable(scaled_p_vocab.data.new(*out_shape).fill_(EPSILON))
        scaled_p_vocab = torch.cat([scaled_p_vocab, oov_pad], dim=oov_pad.dim()-1)

    full_shape = scaled_p_vocab.size()
    pointer_mass = 1 - vocab_pointer_switches

    # Context pointer distribution.
    p_context_ptr = Variable(scaled_p_vocab.data.new(*full_shape).fill_(EPSILON))
    context_targets = context_indices.unsqueeze(1).expand_as(context_attention)
    p_context_ptr.scatter_add_(p_context_ptr.dim()-1, context_targets, context_attention)
    context_weight = (context_question_switches * pointer_mass).expand_as(p_context_ptr)
    scaled_p_context_ptr = context_weight * p_context_ptr

    # Question pointer distribution.
    p_question_ptr = Variable(scaled_p_vocab.data.new(*full_shape).fill_(EPSILON))
    question_targets = question_indices.unsqueeze(1).expand_as(question_attention)
    p_question_ptr.scatter_add_(p_question_ptr.dim()-1, question_targets, question_attention)
    question_weight = ((1 - context_question_switches) * pointer_mass).expand_as(p_question_ptr)
    scaled_p_question_ptr = question_weight * p_question_ptr

    return scaled_p_vocab + scaled_p_context_ptr + scaled_p_question_ptr
示例12: forward
def forward(self, image_feat, question_embedding):
    """Return attention weights combining the raw scores of ``att1`` and ``att2``.

    The two raw attention scores are added, soft-maxed over dimension 1
    (locations) and expanded to the shape of ``image_feat``.
    """
    combined_scores = (
        self.att1.compute_raw_att(image_feat, question_embedding)
        + self.att2.compute_raw_att(image_feat, question_embedding)
    )
    # softmax across locations
    location_weights = F.softmax(combined_scores, dim=1)
    return location_weights.expand_as(image_feat)
示例13: forward
def forward(self, x):
    """Apply the shared feature extractor and the three ``conv6_*`` heads.

    Returns:
        ``(c, b, a)``: the raw outputs of ``conv6_3`` and ``conv6_2`` and the
        output of ``conv6_1`` soft-maxed over dimension 1.
    """
    feats = self.features(x)
    head_a, head_b, head_c = (
        head(feats) for head in (self.conv6_1, self.conv6_2, self.conv6_3)
    )
    return head_c, head_b, F.softmax(head_a, dim=1)
示例14: probs
def probs(self, generator, outputs, vocab_pointer_switches, context_question_switches,
          context_attention, question_attention,
          context_indices, question_indices,
          oov_to_limited_idx):
    """Mix the generative vocabulary distribution with two pointer distributions.

    The softmax of ``generator(outputs)`` is scaled by
    ``vocab_pointer_switches`` and, when there are OOV entries, extended with
    ``EPSILON``-filled slots.  The context and question attentions, weighted
    by the remaining switch mass, are then scatter-added in place onto their
    token indices.

    Args:
        generator: callable mapping flattened ``outputs`` to vocabulary scores.
        outputs: decoder outputs; the last dimension is the feature size.
        vocab_pointer_switches: weight of the generative distribution.
        context_question_switches: share of the pointer mass given to the context.
        context_attention, question_attention: attention weights to scatter.
        context_indices, question_indices: vocabulary indices of the
            attended tokens.
        oov_to_limited_idx: sized collection; its length is the number of
            extra vocabulary slots.

    Returns:
        Tensor of mixed probabilities over the extended vocabulary.
    """
    out_shape = list(outputs.size())
    out_shape[-1] = self.generative_vocab_size
    flat_outputs = outputs.view(-1, outputs.size(-1))
    scores = generator(flat_outputs).view(out_shape)
    p_vocab = F.softmax(scores, dim=scores.dim()-1)
    mixture = vocab_pointer_switches.expand_as(p_vocab) * p_vocab

    # Extend the vocabulary axis by one EPSILON slot per OOV entry.
    num_oov = len(oov_to_limited_idx)
    if num_oov > 0:
        out_shape[-1] = num_oov
        oov_pad = mixture.new_full(out_shape, EPSILON)
        mixture = torch.cat([mixture, oov_pad], dim=oov_pad.dim()-1)

    pointer_mass = 1 - vocab_pointer_switches

    # p_context_ptr
    context_targets = context_indices.unsqueeze(1).expand_as(context_attention)
    context_weight = (context_question_switches * pointer_mass).expand_as(context_attention)
    mixture.scatter_add_(mixture.dim()-1, context_targets, context_weight * context_attention)

    # p_question_ptr
    question_targets = question_indices.unsqueeze(1).expand_as(question_attention)
    question_weight = ((1 - context_question_switches) * pointer_mass).expand_as(question_attention)
    mixture.scatter_add_(mixture.dim()-1, question_targets, question_weight * question_attention)

    return mixture
示例15: forward_dot
def forward_dot(self, hid, ctx, ctx_mask):
    r"""Computes Luong-style dot attention probabilities between
    decoder's hidden state and source annotations.

    Padded source positions (zeros in ``ctx_mask``) are excluded from the
    softmax, so they receive (numerically) zero attention.  Passing
    ``ctx_mask=None`` skips the masking.

    Arguments:
        hid(Variable): A set of decoder hidden states of shape `T*B*H`
            where `T` == 1, `B` is batch dim and `H` is hidden state dim.
        ctx(Variable): A set of annotations of shape `S*B*C` where `S`
            is the source timestep dim, `B` is batch dim and `C`
            is annotation dim.
        ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes
            in the padded timesteps, or ``None``.

    Returns:
        scores(Variable): A variable of shape `T*B*S` containing normalized
            attention scores for each position and sample.
        z_t(Variable): A variable of shape `T*B*H` containing the final
            attended context vector for this target decoding timestep.
    """
    # Apply transformations first to make last dims both C and then
    # shuffle dims to prepare for batch mat-mult
    ctx_ = self.ctx2ctx(ctx).permute(1, 2, 0)   # S*B*C -> S*B*C -> B*C*S
    hid_ = self.hid2ctx(hid).permute(1, 0, 2)   # T*B*H -> T*B*C -> B*T*C

    # Raw 'dot' scores of B*T*S
    logits = torch.bmm(hid_, ctx_)

    if ctx_mask is not None:
        # S*B -> B*1*S so the mask broadcasts over the T dimension; a large
        # negative score drives the softmax weight of padded steps to zero.
        padded = (ctx_mask == 0).transpose(0, 1).unsqueeze(1)
        logits = logits.masked_fill(padded, -1e8)

    scores = F.softmax(logits, dim=-1)

    # Transform back to hidden_dim for further decoders
    # B*T*S x B*S*C -> B*T*C -> B*T*H
    z_t = self.ctx2hid(torch.bmm(scores, ctx.transpose(0, 1)))

    return scores.transpose(0, 1), z_t.transpose(0, 1)