This article collects typical usage examples of the Python method torch.autograd.Variable.unsqueeze. If you are unsure how to use Variable.unsqueeze, or are looking for concrete examples of it, the curated code samples below may help. You can also explore further usage examples of the containing class, torch.autograd.Variable.
The following lists 15 code examples of Variable.unsqueeze, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
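Before the curated examples, here is a minimal, self-contained sketch (not taken from any of the examples below) of what unsqueeze itself does: it inserts a new dimension of size 1 at the given position. The old Variable wrapper is used only to match the style of the examples; on modern PyTorch, plain tensors behave the same way.

import torch
from torch.autograd import Variable

v = Variable(torch.randn(4))   # shape: (4,)
row = v.unsqueeze(0)           # shape: (1, 4) -- add a leading batch dimension
col = v.unsqueeze(1)           # shape: (4, 1) -- add a trailing dimension
print(row.size(), col.size())  # torch.Size([1, 4]) torch.Size([4, 1])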
Example 1: update_parameters
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def update_parameters(self, batch):
    state_batch = Variable(torch.cat(batch.state))
    action_batch = Variable(torch.cat(batch.action))
    reward_batch = Variable(torch.cat(batch.reward))
    mask_batch = Variable(torch.cat(batch.mask))
    next_state_batch = Variable(torch.cat(batch.next_state))

    next_action_batch = self.actor_target(next_state_batch)
    next_state_action_values = self.critic_target(next_state_batch, next_action_batch)

    reward_batch = reward_batch.unsqueeze(1)
    mask_batch = mask_batch.unsqueeze(1)
    expected_state_action_batch = reward_batch + (self.gamma * mask_batch * next_state_action_values)

    # Critic update: regress Q(s, a) toward the TD target computed above.
    self.critic_optim.zero_grad()
    state_action_batch = self.critic(state_batch, action_batch)
    value_loss = F.mse_loss(state_action_batch, expected_state_action_batch)
    value_loss.backward()
    self.critic_optim.step()

    # Actor update: maximize the critic's value of the actor's actions.
    self.actor_optim.zero_grad()
    policy_loss = -self.critic(state_batch, self.actor(state_batch))
    policy_loss = policy_loss.mean()
    policy_loss.backward()
    self.actor_optim.step()

    soft_update(self.actor_target, self.actor, self.tau)
    soft_update(self.critic_target, self.critic, self.tau)

    return value_loss.item(), policy_loss.item()
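The soft_update helper called at the end of Example 1 is not part of the listing. A minimal sketch, assuming it performs the usual Polyak averaging of target-network parameters (target <- tau * source + (1 - tau) * target), could look like this:

# Hypothetical helper, assuming conventional Polyak averaging for DDPG-style
# target networks; not taken from the example's source repository.
def soft_update(target, source, tau):
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)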
Example 2: forward
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def forward(self, x, target=None):
    if self.training:
        assert target is not None
        fy_i = x.matmul(self.weight)
        batch_size = fy_i.size(0)
        fy_i_target = fy_i[list(range(batch_size)), target.data]

        weight_target_norm = self.weight[:, target.data].norm(p=2, dim=0)
        x_norm = x.norm(p=2, dim=1)
        norm_mul = weight_target_norm * x_norm
        cos = fy_i_target / norm_mul
        sin = 1 - cos**2  # note: this is sin^2(theta)
        k = self.find_k(cos)

        num_ns = self.margin // 2 + 1
        binom = Variable(x.data.new(self.binom))
        cos_exp = Variable(x.data.new(self.cos_exp))
        sin_exp = Variable(x.data.new(self.sin_exp))
        signs = Variable(x.data.new(self.signs))

        cos_terms = cos.unsqueeze(1) ** cos_exp.unsqueeze(0)
        sin_terms = sin.unsqueeze(1) ** sin_exp.unsqueeze(0)
        cosm_terms = signs.unsqueeze(0) * binom.unsqueeze(0) * cos_terms * sin_terms
        cosm = cosm_terms.sum(1)

        fy_i_target = norm_mul * (((-1)**k * cosm) - 2*k)
        fy_i[list(range(batch_size)), target.data] = fy_i_target
        return fy_i
    else:
        assert target is None
        return x.matmul(self.weight)
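The buffers self.binom, self.cos_exp, self.sin_exp and self.signs used in Example 2 encode the multiple-angle expansion cos(m*theta) = sum_n (-1)^n * C(m, 2n) * cos(theta)^(m-2n) * (sin^2(theta))^n. A minimal sketch of how such buffers could be precomputed (hypothetical helper with assumed names, not taken from the example's source) is:

from math import comb
import numpy as np

def make_lsoftmax_buffers(margin):
    # One term per n = 0 .. floor(margin / 2) in the cos(m*theta) expansion.
    ns = np.arange(margin // 2 + 1)
    binom = np.array([comb(margin, 2 * n) for n in ns], dtype=np.float32)
    cos_exp = (margin - 2 * ns).astype(np.float32)  # exponents applied to cos(theta)
    sin_exp = ns.astype(np.float32)                 # exponents applied to sin^2(theta)
    signs = ((-1.0) ** ns).astype(np.float32)       # alternating signs (-1)^n
    return binom, cos_exp, sin_exp, signs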
Example 3: update_parameters
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def update_parameters(self, batch):
    state_batch = Variable(torch.cat(batch.state))
    action_batch = Variable(torch.cat(batch.action))
    reward_batch = Variable(torch.cat(batch.reward))
    mask_batch = Variable(torch.cat(batch.mask))
    next_state_batch = Variable(torch.cat(batch.next_state))

    _, _, next_state_values = self.target_model((next_state_batch, None))

    reward_batch = reward_batch.unsqueeze(1)
    mask_batch = mask_batch.unsqueeze(1)
    # TD target: r + gamma * mask * V(s')
    expected_state_action_values = reward_batch + (self.gamma * mask_batch * next_state_values)

    _, state_action_values, _ = self.model((state_batch, action_batch))

    loss = MSELoss(state_action_values, expected_state_action_values)

    self.optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm(self.model.parameters(), 1)
    self.optimizer.step()

    soft_update(self.target_model, self.model, self.tau)

    return loss.item(), 0
Example 4: forward
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def forward(self, input):
    W, x, Y = input
    N = Y.size(-1)
    bs = Y.size(0)
    mask1 = Variable((W.data[:, :, :, -1] > 0).float())
    mask2 = Variable(W.data[:, :, :, 0].float().sum(2))
    U = Variable(W.data[:, :, :, -1])
    xB = self.beta(x) * mask2.unsqueeze(2).expand_as(x)  # has size (bs, N, R)
    Y = torch.bmm(xB, x.permute(0, 2, 1)) - (1 - mask1) * 10000
    Y = F.softmax(Y.permute(2, 1, 0)).permute(2, 1, 0)
    # Y = (Y + Y.permute(0, 2, 1)) / 2
    # Y = Y * mask1
    # print(Y[0], mask1)
    x = gmul((W, x, Y))  # out has size (bs, N, num_inputs)
    x_size = x.size()
    x = x.contiguous()
    x = x.view(-1, self.num_inputs)
    if self.last:
        x1 = self.fc1(x)
    else:
        x1 = F.relu(self.fc1(x))  # has size (bs*N, num_outputs // 2)
    x2 = self.fc2(x)
    x = torch.cat((x1, x2), 1)
    x = x.view(*x_size[:-1], self.num_outputs)
    x = x * mask2.unsqueeze(2).expand_as(x)
    # x = self.bn_instance(x.permute(0, 2, 1)).permute(0, 2, 1)
    x = bnorm(x, U)
    x = x * self.gamma.unsqueeze(0).unsqueeze(1).expand_as(x)
    return W, x, Y
Example 5: forward
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def forward(self, tokens: torch.Tensor, mask: torch.Tensor = None):  # pylint: disable=arguments-differ
    if mask is not None:
        tokens = tokens * mask.unsqueeze(-1).float()

    # Our input has shape `(batch_size, num_tokens, embedding_dim)`, so we sum out the
    # `num_tokens` dimension.
    summed = tokens.sum(1)

    if self._averaged:
        if mask is not None:
            lengths = get_lengths_from_binary_sequence_mask(mask)
            length_mask = (lengths > 0)

            # Set any length 0 to 1, to avoid dividing by zero.
            lengths = torch.max(lengths, Variable(lengths.data.new().resize_(1).fill_(1)))
        else:
            lengths = Variable(tokens.data.new().resize_(1).fill_(tokens.size(1)),
                               requires_grad=False)
            length_mask = None

        summed = summed / lengths.unsqueeze(-1).float()

        if length_mask is not None:
            summed = summed * (length_mask > 0).float().unsqueeze(-1)

    return summed
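Example 5 relies on get_lengths_from_binary_sequence_mask from AllenNLP's allennlp.nn.util. A minimal stand-in, assuming the mask is a (batch_size, num_tokens) tensor of 0/1 values, would be:

# Sketch of the helper under the assumption above: the length of each
# sequence is simply the per-row sum of its binary mask.
def get_lengths_from_binary_sequence_mask(mask):
    return mask.long().sum(-1)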
Example 6: image_cb
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def image_cb(self, camera_name, data):
    # Track inference time.
    start = timer()

    # Resize image and convert to tensor.
    np_arr = np.fromstring(data.data, np.uint8)
    image_np = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
    image_np = cv2.resize(image_np, dsize=(self.resize_width, self.resize_height))
    img_to_tensor = Image.fromarray(image_np)
    img_tensor = transform(img_to_tensor)
    if not self.force_cpu:
        img_tensor = Variable(img_tensor.unsqueeze(0)).cuda()
    else:
        img_tensor = Variable(img_tensor.unsqueeze(0))

    # Inference.
    output = self.graph(img_tensor)
    output_data = output.cpu().data.numpy()[0][0]

    # Network output values above threshold are lines.
    im_threshold = output_data > self.line_thresh

    # Get world coordinates of detected lines.
    world_points = self.world_point_arrays[camera_name][im_threshold]

    # Publish segmentation map.
    im_threshold = np.uint8(255 * im_threshold)
    cv_output = cv2.cvtColor(im_threshold, cv2.COLOR_GRAY2BGR)
    msg_out = self.bridge.cv2_to_imgmsg(cv_output, 'bgr8')
    msg_out.header.stamp = data.header.stamp
    self.im_publishers[camera_name].publish(msg_out)

    # Publish pointcloud.
    cloud_msg = PointCloud2()
    cloud_msg.header.stamp = data.header.stamp
    cloud_msg.header.frame_id = 'base_footprint'
    cloud_msg.height = 1
    cloud_msg.width = len(world_points)
    cloud_msg.fields = [
        PointField('x', 0, PointField.FLOAT32, 1),
        PointField('y', 4, PointField.FLOAT32, 1),
        PointField('z', 8, PointField.FLOAT32, 1)]
    cloud_msg.is_bigendian = False
    cloud_msg.point_step = 12
    cloud_msg.row_step = 3 * len(world_points)
    cloud_msg.data = world_points.tostring()
    self.cloud_publishers[camera_name].publish(cloud_msg)

    end = timer()
Example 7: infer
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def infer(net, img, transform, thresh, cuda, shrink):
    if shrink != 1:
        img = cv2.resize(img, None, None, fx=shrink, fy=shrink,
                         interpolation=cv2.INTER_LINEAR)
    x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)
    x = Variable(x.unsqueeze(0), volatile=True)
    if cuda:
        x = x.cuda()
    # print(shrink, x.shape)
    y = net(x)  # forward pass
    detections = y.data
    # scale each detection back up to the image
    scale = torch.Tensor([img.shape[1] / shrink, img.shape[0] / shrink,
                          img.shape[1] / shrink, img.shape[0] / shrink])
    det = []
    for i in range(detections.size(1)):
        j = 0
        while detections[0, i, j, 0] >= thresh:
            score = detections[0, i, j, 0]
            # label_name = labelmap[i-1]
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            coords = (pt[0], pt[1], pt[2], pt[3])
            det.append([pt[0], pt[1], pt[2], pt[3], score])
            j += 1
    if len(det) == 0:
        det = [[0.1, 0.1, 0.2, 0.2, 0.01]]
    det = np.array(det)
    keep_index = np.where(det[:, 4] >= 0)[0]
    det = det[keep_index, :]
    return det
Example 8: predict2
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def predict2(self, x_de, beamsz, gen_len):
    emb_de = self.embedding_de(x_de)  # "batch size",n_de,word_dim, but "batch size" is 1 in this case!
    h0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
    c0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
    enc_h, _ = self.encoder(emb_de, (h0, c0))
    # since enc batch size=1, enc_h is 1,n_de,hiddensz*n_directions
    if self.directions == 2:
        enc_h = self.dim_reduce(enc_h)  # 1,n_de,hiddensz
    masterheap = CandList(self.n_layers, self.hidden_dim, enc_h.size(1), beamsz)
    # in the following loop, beamsz is 1 for the first iteration and the true beamsz (100) afterward
    for i in range(gen_len):
        prev = masterheap.get_prev()  # beamsz
        emb_t = self.embedding_en(prev)  # embed the last thing we generated. beamsz,word_dim
        enc_h_expand = enc_h.expand(prev.size(0), -1, -1)  # beamsz,n_de,hiddensz
        h, c = masterheap.get_hiddens()  # (n_layers,beamsz,hiddensz),(n_layers,beamsz,hiddensz)
        dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))  # dec_h is beamsz,1,hiddensz (batch_first=True)
        scores = torch.bmm(enc_h_expand, dec_h.transpose(1, 2)).squeeze(2)
        # (beamsz,n_de,hiddensz) * (beamsz,hiddensz,1) = (beamsz,n_de,1). squeeze to beamsz,n_de
        attn_dist = F.softmax(scores, dim=1)
        if self.attn_type == "hard":
            _, argmax = attn_dist.max(1)  # beamsz. for each beam, select most likely german word to pay attention to
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1).cuda())
            context = torch.bmm(one_hot.unsqueeze(1), enc_h_expand).squeeze(1)
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h_expand).squeeze(1)
        # the difference btwn hard and soft is just whether we use a one_hot or a distribution
        # context is beamsz,hiddensz*n_directions
        pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))  # beamsz,len(EN.vocab)
        # TODO: set the columns corresponding to <pad>,<unk>,</s>,etc to 0
        masterheap.update_beam(pred)
        masterheap.update_hiddens(h, c)
        masterheap.update_attentions(attn_dist)
        masterheap.firstloop = False
    return masterheap.probs, masterheap.wordlist, masterheap.attentions
Example 9: train
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def train(ep):
    model.train()
    total_loss = 0
    count = 0
    train_idx_list = np.arange(len(X_train), dtype="int32")
    np.random.shuffle(train_idx_list)

    for idx in train_idx_list:
        data_line = X_train[idx]
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        output = model(x.unsqueeze(0)).squeeze(0)
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.data[0]
        count += output.size(0)

        loss.backward()
        # Clip after backward() so the freshly computed gradients are the ones clipped.
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        if idx > 0 and idx % args.log_interval == 0:
            cur_loss = total_loss / count
            print("Epoch {:2d} | lr {:.5f} | loss {:.5f}".format(ep, lr, cur_loss))
            total_loss = 0.0
            count = 0
Example 10: predict
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def predict(self, x, attn_type="hard"):
    # predict with greedy decoding
    emb = self.embedding(x)
    h = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
    c = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
    enc_h, _ = self.encoder(emb, (h, c))
    y = [Variable(torch.zeros(x.size(0)).long())]
    self.attn = []
    for t in range(x.size(1)):
        emb_t = self.embedding(y[-1])
        dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))
        scores = torch.bmm(enc_h, dec_h.transpose(1, 2)).squeeze(2)
        attn_dist = F.softmax(scores, dim=1)
        self.attn.append(attn_dist.data)
        if attn_type == "hard":
            _, argmax = attn_dist.max(1)
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1))
            context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
        pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))
        _, next_token = pred.max(1)
        y.append(next_token)
    self.attn = torch.stack(self.attn, 0).transpose(0, 1)
    return torch.stack(y, 0).transpose(0, 1)
Example 11: predict
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def predict(self, x_de, x_en):
    bs = x_de.size(0)
    emb_de = self.embedding_de(x_de)  # bs,n_de,word_dim
    emb_en = self.embedding_en(x_en)  # bs,n_en,word_dim
    h = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
    c = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
    enc_h, _ = self.encoder(emb_de, (h, c))
    dec_h, _ = self.decoder(emb_en, (h, c))
    # all the same. enc_h is bs,n_de,hiddensz*n_directions. h and c are both n_layers*n_directions,bs,hiddensz
    if self.directions == 2:
        enc_h = self.dim_reduce(enc_h)  # bs,n_de,hiddensz
    scores = torch.bmm(enc_h, dec_h.transpose(1, 2))
    # (bs,n_de,hiddensz) * (bs,hiddensz,n_en) = (bs,n_de,n_en)
    y = [Variable(torch.cuda.LongTensor([sos_token]*bs))]  # bs
    self.attn = []
    for t in range(x_en.size(1)-1):  # iterate over english words, with teacher forcing
        attn_dist = F.softmax(scores[:, :, t], dim=1)  # bs,n_de
        self.attn.append(attn_dist.data)
        if self.attn_type == "hard":
            _, argmax = attn_dist.max(1)  # bs. for each batch, select most likely german word to pay attention to
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1).cuda())
            context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
        # the difference btwn hard and soft is just whether we use a one_hot or a distribution
        # context is bs,hiddensz
        pred = self.vocab_layer(torch.cat([dec_h[:, t, :], context], 1))  # bs,len(EN.vocab)
        _, next_token = pred.max(1)  # bs
        y.append(next_token)
    self.attn = torch.stack(self.attn, 0).transpose(0, 1)  # bs,n_en,n_de (for visualization!)
    y = torch.stack(y, 0).transpose(0, 1)  # bs,n_en
    return y, self.attn
Example 12: accumulate_gradient
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def accumulate_gradient(self, batch_sz, states, actions, rewards,
                        next_states, mask):
    """ Compute the temporal difference error.
        td_error = (r + gamma * max Q(s_,a)) - Q(s,a)
    """
    states = Variable(states)
    actions = Variable(actions)
    rewards = Variable(rewards)
    next_states = Variable(next_states, volatile=True)

    # Compute Q(s, a)
    q_values = self.policy(states)
    q_values = q_values.gather(1, actions.unsqueeze(1))

    # Compute Q(s_, a)
    q_target_values = None
    if next_states.is_cuda:
        q_target_values = Variable(torch.zeros(batch_sz).cuda())
    else:
        q_target_values = Variable(torch.zeros(batch_sz))

    # Bootstrap for non-terminal states
    q_target_values[mask] = self.target_policy(next_states).max(1)[0][mask]
    q_target_values.volatile = False  # So we don't mess the huber loss
    expected_q_values = (q_target_values * self.gamma) + rewards

    # Compute Huber loss
    loss = F.smooth_l1_loss(q_values, expected_q_values)

    # Accumulate gradients
    loss.backward()
Example 13: forward
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def forward(self, x_de, x_en, update_baseline=True):
    bs = x_de.size(0)
    # x_de is bs,n_de. x_en is bs,n_en
    emb_de = self.embedding_de(x_de)  # bs,n_de,word_dim
    emb_en = self.embedding_en(x_en)  # bs,n_en,word_dim
    h0_enc = torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda()
    c0_enc = torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda()
    h0_dec = torch.zeros(self.n_layers, bs, self.hidden_dim).cuda()
    c0_dec = torch.zeros(self.n_layers, bs, self.hidden_dim).cuda()
    # hidden vars have dimension n_layers*n_directions,bs,hiddensz
    enc_h, _ = self.encoder(emb_de, (Variable(h0_enc), Variable(c0_enc)))
    # enc_h is bs,n_de,hiddensz*n_directions. ordering is different from last week because batch_first=True
    dec_h, _ = self.decoder(emb_en, (Variable(h0_dec), Variable(c0_dec)))
    # dec_h is bs,n_en,hidden_size*n_directions
    # we've gotten our encoder/decoder hidden states so we are ready to do attention
    # first let's get all our scores, which we can do easily since we are using dot-prod attention
    if self.directions == 2:
        scores = torch.bmm(self.dim_reduce(enc_h), dec_h.transpose(1, 2))
        # TODO: any easier ways to reduce dimension?
    else:
        scores = torch.bmm(enc_h, dec_h.transpose(1, 2))
    # (bs,n_de,hiddensz*n_directions) * (bs,hiddensz*n_directions,n_en) = (bs,n_de,n_en)
    reinforce_loss = 0  # we only use this variable for hard attention
    loss = 0
    avg_reward = 0
    # we just iterate to dec_h.size(1)-1, since there's </s> at the end of each sentence
    for t in range(dec_h.size(1)-1):  # iterate over english words, with teacher forcing
        attn_dist = F.softmax(scores[:, :, t], dim=1)  # bs,n_de. these are the alphas (attention scores for each german word)
        if self.attn_type == "hard":
            cat = torch.distributions.Categorical(attn_dist)
            attn_samples = cat.sample()  # bs. each element is a sample from categorical distribution
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, attn_samples.data.unsqueeze(1), 1).cuda())  # bs,n_de
            # made a bunch of one-hot vectors
            context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
            # now we use the one-hot vectors to select correct hidden vectors from enc_h
            # (bs,1,n_de) * (bs,n_de,hiddensz*n_directions) = (bs,1,hiddensz*n_directions). squeeze to bs,hiddensz*n_directions
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)  # same dimensions
            # (bs,1,n_de) * (bs,n_de,hiddensz*n_directions) = (bs,1,hiddensz*n_directions)
        # context is bs,hidden_size*n_directions
        # the rnn output and the context together make the decoder "hidden state", which is bs,2*hidden_size*n_directions
        pred = self.vocab_layer(torch.cat([dec_h[:, t, :], context], 1))  # bs,len(EN.vocab)
        y = x_en[:, t+1]  # bs. these are our labels
        no_pad = (y != pad_token)  # exclude english padding tokens
        reward = torch.gather(pred, 1, y.unsqueeze(1))  # bs,1
        # reward[i,1] = pred[i,y[i]]. this gets log prob of correct word for each batch. similar to -crossentropy
        reward = reward.squeeze(1)[no_pad]  # less than bs
        if self.attn_type == "hard":
            reinforce_loss -= (cat.log_prob(attn_samples[no_pad]) * (reward - self.baseline).detach()).sum()
            # reinforce rule (just read the formula), with special baseline
        loss -= reward.sum()  # minimizing loss is maximizing reward
    no_pad_total = (x_en[:, 1:] != pad_token).data.sum()  # TODO: i think this is right, right?
    loss /= no_pad_total
    reinforce_loss /= no_pad_total
    avg_reward = -loss.data[0]
    if update_baseline:  # update baseline as a moving average
        self.baseline = Variable(0.95*self.baseline.data + 0.05*avg_reward)
    return loss, reinforce_loss, avg_reward
Example 14: evaluate
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def evaluate(encoder, decoder, in_lang, max_length=MAX_LENGTH):
    if use_cuda:
        in_lang = in_lang.cuda()
    input_variable = Variable(in_lang)
    input_variable = input_variable.unsqueeze(0)
    input_length = input_variable.size(1)
    encoder_hidden = encoder.initHidden()

    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_variable[:, ei],
                                                 encoder_hidden)
        encoder_outputs[ei] = encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    decoder_hidden = encoder_hidden
    decoded_words = []
    decoder_attentions = torch.zeros(max_length, max_length)

    if use_attn:
        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            if ni == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(lang_dataset.output_lang.index2word[ni])
            decoder_input = Variable(torch.LongTensor([[ni]]))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    else:
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            if ni == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(lang_dataset.output_lang.index2word[ni])
            decoder_input = Variable(torch.LongTensor([[ni]]))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    if use_attn:
        return decoded_words, decoder_attentions[:di + 1]
    else:
        return decoded_words
Example 15: fwd_merge
# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import unsqueeze [as alias]
def fwd_merge(self, Inputs_N, target, Phis, Bs, lp,
              batch, depth, mode='train', epoch=0):
    # Flow backwards
    Phis, Bs, Inputs_N = Phis[::-1], Bs[::-1], Inputs_N[::-1]
    length = self.merge.n
    perm = (torch.range(0.0, length)
            .unsqueeze(0).expand(self.batch_size, length + 1))
    perm = Variable(perm, requires_grad=False).type(dtype_l)
    ind = perm[:, :-1].clone()
    prob_matrix = Variable(torch.eye(length + 1)).type(dtype)
    prob_matrix = prob_matrix.unsqueeze(0).expand(self.batch_size,
                                                  length + 1, length + 1)
    # concatenate pad_token to input
    pad_token = (self.merge.pad_token[:-1].unsqueeze(0)
                 .expand(self.batch_size, 1, self.input_size))
    input = torch.cat((pad_token, Inputs_N[0]), 1)
    phis = Phis[0]
    input_target = torch.cat((pad_token, Inputs_N[-1]), 1)
    input_scale = input
    input_norm = input_scale
    Perms = [perm]
    Points = [input_scale]
    for i, scale in enumerate(range(depth)):
        if scale < depth - 1:
            # fine scales
            prob_sc = self.merge(input_scale, phis)
            input_norm = torch.cat((pad_token, Inputs_N[scale + 1]), 1)
            phis = Phis[scale + 1]
            prob_sc, ind, phis, _ = self.eliminate_rows(prob_sc, ind, phis)
            comb = self.combine_matrices(prob_matrix, prob_sc, perm,
                                         last=False)
            prob_matrix, _, perm = comb
            # postprocess before feeding to next scale
            hard_out, soft_out = self.outputs(input_norm,
                                              prob_matrix, perm)
            input_scale = hard_out
        else:
            # coarsest scale
            if mode == 'test':
                prob_sc = self.merge(input_scale, phis,
                                     input_target=None,
                                     target=None)
            else:
                prob_sc = self.merge(input_scale, phis,
                                     input_target=input_target,
                                     target=target)
            comb = self.combine_matrices(prob_matrix, prob_sc, perm,
                                         last=True)
            prob_matrix, prob_sc, perm = comb
            hard_out, soft_out = self.outputs(input, prob_matrix, perm)
            loss, pg_loss = self.merge.compute_loss(prob_matrix, target,
                                                    lp=lp)
        Perms.append(perm)
        Points.append(input_norm)
    return loss, pg_loss, Perms