This article collects typical usage examples of the Python function torch.zeros_like. If you are unsure what zeros_like does, how to call it, or where it shows up in real code, the curated examples below should help.
The sections that follow present 15 code examples of zeros_like, ordered by popularity by default; upvoting the examples you find useful helps surface better Python samples.
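Before diving into the examples, here is a minimal, self-contained sketch of what torch.zeros_like itself does: it returns a tensor of zeros with the same shape, dtype, and device as its argument, any of which can be overridden by keyword.

import torch

x = torch.randn(2, 3, dtype=torch.float64)
z = torch.zeros_like(x)                               # zeros with the same shape/dtype/device as x
print(z.shape, z.dtype)                               # torch.Size([2, 3]) torch.float64
print(torch.zeros_like(x, dtype=torch.int32).dtype)   # keyword overrides are allowed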
Example 1: __init__
def __init__(self, block, layers, c_out=1000):
    self.inplanes = 64
    super(XResNet, self).__init__()
    self.conv1 = conv2d(3, 32, 2)
    self.conv2 = conv2d(32, 32, 1)
    self.conv3 = conv2d(32, 64, 1)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Linear(512 * block.expansion, c_out)
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
    for m in self.modules():
        if isinstance(m, BasicBlock): m.bn2.weight = nn.Parameter(torch.zeros_like(m.bn2.weight))
        if isinstance(m, Bottleneck): m.bn3.weight = nn.Parameter(torch.zeros_like(m.bn3.weight))
        if isinstance(m, nn.Linear): m.weight.data.normal_(0, 0.01)
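The second loop above uses torch.zeros_like to zero out the gamma of the last BatchNorm layer in each residual block, so every block starts out as (roughly) an identity mapping. A minimal sketch of the same trick on a standalone BatchNorm2d, not part of the original snippet:

import torch
import torch.nn as nn

bn = nn.BatchNorm2d(64)                                  # gamma defaults to all ones
bn.weight = nn.Parameter(torch.zeros_like(bn.weight))    # "zero-gamma" initialization
assert torch.all(bn.weight == 0)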
Example 2: scale_tensor
def scale_tensor(tensor, scale):
    """
    Safely scale a tensor without increasing its ``.shape``.
    This avoids NANs by assuming ``inf * 0 = 0 * inf = 0``.
    """
    if isinstance(tensor, numbers.Number):
        if isinstance(scale, numbers.Number):
            return tensor * scale
        elif tensor == 0:
            return torch.zeros_like(scale)
        elif tensor == 1:
            return scale
        else:
            return tensor * scale  # general numeric case: plain multiplication
    if isinstance(scale, numbers.Number):
        if scale == 0:
            return torch.zeros_like(tensor)
        elif scale == 1:
            return tensor
        else:
            return tensor * scale
    result = tensor * scale
    result[(scale == 0).expand_as(result)] = 0  # avoid NANs
    if result.shape != tensor.shape:
        raise ValueError("Broadcasting error: scale is incompatible with tensor: "
                         "{} vs {}".format(scale.shape, tensor.shape))
    return result
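A small, self-contained illustration of why the masking step in scale_tensor matters (toy tensors, not from the original code): multiplying an infinite value by a zero scale would otherwise produce NaN.

import torch

tensor = torch.tensor([float('inf'), 2.0, 3.0])
scale = torch.tensor([0.0, 1.0, 2.0])

naive = tensor * scale                    # inf * 0 -> nan
safe = tensor * scale
safe[(scale == 0).expand_as(safe)] = 0    # the same masking used above
print(naive)                              # tensor([nan, 2., 6.])
print(safe)                               # tensor([0., 2., 6.])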
Example 3: step
def step(self, closure=None):
    """Performs a single optimization step.
    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()
    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
            state = self.state[p]
            # State initialization
            if len(state) == 0:
                state['step'] = 0
                # Exponential moving average of gradient values
                state['exp_avg'] = torch.zeros_like(p.data)
                # Exponential moving average of squared gradient values
                state['exp_avg_sq'] = torch.zeros_like(p.data)
            exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
            beta1, beta2 = group['betas']
            state['step'] += 1
            if group['weight_decay'] != 0:
                grad = grad.add(group['weight_decay'], p.data)
            if state['step'] > 1:
                prev_bias_correction1 = 1 - beta1 ** (state['step'] - 1)
                prev_bias_correction2 = 1 - beta2 ** (state['step'] - 1)
                # Hypergradient for Adam:
                h = torch.dot(grad.view(-1), torch.div(exp_avg, exp_avg_sq.sqrt().add_(group['eps'])).view(-1)) * math.sqrt(prev_bias_correction2) / prev_bias_correction1
                # Hypergradient descent of the learning rate:
                tmp = group['hypergrad_lr'] * h
                group['lr'] += tmp.double().cpu()
            # Decay the first and second moment running average coefficient
            exp_avg.mul_(beta1).add_(1 - beta1, grad)
            exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
            denom = exp_avg_sq.sqrt().add_(group['eps'])
            bias_correction1 = 1 - beta1 ** state['step']
            bias_correction2 = 1 - beta2 ** state['step']
            step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
            p.data.addcdiv_(-step_size, exp_avg, denom)
    return loss
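To make the hypergradient step above concrete, here is a toy, self-contained recomputation of the scalar h on random tensors (illustrative values only; the variable names mirror the snippet):

import math
import torch

grad = torch.randn(100)          # current flattened gradient
exp_avg = torch.randn(100)       # first-moment estimate from the previous step
exp_avg_sq = torch.rand(100)     # second-moment estimate from the previous step
beta1, beta2, eps, step = 0.9, 0.999, 1e-8, 10

prev_bc1 = 1 - beta1 ** (step - 1)
prev_bc2 = 1 - beta2 ** (step - 1)
# h is the inner product of the gradient with the previous Adam update direction
h = torch.dot(grad, exp_avg / (exp_avg_sq.sqrt() + eps)) * math.sqrt(prev_bc2) / prev_bc1
lr = 1e-3
lr = lr + 1e-8 * h.item()        # hypergradient descent on the learning rate itself
print(lr)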
Example 4: step
def step(self, closure=None):
    """Performs a single optimization step.
    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()
    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
            state = self.state[p]
            # State initialization
            if len(state) == 0:
                state['step'] = 0
                # Exponential moving average of gradient values
                state['exp_avg'] = torch.zeros_like(p.data)
                # Exponential moving average of squared gradient values
                state['exp_avg_sq'] = torch.zeros_like(p.data)
            exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
            beta1, beta2 = group['b1'], group['b2']
            state['step'] += 1
            # Add grad clipping
            if group['max_grad_norm'] > 0:
                clip_grad_norm_(p, group['max_grad_norm'])
            # Decay the first and second moment running average coefficient
            exp_avg.mul_(beta1).add_(1 - beta1, grad)
            exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
            denom = exp_avg_sq.sqrt().add_(group['e'])
            bias_correction1 = 1 - beta1 ** state['step']
            bias_correction2 = 1 - beta2 ** state['step']
            schedule_fct = SCHEDULES[group['schedule']]
            lr_scheduled = group['lr'] * schedule_fct(state['step'] / group['t_total'], group['warmup'])
            step_size = lr_scheduled * math.sqrt(bias_correction2) / bias_correction1
            p.data.addcdiv_(-step_size, exp_avg, denom)
            # Add weight decay at the end (fixed version)
            if (len(p.size()) > 1 or group['vector_l2']) and group['l2'] > 0:
                p.data.add_(-lr_scheduled * group['l2'], p.data)
    return loss
Example 5: manual_forget_mult
def manual_forget_mult(x, f, h=None, batch_first=True, backward=False):
    if batch_first: x, f = x.transpose(0, 1), f.transpose(0, 1)
    out = torch.zeros_like(x)
    prev = h if h is not None else torch.zeros_like(out[0])
    idx_range = range(x.shape[0]-1, -1, -1) if backward else range(x.shape[0])
    for i in idx_range:
        out[i] = f[i] * x[i] + (1-f[i]) * prev
        prev = out[i]
    if batch_first: out = out.transpose(0, 1)
    return out
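A quick sanity check of the recurrence above, reusing the manual_forget_mult defined in this example (expected values worked out by hand):

import torch

x = torch.tensor([[[1.0], [2.0], [3.0]]])   # batch=1, seq_len=3, hidden=1
f = torch.tensor([[[0.5], [0.5], [0.5]]])   # forget gates
out = manual_forget_mult(x, f)
# step 0: 0.5*1 + 0.5*0    = 0.5
# step 1: 0.5*2 + 0.5*0.5  = 1.25
# step 2: 0.5*3 + 0.5*1.25 = 2.125
print(out)   # tensor([[[0.5000], [1.2500], [2.1250]]])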
Example 6: step
def step(self, closure=None):
    """Performs a single optimization step.
    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()
    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError('RMSprop does not support sparse gradients')
            state = self.state[p]
            # State initialization
            if len(state) == 0:
                state['step'] = 0
                state['square_avg'] = torch.zeros_like(p.data)
                if group['momentum'] > 0:
                    state['momentum_buffer'] = torch.zeros_like(p.data)
                if group['centered']:
                    state['grad_avg'] = torch.zeros_like(p.data)
            square_avg = state['square_avg']
            alpha = group['alpha']
            state['step'] += 1
            if group['weight_decay'] != 0:
                grad = grad.add(group['weight_decay'], p.data)
            square_avg.mul_(alpha).addcmul_(1 - alpha, grad, grad)
            if group['centered']:
                grad_avg = state['grad_avg']
                grad_avg.mul_(alpha).add_(1 - alpha, grad)
                avg = square_avg.addcmul(-1, grad_avg, grad_avg).sqrt().add_(group['eps'])
            else:
                avg = square_avg.sqrt().add_(group['eps'])
            if group['momentum'] > 0:
                buf = state['momentum_buffer']
                buf.mul_(group['momentum']).addcdiv_(grad, avg)
                p.data.add_(-group['lr'], buf)
            else:
                p.data.addcdiv_(-group['lr'], grad, avg)
    return loss
Example 7: step
def step(self, closure=None):
    """Performs a single optimization step.
    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    assert len(self.param_groups) == 1
    loss = None
    if closure is not None:
        loss = closure()
    group = self.param_groups[0]
    weight_decay = group['weight_decay']
    momentum = group['momentum']
    dampening = group['dampening']
    nesterov = group['nesterov']
    grad = self._gather_flat_grad_with_weight_decay(weight_decay)
    # NOTE: SGDHD has only global state, but we register it as state for
    # the first param, because this helps with casting in load_state_dict
    state = self.state[self._params[0]]
    # State initialization
    if len(state) == 0:
        state['grad_prev'] = torch.zeros_like(grad)
    grad_prev = state['grad_prev']
    # Hypergradient for SGD
    h = torch.dot(grad, grad_prev)
    # Hypergradient descent of the learning rate:
    group['lr'] += group['hypergrad_lr'] * h
    if momentum != 0:
        if 'momentum_buffer' not in state:
            buf = state['momentum_buffer'] = torch.zeros_like(grad)
            buf.mul_(momentum).add_(grad)
        else:
            buf = state['momentum_buffer']
            buf.mul_(momentum).add_(1 - dampening, grad)
        if nesterov:
            grad.add_(momentum, buf)
        else:
            grad = buf
    state['grad_prev'] = grad
    self._add_grad(-group['lr'], grad)
    return loss
Example 8: forward
def forward(self, x):
    x = torch.tanh(self.fc1(x))
    x = torch.tanh(self.fc2(x))
    mu = self.fc3(x)
    logstd = torch.zeros_like(mu)
    std = torch.exp(logstd)
    return mu, std
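The zeros_like call above pins the log standard deviation at zero, so the policy is a unit-variance Gaussian around the network's mean output. A minimal sketch of how such a (mu, std) pair is usually consumed downstream (the sampling code is an illustration, not part of the snippet):

import torch
from torch.distributions import Normal

mu = torch.tensor([0.3, -1.2])
std = torch.exp(torch.zeros_like(mu))     # std == 1 everywhere, as in forward()
dist = Normal(mu, std)
action = dist.sample()                    # draw an action
log_prob = dist.log_prob(action).sum()    # used in policy-gradient objectives
print(action, log_prob)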
Example 9: testOne
def testOne(self):
    """
    Equal duty cycle, boost factor 0, k=4, batch size 1
    """
    x = self.x
    ctx = TestContext()
    result = KWinnersCNN.forward(ctx, x, self.dutyCycle, k=4, boostStrength=0.0)
    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = 1.1
    expected[0, 0, 1, 1] = 1.2
    expected[0, 1, 0, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3
    self.assertEqual(result.shape, expected.shape)
    numCorrect = (result == expected).sum()
    self.assertEqual(numCorrect, result.reshape(-1).size()[0])
    indices = ctx.saved_tensors[0].reshape(-1)
    expectedIndices = torch.tensor([2, 3, 10, 5])
    numCorrect = (indices == expectedIndices).sum()
    self.assertEqual(numCorrect, 4)
    # Test that gradient values are in the right places, that their sum is
    # equal, and that they have exactly the right number of nonzeros
    grad_x, _, _, _ = KWinnersCNN.backward(ctx, self.gradient)
    grad_x = grad_x.reshape(-1)
    self.assertEqual(
        (grad_x[indices] == self.gradient.reshape(-1)[indices]).sum(), 4)
    self.assertAlmostEqual(
        grad_x.sum(), self.gradient.reshape(-1)[indices].sum(), places=4)
    self.assertEqual(len(grad_x.nonzero()), 4)
Example 10: testDutyCycleUpdate
def testDutyCycleUpdate(self):
    """
    Start with equal duty cycle, boost factor=0, k=4, batch size=2
    """
    x = self.x2
    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = 1.1
    expected[0, 0, 1, 1] = 1.2
    expected[0, 1, 0, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3
    expected[1, 0, 0, 0] = 1.4
    expected[1, 1, 0, 0] = 1.5
    expected[1, 1, 0, 1] = 1.6
    expected[1, 2, 1, 1] = 1.7
    dutyCycle = torch.zeros((1, 3, 1, 1))
    dutyCycle[:] = 1.0 / 3.0
    updateDutyCycleCNN(expected, dutyCycle, 2, 2)
    newDuty = torch.tensor([1.5000, 1.5000, 1.0000]) / 4.0
    diff = (dutyCycle.reshape(-1) - newDuty).abs().sum()
    self.assertLessEqual(diff, 0.001)
    dutyCycle[:] = 1.0 / 3.0
    updateDutyCycleCNN(expected, dutyCycle, 4, 4)
    newDuty = torch.tensor([0.3541667, 0.3541667, 0.2916667])
    diff = (dutyCycle.reshape(-1) - newDuty).abs().sum()
    self.assertLessEqual(diff, 0.001)
Example 11: testFour
def testFour(self):
    """
    Equal duty cycle, boost factor=0, k=3, batch size=2
    """
    x = self.x2
    ctx = TestContext()
    result = KWinnersCNN.forward(ctx, x, self.dutyCycle, k=3, boostStrength=0.0)
    expected = torch.zeros_like(x)
    expected[0, 0, 1, 1] = 1.2
    expected[0, 1, 0, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3
    expected[1, 1, 0, 0] = 1.5
    expected[1, 1, 0, 1] = 1.6
    expected[1, 2, 1, 1] = 1.7
    self.assertEqual(result.shape, expected.shape)
    numCorrect = (result == expected).sum()
    self.assertEqual(numCorrect, result.reshape(-1).size()[0])
    indices = ctx.saved_tensors[0]
    expectedIndices = torch.tensor([[3, 10, 5], [4, 5, 11]])
    numCorrect = (indices == expectedIndices).sum()
    self.assertEqual(numCorrect, 6)
    # Test that gradient values are in the right places, that their sum is
    # equal, and that they have exactly the right number of nonzeros
    out_grad, _, _, _ = KWinnersCNN.backward(ctx, self.gradient2)
    out_grad = out_grad.reshape(2, -1)
    in_grad = self.gradient2.reshape(2, -1)
    self.assertEqual((out_grad == in_grad).sum(), 6)
    self.assertEqual(len(out_grad.nonzero()), 6)
Example 12: forward
def forward(self, input_ids, token_type_ids=None, attention_mask=None):
    if attention_mask is None:
        attention_mask = torch.ones_like(input_ids)
    if token_type_ids is None:
        token_type_ids = torch.zeros_like(input_ids)
    # We create a 3D attention mask from a 2D tensor mask.
    # Sizes are [batch_size, 1, 1, to_seq_length]
    # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
    # this attention mask is more simple than the triangular masking of causal attention
    # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
    extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
    # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
    # masked positions, this operation will create a tensor which is 0.0 for
    # positions we want to attend and -10000.0 for masked positions.
    # Since we are adding it to the raw scores before the softmax, this is
    # effectively the same as removing these entirely.
    extended_attention_mask = extended_attention_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
    extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
    embedding_output = self.embeddings(input_ids, token_type_ids)
    all_encoder_layers = self.encoder(embedding_output, extended_attention_mask)
    sequence_output = all_encoder_layers[-1]
    pooled_output = self.pooler(sequence_output)
    return all_encoder_layers, pooled_output
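A standalone sketch of the masking logic above on toy ids (the padding convention here is an assumption for illustration): ones_like/zeros_like supply the default attention_mask and token_type_ids, and the 2D mask becomes an additive bias of shape [batch, 1, 1, seq].

import torch

input_ids = torch.tensor([[101, 2023, 2003, 102, 0, 0]])    # toy ids, 0 = padding
attention_mask = torch.ones_like(input_ids)                  # default: attend everywhere
attention_mask[input_ids == 0] = 0                           # mask out the padding
token_type_ids = torch.zeros_like(input_ids)                 # default: single segment

extended = attention_mask.unsqueeze(1).unsqueeze(2).float()  # [batch, 1, 1, seq]
extended = (1.0 - extended) * -10000.0                       # 0 where attending, -1e4 where masked
print(extended.shape)                                        # torch.Size([1, 1, 1, 6])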
Example 13: sample_conditional_a
def sample_conditional_a(self, resid_image, var_so_far, pixel_1d):
    is_on = (pixel_1d < (self.n_discrete_latent - 1)).float()
    # pass through galaxy encoder
    pixel_2d = self.one_galaxy_vae.pixel_1d_to_2d(pixel_1d)
    z_mean, z_var = self.one_galaxy_vae.enc(resid_image, pixel_2d)
    # sample z
    q_z = Normal(z_mean, z_var.sqrt())
    z_sample = q_z.rsample()
    # kl term for continuous latent vars
    log_q_z = q_z.log_prob(z_sample).sum(1)
    p_z = Normal(torch.zeros_like(z_sample), torch.ones_like(z_sample))
    log_p_z = p_z.log_prob(z_sample).sum(1)
    kl_z = is_on * (log_q_z - log_p_z)
    # run through decoder
    recon_mean, recon_var = self.one_galaxy_vae.dec(is_on, pixel_2d, z_sample)
    # NOTE: we will have to accumulate the recon means once we do more detections
    # recon_means = recon_mean + image_so_far
    # recon_vars = recon_var + var_so_far
    return recon_mean, recon_var, is_on, kl_z
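The prior in the KL term above is a standard normal built with zeros_like/ones_like, which guarantees it matches the shape and device of the posterior sample. A minimal sketch of that single-sample KL estimate in isolation:

import torch
from torch.distributions import Normal

z_mean = torch.tensor([0.5, -0.3])
z_var = torch.tensor([0.8, 1.2])

q_z = Normal(z_mean, z_var.sqrt())                                    # approximate posterior
z_sample = q_z.rsample()                                              # reparameterized sample
p_z = Normal(torch.zeros_like(z_sample), torch.ones_like(z_sample))   # N(0, I) prior
kl_mc = (q_z.log_prob(z_sample) - p_z.log_prob(z_sample)).sum()       # Monte Carlo KL estimate
print(kl_mc)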
Example 14: forward
def forward(self, X: Tensor) -> Tensor:
    r"""Evaluate Expected Improvement on the candidate set X.
    Args:
        X: A `b1 x ... bk x 1 x d`-dim batched tensor of `d`-dim design points.
            Expected Improvement is computed for each point individually,
            i.e., what is considered are the marginal posteriors, not the
            joint.
    Returns:
        A `b1 x ... bk`-dim tensor of Expected Improvement values at the
        given design points `X`.
    """
    self.best_f = self.best_f.to(X)
    posterior = self.model.posterior(X)
    self._validate_single_output_posterior(posterior)
    mean = posterior.mean
    # deal with batch evaluation and broadcasting
    view_shape = mean.shape[:-2] if mean.dim() >= X.dim() else X.shape[:-2]
    mean = mean.view(view_shape)
    sigma = posterior.variance.clamp_min(1e-9).sqrt().view(view_shape)
    u = (mean - self.best_f.expand_as(mean)) / sigma
    if not self.maximize:
        u = -u
    normal = Normal(torch.zeros_like(u), torch.ones_like(u))
    ucdf = normal.cdf(u)
    updf = torch.exp(normal.log_prob(u))
    ei = sigma * (updf + u * ucdf)
    return ei
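The closed-form Expected Improvement above only needs the standard normal pdf and cdf, which is why the distribution is built from zeros_like/ones_like at the shape and device of u. A toy evaluation of the same formula on hand-picked posterior moments (numbers are illustrative only):

import torch
from torch.distributions import Normal

mean = torch.tensor([0.2, 0.9])      # posterior means at two candidate points
sigma = torch.tensor([0.5, 0.1])     # posterior standard deviations
best_f = torch.tensor(0.5)           # incumbent best value (maximization)

u = (mean - best_f) / sigma
normal = Normal(torch.zeros_like(u), torch.ones_like(u))
ei = sigma * (torch.exp(normal.log_prob(u)) + u * normal.cdf(u))
print(ei)    # EI is larger where improvement over best_f is more likely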
Example 15: predict2
def predict2(self, x_de, beamsz, gen_len):
    emb_de = self.embedding_de(x_de)  # "batch size",n_de,word_dim, but "batch size" is 1 in this case!
    h0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
    c0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
    enc_h, _ = self.encoder(emb_de, (h0, c0))
    # since enc batch size=1, enc_h is 1,n_de,hiddensz*n_directions
    if self.directions == 2:
        enc_h = self.dim_reduce(enc_h)  # 1,n_de,hiddensz
    masterheap = CandList(self.n_layers, self.hidden_dim, enc_h.size(1), beamsz)
    # in the following loop, beamsz is length 1 for first iteration, length true beamsz (100) afterward
    for i in range(gen_len):
        prev = masterheap.get_prev()  # beamsz
        emb_t = self.embedding_en(prev)  # embed the last thing we generated. beamsz,word_dim
        enc_h_expand = enc_h.expand(prev.size(0), -1, -1)  # beamsz,n_de,hiddensz
        h, c = masterheap.get_hiddens()  # (n_layers,beamsz,hiddensz),(n_layers,beamsz,hiddensz)
        dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))  # dec_h is beamsz,1,hiddensz (batch_first=True)
        scores = torch.bmm(enc_h_expand, dec_h.transpose(1, 2)).squeeze(2)
        # (beamsz,n_de,hiddensz) * (beamsz,hiddensz,1) = (beamsz,n_de,1). squeeze to beamsz,n_de
        attn_dist = F.softmax(scores, dim=1)
        if self.attn_type == "hard":
            _, argmax = attn_dist.max(1)  # beamsz for each batch, select most likely german word to pay attention to
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1).cuda())
            context = torch.bmm(one_hot.unsqueeze(1), enc_h_expand).squeeze(1)
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h_expand).squeeze(1)
        # the difference btwn hard and soft is just whether we use a one_hot or a distribution
        # context is beamsz,hiddensz*n_directions
        pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))  # beamsz,len(EN.vocab)
        # TODO: set the columns corresponding to <pad>,<unk>,</s>,etc to 0
        masterheap.update_beam(pred)
        masterheap.update_hiddens(h, c)
        masterheap.update_attentions(attn_dist)
        masterheap.firstloop = False
    return masterheap.probs, masterheap.wordlist, masterheap.attentions
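The "hard" attention branch above builds a one-hot attention distribution with zeros_like followed by scatter_. A minimal CPU-only sketch of that pattern (the Variable wrapper and .cuda() calls in the snippet are legacy PyTorch and are dropped here):

import torch

attn_dist = torch.tensor([[0.1, 0.7, 0.2],
                          [0.6, 0.3, 0.1]])           # beamsz x n_de soft attention
_, argmax = attn_dist.max(1)                          # most-attended source position per beam
one_hot = torch.zeros_like(attn_dist).scatter_(-1, argmax.unsqueeze(1), 1)
print(one_hot)
# tensor([[0., 1., 0.],
#         [1., 0., 0.]])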