

Python torch.zeros_like Function Code Examples

This article collects typical usage examples of the torch.zeros_like function in Python. If you have been wondering what zeros_like does, how to call it, or what it looks like in real code, the curated examples below should help.


The following shows 15 code examples of the zeros_like function, sorted by popularity by default.
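
Before diving into the examples, here is a minimal sketch (not taken from any of the projects below) of what torch.zeros_like does: it returns a new tensor of zeros with the same shape, dtype, device, and layout as its argument. The variable names are purely illustrative.

import torch

x = torch.rand(2, 3, dtype=torch.float64)
z = torch.zeros_like(x)
# z has the same shape, dtype and device as x, but is filled with zeros
assert z.shape == x.shape and z.dtype == x.dtype and z.device == x.device
# roughly equivalent to torch.zeros(x.shape, dtype=x.dtype, device=x.device, layout=x.layout)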

Example 1: __init__

    def __init__(self, block, layers, c_out=1000):
        self.inplanes = 64
        super(XResNet, self).__init__()
        self.conv1 = conv2d(3, 32, 2)
        self.conv2 = conv2d(32, 32, 1)
        self.conv3 = conv2d(32, 64, 1)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512 * block.expansion, c_out)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialise the last BN weight in each residual branch so every
        # block starts out as an identity mapping (the "gamma = 0" trick).
        for m in self.modules():
            if isinstance(m, BasicBlock): m.bn2.weight = nn.Parameter(torch.zeros_like(m.bn2.weight))
            if isinstance(m, Bottleneck): m.bn3.weight = nn.Parameter(torch.zeros_like(m.bn3.weight))
            if isinstance(m, nn.Linear): m.weight.data.normal_(0, 0.01)
Author: SiddharthTiwari, Project: fastai, Lines: 25, Source: xresnet2.py

Example 2: scale_tensor

def scale_tensor(tensor, scale):
    """
    Safely scale a tensor without increasing its ``.shape``.
    This avoids NaNs by assuming ``inf * 0 = 0 * inf = 0``.
    """
    if isinstance(tensor, numbers.Number):
        if isinstance(scale, numbers.Number):
            return tensor * scale
        elif tensor == 0:
            return torch.zeros_like(scale)
        elif tensor == 1:
            return scale
        else:
            return tensor * scale
    if isinstance(scale, numbers.Number):
        if scale == 0:
            return torch.zeros_like(tensor)
        elif scale == 1:
            return tensor
        else:
            return tensor * scale
    result = tensor * scale
    result[(scale == 0).expand_as(result)] = 0  # avoid NaNs
    if result.shape != tensor.shape:
        raise ValueError("Broadcasting error: scale is incompatible with tensor: "
                         "{} vs {}".format(scale.shape, tensor.shape))
    return result
Author: lewisKit, Project: pyro, Lines: 27, Source: util.py
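
A small illustrative sketch (not part of the pyro source above) of why scale_tensor returns torch.zeros_like(scale) when the scalar operand is 0: multiplying 0 by an inf entry yields NaN, whereas a fresh zero tensor of the right shape does not. The values are made up for demonstration.

import torch

scale = torch.tensor([float('inf'), 2.0])
print(0.0 * scale)              # tensor([nan, 0.]) -- the NaN the function avoids
print(torch.zeros_like(scale))  # tensor([0., 0.])  -- what scale_tensor returns instead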

Example 3: step

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad = grad.add(group['weight_decay'], p.data)

                if state['step'] > 1:
                    prev_bias_correction1 = 1 - beta1 ** (state['step'] - 1)
                    prev_bias_correction2 = 1 - beta2 ** (state['step'] - 1)
                    # Hypergradient for Adam:
                    h = torch.dot(grad.view(-1), torch.div(exp_avg, exp_avg_sq.sqrt().add_(group['eps'])).view(-1)) * math.sqrt(prev_bias_correction2) / prev_bias_correction1
                    # Hypergradient descent of the learning rate:
                    tmp = group['hypergrad_lr'] * h
                    group['lr'] += tmp.double().cpu()

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                p.data.addcdiv_(-step_size, exp_avg, denom)

        return loss
Author: chrinide, Project: py_ml_utils, Lines: 58, Source: adamhd.py
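
In the optimizer examples (this one, Example 4, and Example 6), torch.zeros_like(p.data) initializes the per-parameter moment buffers so that each buffer automatically matches its parameter's shape, dtype and device. A minimal sketch with a toy parameter, not taken from any of these repositories:

import torch
import torch.nn as nn

p = nn.Parameter(torch.randn(4, 3))
state = {
    'exp_avg': torch.zeros_like(p.data),     # first-moment buffer, same shape/dtype/device as p
    'exp_avg_sq': torch.zeros_like(p.data),  # second-moment buffer
}
assert state['exp_avg'].shape == p.shape and state['exp_avg'].dtype == p.dtype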

Example 4: step

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['b1'], group['b2']

                state['step'] += 1

                # Add grad clipping
                if group['max_grad_norm'] > 0:
                    clip_grad_norm_(p, group['max_grad_norm'])

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                denom = exp_avg_sq.sqrt().add_(group['e'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                schedule_fct = SCHEDULES[group['schedule']]
                lr_scheduled = group['lr'] * schedule_fct(state['step']/group['t_total'], group['warmup'])
                step_size = lr_scheduled * math.sqrt(bias_correction2) / bias_correction1

                p.data.addcdiv_(-step_size, exp_avg, denom)

                # Add weight decay at the end (fixed version)
                if (len(p.size()) > 1 or group['vector_l2']) and group['l2'] > 0:
                    p.data.add_(-lr_scheduled * group['l2'], p.data)

        return loss
Author: cclauss, Project: pytorch-openai-transformer-lm, Lines: 57, Source: opt.py

Example 5: manual_forget_mult

def manual_forget_mult(x, f, h=None, batch_first=True, backward=False):
    if batch_first: x,f = x.transpose(0,1),f.transpose(0,1)
    out = torch.zeros_like(x)
    prev = h if h is not None else torch.zeros_like(out[0])
    idx_range = range(x.shape[0]-1,-1,-1) if backward else range(x.shape[0])
    for i in idx_range:
        out[i] = f[i] * x[i] + (1-f[i]) * prev
        prev = out[i]
    if batch_first: out = out.transpose(0,1)
    return out
Author: SiddharthTiwari, Project: fastai, Lines: 10, Source: test_text_qrnn.py

Example 6: step

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('RMSprop does not support sparse gradients')
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['square_avg'] = torch.zeros_like(p.data)
                    if group['momentum'] > 0:
                        state['momentum_buffer'] = torch.zeros_like(p.data)
                    if group['centered']:
                        state['grad_avg'] = torch.zeros_like(p.data)

                square_avg = state['square_avg']
                alpha = group['alpha']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad = grad.add(group['weight_decay'], p.data)

                square_avg.mul_(alpha).addcmul_(1 - alpha, grad, grad)

                if group['centered']:
                    grad_avg = state['grad_avg']
                    grad_avg.mul_(alpha).add_(1 - alpha, grad)
                    avg = square_avg.addcmul(-1, grad_avg, grad_avg).sqrt().add_(group['eps'])
                else:
                    avg = square_avg.sqrt().add_(group['eps'])

                if group['momentum'] > 0:
                    buf = state['momentum_buffer']
                    buf.mul_(group['momentum']).addcdiv_(grad, avg)
                    p.data.add_(-group['lr'], buf)
                else:
                    p.data.addcdiv_(-group['lr'], grad, avg)

        return loss
Author: Jsmilemsj, Project: pytorch, Lines: 54, Source: rmsprop.py

Example 7: step

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        assert len(self.param_groups) == 1

        loss = None
        if closure is not None:
            loss = closure()

        group = self.param_groups[0]
        weight_decay = group['weight_decay']
        momentum = group['momentum']
        dampening = group['dampening']
        nesterov = group['nesterov']

        grad = self._gather_flat_grad_with_weight_decay(weight_decay)

        # NOTE: SGDHD has only global state, but we register it as state for
        # the first param, because this helps with casting in load_state_dict
        state = self.state[self._params[0]]
        # State initialization
        if len(state) == 0:
            state['grad_prev'] = torch.zeros_like(grad)

        grad_prev = state['grad_prev']
        # Hypergradient for SGD
        h = torch.dot(grad, grad_prev)
        # Hypergradient descent of the learning rate:
        group['lr'] += group['hypergrad_lr'] * h

        if momentum != 0:
            if 'momentum_buffer' not in state:
                buf = state['momentum_buffer'] = torch.zeros_like(grad)
                buf.mul_(momentum).add_(grad)
            else:
                buf = state['momentum_buffer']
                buf.mul_(momentum).add_(1 - dampening, grad)
            if nesterov:
                grad.add_(momentum, buf)
            else:
                grad = buf

        state['grad_prev'] = grad

        self._add_grad(-group['lr'], grad)

        return loss
Author: chrinide, Project: py_ml_utils, Lines: 51, Source: sgdhd.py

Example 8: forward

 def forward(self, x):
     x = torch.tanh(self.fc1(x))
     x = torch.tanh(self.fc2(x))
     mu = self.fc3(x)
     logstd = torch.zeros_like(mu)  # log std fixed at 0, so std = exp(0) = 1 (unit-variance Gaussian policy)
     std = torch.exp(logstd)
     return mu, std
Author: lanseyege, Project: lets-do-irl, Lines: 7, Source: model.py

Example 9: testOne

  def testOne(self):
    """
    Equal duty cycle, boost factor 0, k=4, batch size 1
    """
    x = self.x

    ctx = TestContext()

    result = KWinnersCNN.forward(ctx, x, self.dutyCycle, k=4, boostStrength=0.0)

    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = 1.1
    expected[0, 0, 1, 1] = 1.2
    expected[0, 1, 0, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3

    self.assertEqual(result.shape, expected.shape)

    numCorrect = (result == expected).sum()
    self.assertEqual(numCorrect, result.reshape(-1).size()[0])

    indices = ctx.saved_tensors[0].reshape(-1)
    expectedIndices = torch.tensor([2, 3, 10, 5])
    numCorrect = (indices == expectedIndices).sum()
    self.assertEqual(numCorrect, 4)

    # Test that gradient values are in the right places, that their sum is
    # equal, and that they have exactly the right number of nonzeros
    grad_x, _, _, _ = KWinnersCNN.backward(ctx, self.gradient)
    grad_x = grad_x.reshape(-1)
    self.assertEqual(
      (grad_x[indices] == self.gradient.reshape(-1)[indices]).sum(), 4)
    self.assertAlmostEqual(
      grad_x.sum(), self.gradient.reshape(-1)[indices].sum(), places=4)
    self.assertEqual(len(grad_x.nonzero()), 4)
Author: rhyolight, Project: nupic.research, Lines: 35, Source: k_winners_cnn_test.py

Example 10: testDutyCycleUpdate

  def testDutyCycleUpdate(self):
    """
    Start with equal duty cycle, boost factor=0, k=4, batch size=2
    """
    x = self.x2

    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = 1.1
    expected[0, 0, 1, 1] = 1.2
    expected[0, 1, 0, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3
    expected[1, 0, 0, 0] = 1.4
    expected[1, 1, 0, 0] = 1.5
    expected[1, 1, 0, 1] = 1.6
    expected[1, 2, 1, 1] = 1.7

    dutyCycle = torch.zeros((1, 3, 1, 1))
    dutyCycle[:] = 1.0 / 3.0
    updateDutyCycleCNN(expected, dutyCycle, 2, 2)
    newDuty = torch.tensor([1.5000, 1.5000, 1.0000]) / 4.0
    diff = (dutyCycle.reshape(-1) - newDuty).abs().sum()
    self.assertLessEqual(diff, 0.001)

    dutyCycle[:] = 1.0 / 3.0
    updateDutyCycleCNN(expected, dutyCycle, 4, 4)
    newDuty = torch.tensor([0.3541667, 0.3541667, 0.2916667])
    diff = (dutyCycle.reshape(-1) - newDuty).abs().sum()
    self.assertLessEqual(diff, 0.001)
Author: rhyolight, Project: nupic.research, Lines: 28, Source: k_winners_cnn_test.py

Example 11: testFour

  def testFour(self):
    """
    Equal duty cycle, boost factor=0, k=3, batch size=2
    """
    x = self.x2

    ctx = TestContext()

    result = KWinnersCNN.forward(ctx, x, self.dutyCycle, k=3, boostStrength=0.0)

    expected = torch.zeros_like(x)
    expected[0, 0, 1, 1] = 1.2
    expected[0, 1, 0, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3
    expected[1, 1, 0, 0] = 1.5
    expected[1, 1, 0, 1] = 1.6
    expected[1, 2, 1, 1] = 1.7

    self.assertEqual(result.shape, expected.shape)

    numCorrect = (result == expected).sum()
    self.assertEqual(numCorrect, result.reshape(-1).size()[0])

    indices = ctx.saved_tensors[0]
    expectedIndices = torch.tensor([[3, 10, 5], [4, 5, 11]])
    numCorrect = (indices == expectedIndices).sum()
    self.assertEqual(numCorrect, 6)

    # Test that gradient values are in the right places, that their sum is
    # equal, and that they have exactly the right number of nonzeros
    out_grad, _, _, _ = KWinnersCNN.backward(ctx, self.gradient2)
    out_grad = out_grad.reshape(2, -1)
    in_grad = self.gradient2.reshape(2, -1)
    self.assertEqual((out_grad == in_grad).sum(), 6)
    self.assertEqual(len(out_grad.nonzero()), 6)
Author: rhyolight, Project: nupic.research, Lines: 35, Source: k_winners_cnn_test.py

Example 12: forward

    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        if attention_mask is None:
            attention_mask = torch.ones_like(input_ids)
        if token_type_ids is None:
            token_type_ids = torch.zeros_like(input_ids)

        # We create a 3D attention mask from a 2D tensor mask.
        # Sizes are [batch_size, 1, 1, to_seq_length]
        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
        # this attention mask is more simple than the triangular masking of causal attention
        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)

        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
        # masked positions, this operation will create a tensor which is 0.0 for
        # positions we want to attend and -10000.0 for masked positions.
        # Since we are adding it to the raw scores before the softmax, this is
        # effectively the same as removing these entirely.
        extended_attention_mask = extended_attention_mask.to(dtype=next(self.parameters()).dtype) # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        embedding_output = self.embeddings(input_ids, token_type_ids)
        all_encoder_layers = self.encoder(embedding_output, extended_attention_mask)
        sequence_output = all_encoder_layers[-1]
        pooled_output = self.pooler(sequence_output)
        return all_encoder_layers, pooled_output
Author: zhouleidcc, Project: bert-Chinese-classification-task, Lines: 26, Source: modeling.py
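
The comments in the example above describe how a 2D padding mask becomes an additive 4D attention mask; the short sketch below isolates just that transformation with made-up tensors (it is not the library's own API):

import torch

input_ids = torch.tensor([[101, 7592, 102, 0]])   # one sequence with a trailing pad token
attention_mask = torch.ones_like(input_ids)
attention_mask[:, -1] = 0                          # do not attend to the padding position
token_type_ids = torch.zeros_like(input_ids)       # a single-segment input

extended = attention_mask.unsqueeze(1).unsqueeze(2).float()  # shape [batch, 1, 1, seq_len]
extended = (1.0 - extended) * -10000.0
# extended is 0 at attended positions and -10000 at masked positions,
# so adding it to the raw attention scores effectively removes the masked tokens.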

Example 13: sample_conditional_a

    def sample_conditional_a(self, resid_image, var_so_far, pixel_1d):

        is_on = (pixel_1d < (self.n_discrete_latent - 1)).float()

        # pass through galaxy encoder
        pixel_2d = self.one_galaxy_vae.pixel_1d_to_2d(pixel_1d)
        z_mean, z_var = self.one_galaxy_vae.enc(resid_image, pixel_2d)

        # sample z
        q_z = Normal(z_mean, z_var.sqrt())
        z_sample = q_z.rsample()

        # kl term for continuous latent vars
        log_q_z = q_z.log_prob(z_sample).sum(1)
        p_z = Normal(torch.zeros_like(z_sample), torch.ones_like(z_sample))
        log_p_z = p_z.log_prob(z_sample).sum(1)
        kl_z = is_on * (log_q_z - log_p_z)

        # run through decoder
        recon_mean, recon_var = self.one_galaxy_vae.dec(is_on, pixel_2d, z_sample)

        # NOTE: we will have to adjust the recon means once we do more detections
        # recon_means = recon_mean + image_so_far
        # recon_vars = recon_var + var_so_far

        return recon_mean, recon_var, is_on, kl_z
Author: Runjing-Liu120, Project: discrete_vae_experimentation, Lines: 26, Source: galaxy_experiments_lib.py

Example 14: forward

    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate Expected Improvement on the candidate set X.

        Args:
            X: A `b1 x ... bk x 1 x d`-dim batched tensor of `d`-dim design points.
                Expected Improvement is computed for each point individually,
                i.e., what is considered are the marginal posteriors, not the
                joint.

        Returns:
            A `b1 x ... bk`-dim tensor of Expected Improvement values at the
            given design points `X`.
        """
        self.best_f = self.best_f.to(X)
        posterior = self.model.posterior(X)
        self._validate_single_output_posterior(posterior)
        mean = posterior.mean
        # deal with batch evaluation and broadcasting
        view_shape = mean.shape[:-2] if mean.dim() >= X.dim() else X.shape[:-2]
        mean = mean.view(view_shape)
        sigma = posterior.variance.clamp_min(1e-9).sqrt().view(view_shape)
        u = (mean - self.best_f.expand_as(mean)) / sigma
        if not self.maximize:
            u = -u
        normal = Normal(torch.zeros_like(u), torch.ones_like(u))
        ucdf = normal.cdf(u)
        updf = torch.exp(normal.log_prob(u))
        ei = sigma * (updf + u * ucdf)
        return ei
Author: saschwan, Project: botorch, Lines: 29, Source: analytic.py

Example 15: predict2

 def predict2(self, x_de, beamsz, gen_len):
     emb_de = self.embedding_de(x_de) # "batch size",n_de,word_dim, but "batch size" is 1 in this case!
     h0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
     c0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
     enc_h, _ = self.encoder(emb_de, (h0, c0))
     # since enc batch size=1, enc_h is 1,n_de,hiddensz*n_directions
     if self.directions == 2:
         enc_h = self.dim_reduce(enc_h) # 1,n_de,hiddensz
     masterheap = CandList(self.n_layers,self.hidden_dim,enc_h.size(1),beamsz)
     # in the following loop, beamsz is length 1 for first iteration, length true beamsz (100) afterward
     for i in range(gen_len):
         prev = masterheap.get_prev() # beamsz
         emb_t = self.embedding_en(prev) # embed the last thing we generated. beamsz,word_dim
         enc_h_expand = enc_h.expand(prev.size(0),-1,-1) # beamsz,n_de,hiddensz
         
         h, c = masterheap.get_hiddens() # (n_layers,beamsz,hiddensz),(n_layers,beamsz,hiddensz)
         dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c)) # dec_h is beamsz,1,hiddensz (batch_first=True)
         scores = torch.bmm(enc_h_expand, dec_h.transpose(1,2)).squeeze(2)
         # (beamsz,n_de,hiddensz) * (beamsz,hiddensz,1) = (beamsz,n_de,1). squeeze to beamsz,n_de
         attn_dist = F.softmax(scores,dim=1)
         if self.attn_type == "hard":
             _, argmax = attn_dist.max(1) # beamsz for each batch, select most likely german word to pay attention to
             one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1).cuda())
             context = torch.bmm(one_hot.unsqueeze(1), enc_h_expand).squeeze(1)
         else:
             context = torch.bmm(attn_dist.unsqueeze(1), enc_h_expand).squeeze(1)
         # the difference btwn hard and soft is just whether we use a one_hot or a distribution
         # context is beamsz,hiddensz*n_directions
         pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1)) # beamsz,len(EN.vocab)
         # TODO: set the columns corresponding to <pad>,<unk>,</s>,etc to 0
         masterheap.update_beam(pred)
         masterheap.update_hiddens(h,c)
         masterheap.update_attentions(attn_dist)
         masterheap.firstloop = False
     return masterheap.probs,masterheap.wordlist,masterheap.attentions
Author: anihamde, Project: cs287-s18, Lines: 35, Source: models_original.py


Note: The torch.zeros_like examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright of the source code remains with the original authors, and distribution or use should follow each project's license. Please do not repost without permission.