

Python utils.clip_grad_norm_ method code examples

This article collects typical usage examples of the Python method fairseq.utils.clip_grad_norm_. If you are wondering what utils.clip_grad_norm_ does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from its containing module, fairseq.utils.


The following presents 9 code examples of the utils.clip_grad_norm_ method, sorted by popularity by default.
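Before the examples, here is a minimal usage sketch (not taken from any of the projects below) showing how the method is typically called; it assumes torch and fairseq are installed and mirrors the behaviour exercised by the test examples below.

import torch
from fairseq import utils

# a few parameters with dummy gradients
params = [torch.nn.Parameter(torch.zeros(5)) for _ in range(3)]
for p in params:
    p.grad = torch.full((5,), 2.0)

# clip the combined gradient norm to 1.0; the returned value is the total
# norm measured before clipping, as a tensor
grad_norm = utils.clip_grad_norm_(params, 1.0)
print(grad_norm)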

Example 1: test_clip_grad_norm_

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def test_clip_grad_norm_(self):
        params = torch.nn.Parameter(torch.zeros(5)).requires_grad_(False)
        grad_norm = utils.clip_grad_norm_(params, 1.0)
        self.assertTrue(torch.is_tensor(grad_norm))
        self.assertEqual(grad_norm, 0.0)

        params = [torch.nn.Parameter(torch.zeros(5)) for i in range(3)]
        for p in params:
            p.grad = torch.full((5,), fill_value=2.)
        grad_norm = utils.clip_grad_norm_(params, 1.0)
        exp_grad_norm = torch.full((15,), fill_value=2.).norm()
        self.assertTrue(torch.is_tensor(grad_norm))
        self.assertEqual(grad_norm, exp_grad_norm)

        grad_norm = utils.clip_grad_norm_(params, 1.0)
        self.assertAlmostEqual(grad_norm, torch.tensor(1.0)) 
Developer: pytorch | Project: fairseq | Lines of code: 18 | Source file: test_utils.py

Example 2: clip_grad_norm

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def clip_grad_norm(self, max_norm, aggregate_norm_fn=None):
        """Clips gradient norm and updates dynamic loss scaler."""
        self._sync_fp16_grads_to_fp32()
        grad_norm = utils.clip_grad_norm_(self.fp32_params, max_norm, aggregate_norm_fn)

        # detect overflow and adjust loss scale
        if self.scaler is not None:
            overflow = DynamicLossScaler.has_overflow(grad_norm)
            prev_scale = self.scaler.loss_scale
            self.scaler.update_scale(overflow)
            if overflow:
                if self.scaler.loss_scale <= self.min_loss_scale:
                    # Use FloatingPointError as an uncommon error that parent
                    # functions can safely catch to stop training.
                    self.scaler.loss_scale = prev_scale
                    raise FloatingPointError((
                        'Minimum loss scale reached ({}). Your loss is probably exploding. '
                        'Try lowering the learning rate, using gradient clipping or '
                        'increasing the batch size.'
                    ).format(self.min_loss_scale))
                raise OverflowError('setting loss scale to: ' + str(self.scaler.loss_scale))

        return grad_norm 
Developer: pytorch | Project: fairseq | Lines of code: 25 | Source file: fp16_optimizer.py
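The FloatingPointError / OverflowError pair above is intended to be caught by the calling training loop. A hypothetical caller-side sketch (not fairseq's actual trainer code; `optimizer` stands for an FP16Optimizer instance and `max_norm` is a placeholder) might look like this:

try:
    # clip gradients; may raise if the fp16 gradients overflowed
    grad_norm = optimizer.clip_grad_norm(max_norm)
    optimizer.step()
except OverflowError as e:
    # gradients overflowed: the loss scale has already been lowered,
    # so discard this batch's gradients and move on to the next one
    print('gradient overflow detected:', e)
    optimizer.zero_grad()
except FloatingPointError:
    # the minimum loss scale was reached; stop training instead of retrying forever
    raise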

Example 3: test_clip_grad_norm_

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def test_clip_grad_norm_(self):
        params = torch.nn.Parameter(torch.zeros(5)).requires_grad_(False)
        grad_norm = utils.clip_grad_norm_(params, 1.0)
        self.assertTrue(torch.is_tensor(grad_norm))
        self.assertEqual(grad_norm, 0.0)

        params = [torch.nn.Parameter(torch.zeros(5)) for i in range(3)]
        for p in params:
            p.grad = torch.full((5,), fill_value=2.0)
        grad_norm = utils.clip_grad_norm_(params, 1.0)
        exp_grad_norm = torch.full((15,), fill_value=2.0).norm()
        self.assertTrue(torch.is_tensor(grad_norm))
        self.assertEqual(grad_norm, exp_grad_norm)

        grad_norm = utils.clip_grad_norm_(params, 1.0)
        self.assertAlmostEqual(grad_norm, torch.tensor(1.0)) 
Developer: elbayadm | Project: attn2d | Lines of code: 18 | Source file: test_utils.py

Example 4: clip_grad_norm

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def clip_grad_norm(self, max_norm, aggregate_norm_fn=None):
        """Clips gradient norm and updates dynamic loss scaler."""
        self._sync_fp16_grads_to_fp32()
        grad_norm = utils.clip_grad_norm_(self.fp32_params, max_norm, aggregate_norm_fn)

        # detect overflow and adjust loss scale
        overflow = DynamicLossScaler.has_overflow(grad_norm)
        prev_scale = self.scaler.loss_scale
        self.scaler.update_scale(overflow)
        if overflow:
            if self.scaler.loss_scale <= self.min_loss_scale:
                # Use FloatingPointError as an uncommon error that parent
                # functions can safely catch to stop training.
                self.scaler.loss_scale = prev_scale
                raise FloatingPointError((
                    'Minimum loss scale reached ({}). Your loss is probably exploding. '
                    'Try lowering the learning rate, using gradient clipping or '
                    'increasing the batch size.'
                ).format(self.min_loss_scale))
            raise OverflowError('setting loss scale to: ' + str(self.scaler.loss_scale))
        return grad_norm 
Developer: elbayadm | Project: attn2d | Lines of code: 23 | Source file: fp16_optimizer.py

Example 5: clip_grad_norm

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def clip_grad_norm(self, max_norm):
        """Clips gradient norm and updates dynamic loss scaler."""
        self._sync_fp16_grads_to_fp32()
        grad_norm = utils.clip_grad_norm_(self.fp32_params.grad.data, max_norm)

        # detect overflow and adjust loss scale
        overflow = DynamicLossScaler.has_overflow(grad_norm)
        self.scaler.update_scale(overflow)
        if overflow:
            if self.scaler.loss_scale <= self.args.min_loss_scale:
                # Use FloatingPointError as an uncommon error that parent
                # functions can safely catch to stop training.
                raise FloatingPointError((
                    'Minimum loss scale reached ({}). Your loss is probably exploding. '
                    'Try lowering the learning rate, using gradient clipping or '
                    'increasing the batch size.'
                ).format(self.args.min_loss_scale))
            raise OverflowError('setting loss scale to: ' + str(self.scaler.loss_scale))
        return grad_norm 
Developer: kakaobrain | Project: helo_word | Lines of code: 21 | Source file: fp16_optimizer.py

Example 6: _all_reduce_and_rescale

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def _all_reduce_and_rescale(self, grad_denom):
        # undo effect of dynamic loss scaling on gradients
        grad_denom *= self.scaler.loss_scale

        if self.args.distributed_world_size > 1:
            # flatten grads into a single buffer
            flat_grads = self._flat_grads = self._get_flat_grads(self._flat_grads)

            # scale gradients to avoid overflow in all-reduce
            flat_grads.div_(self.args.distributed_world_size)
            grad_denom /= self.args.distributed_world_size

            # all-reduce flat grads
            torch.distributed.all_reduce(flat_grads)

            # copy grads back to FP32
            self.fp32_params.grad.data.copy_(flat_grads)
        else:
            # single worker: copy grads directly to FP32
            self._get_flat_grads(out=self.fp32_params.grad.data)

        # rescale and clip grads
        self.fp32_params.grad.data.div_(grad_denom)
        grad_norm = utils.clip_grad_norm_(self.fp32_params.grad.data, self.args.clip_norm)

        # detect overflow and adjust loss scale
        overflow = DynamicLossScaler.has_overflow(grad_norm)
        self.scaler.update_scale(overflow)
        if overflow:
            raise OverflowError('setting loss scale to: ' + str(self.scaler.loss_scale))

        return grad_norm 
Developer: nusnlp | Project: crosentgec | Lines of code: 34 | Source file: fp16_trainer.py

Example 7: _all_reduce_and_rescale

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def _all_reduce_and_rescale(self, grad_denom):
        # flatten grads into a single buffer and all-reduce
        flat_grads = self._flat_grads = self._get_flat_grads(self._flat_grads)
        if self.args.distributed_world_size > 1:
            torch.distributed.all_reduce(flat_grads)

        # rescale and clip gradients
        flat_grads.div_(grad_denom)
        grad_norm = utils.clip_grad_norm_(flat_grads, self.args.clip_norm)

        # copy grads back into model parameters
        self._set_flat_grads(flat_grads)

        return grad_norm 
Developer: nusnlp | Project: crosentgec | Lines of code: 16 | Source file: trainer.py
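The flatten → all-reduce → rescale → clip → copy-back pattern seen in Examples 6 and 7 can be sketched with plain PyTorch as follows. This is an illustrative reimplementation under stated assumptions, not the projects' code: `all_reduce_and_clip`, `grad_denom`, and `clip_norm` are placeholder names, and every parameter is assumed to already hold a gradient.

import torch
import torch.distributed as dist

def all_reduce_and_clip(params, grad_denom, clip_norm):
    # flatten all gradients into one contiguous buffer
    flat_grads = torch.cat([p.grad.view(-1) for p in params])

    # sum gradients across workers when running distributed
    if dist.is_available() and dist.is_initialized():
        dist.all_reduce(flat_grads)

    # rescale by the gradient denominator, then clip the global norm in place
    flat_grads.div_(grad_denom)
    grad_norm = flat_grads.norm()
    if clip_norm > 0 and grad_norm > clip_norm:
        flat_grads.mul_(clip_norm / (grad_norm + 1e-6))

    # copy the processed gradients back into the model parameters
    offset = 0
    for p in params:
        numel = p.grad.numel()
        p.grad.copy_(flat_grads[offset:offset + numel].view_as(p.grad))
        offset += numel
    return grad_norm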

Example 8: clip_grad_norm

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def clip_grad_norm(self, max_norm, aggregate_norm_fn=None):
        """Clips gradient norm."""
        return utils.clip_grad_norm_(self.params, max_norm, aggregate_norm_fn) 
Developer: pytorch | Project: fairseq | Lines of code: 5 | Source file: fairseq_optimizer.py

Example 9: _all_reduce_and_rescale

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def _all_reduce_and_rescale(self, grad_denom, non_empty=True):
        # flatten grads into a single buffer and all-reduce
        flat_grads = self._flat_grads = self._get_flat_grads(out=self._flat_grads, has_grad=non_empty)
        if self.args.distributed_world_size > 1:
            torch.distributed.all_reduce(flat_grads)

        # rescale and clip gradients
        flat_grads.div_(grad_denom)
        grad_norm = utils.clip_grad_norm_(flat_grads, self.args.clip_norm)

        # copy grads back into model parameters
        self._set_flat_grads(flat_grads)

        return grad_norm 
Developer: mlperf | Project: training_results_v0.5 | Lines of code: 16 | Source file: trainer.py


Note: The fairseq.utils.clip_grad_norm_ examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets are taken from open-source projects contributed by their authors, and copyright of the source code remains with the original authors. For distribution and use, please follow the license of the corresponding project; do not reproduce without permission.