

Python utils.clip_grad_norm_ Method Code Examples

This article collects typical code examples of the fairseq.utils.clip_grad_norm_ method in Python. If you are wondering what exactly utils.clip_grad_norm_ does, how to call it, or simply want to see it used in practice, the curated examples below should help. You can also explore further usage examples from the module it belongs to, fairseq.utils.


The following shows 9 code examples of the utils.clip_grad_norm_ method, sorted by popularity by default.
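
Before the numbered examples, here is a minimal, self-contained usage sketch (not taken from any of the examples below; the tiny model, data, and max_norm value are made up for illustration). utils.clip_grad_norm_ rescales the gradients in place so that their total L2 norm does not exceed max_norm, and returns the norm measured before clipping (a tensor in recent fairseq versions).

import torch
from fairseq import utils

model = torch.nn.Linear(10, 2)                             # any torch.nn.Module
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

x, y = torch.randn(4, 10), torch.randn(4, 2)
loss = torch.nn.functional.mse_loss(model(x), y)

optimizer.zero_grad()
loss.backward()

# clip to a total gradient norm of at most 1.0 before the optimizer step
grad_norm = utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()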

Example 1: test_clip_grad_norm_

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def test_clip_grad_norm_(self):
        params = torch.nn.Parameter(torch.zeros(5)).requires_grad_(False)
        grad_norm = utils.clip_grad_norm_(params, 1.0)
        self.assertTrue(torch.is_tensor(grad_norm))
        self.assertEqual(grad_norm, 0.0)

        params = [torch.nn.Parameter(torch.zeros(5)) for i in range(3)]
        for p in params:
            p.grad = torch.full((5,), fill_value=2.)
        grad_norm = utils.clip_grad_norm_(params, 1.0)
        exp_grad_norm = torch.full((15,), fill_value=2.).norm()
        self.assertTrue(torch.is_tensor(grad_norm))
        self.assertEqual(grad_norm, exp_grad_norm)

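        # the first call above already clipped the gradients to norm 1.0, so a
        # second clip with max_norm=1.0 leaves them unchanged and returns ~1.0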
        grad_norm = utils.clip_grad_norm_(params, 1.0)
        self.assertAlmostEqual(grad_norm, torch.tensor(1.0)) 
Developer: pytorch, Project: fairseq, Lines: 18, Source: test_utils.py

Example 2: clip_grad_norm

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def clip_grad_norm(self, max_norm, aggregate_norm_fn=None):
        """Clips gradient norm and updates dynamic loss scaler."""
        self._sync_fp16_grads_to_fp32()
        grad_norm = utils.clip_grad_norm_(self.fp32_params, max_norm, aggregate_norm_fn)

        # detect overflow and adjust loss scale
        if self.scaler is not None:
            overflow = DynamicLossScaler.has_overflow(grad_norm)
            prev_scale = self.scaler.loss_scale
            self.scaler.update_scale(overflow)
            if overflow:
                if self.scaler.loss_scale <= self.min_loss_scale:
                    # Use FloatingPointError as an uncommon error that parent
                    # functions can safely catch to stop training.
                    self.scaler.loss_scale = prev_scale
                    raise FloatingPointError((
                        'Minimum loss scale reached ({}). Your loss is probably exploding. '
                        'Try lowering the learning rate, using gradient clipping or '
                        'increasing the batch size.'
                    ).format(self.min_loss_scale))
                raise OverflowError('setting loss scale to: ' + str(self.scaler.loss_scale))

        return grad_norm 
Developer: pytorch, Project: fairseq, Lines: 25, Source: fp16_optimizer.py
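
Examples 2, 4, and 5 delegate overflow handling to fairseq's DynamicLossScaler, whose implementation is not shown on this page. The following is a simplified, self-contained toy (an illustration of the idea, not fairseq's actual class): an inf/nan gradient norm is treated as fp16 overflow, the loss scale is shrunk and the step skipped when that happens, and the scale is grown again after a long run of clean steps.

import math

class ToyLossScaler:
    """Toy dynamic loss scaler mirroring the overflow logic used above."""

    def __init__(self, init_scale=2.0 ** 15, scale_factor=2.0, scale_window=2000):
        self.loss_scale = init_scale
        self.scale_factor = scale_factor
        self.scale_window = scale_window
        self._clean_steps = 0

    @staticmethod
    def has_overflow(grad_norm):
        # an inf or nan gradient norm signals fp16 overflow
        norm = float(grad_norm)
        return math.isinf(norm) or math.isnan(norm)

    def update_scale(self, overflow):
        if overflow:
            # shrink the scale and restart the clean-step counter
            self.loss_scale /= self.scale_factor
            self._clean_steps = 0
        else:
            # after scale_window consecutive clean steps, cautiously grow the scale
            self._clean_steps += 1
            if self._clean_steps % self.scale_window == 0:
                self.loss_scale *= self.scale_factor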

Example 3: test_clip_grad_norm_

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def test_clip_grad_norm_(self):
        params = torch.nn.Parameter(torch.zeros(5)).requires_grad_(False)
        grad_norm = utils.clip_grad_norm_(params, 1.0)
        self.assertTrue(torch.is_tensor(grad_norm))
        self.assertEqual(grad_norm, 0.0)

        params = [torch.nn.Parameter(torch.zeros(5)) for i in range(3)]
        for p in params:
            p.grad = torch.full((5,), fill_value=2.0)
        grad_norm = utils.clip_grad_norm_(params, 1.0)
        exp_grad_norm = torch.full((15,), fill_value=2.0).norm()
        self.assertTrue(torch.is_tensor(grad_norm))
        self.assertEqual(grad_norm, exp_grad_norm)

        grad_norm = utils.clip_grad_norm_(params, 1.0)
        self.assertAlmostEqual(grad_norm, torch.tensor(1.0)) 
Developer: elbayadm, Project: attn2d, Lines: 18, Source: test_utils.py

Example 4: clip_grad_norm

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def clip_grad_norm(self, max_norm, aggregate_norm_fn=None):
        """Clips gradient norm and updates dynamic loss scaler."""
        self._sync_fp16_grads_to_fp32()
        grad_norm = utils.clip_grad_norm_(self.fp32_params, max_norm, aggregate_norm_fn)

        # detect overflow and adjust loss scale
        overflow = DynamicLossScaler.has_overflow(grad_norm)
        prev_scale = self.scaler.loss_scale
        self.scaler.update_scale(overflow)
        if overflow:
            if self.scaler.loss_scale <= self.min_loss_scale:
                # Use FloatingPointError as an uncommon error that parent
                # functions can safely catch to stop training.
                self.scaler.loss_scale = prev_scale
                raise FloatingPointError((
                    'Minimum loss scale reached ({}). Your loss is probably exploding. '
                    'Try lowering the learning rate, using gradient clipping or '
                    'increasing the batch size.'
                ).format(self.min_loss_scale))
            raise OverflowError('setting loss scale to: ' + str(self.scaler.loss_scale))
        return grad_norm 
Developer: elbayadm, Project: attn2d, Lines: 23, Source: fp16_optimizer.py

Example 5: clip_grad_norm

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def clip_grad_norm(self, max_norm):
        """Clips gradient norm and updates dynamic loss scaler."""
        self._sync_fp16_grads_to_fp32()
        grad_norm = utils.clip_grad_norm_(self.fp32_params.grad.data, max_norm)

        # detect overflow and adjust loss scale
        overflow = DynamicLossScaler.has_overflow(grad_norm)
        self.scaler.update_scale(overflow)
        if overflow:
            if self.scaler.loss_scale <= self.args.min_loss_scale:
                # Use FloatingPointError as an uncommon error that parent
                # functions can safely catch to stop training.
                raise FloatingPointError((
                    'Minimum loss scale reached ({}). Your loss is probably exploding. '
                    'Try lowering the learning rate, using gradient clipping or '
                    'increasing the batch size.'
                ).format(self.args.min_loss_scale))
            raise OverflowError('setting loss scale to: ' + str(self.scaler.loss_scale))
        return grad_norm 
Developer: kakaobrain, Project: helo_word, Lines: 21, Source: fp16_optimizer.py

Example 6: _all_reduce_and_rescale

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def _all_reduce_and_rescale(self, grad_denom):
        # undo effect of dynamic loss scaling on gradients
        grad_denom *= self.scaler.loss_scale

        if self.args.distributed_world_size > 1:
            # flatten grads into a single buffer
            flat_grads = self._flat_grads = self._get_flat_grads(self._flat_grads)

            # scale gradients to avoid overflow in all-reduce
            flat_grads.div_(self.args.distributed_world_size)
            grad_denom /= self.args.distributed_world_size

            # all-reduce flat grads
            torch.distributed.all_reduce(flat_grads)

            # copy grads back to FP32
            self.fp32_params.grad.data.copy_(flat_grads)
        else:
            # single worker: copy grads directly to FP32
            self._get_flat_grads(out=self.fp32_params.grad.data)

        # rescale and clip grads
        self.fp32_params.grad.data.div_(grad_denom)
        grad_norm = utils.clip_grad_norm_(self.fp32_params.grad.data, self.args.clip_norm)

        # detect overflow and adjust loss scale
        overflow = DynamicLossScaler.has_overflow(grad_norm)
        self.scaler.update_scale(overflow)
        if overflow:
            raise OverflowError('setting loss scale to: ' + str(self.scaler.loss_scale))

        return grad_norm 
Developer: nusnlp, Project: crosentgec, Lines: 34, Source: fp16_trainer.py

Example 7: _all_reduce_and_rescale

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def _all_reduce_and_rescale(self, grad_denom):
        # flatten grads into a single buffer and all-reduce
        flat_grads = self._flat_grads = self._get_flat_grads(self._flat_grads)
        if self.args.distributed_world_size > 1:
            torch.distributed.all_reduce(flat_grads)

        # rescale and clip gradients
        flat_grads.div_(grad_denom)
        grad_norm = utils.clip_grad_norm_(flat_grads, self.args.clip_norm)

        # copy grads back into model parameters
        self._set_flat_grads(flat_grads)

        return grad_norm 
Developer: nusnlp, Project: crosentgec, Lines: 16, Source: trainer.py
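
Examples 6 and 7 rely on trainer helpers (_get_flat_grads / _set_flat_grads) that are not shown on this page. Below is a minimal sketch of what "flatten grads into a single buffer" means; the helper names and signatures here are illustrative assumptions, not the projects' actual code. Packing all gradients into one contiguous tensor lets a single all_reduce (and a single clip) cover every parameter, after which the result is scattered back.

import torch

def get_flat_grads(params):
    # concatenate every parameter's gradient into one contiguous 1-D buffer
    return torch.cat([p.grad.detach().reshape(-1)
                      for p in params if p.grad is not None])

def set_flat_grads(params, flat_grads):
    # copy slices of the flat buffer back into each parameter's .grad
    offset = 0
    for p in params:
        if p.grad is None:
            continue
        numel = p.grad.numel()
        p.grad.copy_(flat_grads[offset:offset + numel].view_as(p.grad))
        offset += numel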

Example 8: clip_grad_norm

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def clip_grad_norm(self, max_norm, aggregate_norm_fn=None):
        """Clips gradient norm."""
        return utils.clip_grad_norm_(self.params, max_norm, aggregate_norm_fn) 
Developer: pytorch, Project: fairseq, Lines: 5, Source: fairseq_optimizer.py
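
Example 8 forwards an optional aggregate_norm_fn to utils.clip_grad_norm_. In fairseq this hook receives the locally computed gradient norm and returns the norm actually used for clipping, which lets distributed (e.g. model-parallel) setups combine norms across workers. The sketch below is a hedged illustration only; the process-group setup and the call site are assumptions, not part of the example.

import torch.distributed as dist

def aggregate_norm_across_workers(local_norm):
    # global norm = sqrt(sum of squared per-worker norms); with the NCCL
    # backend the norm tensor must already live on the worker's GPU
    squared = local_norm ** 2
    dist.all_reduce(squared)
    return squared.sqrt()

# hypothetical call site, assuming torch.distributed has been initialized:
# grad_norm = optimizer.clip_grad_norm(max_norm=1.0,
#                                      aggregate_norm_fn=aggregate_norm_across_workers)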

Example 9: _all_reduce_and_rescale

# Required import: from fairseq import utils [as alias]
# Or: from fairseq.utils import clip_grad_norm_ [as alias]
def _all_reduce_and_rescale(self, grad_denom, non_empty=True):
        # flatten grads into a single buffer and all-reduce
        flat_grads = self._flat_grads = self._get_flat_grads(out=self._flat_grads, has_grad=non_empty)
        if self.args.distributed_world_size > 1:
            torch.distributed.all_reduce(flat_grads)

        # rescale and clip gradients
        flat_grads.div_(grad_denom)
        grad_norm = utils.clip_grad_norm_(flat_grads, self.args.clip_norm)

        # copy grads back into model parameters
        self._set_flat_grads(flat_grads)

        return grad_norm 
Developer: mlperf, Project: training_results_v0.5, Lines: 16, Source: trainer.py


Note: The fairseq.utils.clip_grad_norm_ method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets are selected from open-source projects contributed by their respective developers; copyright of the source code belongs to the original authors, and distribution and use should follow each project's License. Please do not reproduce without permission.