This article collects typical usage examples of the torch.nn.utils.clip_grad_norm_ method in Python. If you have been wondering what utils.clip_grad_norm_ does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the containing module, torch.nn.utils.
The following presents 15 code examples of the utils.clip_grad_norm_ method, sorted by popularity by default.
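Before the repository examples, here is a minimal, self-contained sketch of where clip_grad_norm_ typically sits in a training step: after loss.backward() and before optimizer.step(). This sketch is not taken from any of the repositories below; the model, data, and max_norm value are illustrative placeholders.

import torch
from torch import nn
from torch.nn.utils import clip_grad_norm_

# Toy model, optimizer, and batch; all sizes and values here are placeholders.
model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
x, y = torch.randn(32, 10), torch.randn(32, 1)

loss = nn.functional.mse_loss(model(x), y)
loss.backward()

# Rescale gradients in place so that their global L2 norm is at most 1.0.
# The function returns the total norm measured before clipping.
total_norm = clip_grad_norm_(model.parameters(), max_norm=1.0)

optimizer.step()
optimizer.zero_grad()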
Example 1: step
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def step(self):
'''
Given that a backward pass has been made, run an optimization step.
Internally, this will perform most of the activities associated with a control loop
in standard machine learning environments, depending on the configuration of the object:
Gradient clipping, learning rate schedules, logging, checkpointing, etc.
Returns:
self: The current Approximation object
'''
if self._clip_grad != 0:
utils.clip_grad_norm_(self.model.parameters(), self._clip_grad)
self._optimizer.step()
self._optimizer.zero_grad()
self._target.update()
if self._scheduler:
self._writer.add_schedule(self._name + '/lr', self._optimizer.param_groups[0]['lr'])
self._scheduler.step()
self._checkpointer()
return self
Example 2: rescale_gradients
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def rescale_gradients(self) -> float:
"""
Performs gradient rescaling. This is a no-op if gradient rescaling is not enabled.
Returns the norm of the gradients.
"""
if self._opt_level is not None:
# See: https://nvidia.github.io/apex/advanced.html#gradient-clipping
parameters_to_clip = [
p for p in amp.master_params(self.optimizer) if p.grad is not None
]
else:
parameters_to_clip = [p for p in self.model.parameters() if p.grad is not None]
if self._grad_norm:
return clip_grad_norm_(parameters_to_clip, self._grad_norm)
else:
return torch.norm(
torch.stack([torch.norm(p.grad.detach()) for p in parameters_to_clip])
)
Example 3: test_sparse_clip_grad
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def test_sparse_clip_grad(self):
# create a sparse embedding layer, then take gradient
embedding = torch.nn.Embedding(100, 16, sparse=True)
embedding.zero_grad()
ids = (torch.rand(17) * 100).long()
# Set some of the ids to the same value so that the sparse gradient
# has repeated indices. This tests some additional logic.
ids[:5] = 5
loss = embedding(ids).sum()
loss.backward()
assert embedding.weight.grad.is_sparse
# Now try to clip the gradients.
_ = clip_grad_norm_([embedding.weight], 1.5)
# Final norm should be 1.5
grad = embedding.weight.grad.coalesce()
assert grad._values().norm(2.0).item() == pytest.approx(1.5, rel=1e-4)
Example 4: _grad_norm
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def _grad_norm(self, inputs_batch, target_batch, chunk_batch=1):
self.model.zero_grad()
for inputs, target in zip(inputs_batch.chunk(chunk_batch, dim=0),
target_batch.chunk(chunk_batch, dim=0)):
target = target.to(self.device)
inputs = inputs.to(self.device, dtype=self.dtype)
# compute output
output = self.model(inputs)
loss = self.criterion(output, target)
if chunk_batch > 1:
loss = loss / chunk_batch
loss.backward() # accumulate gradient
grad = clip_grad_norm_(self.model.parameters(), float('inf'))
return grad
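A side note on Example 4: because max_norm is float('inf'), no gradient is ever rescaled, so the call is used purely to obtain the total gradient norm across all parameters. A rough equivalent without the clipping helper would be the following sketch (the model name is a placeholder):

# Compute the global L2 gradient norm by hand (sketch, no clipping involved).
grads = [p.grad.detach() for p in model.parameters() if p.grad is not None]
total_norm = torch.norm(torch.stack([torch.norm(g, 2) for g in grads]), 2)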
Example 5: get_grad_fn
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def get_grad_fn(agent, clip_grad, max_grad=1e2):
""" monitor gradient for each sub-component"""
params = [p for p in agent.parameters()]
def f():
grad_log = {}
for n, m in agent.named_children():
tot_grad = 0
for p in m.parameters():
if p.grad is not None:
tot_grad += p.grad.norm(2) ** 2
tot_grad = tot_grad ** (1/2)
grad_log['grad_norm'+n] = tot_grad.item()
grad_norm = clip_grad_norm_(
[p for p in params if p.requires_grad], clip_grad)
grad_norm = grad_norm.item()
if max_grad is not None and grad_norm >= max_grad:
print('WARNING: Exploding Gradients {:.2f}'.format(grad_norm))
grad_norm = max_grad
grad_log['grad_norm'] = grad_norm
return grad_log
return f
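A hypothetical illustration of how the closure returned by get_grad_fn in Example 5 might be consumed inside a training loop; the agent, optimizer, and clip value below are assumptions, not taken from the source repository:

grad_fn = get_grad_fn(agent, clip_grad=2.0)

loss.backward()
grad_log = grad_fn()          # clips gradients and returns per-module norms
optimizer.step()
optimizer.zero_grad()
# Total gradient norm measured before clipping (capped at max_grad for logging).
print(grad_log['grad_norm'])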
Example 6: get_grad_fn
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def get_grad_fn(agent, clip_grad, max_grad=1e2):
""" monitor gradient for each sub-component"""
params = [p for p in agent.parameters()]
def f():
grad_log = {}
for n, m in agent.named_children():
tot_grad = 0
for p in m.parameters():
if p.grad is not None:
tot_grad += p.grad.norm(2) ** 2
tot_grad = tot_grad ** (1/2)
grad_log['grad_norm'+n] = tot_grad.item()
grad_norm = clip_grad_norm_(
[p for p in params if p.requires_grad], clip_grad)
# grad_norm = grad_norm.item()
if max_grad is not None and grad_norm >= max_grad:
print('WARNING: Exploding Gradients {:.2f}'.format(grad_norm))
grad_norm = max_grad
grad_log['grad_norm'] = grad_norm
return grad_log
return f
Example 7: _clip_model_norm
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def _clip_model_norm(self, clip_max_norm_use, clip_max_norm):
"""
:param clip_max_norm_use: whether to apply gradient norm clipping to the model
:param clip_max_norm: maximum gradient norm to clip to [float or None]
:return:
"""
if clip_max_norm_use is True:
gclip = None if clip_max_norm == "None" else float(clip_max_norm)
assert isinstance(gclip, float)
utils.clip_grad_norm_(self.model.parameters(), max_norm=gclip)
Example 8: clip_grad_norm
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def clip_grad_norm(optimizer, max_norm, norm_type=2):
"""Clip the norm of the gradients for all parameters under `optimizer`.
Args:
optimizer (torch.optim.Optimizer): The optimizer whose parameter gradients will be clipped.
max_norm (float): The maximum allowable norm of gradients.
norm_type (int): The type of norm to use in computing gradient norms.
"""
for group in optimizer.param_groups:
utils.clip_grad_norm_(group['params'], max_norm, norm_type)
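A usage sketch for the helper in Example 8; the optimizer setup below is an assumption for illustration. Note that clipping each param group separately bounds each group's norm individually, which is slightly different from clipping the global norm over all parameters at once:

# Two parameter groups with different learning rates (hypothetical encoder/decoder modules).
optimizer = torch.optim.Adam([
    {'params': encoder.parameters(), 'lr': 1e-3},
    {'params': decoder.parameters(), 'lr': 1e-4},
])

loss.backward()
clip_grad_norm(optimizer, max_norm=5.0)  # each group's gradient norm is clipped to <= 5.0
optimizer.step()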
Example 9: _epoch_iterator
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def _epoch_iterator(self, dataloader):
for padded_seqs, seq_lengths in dataloader:
loss = self.model.likelihood(padded_seqs, seq_lengths).mean()
self.optimizer.zero_grad()
loss.backward()
if self.clip_gradient > 0:
tnnu.clip_grad_norm_(self.model.network.parameters(), self.clip_gradient)
self.optimizer.step()
yield loss
Example 10: step
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def step(self, loss, optimizer, scheduler, update=True):
"""
Performs one step of the optimizer.
:param loss: value of loss function
:param optimizer: optimizer
:param scheduler: learning rate scheduler
:param update: if True executes weight update
"""
loss.backward()
if self.grad_clip != float('inf'):
clip_grad_norm_(self.model.parameters(), self.grad_clip)
if update:
scheduler.step()
optimizer.step()
self.model.zero_grad()
Example 11: rescale_gradients
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def rescale_gradients(model: Model, grad_norm: Optional[float] = None) -> Optional[float]:
"""
Performs gradient rescaling. This is a no-op if gradient rescaling is not enabled.
"""
if grad_norm:
parameters_to_clip = [p for p in model.parameters() if p.grad is not None]
return clip_grad_norm_(parameters_to_clip, grad_norm)
return None
Example 12: _maybe_clip_gradients
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def _maybe_clip_gradients(self):
if self._clip_gradients is None:
return lambda: None
return lambda: clip_grad_norm_(self.network.parameters(),
self._clip_gradients)
Example 13: _clip_model_norm
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def _clip_model_norm(self, clip_max_norm_use, clip_max_norm):
"""
:param clip_max_norm_use: whether to apply gradient norm clipping to the model
:param clip_max_norm: maximum gradient norm to clip to [float or None]
:return:
"""
if clip_max_norm_use is True:
gclip = None if clip_max_norm == "None" else float(clip_max_norm)
assert isinstance(gclip, float)
utils.clip_grad_norm_(self.parser.model.parameters(), max_norm=gclip)
Example 14: get_basic_grad_fn
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def get_basic_grad_fn(net, clip_grad, max_grad=1e2):
def f():
grad_norm = clip_grad_norm_(
[p for p in net.parameters() if p.requires_grad], clip_grad)
grad_norm = grad_norm.item()
if max_grad is not None and grad_norm >= max_grad:
print('WARNING: Exploding Gradients {:.2f}'.format(grad_norm))
grad_norm = max_grad
grad_log = {}
grad_log['grad_norm'] = grad_norm
return grad_log
return f
Example 15: run_iteration
# Required import: from torch.nn import utils [as alias]
# Or: from torch.nn.utils import clip_grad_norm_ [as alias]
def run_iteration(self, data_generator, do_backprop=True, run_online_evaluation=False):
"""
Gradient clipping improves training stability.
:param data_generator:
:param do_backprop:
:param run_online_evaluation:
:return:
"""
data_dict = next(data_generator)
data = data_dict['data']
target = data_dict['target']
data = maybe_to_torch(data)
target = maybe_to_torch(target)
if torch.cuda.is_available():
data = to_cuda(data)
target = to_cuda(target)
self.optimizer.zero_grad()
output = self.network(data)
del data
loss = self.loss(output, target)
if run_online_evaluation:
self.run_online_evaluation(output, target)
del target
if do_backprop:
if not self.fp16 or amp is None or not torch.cuda.is_available():
loss.backward()
else:
with amp.scale_loss(loss, self.optimizer) as scaled_loss:
scaled_loss.backward()
_ = clip_grad_norm_(self.network.parameters(), 12)
self.optimizer.step()
return loss.detach().cpu().numpy()