This page collects typical usage examples of Python's torch.optim package. If you have been wondering how torch.optim is used in practice, the curated code examples below may help; you can also read the torch.optim documentation for further details. Note that in these snippets `method` is simply a string argument that selects which torch.optim optimizer to construct, not an attribute of torch.optim.
15 code examples are shown below, ordered by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code samples.
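The examples below come from a custom Optimizer wrapper class rather than from torch.optim directly. For reference, here is a minimal, self-contained sketch of plain torch.optim usage; the toy model, data, and hyperparameters are made up purely for illustration.

import torch
from torch import nn, optim

# Toy model and data, for illustration only.
model = nn.Linear(4, 1)
optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999))

x = torch.randn(8, 4)
y = torch.randn(8, 1)

for _ in range(10):
    optimizer.zero_grad()                        # clear old gradients
    loss = nn.functional.mse_loss(model(x), y)   # forward pass + loss
    loss.backward()                              # compute gradients
    optimizer.step()                             # apply the parameter update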
Example 1: __init__
# Required import: from torch import optim
def __init__(self, method, learning_rate, max_grad_norm,
             lr_decay=1, start_decay_steps=None, decay_steps=None,
             beta1=0.9, beta2=0.999,
             adagrad_accum=0.0,
             decay_method=None,
             warmup_steps=4000,
             model_size=None):
    self.last_ppl = None
    self.learning_rate = learning_rate
    self.original_lr = learning_rate
    self.max_grad_norm = max_grad_norm
    self.method = method
    self.lr_decay = lr_decay
    self.start_decay_steps = start_decay_steps
    self.decay_steps = decay_steps
    self._step = 0
    self.betas = [beta1, beta2]
    self.adagrad_accum = adagrad_accum
    self.decay_method = decay_method
    self.warmup_steps = warmup_steps
    self.model_size = model_size
Example 2: __init__
# Required import: from torch import optim
def __init__(self, method, learning_rate, max_grad_norm,
             lr_decay=1, start_decay_steps=None, decay_steps=None,
             beta1=0.9, beta2=0.999,
             adagrad_accum=0.0,
             decay_method=None,
             warmup_steps=4000, weight_decay=0):
    self.last_ppl = None
    self.learning_rate = learning_rate
    self.original_lr = learning_rate
    self.max_grad_norm = max_grad_norm
    self.method = method
    self.lr_decay = lr_decay
    self.start_decay_steps = start_decay_steps
    self.decay_steps = decay_steps
    self.start_decay = False
    self._step = 0
    self.betas = [beta1, beta2]
    self.adagrad_accum = adagrad_accum
    self.decay_method = decay_method
    self.warmup_steps = warmup_steps
    self.weight_decay = weight_decay
Example 3: set_parameters
# Required import: from torch import optim
def set_parameters(self, params):
    """Build the wrapped torch.optim optimizer over the trainable parameters."""
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
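For context, here is a minimal usage sketch of the wrapper above, assuming the surrounding class is named Optimizer (as the later build_optim example suggests); the toy model is hypothetical.

import torch.nn as nn

# Hypothetical toy model; any nn.Module works here.
model = nn.Sequential(nn.Embedding(100, 16), nn.Linear(16, 100))

wrapper = Optimizer('adam', learning_rate=1e-3, max_grad_norm=5)
# set_parameters expects (name, parameter) pairs, as produced by
# model.named_parameters(), so embedding parameters can be routed to the
# sparse list when method == 'sparseadam'.
wrapper.set_parameters(model.named_parameters())

# A training loop would then call wrapper.optimizer.zero_grad(), compute a
# loss, call loss.backward(), and finally wrapper.step(), as shown in the
# step() examples further down.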
Example 4: __init__
# Required import: from torch import optim
def __init__(self, method, learning_rate, max_grad_norm,
             lr_decay=1, start_decay_steps=None, decay_steps=None,
             beta1=0.9, beta2=0.999,
             adagrad_accum=0.0,
             decay_method=None,
             warmup_steps=4000):
    self.last_ppl = None
    self.learning_rate = learning_rate
    self.original_lr = learning_rate
    self.max_grad_norm = max_grad_norm
    self.method = method
    self.lr_decay = lr_decay
    self.start_decay_steps = start_decay_steps
    self.decay_steps = decay_steps
    self.start_decay = False
    self._step = 0
    self.betas = [beta1, beta2]
    self.adagrad_accum = adagrad_accum
    self.decay_method = decay_method
    self.warmup_steps = warmup_steps
Example 5: __init__
# Required import: from torch import optim
def __init__(self, method, learning_rate, max_grad_norm,
             lr_decay=1, start_decay_steps=None, decay_steps=None,
             beta1=0.9, beta2=0.999,
             adagrad_accum=0.0,
             decay_method=None,
             warmup_steps=4000,
             model_size=None):
    self.last_ppl = None
    self.learning_rate = learning_rate
    self.original_lr = learning_rate
    self.max_grad_norm = max_grad_norm
    self.method = method
    self.lr_decay = lr_decay
    self.start_decay_steps = start_decay_steps
    self.decay_steps = decay_steps
    self.start_decay = False
    self._step = 0
    self.betas = [beta1, beta2]
    self.adagrad_accum = adagrad_accum
    self.decay_method = decay_method
    self.warmup_steps = warmup_steps
    self.model_size = model_size
Example 6: set_parameters
# Required import: from torch import optim
def set_parameters(self, model):
    """Build the wrapped optimizer over the model's trainable parameters."""
    params = [p for p in model.parameters() if p.requires_grad]
    if self.method == 'sgd':
        self.optimizer = optim.SGD(params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(
            params,
            lr=self.learning_rate,
            initial_accumulator_value=self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(params, lr=self.learning_rate)
    elif self.method == 'adafactor':
        self.optimizer = AdaFactor(params, non_constant_decay=True,
                                   enable_factorization=True,
                                   weight_decay=0)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        dense = []
        sparse = []
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            # TODO: Find a better way to check for sparse gradients.
            if 'embed' in name:
                sparse.append(param)
            else:
                dense.append(param)
        self.optimizer = MultipleOptimizer(
            [optim.Adam(dense, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(sparse, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
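MultipleOptimizer is referenced here but not included in these excerpts. The sketch below shows one way such a helper could look, based only on how it is called above; it is an assumption, not the project's actual implementation.

class MultipleOptimizer(object):
    """Dispatch zero_grad()/step() to several wrapped torch.optim optimizers."""

    def __init__(self, optimizers):
        self.optimizers = optimizers

    def zero_grad(self):
        for op in self.optimizers:
            op.zero_grad()

    def step(self):
        for op in self.optimizers:
            op.step()

    def state_dict(self):
        return [op.state_dict() for op in self.optimizers]

    def load_state_dict(self, state_dicts):
        for op, sd in zip(self.optimizers, state_dicts):
            op.load_state_dict(sd)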
Example 7: step
# Required import: from torch import optim
# Also requires: from torch.nn.utils import clip_grad_norm_
def step(self):
    """Update the model parameters based on current gradients.
    Optionally, will employ gradient modification or update learning
    rate.
    """
    self._step += 1
    # Decay method used in tensor2tensor.
    if self.decay_method == "noam":
        lr_scale = (
            self.model_size ** (-0.5) *
            min(self._step ** (-0.5),
                self._step * self.warmup_steps**(-1.5)))
    # Decay based on start_decay_steps every decay_steps
    elif self.start_decay_steps is not None:
        step = self._step - self.start_decay_steps
        lr_scale = (self.lr_decay ** (
            max(step + self.decay_steps, 0) // self.decay_steps))
    else:
        lr_scale = 1
    self.learning_rate = lr_scale * self.original_lr
    for group in self.optimizer.param_groups:
        if self.method != 'adafactor':
            group['lr'] = self.learning_rate
        if self.max_grad_norm:
            clip_grad_norm_(group['params'], self.max_grad_norm)
    self.optimizer.step()

# The code below is an implementation of https://arxiv.org/pdf/1804.04235.pdf
# (AdaFactor), inspired by but modified from
# https://github.com/DeadAt0m/adafactor-pytorch
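The "noam" branch above is the Transformer learning-rate schedule: lr = original_lr * model_size**(-0.5) * min(step**(-0.5), step * warmup_steps**(-1.5)), i.e. a linear warmup followed by inverse-square-root decay. A small standalone sketch (hyperparameter values chosen only for illustration):

def noam_lr(step, original_lr=2.0, model_size=512, warmup_steps=4000):
    """Transformer schedule: linear warmup, then inverse-sqrt decay."""
    return (original_lr * model_size ** (-0.5) *
            min(step ** (-0.5), step * warmup_steps ** (-1.5)))

# The rate rises during warmup, peaks near step == warmup_steps, then decays.
for s in (1, 1000, 4000, 16000, 64000):
    print(s, f"{noam_lr(s):.2e}")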
Example 8: build_optim
# Required import: from torch import optim
# Also requires: torch, plus the project's Optimizer class and use_gpu helper
def build_optim(model, opt, checkpoint):
    """Build an optimizer, restoring its state from a checkpoint if given."""
    saved_optimizer_state_dict = None
    if opt.train_from:
        optim = checkpoint['optim']
        # Save a copy of optim.optimizer.state_dict() so the optimizer state
        # can be restored below, because optim.set_parameters(...) replaces
        # optim.optimizer and discards the values stored in its state_dict().
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(
            opt.optim, opt.learning_rate, opt.max_grad_norm,
            lr_decay=opt.learning_rate_decay,
            start_decay_steps=opt.start_decay_steps,
            decay_steps=opt.decay_steps,
            beta1=opt.adam_beta1,
            beta2=opt.adam_beta2,
            adagrad_accum=opt.adagrad_accumulator_init,
            decay_method=opt.decay_method,
            warmup_steps=opt.warmup_steps)
    optim.set_parameters(model.named_parameters())
    if opt.train_from:
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if use_gpu(opt):
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()
        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")
    return optim
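A hedged usage sketch of build_optim when training from scratch: the opt fields mirror the attributes read above, and model stands for any nn.Module (both are assumptions made for illustration).

from argparse import Namespace

# Hypothetical training options covering the fields build_optim reads.
opt = Namespace(
    train_from='', optim='adam', learning_rate=1e-3, max_grad_norm=5,
    learning_rate_decay=0.5, start_decay_steps=50000, decay_steps=10000,
    adam_beta1=0.9, adam_beta2=0.999, adagrad_accumulator_init=0,
    decay_method=None, warmup_steps=4000)

# checkpoint is only consulted when opt.train_from is set.
optimizer = build_optim(model, opt, checkpoint=None)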
Example 9: _set_rate
# Required import: from torch import optim
def _set_rate(self, learning_rate):
    self.learning_rate = learning_rate
    if self.method != 'sparseadam':
        self.optimizer.param_groups[0]['lr'] = self.learning_rate
    else:
        # A MultipleOptimizer wraps several optimizers; update each one.
        for op in self.optimizer.optimizers:
            op.param_groups[0]['lr'] = self.learning_rate
Example 10: step
# Required import: from torch import optim
# Also requires: from torch.nn.utils import clip_grad_norm_
def step(self):
    """Update the model parameters based on current gradients.
    Optionally, will employ gradient modification or update learning
    rate.
    """
    self._step += 1
    # Decay method used in tensor2tensor.
    if self.decay_method == "noam":
        self._set_rate(
            self.original_lr *
            min(self._step ** (-0.5),
                self._step * self.warmup_steps**(-1.5)))
    else:
        if ((self.start_decay_steps is not None) and (
                self._step >= self.start_decay_steps)):
            self.start_decay = True
        if self.start_decay:
            if ((self._step - self.start_decay_steps)
                    % self.decay_steps == 0):
                self.learning_rate = self.learning_rate * self.lr_decay
    if self.method != 'sparseadam':
        self.optimizer.param_groups[0]['lr'] = self.learning_rate
    if self.max_grad_norm:
        clip_grad_norm_(self.params, self.max_grad_norm)
    self.optimizer.step()
Example 11: set_parameters
# Required import: from torch import optim
def set_parameters(self, params):
    """Build the wrapped optimizer(s) over the trainable (name, param) pairs."""
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.learning_rate,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example 12: step
# Required import: from torch import optim
# Also requires: from torch.nn.utils import clip_grad_norm_
def step(self):
    """Update the model parameters based on current gradients.
    Optionally, will employ gradient modification or update learning
    rate.
    """
    self._step += 1
    # Decay method used in tensor2tensor.
    if self.decay_method == "noam":
        self._set_rate(
            self.original_lr *
            min(self._step ** (-0.5),
                self._step * self.warmup_steps**(-1.5)))
        # self._set_rate(self.original_lr * self.model_size ** (-0.5) *
        #                min(1.0, self._step / self.warmup_steps) *
        #                max(self._step, self.warmup_steps) ** (-0.5))
    # Decay based on start_decay_steps every decay_steps
    else:
        if ((self.start_decay_steps is not None) and (
                self._step >= self.start_decay_steps)):
            self.start_decay = True
        if self.start_decay:
            if ((self._step - self.start_decay_steps)
                    % self.decay_steps == 0):
                self.learning_rate = self.learning_rate * self.lr_decay
    if self.method != 'sparseadam':
        self.optimizer.param_groups[0]['lr'] = self.learning_rate
    if self.max_grad_norm:
        clip_grad_norm_(self.params, self.max_grad_norm)
    self.optimizer.step()
Example 13: step
# Required import: from torch import optim
# Also requires: from torch.nn.utils import clip_grad_norm_
def step(self):
    """Update the model parameters based on current gradients.
    Optionally, will employ gradient modification or update learning
    rate.
    """
    self._step += 1
    # Decay method used in tensor2tensor.
    if self.decay_method == "noam":
        self._set_rate(
            self.original_lr *
            (self.model_size ** (-0.5) *
             min(self._step ** (-0.5),
                 self._step * self.warmup_steps**(-1.5))))
    else:
        if ((self.start_decay_steps is not None) and (
                self._step >= self.start_decay_steps)):
            self.start_decay = True
        if self.start_decay:
            if ((self._step - self.start_decay_steps)
                    % self.decay_steps == 0):
                self.learning_rate = self.learning_rate * self.lr_decay
    if self.method != 'sparseadam':
        self.optimizer.param_groups[0]['lr'] = self.learning_rate
    if self.max_grad_norm:
        clip_grad_norm_(self.params, self.max_grad_norm)
    self.optimizer.step()
Example 14: step
# Required import: from torch import optim
# Also requires: from torch.nn.utils import clip_grad_norm_
def step(self):
    """Update the model parameters based on current gradients.
    Optionally, will employ gradient modification or update learning
    rate.
    """
    self._step += 1
    # Decay method used in tensor2tensor.
    if self.decay_method == "noam":
        self._set_rate(
            self.original_lr *
            (self.model_size ** (-0.5) *
             min(self._step ** (-0.5),
                 self._step * self.warmup_steps**(-1.5))))
    # Decay based on start_decay_steps every decay_steps
    else:
        if ((self.start_decay_steps is not None) and (
                self._step >= self.start_decay_steps)):
            self.start_decay = True
        if self.start_decay:
            if ((self._step - self.start_decay_steps)
                    % self.decay_steps == 0):
                self.learning_rate = self.learning_rate * self.lr_decay
    if self.method != 'sparseadam':
        self.optimizer.param_groups[0]['lr'] = self.learning_rate
    if self.max_grad_norm:
        clip_grad_norm_(self.params, self.max_grad_norm)
    self.optimizer.step()
Example 15: __init__
# Required import: from torch import optim
def __init__(
    self,
    method,
    learning_rate,
    max_grad_norm,
    lr_decay=1,
    start_decay_steps=None,
    decay_steps=None,
    beta1=0.9,
    beta2=0.999,
    adagrad_accum=0.0,
    decay_method=None,
    warmup_steps=4000,
    weight_decay=0,
):
    self.last_ppl = None
    self.learning_rate = learning_rate
    self.original_lr = learning_rate
    self.max_grad_norm = max_grad_norm
    self.method = method
    self.lr_decay = lr_decay
    self.start_decay_steps = start_decay_steps
    self.decay_steps = decay_steps
    self.start_decay = False
    self._step = 0
    self.betas = [beta1, beta2]
    self.adagrad_accum = adagrad_accum
    self.decay_method = decay_method
    self.warmup_steps = warmup_steps
    self.weight_decay = weight_decay