本文整理汇总了 Python 中 fairseq.optim.build_optimizer 方法的典型用法代码示例。如果您正苦于以下问题:Python optim.build_optimizer 方法的具体用法?Python optim.build_optimizer 怎么用?Python optim.build_optimizer 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 fairseq.optim 的用法示例。
在下文中一共展示了 optim.build_optimizer 方法的 13 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: _build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def _build_optimizer(self):
    """Build the optimizer and LR scheduler on a flat FP32 copy of the model.

    Every parameter of ``self.model`` with ``requires_grad`` set is copied,
    in iteration order, into one 1-D float tensor. That tensor is wrapped in
    a ``torch.nn.Parameter`` and stored as ``self.fp32_params`` together with
    a same-sized ``.grad`` buffer. ``self.optimizer`` is then built over that
    single parameter and ``self.lr_scheduler`` is built for the optimizer,
    both from ``self.args``.
    """
    trainable = [p for p in self.model.parameters() if p.requires_grad]
    sizes = [p.data.numel() for p in trainable]
    total_param_size = sum(sizes)

    # Float buffer allocated through the first parameter's ``.new``; its
    # contents are filled in by the copy loop below.
    flat = trainable[0].new(0).float().new(total_param_size)

    start = 0
    for param, size in zip(trainable, sizes):
        end = start + size
        flat[start:end].copy_(param.data.view(-1))
        start = end

    self.fp32_params = torch.nn.Parameter(flat)
    self.fp32_params.grad = self.fp32_params.data.new(total_param_size)

    # The optimizer only ever sees the single flat FP32 parameter.
    self.optimizer = optim.build_optimizer(self.args, [self.fp32_params])
    self.lr_scheduler = lr_scheduler.build_lr_scheduler(self.args, self.optimizer)
示例2: build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def build_optimizer(cls, args, params):
    """Create an instance of ``cls`` that wraps an FP32 optimizer.

    Args:
        args (argparse.Namespace): fairseq args
        params (iterable): iterable of parameters to optimize

    Returns:
        The result of ``cls(args, params, fp32_optimizer, fp32_params)``.

    Raises:
        RuntimeError: if flattening is enabled but the built optimizer's
            ``supports_flat_params`` is false.
    """
    # Flattening is on unless --fp16-no-flatten-grads is set, and bf16 always
    # turns it off: mixed precision is faster on TPUs without flat grads.
    use_flat = not (
        getattr(args, 'fp16_no_flatten_grads', False)
        or getattr(args, 'bf16', False)
    )

    fp32_params = cls.build_fp32_params(params, flatten=use_flat)

    # With flattening, the FP32 params are passed as a one-element list;
    # otherwise they are handed to the optimizer as returned.
    optimizer_params = [fp32_params] if use_flat else fp32_params
    fp32_optimizer = optim.build_optimizer(args, optimizer_params)

    if use_flat and not fp32_optimizer.supports_flat_params:
        raise RuntimeError(
            'chosen optimizer does not support flat params, '
            'please set --fp16-no-flatten-grads'
        )
    return cls(args, params, fp32_optimizer, fp32_params)
示例3: _build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def _build_optimizer(self):
    """Build the optimizer and LR scheduler on a flat FP32 copy of the model.

    Every parameter of ``self.model`` with ``requires_grad`` set is copied,
    in iteration order, into one 1-D float tensor, which is wrapped in a
    ``torch.nn.Parameter`` and stored as ``self.fp32_params``. The optimizer
    built over that single parameter is stored in ``self._optimizer``; the
    LR scheduler is built from ``self.optimizer`` and stored in
    ``self.lr_scheduler``.
    """
    trainable = [p for p in self.model.parameters() if p.requires_grad]
    sizes = [p.data.numel() for p in trainable]
    total_param_size = sum(sizes)

    # Float buffer allocated through the first parameter's ``.new``; its
    # contents are filled in by the copy loop below.
    flat = trainable[0].new(0).float().new(total_param_size)

    start = 0
    for param, size in zip(trainable, sizes):
        end = start + size
        flat[start:end].copy_(param.data.view(-1))
        start = end

    # NOTE: unlike the variant that pre-allocates ``fp32_params.grad``, no
    # gradient buffer is created here (that allocation was disabled).
    self.fp32_params = torch.nn.Parameter(flat)

    # The optimizer only ever sees the single flat FP32 parameter.
    self._optimizer = optim.build_optimizer(self.args, [self.fp32_params])
    # NOTE(review): reads ``self.optimizer`` while the line above writes
    # ``self._optimizer`` - presumably a property exposes it; confirm.
    self.lr_scheduler = lr_scheduler.build_lr_scheduler(self.args, self.optimizer)
示例4: load_checkpoint
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def load_checkpoint(self, filename):
    """Load all training state from a checkpoint file.

    The model state is loaded through ``utils.load_model_state`` onto the
    current CUDA device. When the checkpoint carries optimizer state, the
    optimizer and LR scheduler are rebuilt and, if the recorded criterion /
    optimizer class names match the current ones, their saved state is
    restored. The update counter is restored from the last history entry.

    Args:
        filename: path of the checkpoint, passed to ``utils.load_model_state``.

    Returns:
        The ``extra_state`` value returned by ``utils.load_model_state``.
    """
    # NOTE(review): block nesting was reconstructed from a listing that had
    # lost its indentation - confirm against the upstream source.
    device = torch.cuda.current_device()
    extra_state, self._optim_history, last_optim_state = utils.load_model_state(
        filename, self.model, cuda_device=device)

    if last_optim_state is None:
        return extra_state

    # rebuild optimizer after loading model, since params may have changed
    self.optimizer = optim.build_optimizer(self.args, self.model.parameters())
    self.lr_scheduler = lr_scheduler.build_lr_scheduler(self.args, self.optimizer)

    # only reload optimizer and lr_scheduler if they match
    last_optim = self._optim_history[-1]
    same_criterion = last_optim['criterion_name'] == self.criterion.__class__.__name__
    if same_criterion:
        self.lr_scheduler.load_state_dict(last_optim['lr_scheduler_state'])
        # Optimizer state is only restored when the criterion matched too.
        if last_optim['optimizer_name'] == self.optimizer.__class__.__name__:
            self.optimizer.load_state_dict(last_optim_state)

    self._num_updates = last_optim['num_updates']
    return extra_state
示例5: build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def build_optimizer(cls, args, params):
    """Create an instance of ``cls`` that wraps an FP32 optimizer.

    Args:
        args (argparse.Namespace): fairseq args
        params (iterable): iterable of parameters to optimize

    Returns:
        The result of ``cls(args, params, fp32_optimizer, fp32_params)``.

    Raises:
        RuntimeError: if flattening is enabled but the built optimizer's
            ``supports_flat_params`` is false.
    """
    # Flattening is the default; --fp16-no-flatten-grads switches it off.
    no_flatten = getattr(args, 'fp16_no_flatten_grads', False)
    use_flat = not no_flatten

    fp32_params = cls.build_fp32_params(params, flatten=use_flat)

    # With flattening, the FP32 params are passed as a one-element list;
    # otherwise they are handed to the optimizer as returned.
    optimizer_params = [fp32_params] if use_flat else fp32_params
    fp32_optimizer = optim.build_optimizer(args, optimizer_params)

    if use_flat and not fp32_optimizer.supports_flat_params:
        raise RuntimeError(
            'chosen optimizer does not support flat params, '
            'please set --fp16-no-flatten-grads'
        )
    return cls(args, params, fp32_optimizer, fp32_params)
示例6: _build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def _build_optimizer(self):
    """Select and build the optimizer, then its learning rate scheduler.

    Only parameters of ``self.model`` with ``requires_grad`` set are
    optimized. With ``--fp16`` one of the FP16 optimizer wrappers is used
    (the memory-efficient one when ``args.memory_efficient_fp16`` is set);
    otherwise the plain optimizer is built. A hint is printed when the CUDA
    device's major compute capability suggests the other precision mode.
    Results are stored in ``self._optimizer`` and ``self._lr_scheduler``.
    """
    trainable = [p for p in self.model.parameters() if p.requires_grad]

    if not self.args.fp16:
        if self.cuda and torch.cuda.get_device_capability(0)[0] >= 7:
            print('| NOTICE: your device may support faster training with --fp16')
        self._optimizer = optim.build_optimizer(self.args, trainable)
    else:
        if self.cuda and torch.cuda.get_device_capability(0)[0] < 7:
            print('| WARNING: your device does NOT support faster training with --fp16, '
                  'please switch to FP32 which is likely to be faster')
        if self.args.memory_efficient_fp16:
            wrapper_cls = optim.MemoryEfficientFP16Optimizer
        else:
            wrapper_cls = optim.FP16Optimizer
        self._optimizer = wrapper_cls.build_optimizer(self.args, trainable)

    # We should initialize the learning rate scheduler immediately after
    # building the optimizer, so that the initial learning rate is set.
    self._lr_scheduler = lr_scheduler.build_lr_scheduler(self.args, self.optimizer)
示例7: build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def build_optimizer(cls, args, params):
    """Create an instance of ``cls`` over a flat FP32 copy of ``params``.

    All parameters are copied, in iteration order, into one 1-D float
    tensor that is wrapped in a ``torch.nn.Parameter`` and given a
    same-sized ``.grad`` buffer. The FP32 optimizer is built over that
    single flat parameter.

    Args:
        args (argparse.Namespace): fairseq args
        params (iterable): iterable of parameters to optimize

    Returns:
        The result of ``cls(args, params, fp32_optimizer, fp32_params)``,
        where ``params`` is the materialized list of the given parameters.
    """
    # ``params`` is traversed twice and indexed below, so a one-shot iterable
    # (e.g. the generator returned by ``model.parameters()``) must be
    # materialized first or it would be exhausted after the size computation.
    params = list(params)

    # create FP32 copy of parameters and grads
    total_param_size = sum(p.data.numel() for p in params)
    fp32_params = params[0].new(0).float().new(total_param_size)
    offset = 0
    for p in params:
        numel = p.data.numel()
        fp32_params[offset:offset + numel].copy_(p.data.view(-1))
        offset += numel
    fp32_params = torch.nn.Parameter(fp32_params)
    fp32_params.grad = fp32_params.data.new(total_param_size)

    fp32_optimizer = optim.build_optimizer(args, [fp32_params])
    return cls(args, params, fp32_optimizer, fp32_params)
示例8: _build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def _build_optimizer(self):
    """Build ``self.optimizer`` over the model parameters, then ``self.lr_scheduler``."""
    model_params = self.model.parameters()
    self.optimizer = optim.build_optimizer(self.args, model_params)
    # The scheduler is built for the optimizer that was just created.
    self.lr_scheduler = lr_scheduler.build_lr_scheduler(
        self.args,
        self.optimizer,
    )
示例9: _build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def _build_optimizer(self):
    """Select and build the optimizer, then its learning rate scheduler.

    Parameters of both ``self.model`` and ``self.criterion`` that have
    ``requires_grad`` set are optimized. With ``--fp16`` or ``--bf16`` one of
    the FP16 optimizer wrappers is used (the memory-efficient one when either
    memory-efficient flag is set); otherwise the plain optimizer is built.
    When ``args.use_bmuf`` is set, the optimizer is additionally wrapped in
    ``optim.FairseqBMUF``. Results are stored in ``self._optimizer`` and
    ``self._lr_scheduler``.
    """
    trainable = [
        p
        for p in chain(self.model.parameters(), self.criterion.parameters())
        if p.requires_grad
    ]

    if not (self.args.fp16 or self.args.bf16):
        if self.cuda and torch.cuda.get_device_capability(0)[0] >= 7:
            logger.info("NOTE: your device may support faster training with --fp16")
        self._optimizer = optim.build_optimizer(self.args, trainable)
    else:
        if self.cuda and torch.cuda.get_device_capability(0)[0] < 7:
            logger.info(
                "NOTE: your device does NOT support faster training with --fp16, "
                "please switch to FP32 which is likely to be faster"
            )
        if self.args.memory_efficient_fp16 or self.args.memory_efficient_bf16:
            wrapper_cls = optim.MemoryEfficientFP16Optimizer
        else:
            wrapper_cls = optim.FP16Optimizer
        self._optimizer = wrapper_cls.build_optimizer(self.args, trainable)

    if self.args.use_bmuf:
        self._optimizer = optim.FairseqBMUF(self.args, self._optimizer)

    # We should initialize the learning rate scheduler immediately after
    # building the optimizer, so that the initial learning rate is set.
    self._lr_scheduler = lr_scheduler.build_lr_scheduler(self.args, self.optimizer)
    self._lr_scheduler.step_update(0)
示例10: _build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def _build_optimizer(self, model):
    """Build, store and return the optimizer for ``model``.

    With ``args.fp16`` set, an ``optim.FP16Optimizer`` is built over the
    parameters of ``model`` that have ``requires_grad`` set; otherwise the
    plain optimizer is built over ``model.parameters()``. A hint is printed
    when the CUDA device's major compute capability suggests the other
    precision mode.

    Args:
        model: the model whose parameters are optimized; must provide
            ``parameters()``.

    Returns:
        The optimizer, which is also stored in ``self._optimizer``.
    """
    # Only query the compute capability when CUDA is usable: the bare
    # ``get_device_capability`` call fails on hosts without a CUDA device,
    # which would abort optimizer construction just to print a hint.
    has_cuda = torch.cuda.is_available()

    if self.args.fp16:
        if has_cuda and torch.cuda.get_device_capability(0)[0] < 7:
            print(
                "| WARNING: your device does NOT support faster training "
                "with --fp16, please switch to FP32 which is likely to be"
                " faster"
            )
        params = list(filter(lambda p: p.requires_grad, model.parameters()))
        self._optimizer = optim.FP16Optimizer.build_optimizer(self.args, params)
    else:
        if has_cuda and torch.cuda.get_device_capability(0)[0] >= 7:
            print("| NOTICE: your device may support faster training with --fp16")
        self._optimizer = optim.build_optimizer(self.args, model.parameters())
    return self._optimizer
示例11: _build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def _build_optimizer(self):
    """Build ``self._optimizer`` over the model parameters, then ``self.lr_scheduler``."""
    model_params = self.model.parameters()
    self._optimizer = optim.build_optimizer(self.args, model_params)
    # The scheduler is built for the optimizer that was just created.
    self.lr_scheduler = lr_scheduler.build_lr_scheduler(
        self.args,
        self._optimizer,
    )
示例12: __init__
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def __init__(self, args, model, criterion):
    """Set up the model, criterion, optimizer, LR scheduler and meters.

    Args:
        args: configuration passed to the optimizer and LR scheduler builders.
        model: the model; moved to the current device with ``.cuda()``.
        criterion: the criterion; moved to the current device with ``.cuda()``.

    Raises:
        NotImplementedError: if CUDA is not available.
    """
    if not torch.cuda.is_available():
        raise NotImplementedError('Training on CPU is not supported')

    self.args = args

    # copy model and criterion to current device
    self.model = model.cuda()
    self.criterion = criterion.cuda()

    # initialize optimizer and LR scheduler
    self.optimizer = optim.build_optimizer(self.args, self.model.parameters())
    self.lr_scheduler = lr_scheduler.build_lr_scheduler(self.args, self.optimizer)

    # initialize meters (insertion order is preserved by the OrderedDict)
    meter_table = (
        ('train_loss', AverageMeter),
        ('train_nll_loss', AverageMeter),
        ('valid_loss', AverageMeter),
        ('valid_nll_loss', AverageMeter),
        ('wps', TimeMeter),       # words per second
        ('ups', TimeMeter),       # updates per second
        ('wpb', AverageMeter),    # words per batch
        ('bsz', AverageMeter),    # sentences per batch
        ('gnorm', AverageMeter),  # gradient norm
        ('clip', AverageMeter),   # % of updates clipped
        ('oom', AverageMeter),    # out of memory
    )
    self.meters = OrderedDict(
        (meter_name, meter_cls()) for meter_name, meter_cls in meter_table
    )

    self._max_bsz_seen = 0
    self._num_updates = 0
示例13: _build_optimizer
# 需要导入模块: from fairseq import optim [as 别名]
# 或者: from fairseq.optim import build_optimizer [as 别名]
def _build_optimizer(self):
    """Select and build the optimizer, then its learning rate scheduler.

    Parameters of both ``self.model`` and ``self.criterion`` that have
    ``requires_grad`` set are optimized. With ``--fp16`` one of the FP16
    optimizer wrappers is used (the memory-efficient one when
    ``args.memory_efficient_fp16`` is set); otherwise the plain optimizer is
    built. When ``args.use_bmuf`` is set, the optimizer is additionally
    wrapped in ``optim.FairseqBMUF``. Results are stored in
    ``self._optimizer`` and ``self._lr_scheduler``.
    """
    trainable = [
        p
        for p in chain(self.model.parameters(), self.criterion.parameters())
        if p.requires_grad
    ]

    if not self.args.fp16:
        if self.cuda and torch.cuda.get_device_capability(0)[0] >= 7:
            logger.info("NOTE: your device may support faster training with --fp16")
        self._optimizer = optim.build_optimizer(self.args, trainable)
    else:
        if self.cuda and torch.cuda.get_device_capability(0)[0] < 7:
            logger.info(
                "NOTE: your device does NOT support faster training with --fp16, "
                "please switch to FP32 which is likely to be faster"
            )
        if self.args.memory_efficient_fp16:
            wrapper_cls = optim.MemoryEfficientFP16Optimizer
        else:
            wrapper_cls = optim.FP16Optimizer
        self._optimizer = wrapper_cls.build_optimizer(self.args, trainable)

    if self.args.use_bmuf:
        self._optimizer = optim.FairseqBMUF(self.args, self._optimizer)

    # We should initialize the learning rate scheduler immediately after
    # building the optimizer, so that the initial learning rate is set.
    self._lr_scheduler = lr_scheduler.build_lr_scheduler(self.args, self.optimizer)
    self._lr_scheduler.step_update(0)