This article collects typical usage examples of the Python method apex.fp16_utils.FP16_Optimizer. If you are wondering what fp16_utils.FP16_Optimizer does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of the containing module, apex.fp16_utils.
The following shows 6 code examples of the fp16_utils.FP16_Optimizer method, sorted by popularity by default.
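Before the individual examples, here is a minimal sketch of the pattern they all share: convert the network to half precision, wrap the real optimizer in FP16_Optimizer, and replace loss.backward() with optimizer.backward(loss). It assumes a CUDA machine with the legacy apex.fp16_utils API installed (this API has since been deprecated in favor of apex.amp / torch.cuda.amp); the network, input and target below are hypothetical placeholders.

import torch
import torch.nn as nn
import torch.optim as optim
from apex import fp16_utils

model = nn.Linear(128, 10).cuda()              # placeholder network
model = fp16_utils.network_to_half(model)      # cast parameters/activations to FP16 (BatchNorm stays FP32)
base_optimizer = optim.SGD(model.parameters(), lr=0.01)
# wrap the real optimizer; dynamic loss scaling adjusts the scale automatically on overflow
optimizer = fp16_utils.FP16_Optimizer(base_optimizer, dynamic_loss_scale=True)

x = torch.randn(32, 128).cuda()
target = torch.randint(0, 10, (32,)).cuda()
loss = nn.functional.cross_entropy(model(x).float(), target)

optimizer.zero_grad()
optimizer.backward(loss)   # replaces loss.backward(); applies loss scaling internally
optimizer.step()           # unscales gradients and updates the FP32 master weights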
Example 1: optimizer_cpu_state_dict
# Required import: from apex import fp16_utils [as alias]
# Or: from apex.fp16_utils import FP16_Optimizer [as alias]
def optimizer_cpu_state_dict(optimizer):
    # build a CPU copy of the optimizer state so checkpoints do not hold CUDA memory
    optimizer_state_dict = optimizer.state_dict()
    dict_value_to_cpu = lambda d: {k: v.cpu() if isinstance(v, torch.Tensor) else v
                                   for k, v in d.items()}
    if 'optimizer_state_dict' in optimizer_state_dict:
        # FP16_Optimizer nests the real optimizer state under 'optimizer_state_dict'
        cuda_state_dict = optimizer_state_dict['optimizer_state_dict']
    else:
        cuda_state_dict = optimizer_state_dict
    if 'state' in cuda_state_dict:
        cuda_state_dict['state'] = {k: dict_value_to_cpu(v)
                                    for k, v in cuda_state_dict['state'].items()}
    return optimizer_state_dict
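A short, hypothetical usage of Example 1: move the optimizer state to the CPU before writing a checkpoint, so the saved tensors no longer reference GPU memory. net, optimizer and the file name are placeholders.

import torch

checkpoint = {
    'model_state_dict': net.state_dict(),
    # CPU-side copy of the (possibly FP16_Optimizer-wrapped) optimizer state
    'optimizer_state_dict': optimizer_cpu_state_dict(optimizer),
}
torch.save(checkpoint, 'checkpoint.pth')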
Example 2: create_lr_scheduler
# Required import: from apex import fp16_utils [as alias]
# Or: from apex.fp16_utils import FP16_Optimizer [as alias]
def create_lr_scheduler(optimizer, lr_scheduler, **kwargs):
    if not isinstance(optimizer, optim.Optimizer):
        # not a torch optimizer: assume an FP16_Optimizer wrapper and schedule its inner optimizer
        optimizer = optimizer.optimizer
    if lr_scheduler == 'plateau':
        patience = kwargs.get('lr_scheduler_patience', 10) // kwargs.get('validation_interval', 1)
        factor = kwargs.get('lr_scheduler_gamma', 0.1)
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=patience, factor=factor, eps=0)
    elif lr_scheduler == 'step':
        step_size = kwargs['lr_scheduler_step_size']
        gamma = kwargs.get('lr_scheduler_gamma', 0.1)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    elif lr_scheduler == 'cos':
        max_epochs = kwargs['max_epochs']
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, max_epochs)
    elif lr_scheduler == 'milestones':
        milestones = kwargs['lr_scheduler_milestones']
        gamma = kwargs.get('lr_scheduler_gamma', 0.1)
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)
    elif lr_scheduler == 'findlr':
        max_steps = kwargs['max_steps']
        lr_scheduler = FindLR(optimizer, max_steps)
    elif lr_scheduler == 'noam':
        warmup_steps = kwargs['lr_scheduler_warmup']
        lr_scheduler = NoamLR(optimizer, warmup_steps=warmup_steps)
    elif lr_scheduler == 'clr':
        step_size = kwargs['lr_scheduler_step_size']
        learning_rate = kwargs['learning_rate']
        lr_scheduler_gamma = kwargs['lr_scheduler_gamma']
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                                  T_max=step_size,
                                                                  eta_min=learning_rate * lr_scheduler_gamma)
    else:
        raise NotImplementedError("unknown lr_scheduler " + lr_scheduler)
    return lr_scheduler
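A hypothetical call to Example 2. Because the factory unwraps FP16_Optimizer first, the PyTorch schedulers always see a real optim.Optimizer; the keyword names follow the kwargs the function itself reads, while num_epochs and train_one_epoch are placeholders.

scheduler = create_lr_scheduler(optimizer, 'step',
                                lr_scheduler_step_size=30,
                                lr_scheduler_gamma=0.1)
for epoch in range(num_epochs):
    train_one_epoch(net, optimizer)   # placeholder training loop
    scheduler.step()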
Example 3: get_optimizer
# Required import: from apex import fp16_utils [as alias]
# Or: from apex.fp16_utils import FP16_Optimizer [as alias]
def get_optimizer(obj):
    '''
    Apex introduces the FP16_Optimizer object.
    However, this is not really an optimizer, only a wrapper around one.
    This function returns the actual optimizer.
    '''
    if type(obj) == FP16_Optimizer:
        return obj.optimizer
    # If obj is not an FP16_Optimizer, we are not running in mixed precision
    # and the passed object is already an actual optimizer.
    return obj
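A brief, hypothetical use of Example 3: anything that needs the raw param_groups (manual learning-rate tweaks, schedulers, logging) should go through the unwrapped optimizer, whether or not mixed precision is enabled. The optimizer variable is a placeholder that may or may not be an FP16_Optimizer.

for param_group in get_optimizer(optimizer).param_groups:
    param_group['lr'] *= 0.5   # halve the learning rate, fp16-wrapped or not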
Example 4: create_optimizer
# Required import: from apex import fp16_utils [as alias]
# Or: from apex.fp16_utils import FP16_Optimizer [as alias]
def create_optimizer(net, name, learning_rate, weight_decay, momentum=0, fp16_loss_scale=None,
                     optimizer_state=None, device=None):
    net.float()
    use_fp16 = fp16_loss_scale is not None
    if use_fp16:
        from apex import fp16_utils
        net = fp16_utils.network_to_half(net)

    device = choose_device(device)
    print('use', device)
    if device.type == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    net = net.to(device)

    # optimizer
    parameters = [p for p in net.parameters() if p.requires_grad]
    print('N of parameters', len(parameters))
    if name == 'sgd':
        optimizer = optim.SGD(parameters, lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
    elif name == 'adamw':
        from .adamw import AdamW
        optimizer = AdamW(parameters, lr=learning_rate, weight_decay=weight_decay)
    elif name == 'adam':
        optimizer = optim.Adam(parameters, lr=learning_rate, weight_decay=weight_decay)
    else:
        raise NotImplementedError(name)

    if use_fp16:
        from apex import fp16_utils
        if fp16_loss_scale == 0:
            opt_args = dict(dynamic_loss_scale=True)
        else:
            opt_args = dict(static_loss_scale=fp16_loss_scale)
        print('FP16_Optimizer', opt_args)
        optimizer = fp16_utils.FP16_Optimizer(optimizer, **opt_args)
    else:
        # give plain optimizers the same .backward(loss) interface as FP16_Optimizer
        optimizer.backward = lambda loss: loss.backward()

    if optimizer_state:
        if use_fp16 and 'optimizer_state_dict' not in optimizer_state:
            # resume FP16_Optimizer.optimizer only
            optimizer.optimizer.load_state_dict(optimizer_state)
        elif not use_fp16 and 'optimizer_state_dict' in optimizer_state:
            # resume a plain optimizer from a checkpoint saved by FP16_Optimizer.optimizer
            optimizer.load_state_dict(optimizer_state['optimizer_state_dict'])
        else:
            optimizer.load_state_dict(optimizer_state)
    return net, optimizer
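A hypothetical end-to-end combination of Examples 2 and 4 (the torchvision model is only a stand-in; in this factory fp16_loss_scale=0 selects dynamic loss scaling and any other value a static scale):

import torchvision

net, optimizer = create_optimizer(torchvision.models.resnet18(), 'sgd',
                                  learning_rate=0.01, weight_decay=1e-4,
                                  momentum=0.9, fp16_loss_scale=0)
scheduler = create_lr_scheduler(optimizer, 'cos', max_epochs=50)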
Example 5: run
# Required import: from apex import fp16_utils [as alias]
# Or: from apex.fp16_utils import FP16_Optimizer [as alias]
def run(self):
    """
    Run XNLI training / evaluation.
    """
    params = self.params

    # load data
    self.data = self.load_data()
    if not self.data['dico'] == self._embedder.dico:
        raise Exception(("Dictionary in evaluation data (%i words) seems different than the one " +
                         "in the pretrained model (%i words). Please verify you used the same dictionary, " +
                         "and the same values for max_vocab and min_count.") % (len(self.data['dico']), len(self._embedder.dico)))

    # embedder
    self.embedder = copy.deepcopy(self._embedder)
    self.embedder.cuda()

    # projection layer
    self.proj = nn.Sequential(*[
        nn.Dropout(params.dropout),
        nn.Linear(self.embedder.out_dim, 3)
    ]).cuda()

    # float16
    if params.fp16:
        assert torch.backends.cudnn.enabled
        self.embedder.model = network_to_half(self.embedder.model)
        self.proj = network_to_half(self.proj)

    # optimizer
    self.optimizer = get_optimizer(
        list(self.embedder.get_parameters(params.finetune_layers)) +
        list(self.proj.parameters()),
        params.optimizer
    )
    if params.fp16:
        self.optimizer = FP16_Optimizer(self.optimizer, dynamic_loss_scale=True)

    # train and evaluate the model
    for epoch in range(params.n_epochs):
        # update epoch
        self.epoch = epoch
        # training
        logger.info("XNLI - Training epoch %i ..." % epoch)
        self.train()
        # evaluation
        logger.info("XNLI - Evaluating epoch %i ..." % epoch)
        with torch.no_grad():
            scores = self.eval()
            self.scores.update(scores)
Example 6: run
# Required import: from apex import fp16_utils [as alias]
# Or: from apex.fp16_utils import FP16_Optimizer [as alias]
def run(self, task):
    """
    Run GLUE training / evaluation.
    """
    params = self.params

    # task parameters
    self.task = task
    params.out_features = N_CLASSES[task]
    self.is_classif = task != 'STS-B'

    # load data
    self.data = self.load_data(task)
    if not self.data['dico'] == self._embedder.dico:
        raise Exception(("Dictionary in evaluation data (%i words) seems different than the one " +
                         "in the pretrained model (%i words). Please verify you used the same dictionary, " +
                         "and the same values for max_vocab and min_count.") % (len(self.data['dico']), len(self._embedder.dico)))

    # embedder
    self.embedder = copy.deepcopy(self._embedder)
    self.embedder.cuda()

    # projection layer
    self.proj = nn.Sequential(*[
        nn.Dropout(params.dropout),
        nn.Linear(self.embedder.out_dim, params.out_features)
    ]).cuda()

    # float16
    if params.fp16:
        assert torch.backends.cudnn.enabled
        self.embedder.model = network_to_half(self.embedder.model)
        self.proj = network_to_half(self.proj)

    # optimizer
    self.optimizer = get_optimizer(
        list(self.embedder.get_parameters(params.finetune_layers)) +
        list(self.proj.parameters()),
        params.optimizer
    )
    if params.fp16:
        self.optimizer = FP16_Optimizer(self.optimizer, dynamic_loss_scale=True)

    # train and evaluate the model
    for epoch in range(params.n_epochs):
        # update epoch
        self.epoch = epoch
        # training
        logger.info("GLUE - %s - Training epoch %i ..." % (task, epoch))
        self.train()
        # evaluation
        logger.info("GLUE - %s - Evaluating epoch %i ..." % (task, epoch))
        with torch.no_grad():
            scores = self.eval()
            self.scores.update(scores)
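In Examples 5 and 6 the per-batch work is hidden inside self.train(), which is not shown on this page. The only FP16-specific change in such a training step is routing the backward pass through the FP16_Optimizer wrapper, roughly as in the hypothetical sketch below (self.embed is a made-up stand-in for the real XLM forward pass, and the loss is a placeholder):

def training_step(self, x, y):
    logits = self.proj(self.embed(x))              # placeholder forward pass
    loss = nn.functional.cross_entropy(logits.float(), y)
    self.optimizer.zero_grad()
    if self.params.fp16:
        self.optimizer.backward(loss)              # FP16_Optimizer scales the loss before backprop
    else:
        loss.backward()
    self.optimizer.step()                          # unscale gradients, update FP32 master weights
    return loss.item()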