This article collects typical usage examples of the apex.optimizers.FP16_Optimizer method in Python. If you are wondering how optimizers.FP16_Optimizer is used, how to call it, or what it looks like in practice, the curated code samples below may help. You can also explore further usage examples from its containing module, apex.optimizers.
Five code examples of the optimizers.FP16_Optimizer method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: state_dict
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FP16_Optimizer [as alias]
def state_dict(self):
    """
    Returns a dict containing the current state of this :class:`FP16_Optimizer` instance.
    This dict contains attributes of :class:`FP16_Optimizer`, as well as the state_dict
    of the contained Pytorch optimizer.
    Example::
        checkpoint = {}
        checkpoint['model'] = model.state_dict()
        checkpoint['optimizer'] = optimizer.state_dict()
        torch.save(checkpoint, "saved.pth")
    """
    state_dict = {}
    state_dict['dynamic_loss_scale'] = self.dynamic_loss_scale
    state_dict['cur_scale'] = self.cur_scale
    state_dict['cur_iter'] = self.cur_iter
    if state_dict['dynamic_loss_scale']:
        state_dict['last_overflow_iter'] = self.last_overflow_iter
        state_dict['scale_factor'] = self.scale_factor
        state_dict['scale_window'] = self.scale_window
    state_dict['optimizer_state_dict'] = self.optimizer.state_dict()
    state_dict['fp32_groups_flat'] = self.fp32_groups_flat
    return state_dict
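As a follow-up to the docstring above, here is a minimal checkpoint-saving sketch. It assumes an older apex release that still ships FP16_Optimizer and FusedAdam under apex.optimizers, a CUDA device, and a throwaway torch.nn.Linear standing in for a real model; the backward()/step() calls follow the old FP16_Optimizer interface.

import torch
from apex.optimizers import FP16_Optimizer, FusedAdam  # older apex releases only

# Hypothetical fp16 model, just to have parameters to optimize.
model = torch.nn.Linear(512, 512).cuda().half()

optimizer = FusedAdam(model.parameters(), lr=1e-4)
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)

# One step so the inner optimizer has state worth saving.
loss = model(torch.randn(8, 512).cuda().half()).float().sum()
optimizer.backward(loss)   # FP16_Optimizer applies the loss scale here
optimizer.step()

# state_dict() bundles the loss-scale bookkeeping, the inner FusedAdam state,
# and the flat fp32 master copies into a single picklable dict.
checkpoint = {'model': model.state_dict(), 'optimizer': optimizer.state_dict()}
torch.save(checkpoint, 'saved.pth')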
Example 2: _set_lr_scheduler
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FP16_Optimizer [as alias]
def _set_lr_scheduler(self, model):
    lr_scheduler = self.config["lr_scheduler"]
    lr_scheduler_config = self.config["lr_scheduler_config"]

    # Create warmup scheduler for first warmup_steps warmup_units if applicable
    self._set_warmup_scheduler(model)

    optimizer_to_config = self.optimizer
    # If using half precision, configure the underlying
    # optimizer of FP16_Optimizer
    if model.config["fp16"]:
        optimizer_to_config = self.optimizer.optimizer

    # Create regular lr scheduler for use after warmup
    if lr_scheduler is None:
        lr_scheduler = None
    else:
        lr_scheduler_config = self.config["lr_scheduler_config"]
        if lr_scheduler == "linear":
            total_steps = self.batches_per_epoch * self.config["n_epochs"]
            cooldown_steps = total_steps - self.warmup_steps
            linear_cooldown_func = lambda x: (cooldown_steps - x) / cooldown_steps
            lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
                optimizer_to_config, linear_cooldown_func
            )
        elif lr_scheduler == "exponential":
            lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
                optimizer_to_config, **lr_scheduler_config["exponential_config"]
            )
        elif lr_scheduler == "reduce_on_plateau":
            lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer_to_config,
                min_lr=lr_scheduler_config["min_lr"],
                **lr_scheduler_config["plateau_config"],
            )
        else:
            raise ValueError(
                f"Did not recognize lr_scheduler option '{lr_scheduler}'"
            )

    self.lr_scheduler = lr_scheduler
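For illustration, the "linear" branch above boils down to a LambdaLR whose multiplier decays from 1 to 0 over the cooldown steps. The sketch below is self-contained; the model, step counts, and learning rate are invented for the example and are not part of the code above.

import torch

# Hypothetical numbers, chosen only to exercise the 'linear' cooldown branch.
total_steps, warmup_steps = 1000, 100
cooldown_steps = total_steps - warmup_steps

model = torch.nn.Linear(16, 16)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Same lambda as in _set_lr_scheduler: multiplier falls linearly from 1.0 to 0.0.
linear_cooldown_func = lambda x: (cooldown_steps - x) / cooldown_steps
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, linear_cooldown_func)

for step in range(cooldown_steps):
    optimizer.step()      # normally preceded by a forward/backward pass
    lr_scheduler.step()   # lr decays from 0.1 towards 0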
Example 3: create_optimizer
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FP16_Optimizer [as alias]
def create_optimizer(model, learning_rate, t_total, loss_scale, fp16, warmup_proportion, state_dict):
    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = [
        'bias', 'LayerNorm.bias', 'LayerNorm.weight',
        'adapter.down_project.weight', 'adapter.up_project.weight',
    ]
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    if fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex "
                              "to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=loss_scale)
    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=learning_rate,
                             warmup=warmup_proportion,
                             t_total=t_total)
    if state_dict is not None:
        optimizer.load_state_dict(state_dict)
    return optimizer
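A hedged sketch of how the returned optimizer is typically driven in the fp16 case. The model, dataloader, and hyperparameters below are placeholders, and the backward()/step()/zero_grad() calls follow the old FP16_Optimizer interface rather than anything defined in this example.

model = build_model().cuda().half()   # hypothetical helper returning an nn.Module
optimizer = create_optimizer(model, learning_rate=5e-5, t_total=10000,
                             loss_scale=0, fp16=True, warmup_proportion=0.1,
                             state_dict=None)

for inputs, labels in dataloader:     # hypothetical dataloader of fp16 batches
    loss = model(inputs, labels)      # placeholder forward returning a scalar loss
    optimizer.backward(loss)          # FP16_Optimizer scales the loss internally;
                                      # with fp16=False this would be loss.backward()
    optimizer.step()
    optimizer.zero_grad()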
Example 4: load_state_dict
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FP16_Optimizer [as alias]
def load_state_dict(self, state_dict):
    """
    Loads a state_dict created by an earlier call to state_dict().
    If ``fp16_optimizer_instance`` was constructed from some ``init_optimizer``,
    whose parameters in turn came from ``model``, it is expected that the user
    will call ``model.load_state_dict()`` before
    ``fp16_optimizer_instance.load_state_dict()`` is called.
    Example::
        model = torch.nn.Linear(D_in, D_out).cuda().half()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
        optimizer = FP16_Optimizer(optimizer, static_loss_scale = 128.0)
        ...
        checkpoint = torch.load("saved.pth")
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    """
    # I think it should actually be ok to reload the optimizer before the model.
    self.dynamic_loss_scale = state_dict['dynamic_loss_scale']
    self.cur_scale = state_dict['cur_scale']
    self.cur_iter = state_dict['cur_iter']
    if state_dict['dynamic_loss_scale']:
        self.last_overflow_iter = state_dict['last_overflow_iter']
        self.scale_factor = state_dict['scale_factor']
        self.scale_window = state_dict['scale_window']
    self.optimizer.load_state_dict(state_dict['optimizer_state_dict'])
    # At this point, the optimizer's references to the model's fp32 parameters are up to date.
    # The optimizer's hyperparameters and internal buffers are also up to date.
    # However, the fp32 master copies of the model's fp16 params stored by the optimizer are still
    # out of date. There are two options.
    # 1: Refresh the master params from the model's fp16 params.
    #    This requires less storage but incurs precision loss.
    # 2: Save and restore the fp32 master copies separately.
    #    We choose option 2.
    #
    # Pytorch Optimizer.load_state_dict casts saved buffers (e.g. momentum) to the type and device
    # of their associated parameters, because it's possible those buffers might not exist yet in
    # the current optimizer instance. In our case, as long as the current FP16_Optimizer has been
    # constructed in the same way as the one whose state_dict we are loading, the same master params
    # are guaranteed to exist, so we can just copy_() from the saved master params.
    for current, saved in zip(self.fp32_groups_flat, state_dict['fp32_groups_flat']):
        current.data.copy_(saved.data)
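To tie Example 1 and Example 4 together, here is a rough resume-from-checkpoint sketch, again assuming an older apex release. As the comments above note, the resumed FP16_Optimizer must be constructed the same way as the saved one so that the flat fp32 master groups line up; the factory helper and model below are hypothetical.

import torch
from apex.optimizers import FP16_Optimizer, FusedAdam  # older apex releases only

def build_model_and_optimizer():
    # Hypothetical factory: must mirror the construction used before saving,
    # so that fp32_groups_flat has the same shape as in the checkpoint.
    model = torch.nn.Linear(512, 512).cuda().half()
    optimizer = FP16_Optimizer(FusedAdam(model.parameters(), lr=1e-4),
                               dynamic_loss_scale=True)
    return model, optimizer

model, optimizer = build_model_and_optimizer()
checkpoint = torch.load('saved.pth')
model.load_state_dict(checkpoint['model'])          # restore fp16 weights first
optimizer.load_state_dict(checkpoint['optimizer'])  # then loss scale, Adam state, fp32 masters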
Example 5: set_model
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FP16_Optimizer [as alias]
def set_model(self):
    print('[Runner] - Initializing Transformer model...')

    # build the Transformer model with speech prediction head
    model_config = TransformerConfig(self.config)
    self.dr = model_config.downsample_rate
    self.hidden_size = model_config.hidden_size

    self.model = TransformerForMaskedAcousticModel(model_config, self.input_dim, self.output_dim).to(self.device)
    self.model.train()

    if self.args.multi_gpu:
        self.model = torch.nn.DataParallel(self.model)
        print('[Runner] - Multi-GPU training Enabled: ' + str(torch.cuda.device_count()))
    print('[Runner] - Number of parameters: ' + str(sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    # Setup optimizer
    param_optimizer = list(self.model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    if self.apex:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=self.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if self.config['optimizer']['loss_scale'] == 0:
            self.optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            self.optimizer = FP16_Optimizer(optimizer, static_loss_scale=self.config['optimizer']['loss_scale'])
        self.warmup_linear = WarmupLinearSchedule(warmup=self.warmup_proportion,
                                                  t_total=self.total_steps)
    else:
        self.optimizer = BertAdam(optimizer_grouped_parameters,
                                  lr=self.learning_rate,
                                  warmup=self.warmup_proportion,
                                  t_total=self.total_steps)
Author: andi611, Project: Self-Supervised-Speech-Pretraining-and-Representation-Learning, Lines of code: 49, Source file: runner.py
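As a follow-up, a hedged sketch of the training step such a runner usually pairs with this setup: the apex branch calls self.optimizer.backward(loss) so that FP16_Optimizer can scale the loss, while the BertAdam branch uses a plain loss.backward(). The batch unpacking and the model's loss output are placeholders, not code from the repository.

def train_step(self, batch):
    # Hypothetical step for the runner configured by set_model() above;
    # `batch` is a placeholder for the masked acoustic inputs and targets.
    loss = self.model(*batch)              # placeholder forward returning a scalar loss
    if self.apex:
        self.optimizer.backward(loss)      # FP16_Optimizer handles loss scaling
    else:
        loss.backward()                    # BertAdam path: plain backward
    self.optimizer.step()
    self.optimizer.zero_grad()
    return loss.item()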