This article collects typical usage examples of apex.amp in Python. If you have been wondering what exactly apex.amp does, how to use it, or what real apex.amp code looks like, the curated method examples below may help. You can also explore further usage examples from the containing package, apex.
The following presents 13 code examples of apex.amp, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
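Before diving into the examples, here is a minimal sketch of the core apex.amp workflow that most of them build on: wrap the model and optimizer with amp.initialize, then scale the loss during backward. The model, optimizer, and data below are placeholders, not taken from any example on this page; it assumes apex is installed and a CUDA device is available.

import torch
from apex import amp

model = torch.nn.Linear(16, 1).cuda()                      # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

# Patch the model and optimizer once; "O1" enables mixed precision.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

x = torch.randn(8, 16, device="cuda")
target = torch.randn(8, 1, device="cuda")

optimizer.zero_grad()
loss = torch.nn.functional.mse_loss(model(x), target)
# Scale the loss before backward so fp16 gradients do not underflow.
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()
optimizer.step()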
Example 1: __init__
# Required import: import apex
# Alternatively: from apex import amp
def __init__(self, params):
    super().__init__(params)
    if 'aux' in params:
        self.aux_weights = params['aux']['weights']
    else:
        self.aux_weights = None
    self.use_amp = not config.kernel and 'amp' in params
    if self.use_amp:
        from apex import amp
        self.amp = amp
        # Patch the module and optimizer for mixed precision training
        self.nn_module, self.optimizer = self.amp.initialize(
            self.nn_module, self.optimizer,
            opt_level=params['amp']['opt_level'],
            keep_batchnorm_fp32=params['amp']['keep_batchnorm_fp32'],
            loss_scale=params['amp']['loss_scale']
        )
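The constructor above reads its amp settings from an 'amp' key of the params dictionary. Purely for illustration (the exact values are not specified by the example), a plausible configuration could look like this:

params = {
    'aux': {'weights': [1.0, 0.4, 0.2]},   # hypothetical auxiliary-head weights
    'amp': {
        'opt_level': 'O1',                 # mixed precision
        'keep_batchnorm_fp32': None,       # let apex decide based on opt_level
        'loss_scale': 'dynamic',           # dynamic loss scaling
    },
}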
Example 2: single_gpu_train
# Required import: import apex
# Alternatively: from apex import amp
def single_gpu_train(self, model):
    # call setup
    self.setup('fit')
    if self.is_function_implemented('setup', model):
        model.setup('fit')

    model.cuda(self.root_gpu)

    # CHOOSE OPTIMIZER
    # allow for lr schedulers as well
    self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model)

    # TODO: remove with dropping NVIDIA AMP support
    if self.use_amp and not NATIVE_AMP_AVALAIBLE:
        # hand the model and optimizers to apex via the user-overridable hook
        model, optimizers = model.configure_apex(amp, model, self.optimizers, self.amp_level)
        self.optimizers = optimizers
        self.reinit_scheduler_properties(self.optimizers, self.lr_schedulers)

    self.run_pretrain_routine(model)
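The configure_apex call above is a hook the user's module can override. A minimal sketch of such an override, assuming the pre-1.0 Lightning hook signature seen in this example, simply delegates to amp.initialize:

def configure_apex(self, amp, model, optimizers, amp_level):
    # Let apex patch the model and optimizers; the trainer supplies amp_level (e.g. 'O1').
    model, optimizers = amp.initialize(model, optimizers, opt_level=amp_level)
    return model, optimizers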
Example 3: train_step
# Required import: import apex
# Alternatively: from apex import amp
def train_step(self, batch) -> dict:
    if not self.nn_module.training:
        self.nn_module.train()
    self.optimizer.zero_grad()
    input, target, noisy = self.prepare_batch(batch, self.device)
    prediction = self.nn_module(input)
    if self.aux_weights is not None:
        # weighted sum of the losses over auxiliary heads
        loss = 0
        for pred, weight in zip(prediction, self.aux_weights):
            loss += self.loss(pred, target, noisy) * weight
    else:
        loss = self.loss(prediction, target, noisy)

    if self.use_amp:
        # scale the loss before backward so fp16 gradients do not underflow
        with self.amp.scale_loss(loss, self.optimizer) as scaled_loss:
            scaled_loss.backward()
    else:
        loss.backward()
    self.optimizer.step()

    prediction = deep_detach(prediction)
    target = deep_detach(target)
    return {
        'prediction': self.prediction_transform(prediction[0]),
        'target': target,
        'loss': loss.item(),
        'noisy': noisy
    }
Example 4: set_amp_args
# Required import: import apex
# Alternatively: from apex import amp
def set_amp_args(self, amp_args: Optional[Dict[str, Any]]):
    """Disable / enable apex.amp and set the automatic mixed precision parameters.

    apex.amp can be utilized for mixed / half precision training.

    Args:
        amp_args: Dictionary containing arguments to be passed to
            amp.initialize. Set to None to disable amp. To enable mixed
            precision training, pass amp_args={"opt_level": "O1"} here.
            See https://nvidia.github.io/apex/amp.html for more info.

    Raises:
        RuntimeError: If opt_level is not None and apex is not installed.

    Warning: apex needs to be installed to utilize this feature.
    """
    self.amp_args = amp_args
    if amp_args is None:
        logging.info("AMP disabled")
    else:
        if not apex_available:
            raise RuntimeError("apex is not installed, cannot enable amp")
        logging.info(f"AMP enabled with args {amp_args}")
    return self
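As the docstring describes, enabling mixed precision only requires an opt_level. A hypothetical call, where `task` stands for an object exposing set_amp_args as above, might look like:

task.set_amp_args({"opt_level": "O1"})   # enable mixed precision
task.set_amp_args(None)                  # disable amp again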
Example 5: get_classy_state
# Required import: import apex
# Alternatively: from apex import amp
def get_classy_state(self, deep_copy: bool = False):
    """Returns serializable state of task

    Args:
        deep_copy: If true, does a deep copy of state before returning.
    """
    optimizer_state = {}
    if self.optimizer is not None:
        optimizer_state = self.optimizer.get_classy_state()

    classy_state_dict = {
        "train": self.train,
        "base_model": self.base_model.get_classy_state(),
        "meters": [meter.get_classy_state() for meter in self.meters],
        "optimizer": optimizer_state,
        "phase_idx": self.phase_idx,
        "train_phase_idx": self.train_phase_idx,
        "num_updates": self.num_updates,
        "losses": self.losses,
        "hooks": {hook.name(): hook.get_classy_state() for hook in self.hooks},
        "loss": {},
    }
    if "train" in self.datasets and self._is_checkpointable_dataset(
        self.datasets["train"]
    ):
        classy_state_dict["train_dataset_iterator"] = self.datasets[
            "train"
        ].get_classy_state()
    if isinstance(self.loss, ClassyLoss):
        classy_state_dict["loss"] = self.loss.get_classy_state()
    if self.amp_args is not None:
        # persist the amp loss-scaler state alongside the rest of the task state
        classy_state_dict["amp"] = apex.amp.state_dict()
    if deep_copy:
        classy_state_dict = copy.deepcopy(classy_state_dict)
    return classy_state_dict
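Examples 5 and 8 together form a checkpointing pattern: serialize apex.amp.state_dict() when saving and pass it back to apex.amp.load_state_dict() when restoring. Stripped of the task machinery, a minimal sketch of that round trip (the placeholder model and optimizer below are assumptions, not part of the original code) is:

import torch
from apex import amp

model = torch.nn.Linear(16, 1).cuda()                      # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

# Save: persist the amp loss-scaler state next to model and optimizer state.
torch.save({
    "model": model.state_dict(),
    "optimizer": optimizer.state_dict(),
    "amp": amp.state_dict(),
}, "checkpoint.pt")

# Restore: amp.initialize must already have been called on the new model/optimizer.
checkpoint = torch.load("checkpoint.pt")
model.load_state_dict(checkpoint["model"])
optimizer.load_state_dict(checkpoint["optimizer"])
amp.load_state_dict(checkpoint["amp"])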
Example 6: _check_fp16
# Required import: import apex
# Alternatively: from apex import amp
def _check_fp16():
    if amp is None:
        raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
    if not torch.backends.cudnn.enabled:
        raise RuntimeError("Amp requires cudnn backend to be enabled.")
Example 7: evaluate_corpus_bleu
# Required import: import apex
# Alternatively: from apex import amp
def evaluate_corpus_bleu(args, early_stopping=True, stop_no=1000):
    args.batch_size = 1
    train_iter, FR, EN, train_length = load_dataloaders(args)
    src_vocab = len(EN.vocab)
    trg_vocab = len(FR.vocab)
    cuda = torch.cuda.is_available()

    if args.fp16:
        from apex import amp
    else:
        amp = None
    net, _, _, _, _, _ = load_model_and_optimizer(args, src_vocab,
                                                  trg_vocab, cuda, amp=amp)
    net.eval()
    trg_init = FR.vocab.stoi["<sos>"]
    trg_init = Variable(torch.LongTensor([trg_init])).unsqueeze(0)

    logger.info("Evaluating corpus bleu...")
    refs = []; hyps = []
    with torch.no_grad():
        for i, data in tqdm(enumerate(train_iter), total=len(train_iter)):
            trg_input = trg_init
            labels = data.FR[:, 1:].contiguous().view(-1)
            src_mask, trg_mask = create_masks(data.EN, trg_input)
            if cuda:
                data.EN = data.EN.cuda(); trg_input = trg_input.cuda(); labels = labels.cuda()
                src_mask = src_mask.cuda(); trg_mask = trg_mask.cuda()
            stepwise_translated_words, final_step_words = net(data.EN, trg_input, src_mask, None,
                                                              infer=True, trg_vocab_obj=FR)
            refs.append([stepwise_translated_words])  # need to remove <eos> tokens
            hyps.append([FR.vocab.itos[i] for i in labels[:-1]])

            if early_stopping and ((i + 1) % stop_no == 0):
                print(refs); print(hyps)
                break

    score = calculate_bleu(refs, hyps, corpus_level=True)
    print("Corpus bleu score: %.5f" % score)
    return score
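Examples 7 and 10 pass the amp module itself (or None) into a project-specific load_model_and_optimizer helper, which is not shown on this page. A plausible sketch of what that helper might do with the amp argument, assuming it follows the standard apex workflow (build_transformer, args.lr, and the six return values are illustrative placeholders), is:

def load_model_and_optimizer(args, src_vocab, trg_vocab, cuda, amp=None):
    # Hypothetical helper: build the network and optimizer, then let apex
    # patch them only when an amp module was actually passed in.
    net = build_transformer(src_vocab, trg_vocab, args)   # placeholder model builder
    if cuda:
        net = net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    if amp is not None:
        net, optimizer = amp.initialize(net, optimizer, opt_level="O2")
    # ... load checkpoint, scheduler state, etc. ...
    return net, optimizer, None, None, None, None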
Example 8: set_classy_state
# Required import: import apex
# Alternatively: from apex import amp
def set_classy_state(self, state):
    """Set task state

    Args:
        state: Dict containing state of a task
    """
    # some settings are different in test only
    self.train = False if self.test_only else state["train"]
    if not self.test_only:
        self.phase_idx = state["phase_idx"]
        self.num_updates = state["num_updates"]
        self.train_phase_idx = state["train_phase_idx"]
        self.losses = state["losses"]
        for meter, meter_state in zip(self.meters, state["meters"]):
            meter.set_classy_state(meter_state)

    self.base_model.set_classy_state(state["base_model"])
    if self.optimizer is not None:
        self.optimizer.set_classy_state(state["optimizer"])
    if state.get("loss") and isinstance(self.loss, ClassyLoss):
        self.loss.set_classy_state(state["loss"])
    if "amp" in state:
        # restore the amp loss-scaler state saved by get_classy_state
        apex.amp.load_state_dict(state["amp"])

    for hook in self.hooks:
        # we still want to be able to run when new hooks are added or old
        # hooks are removed
        if hook.name() in state["hooks"]:
            hook.set_classy_state(state["hooks"][hook.name()])
        else:
            logging.warn(f"No state found for hook: {hook.name()}")

    if "train" in self.datasets and self._is_checkpointable_dataset(
        self.datasets["train"]
    ):
        self.datasets["train"].set_classy_state(state.get("train_dataset_iterator"))

    # TODO (mannatsingh): Figure out how to set the state of the dataloaders
    # Re-build dataloader & re-create iterator.
    self._recreate_data_loader_from_dataset()
    self.create_data_iterator()
    # Set up pytorch module in train vs eval mode, update optimizer.
    self._set_model_train_mode()
Example 9: train_step
# Required import: import apex
# Alternatively: from apex import amp
def train_step(self):
    """Train step to be executed in train loop."""
    self.last_batch = None

    # Process next sample
    sample = next(self.get_data_iterator())
    assert isinstance(sample, dict) and "input" in sample and "target" in sample, (
        f"Returned sample [{sample}] is not a map with 'input' and"
        + "'target' keys"
    )

    # Copy sample to GPU
    target = sample["target"]
    if self.use_gpu:
        sample = recursive_copy_to_gpu(sample, non_blocking=True)

    if self.mixup_transform is not None:
        sample = self.mixup_transform(sample)

    with torch.enable_grad():
        # Forward pass
        output = self.model(sample["input"])

        local_loss = self.compute_loss(output, sample)
        loss = local_loss.detach().clone()
        self.losses.append(loss.data.cpu().item() * target.size(0))
        self.update_meters(output, sample)

    # Run backwards pass / update optimizer
    if self.amp_args is not None:
        self.optimizer.zero_grad()
        # scale the loss through the wrapped torch optimizer held by the task optimizer
        with apex.amp.scale_loss(
            local_loss, self.optimizer.optimizer
        ) as scaled_loss:
            scaled_loss.backward()
    else:
        self.optimizer.backward(local_loss)

    self.check_inf_nan(loss)
    self.optimizer.update_schedule_on_step(self.where)
    self.optimizer.step()
    self.num_updates += self.get_global_batchsize()

    # Move some data to the task so hooks get a chance to access it
    self.last_batch = LastBatchInfo(
        loss=loss, output=output, target=target, sample=sample
    )
Example 10: __init__
# Required import: import apex
# Alternatively: from apex import amp
def __init__(self, args=None):
    if args is None:
        self.args = load_pickle("args.pkl")
    else:
        self.args = args
    self.cuda = torch.cuda.is_available()
    self.args.batch_size = 1

    if self.args.model_no != 1:
        logger.info("Loading tokenizer and model...")
        self.tokenizer_en = tokener(args.src_lang)
        train_iter, FR, EN, train_length = load_dataloaders(self.args)
        self.FR = FR
        self.EN = EN
        self.train_iter = train_iter
        self.train_length = train_length
        self.src_vocab = len(EN.vocab)
        self.trg_vocab = len(FR.vocab)

        if self.args.fp16:
            from apex import amp
        else:
            amp = None
        self.amp = amp
        net, _, _, _, _, _ = load_model_and_optimizer(self.args, self.src_vocab,
                                                      self.trg_vocab, self.cuda, amp=amp)
        self.net = net
        self.net.eval()
        trg_init = FR.vocab.stoi["<sos>"]
        self.trg_init = Variable(torch.LongTensor([trg_init])).unsqueeze(0)

    elif self.args.model_no == 1:
        from .mass.interactive import Translator
        src, tgt = "zh-en".split('-')
        logger.info("Loading translator, tokenizer...")
        self.translator = Translator(data_path='./data/data-bin/processed_data_%s_%s' % (src, tgt),
                                     checkpoint_path="./data/checkpoints/%s_%s/checkpoint50.pt" % (src, tgt),
                                     task='translation',
                                     user_dir='',
                                     s=src, t=tgt,
                                     langs='%s,%s' % (src, tgt),
                                     mt_steps='%s-%s' % (src, tgt),
                                     source_langs=src,
                                     target_langs=tgt,
                                     beam=5,
                                     use_cuda=args.cuda)
Example 11: call_optimizer_step
# Required import: import apex
# Alternatively: from apex import amp
def call_optimizer_step(self, optimizer, opt_idx, batch_idx, split_batch):
    # calls .step(), .zero_grad()
    # override function to modify this behavior
    model = self.get_model()

    with self.profiler.profile('optimizer_step'):
        lambda_closure = lambda: self.optimizer_closure(
            split_batch,
            batch_idx,
            opt_idx,
            optimizer,
            self.hiddens
        ).loss

        # apply TPU optimizer
        if self.use_tpu and XLA_AVAILABLE:
            model.optimizer_step(self.current_epoch, batch_idx,
                                 optimizer, opt_idx, lambda_closure, on_tpu=True)

        # for LBFGS do something a bit different
        elif isinstance(optimizer, torch.optim.LBFGS):
            # native amp + lbfgs is a no go right now
            if self.use_amp and NATIVE_AMP_AVALAIBLE:
                raise MisconfigurationException(
                    'native PyTorch amp and lbfgs are not compatible.'
                    ' To request, please file a Github issue in PyTorch and tag @mcarilli')
            model.optimizer_step(self.current_epoch, batch_idx, optimizer, opt_idx, lambda_closure,
                                 using_lbfgs=True)

        # when using 16-bit
        else:
            native_amp = self.use_amp and NATIVE_AMP_AVALAIBLE
            model.optimizer_step(self.current_epoch, batch_idx, optimizer, opt_idx, lambda_closure,
                                 using_native_amp=native_amp)

        # in native 16-bit we need to update scaler after optimizer step
        if self.use_amp and NATIVE_AMP_AVALAIBLE:
            self.scaler.update()

        # model hook
        model.on_before_zero_grad(optimizer)

        # clear gradients
        model.optimizer_zero_grad(self.current_epoch, batch_idx, optimizer, opt_idx)
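Example 11 branches on whether native PyTorch AMP is available, falling back to the NVIDIA apex path otherwise. For contrast with the apex examples on this page, here is a minimal sketch of the native torch.cuda.amp equivalent of the scale-loss pattern (model, optimizer, and data are placeholders):

import torch
from torch.cuda.amp import autocast, GradScaler

model = torch.nn.Linear(16, 1).cuda()                      # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
scaler = GradScaler()

x = torch.randn(8, 16, device="cuda")
target = torch.randn(8, 1, device="cuda")

optimizer.zero_grad()
with autocast():                           # run the forward pass in mixed precision
    loss = torch.nn.functional.mse_loss(model(x), target)
scaler.scale(loss).backward()              # scale the loss, as apex's scale_loss does
scaler.step(optimizer)                     # unscales gradients, then steps the optimizer
scaler.update()                            # adjust the scale factor for the next step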
Example 12: dp_train
# Required import: import apex
# Alternatively: from apex import amp
def dp_train(self, model):
    # call setup after the ddp process has connected
    self.setup('fit')
    if self.is_function_implemented('setup', model):
        model.setup('fit')

    # CHOOSE OPTIMIZER
    # allow for lr schedulers as well
    self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model)

    model.cuda(self.root_gpu)

    # hack forward to do autocast for the user
    model_autocast_original_forward = model.forward
    if self.use_amp and NATIVE_AMP_AVALAIBLE:
        # wrap the user's forward in autocast and give it back at the end
        model.forward = torch.cuda.amp.autocast()(model.forward)

    # TODO: remove with dropping NVIDIA AMP support
    # check for this bug (amp + dp + !O1 doesn't work)
    # https://github.com/NVIDIA/apex/issues/227
    if self.use_dp and self.use_amp and not NATIVE_AMP_AVALAIBLE:
        if self.amp_level == 'O2':
            raise MisconfigurationException(
                f'Amp level {self.amp_level} with DataParallel is not supported.'
                f' See this note from NVIDIA for more info: https://github.com/NVIDIA/apex/issues/227.'
                f' We recommend you switch to ddp if you want to use amp')
        else:
            model, optimizers = model.configure_apex(amp, model, self.optimizers, self.amp_level)
            self.reinit_scheduler_properties(optimizers, self.lr_schedulers)

    # create list of device ids
    device_ids = self.data_parallel_device_ids
    if isinstance(device_ids, int):
        device_ids = list(range(device_ids))

    # set dp device
    torch.cuda.set_device(self.root_gpu)

    model = LightningDataParallel(model, device_ids=device_ids)

    self.run_pretrain_routine(model)

    model.forward = model_autocast_original_forward
Example 13: horovod_train
# Required import: import apex
# Alternatively: from apex import amp
def horovod_train(self, model):
    # call setup after the ddp process has connected
    self.setup('fit')
    if self.is_function_implemented('setup', model):
        model.setup('fit')

    if torch.cuda.is_available() and self.on_gpu:
        # Horovod: pin GPU to local rank
        assert self.root_gpu == hvd.local_rank()
        torch.cuda.set_device(self.root_gpu)
        model.cuda(self.root_gpu)

    # avoid duplicating progress bar
    if hvd.rank() != 0 and self.progress_bar_callback is not None:
        self.progress_bar_callback.disable()

    # CHOOSE OPTIMIZER
    # allow for lr schedulers as well
    self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model)

    # Horovod: scale the learning rate by the number of workers to account for
    # increased total batch size
    for optimizer in self.optimizers:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= hvd.size()

    if self.use_amp:
        # hand the model and optimizers to apex via the user-overridable hook
        model, optimizers = model.configure_apex(amp, model, self.optimizers, self.amp_level)
        self.optimizers = optimizers
        self.reinit_scheduler_properties(self.optimizers, self.lr_schedulers)

    # Horovod: broadcast parameters & optimizer state to ensure consistent initialization
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    for optimizer in self.optimizers:
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    def filter_named_parameters(model, optimizer):
        opt_params = set([p for group in optimizer.param_groups for p in group.get('params', [])])
        return [(name, p) for name, p in model.named_parameters() if p in opt_params]

    # Horovod: wrap optimizers to perform gradient aggregation via allreduce
    self.optimizers = [
        hvd.DistributedOptimizer(optimizer, named_parameters=filter_named_parameters(model, optimizer))
        for optimizer in self.optimizers
    ]

    # Update logger rank info from Horovod to avoid race conditions from different ranks
    # creating directories / writing files in the same locations.
    self.global_rank = hvd.rank()
    rank_zero_only.rank = self.global_rank

    with ExitStack() as stack:
        for optimizer in self.optimizers:
            # Synchronization will be performed explicitly following backward()
            stack.enter_context(optimizer.skip_synchronize())

        self.run_pretrain_routine(model)

    # Make sure all workers have finished training before returning to the user
    hvd.join()
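The essential ordering in Example 13 is: create the optimizer, let apex patch model and optimizer, then broadcast state and wrap the optimizer with Horovod. A minimal standalone sketch of that ordering outside of Lightning (the model and learning rate are placeholders) could look like:

import torch
import horovod.torch as hvd
from apex import amp

hvd.init()
torch.cuda.set_device(hvd.local_rank())      # pin each process to its own GPU

model = torch.nn.Linear(16, 1).cuda()        # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2 * hvd.size())

# 1. let apex patch the model and optimizer for mixed precision
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

# 2. make every worker start from the same weights and optimizer state
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# 3. wrap the optimizer so gradients are averaged across workers via allreduce
optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())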