This article collects typical usage examples of the Python method apex.optimizers.FusedAdam. If you are wondering what optimizers.FusedAdam does, how it is used, or want to see it in real code, the curated examples below may help. You can also explore further usage examples from the apex.optimizers module where this method lives.
The following shows 10 code examples of optimizers.FusedAdam, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
Example 1: get_fused_adam_class
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def get_fused_adam_class():
    """
    Look for the FusedAdam optimizer from apex. We first try to load the
    "contrib" interface, which is a bit faster than the main interface,
    but is technically deprecated.
    """
    try:
        # The "deprecated" interface in recent versions of apex is a bit
        # faster than the main interface, since we don't use the apex
        # optimizer. This can be installed by passing the
        # `--deprecated_fused_adam` option when building apex.
        global fused_adam_cuda
        import importlib
        fused_adam_cuda = importlib.import_module("fused_adam_cuda")
        return FusedAdamV1
    except ImportError:
        try:
            # fallback to the newer interface
            from apex.optimizers import FusedAdam as _FusedAdam  # noqa
            from apex.multi_tensor_apply import multi_tensor_applier
            if multi_tensor_applier.available:
                return FusedAdamV2
        except ImportError:
            pass
    return None
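For context, a minimal usage sketch (not part of the original snippet): the returned class, if any, is constructed like any other torch optimizer. The parameter list below is a placeholder, and the code falls back to the stock torch.optim.Adam when apex is not installed with its CUDA extensions.

# Hedged usage sketch; `params` is a hypothetical parameter list.
import torch

params = [torch.nn.Parameter(torch.zeros(10, 10))]
fused_adam_cls = get_fused_adam_class()
if fused_adam_cls is not None:
    optimizer = fused_adam_cls(params, lr=1e-3)    # apex fused-kernel path
else:
    optimizer = torch.optim.Adam(params, lr=1e-3)  # plain PyTorch fallback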
Example 2: __init__
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def __init__(self, params,
             lr=1e-3, bias_correction=True,
             betas=(0.9, 0.999), eps=1e-8, eps_inside_sqrt=False,
             weight_decay=0., max_grad_norm=0., amsgrad=False):
    global fused_adam_cuda
    import importlib
    fused_adam_cuda = importlib.import_module("fused_adam_cuda")

    if amsgrad:
        raise RuntimeError('FusedAdam does not support the AMSGrad variant.')

    defaults = {
        'lr': lr,
        'bias_correction': bias_correction,
        'betas': betas,
        'eps': eps,
        'weight_decay': weight_decay,
        'max_grad_norm': max_grad_norm,
    }
    super().__init__(params, defaults)
    self.eps_mode = 0 if eps_inside_sqrt else 1
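This constructor appears to belong to a wrapper around the legacy fused_adam_cuda kernel, i.e. the FusedAdamV1 path from Example 1. A minimal instantiation sketch, assuming the surrounding class is exposed as FusedAdamV1 and apex was built with --deprecated_fused_adam:

# Hypothetical instantiation; the class name FusedAdamV1 is an assumption taken from Example 1.
import torch

params = [torch.nn.Parameter(torch.randn(8, 8))]
opt = FusedAdamV1(params, lr=1e-3, betas=(0.9, 0.999), weight_decay=0.01)
print(opt.eps_mode)  # 1 by default; pass eps_inside_sqrt=True to get 0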
Example 3: main
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def main(step, args, model_state_dict, optimizer_state_dict):
    #
    # PART2
    #
    model = build_model(args).cuda()
    one_ll = next(model.children()).weight
    optimizer = FusedAdam(model.parameters())
    ASP.init_model_for_pruning(model, args.pattern, verbosity=args.verbosity, whitelist=args.whitelist, allow_recompute_mask=args.allow_recompute_mask)
    ASP.init_optimizer_for_pruning(optimizer)

    torch.manual_seed(args.seed2)
    model.load_state_dict(model_state_dict)
    optimizer.load_state_dict(optimizer_state_dict)

    print("Model sparsity is %s" % ("enabled" if ASP.sparsity_is_enabled() else "disabled"))

    # train for a few steps with sparse weights
    print("SPARSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_sparse_steps_2)
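This entry point consumes the checkpoint written by the PART1 script shown in Example 7 below. A hedged sketch of how it might be driven (the checkpoint keys follow the torch.save call in Example 7; `args` is assumed to carry the same fields):

# Hypothetical driver code; only the checkpoint keys are taken from Example 7.
checkpoint = torch.load(args.checkpoint_path)
main(checkpoint['step'],
     args,
     checkpoint['model_state_dict'],
     checkpoint['optimizer_state_dict'])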
Example 4: create_optimizer
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def create_optimizer(model, learning_rate, t_total, loss_scale, fp16, warmup_proportion, state_dict):
    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = [
        'bias', 'LayerNorm.bias', 'LayerNorm.weight',
        'adapter.down_project.weight', 'adapter.up_project.weight',
    ]
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    if fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex "
                              "to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=loss_scale)
    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=learning_rate,
                             warmup=warmup_proportion,
                             t_total=t_total)
    if state_dict is not None:
        optimizer.load_state_dict(state_dict)
    return optimizer
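A hedged call-site sketch for the function above. The model, step budget, and hyperparameters are placeholders, and the fp16 branch assumes a legacy apex build that still ships FP16_Optimizer under apex.optimizers.

# Hypothetical values; only the keyword names come from the function signature above.
optimizer = create_optimizer(
    model,                  # a torch.nn.Module (placeholder)
    learning_rate=5e-5,
    t_total=10000,          # total training steps for the warmup schedule
    loss_scale=0,           # 0 selects dynamic loss scaling in the fp16 branch
    fp16=True,
    warmup_proportion=0.1,
    state_dict=None,        # or a previously saved optimizer state dict
)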
Example 5: optimizer_from_name
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def optimizer_from_name(optim_name):
    optim_name = optim_name.lower()
    if optim_name == "sgd":
        return optim.SGD
    elif optim_name == "sgdw":
        return SGDW
    elif optim_name == "adam":
        return partial(optim.Adam, eps=2e-5)
    elif optim_name == "adamw":
        return partial(AdamW_my, eps=2e-5)
    elif optim_name == "adamw_gc":
        # in this implementation eps is inside the sqrt, so it can be smaller
        return partial(AdamW_my, center=True, eps=1e-7)
    elif optim_name == "rmsprop":
        # in this implementation eps is inside the sqrt, so it can be smaller
        return partial(RMSprop, eps=1e-7)
    elif optim_name == "radam":
        return partial(RAdam, eps=2e-5)
    elif optim_name in ["fused_sgd", "fusedsgd"]:
        return FusedSGD
    elif optim_name in ["fused_adam", "fusedadam"]:
        return partial(FusedAdam, eps=2e-5)
    elif optim_name in ["fused_novograd", "fusednovograd", "novograd"]:
        return partial(FusedNovoGrad, eps=2e-5)
    else:
        raise ValueError(f"Optimizer {optim_name} not found")
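A usage sketch for the factory above (model and hyperparameters are placeholders, and the fused variants assume apex is installed with its CUDA extensions): the function returns a class or functools.partial, which is then bound to the model parameters like any optimizer constructor.

# Hypothetical usage; "fused_adam" resolves to partial(FusedAdam, eps=2e-5).
import torch

model = torch.nn.Linear(16, 4)                   # placeholder model
opt_factory = optimizer_from_name("fused_adam")
optimizer = opt_factory(model.parameters(), lr=3e-4, weight_decay=1e-2)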
Example 6: main
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def main(args):
    model = build_model(args).cuda()
    one_ll = next(model.children()).weight
    optimizer = FusedAdam(model.parameters())
    # only prune linear layers, even though we also support conv1d, conv2d and conv3d
    ASP.init_model_for_pruning(model, "m4n2_1d", whitelist=[torch.nn.Linear], allow_recompute_mask=True)
    ASP.init_optimizer_for_pruning(optimizer)

    step = 0

    # train for a few steps with dense weights
    print("DENSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_dense_steps)

    # simulate sparsity by inserting zeros into existing dense weights
    ASP.compute_sparse_masks()

    # train for a few steps with sparse weights
    print("SPARSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_sparse_steps)

    # recompute sparse masks
    ASP.compute_sparse_masks()

    # train for a few steps with sparse weights
    print("SPARSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_sparse_steps_2)

    # turn off sparsity
    print("SPARSE :: ", one_ll)
    ASP.restore_pruned_weights()

    # train for a few steps with dense weights
    print("DENSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_dense_steps_2)
Example 7: main
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def main(args):
    #
    # PART1
    #
    torch.manual_seed(args.seed)

    model = build_model(args).cuda()
    one_ll = next(model.children()).weight
    optimizer = FusedAdam(model.parameters())
    ASP.init_model_for_pruning(model, args.pattern, verbosity=args.verbosity, whitelist=args.whitelist, allow_recompute_mask=args.allow_recompute_mask)
    ASP.init_optimizer_for_pruning(optimizer)

    step = 0

    # train for a few steps with dense weights
    print("DENSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_dense_steps)

    # simulate sparsity by inserting zeros into existing dense weights
    ASP.enable_sparsity()

    # train for a few steps with sparse weights
    print("SPARSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_sparse_steps)

    torch.save({
        'step': step,
        'verbosity': args.verbosity,
        'seed2': args.seed2,
        'pattern': args.pattern,
        'whitelist': args.whitelist,
        'allow_recompute_mask': args.allow_recompute_mask,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, args.checkpoint_path)
Example 8: main
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def main(args):
    #
    # PART1
    #
    torch.manual_seed(args.seed)

    model = build_model(args).cuda()
    one_ll = next(model.children()).weight
    optimizer = FusedAdam(model.parameters())
    ASP.init_model_for_pruning(model, args.pattern, whitelist=args.whitelist, allow_recompute_mask=args.allow_recompute_mask)
    ASP.init_optimizer_for_pruning(optimizer)

    step = 0

    # train for a few steps with dense weights
    print("DENSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_dense_steps)

    # simulate sparsity by inserting zeros into existing dense weights
    ASP.enable_sparsity()

    # train for a few steps with sparse weights
    print("SPARSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_sparse_steps)

    #
    # PART 2
    #
    torch.manual_seed(args.seed2)

    # train for a few steps with sparse weights
    print("SPARSE :: ", one_ll)
    step = train_loop(args, model, optimizer, step, args.num_sparse_steps_2)
Example 9: main
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def main():
    args = parseArgs()

    pyprof.nvtx.init()
    # pyprof.nvtx.wrap(fused_adam_cuda, 'adam')

    N = args.b
    C = 3
    H = d[args.m]['H']
    W = d[args.m]['W']
    opts = d[args.m]['opts']
    classes = 1000

    net = getattr(models, args.m)
    net = net(**opts).cuda().half()
    net.train()

    x = torch.rand(N, C, H, W).cuda().half()
    target = torch.empty(N, dtype=torch.long).random_(classes).cuda()

    criterion = nn.CrossEntropyLoss().cuda()
    if args.o == "sgd":
        optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    elif args.o == "adam":
        optimizer = FusedAdam(net.parameters())
    else:
        assert False

    # Warm up without profiler
    for i in range(2):
        output = net(x)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    with torch.autograd.profiler.emit_nvtx():
        profiler.start()
        output = net(x)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        profiler.stop()
Example 10: set_model
# Required import: from apex import optimizers [as alias]
# Or: from apex.optimizers import FusedAdam [as alias]
def set_model(self):
    print('[Runner] - Initializing Transformer model...')

    # build the Transformer model with speech prediction head
    model_config = TransformerConfig(self.config)
    self.dr = model_config.downsample_rate
    self.hidden_size = model_config.hidden_size

    self.model = TransformerForMaskedAcousticModel(model_config, self.input_dim, self.output_dim).to(self.device)
    self.model.train()

    if self.args.multi_gpu:
        self.model = torch.nn.DataParallel(self.model)
        print('[Runner] - Multi-GPU training Enabled: ' + str(torch.cuda.device_count()))
    print('[Runner] - Number of parameters: ' + str(sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    # Setup optimizer
    param_optimizer = list(self.model.named_parameters())

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    if self.apex:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=self.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if self.config['optimizer']['loss_scale'] == 0:
            self.optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            self.optimizer = FP16_Optimizer(optimizer, static_loss_scale=self.config['optimizer']['loss_scale'])
        self.warmup_linear = WarmupLinearSchedule(warmup=self.warmup_proportion,
                                                  t_total=self.total_steps)
    else:
        self.optimizer = BertAdam(optimizer_grouped_parameters,
                                  lr=self.learning_rate,
                                  warmup=self.warmup_proportion,
                                  t_total=self.total_steps)
Author: andi611, Project: Self-Supervised-Speech-Pretraining-and-Representation-Learning, Lines of code: 49, Source file: runner.py