This article collects typical usage examples of the Python method apex.parallel.DistributedDataParallel. If you have been wondering exactly how parallel.DistributedDataParallel is used, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage examples from the containing package, apex.parallel.
The following shows 15 code examples of the parallel.DistributedDataParallel method, sorted by popularity by default.
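Before going through the examples, here is a minimal sketch of the apex DDP setup most of them build on. It is an illustrative outline rather than code from any of the repositories below; the function name, the use of amp, and the opt_level are assumptions.

import torch
import torch.distributed as dist
from apex import amp
from apex.parallel import DistributedDataParallel, convert_syncbn_model

def wrap_model_for_apex_ddp(model, optimizer, local_rank):
    # assumes one process per GPU, launched so that the NCCL env:// rendezvous variables are set
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl', init_method='env://')

    model = convert_syncbn_model(model)  # swap BatchNorm layers for synchronized BatchNorm
    model = model.cuda(local_rank)
    model, optimizer = amp.initialize(model, optimizer, opt_level='O1')  # optional mixed precision
    model = DistributedDataParallel(model, delay_allreduce=True)
    return model, optimizer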
Example 1: run_training
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def run_training(self):
    """
    if we run with -c then we need to set the correct lr for the first epoch, otherwise it will run the first
    continued epoch with self.initial_lr

    we also need to make sure deep supervision in the network is enabled for training, thus the wrapper
    :return:
    """
    self.maybe_update_lr(self.epoch)  # if we dont overwrite epoch then self.epoch+1 is used which is not what we
    # want at the start of the training
    if isinstance(self.network, DDP):
        net = self.network.module
    else:
        net = self.network

    ds = net.do_ds
    net.do_ds = True
    ret = nnUNetTrainer.run_training(self)
    net.do_ds = ds
    return ret
Example 2: validate
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, step_size: float = 0.5,
             save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True,
             validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False,
             force_separate_z: bool = None, interpolation_order: int = 3, interpolation_order_z=0):
    if self.local_rank == 0:
        if isinstance(self.network, DDP):
            net = self.network.module
        else:
            net = self.network
        ds = net.do_ds
        net.do_ds = False

        ret = nnUNetTrainer.validate(self, do_mirroring, use_sliding_window, step_size, save_softmax, use_gaussian,
                                     overwrite, validation_folder_name, debug, all_in_gpu,
                                     force_separate_z=force_separate_z, interpolation_order=interpolation_order,
                                     interpolation_order_z=interpolation_order_z)
        net.do_ds = ds
        return ret
Example 3: check_ddp_wrapped
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def check_ddp_wrapped(model: nn.Module) -> bool:
    """
    Checks whether model is wrapped with DataParallel/DistributedDataParallel.
    """
    parallel_wrappers = nn.DataParallel, nn.parallel.DistributedDataParallel

    # Check whether Apex is installed and if it is,
    # add Apex's DistributedDataParallel to list of checked types
    try:
        from apex.parallel import DistributedDataParallel as apex_DDP
        parallel_wrappers = parallel_wrappers + (apex_DDP,)
    except ImportError:
        pass

    return isinstance(model, parallel_wrappers)
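A typical use of this helper is to reach the underlying module no matter how (or whether) the model was wrapped. The small companion function below is a hypothetical illustration, not part of the example's library:

def unwrap_model(model: nn.Module) -> nn.Module:
    # return the bare module if `model` is wrapped, otherwise return it unchanged
    return model.module if check_ddp_wrapped(model) else model

# e.g. saving a checkpoint that loads cleanly without any parallel wrapper:
# torch.save(unwrap_model(model).state_dict(), 'checkpoint.pth')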
Example 4: __init__
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def __init__(self, system_config, model, distributed=False, gpu=None):
    super(NetworkFactory, self).__init__()

    self.system_config = system_config
    self.gpu = gpu
    self.model = DummyModule(model)
    self.loss = model.loss
    self.network = Network(self.model, self.loss)

    if distributed:
        from apex.parallel import DistributedDataParallel, convert_syncbn_model
        torch.cuda.set_device(gpu)
        self.network = self.network.cuda(gpu)
        self.network = convert_syncbn_model(self.network)
        self.network = DistributedDataParallel(self.network)
    else:
        self.network = DataParallel(self.network, chunk_sizes=system_config.chunk_sizes)

    total_params = 0
    for params in self.model.parameters():
        num_params = 1
        for x in params.size():
            num_params *= x
        total_params += num_params
    print("\033[0;35m " + "total parameters: {}".format(total_params) + "\033[0m")

    if system_config.opt_algo == "adam":
        self.optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, self.model.parameters())
        )
    elif system_config.opt_algo == "sgd":
        self.optimizer = torch.optim.SGD(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            lr=system_config.learning_rate,
            momentum=0.9, weight_decay=0.0001
        )
    else:
        raise ValueError("unknown optimizer")
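As an aside, the parameter-counting loop in the example above can be written more compactly with torch's numel(); the following one-liner is an equivalent rewrite, not code from the example's repository:

total_params = sum(p.numel() for p in self.model.parameters())
print("total parameters: {}".format(total_params))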
Example 5: distribute_model_object
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def distribute_model_object(self, mdl):
    args = self.args
    if args.apex_distributed:
        # TODO: try delay_allreduce=True
        from apex.parallel import DistributedDataParallel as ApexDDP
        mdl = ApexDDP(mdl, delay_allreduce=True)
    else:
        mdl = DDP(mdl, device_ids=[args.gpu], output_device=args.gpu)
    return mdl
Example 6: stage_model
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def stage_model(model, fp16, device, local_rank, n_gpu):
    if fp16:
        model.half()
    model.to(device)

    if local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex "
                              "to use distributed and fp16 training.")
        model = DDP(model)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)
    return model
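For context, local_rank != -1 in the example above usually means the script was started with torch.distributed.launch, which passes --local_rank to every process. The surrounding setup might look roughly like the sketch below; the argument parsing and the build_model() call are assumptions made for illustration:

import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument("--local_rank", type=int, default=-1,
                    help="set by torch.distributed.launch; -1 means non-distributed")
args = parser.parse_args()

if args.local_rank != -1:
    torch.cuda.set_device(args.local_rank)
    torch.distributed.init_process_group(backend="nccl", init_method="env://")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = build_model()  # placeholder for whatever model the script actually constructs
model = stage_model(model, fp16=True, device=device, local_rank=args.local_rank,
                    n_gpu=torch.cuda.device_count())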
Example 7: __init__
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def __init__(self, system_config, model, distributed=False, gpu=None):
    super(NetworkFactory, self).__init__()

    self.system_config = system_config
    self.gpu = gpu
    self.model = DummyModule(model)
    self.loss = model.loss
    self.network = Network(self.model, self.loss)

    if distributed:
        from apex.parallel import DistributedDataParallel, convert_syncbn_model
        torch.cuda.set_device(gpu)
        self.network = self.network.cuda(gpu)
        self.network = convert_syncbn_model(self.network)
        self.network = DistributedDataParallel(self.network)
    else:
        self.network = DataParallel(self.network, chunk_sizes=system_config.chunk_sizes)

    total_params = 0
    for params in self.model.parameters():
        num_params = 1
        for x in params.size():
            num_params *= x
        total_params += num_params
    print("total parameters: {}".format(total_params))

    if system_config.opt_algo == "adam":
        self.optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, self.model.parameters())
        )
    elif system_config.opt_algo == "sgd":
        self.optimizer = torch.optim.SGD(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            lr=system_config.learning_rate,
            momentum=0.9, weight_decay=0.0001
        )
    else:
        raise ValueError("unknown optimizer")
Example 8: main
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def main(cfgs):
    Logger.init(**cfgs['logger'])

    local_rank = cfgs['local_rank']
    world_size = int(os.environ['WORLD_SIZE'])
    Log.info('rank: {}, world_size: {}'.format(local_rank, world_size))

    log_dir = cfgs['log_dir']
    pth_dir = cfgs['pth_dir']
    if local_rank == 0:
        assure_dir(log_dir)
        assure_dir(pth_dir)

    aux_config = cfgs.get('auxiliary', None)
    network = ModuleBuilder(cfgs['network'], aux_config).cuda()
    criterion = build_criterion(cfgs['criterion'], aux_config).cuda()
    optimizer = optim.SGD(network.parameters(), **cfgs['optimizer'])
    scheduler = PolyLRScheduler(optimizer, **cfgs['scheduler'])

    dataset = build_dataset(**cfgs['dataset'], **cfgs['transforms'])
    sampler = DistributedSampler4Iter(dataset, world_size=world_size,
                                      rank=local_rank, **cfgs['sampler'])
    train_loader = DataLoader(dataset, sampler=sampler, **cfgs['loader'])

    cudnn.benchmark = True
    torch.manual_seed(666)
    torch.cuda.manual_seed(666)
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl', init_method='env://')

    model = DistributedDataParallel(network)
    model = apex.parallel.convert_syncbn_model(model)
    torch.cuda.empty_cache()

    train(local_rank, world_size, pth_dir, cfgs['frequency'], criterion,
          train_loader, model, optimizer, scheduler)
Example 9: init_distributed
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def init_distributed(self, rank, local_rank):
    assert not self.distribured_enabled
    self.distribured_enabled = True
    print("Initializing Distributed, rank {}, local rank {}".format(rank, local_rank))
    dist.init_process_group(backend='nccl', rank=rank)
    torch.cuda.set_device(local_rank)
    self.core = DistributedDataParallel(self.core)
Example 10: predict_preprocessed_data_return_seg_and_softmax
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def predict_preprocessed_data_return_seg_and_softmax(self, data: np.ndarray, do_mirroring: bool = True,
                                                     mirror_axes: Tuple[int] = None,
                                                     use_sliding_window: bool = True,
                                                     step_size: float = 0.5, use_gaussian: bool = True,
                                                     pad_border_mode: str = 'constant', pad_kwargs: dict = None,
                                                     all_in_gpu: bool = True,
                                                     verbose: bool = True) -> Tuple[np.ndarray, np.ndarray]:
    if pad_border_mode == 'constant' and pad_kwargs is None:
        pad_kwargs = {'constant_values': 0}

    if do_mirroring and mirror_axes is None:
        mirror_axes = self.data_aug_params['mirror_axes']

    if do_mirroring:
        assert self.data_aug_params["do_mirror"], "Cannot do mirroring as test time augmentation when training " \
                                                  "was done without mirroring"

    valid = list((SegmentationNetwork, nn.DataParallel, DDP))
    assert isinstance(self.network, tuple(valid))

    if isinstance(self.network, DDP):
        net = self.network.module
    else:
        net = self.network
    ds = net.do_ds
    net.do_ds = False
    ret = net.predict_3D(data, do_mirroring, mirror_axes, use_sliding_window, step_size, self.patch_size,
                         self.regions_class_order, use_gaussian, pad_border_mode, pad_kwargs,
                         all_in_gpu, verbose)
    net.do_ds = ds
    return ret
Example 11: set_model_dist
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def set_model_dist(net):
    if has_apex:
        net = parallel.DistributedDataParallel(net, delay_allreduce=True)
    else:
        local_rank = dist.get_rank()
        net = nn.parallel.DistributedDataParallel(
            net,
            device_ids=[local_rank, ],
            output_device=local_rank)
    return net
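The delay_allreduce=True flag tells Apex DDP to postpone all gradient all-reduces until the backward pass has finished instead of overlapping them with computation, which is more robust when some parameters do not receive gradients every iteration. Below is a hedged sketch of how the wrapped model is then driven in a training step; the data loader, criterion, and optimizer are placeholders, not part of the example's repository:

net = set_model_dist(net)  # Apex DDP if available, torch DDP otherwise

for images, labels in train_loader:        # placeholder data loader
    images, labels = images.cuda(), labels.cuda()
    optimizer.zero_grad()
    loss = criterion(net(images), labels)  # placeholder criterion
    loss.backward()                        # gradients are all-reduced across ranks here
    optimizer.step()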
Example 12: model_multi_gpu
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def model_multi_gpu(model, multi_gpu=False):
    if multi_gpu:
        model = DDP(model)
        print('DDP(model)')
    return model
Example 13: __init__
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def __init__(self, args, arch, loss, pretrained_weights=None, state=None, cuda=True, fp16=False, distributed=False):
    super(ModelAndLoss, self).__init__()
    self.arch = arch
    self.mask = None

    print("=> creating model '{}'".format(arch))
    model = models.build_resnet(arch[0], arch[1])
    if pretrained_weights is not None:
        print("=> using pre-trained model from a file '{}'".format(arch))
        model.load_state_dict(pretrained_weights)

    if cuda:
        model = model.cuda()
    if fp16:
        model = network_to_half(model)
    if distributed:
        model = DDP(model)

    if state is not None:
        model.load_state_dict(state)

    # define loss function (criterion) and optimizer
    criterion = loss()
    if cuda:
        criterion = criterion.cuda()

    self.model = model
    self.loss = criterion
Example 14: __init__
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def __init__(self, cfg, weights: Union[str, Dict[str, Any]]):
    self.start_iter = 0
    self.max_iter = cfg.SOLVER.MAX_ITER
    self.cfg = cfg

    # We do not make any super call here and implement `__init__` from
    # `DefaultTrainer`: we need to initialize mixed precision model before
    # wrapping to DDP, so we need to do it this way.

    model = self.build_model(cfg)
    optimizer = self.build_optimizer(cfg, model)
    data_loader = self.build_train_loader(cfg)
    scheduler = self.build_lr_scheduler(cfg, optimizer)

    # Load pre-trained weights before wrapping to DDP because `ApexDDP` has
    # some weird issue with `DetectionCheckpointer`.
    # fmt: off
    if isinstance(weights, str):
        # weights are ``str`` means ImageNet init or resume training.
        self.start_iter = (
            DetectionCheckpointer(
                model, optimizer=optimizer, scheduler=scheduler
            ).resume_or_load(weights, resume=True).get("iteration", -1) + 1
        )
    elif isinstance(weights, dict):
        # weights are a state dict means our pretrain init.
        DetectionCheckpointer(model)._load_model(weights)
    # fmt: on

    # Enable distributed training if we have multiple GPUs. Use Apex DDP for
    # non-FPN backbones because its `delay_allreduce` functionality helps with
    # gradient checkpointing.
    if dist.get_world_size() > 1:
        if global_cfg.get("GRADIENT_CHECKPOINT", False):
            model = ApexDDP(model, delay_allreduce=True)
        else:
            model = nn.parallel.DistributedDataParallel(
                model, device_ids=[dist.get_rank()], broadcast_buffers=False
            )

    # Call `__init__` from grandparent class: `SimpleTrainer`.
    SimpleTrainer.__init__(self, model, data_loader, optimizer)
    self.scheduler = scheduler
    self.checkpointer = DetectionCheckpointer(
        model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=self.scheduler
    )
    self.register_hooks(self.build_hooks())
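The comment about initializing the mixed precision model before wrapping in DDP refers to the ordering Apex's amp documentation recommends: patch the model and optimizer with amp first, and only then wrap the model for distributed training. A minimal sketch of that ordering follows; the builder functions and the opt_level are assumptions, not taken from this repository:

from apex import amp
from apex.parallel import DistributedDataParallel as ApexDDP

model = build_model(cfg).cuda()          # placeholder builder
optimizer = build_optimizer(cfg, model)  # placeholder builder

# 1. amp rewrites the model/optimizer for mixed precision ...
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
# 2. ... and only afterwards is the model wrapped for distributed training.
model = ApexDDP(model, delay_allreduce=True)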
Example 15: load_checkpoint_ram
# Required import: from apex import parallel [as alias]
# Or: from apex.parallel import DistributedDataParallel [as alias]
def load_checkpoint_ram(self, saved_model, train=True):
    """
    used for if the checkpoint is already in ram
    :param saved_model:
    :param train:
    :return:
    """
    if not self.was_initialized:
        self.initialize(train)

    new_state_dict = OrderedDict()
    curr_state_dict_keys = list(self.network.state_dict().keys())
    # if state dict comes from nn.DataParallel but we use non-parallel model here then the state dict keys do not
    # match. Use heuristic to make it match
    for k, value in saved_model['state_dict'].items():
        key = k
        if key not in curr_state_dict_keys:
            print("duh")
            key = key[7:]
        new_state_dict[key] = value

    # if we are fp16, then we need to reinitialize the network and the optimizer. Otherwise amp will throw an error
    if self.fp16:
        self.network, self.optimizer, self.lr_scheduler = None, None, None
        self.initialize_network()
        self.initialize_optimizer_and_scheduler()

        # we need to reinitialize DDP here
        self.network = DDP(self.network)

    self.network.load_state_dict(new_state_dict)
    self.epoch = saved_model['epoch']
    if train:
        optimizer_state_dict = saved_model['optimizer_state_dict']
        if optimizer_state_dict is not None:
            self.optimizer.load_state_dict(optimizer_state_dict)

        if self.lr_scheduler is not None and hasattr(self.lr_scheduler, 'load_state_dict') and saved_model[
                'lr_scheduler_state_dict'] is not None:
            self.lr_scheduler.load_state_dict(saved_model['lr_scheduler_state_dict'])

        if issubclass(self.lr_scheduler.__class__, _LRScheduler):
            self.lr_scheduler.step(self.epoch)

    self.all_tr_losses, self.all_val_losses, self.all_val_losses_tr_mode, self.all_val_eval_metrics = saved_model[
        'plot_stuff']

    # after the training is done, the epoch is incremented one more time in my old code. This results in
    # self.epoch = 1001 for old trained models when the epoch is actually 1000. This causes issues because
    # len(self.all_tr_losses) = 1000 and the plot function will fail. We can easily detect and correct that here
    if self.epoch != len(self.all_tr_losses):
        self.print_to_log_file("WARNING in loading checkpoint: self.epoch != len(self.all_tr_losses). This is "
                               "due to an old bug and should only appear when you are loading old models. New "
                               "models should have this fixed! self.epoch is now set to len(self.all_tr_losses)")
        self.epoch = len(self.all_tr_losses)
        self.all_tr_losses = self.all_tr_losses[:self.epoch]
        self.all_val_losses = self.all_val_losses[:self.epoch]
        self.all_val_losses_tr_mode = self.all_val_losses_tr_mode[:self.epoch]
        self.all_val_eval_metrics = self.all_val_eval_metrics[:self.epoch]

    self.amp_initialized = False
    self._maybe_init_amp()