This article collects typical usage examples of data.distributed from the torch.utils.data package in Python. If you have been wondering what data.distributed is for and how to use it, the curated code examples below may help. You can also browse further usage examples from the parent module, torch.utils.data.
The following 15 code examples of data.distributed are listed, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: _get_train_data_loader
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def _get_train_data_loader(training_dir, is_distributed, batch_size, **kwargs):
    logger.info("Get train data loader")
    dataset = datasets.MNIST(
        training_dir,
        train=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
        download=False,  # True sets a dependency on an external site for our canaries.
    )
    train_sampler = (
        torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None
    )
    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=train_sampler is None,
        sampler=train_sampler,
        **kwargs
    )
    return train_sampler, train_loader
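When is_distributed is true, the sampler returned here should also be re-seeded every epoch, otherwise every epoch sees the same shuffling order. A minimal sketch of the calling loop (num_epochs and train_one_epoch are hypothetical names, not part of the example above):

train_sampler, train_loader = _get_train_data_loader(training_dir, is_distributed, batch_size)
for epoch in range(num_epochs):
    if train_sampler is not None:
        # DistributedSampler derives its shuffling seed from the epoch counter.
        train_sampler.set_epoch(epoch)
    train_one_epoch(train_loader)  # hypothetical per-epoch training helper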
Example 2: distributed_predict
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def distributed_predict(input, target, model, criterion):
    # Allows distributed prediction on uneven batches. Test set isn't always large enough for every GPU to get a batch.
    batch_size = input.size(0)
    output = loss = corr1 = corr5 = valid_batches = 0

    if batch_size:
        with torch.no_grad():
            output = model(input)
            loss = criterion(output, target).data
        # measure accuracy and record loss
        valid_batches = 1
        corr1, corr5 = correct(output.data, target, topk=(1, 5))

    metrics = torch.tensor([batch_size, valid_batches, loss, corr1, corr5]).float().cuda()
    batch_total, valid_batches, reduced_loss, corr1, corr5 = dist_utils.sum_tensor(metrics).cpu().numpy()
    reduced_loss = reduced_loss / valid_batches
    top1 = corr1 * (100.0 / batch_total)
    top5 = corr5 * (100.0 / batch_total)
    return top1, top5, reduced_loss, batch_total
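dist_utils.sum_tensor is project-specific and not shown in this snippet. A plausible implementation, assuming the default process group has already been initialized, is an element-wise all-reduce sum across ranks:

import torch.distributed as dist

def sum_tensor(tensor):
    # Sum the tensor across all ranks; every rank receives the same result.
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    return rt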
Example 3: get_parser
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def get_parser():
    parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
    parser.add_argument('data', metavar='DIR', help='path to dataset')
    parser.add_argument('--save-dir', type=str, default=Path.home()/'imagenet_training',
                        help='Directory to save logs and models.')
    parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18',
                        choices=model_names, help='model architecture')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('-b', '--batch-size', default=256, type=int,
                        metavar='N', help='mini-batch size (default: 256)')
    parser.add_argument('--fp16', action='store_true', help='Run the model in fp16 mode.')
    parser.add_argument('--dist-url', default='file://sync.file', type=str,
                        help='url used to set up distributed training')
    parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')
    parser.add_argument('--world-size', default=1, type=int,
                        help='Number of GPUs to use. Can either be manually set ' +
                             'or automatically set by using \'python -m multiproc\'.')
    parser.add_argument('--rank', default=0, type=int,
                        help='Used for multi-process training. Can either be manually set ' +
                             'or automatically set by using \'python -m multiproc\'.')
    return parser
Example 4: main
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def main():
    if cfg.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if cfg.dist_url == "env://" and cfg.world_size == -1:
        cfg.world_size = int(os.environ["WORLD_SIZE"])

    cfg.distributed = cfg.world_size > 1 or cfg.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if cfg.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        cfg.world_size = ngpus_per_node * cfg.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, cfg))
    else:
        # Simply call main_worker function
        main_worker(cfg.gpu, ngpus_per_node, cfg)
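main_worker itself is not part of this snippet. A minimal sketch of what such a worker usually does per spawned process (the cfg.rank and cfg.dist_backend fields are assumptions, carried over from common variants of this script):

import torch
import torch.distributed as dist

def main_worker(gpu, ngpus_per_node, cfg):
    # Compute this process's global rank and join the default process group.
    rank = cfg.rank * ngpus_per_node + gpu
    dist.init_process_group(backend=cfg.dist_backend, init_method=cfg.dist_url,
                            world_size=cfg.world_size, rank=rank)
    torch.cuda.set_device(gpu)
    # ... build the model, wrap it in DistributedDataParallel, and train ...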
Example 5: get_args
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def get_args():
    parent_parser = ArgumentParser(add_help=False)
    parent_parser.add_argument('--data-path', metavar='DIR', type=str,
                               help='path to dataset')
    parent_parser.add_argument('--save-path', metavar='DIR', default=".", type=str,
                               help='path to save output')
    parent_parser.add_argument('--gpus', type=int, default=1,
                               help='how many gpus')
    parent_parser.add_argument('--distributed-backend', type=str, default='dp',
                               choices=('dp', 'ddp', 'ddp2'),
                               help='supports three options: dp, ddp, ddp2')
    parent_parser.add_argument('--use-16bit', dest='use_16bit', action='store_true',
                               help='if true uses 16 bit precision')
    parent_parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                               help='evaluate model on validation set')
    parser = ImageNetLightningModel.add_model_specific_args(parent_parser)
    return parser.parse_args()
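In the PyTorch Lightning example this parser belongs to, the parsed arguments are typically handed straight to a Trainer. A rough sketch, assuming an older Lightning release where gpus, distributed_backend, and precision are valid Trainer arguments (exact names vary across Lightning versions):

import pytorch_lightning as pl

def main(hparams):
    model = ImageNetLightningModel(hparams)
    trainer = pl.Trainer(
        gpus=hparams.gpus,
        distributed_backend=hparams.distributed_backend,  # 'dp', 'ddp', or 'ddp2'
        precision=16 if hparams.use_16bit else 32,
    )
    trainer.fit(model)

main(get_args())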
Example 6: initialize_model
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def initialize_model(
    arch: str, lr: float, momentum: float, weight_decay: float, device_id: int
):
    print(f"=> creating model: {arch}")
    model = models.__dict__[arch]()

    # For multiprocessing distributed, DistributedDataParallel constructor
    # should always set the single device scope, otherwise,
    # DistributedDataParallel will use all available devices.
    model.cuda(device_id)
    cudnn.benchmark = True
    model = DistributedDataParallel(model, device_ids=[device_id])

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(device_id)
    optimizer = SGD(
        model.parameters(), lr, momentum=momentum, weight_decay=weight_decay
    )
    return model, criterion, optimizer
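DistributedDataParallel requires that the default process group has already been initialized in the calling process. A minimal sketch of that setup, assuming a torchrun-style launch where LOCAL_RANK and the env:// rendezvous variables are set by the launcher:

import os
import torch.distributed as dist

# Assumed launch: one process per GPU with MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE
# provided in the environment, so the env:// rendezvous can be used.
dist.init_process_group(backend="nccl", init_method="env://")
device_id = int(os.environ["LOCAL_RANK"])
model, criterion, optimizer = initialize_model(
    "resnet18", lr=0.1, momentum=0.9, weight_decay=1e-4, device_id=device_id
)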
Example 7: get_train_loader
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def get_train_loader(data_path, batch_size, workers=5, _worker_init_fn=None):
    traindir = os.path.join(data_path, 'train')
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            # transforms.ToTensor(),  too slow
            # normalize,
        ]))

    if torch.distributed.is_initialized():
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
        num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True,
        sampler=train_sampler, collate_fn=fast_collate, drop_last=True)
    return train_loader
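fast_collate and _worker_init_fn come from elsewhere in this project; the commented-out ToTensor/normalize transforms suggest that tensor conversion and normalization are deferred to the GPU. A hedged sketch of what such a collate function commonly looks like, batching PIL images into a uint8 NCHW tensor:

import numpy as np
import torch

def fast_collate(batch):
    # Stack PIL images into a uint8 NCHW tensor without going through ToTensor.
    imgs = [sample[0] for sample in batch]
    targets = torch.tensor([sample[1] for sample in batch], dtype=torch.int64)
    w, h = imgs[0].size
    tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8)
    for i, img in enumerate(imgs):
        arr = np.asarray(img, dtype=np.uint8)
        if arr.ndim < 3:
            arr = np.expand_dims(arr, axis=-1)  # grayscale -> HWC with one channel
        arr = np.rollaxis(arr, 2)               # HWC -> CHW
        tensor[i] += torch.from_numpy(arr)
    return tensor, targets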
Example 8: get_val_step
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def get_val_step(model_and_loss):
    def _step(input, target):
        input_var = Variable(input)
        target_var = Variable(target)

        with torch.no_grad():
            loss, output = model_and_loss(input_var, target_var)

        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        if torch.distributed.is_initialized():
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
        else:
            reduced_loss = loss.data

        torch.cuda.synchronize()
        return reduced_loss, prec1, prec5

    return _step
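Unlike the summing helper in Example 2, reduce_tensor conventionally averages a metric across ranks; the helper itself is not included here, so the following is only an assumed implementation:

import torch.distributed as dist

def reduce_tensor(tensor):
    # All-reduce then divide by world size, so each rank gets the mean value.
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= dist.get_world_size()
    return rt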
Example 9: _get_train_data_loader
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def _get_train_data_loader(batch_size, training_dir, is_distributed, **kwargs):
    logger.info("Get train data loader")
    dataset = datasets.MNIST(training_dir, train=True, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ]))
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=train_sampler is None,
                                       sampler=train_sampler, **kwargs)
Example 10: main
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def main():
    args = parser.parse_args()

    if not os.path.exists(os.path.join(args.save_folder, args.dataset, args.network)):
        os.makedirs(os.path.join(args.save_folder, args.dataset, args.network))

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    os.environ['WORLD_SIZE'] = '2'

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example 11: save_checkpoint
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def save_checkpoint(epoch):
    if hvd.rank() == 0:
        filepath = args.checkpoint_format.format(epoch=epoch + 1)
        state = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(state, filepath)

# Horovod: average metrics from distributed training.
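The snippet ends on a comment about averaging metrics, but the helper it introduces is cut off. A minimal sketch of such a Horovod metric average, assuming the standard hvd.allreduce call (which averages across workers by default):

import torch
import horovod.torch as hvd

def metric_average(val, name):
    # hvd.allreduce averages across workers by default, so this returns the mean of val.
    tensor = torch.tensor(val)
    avg_tensor = hvd.allreduce(tensor, name=name)
    return avg_tensor.item()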
Example 12: main
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        if not args.cpu:
            cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example 13: main
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def main():
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example 14: validate
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def validate(val_loader, model, criterion, epoch, start_time):
    timer = TimeMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    eval_start_time = time.time()

    for i, (input, target) in enumerate(val_loader):
        if args.short_epoch and (i > 10):
            break
        batch_num = i + 1
        timer.batch_start()

        if args.distributed:
            top1acc, top5acc, loss, batch_total = distributed_predict(input, target, model, criterion)
        else:
            with torch.no_grad():
                output = model(input)
                loss = criterion(output, target).data
            batch_total = input.size(0)
            top1acc, top5acc = accuracy(output.data, target, topk=(1, 5))

        # Eval batch done. Logging results
        timer.batch_end()
        losses.update(to_python_float(loss), to_python_float(batch_total))
        top1.update(to_python_float(top1acc), to_python_float(batch_total))
        top5.update(to_python_float(top5acc), to_python_float(batch_total))

        should_print = (batch_num % args.print_freq == 0) or (batch_num == len(val_loader))
        if args.local_rank == 0 and should_print:
            output = (f'Test: [{epoch}][{batch_num}/{len(val_loader)}]\t'
                      f'Time {timer.batch_time.val:.3f} ({timer.batch_time.avg:.3f})\t'
                      f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                      f'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      f'Acc@5 {top5.val:.3f} ({top5.avg:.3f})')
            log.verbose(output)

    tb.log_eval(top1.avg, top5.avg, time.time() - eval_start_time)
    tb.log('epoch', epoch)
    return top1.avg, top5.avg
Example 15: preload_data
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import distributed [as alias]
def preload_data(self, ep, sz, bs, trndir, valdir, **kwargs):  # dummy ep var to prevent error
    """Pre-initializes data-loaders. Use set_data to start using it."""
    if 'lr' in kwargs: del kwargs['lr']  # in case we mix schedule and data phases
    if sz == 128:
        val_bs = max(bs, 512)
    elif sz == 224:
        val_bs = max(bs, 256)
    else:
        val_bs = max(bs, 128)
    return dataloader.get_loaders(trndir, valdir, bs=bs, val_bs=val_bs, sz=sz,
                                  workers=args.workers, distributed=args.distributed, **kwargs)

# ### Learning rate scheduler