This page collects typical usage examples of the Python method horovod.torch.size. If you have been wondering what exactly horovod.torch.size does and how to use it, the curated code examples below may help. You can also explore further usage examples from its containing module, horovod.torch.
The sections below present 15 code examples of horovod.torch.size, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
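To set the stage, here is a minimal, hedged sketch of what hvd.size() reports (assuming the script is launched under a Horovod launcher such as `horovodrun -np 4 python train.py`; the file name is illustrative):

import horovod.torch as hvd

hvd.init()                     # must run before any other Horovod call
print('workers:', hvd.size())  # total number of Horovod processes
print('rank:', hvd.rank())     # this process's index, from 0 to size() - 1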
Example 1: test_horovod_allgather_error
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def test_horovod_allgather_error(self):
    """Test that the allgather returns an error if any dimension besides
    the first is different among the tensors being gathered."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    tensor_size = [17] * 3
    tensor_size[1] = 10 * (rank + 1)
    tensor = torch.FloatTensor(*tensor_size).fill_(1).mul_(rank)

    try:
        hvd.allgather(tensor)
        assert False, 'hvd.allgather did not throw error'
    except torch.FatalError:
        pass
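For contrast, a hedged sketch of an allgather that should succeed — ranks may differ only in the first dimension (assuming hvd.init() has already run across multiple workers):

# Each rank contributes (rank + 1) rows of width 17; trailing dims match, so this is valid.
tensor = torch.FloatTensor(hvd.rank() + 1, 17).fill_(hvd.rank())
gathered = hvd.allgather(tensor)  # first dim is the sum of all ranks' first dims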
Example 2: allreduce_async_
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def allreduce_async_(tensor, average=True, name=None):
    """
    A function that performs asynchronous in-place averaging or summation of the input
    tensor over all the Horovod processes.

    The reduction operation is keyed by the name. If name is not provided, an incremented
    auto-generated name is used. The tensor type and shape must be the same on all
    Horovod processes for a given name. The reduction will not start until all processes
    are ready to send and receive the tensor.

    Arguments:
        tensor: A tensor to average and sum.
        average: A flag indicating whether to compute average or summation,
                 defaults to average.
        name: A name of the reduction operation.

    Returns:
        A handle to the allreduce operation that can be used with `poll()` or
        `synchronize()`.
    """
    if average:
        tensor.div_(size())
    return _allreduce_async(tensor, tensor, name)
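A hedged usage sketch against Horovod's public API, which mirrors the helper above (hvd.allreduce_async_, hvd.poll, and hvd.synchronize; the tensor and name are illustrative):

import torch
import horovod.torch as hvd

hvd.init()
tensor = torch.ones(4)
handle = hvd.allreduce_async_(tensor, average=True, name='demo.avg')
# ... overlap other computation here ...
if hvd.poll(handle):               # non-blocking completion check
    pass
result = hvd.synchronize(handle)   # blocks; tensor now holds the average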
Example 3: test_horovod_allreduce_grad
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def test_horovod_allreduce_grad(self):
    """Test the correctness of the allreduce gradient."""
    hvd.init()
    size = hvd.size()
    dtypes = [torch.IntTensor, torch.LongTensor,
              torch.FloatTensor, torch.DoubleTensor]
    if torch.cuda.is_available():
        dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                   torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        torch.manual_seed(1234)
        tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
        tensor = tensor.type(dtype)
        tensor = torch.autograd.Variable(tensor, requires_grad=True)

        summed = hvd.allreduce(tensor, average=False)
        summed.backward(torch.ones([17] * dim))
        grad_out = tensor.grad.data.numpy()

        expected = np.ones([17] * dim) * size
        err = np.linalg.norm(expected - grad_out)
        self.assertLess(err, 0.00000001,
                        "gradient %s differs from expected %s, "
                        "error: %s" % (grad_out, expected, str(err)))
Example 4: test_horovod_broadcast_error
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def test_horovod_broadcast_error(self):
    """Test that the broadcast returns an error if any dimension besides
    the first is different among the tensors being broadcasted."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    tensor_size = [17] * 3
    tensor_size[1] = 10 * (rank + 1)
    tensor = torch.FloatTensor(*tensor_size).fill_(1).mul_(rank)

    try:
        hvd.broadcast(tensor, 0)
        assert False, 'hvd.broadcast did not throw error'
    except torch.FatalError:
        pass
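For reference, a hedged sketch of a well-formed broadcast — identical shape and dtype on every rank (again assuming hvd.init() has run across multiple workers):

tensor = torch.zeros(17, 17, 17)
if hvd.rank() == 0:
    tensor.fill_(42)
tensor = hvd.broadcast(tensor, root_rank=0)  # all ranks now hold root's values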
Example 5: test_horovod_broadcast_type_error
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def test_horovod_broadcast_type_error(self):
    """Test that the broadcast returns an error if the types being broadcasted
    differ among the processes."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    tensor_size = [17] * 3
    if rank % 2 == 0:
        tensor = torch.IntTensor(*tensor_size)
    else:
        tensor = torch.FloatTensor(*tensor_size)

    try:
        hvd.broadcast(tensor, 0)
        assert False, 'hvd.broadcast did not throw error'
    except torch.FatalError:
        pass
Example 6: get_train_loader
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def get_train_loader(batch_size=25):
    if hvd.rank() == 0:
        print('Train: ', end="")

    train_dataset = datasets.ImageFolder(root=datapath+'/train',
                                         transform=data_transform)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=hvd.size(), rank=hvd.rank())
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              sampler=train_sampler, num_workers=4, pin_memory=True)

    if hvd.rank() == 0:
        print('Found', len(train_dataset), 'images belonging to',
              len(train_dataset.classes), 'classes')
    return train_loader, train_sampler
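One detail worth noting when consuming the returned pair — DistributedSampler needs to be told the epoch so each worker reshuffles its shard differently every epoch. A hedged sketch (the epochs count is illustrative):

train_loader, train_sampler = get_train_loader(batch_size=25)
for epoch in range(10):
    train_sampler.set_epoch(epoch)  # reshuffle the per-rank shard each epoch
    for images, labels in train_loader:
        pass  # forward/backward as usual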
Example 7: adjust_learning_rate
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def adjust_learning_rate(
    optimizer, base_lr, warmup_epochs, data_loader, epoch, batch_idx
):
    logger = logging.getLogger(__name__)
    size = hvd.size() if _DISTRIBUTED else 1
    if epoch < warmup_epochs:
        epoch += float(batch_idx + 1) / len(data_loader)
        lr_adj = 1.0 / size * (epoch * (size - 1) / warmup_epochs + 1)
    elif epoch < 30:
        lr_adj = 1.0
    elif epoch < 60:
        lr_adj = 1e-1
    elif epoch < 80:
        lr_adj = 1e-2
    else:
        lr_adj = 1e-3
    for param_group in optimizer.param_groups:
        new_lr = base_lr * size * lr_adj
        if param_group["lr"] != new_lr:
            param_group["lr"] = new_lr
            if _get_rank() == 0:
                logger.info(f"setting lr to {param_group['lr']}")
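A hedged sketch of how such a schedule is typically driven from the training loop (optimizer, train_loader, and the base_lr value are illustrative; warmup_epochs=5 matches the warmup commonly paired with this 30/60/80 decay schedule):

for epoch in range(90):
    for batch_idx, (data, target) in enumerate(train_loader):
        adjust_learning_rate(optimizer, base_lr=0.0125, warmup_epochs=5,
                             data_loader=train_loader, epoch=epoch,
                             batch_idx=batch_idx)
        # ... forward, backward, optimizer.step() ...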
Example 8: _has_len
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def _has_len(dataloader: DataLoader) -> bool:
    """Checks if a given Dataloader has __len__ method implemented, i.e. if
    it is a finite dataloader or an infinite dataloader."""
    try:
        # try getting the length
        if len(dataloader) == 0:
            raise ValueError('`Dataloader` returned 0 length.'
                             ' Please make sure that your Dataloader at least returns 1 batch')
        has_len = True
    except TypeError:
        has_len = False
    except NotImplementedError:  # e.g. raised by torchtext if a batch_size_fn is used
        has_len = False

    if has_len and _has_iterable_dataset(dataloader) and LooseVersion(torch.__version__) >= LooseVersion("1.4.0"):
        rank_zero_warn(
            'Your `IterableDataset` has `__len__` defined.'
            ' In combination with multi-processing data loading (e.g. batch size > 1),'
            ' this can lead to unintended side effects since the samples will be duplicated.'
        )
    return has_len
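A hedged sketch of the two branches (assuming _has_len is importable; the Stream class is illustrative):

import torch
from torch.utils.data import DataLoader, IterableDataset, TensorDataset

finite = DataLoader(TensorDataset(torch.zeros(10, 3)), batch_size=2)
assert _has_len(finite)  # map-style dataset: len() succeeds

class Stream(IterableDataset):
    def __iter__(self):
        while True:
            yield torch.zeros(3)

assert not _has_len(DataLoader(Stream()))  # len() raises TypeError -> False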
Example 9: _get_distributed_sampler
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def _get_distributed_sampler(self, dataloader):
    if self.use_tpu:
        kwargs = dict(num_replicas=xm.xrt_world_size(), rank=xm.get_ordinal())
    elif self.use_horovod:
        kwargs = dict(num_replicas=hvd.size(), rank=hvd.rank())
    else:
        world_size = {
            'ddp': self.num_nodes * self.num_processes,
            'ddp_spawn': self.num_nodes * self.num_processes,
            'ddp2': self.num_nodes,
            'ddp_cpu': self.num_processes * self.num_nodes,
        }
        assert self.distributed_backend is not None
        kwargs = dict(num_replicas=world_size[self.distributed_backend], rank=self.global_rank)
    sampler = DistributedSampler(dataloader.dataset, **kwargs)
    return sampler
Example 10: run_test_from_config
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def run_test_from_config(trainer_options):
    """Trains the default model with the given config."""
    set_random_master_port()

    ckpt_path = trainer_options['default_root_dir']
    trainer_options.update(checkpoint_callback=ModelCheckpoint(ckpt_path))

    model = EvalModelTemplate()
    run_model_test(trainer_options, model, on_gpu=args.on_gpu, version=0, with_hpc=False)

    # Horovod should be initialized following training. If not, this will raise an exception.
    assert hvd.size() == 2

    if args.on_gpu:
        trainer = Trainer(gpus=1, distributed_backend='horovod', max_epochs=1)
        # Test the root_gpu property
        assert trainer.root_gpu == hvd.local_rank()
Example 11: color_frame
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def color_frame(tensor, thick=5, color='green', first=False):
    _color_ = {'green': (-1, 1, -1), 'red': (1, -1, -1), 'blue': (-1, -1, 1)}
    # tensor = to_data(tensor)
    for i in range(thick):
        for k in range(tensor.size(1)):
            # for nn in [0, -1]:  # First and last frame
            for nn in [0]:  # First
                tensor[nn, k, i, :] = _color_[color][k]
                if first:
                    tensor[nn, k, :, i] = _color_[color][k]
                tensor[nn, k, tensor.size(2) - i - 1, :] = _color_[color][k]
                tensor[nn, k, :, tensor.size(2) - i - 1] = _color_[color][k]
    return tensor
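A hedged usage sketch. The shapes are assumptions: a batch of frames in (N, C, H, W) layout with values in [-1, 1], since the color constants above are ±1:

frames = torch.zeros(8, 3, 64, 64)
framed = color_frame(frames, thick=5, color='green', first=True)  # marks frame 0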
Example 12: create_circle
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def create_circle(image, size=256):
    import numpy as np
    import torch
    xx, yy = np.mgrid[:size, :size]
    # circle contains the squared distance to the (size/2, size/2) center point;
    # we are just using the circle equation learnt at school
    circle = (xx - size / 2)**2 + (yy - size / 2)**2
    bin_circle = (circle <= (size / 2)**2) * 1.
    bin_circle = torch.from_numpy(bin_circle).float()
    bin_circle = bin_circle.repeat(1, image.size(1), 1, image.size(-1) // size)
    image = (image * bin_circle) + (1 - bin_circle).clamp_(min=0, max=1)
    return image
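A hedged usage sketch (assuming a (1, 3, 256, 256) image with values in [0, 1]; pixels outside the circle are pushed to 1, i.e. white):

image = torch.rand(1, 3, 256, 256)
masked = create_circle(image, size=256)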
Example 13: horovod
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def horovod():
    try:
        import horovod.torch as hvd
    except ImportError:
        # Fallback stub that mimics the Horovod API for single-process runs.
        class hvd():
            def init(self):
                pass

            def size(self):
                return 1

            def rank(self):
                return 0
        hvd = hvd()
    return hvd
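Usage is transparent either way — a minimal sketch:

hvd = horovod()  # real horovod.torch if installed, otherwise the stub
hvd.init()
print(hvd.size(), hvd.rank())  # falls back to 1 and 0 without Horovod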
Example 14: load_inception
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def load_inception(path='data/RafD/normal/inception_v3.pth'):
    from torchvision.models import inception_v3
    import torch
    import torch.nn as nn
    state_dict = torch.load(path)
    net = inception_v3(pretrained=False, transform_input=True)
    print("Loading inception_v3 from " + path)
    net.aux_logits = False
    num_ftrs = net.fc.in_features
    net.fc = nn.Linear(num_ftrs, state_dict['fc.weight'].size(0))
    net.load_state_dict(state_dict)
    for param in net.parameters():
        param.requires_grad = False
    return net
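A hedged inference sketch (the checkpoint path defaults to the one above; inception_v3 expects 299x299 inputs):

net = load_inception()
net.eval()
with torch.no_grad():
    logits = net(torch.randn(1, 3, 299, 299))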
Example 15: split
# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import size [as alias]
def split(data):
    # RaGAN uses different data for Dis and Gen
    try:
        if data.size(0) == 1:
            return data, data
        else:
            def split(x):
                if isinstance(x, (list, tuple)):
                    _len = len(x)
                else:
                    _len = x.size(0)
                return x[:_len // 2], x[_len // 2:]
            return split(data)
    except ValueError:
        return data, data
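A hedged usage sketch (batch shape is illustrative):

batch = torch.randn(8, 3, 64, 64)
dis_half, gen_half = split(batch)  # each half holds 4 samples
assert dis_half.size(0) == 4 and gen_half.size(0) == 4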