This article collects typical usage examples of the horovod.torch.init method in Python. If you are unsure exactly how torch.init is used, how to call it, or what it looks like in practice, the curated examples below should help. You can also explore other usage examples from the horovod.torch module that this method belongs to.
The following shows 15 code examples of the torch.init method, sorted by popularity by default.
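Before turning to the individual examples, the sketch below shows where hvd.init() typically sits in a Horovod PyTorch training script. It is a minimal illustration; the model, optimizer, and learning rate are placeholders rather than anything taken from the examples that follow.

import torch
import horovod.torch as hvd

hvd.init()                               # start Horovod; call before any other hvd.* function
torch.cuda.set_device(hvd.local_rank())  # pin each process to one GPU, indexed by local rank

model = torch.nn.Linear(10, 1).cuda()    # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01 * hvd.size())

# Make every worker start from identical parameters and optimizer state.
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# Average gradients across workers with allreduce on every optimizer step.
optimizer = hvd.DistributedOptimizer(optimizer,
                                     named_parameters=model.named_parameters())

Such a script is normally launched with Horovod's launcher, e.g. horovodrun -np 4 python train.py, which starts one process per GPU; each process then calls hvd.init() exactly as in the examples below.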
Example 1: main
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    hvd.init()
    local_rank = hvd.local_rank()
    torch.cuda.set_device(local_rank)
    main_worker(local_rank, 4, args)
Example 2: test_horovod_allreduce_type_error
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allreduce_type_error(self):
    """Test that the allreduce raises an error if different ranks try to
    send tensors of different type."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    # Same dimensions, but different tensor types on even and odd ranks.
    dims = [17] * 3
    if rank % 2 == 0:
        tensor = torch.IntTensor(*dims)
    else:
        tensor = torch.FloatTensor(*dims)

    try:
        hvd.allreduce(tensor)
        assert False, 'hvd.allreduce did not throw error'
    except torch.FatalError:
        pass
Example 3: test_horovod_allreduce_grad
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allreduce_grad(self):
    """Test the correctness of the allreduce gradient."""
    hvd.init()
    size = hvd.size()
    dtypes = [torch.IntTensor, torch.LongTensor,
              torch.FloatTensor, torch.DoubleTensor]
    if torch.cuda.is_available():
        dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                   torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        torch.manual_seed(1234)
        tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
        tensor = tensor.type(dtype)
        tensor = torch.autograd.Variable(tensor, requires_grad=True)
        summed = hvd.allreduce(tensor, average=False)

        summed.backward(torch.ones([17] * dim))
        grad_out = tensor.grad.data.numpy()

        expected = np.ones([17] * dim) * size
        err = np.linalg.norm(expected - grad_out)
        self.assertLess(err, 0.00000001,
                        "gradient %s differs from expected %s, "
                        "error: %s" % (grad_out, expected, str(err)))
Example 4: test_horovod_allreduce_grad_average
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allreduce_grad_average(self):
    """Test the correctness of the allreduce averaged gradient."""
    hvd.init()
    dtypes = [torch.IntTensor, torch.LongTensor,
              torch.FloatTensor, torch.DoubleTensor]
    if torch.cuda.is_available():
        dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                   torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        torch.manual_seed(1234)
        tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
        tensor = tensor.type(dtype)
        tensor = torch.autograd.Variable(tensor, requires_grad=True)
        summed = hvd.allreduce(tensor, average=True)

        summed.backward(torch.ones([17] * dim))
        grad_out = tensor.grad.data.numpy()

        expected = np.ones([17] * dim)
        err = np.linalg.norm(expected - grad_out)
        self.assertLess(err, 0.00000001,
                        "gradient %s differs from expected %s, "
                        "error: %s" % (grad_out, expected, str(err)))
Example 5: test_horovod_allgather_error
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allgather_error(self):
    """Test that the allgather returns an error if any dimension besides
    the first is different among the tensors being gathered."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    tensor_size = [17] * 3
    tensor_size[1] = 10 * (rank + 1)
    tensor = torch.FloatTensor(*tensor_size).fill_(1).mul_(rank)

    try:
        hvd.allgather(tensor)
        assert False, 'hvd.allgather did not throw error'
    except torch.FatalError:
        pass
Example 6: test_horovod_allgather_type_error
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allgather_type_error(self):
    """Test that the allgather returns an error if the types being gathered
    differ among the processes."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    tensor_size = [17] * 3
    if rank % 2 == 0:
        tensor = torch.IntTensor(*tensor_size)
    else:
        tensor = torch.FloatTensor(*tensor_size)

    try:
        hvd.allgather(tensor)
        assert False, 'hvd.allgather did not throw error'
    except torch.FatalError:
        pass
Example 7: test_horovod_broadcast_type_error
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_broadcast_type_error(self):
    """Test that the broadcast returns an error if the types being broadcast
    differ among the processes."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    tensor_size = [17] * 3
    if rank % 2 == 0:
        tensor = torch.IntTensor(*tensor_size)
    else:
        tensor = torch.FloatTensor(*tensor_size)

    try:
        hvd.broadcast(tensor, 0)
        assert False, 'hvd.broadcast did not throw error'
    except torch.FatalError:
        pass
Example 8: test_horovod_broadcast_rank_error
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_broadcast_rank_error(self):
    """Test that the broadcast returns an error if different ranks
    specify different root rank."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    tensor = torch.FloatTensor(*([17] * 3)).fill_(1)

    try:
        hvd.broadcast(tensor, rank)
        assert False, 'hvd.broadcast did not throw error'
    except torch.FatalError:
        pass
Example 9: __init__
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def __init__(self, accumulation_step=1):
    hvd.init()
    self.local_rank = hvd.local_rank()
    self.world_size = hvd.size()
    self.rank = hvd.rank()
    self.n_gpu = torch.cuda.device_count()
    self.node_count = self.world_size // self.n_gpu
    self.accumulation_step = accumulation_step
    self.count_down = accumulation_step - 1
    self._multi_node = self.node_count > 1
    if not self._multi_node:
        # use PyTorch's built-in NCCL backend for single-node training
        torch.distributed.init_process_group(
            backend="nccl",
            init_method="tcp://127.0.0.1:6000",
            world_size=self.n_gpu,
            rank=self.local_rank,
        )
Example 10: test_horovod_allreduce
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allreduce(self):
    """Test that the allreduce correctly sums 1D, 2D, 3D tensors."""
    hvd.init()
    size = hvd.size()
    dtypes = [torch.IntTensor, torch.LongTensor,
              torch.FloatTensor, torch.DoubleTensor]
    if torch.cuda.is_available():
        dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                   torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        torch.manual_seed(1234)
        tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
        tensor = tensor.type(dtype)
        summed = hvd.allreduce(tensor, average=False)
        multiplied = tensor * size
        max_difference = summed.data.sub(multiplied).max()

        # Threshold for floating point equality depends on number of
        # ranks, since we're comparing against precise multiplication.
        if size <= 3 or dtype in [torch.IntTensor, torch.LongTensor,
                                  torch.cuda.IntTensor, torch.cuda.LongTensor]:
            threshold = 0
        elif size < 10:
            threshold = 1e-4
        elif size < 15:
            threshold = 5e-4
        else:
            break

        assert max_difference <= threshold, 'hvd.allreduce produces incorrect results'
Example 11: test_horovod_allreduce_average
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allreduce_average(self):
    """Test that the allreduce correctly averages 1D, 2D, 3D tensors."""
    hvd.init()
    size = hvd.size()
    dtypes = [torch.IntTensor, torch.LongTensor,
              torch.FloatTensor, torch.DoubleTensor]
    if torch.cuda.is_available():
        dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                   torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        torch.manual_seed(1234)
        tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
        tensor = tensor.type(dtype)
        averaged = hvd.allreduce(tensor, average=True)
        max_difference = averaged.data.sub(tensor).max()

        # Threshold for floating point equality depends on number of
        # ranks, since we're comparing against precise multiplication.
        if size <= 3 or dtype in [torch.IntTensor, torch.LongTensor,
                                  torch.cuda.IntTensor, torch.cuda.LongTensor]:
            threshold = 0
        elif size < 10:
            threshold = 1e-4
        elif size < 15:
            threshold = 5e-4
        else:
            break

        assert max_difference <= threshold, 'hvd.allreduce produces incorrect results'
Example 12: test_horovod_allreduce_inplace
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allreduce_inplace(self):
    """Test that the in-place allreduce correctly sums 1D, 2D, 3D tensors."""
    hvd.init()
    size = hvd.size()
    dtypes = [torch.IntTensor, torch.LongTensor,
              torch.FloatTensor, torch.DoubleTensor]
    if torch.cuda.is_available():
        dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                   torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        torch.manual_seed(1234)
        tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
        tensor = tensor.type(dtype)
        multiplied = tensor * size
        hvd.allreduce_(tensor, average=False)
        max_difference = tensor.sub(multiplied).max()

        # Threshold for floating point equality depends on number of
        # ranks, since we're comparing against precise multiplication.
        if size <= 3 or dtype in [torch.IntTensor, torch.LongTensor,
                                  torch.cuda.IntTensor, torch.cuda.LongTensor]:
            threshold = 0
        elif size < 10:
            threshold = 1e-4
        elif size < 15:
            threshold = 5e-4
        else:
            break

        assert max_difference <= threshold, 'hvd.allreduce produces incorrect results'
Example 13: test_horovod_allreduce_multi_gpu
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allreduce_multi_gpu(self):
    """Test that the allreduce works on multiple GPUs."""
    # Only do this test if there are GPUs available.
    if not torch.cuda.is_available():
        return

    hvd.init()
    local_rank = hvd.local_rank()
    size = hvd.size()

    iter = 0
    dtypes = [torch.cuda.IntTensor, torch.cuda.LongTensor,
              torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        iter += 1
        torch.manual_seed(1234)
        tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
        device = local_rank * 2 + (iter + local_rank) % 2
        tensor = tensor.cuda(device).type(dtype)
        multiplied = tensor * size
        hvd.allreduce_(tensor, average=False)
        max_difference = tensor.sub(multiplied).max()

        # Threshold for floating point equality depends on number of
        # ranks, since we're comparing against precise multiplication.
        if size <= 3 or dtype in [torch.cuda.IntTensor, torch.cuda.LongTensor]:
            threshold = 0
        elif size < 10:
            threshold = 1e-4
        elif size < 15:
            threshold = 5e-4
        else:
            break

        assert max_difference <= threshold, 'hvd.allreduce produces incorrect results'
Example 14: test_horovod_allreduce_cpu_gpu_error
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allreduce_cpu_gpu_error(self):
    """Test that the allreduce raises an error if different ranks try to
    perform reduction on CPU and GPU."""
    # Only do this test if there are GPUs available.
    if not torch.cuda.is_available():
        return

    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    # Same dimensions, but GPU tensor on even ranks and CPU tensor on odd ranks.
    dims = [17] * 3
    if rank % 2 == 0:
        tensor = torch.cuda.FloatTensor(*dims)
    else:
        tensor = torch.FloatTensor(*dims)

    try:
        hvd.allreduce(tensor)
        assert False, 'hvd.allreduce did not throw error'
    except torch.FatalError:
        pass
Example 15: test_horovod_allgather
# Required import: from horovod import torch [as hvd]
# Or: from horovod.torch import init [as hvd]
def test_horovod_allgather(self):
    """Test that the allgather correctly gathers 1D, 2D, 3D tensors."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()
    dtypes = [torch.ByteTensor, torch.CharTensor, torch.ShortTensor,
              torch.IntTensor, torch.LongTensor, torch.FloatTensor, torch.DoubleTensor]
    if torch.cuda.is_available():
        dtypes += [torch.cuda.ByteTensor, torch.cuda.CharTensor, torch.cuda.ShortTensor,
                   torch.cuda.IntTensor, torch.cuda.LongTensor, torch.cuda.FloatTensor,
                   torch.cuda.DoubleTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        tensor = torch.FloatTensor(*([17] * dim)).fill_(1).mul_(rank)
        tensor = tensor.type(dtype)
        gathered = hvd.allgather(tensor)

        assert list(gathered.shape) == [17 * size] + [17] * (dim - 1)

        for i in range(size):
            rank_tensor = gathered[i * 17:(i + 1) * 17]
            assert list(rank_tensor.shape) == [17] * dim, \
                'hvd.allgather produces incorrect gathered shape'
            assert rank_tensor.data.min() == i, 'hvd.allgather produces incorrect gathered tensor'
            assert rank_tensor.data.max() == i, 'hvd.allgather produces incorrect gathered tensor'