

Python torch.allreduce Method Code Examples

This article collects typical usage examples of the horovod.torch.allreduce method in Python. If you are wondering what torch.allreduce does or how to call it, the curated code examples below should help. You can also explore further usage examples of other methods in horovod.torch.


The sections below present 15 code examples of the torch.allreduce method, sorted by popularity.
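Before the examples, here is a minimal sketch of the basic call pattern, assuming a script launched under horovodrun with Horovod built against PyTorch; the tensor contents and name arguments are illustrative only.

import torch
import horovod.torch as hvd

hvd.init()  # one process per worker; call before any collective operation

# Each rank contributes its own tensor; shape and dtype must match across ranks.
local_value = torch.ones(3) * hvd.rank()

# average=True (the default) returns the element-wise mean across all ranks;
# average=False returns the element-wise sum.
mean_value = hvd.allreduce(local_value, name='demo_mean')
sum_value = hvd.allreduce(local_value, average=False, name='demo_sum')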

Example 1: test_horovod_allreduce_type_error

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def test_horovod_allreduce_type_error(self):
        """Test that the allreduce raises an error if different ranks try to
        send tensors of different type."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        # Same dimensions, but an IntTensor on even ranks and a FloatTensor on odd ranks
        dims = [17] * 3
        if rank % 2 == 0:
            tensor = torch.IntTensor(*dims)
        else:
            tensor = torch.FloatTensor(*dims)

        try:
            hvd.allreduce(tensor)
            assert False, 'hvd.allreduce did not throw error'
        except torch.FatalError:
            pass 
Developer: mlperf | Project: training_results_v0.6 | Lines: 25 | Source: test_torch.py

Example 2: test_horovod_allreduce_grad

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def test_horovod_allreduce_grad(self):
        """Test the correctness of the allreduce gradient."""
        hvd.init()
        size = hvd.size()
        dtypes = [torch.IntTensor, torch.LongTensor,
                  torch.FloatTensor, torch.DoubleTensor]
        if torch.cuda.is_available():
            dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                       torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
        dims = [1, 2, 3]
        for dtype, dim in itertools.product(dtypes, dims):
            torch.manual_seed(1234)
            tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
            tensor = tensor.type(dtype)
            tensor = torch.autograd.Variable(tensor, requires_grad=True)
            summed = hvd.allreduce(tensor, average=False)

            summed.backward(torch.ones([17] * dim).type(dtype))
            # move the gradient to CPU before converting, since dtype may be a CUDA tensor
            grad_out = tensor.grad.data.cpu().numpy()

            expected = np.ones([17] * dim) * size
            err = np.linalg.norm(expected - grad_out)
            self.assertLess(err, 0.00000001,
                            "gradient %s differs from expected %s, "
                            "error: %s" % (grad_out, expected, str(err))) 
Developer: mlperf | Project: training_results_v0.6 | Lines: 27 | Source: test_torch.py
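Why the expected gradient equals size: with average=False each rank's output is the element-wise sum y = x_1 + x_2 + ... + x_size, and Horovod's backward pass for allreduce is itself an allreduce of the incoming gradient, so seeding backward with a tensor of ones accumulates a gradient of 1 from every one of the size ranks, i.e. np.ones([17] * dim) * size.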

Example 3: reduce_value

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def reduce_value(value, average, name):
    """
    Reduce a tensor's value across all GPUs, optionally averaging

    Parameters
    ----------
    value : torch.Tensor
        Value to be reduced
    average : bool
        Whether values will be averaged or not
    name : str
        Value name

    Returns
    -------
    value : torch.Tensor
        reduced value
    """
    return hvd.allreduce(value, average=average, name=name) 
Developer: TRI-ML | Project: packnet-sfm | Lines: 21 | Source: horovod.py
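A hedged usage sketch (the loss value and name are stand-ins): the wrapper is typically applied to a scalar metric so that every worker ends up logging the same mean value.

loss = torch.tensor(0.5)  # stand-in for a locally computed loss
mean_loss = reduce_value(loss, average=True, name='loss')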

Example 4: _distributed_update

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def _distributed_update(self):
        if not is_distributed():
            return

        if is_horovod_available():
            import horovod.torch as hvd

            # use the in-place allreduce_ so the buffers are actually updated;
            # the out-of-place hvd.allreduce returns a new tensor, which would be discarded here
            hvd.allreduce_(self.track_num)
            hvd.allreduce_(self.track_enc)
            hvd.allreduce_(self.embed)
        else:
            distributed.all_reduce(self.track_num, op=distributed.ReduceOp.SUM)
            distributed.all_reduce(self.track_enc, op=distributed.ReduceOp.SUM)
            distributed.all_reduce(self.embed, op=distributed.ReduceOp.SUM)
            ws = distributed.get_world_size()
            self.track_num /= ws
            self.track_enc /= ws
            self.embed /= ws 
Developer: moskomule | Project: homura | Lines: 20 | Source: vq.py

Example 5: metric_average

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def metric_average(val, name):
    tensor = torch.FloatTensor([val])
    avg_tensor = hvd.allreduce(tensor, name=name)
    return avg_tensor.data[0] 
Developer: mlperf | Project: training_results_v0.6 | Lines: 6 | Source: pytorch_mnist.py
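A sketch of a typical call site at the end of a validation loop; the accuracy value and metric name are illustrative only.

local_accuracy = 0.92  # stand-in for a metric computed on this rank's data shard
global_accuracy = metric_average(local_accuracy, 'avg_accuracy')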

Example 6: update

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def update(self, val):
        self.sum += hvd.allreduce(val.cpu(), name=self.name)
        self.n += 1 
Developer: mlperf | Project: training_results_v0.6 | Lines: 5 | Source: pytorch_imagenet_resnet50.py

Example 7: test_horovod_allreduce

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def test_horovod_allreduce(self):
        """Test that the allreduce correctly sums 1D, 2D, 3D tensors."""
        hvd.init()
        size = hvd.size()
        dtypes = [torch.IntTensor, torch.LongTensor,
                  torch.FloatTensor, torch.DoubleTensor]
        if torch.cuda.is_available():
            dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                       torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
        dims = [1, 2, 3]
        for dtype, dim in itertools.product(dtypes, dims):
            torch.manual_seed(1234)
            tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
            tensor = tensor.type(dtype)
            summed = hvd.allreduce(tensor, average=False)
            multiplied = tensor * size
            max_difference = summed.data.sub(multiplied).max()

            # Threshold for floating point equality depends on number of
            # ranks, since we're comparing against precise multiplication.
            if size <= 3 or dtype in [torch.IntTensor, torch.LongTensor,
                                      torch.cuda.IntTensor, torch.cuda.LongTensor]:
                threshold = 0
            elif size < 10:
                threshold = 1e-4
            elif size < 15:
                threshold = 5e-4
            else:
                break

            assert max_difference <= threshold, 'hvd.allreduce produces incorrect results' 
Developer: mlperf | Project: training_results_v0.6 | Lines: 33 | Source: test_torch.py

Example 8: test_horovod_allreduce_average

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def test_horovod_allreduce_average(self):
        """Test that the allreduce correctly sums 1D, 2D, 3D tensors."""
        hvd.init()
        size = hvd.size()
        dtypes = [torch.IntTensor, torch.LongTensor,
                  torch.FloatTensor, torch.DoubleTensor]
        if torch.cuda.is_available():
            dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                       torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
        dims = [1, 2, 3]
        for dtype, dim in itertools.product(dtypes, dims):
            torch.manual_seed(1234)
            tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
            tensor = tensor.type(dtype)
            averaged = hvd.allreduce(tensor, average=True)
            max_difference = averaged.data.sub(tensor).max()

            # Threshold for floating point equality depends on number of
            # ranks, since we're comparing against the original tensor.
            if size <= 3 or dtype in [torch.IntTensor, torch.LongTensor,
                                      torch.cuda.IntTensor, torch.cuda.LongTensor]:
                threshold = 0
            elif size < 10:
                threshold = 1e-4
            elif size < 15:
                threshold = 5e-4
            else:
                break

            assert max_difference <= threshold, 'hvd.allreduce produces incorrect results' 
Developer: mlperf | Project: training_results_v0.6 | Lines: 32 | Source: test_torch.py

Example 9: test_horovod_allreduce_inplace

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def test_horovod_allreduce_inplace(self):
        """Test that the allreduce correctly sums 1D, 2D, 3D tensors."""
        hvd.init()
        size = hvd.size()
        dtypes = [torch.IntTensor, torch.LongTensor,
                  torch.FloatTensor, torch.DoubleTensor]
        if torch.cuda.is_available():
            dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                       torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
        dims = [1, 2, 3]
        for dtype, dim in itertools.product(dtypes, dims):
            torch.manual_seed(1234)
            tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
            tensor = tensor.type(dtype)
            multiplied = tensor * size
            hvd.allreduce_(tensor, average=False)
            max_difference = tensor.sub(multiplied).max()

            # Threshold for floating point equality depends on number of
            # ranks, since we're comparing against precise multiplication.
            if size <= 3 or dtype in [torch.IntTensor, torch.LongTensor,
                                      torch.cuda.IntTensor, torch.cuda.LongTensor]:
                threshold = 0
            elif size < 10:
                threshold = 1e-4
            elif size < 15:
                threshold = 5e-4
            else:
                break

            assert max_difference <= threshold, 'hvd.allreduce produces incorrect results' 
Developer: mlperf | Project: training_results_v0.6 | Lines: 33 | Source: test_torch.py

Example 10: test_horovod_allreduce_multi_gpu

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def test_horovod_allreduce_multi_gpu(self):
        """Test that the allreduce works on multiple GPUs."""
        # Only do this test if there are GPUs available.
        if not torch.cuda.is_available():
            return

        hvd.init()
        local_rank = hvd.local_rank()
        size = hvd.size()

        count = 0
        dtypes = [torch.cuda.IntTensor, torch.cuda.LongTensor,
                  torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
        dims = [1, 2, 3]
        for dtype, dim in itertools.product(dtypes, dims):
            count += 1
            torch.manual_seed(1234)
            tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
            # alternate between the two GPUs assigned to this rank
            device = local_rank * 2 + (count + local_rank) % 2
            tensor = tensor.cuda(device).type(dtype)
            multiplied = tensor * size
            hvd.allreduce_(tensor, average=False)
            max_difference = tensor.sub(multiplied).max()

            # Threshold for floating point equality depends on number of
            # ranks, since we're comparing against precise multiplication.
            if size <= 3 or dtype in [torch.cuda.IntTensor, torch.cuda.LongTensor]:
                threshold = 0
            elif size < 10:
                threshold = 1e-4
            elif size < 15:
                threshold = 5e-4
            else:
                break

            assert max_difference <= threshold, 'hvd.allreduce produces incorrect results' 
Developer: mlperf | Project: training_results_v0.6 | Lines: 38 | Source: test_torch.py

Example 11: test_horovod_allreduce_cpu_gpu_error

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def test_horovod_allreduce_cpu_gpu_error(self):
        """Test that the allreduce raises an error if different ranks try to
        perform reduction on CPU and GPU."""
        # Only do this test if there are GPUs available.
        if not torch.cuda.is_available():
            return

        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        # Same dimensions, but a GPU tensor on even ranks and a CPU tensor on odd ranks
        dims = [17] * 3
        if rank % 2 == 0:
            tensor = torch.cuda.FloatTensor(*dims)
        else:
            tensor = torch.FloatTensor(*dims)

        try:
            hvd.allreduce(tensor)
            assert False, 'hvd.allreduce did not throw error'
        except torch.FatalError:
            pass 
Developer: mlperf | Project: training_results_v0.6 | Lines: 29 | Source: test_torch.py

Example 12: metric_average

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def metric_average(val, name):
    tensor = torch.tensor(val)
    avg_tensor = hvd.allreduce(tensor, name=name)
    return avg_tensor.item() 
Developer: csc-training | Project: intro-to-dl | Lines: 6 | Source: pytorch_dvc_cnn_hvd.py

Example 13: update

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def update(self, val):
        self.sum += hvd.allreduce(val.detach().cpu(), name=self.name)
        self.n += 1 
Developer: ddkang | Project: advex-uar | Lines: 5 | Source: trainer.py

Example 14: _init_loaders

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def _init_loaders(self):
        allreduce_batch_size = self.batch_size * self.batches_per_allreduce

        if hvd.local_rank() != 0:
            # allreduce-as-barrier: wait here until local rank 0 has downloaded the data
            hvd.allreduce(torch.tensor(0), name='barrier')
        self.train_dataset = datasets.CIFAR10(
                root=self.dataset_path, download=(hvd.local_rank() == 0),
                train=True,
                transform=transforms.Compose([
                        transforms.RandomHorizontalFlip(),
                        transforms.RandomCrop(32, 4),
                        transforms.ToTensor(),
                        self.normalize,]))
        if hvd.local_rank() == 0:
            # local rank 0 arrives last; its allreduce releases the waiting ranks
            hvd.allreduce(torch.tensor(0), name='barrier')
        self.val_dataset = datasets.CIFAR10(
                root=self.dataset_path,
                train=False,
                transform=transforms.Compose([
                        transforms.ToTensor(),
                        self.normalize,]))
        self.train_loader = torch.utils.data.DataLoader(
                self.train_dataset, batch_size=allreduce_batch_size,
                shuffle=True, num_workers=8, pin_memory=True)
        self.val_loader = torch.utils.data.DataLoader(
                self.val_dataset, batch_size=allreduce_batch_size,
                shuffle=False, num_workers=8, pin_memory=True) 
Developer: ddkang | Project: advex-uar | Lines: 29 | Source: trainer.py
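The dummy-tensor allreduce above is a common Horovod barrier idiom: ranks other than local rank 0 block on the collective until local rank 0, which skipped it, finishes downloading and issues its own matching allreduce. A generic sketch of the pattern, assuming hvd.init() has been called; the function and its make_dataset argument are hypothetical.

def download_once_per_node(make_dataset):
    # make_dataset(download: bool) stands in for e.g. a datasets.CIFAR10 constructor
    if hvd.local_rank() != 0:
        hvd.allreduce(torch.tensor(0), name='barrier')  # wait for local rank 0
    dataset = make_dataset(download=(hvd.local_rank() == 0))
    if hvd.local_rank() == 0:
        hvd.allreduce(torch.tensor(0), name='barrier')  # release the waiting ranks
    return dataset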

Example 15: reduce

# Required import: from horovod import torch [as alias]
# Or: from horovod.torch import allreduce [as alias]
def reduce(self,
               tensor: torch.Tensor):

        if not torch.is_tensor(tensor):
            return tensor
        if is_distributed() and not self._no_reduce:
            if is_horovod_available():
                import horovod.torch as hvd

                # Horovod's allreduce averages across ranks by default
                return hvd.allreduce(tensor)
            # PyTorch's all_reduce sums in place without averaging
            distributed.all_reduce(tensor, op=distributed.ReduceOp.SUM)
            return tensor / distributed.get_world_size()
        return tensor 
Developer: moskomule | Project: homura | Lines: 17 | Source: metrics.py
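The design point here is the differing defaults: hvd.allreduce returns the average unless average=False is passed, while torch.distributed.all_reduce sums in place, which is why the fallback branch divides by the world size. A sketch of the equivalence, assuming either backend is already initialized; the tensor values are illustrative.

t = torch.tensor([2.0])          # per-rank value
mean_via_hvd = hvd.allreduce(t)  # average=True is the default

u = torch.tensor([2.0])
distributed.all_reduce(u, op=distributed.ReduceOp.SUM)  # in-place sum
mean_via_dist = u / distributed.get_world_size()
# mean_via_hvd and mean_via_dist agree up to floating-point rounding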


Note: The horovod.torch.allreduce examples above were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their developers; copyright remains with the original authors, and use or distribution should follow each project's license. Do not reproduce without permission.