

Python distributed.broadcast Method Code Examples

This article collects typical usage examples of the Python method torch.distributed.broadcast. If you are wondering how exactly distributed.broadcast is used, how to call it, or what real-world calls look like, the curated code examples below may help. You can also explore further usage examples from the torch.distributed module it belongs to.


The sections below present 15 code examples of distributed.broadcast, sorted by popularity by default.
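
Before the examples, a minimal sketch of the basic call pattern may be useful: every rank passes a tensor of the same shape and dtype, and dist.broadcast overwrites it in place with the values held by the src rank. The sketch below is not taken from any of the projects listed here; it assumes the gloo backend and the MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE environment variables set by a launcher such as torchrun.

import torch
import torch.distributed as dist

def demo_broadcast():
    # Assumes env:// initialization via variables set by the launcher.
    dist.init_process_group(backend="gloo")
    rank = dist.get_rank()

    # Every rank must pass a tensor of identical shape and dtype.
    tensor = torch.zeros(4)
    if rank == 0:
        tensor = torch.arange(4, dtype=torch.float32)

    # After this call, all ranks hold rank 0's values.
    dist.broadcast(tensor, src=0)
    print(f"rank {rank}: {tensor.tolist()}")

    dist.destroy_process_group()

if __name__ == "__main__":
    demo_broadcast()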

Example 1: sync

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def sync(self, src, grp=None, async_op=False):

        keys = []
        handles = []

        for k, t in self.state_dict().items():
            if grp is None:
                h = dist.broadcast(t, src, async_op=True)
            else:
                h = dist.broadcast(t, src, grp, async_op=True)

            keys.append(k)
            handles.append(h)

        if not async_op:
            for k, h in zip(keys, handles):
                h.wait()

        return handles 
Developer: heronsystems, Project: adeptRL, Lines: 21, Source: base.py

Example 2: broadcast_obj

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def broadcast_obj(self, obj, src, group=None):
        """Broadcasts a given object to all parties."""
        if group is None:
            group = self.main_group

        if self.rank == src:
            assert obj is not None, "src party must provide obj for broadcast"
            buf = pickle.dumps(obj)
            size = torch.tensor(len(buf), dtype=torch.int32)
            arr = torch.from_numpy(numpy.frombuffer(buf, dtype=numpy.int8))

            dist.broadcast(size, src, group=group)
            dist.broadcast(arr, src, group=group)
        else:
            size = torch.tensor(1, dtype=torch.int32)
            dist.broadcast(size, src, group=group)

            data = torch.empty(size=(size,), dtype=torch.int8)
            dist.broadcast(data, src, group=group)
            buf = data.numpy().tobytes()
            obj = serial.restricted_loads(buf)
        return obj 
Developer: facebookresearch, Project: CrypTen, Lines: 24, Source: distributed_communicator.py

Example 3: broadcast

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def broadcast(self, input, src, batched=False):
        """Broadcasts the tensor to all parties."""
        assert dist.is_initialized(), "initialize the communicator first"
        if batched:
            assert isinstance(input, list), "batched broadcast input must be a list"
            reqs = []
            for tensor in input:
                reqs.append(
                    dist.broadcast(tensor, src, group=self.main_group, async_op=True)
                )
            for req in reqs:
                req.wait()
        else:
            assert torch.is_tensor(
                input.data
            ), "unbatched input for reduce must be a torch tensor"
            dist.broadcast(input.data, src, group=self.main_group)
        return input 
Developer: facebookresearch, Project: CrypTen, Lines: 20, Source: distributed_communicator.py

Example 4: broadcast_coalesced

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def broadcast_coalesced(tensors, src=0, buffer_size=10 * MB):
        r"""
        Broadcast a sequence of tensors to the default group from rank 0.
        Small tensors are first coalesced into a buffer to reduce the number of
        broadcasts.

        tensors (sequence): tensors to broadcast. Each tensor needs to be on the
                            same GPU.
        src (int): src rank. Default: 0.
        buffer_size (int): maximum size of the buffer for coalescing. Default: 10MB.
        """
        for tensors in _take_tensors(tensors, buffer_size):
            flat_tensors = _flatten_dense_tensors(tensors)
            dist.broadcast(flat_tensors, src)
            for old_t, new_t in zip(tensors, _unflatten_dense_tensors(flat_tensors, tensors)):
                old_t.data = new_t 
Developer: SsnL, Project: dataset-distillation, Lines: 18, Source: distributed.py
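
A hedged usage sketch for the helper above (an assumption, not code from the dataset-distillation project): after building identical models on every rank, the coalesced broadcast can synchronize all parameters from rank 0 with only a handful of collective calls.

import torch.nn as nn

# Assumes dist.init_process_group has already been called and each rank owns
# one CUDA device; broadcast_coalesced is the function defined above.
model = nn.Linear(128, 10).cuda()
broadcast_coalesced(list(model.parameters()), src=0)
# All ranks now hold rank 0's initial weights.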

Example 5: receive_tensor_helper

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def receive_tensor_helper(tensor, src_rank, group, tag, num_iterations,
                          broadcast):
    dist.barrier() 
    start_time = time.time()
    for i in range(num_iterations):
        if broadcast:
            dist.broadcast(tensor=tensor, group=group, src=src_rank)
        else:
            dist.recv(tensor=tensor.cpu(), src=src_rank, tag=tag)
    end_time = time.time()
    dist.barrier()
    size = tensor.size()[0]
    throughput = (size * 4. * num_iterations) / (
        (end_time - start_time) * 10**9)
    print("Time to receive %s MB: %.3f seconds" %
        ((size * 4.) / 10**6,
         (end_time - start_time) / num_iterations))
    print("Throughput: %.3f GB/s" % throughput) 
Developer: msr-fiddle, Project: pipedream, Lines: 20, Source: point_to_point.py

Example 6: train_step

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def train_step(self, *args, **kwargs):
        if self.need_reinit:
            if dist.is_initialized():
                # parallel mode
                print("wait for barrier")
                dist.barrier()
                print("start to broadcast")
                for p in self._raw_model.parameters():
                    dist.broadcast(p.data, 0)
                print("wrap with DDP")
                self._ddp_model = nn.parallel.DistributedDataParallel(
                    self._raw_model,
                    broadcast_buffers=False,
                    check_reduction=True,
                )
            else:
                # single worker mode
                # skip all reduce
                print("single worker mode")
                self._ddp_model = self._raw_model

            self._optimizer = optim.SGD(self._ddp_model.parameters(), lr=1e-3)
            self.need_reinit = False
        self._train_step(*args, **kwargs) 
Developer: caicloud, Project: ftlib, Lines: 26, Source: main.py

Example 7: collect_results_cpu

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def collect_results_cpu(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, f'part_{i}.pkl')
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results 
Developer: open-mmlab, Project: mmdetection, Lines: 42, Source: test.py

Example 8: collect_results

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def collect_results(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, f'part_{i}.pkl')
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results 
Developer: open-mmlab, Project: mmdetection, Lines: 42, Source: test_robustness.py

Example 9: collect_results

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def collect_results(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results 
Developer: dingjiansw101, Project: AerialDetection, Lines: 42, Source: test_robustness.py

Example 10: __init__

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def __init__(self, module):
        super(DistributedDataParallel, self).__init__()
        self.warn_on_half = True  # True if dist._backend == dist.dist_backend.GLOO else False

        self.module = module

        for p in self.module.state_dict().values():
            if torch.is_tensor(p):
                dist.broadcast(p, 0)

        def allreduce_params():
            if(self.needs_reduction):
                self.needs_reduction = False
                buckets = {}
                for param in self.module.parameters():
                    if param.requires_grad and param.grad is not None:
                        tp = type(param.data)
                        if tp not in buckets:
                            buckets[tp] = []
                        buckets[tp].append(param)
                if self.warn_on_half:
                    if torch.cuda.HalfTensor in buckets:
                        print("WARNING: gloo dist backend for half parameters may be extremely slow." +
                              " It is recommended to use the NCCL backend in this case.")
                        self.warn_on_half = False

                for tp in buckets:
                    bucket = buckets[tp]
                    grads = [param.grad.data for param in bucket]
                    coalesced = _flatten_dense_tensors(grads)
                    dist.all_reduce(coalesced)
                    coalesced /= dist.get_world_size()
                    for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)):
                        buf.copy_(synced)

        for param in list(self.module.parameters()):
            if param.requires_grad:
                def allreduce_hook(*unused):
                    param._execution_engine.queue_callback(allreduce_params)
                param.register_hook(allreduce_hook) 
Developer: salesforce, Project: decaNLP, Lines: 42, Source: distributed_data_parallel.py

Example 11: collect_results

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def collect_results(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)

    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results 
Developer: DeepMotionAIResearch, Project: DenseMatchingBenchmark, Lines: 39, Source: test.py

Example 12: _broadcast

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def _broadcast(rank, rows, columns):
    source = 0
    tensor = _get_tensor(rank, rows, columns)
    logger.debug('Rank: {},\nTensor BEFORE broadcast: {}'.format(rank, tensor))
    dist.broadcast(tensor, src=source)
    logger.debug('Rank: {},\nTensor AFTER broadcast: {}\n'.format(rank, tensor))

    assert torch.equal(tensor, _get_tensor(source, rows, columns)), \
        'Rank {}: Tensor was not equal to rank {} tensor after broadcast.'.format(rank, source) 
Developer: aws, Project: sagemaker-pytorch-training-toolkit, Lines: 11, Source: distributed_operations.py
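
The test above relies on a _get_tensor helper and a logger that are not shown in the snippet. A plausible stand-in, purely an assumption rather than the toolkit's actual code, builds a deterministic per-rank tensor so that the post-broadcast assertion can compare every rank's result against rank 0's tensor:

import logging

import torch

logger = logging.getLogger(__name__)

def _get_tensor(rank, rows, columns):
    # Every element equals the rank, so after broadcasting from rank 0 all
    # ranks should hold an all-zeros tensor of shape (rows, columns).
    return torch.full((rows, columns), float(rank))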

Example 13: add_args

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def add_args(parser):
        """Add optimizer-specific arguments to the parser."""
        parser.add_argument(
            "--block-lr", default=1, type=float, help="block learning rate for bmuf"
        )
        parser.add_argument(
            "--block-momentum",
            default=0.875,
            type=float,
            help="block momentum for bmuf",
        )
        parser.add_argument(
            "--global-sync-iter",
            default=50,
            type=int,
            help="Iteration for syncing global model",
        )
        parser.add_argument(
            "--warmup-iterations",
            default=500,
            type=int,
            help="warmup iterations for model to broadcast",
        )
        parser.add_argument(
            "--use-nbm",
            default=False,
            action="store_true",
            help="Specify whether you want to use classical BM / Nesterov BM",
        )
        parser.add_argument(
            "--average-sync",
            default=False,
            action="store_true",
            help="Specify whether you want to average the local momentum after each sync",
        ) 
Developer: pytorch, Project: fairseq, Lines: 37, Source: bmuf.py

Example 14: _warmup_sync

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def _warmup_sync(self, root_rank=0):
        if self.world_size <= 1:
            return
        # Broadcast the local model to all gpus
        for param in self.params:
            dist.broadcast(param.data, src=root_rank)

        # Update local optimizer state
        if self.average_sync:
            self._optimizer.average_params()
        else:
            self._optimizer.load_state_dict(self.initial_state)

        self._reset_local_data() 
Developer: pytorch, Project: fairseq, Lines: 16, Source: bmuf.py

Example 15: broadcast_params

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import broadcast [as alias]
def broadcast_params(model):
    """ broadcast model parameters """
    for p in model.state_dict().values():
        dist.broadcast(p, 0) 
Developer: XiaohangZhan, Project: conditional-motion-propagation, Lines: 6, Source: distributed_utils.py
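
A hedged usage sketch (not from the conditional-motion-propagation project): this helper is typically called once, right after process-group initialization and model construction, so that every worker starts training from rank 0's weights.

import torch.nn as nn

# Assumes dist.init_process_group(...) has already run on every rank and that
# broadcast_params is the function defined above.
model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU(), nn.Conv2d(16, 3, 3))
broadcast_params(model)  # all ranks now share rank 0's initial parameters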


Note: The torch.distributed.broadcast examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective authors, and copyright of the source code remains with those authors. Please consult each project's license before redistributing or using the code; do not reproduce without permission.