Python distributed.barrier方法代碼示例

本文整理匯總了Python中torch.distributed.barrier方法的典型用法代碼示例。如果您正苦於以下問題：Python distributed.barrier方法的具體用法？Python distributed.barrier怎麽用？Python distributed.barrier使用的例子？那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類torch.distributed的用法示例。

在下文中一共展示了distributed.barrier方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: init_distributed_mode

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def init_distributed_mode(args):
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)

開發者ID:lopuhin，項目名稱:kaggle-kuzushiji-2019，代碼行數:25，代碼來源:utils.py

示例2: step

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def step(self, loss):
        self.optimizer.zero_grad()
        loss.backward()
        dist.barrier()
        handles = []
        for param in self.network.parameters():
            handles.append(dist.all_reduce(param.grad, async_op=True))
        for handle in handles:
            handle.wait()
        if self.divide_grad:
            for param in self.network.parameters():
                param.grad.mul_(1.0 / self.world_sz)
        if self.grad_norm_clip:
            nn.utils.clip_grad_norm_(
                self.network.parameters(), self.grad_norm_clip
            )
        self.optimizer.step()

開發者ID:heronsystems，項目名稱:adeptRL，代碼行數:19，代碼來源:distrib.py

示例3: step

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def step(self):
        print(f"learner {self.rank} step")

        # make sure exp_handles are done
        for handle in self.exp_handles:
            handle.wait()

        # batch together exp
        time.sleep(random.randint(0, 3))

        # update with other learners
        dist.barrier(self.learner_group)
        for p in self.network_grads:
            dist.all_reduce(p, group=self.learner_group)
        print(f"learner {self.rank} shared gradients")
        return True

開發者ID:heronsystems，項目名稱:adeptRL，代碼行數:18，代碼來源:ray_container.py

示例4: receive_tensor_helper

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def receive_tensor_helper(tensor, src_rank, group, tag, num_iterations,
                          broadcast):
    dist.barrier() 
    start_time = time.time()
    for i in range(num_iterations):
        if broadcast:
            dist.broadcast(tensor=tensor, group=group, src=src_rank)
        else:
            dist.recv(tensor=tensor.cpu(), src=src_rank, tag=tag)
    end_time = time.time()
    dist.barrier()
    size = tensor.size()[0]
    throughput = (size * 4. * num_iterations) / (
        (end_time - start_time) * 10**9)
    print("Time to receive %s MB: %.3f seconds" %
        ((size * 4.) / 10**6,
         (end_time - start_time) / num_iterations))
    print("Throughput: %.3f GB/s" % throughput)

開發者ID:msr-fiddle，項目名稱:pipedream，代碼行數:20，代碼來源:point_to_point.py

示例5: collect_results_cpu

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def collect_results_cpu(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, f'part_{i}.pkl')
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results

開發者ID:open-mmlab，項目名稱:mmdetection，代碼行數:42，代碼來源:test.py

示例6: collect_results

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def collect_results(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, f'part_{i}.pkl')
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results

開發者ID:open-mmlab，項目名稱:mmdetection，代碼行數:42，代碼來源:test_robustness.py

示例7: _init_summary_writer

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def _init_summary_writer(self):
        if self.is_master_node():
            self.logging('Init Summary Writer')
            current_time = datetime.now().strftime('%b%d_%H-%M-%S')
            sum_dir = '{}-{}'.format(self.setting.summary_dir_name, current_time)
            self.summary_writer = SummaryWriter(sum_dir)
            self.logging('Writing summary into {}'.format(sum_dir))

        if self.in_distributed_mode():
            # TODO: maybe this can be removed
            dist.barrier()

開發者ID:dolphin-zs，項目名稱:Doc2EDAG，代碼行數:13，代碼來源:base_task.py

示例8: synchronize

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def synchronize():
    """
    Helper function to synchronize (barrier) among all processes when
    using distributed training
    """
    if not dist.is_available():
        return
    if not dist.is_initialized():
        return
    world_size = dist.get_world_size()
    if world_size == 1:
        return
    dist.barrier()

開發者ID:Res2Net，項目名稱:Res2Net-maskrcnn，代碼行數:15，代碼來源:comm.py

示例9: after_train_epoch

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def after_train_epoch(self, runner):
        if not self.every_n_epochs(runner, self.interval):
            return
        runner.model.eval()
        results = [None for _ in range(len(self.dataset))]
        if runner.rank == 0:
            prog_bar = mmcv.ProgressBar(len(self.dataset))
        for idx in range(runner.rank, len(self.dataset), runner.world_size):
            data = self.dataset[idx]
            data_gpu = scatter(
                collate([data], samples_per_gpu=1),
                [torch.cuda.current_device()])[0]

            # compute output
            with torch.no_grad():
                result = runner.model(
                    return_loss=False, rescale=True, **data_gpu)
            results[idx] = result

            batch_size = runner.world_size
            if runner.rank == 0:
                for _ in range(batch_size):
                    prog_bar.update()

        if runner.rank == 0:
            print('\n')
            dist.barrier()
            for i in range(1, runner.world_size):
                tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i))
                tmp_results = mmcv.load(tmp_file)
                for idx in range(i, len(results), runner.world_size):
                    results[idx] = tmp_results[idx]
                os.remove(tmp_file)
            self.evaluate(runner, results)
        else:
            tmp_file = osp.join(runner.work_dir,
                                'temp_{}.pkl'.format(runner.rank))
            mmcv.dump(results, tmp_file)
            dist.barrier()
        dist.barrier()

開發者ID:dingjiansw101，項目名稱:AerialDetection，代碼行數:42，代碼來源:eval_hooks.py

示例10: collect_results

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def collect_results(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results

開發者ID:dingjiansw101，項目名稱:AerialDetection，代碼行數:42，代碼來源:test_robustness.py

示例11: summarize_mp

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def summarize_mp(predictions, annotations_file, img_list, log_dir, mask=False):
    # Write partial results to file (all workers)
    rank = dist.get_rank()
    with open(path.join(log_dir, "coco_ap_{:02d}.json".format(rank)), "w") as fid:
        json.dump(predictions, fid)
    with open(path.join(log_dir, "img_list_{:02d}.json".format(rank)), "w") as fid:
        json.dump(img_list, fid)

    dist.barrier()

    # Merge results from all workers and run evaluation (only rank 0)
    if rank == 0:
        predictions = []
        img_list = []

        for i in range(dist.get_world_size()):
            coco_ap_file = path.join(log_dir, "coco_ap_{:02d}.json".format(i))
            with open(coco_ap_file) as fid:
                predictions += json.load(fid)
            remove(coco_ap_file)

            img_list_file = path.join(log_dir, "img_list_{:02d}.json".format(i))
            with open(img_list_file) as fid:
                img_list += json.load(fid)
            remove(img_list_file)

        det_map, msk_map = summarize(predictions, annotations_file, img_list, mask)
    else:
        det_map, msk_map = 0, 0

    dist.barrier()

    return det_map, msk_map

開發者ID:mapillary，項目名稱:seamseg，代碼行數:35，代碼來源:coco_ap.py

示例12: collect_results

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def collect_results(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            tmpdir = torch.Tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)

    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results

開發者ID:DeepMotionAIResearch，項目名稱:DenseMatchingBenchmark，代碼行數:39，代碼來源:test.py

示例13: _barrier

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def _barrier(rank):
    logger.debug('Rank: {}, Waiting for other processes before the barrier.'.format(rank))
    dist.barrier()
    logger.debug('Rank: {}, Passing the barrier'.format(rank))

開發者ID:aws，項目名稱:sagemaker-pytorch-training-toolkit，代碼行數:6，代碼來源:distributed_operations.py

示例14: _do_validation

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def _do_validation(self):
        with self.ddp_model.no_sync():
            # 因為模型參數不更新，可以關閉同步
            self.callback_manager.on_valid_begin()
            eval_res = self.test_manager.on_valid_begin()
            eval_res = list(filter(lambda x: x is not None, eval_res))
            if len(eval_res):
                eval_res, is_better = list(zip(*eval_res))
                eval_res = eval_res[0]
                is_better = is_better[0]
            else:
                eval_res, is_better = None, None
            if self.metric_key is None and eval_res is not None:
                eval_res0 = list(eval_res.values())[0]
                self.metric_key = list(eval_res0.keys())[0]
            # logger.info('{}, {}'.format(eval_res, is_better))
            # save better model on master node
            if is_better is not None and self.cp_save_path:
                if is_better:
                    self.save_check_point(self._best_save_name(), only_params=False)
            dist.barrier()

            if not self.is_master and self.metric_key is None:
                # 主進程自動得到了metric_key，而其它進程沒有
                prefix = 'best_' + self.model.__class__.__name__
                suffix = self.start_time
                fn_list = os.listdir(self.cp_save_path)
                fn_list = [fn for fn in fn_list if fn.startswith(prefix) and fn.endswith(suffix)]
                if len(fn_list) == 1:
                    best_name = fn_list[0]
                    self.metric_key = best_name[len(prefix):-len(suffix)].strip('_')
            # print('RANK {} metric_key {}'.format(self.rank, self.metric_key))
            self.callback_manager.on_valid_end(
                eval_res, self.metric_key, self.optimizer, is_better)
            self.ddp_model.train()

開發者ID:fastnlp，項目名稱:fastNLP，代碼行數:37，代碼來源:dist_trainer.py

示例15: synchronize_between_processes

# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import barrier [as 別名]
def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

開發者ID:lopuhin，項目名稱:kaggle-kuzushiji-2019，代碼行數:14，代碼來源:utils.py

注：本文中的torch.distributed.barrier方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。