Python distributed.barrier方法代码示例

本文整理汇总了Python中torch.distributed.barrier方法的典型用法代码示例。如果您正苦于以下问题：Python distributed.barrier方法的具体用法？Python distributed.barrier怎么用？Python distributed.barrier使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类torch.distributed的用法示例。

在下文中一共展示了distributed.barrier方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: init_distributed_mode

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def init_distributed_mode(args):
    """Configure ``torch.distributed`` from launcher environment variables.

    Two launch styles are recognised:

    * ``RANK`` and ``WORLD_SIZE`` present: ``args.rank``, ``args.world_size``
      and ``args.gpu`` (from ``LOCAL_RANK``) are read from the environment.
    * ``SLURM_PROCID`` present: ``args.rank`` is read from it and
      ``args.gpu`` is the rank modulo the visible CUDA device count.
      ``args.world_size`` is *not* assigned on this path and must already
      be set on ``args``.

    When neither is present, ``args.distributed`` is set to ``False`` and
    the function returns without touching anything else.  Otherwise the
    process group is created with the NCCL backend using ``args.dist_url``,
    all ranks meet at a barrier, and ``setup_for_distributed`` is called
    with ``True`` only on rank 0.
    """
    env = os.environ
    torch_launch = 'RANK' in env and 'WORLD_SIZE' in env

    # Guard clause: no known launcher -> single-process mode.
    if not torch_launch and 'SLURM_PROCID' not in env:
        print('Not using distributed mode')
        args.distributed = False
        return

    if torch_launch:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    banner = '| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url)
    print(banner, flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    # Make sure every rank has joined the group before continuing.
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)

开发者ID:lopuhin，项目名称:kaggle-kuzushiji-2019，代码行数:25，代码来源:utils.py

示例2: step

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def step(self, loss):
        """Back-propagate ``loss``, all-reduce gradients, then update weights.

        After the backward pass every rank waits at a barrier, then one
        asynchronous ``all_reduce`` (default reduction op) is launched per
        parameter gradient and all of them are awaited.  If
        ``self.divide_grad`` is truthy each gradient is scaled by
        ``1 / self.world_sz``; if ``self.grad_norm_clip`` is truthy the
        gradient norm is clipped to that value.  Finally the optimizer steps.

        Args:
            loss: value on which ``.backward()`` is called.
        """
        self.optimizer.zero_grad()
        loss.backward()
        dist.barrier()

        # Launch every reduction first so they can overlap, then wait on all.
        pending = [
            dist.all_reduce(p.grad, async_op=True)
            for p in self.network.parameters()
        ]
        for work in pending:
            work.wait()

        if self.divide_grad:
            for p in self.network.parameters():
                p.grad.mul_(1.0 / self.world_sz)

        max_norm = self.grad_norm_clip
        if max_norm:
            nn.utils.clip_grad_norm_(self.network.parameters(), max_norm)

        self.optimizer.step()

开发者ID:heronsystems，项目名称:adeptRL，代码行数:19，代码来源:distrib.py

示例3: step

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def step(self):
        """Run one learner step and share gradients within the learner group.

        Waits for every handle in ``self.exp_handles``, sleeps for a random
        whole number of seconds in ``[0, 3]`` (stand-in for batching the
        experience), synchronises with ``self.learner_group`` at a barrier
        and all-reduces every tensor in ``self.network_grads`` over that
        group.

        Returns:
            ``True`` always.
        """
        print(f"learner {self.rank} step")

        # Outstanding experience transfers must finish before batching.
        for pending in self.exp_handles:
            pending.wait()

        # Placeholder for the batching work.
        time.sleep(random.randint(0, 3))

        # Exchange gradients with the other learners.
        group = self.learner_group
        dist.barrier(group)
        for grad in self.network_grads:
            dist.all_reduce(grad, group=group)
        print(f"learner {self.rank} shared gradients")
        return True

开发者ID:heronsystems，项目名称:adeptRL，代码行数:18，代码来源:ray_container.py

示例4: receive_tensor_helper

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def receive_tensor_helper(tensor, src_rank, group, tag, num_iterations,
                          broadcast):
    """Time ``num_iterations`` receives of ``tensor`` and print the results.

    The timed region is bracketed by barriers.  Depending on ``broadcast``
    each iteration is either a ``dist.broadcast`` on ``group`` or a
    point-to-point ``dist.recv`` tagged with ``tag``; both use ``src_rank``
    as the source.

    The reported size assumes 4 bytes per element and uses only the first
    dimension of ``tensor``.

    NOTE(review): on the ``recv`` path the data is received into
    ``tensor.cpu()``; if ``tensor`` is not already on the CPU this is a
    temporary copy and ``tensor`` itself is left untouched -- confirm that
    is acceptable for this benchmark.
    """
    dist.barrier()
    start_time = time.time()
    if broadcast:
        for _ in range(num_iterations):
            dist.broadcast(tensor=tensor, group=group, src=src_rank)
    else:
        for _ in range(num_iterations):
            dist.recv(tensor=tensor.cpu(), src=src_rank, tag=tag)
    end_time = time.time()
    dist.barrier()

    elapsed = end_time - start_time
    size = tensor.size()[0]
    total_bytes = size * 4. * num_iterations
    throughput = total_bytes / (elapsed * 10**9)
    megabytes = (size * 4.) / 10**6
    seconds_per_iteration = elapsed / num_iterations
    print("Time to receive %s MB: %.3f seconds" %
          (megabytes, seconds_per_iteration))
    print("Throughput: %.3f GB/s" % throughput)

开发者ID:msr-fiddle，项目名称:pipedream，代码行数:20，代码来源:point_to_point.py

示例5: collect_results_cpu

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def collect_results_cpu(result_part, size, tmpdir=None):
    """Gather per-rank results on rank 0 through a shared directory.

    Every rank writes ``result_part`` to ``part_{rank}.pkl`` inside
    ``tmpdir``; after a barrier rank 0 loads all parts, interleaves them
    element by element, truncates to ``size`` items and deletes the
    directory.

    Args:
        result_part: this rank's results.
        size: number of results to keep after merging.
        tmpdir: directory to exchange files through.  When ``None``, rank 0
            creates one with ``tempfile.mkdtemp`` and broadcasts its path.

    Returns:
        The merged list on rank 0, ``None`` on every other rank.
    """
    rank, world_size = get_dist_info()

    if tmpdir is not None:
        mmcv.mkdir_or_exist(tmpdir)
    else:
        MAX_LEN = 512
        # Fixed-size byte buffer pre-filled with 32 (ASCII space) so the
        # padding can be stripped after decoding.
        dir_tensor = torch.full(
            (MAX_LEN, ), 32, dtype=torch.uint8, device='cuda')
        if rank == 0:
            created_dir = tempfile.mkdtemp()
            encoded = torch.tensor(
                bytearray(created_dir.encode()),
                dtype=torch.uint8,
                device='cuda')
            dir_tensor[:len(encoded)] = encoded
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()

    # Each rank writes its own part, then all ranks wait for each other.
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()

    if rank != 0:
        return None

    # Rank 0 only: read every part back from the exchange directory.
    part_list = [
        mmcv.load(osp.join(tmpdir, f'part_{i}.pkl'))
        for i in range(world_size)
    ]
    # Interleave: first item of each part, then second item of each, ...
    ordered_results = [item for group in zip(*part_list) for item in group]
    # The dataloader may have padded some samples; drop the excess.
    ordered_results = ordered_results[:size]
    shutil.rmtree(tmpdir)
    return ordered_results

开发者ID:open-mmlab，项目名称:mmdetection，代码行数:42，代码来源:test.py

示例6: collect_results

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def collect_results(result_part, size, tmpdir=None):
    """Merge the results of all ranks on rank 0 using files on disk.

    Args:
        result_part: results produced by the calling rank.
        size: length the merged list is truncated to.
        tmpdir: exchange directory.  If omitted, rank 0 makes a temporary
            directory and shares its path with the other ranks through a
            broadcast 512-byte CUDA tensor.

    Returns:
        On rank 0 the merged results (parts interleaved item by item and
        cut to ``size``); ``None`` on all other ranks.
    """
    rank, world_size = get_dist_info()

    if tmpdir is None:
        MAX_LEN = 512
        # 32 is the space character; it pads the unused tail of the buffer.
        dir_tensor = torch.full(
            (MAX_LEN, ), 32, dtype=torch.uint8, device='cuda')
        if rank == 0:
            path_bytes = bytearray(tempfile.mkdtemp().encode())
            path_tensor = torch.tensor(
                path_bytes, dtype=torch.uint8, device='cuda')
            dir_tensor[:len(path_tensor)] = path_tensor
        dist.broadcast(dir_tensor, 0)
        raw = dir_tensor.cpu().numpy().tobytes()
        tmpdir = raw.decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)

    # Write this rank's part and wait until every rank has done the same.
    own_file = osp.join(tmpdir, f'part_{rank}.pkl')
    mmcv.dump(result_part, own_file)
    dist.barrier()

    if rank != 0:
        return None

    # Load the parts of all ranks from the exchange directory.
    part_list = []
    for other_rank in range(world_size):
        part_list.append(mmcv.load(osp.join(tmpdir, f'part_{other_rank}.pkl')))

    # Re-interleave the parts one item per rank at a time.
    ordered_results = []
    for same_position in zip(*part_list):
        ordered_results += list(same_position)

    # Samples added as padding by the dataloader are cut off here.
    del ordered_results[size:]
    shutil.rmtree(tmpdir)
    return ordered_results

开发者ID:open-mmlab，项目名称:mmdetection，代码行数:42，代码来源:test_robustness.py

示例7: _init_summary_writer

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def _init_summary_writer(self):
        """Create the summary writer on the master node.

        On the master node a ``SummaryWriter`` is created in a directory
        named ``<self.setting.summary_dir_name>-<timestamp>`` and stored as
        ``self.summary_writer``.  In distributed mode all processes then
        meet at a barrier.
        """
        if self.is_master_node():
            self.logging('Init Summary Writer')
            stamp = datetime.now().strftime('%b%d_%H-%M-%S')
            sum_dir = '{}-{}'.format(self.setting.summary_dir_name, stamp)
            self.summary_writer = SummaryWriter(sum_dir)
            self.logging('Writing summary into {}'.format(sum_dir))

        if not self.in_distributed_mode():
            return
        # TODO: maybe this can be removed
        dist.barrier()

开发者ID:dolphin-zs，项目名称:Doc2EDAG，代码行数:13，代码来源:base_task.py

示例8: synchronize

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def synchronize():
    """
    Barrier across all processes, skipped when it would be meaningless.

    Nothing happens if ``torch.distributed`` is unavailable, if no process
    group has been initialised, or if the world size is 1.
    """
    # Short-circuit order matters: get_world_size() is only queried once a
    # process group is known to exist.
    if (dist.is_available()
            and dist.is_initialized()
            and dist.get_world_size() != 1):
        dist.barrier()

开发者ID:Res2Net，项目名称:Res2Net-maskrcnn，代码行数:15，代码来源:comm.py

示例9: after_train_epoch

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def after_train_epoch(self, runner):
        """Evaluate the model on ``self.dataset`` after selected epochs.

        Does nothing unless ``self.every_n_epochs(runner, self.interval)``
        is truthy.  Otherwise each rank runs inference on the samples
        ``rank, rank + world_size, ...``.  Non-zero ranks dump their result
        list to ``temp_{rank}.pkl`` in ``runner.work_dir``; rank 0 loads
        those files, fills in the missing entries, removes the files and
        calls ``self.evaluate``.  Every rank passes two barriers in total.
        """
        if not self.every_n_epochs(runner, self.interval):
            return
        runner.model.eval()
        results = [None] * len(self.dataset)
        if runner.rank == 0:
            prog_bar = mmcv.ProgressBar(len(self.dataset))

        for idx in range(runner.rank, len(self.dataset), runner.world_size):
            sample = self.dataset[idx]
            batch = collate([sample], samples_per_gpu=1)
            data_gpu = scatter(batch, [torch.cuda.current_device()])[0]

            # Inference only: no gradients are needed.
            with torch.no_grad():
                results[idx] = runner.model(
                    return_loss=False, rescale=True, **data_gpu)

            # Rank 0 advances the bar once per rank for each sample it
            # processes itself.
            if runner.rank == 0:
                for _ in range(runner.world_size):
                    prog_bar.update()

        if runner.rank == 0:
            print('\n')
            # Wait until the other ranks have written their files.
            dist.barrier()
            for other in range(1, runner.world_size):
                tmp_file = osp.join(
                    runner.work_dir, 'temp_{}.pkl'.format(other))
                tmp_results = mmcv.load(tmp_file)
                for slot in range(other, len(results), runner.world_size):
                    results[slot] = tmp_results[slot]
                os.remove(tmp_file)
            self.evaluate(runner, results)
        else:
            tmp_file = osp.join(
                runner.work_dir, 'temp_{}.pkl'.format(runner.rank))
            mmcv.dump(results, tmp_file)
            dist.barrier()
        dist.barrier()

开发者ID:dingjiansw101，项目名称:AerialDetection，代码行数:42，代码来源:eval_hooks.py

示例10: collect_results

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def collect_results(result_part, size, tmpdir=None):
    """Collect the partial results of every rank on rank 0 via the disk.

    Each rank dumps ``result_part`` to ``part_<rank>.pkl`` under ``tmpdir``
    and waits at a barrier.  Rank 0 then loads all parts, interleaves them
    item by item, keeps the first ``size`` items and removes ``tmpdir``.

    Args:
        result_part: results computed by this rank.
        size: number of merged results to return.
        tmpdir: directory for the part files; created by rank 0 with
            ``tempfile.mkdtemp`` and broadcast when ``None``.

    Returns:
        Merged result list on rank 0, otherwise ``None``.
    """

    def part_path(index):
        # Resolved at call time, after ``tmpdir`` has its final value.
        return osp.join(tmpdir, 'part_{}.pkl'.format(index))

    rank, world_size = get_dist_info()

    if tmpdir is not None:
        mmcv.mkdir_or_exist(tmpdir)
    else:
        MAX_LEN = 512
        # Buffer of spaces (byte value 32) that carries the directory name.
        dir_tensor = torch.full(
            (MAX_LEN, ), 32, dtype=torch.uint8, device='cuda')
        if rank == 0:
            dir_name = tempfile.mkdtemp()
            name_tensor = torch.tensor(
                bytearray(dir_name.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(name_tensor)] = name_tensor
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()

    # Dump this rank's part, then synchronise so all files exist.
    mmcv.dump(result_part, part_path(rank))
    dist.barrier()

    if rank != 0:
        return None

    part_list = [mmcv.load(part_path(i)) for i in range(world_size)]

    # Restore the original sample order by taking one item per part in turn.
    ordered_results = []
    for res in zip(*part_list):
        ordered_results.extend(res)

    # Drop samples the dataloader may have added as padding.
    ordered_results = ordered_results[:size]
    shutil.rmtree(tmpdir)
    return ordered_results

开发者ID:dingjiansw101，项目名称:AerialDetection，代码行数:42，代码来源:test_robustness.py

示例11: summarize_mp

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def summarize_mp(predictions, annotations_file, img_list, log_dir, mask=False):
    """Merge per-rank predictions through JSON files and evaluate on rank 0.

    Every rank writes its ``predictions`` and ``img_list`` to
    ``coco_ap_<rank>.json`` and ``img_list_<rank>.json`` in ``log_dir``.
    After a barrier rank 0 concatenates the files of all ranks (deleting
    each one once read) and passes the merged lists to ``summarize``.  A
    second barrier holds the other ranks until rank 0 is done.

    Returns:
        ``(det_map, msk_map)`` as returned by ``summarize`` on rank 0, and
        ``(0, 0)`` on every other rank.
    """
    rank = dist.get_rank()

    # All workers: write partial results to file.
    outputs = (
        ("coco_ap_{:02d}.json", predictions),
        ("img_list_{:02d}.json", img_list),
    )
    for template, payload in outputs:
        with open(path.join(log_dir, template.format(rank)), "w") as fid:
            json.dump(payload, fid)

    dist.barrier()

    if rank != 0:
        det_map, msk_map = 0, 0
    else:
        # Rank 0 only: merge the files of every worker and run evaluation.
        predictions = []
        img_list = []

        for worker in range(dist.get_world_size()):
            coco_ap_file = path.join(
                log_dir, "coco_ap_{:02d}.json".format(worker))
            with open(coco_ap_file) as fid:
                predictions.extend(json.load(fid))
            remove(coco_ap_file)

            img_list_file = path.join(
                log_dir, "img_list_{:02d}.json".format(worker))
            with open(img_list_file) as fid:
                img_list.extend(json.load(fid))
            remove(img_list_file)

        det_map, msk_map = summarize(
            predictions, annotations_file, img_list, mask)

    dist.barrier()

    return det_map, msk_map

开发者ID:mapillary，项目名称:seamseg，代码行数:35，代码来源:coco_ap.py

示例12: collect_results

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def collect_results(result_part, size, tmpdir=None):
    """Collect the partial results of all ranks on rank 0 through files.

    Each rank dumps ``result_part`` to ``part_<rank>.pkl`` in ``tmpdir`` and
    waits at a barrier.  Rank 0 then loads every part, interleaves them item
    by item, truncates the list to ``size`` and removes ``tmpdir``.

    Args:
        result_part: results computed by the calling rank.
        size: number of merged results to keep.
        tmpdir: directory used to exchange the part files.  When ``None``,
            rank 0 creates one with ``tempfile.mkdtemp`` and broadcasts its
            path to the other ranks in a 512-byte CUDA tensor.

    Returns:
        The merged list on rank 0; ``None`` on every other rank.
    """
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda')
        if rank == 0:
            tmpdir = tempfile.mkdtemp()
            # Must be the ``torch.tensor`` factory: the legacy ``torch.Tensor``
            # constructor does not accept ``dtype``/``device`` keywords.
            tmpdir = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        # Trailing padding spaces are removed by rstrip().
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)

    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
    # wait until every rank has written its part
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
            part_list.append(mmcv.load(part_file))
        # sort the results: one item from each part in turn
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results

开发者ID:DeepMotionAIResearch，项目名称:DenseMatchingBenchmark，代码行数:39，代码来源:test.py

示例13: _barrier

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def _barrier(rank):
    """Block at a ``dist.barrier`` and log (debug level) before and after.

    Args:
        rank: value included in both log messages.
    """
    before = 'Rank: {}, Waiting for other processes before the barrier.'.format(rank)
    logger.debug(before)
    dist.barrier()
    after = 'Rank: {}, Passing the barrier'.format(rank)
    logger.debug(after)

开发者ID:aws，项目名称:sagemaker-pytorch-training-toolkit，代码行数:6，代码来源:distributed_operations.py

示例14: _do_validation

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def _do_validation(self):
        """Run validation, save the best checkpoint and notify callbacks.

        Everything runs inside ``self.ddp_model.no_sync()``.  The results
        returned by ``self.test_manager.on_valid_begin()`` are filtered for
        ``None``; if any remain, the first ``(eval_res, is_better)`` pair is
        used.  ``self.metric_key`` is derived from ``eval_res`` when it is
        still unset.  A checkpoint is saved when ``is_better`` is truthy and
        ``self.cp_save_path`` is set.  After a barrier, non-master processes
        that still lack a ``metric_key`` try to recover it from the name of
        the saved checkpoint file.  Finally ``on_valid_end`` is dispatched
        and the model is put back into training mode.
        """
        with self.ddp_model.no_sync():
            # Model parameters are not updated here, so synchronisation can
            # be switched off.
            self.callback_manager.on_valid_begin()
            eval_res = self.test_manager.on_valid_begin()
            # Drop the entries that are None.
            eval_res = list(filter(lambda x: x is not None, eval_res))
            if len(eval_res):
                # Split the (eval_res, is_better) pairs and keep the first one.
                eval_res, is_better = list(zip(*eval_res))
                eval_res = eval_res[0]
                is_better = is_better[0]
            else:
                eval_res, is_better = None, None
            if self.metric_key is None and eval_res is not None:
                # Default to the first key of the first entry in eval_res.
                eval_res0 = list(eval_res.values())[0]
                self.metric_key = list(eval_res0.keys())[0]
            # logger.info('{}, {}'.format(eval_res, is_better))
            # save better model on master node
            if is_better is not None and self.cp_save_path:
                if is_better:
                    self.save_check_point(self._best_save_name(), only_params=False)
            # All processes wait here, i.e. until any checkpoint save above
            # has returned.
            dist.barrier()

            if not self.is_master and self.metric_key is None:
                # The master process obtained metric_key automatically, but
                # the other processes did not; recover it from the file name
                # of the saved checkpoint.
                prefix = 'best_' + self.model.__class__.__name__
                suffix = self.start_time
                fn_list = os.listdir(self.cp_save_path)
                fn_list = [fn for fn in fn_list if fn.startswith(prefix) and fn.endswith(suffix)]
                # Only trust the lookup when exactly one file matches.
                if len(fn_list) == 1:
                    best_name = fn_list[0]
                    # The part between prefix and suffix, minus underscores
                    # at either end, is taken as the metric key.
                    self.metric_key = best_name[len(prefix):-len(suffix)].strip('_')
            # print('RANK {} metric_key {}'.format(self.rank, self.metric_key))
            self.callback_manager.on_valid_end(
                eval_res, self.metric_key, self.optimizer, is_better)
            self.ddp_model.train()

开发者ID:fastnlp，项目名称:fastNLP，代码行数:37，代码来源:dist_trainer.py

示例15: synchronize_between_processes

# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import barrier [as 别名]
def synchronize_between_processes(self):
        """
        All-reduce ``self.count`` and ``self.total`` across processes.

        Returns immediately when ``is_dist_avail_and_initialized()`` is
        falsy.  Otherwise both values are packed into one float64 CUDA
        tensor, reduced after a barrier, and written back (``count`` as an
        ``int``).

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        stats = torch.tensor(
            [self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(stats)
        reduced_count, reduced_total = stats.tolist()
        self.count = int(reduced_count)
        self.total = reduced_total

开发者ID:lopuhin，项目名称:kaggle-kuzushiji-2019，代码行数:14，代码来源:utils.py

注：本文中的torch.distributed.barrier方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。