This article collects and summarizes typical usage examples of Python's torch.distributed.destroy_process_group method. If you have been wondering what distributed.destroy_process_group does, how to use it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from torch.distributed, the module this method belongs to.
The following lists 15 code examples of distributed.destroy_process_group, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
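Before the individual examples, here is a minimal self-contained sketch of the pattern they all share: every successful init_process_group call should eventually be paired with a destroy_process_group call, which releases the resources held by the default process group. This sketch is illustrative only; it assumes the single-process gloo backend so it runs on any machine, CPU-only included.

import os
import torch.distributed as dist

# A one-process "world": rank 0 out of a world size of 1.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group(backend="gloo", rank=0, world_size=1)
try:
    print(f"rank={dist.get_rank()} world_size={dist.get_world_size()}")
finally:
    # Always tear the group down, even if the work above fails.
    dist.destroy_process_group()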
Example 1: spmd_main

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def spmd_main(local_world_size, local_rank):
    # These are the parameters used to initialize the process group
    env_dict = {
        key: os.environ[key]
        for key in ("MASTER_ADDR", "MASTER_PORT", "RANK", "WORLD_SIZE")
    }
    print(f"[{os.getpid()}] Initializing process group with: {env_dict}")
    dist.init_process_group(backend="nccl")
    print(
        f"[{os.getpid()}]: world_size = {dist.get_world_size()}, "
        + f"rank = {dist.get_rank()}, backend={dist.get_backend()}"
    )

    demo_basic(local_world_size, local_rank)

    # Tear down the process group
    dist.destroy_process_group()
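In Example 1, the four environment variables are expected to be set by an external launcher such as torchrun. A rough way to exercise the same code path in a single process is sketched below; it assumes a machine with at least one GPU (the example hard-codes the nccl backend) and that demo_basic is defined as in the surrounding tutorial.

import os

os.environ.update({
    "MASTER_ADDR": "127.0.0.1",
    "MASTER_PORT": "29500",
    "RANK": "0",
    "WORLD_SIZE": "1",
})
# One process on one node: local_world_size=1, local_rank=0.
spmd_main(1, 0)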
Example 2: _test__native_dist_model_create_from_context_no_dist

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def _test__native_dist_model_create_from_context_no_dist(true_backend, true_device):
    assert _NativeDistModel.create_from_context() is None

    dist.init_process_group(true_backend, "tcp://0.0.0.0:2222", world_size=1, rank=0)
    dist.barrier()

    _test__native_dist_model_create_from_context_no_local_rank()

    true_conf = {
        "device": true_device,
        "local_rank": 0,
        "rank": 0,
        "world_size": 1,
        "node_index": 0,
        "nnodes": 1,
        "nproc_per_node": 1,
    }

    _test__native_dist_model_create_from_context_env_local_rank(true_conf)
    _test__native_dist_model_create_from_context_set_local_rank(true_conf)

    dist.destroy_process_group()
Example 3: clean_mp

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def clean_mp(self):
    if self.can_parallel:
        dist.destroy_process_group()
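Example 3 guards the teardown with an application-level flag (can_parallel). A backend-agnostic alternative, sketched here rather than taken from the library, is to ask torch itself whether a process group was actually initialized:

import torch.distributed as dist

def clean_mp_safe():
    # Only tear down when a process group actually exists.
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()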
Example 4: cleanup

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def cleanup():
    dist.destroy_process_group()
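Example 4's cleanup is one half of the setup/cleanup pair familiar from the official DDP tutorial style. The sketch below shows how the two halves fit together under mp.spawn, reusing the cleanup function from Example 4; the setup helper and the demo_fn worker name are assumptions for illustration, not part of the example above.

import os
import torch.distributed as dist
import torch.multiprocessing as mp

def setup(rank, world_size):
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

def demo_fn(rank, world_size):
    setup(rank, world_size)
    # ... training work goes here ...
    cleanup()  # each spawned worker destroys its own process group

if __name__ == "__main__":
    mp.spawn(demo_fn, args=(2,), nprocs=2)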
Example 5: close

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def close(self):
    r"""Close the Trainer and destroy the process group."""
    dist.destroy_process_group()
Example 6: cleanup

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def cleanup():
    dist.destroy_process_group()

# pylint: disable=no-member
Example 7: shutdown

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def shutdown(cls):
    # Rank 0 signals the TTP helper process to terminate.
    if dist.get_rank() == 0 and cls.instance.ttp_initialized:
        cls.instance.send_obj(
            "terminate", cls.instance.get_ttp_rank(), cls.instance.ttp_group
        )
    # Destroy the subgroups first, then the default group.
    dist.destroy_process_group(cls.instance.main_group)
    dist.destroy_process_group(cls.instance.ttp_group)
    dist.destroy_process_group()
    cls.instance = None
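Example 7 uses the less common form destroy_process_group(group), which destroys a specific group object instead of the default one. A minimal sketch of creating and destroying a subgroup, runnable as a single gloo process:

import os
import torch.distributed as dist

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29501")
dist.init_process_group("gloo", rank=0, world_size=1)

subgroup = dist.new_group(ranks=[0])  # a subgroup holding only rank 0

dist.destroy_process_group(subgroup)  # destroy the subgroup first...
dist.destroy_process_group()          # ...then the default (world) group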
Example 8: tearDown

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def tearDown(self):
    # We need to de-initialize the distributed world; otherwise other
    # tests will think we're distributed when we're really not.
    dist.destroy_process_group()
Example 9: ray_start_2_cpus

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
@pytest.fixture  # presumed: the yield/teardown pattern marks this as a pytest fixture
def ray_start_2_cpus():
    address_info = ray.init(num_cpus=2)
    yield address_info
    # The code after the yield will run as teardown code.
    ray.shutdown()
    # Ensure that tests don't ALL fail
    if dist.is_initialized():
        dist.destroy_process_group()
Example 10: ray_start_4_cpus

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
@pytest.fixture  # presumed: the yield/teardown pattern marks this as a pytest fixture
def ray_start_4_cpus():
    address_info = ray.init(num_cpus=4)
    yield address_info
    # The code after the yield will run as teardown code.
    ray.shutdown()
    # Ensure that tests don't ALL fail
    if dist.is_initialized():
        dist.destroy_process_group()
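Examples 9 and 10 follow pytest's yield-fixture protocol: a test requests the fixture by parameter name, receives the yielded value, and the code after the yield runs as teardown even when the test fails. A hypothetical test using the fixture above (the test body is an illustration, not taken from the source):

def test_runs_on_two_cpus(ray_start_2_cpus):  # fixture injected by name
    address_info = ray_start_2_cpus
    assert address_info is not None
    # ... exercise code that may call dist.init_process_group() ...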
Example 11: shutdown

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def shutdown(self):
    """Attempts to shut down the worker."""
    # However, it seems to be harmless to remove permanently
    # since the processes are shut down anyways. This comment can be
    # removed in a future release if it is still not documented in
    # the stable PyTorch docs.
    dist.destroy_process_group()
    super(DistributedTorchRunner, self).shutdown()
Example 12: run_training_teardown

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def run_training_teardown(self):
    if hasattr(self, '_teardown_already_run') and self._teardown_already_run:
        return

    self._teardown_already_run = True

    # Train end events
    with self.profiler.profile('on_train_end'):
        # callbacks
        self.on_train_end()
        # model hooks
        if self.is_function_implemented('on_train_end'):
            self.get_model().on_train_end()

    if self.logger is not None:
        self.logger.finalize("success")

    # summarize profile results
    if self.global_rank == 0:
        self.profiler.describe()

    if self.global_rank == 0:
        for proc in self.interactive_ddp_procs:
            subprocess.Popen.kill(proc)

    # clean up dist group
    if self.use_ddp or self.use_ddp2:
        torch_distrib.destroy_process_group()
Example 13: test_sync_reduce_ddp

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def test_sync_reduce_ddp():
    """Make sure sync-reduce works with DDP."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    worldsize = 2
    mp.spawn(_ddp_test_fn, args=(worldsize,), nprocs=worldsize)

    # dist.destroy_process_group()
Example 14: test_tensor_metric_ddp

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def test_tensor_metric_ddp():
    tutils.reset_seed()
    tutils.set_random_master_port()

    world_size = 2
    mp.spawn(_ddp_test_tensor_metric, args=(world_size,), nprocs=world_size)

    # dist.destroy_process_group()
Example 15: test_numpy_metric_ddp

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import destroy_process_group [as alias]
def test_numpy_metric_ddp():
    tutils.reset_seed()
    tutils.set_random_master_port()

    world_size = 2
    mp.spawn(_ddp_test_numpy_metric, args=(world_size,), nprocs=world_size)

    # dist.destroy_process_group()
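In Examples 13 through 15 the parent's destroy_process_group call is commented out: the parent process never initializes a group itself; each worker spawned by mp.spawn does, and therefore each worker must destroy its own. A sketch of what such a worker could look like; the body of _ddp_test_fn below is an assumption for illustration, not the library's actual test code.

import os
import torch
import torch.distributed as dist

def _ddp_test_fn(rank, worldsize):
    # Each spawned worker joins (and later destroys) its own group.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29502")
    dist.init_process_group("gloo", rank=rank, world_size=worldsize)

    tensor = torch.tensor([float(rank)])
    dist.all_reduce(tensor)  # defaults to SUM across ranks
    assert tensor.item() == sum(range(worldsize))

    # Each worker tears down its own process group on exit.
    dist.destroy_process_group()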