This article collects and summarizes typical usage examples of the Python method torch.distributed.new_group. If you are puzzling over questions like: what exactly does distributed.new_group do, and how is it used in practice? Then the curated method code examples below may help. You can also explore further usage examples of torch.distributed, the module this method belongs to.
The following presents 11 code examples of distributed.new_group, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: __init__
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
def __init__(self, rank, learner_ranks, worker_ranks, ip, port):
    world_size = len(learner_ranks) + len(worker_ranks)
    dist.init_process_group(
        "nccl",
        init_method="tcp://{}:{}".format(ip, port),
        rank=rank,
        world_size=world_size,
    )
    groups = {}
    # new_group is a collective call: every rank must create every group,
    # even the ones it will not be a member of
    for learner_rank in learner_ranks:
        for worker_rank in worker_ranks:
            g = dist.new_group([learner_rank, worker_rank])
            if worker_rank == rank:
                groups[learner_rank] = g
    dist.new_group(learner_ranks)
    self.groups = groups
    self.device = torch.device(f"cuda:{ray.get_gpu_ids()[0]}")
    self.rank = rank
    self.network = torch.zeros(3).to(self.device)
    self.exp = None
    self.network_handle = None
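Because dist.new_group is collective, every process has to execute the same sequence of new_group calls in the same order, which is why the loop above builds all learner-worker pairs on every rank. A minimal sketch of using one of the cached pairwise groups afterwards, assuming a worker method and a learner at rank 0 (the buffer shape is illustrative, not from the original):

# hypothetical: a worker receives parameters from learner rank 0
weights = torch.zeros(3, device=self.device)
dist.broadcast(weights, src=0, group=self.groups[0])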
Example 2: init_distributed_training
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
def init_distributed_training(cfg):
    """
    Initialize variables needed for distributed training.
    """
    if cfg.NUM_GPUS == 1:
        return
    num_gpus_per_machine = cfg.NUM_GPUS
    num_machines = dist.get_world_size() // num_gpus_per_machine
    for i in range(num_machines):
        ranks_on_i = list(
            range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)
        )
        pg = dist.new_group(ranks_on_i)
        if i == cfg.SHARD_ID:
            global _LOCAL_PROCESS_GROUP
            _LOCAL_PROCESS_GROUP = pg
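Once cached, the per-machine group can be passed to any collective to restrict it to ranks on the same machine. A minimal hedged sketch, assuming the group has been initialized as above (the tensor is illustrative):

t = torch.ones(1).cuda()
dist.all_reduce(t, group=_LOCAL_PROCESS_GROUP)  # sums only across GPUs on this machine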
Example 3: _start_reduction_threads
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
def _start_reduction_threads(self):
    num_buckets = len(self.bucket_sizes)
    self._reduction_queues = [queue.Queue() for _ in range(num_buckets)]
    self._reduction_threads = []
    self._reduction_streams = [[] for _ in range(num_buckets)]
    self._nccl_streams = []
    self._default_streams = []
    for dev_id in self.device_ids:
        with torch.cuda.device(dev_id):
            # TODO: don't assume we're on a default stream
            self._default_streams.append(torch.cuda.current_stream())
            self._nccl_streams.append(torch.cuda.Stream())
    for reduction_queue, reduction_streams in zip(self._reduction_queues, self._reduction_streams):
        for dev_id in self.device_ids:
            with torch.cuda.device(dev_id):
                reduction_streams.append(torch.cuda.Stream())
        # We only use the first device for distributed reductions
        dist._register_stream(reduction_streams[0])
        group_id = dist.new_group()
        self._reduction_threads.append(threading.Thread(
            target=self._reduction_thread_fn,
            args=(reduction_queue, group_id, self.device_ids, reduction_streams, self._nccl_streams)))
        self._reduction_threads[-1].daemon = True
        self._reduction_threads[-1].start()
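Calling dist.new_group() with no ranks argument creates a group spanning all processes; the legacy DDP code above allocates one such group per bucket so that concurrent reductions stay isolated from one another. A minimal sketch of the same per-bucket idea, assuming an initialized process group (num_buckets and bucket_tensors are hypothetical names, not from the original):

bucket_groups = [dist.new_group() for _ in range(num_buckets)]
for tensor, group in zip(bucket_tensors, bucket_groups):
    dist.all_reduce(tensor, group=group)  # each bucket reduces on its own group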
Example 4: run
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
from random import randint
from time import sleep

def run(world_size, rank, steps):
    for step in range(1, steps + 1):
        # get a random int
        value = randint(0, 10)
        # group all ranks
        ranks = list(range(world_size))
        group = dist.new_group(ranks=ranks)
        # compute the reduced sum
        tensor = torch.tensor(value, dtype=torch.int)
        dist.all_reduce(tensor, op=dist.ReduceOp.SUM, group=group)
        print('rank: {}, step: {}, value: {}, reduced sum: {}.'.format(
            rank, step, value, tensor.item()))
        sleep(1)
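run assumes the default process group is already initialized. Note also that a group over all ranks is equivalent to the default group, so creating a fresh one on every step works but is wasteful. A hedged launcher sketch using a file-based rendezvous (the file path, worker name, and process count are assumptions):

import torch.multiprocessing as mp

def worker(rank, world_size):
    dist.init_process_group(
        "gloo", init_method="file:///tmp/rdzv",  # assumed rendezvous file
        rank=rank, world_size=world_size,
    )
    run(world_size, rank, steps=3)

if __name__ == "__main__":
    mp.spawn(worker, args=(2,), nprocs=2)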
Example 5: __init__
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
def __init__(self, *args, **kwargs):
    super(SynchronousDistributedTraining, self).__init__(*args, **kwargs)
    self.world_size = distributed.get_world_size()
    self.rank = distributed.get_rank()
    self.group = distributed.new_group(ranks=list(range(self.world_size)))
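A subgroup containing every rank behaves like the default WORLD group; keeping an explicit handle simply makes the group argument explicit in later collectives. A hedged sketch of a typical use, gradient averaging inside a training step (self.model is an assumed attribute, not from the original):

for p in self.model.parameters():
    distributed.all_reduce(p.grad, group=self.group)
    p.grad /= self.world_size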
Example 6: __init__
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
def __init__(self, init_ttp=False):
    # no need to do anything if we already initialized the communicator:
    if not dist.is_initialized():
        # get configuration variables from environment:
        for key in ["distributed_backend", "rendezvous", "world_size", "rank"]:
            if key.upper() not in os.environ:
                raise ValueError("Environment variable %s must be set." % key)
            setattr(self, key.lower(), os.environ[key.upper()])

        # make sure world size and rank are integers; comms stats are reset:
        self.world_size = int(self.world_size)
        self.rank = int(self.rank)
        self.reset_communication_stats()
        self._name = f"rank{self.rank}"

        # logging:
        logging.info("==================")
        logging.info("DistributedCommunicator with rank %d" % self.rank)
        logging.info("==================")

        # initialize process group:
        total_ws = self.world_size + 1 if init_ttp else self.world_size
        dist.init_process_group(
            backend=self.distributed_backend,
            init_method=self.rendezvous,
            world_size=total_ws,
            rank=self.rank,
        )
        self.ttp_group = dist.new_group(list(range(total_ws)))
        self.main_group = dist.new_group(list(range(self.world_size)))
        self.ttp_initialized = init_ttp
        logging.info("World size = %d" % self.world_size)
Example 7: active_group
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
def active_group(active):
    """Initialize a distributed group where each process can independently decide whether to participate or not

    Parameters
    ----------
    active : bool
        Whether this process will be active in the group or not

    Returns
    -------
    A distributed group containing all processes that passed `active=True`, or `None` if all passed `False`
    """
    world_size = distributed.get_world_size()
    rank = distributed.get_rank()

    # Check if cache is initialized, add WORLD and None to it
    if not hasattr(active_group, "__cache__"):
        active_group.__cache__ = {
            frozenset(range(world_size)): distributed.group.WORLD,
            frozenset(): None,
        }

    # Gather active status from all workers
    active = torch.tensor(rank if active else -1, dtype=torch.long, device=torch.cuda.current_device())
    active_workers = torch.empty(world_size, dtype=torch.long, device=torch.cuda.current_device())
    distributed.all_gather(list(active_workers.unbind(0)), active)

    # Create and cache group if it doesn't exist yet
    active_workers = frozenset(int(i) for i in active_workers.tolist() if i != -1)
    if active_workers not in active_group.__cache__:
        group = distributed.new_group(list(active_workers))
        active_group.__cache__[active_workers] = group
    return active_group.__cache__[active_workers]
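Every rank must call active_group itself (it performs an all_gather and a new_group internally, both collectives), but only the active ranks should then use the returned group. A hedged usage sketch (the even/odd split and the tensor are illustrative):

active = distributed.get_rank() % 2 == 0  # even ranks join this round
group = active_group(active)
if active and group is not None:
    t = torch.ones(1, device=torch.cuda.current_device())
    distributed.all_reduce(t, group=group)  # involves only the active ranks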
Example 8: _get_global_gloo_group
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
@functools.lru_cache()  # requires: import functools; caches the group, matching "The result is cached" below
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    """
    if dist.get_backend() == "nccl":
        return dist.new_group(backend="gloo")
    else:
        return dist.group.WORLD
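The gloo fallback exists because the nccl backend only supports CUDA tensors, so CPU-side collectives need a gloo group. A hedged usage sketch; dist.all_gather_object is available in recent torch versions:

pg = _get_global_gloo_group()
out = [None for _ in range(dist.get_world_size())]
dist.all_gather_object(out, {"rank": dist.get_rank()}, group=pg)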
Example 9: _distributed_worker
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
def _distributed_worker(
    local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args
):
    assert torch.cuda.is_available(), "cuda is not available. Please check your installation."
    global_rank = machine_rank * num_gpus_per_machine + local_rank
    try:
        dist.init_process_group(
            backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank
        )
    except Exception as e:
        logger = logging.getLogger(__name__)
        logger.error("Process group URL: {}".format(dist_url))
        raise e

    # synchronize is needed here to prevent a possible timeout after calling init_process_group
    # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172
    comm.synchronize()

    assert num_gpus_per_machine <= torch.cuda.device_count()
    torch.cuda.set_device(local_rank)

    # Setup the local process group (which contains ranks within the same machine)
    assert comm._LOCAL_PROCESS_GROUP is None
    num_machines = world_size // num_gpus_per_machine
    for i in range(num_machines):
        ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine))
        pg = dist.new_group(ranks_on_i)
        if i == machine_rank:
            comm._LOCAL_PROCESS_GROUP = pg

    main_func(*args)
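A hedged sketch of how such a worker function is typically launched, one process per local GPU (the argument values are assumptions):

import torch.multiprocessing as mp

mp.spawn(
    _distributed_worker,
    nprocs=num_gpus_per_machine,
    args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args),
)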
Example 10: tmp_process_group
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
@contextmanager  # requires: from contextlib import contextmanager
def tmp_process_group(backend):
    cpu_pg = dist.new_group(backend=backend)
    try:
        yield cpu_pg
    finally:
        dist.destroy_process_group(cpu_pg)
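A hedged usage sketch: open a short-lived gloo group for a CPU-side collective and tear it down afterwards (the tensor is illustrative):

with tmp_process_group("gloo") as pg:
    t = torch.zeros(1)
    dist.all_reduce(t, group=pg)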
Example 11: _get_global_gloo_group
# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import new_group [as alias]
@functools.lru_cache()  # requires: import functools; caches the group, matching "The result is cached" below
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    Returns:
        (group): pytorch dist group.
    """
    if dist.get_backend() == "nccl":
        return dist.new_group(backend="gloo")
    else:
        return dist.group.WORLD