本文整理汇总了Python中torch.distributed.all_gather方法的典型用法代码示例。如果您正苦于以下问题:Python distributed.all_gather方法的具体用法?Python distributed.all_gather怎么用?Python distributed.all_gather使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类torch.distributed
的用法示例。
在下文中一共展示了distributed.all_gather方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: merge
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def merge(img_ids, eval_imgs):
all_img_ids = all_gather(img_ids)
all_eval_imgs = all_gather(eval_imgs)
merged_img_ids = []
for p in all_img_ids:
merged_img_ids.extend(p)
merged_eval_imgs = []
for p in all_eval_imgs:
merged_eval_imgs.append(p)
merged_img_ids = np.array(merged_img_ids)
merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
# keep only unique (and in sorted order) images
merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
merged_eval_imgs = merged_eval_imgs[..., idx]
return merged_img_ids, merged_eval_imgs
示例2: gather_tensors
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def gather_tensors(input_array):
world_size = dist.get_world_size()
## gather shapes first
myshape = input_array.shape
mycount = input_array.size
shape_tensor = torch.Tensor(np.array(myshape)).cuda()
all_shape = [torch.Tensor(np.array(myshape)).cuda() for i in range(world_size)]
dist.all_gather(all_shape, shape_tensor)
## compute largest shapes
all_shape = [x.cpu().numpy() for x in all_shape]
all_count = [int(x.prod()) for x in all_shape]
all_shape = [list(map(int, x)) for x in all_shape]
max_count = max(all_count)
## padding tensors and gather them
output_tensors = [torch.Tensor(max_count).cuda() for i in range(world_size)]
padded_input_array = np.zeros(max_count)
padded_input_array[:mycount] = input_array.reshape(-1)
input_tensor = torch.Tensor(padded_input_array).cuda()
dist.all_gather(output_tensors, input_tensor)
## unpadding gathered tensors
padded_output = [x.cpu().numpy() for x in output_tensors]
output = [x[:all_count[i]].reshape(all_shape[i]) for i,x in enumerate(padded_output)]
return output
示例3: _gather_values
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def _gather_values(*tensors, group, world_size):
# Start gather operations asynchronously
gathered, gather_ops = [], []
for t in tensors:
t_all = t.new_empty(world_size, *t.shape)
t_op = distributed.all_gather(list(t_all.unbind(0)), t, group=group, async_op=True)
gathered.append(t_all)
gather_ops.append(t_op)
# Wait
for op in gather_ops:
op.wait()
# Return results
return tuple(gathered)
示例4: _pad_to_largest_tensor
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def _pad_to_largest_tensor(tensor, group):
"""
Returns:
list[int]: size of the tensor, on each rank
Tensor: padded tensor that has the max size
"""
world_size = dist.get_world_size(group=group)
assert (
world_size >= 1
), "comm.gather/all_gather must be called from ranks within the given group!"
local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
size_list = [
torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)
]
dist.all_gather(size_list, local_size, group=group)
size_list = [int(size.item()) for size in size_list]
max_size = max(size_list)
# we pad the tensor because torch all_gather does not support
# gathering tensors of different shapes
if local_size != max_size:
padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device)
tensor = torch.cat((tensor, padding), dim=0)
return size_list, tensor
示例5: _pad_to_largest_tensor
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def _pad_to_largest_tensor(tensor, group):
"""
Returns:
list[int]: size of the tensor, on each rank
Tensor: padded tensor that has the max size
"""
world_size = dist.get_world_size(group=group)
assert (
world_size >= 1
), "comm.gather/all_gather must be called from ranks within the given group!"
local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
size_list = [
torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)
]
dist.all_gather(size_list, local_size, group=group)
size_list = [int(size.item()) for size in size_list]
max_size = max(size_list)
# we pad the tensor because torch all_gather does not support
# gathering tensors of different shapes
if local_size != max_size:
padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device)
tensor = torch.cat((tensor, padding), dim=0)
return size_list, tensor
示例6: all_gather
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def all_gather(tensors):
"""
All gathers the provided tensors from all processes across machines.
Args:
tensors (list): tensors to perform all gather across all processes in
all machines.
"""
gather_list = []
output_tensor = []
world_size = dist.get_world_size()
for tensor in tensors:
tensor_placeholder = [
torch.ones_like(tensor) for _ in range(world_size)
]
dist.all_gather(tensor_placeholder, tensor, async_op=False)
gather_list.append(tensor_placeholder)
for gathered_tensor in gather_list:
output_tensor.append(torch.cat(gathered_tensor, dim=0))
return output_tensor
示例7: all_gather
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def all_gather(data):
"""
Run all_gather on arbitrary picklable data (not necessarily tensors)
Args:
data: any picklable object
Returns:
list[data]: list of data gathered from each rank
"""
world_size = get_world_size()
if world_size == 1:
return [data]
# serialized to a Tensor
buffer = pickle.dumps(data)
storage = torch.ByteStorage.from_buffer(buffer)
tensor = torch.ByteTensor(storage).to("cuda")
# obtain Tensor size of each rank
local_size = torch.LongTensor([tensor.numel()]).to("cuda")
size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)]
dist.all_gather(size_list, local_size)
size_list = [int(size.item()) for size in size_list]
max_size = max(size_list)
# receiving Tensor from all ranks
# we pad the tensor because torch all_gather does not support
# gathering tensors of different shapes
tensor_list = []
for _ in size_list:
tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda"))
if local_size != max_size:
padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda")
tensor = torch.cat((tensor, padding), dim=0)
dist.all_gather(tensor_list, tensor)
data_list = []
for size, tensor in zip(size_list, tensor_list):
buffer = tensor.cpu().numpy().tobytes()[:size]
data_list.append(pickle.loads(buffer))
return data_list
示例8: collect_results_gpu
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def collect_results_gpu(result_part, size):
rank, world_size = get_dist_info()
# dump result part to tensor with pickle
part_tensor = torch.tensor(
bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
# gather all result part tensor shape
shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
shape_list = [shape_tensor.clone() for _ in range(world_size)]
dist.all_gather(shape_list, shape_tensor)
# padding result part tensor to max length
shape_max = torch.tensor(shape_list).max()
part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
part_send[:shape_tensor[0]] = part_tensor
part_recv_list = [
part_tensor.new_zeros(shape_max) for _ in range(world_size)
]
# gather all result part
dist.all_gather(part_recv_list, part_send)
if rank == 0:
part_list = []
for recv, shape in zip(part_recv_list, shape_list):
part_list.append(
pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
# sort the results
ordered_results = []
for res in zip(*part_list):
ordered_results.extend(list(res))
# the dataloader may pad some samples
ordered_results = ordered_results[:size]
return ordered_results
示例9: all_gather
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def all_gather(data):
"""
Run all_gather on arbitrary picklable data (not necessarily tensors)
Args:
data: any picklable object
Returns:
list[data]: list of data gathered from each rank
"""
world_size = get_world_size()
if world_size == 1:
return [data]
# serialized to a Tensor
buffer = pickle.dumps(data)
storage = torch.ByteStorage.from_buffer(buffer)
tensor = torch.ByteTensor(storage).to("cuda")
# obtain Tensor size of each rank
local_size = torch.IntTensor([tensor.numel()]).to("cuda")
size_list = [torch.IntTensor([0]).to("cuda") for _ in range(world_size)]
dist.all_gather(size_list, local_size)
size_list = [int(size.item()) for size in size_list]
max_size = max(size_list)
# receiving Tensor from all ranks
# we pad the tensor because torch all_gather does not support
# gathering tensors of different shapes
tensor_list = []
for _ in size_list:
tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda"))
if local_size != max_size:
padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda")
tensor = torch.cat((tensor, padding), dim=0)
dist.all_gather(tensor_list, tensor)
data_list = []
for size, tensor in zip(size_list, tensor_list):
buffer = tensor.cpu().numpy().tobytes()[:size]
data_list.append(pickle.loads(buffer))
return data_list
示例10: shared_random_seed
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def shared_random_seed():
"""
Returns:
int: a random number that is the same across all workers.
If workers need a shared RNG, they can use this shared seed to
create one.
All workers must call this function, otherwise it will deadlock.
"""
ints = np.random.randint(2 ** 31)
all_ints = all_gather(ints)
return all_ints[0]
示例11: forward
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def forward(ctx, input):
input_list = [torch.zeros_like(input) for k in range(dist.get_world_size())]
# Use allgather instead of allreduce since I don't trust in-place operations ..
dist.all_gather(input_list, input, async_op=False)
inputs = torch.stack(input_list, dim=0)
return torch.sum(inputs, dim=0)
示例12: accumulate_metric
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def accumulate_metric(metric):
all_values = all_gather(metric.get_value())
if not is_main_process():
return None, None
for value in all_values[1:]:
metric.combine_value(value)
return metric.get()
示例13: _all_gather
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def _all_gather(rank, rows, columns):
tensor = _get_tensor(rank, rows, columns)
tensors_list = _get_zeros_tensors_list(rows, columns)
logger.debug('Rank: {},\nTensor BEFORE all_gather: {}'.format(rank, tensor))
dist.all_gather(tensors_list, tensor)
logger.debug('Rank: {},\nTensor AFTER all_gather: {}. tensors_list: {}\n'.format(
rank, tensor, tensors_list))
# tensor shouldn't have changed
assert torch.equal(tensor, _get_tensor(rank, rows, columns)), \
'Rank {}: Tensor got changed after all_gather.'.format(rank)
for i in range(dist.get_world_size()):
assert torch.equal(tensors_list[i], _get_tensor(i, rows, columns)), \
'Rank {}: tensors lists are not the same after all_gather.'
示例14: all_gather
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def all_gather(data):
"""
Run all_gather on arbitrary picklable data (not necessarily tensors)
Args:
data: any picklable object
Returns:
list[data]: list of data gathered from each rank
"""
world_size = get_world_size()
if world_size == 1:
return [data]
# serialized to a Tensor
buffer = pickle.dumps(data)
storage = torch.ByteStorage.from_buffer(buffer)
tensor = torch.ByteTensor(storage).to("cuda")
# obtain Tensor size of each rank
local_size = torch.tensor([tensor.numel()], device="cuda")
size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
dist.all_gather(size_list, local_size)
size_list = [int(size.item()) for size in size_list]
max_size = max(size_list)
# receiving Tensor from all ranks
# we pad the tensor because torch all_gather does not support
# gathering tensors of different shapes
tensor_list = []
for _ in size_list:
tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
if local_size != max_size:
padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
tensor = torch.cat((tensor, padding), dim=0)
dist.all_gather(tensor_list, tensor)
data_list = []
for size, tensor in zip(size_list, tensor_list):
buffer = tensor.cpu().numpy().tobytes()[:size]
data_list.append(pickle.loads(buffer))
return data_list
示例15: gather_predictions
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import all_gather [as 别名]
def gather_predictions(preds):
world_size = get_world_size()
if world_size > 1:
all_preds = preds.new(world_size * preds.size(0), preds.size(1))
all_preds_list = all_preds.chunk(world_size, dim=0)
dist.all_gather(all_preds_list, preds)
preds = all_preds
return preds