本文整理汇总了Python中torch.distributed.reduce方法的典型用法代码示例。如果您正苦于以下问题:Python distributed.reduce方法的具体用法?Python distributed.reduce怎么用?Python distributed.reduce使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类torch.distributed
的用法示例。
在下文中一共展示了distributed.reduce方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: reduce_loss_dict
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import reduce [as 别名]
def reduce_loss_dict(loss_dict):
    """
    Reduce the loss dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    loss_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        # Single process: nothing to reduce.
        return loss_dict
    with torch.no_grad():
        # Sorted keys guarantee every rank stacks the losses in the same order.
        names = sorted(loss_dict.keys())
        stacked = torch.stack([loss_dict[name] for name in names], dim=0)
        dist.reduce(stacked, dst=0)
        if dist.get_rank() == 0:
            # Only rank 0 holds the accumulated sum, so only it divides.
            stacked /= world_size
        reduced_losses = dict(zip(names, stacked))
    return reduced_losses
示例2: reduce_loss_dict
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import reduce [as 别名]
def reduce_loss_dict(loss_dict):
    """
    Reduce the loss dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    loss_dict, after reduction. (avg)
    """
    world_size = get_world_size()
    if world_size < 2:
        # No peers to reduce with; return the input unchanged.
        return loss_dict
    with torch.no_grad():
        # Keys are sorted so each process stacks tensors identically.
        keys = sorted(loss_dict)
        losses = torch.stack([loss_dict[key] for key in keys], dim=0)
        dist.reduce(losses, dst=0)
        if dist.get_rank() == 0:
            # Rank 0 is the only process with the full sum; average there.
            losses /= world_size
        reduced_losses = {key: value for key, value in zip(keys, losses)}
    return reduced_losses
示例3: reduce
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import reduce [as 别名]
def reduce(self, input, dst, op=ReduceOp.SUM, batched=False):
    """Reduces the input data across all parties."""
    assert dist.is_initialized(), "initialize the communicator first"
    if batched:
        assert isinstance(input, list), "batched reduce input must be a list"
        # Clone so the caller's tensors are not mutated by the in-place reduce.
        result = [x.clone().data for x in input]
        # Issue all reduces asynchronously, then wait for the whole batch.
        pending = [
            dist.reduce(t, dst, op=op, group=self.main_group, async_op=True)
            for t in result
        ]
        for handle in pending:
            handle.wait()
    else:
        assert torch.is_tensor(
            input.data
        ), "unbatched input for reduce must be a torch tensor"
        result = input.clone()
        dist.reduce(result.data, dst, op=op, group=self.main_group)
    # Only the destination rank receives the reduced value.
    return result if dst == self.get_rank() else None
示例4: all_reduce
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import reduce [as 别名]
def all_reduce(self, input, op=ReduceOp.SUM, batched=False):
    """Reduces the input data across all parties; all get the final result."""
    assert dist.is_initialized(), "initialize the communicator first"
    if batched:
        assert isinstance(input, list), "batched reduce input must be a list"
        # Work on clones so the caller's list is left untouched.
        result = [x.clone() for x in input]
        # Launch every all_reduce without blocking, then drain the handles.
        handles = [
            dist.all_reduce(t.data, op=op, group=self.main_group, async_op=True)
            for t in result
        ]
        for handle in handles:
            handle.wait()
    else:
        assert torch.is_tensor(
            input.data
        ), "unbatched input for reduce must be a torch tensor"
        result = input.clone()
        dist.all_reduce(result.data, op=op, group=self.main_group)
    return result
示例5: broadcast
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import reduce [as 别名]
def broadcast(self, input, src, batched=False):
    """Broadcasts the tensor to all parties."""
    assert dist.is_initialized(), "initialize the communicator first"
    if batched:
        assert isinstance(input, list), "batched reduce input must be a list"
        # Fire all broadcasts asynchronously; broadcast is in-place, so no clone.
        handles = [
            dist.broadcast(t, src, group=self.main_group, async_op=True)
            for t in input
        ]
        for handle in handles:
            handle.wait()
    else:
        assert torch.is_tensor(
            input.data
        ), "unbatched input for reduce must be a torch tensor"
        dist.broadcast(input.data, src, group=self.main_group)
    return input
示例6: reduce_loss_dict
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import reduce [as 别名]
def reduce_loss_dict(loss_dict):
    """
    Reduce the loss dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    loss_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        # Single process: nothing to reduce.
        return loss_dict
    with torch.no_grad():
        loss_names = []
        all_losses = []
        # BUGFIX: iterate in sorted key order, not insertion order. If the
        # dict's insertion order differs between ranks, dist.reduce would sum
        # mismatched losses together. Sorting matches the convention used by
        # the other reduce helpers in this file.
        for k in sorted(loss_dict.keys()):
            loss_names.append(k)
            all_losses.append(loss_dict[k])
        all_losses = torch.stack(all_losses, dim=0)
        dist.reduce(all_losses, dst=0)
        if dist.get_rank() == 0:
            # only main process gets accumulated, so only divide by
            # world_size in this case
            all_losses /= world_size
        reduced_losses = {k: v for k, v in zip(loss_names, all_losses)}
    return reduced_losses
示例7: reduce_scalar_outputs
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import reduce [as 别名]
def reduce_scalar_outputs(scalar_outputs):
    """Average a dict of scalar tensors across processes onto rank 0.

    Returns a dict with the same keys; only rank 0 holds the averaged
    values. With a single process the input is returned unchanged.
    """
    world_size = get_world_size()
    if world_size < 2:
        return scalar_outputs
    with torch.no_grad():
        # Sort keys so every rank stacks the scalars in the same order.
        keys = sorted(scalar_outputs.keys())
        stacked = torch.stack([scalar_outputs[key] for key in keys], dim=0)
        dist.reduce(stacked, dst=0)
        if dist.get_rank() == 0:
            # Rank 0 alone has the accumulated sum, so only it averages.
            stacked /= world_size
        reduced_scalars = dict(zip(keys, stacked))
    return reduced_scalars
示例8: reduce_loss_dict
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import reduce [as 别名]
def reduce_loss_dict(loss_dict):
    """
    Reduce the loss dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    loss_dict, after reduction.
    """
    world_size = dist_util.get_world_size()
    if world_size < 2:
        # Not distributed; hand back the dict untouched.
        return loss_dict
    with torch.no_grad():
        # Deterministic ordering across ranks via sorted keys.
        names = sorted(loss_dict)
        stacked = torch.stack([loss_dict[name] for name in names], dim=0)
        dist.reduce(stacked, dst=0)
        if dist.get_rank() == 0:
            # The sum lands only on rank 0; average there.
            stacked /= world_size
        reduced_losses = {name: loss for name, loss in zip(names, stacked)}
    return reduced_losses
示例9: reduce_dict
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import reduce [as 别名]
def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        # Nothing to reduce in a single-process run.
        return input_dict
    with torch.no_grad():
        # Sorted keys keep the stacking order identical on every process.
        keys = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[key] for key in keys], dim=0)
        dist.reduce(stacked, dst=0)
        if average and dist.get_rank() == 0:
            # Only rank 0 has the accumulated sum, so only it divides.
            stacked /= world_size
        reduced_dict = dict(zip(keys, stacked))
    return reduced_dict