本文整理匯總了 Python 中 torch.distributed.get_world_size 方法的典型用法代碼示例。如果您正苦於以下問題:Python distributed.get_world_size 方法的具體用法?Python distributed.get_world_size 怎麼用?Python distributed.get_world_size 使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊 torch.distributed 的用法示例。
在下文中一共展示了 distributed.get_world_size 方法的 15 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: allreduce_grads
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    """All-reduce the gradients of ``params`` across processes.

    Each gradient is divided by the world size, so the reduction yields
    an average when the all-reduce sums.

    Args:
        params (list[torch.Parameters]): Model parameters. Entries that do
            not require grad, or whose ``grad`` is still ``None``, are
            skipped.
        coalesce (bool, optional): When True, all collected gradients are
            handed to ``_allreduce_coalesced`` in one call; otherwise each
            gradient is all-reduced individually. Defaults to True.
        bucket_size_mb (int, optional): Bucket size in MB, forwarded to
            ``_allreduce_coalesced``. Defaults to -1.
    """
    grads = []
    for param in params:
        if param.requires_grad and param.grad is not None:
            grads.append(param.grad.data)

    world_size = dist.get_world_size()

    if not coalesce:
        for grad in grads:
            # Divide in place first, then all-reduce the scaled gradient.
            dist.all_reduce(grad.div_(world_size))
        return

    _allreduce_coalesced(grads, world_size, bucket_size_mb)
示例2: __init__
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
    """Record the dataset and how it is split across replicas.

    Args:
        dataset: Collection to sample from; only ``len(dataset)`` is used
            here.
        num_replicas (int, optional): Number of replicas. When ``None`` it
            is taken from ``dist.get_world_size()``.
        rank (int, optional): Rank of this replica. When ``None`` it is
            taken from ``dist.get_rank()``.
        shuffle (bool, optional): Stored as ``self.shuffle``. Defaults to
            True.

    Raises:
        RuntimeError: If a default has to be looked up but
            ``dist.is_available()`` is false.
    """
    needs_lookup = num_replicas is None or rank is None
    if needs_lookup and not dist.is_available():
        raise RuntimeError("Requires distributed package to be available")
    if num_replicas is None:
        num_replicas = dist.get_world_size()
    if rank is None:
        rank = dist.get_rank()

    self.dataset = dataset
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0

    # Round up so every replica gets the same number of samples.
    samples_per_replica = math.ceil(len(self.dataset) * 1.0 / self.num_replicas)
    self.num_samples = int(samples_per_replica)
    self.total_size = self.num_samples * self.num_replicas
    self.shuffle = shuffle
示例3: __init__
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def __init__(self,
             dataset,
             samples_per_gpu=1,
             num_replicas=None,
             rank=None):
    """Record the dataset and compute per-replica sample counts by group.

    Args:
        dataset: Dataset exposing a ``flag`` attribute; ``np.bincount`` of
            that attribute gives the size of each group.
        samples_per_gpu (int, optional): Samples per GPU; every group's
            share is rounded up to a multiple of it. Defaults to 1.
        num_replicas (int, optional): Number of replicas. When ``None`` it
            is taken from ``get_world_size()``.
        rank (int, optional): Rank of this replica. When ``None`` it is
            taken from ``get_rank()``.
    """
    if num_replicas is None:
        num_replicas = get_world_size()
    if rank is None:
        rank = get_rank()

    self.dataset = dataset
    self.samples_per_gpu = samples_per_gpu
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0

    assert hasattr(self.dataset, 'flag')
    self.flag = self.dataset.flag
    self.group_sizes = np.bincount(self.flag)

    self.num_samples = 0
    for group_size in self.group_sizes:
        # Batches each replica needs for this group, rounded up.
        batches = math.ceil(
            group_size * 1.0 / self.samples_per_gpu / self.num_replicas)
        self.num_samples += int(batches) * self.samples_per_gpu
    self.total_size = self.num_samples * self.num_replicas
示例4: forward
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def forward(self, input):
    """Normalize ``input`` with batch statistics averaged over all workers.

    Delegates to the parent ``forward`` when ``get_world_size()`` is 1 or
    the module is not in training mode. Otherwise the per-channel mean and
    mean-of-squares (reduced over dims 0, 2 and 3) are passed through
    ``AllReduce`` and divided by the world size, the running statistics
    are updated in place, and the normalized input is returned.

    Args:
        input: Tensor indexed as (dim 0, channels, dim 2, dim 3); its
            first dimension must be non-empty.

    Returns:
        ``input * scale + bias`` with per-channel ``scale`` and ``bias``
        broadcast over the other dimensions.
    """
    if get_world_size() == 1 or not self.training:
        return super().forward(input)

    assert input.shape[0] > 0, "SyncBatchNorm does not support empty inputs"
    num_channels = input.shape[1]
    reduce_dims = [0, 2, 3]
    local_mean = torch.mean(input, dim=reduce_dims)
    local_meansqr = torch.mean(input * input, dim=reduce_dims)

    # Pack both statistics into one vector so only one reduction is needed.
    stats = torch.cat([local_mean, local_meansqr], dim=0)
    stats = AllReduce.apply(stats) * (1.0 / dist.get_world_size())
    mean, meansqr = torch.split(stats, num_channels)
    var = meansqr - mean * mean

    # Running statistics are updated in place from detached values.
    self.running_mean += self.momentum * (mean.detach() - self.running_mean)
    self.running_var += self.momentum * (var.detach() - self.running_var)

    invstd = torch.rsqrt(var + self.eps)
    scale = self.weight * invstd
    bias = self.bias - mean * scale
    return input * scale.reshape(1, -1, 1, 1) + bias.reshape(1, -1, 1, 1)
示例5: reduce_loss_dict
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def reduce_loss_dict(loss_dict):
    """Reduce a dict of losses onto rank 0 and average them there.

    When ``get_world_size()`` is below 2 the input dict is returned
    unchanged. Otherwise the values are stacked in sorted-key order and
    reduced to rank 0; only rank 0 divides by the world size, so only
    rank 0 holds averaged values.

    Args:
        loss_dict (dict): Maps loss names to tensors that can be stacked.

    Returns:
        dict: Same keys as ``loss_dict``, mapped to the reduced values.
    """
    world_size = get_world_size()
    if world_size < 2:
        return loss_dict

    with torch.no_grad():
        # Sorted keys fix the stacking order independently of dict order.
        loss_names = sorted(loss_dict.keys())
        all_losses = torch.stack(
            [loss_dict[name] for name in loss_names], dim=0)
        dist.reduce(all_losses, dst=0)
        if dist.get_rank() == 0:
            # Only the destination rank holds the accumulated result, so
            # only it divides by world_size.
            all_losses /= world_size
        reduced_losses = dict(zip(loss_names, all_losses))
    return reduced_losses
示例6: __init__
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
    """Initialise the parent with ``dataset`` and record the replica split.

    Args:
        dataset: Collection to sample from; it is passed to the parent
            initialiser and ``len(dataset)`` is used here.
        num_replicas (int, optional): Number of replicas. When ``None`` it
            is taken from ``dist.get_world_size()``.
        rank (int, optional): Rank of this replica. When ``None`` it is
            taken from ``dist.get_rank()``.
        shuffle (bool, optional): Stored as ``self.shuffle``. Defaults to
            True.

    Raises:
        RuntimeError: If a default has to be looked up but
            ``dist.is_available()`` is false.
    """
    import torch.distributed as dist

    super().__init__(dataset)

    def _from_dist(query):  # pragma: no cover
        # Shared availability guard for both lookups below.
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        return query()

    if num_replicas is None:  # pragma: no cover
        num_replicas = _from_dist(dist.get_world_size)
    if rank is None:  # pragma: no cover
        rank = _from_dist(dist.get_rank)

    self.dataset = dataset
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0

    # Round up so every replica gets the same number of samples.
    samples_per_replica = math.ceil(len(self.dataset) * 1.0 / self.num_replicas)
    self.num_samples = int(samples_per_replica)
    self.total_size = self.num_samples * self.num_replicas
    self.shuffle = shuffle
示例7: _gather
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def _gather(rank, rows, columns):
    """Run ``dist.gather`` towards rank 0 and check the outcome.

    Every rank builds its tensor with ``_get_tensor(rank, rows, columns)``.
    Rank 0 gathers into a list from ``_get_zeros_tensors_list`` and checks
    that entry ``i`` equals ``_get_tensor(i, rows, columns)``; the other
    ranks only send. Every rank finally checks that its own tensor was not
    modified by the call.

    Args:
        rank (int): Rank of the calling process.
        rows: Forwarded to ``_get_tensor`` / ``_get_zeros_tensors_list``.
        columns: Forwarded to ``_get_tensor`` / ``_get_zeros_tensors_list``.

    Raises:
        AssertionError: If a gathered tensor differs from the expected one
            or the local tensor changed.
    """
    dest = 0
    tensor = _get_tensor(rank, rows, columns)
    if rank == dest:
        tensors_list = _get_zeros_tensors_list(rows, columns)
        logger.debug('Rank: {},\nTensor BEFORE gather: {}. tensors_list: {}'.format(
            rank, tensor, tensors_list))
        dist.gather(tensor=tensor, gather_list=tensors_list)
        logger.debug('Rank: {},\nTensor AFTER gather: {}. tensors_list: {}\n'.format(
            rank, tensor, tensors_list))
        for i in range(dist.get_world_size()):
            # Fix: the message has a '{}' placeholder but was never
            # formatted, so the failing rank was not reported.
            assert torch.equal(tensors_list[i], _get_tensor(i, rows, columns)), \
                'Rank {}: tensors lists are not the same after gather.'.format(rank)
    else:
        logger.debug('Rank: {},\nTensor BEFORE gather: {}\n'.format(rank, tensor))
        dist.gather(tensor=tensor, dst=dest)
        logger.debug('Rank: {},\nTensor AFTER gather: {}\n'.format(rank, tensor))

    # tensor shouldn't have changed
    assert torch.equal(tensor, _get_tensor(rank, rows, columns)), \
        'Rank {}: Tensor got changed after gather.'.format(rank)
示例8: gather_tensors
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def gather_tensors(input_array):
    """All-gather one array per process, allowing different shapes per rank.

    Shapes are exchanged first; each rank then zero-pads its flattened
    data to the largest element count, the padded buffers are all-gathered
    on CUDA, and every received buffer is trimmed and reshaped to its
    sender's shape.

    Args:
        input_array: NumPy-style array (``.shape``, ``.size`` and
            ``.reshape`` are used).

    Returns:
        list: One NumPy array per rank, in rank order, each reshaped to
        the shape that rank reported.
    """
    world_size = dist.get_world_size()

    # 1) exchange shapes
    myshape = input_array.shape
    mycount = input_array.size
    local_shape = torch.Tensor(np.array(myshape)).cuda()
    shape_buffers = [
        torch.Tensor(np.array(myshape)).cuda() for _ in range(world_size)
    ]
    dist.all_gather(shape_buffers, local_shape)

    # 2) find the largest element count
    gathered_shapes = [buf.cpu().numpy() for buf in shape_buffers]
    all_count = [int(shape.prod()) for shape in gathered_shapes]
    all_shape = [[int(dim) for dim in shape] for shape in gathered_shapes]
    max_count = max(all_count)

    # 3) pad the local data to max_count and gather
    output_tensors = [torch.Tensor(max_count).cuda() for _ in range(world_size)]
    padded_input_array = np.zeros(max_count)
    padded_input_array[:mycount] = input_array.reshape(-1)
    input_tensor = torch.Tensor(padded_input_array).cuda()
    dist.all_gather(output_tensors, input_tensor)

    # 4) strip the padding and restore each sender's shape
    output = []
    for received, count, shape in zip(output_tensors, all_count, all_shape):
        flat = received.cpu().numpy()
        output.append(flat[:count].reshape(shape))
    return output
示例9: __init__
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def __init__(self, dataset, total_iter, batch_size, world_size=None, rank=None, last_iter=-1):
    """Store the sampling configuration and build the index list.

    Args:
        dataset: Dataset to sample from (only stored here).
        total_iter (int): Number of iterations.
        batch_size (int): Batch size; ``total_size`` is
            ``total_iter * batch_size``.
        world_size (int, optional): Number of processes. When ``None`` it
            is taken from ``dist.get_world_size()``.
        rank (int, optional): Rank of this process. When ``None`` it is
            taken from ``dist.get_rank()``. Must be below ``world_size``.
        last_iter (int, optional): Stored as ``self.last_iter``. Defaults
            to -1.
    """
    world_size = dist.get_world_size() if world_size is None else world_size
    rank = dist.get_rank() if rank is None else rank
    assert rank < world_size

    self.dataset = dataset
    self.total_iter = total_iter
    self.batch_size = batch_size
    self.world_size = world_size
    self.rank = rank
    self.last_iter = last_iter

    self.total_size = self.total_iter * self.batch_size

    # gen_new_list() runs only after every attribute above is set.
    self.indices = self.gen_new_list()
    self.call = 0
示例10: __init__
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def __init__(self, params, dist_model=False):
    """Build the model, wrap it, and create its optimizer.

    Args:
        params (dict): Configuration. ``params['module']`` is passed to
            the model constructor looked up by ``params['module']['arch']``
            in ``models.modules``; ``params['optim']`` selects 'SGD' or
            'Adam'; ``params['lr']`` is the learning rate and
            ``params['beta1']`` is read for Adam.
        dist_model (bool, optional): When true the model is wrapped in
            ``utils.DistModule`` and ``world_size`` comes from
            ``dist.get_world_size()``; otherwise it is wrapped in
            ``models.modules.FixModule`` and ``world_size`` is 1.

    Raises:
        Exception: If ``params['optim']`` is neither 'SGD' nor 'Adam'.
    """
    model_params = params['module']
    arch = params['module']['arch']
    self.model = models.modules.__dict__[arch](model_params)
    utils.init_weights(self.model, init_type='xavier')
    self.model.cuda()

    if not dist_model:
        self.model = models.modules.FixModule(self.model)
        self.world_size = 1
    else:
        self.model = utils.DistModule(self.model)
        self.world_size = dist.get_world_size()

    optim_name = params['optim']
    if optim_name == 'SGD':
        self.optim = torch.optim.SGD(
            self.model.parameters(),
            lr=params['lr'],
            momentum=0.9,
            weight_decay=0.0001)
    elif optim_name == 'Adam':
        self.optim = torch.optim.Adam(
            self.model.parameters(),
            lr=params['lr'],
            betas=(params['beta1'], 0.999))
    else:
        raise Exception("No such optimizer: {}".format(params['optim']))

    cudnn.benchmark = True
示例11: reduce_mean
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def reduce_mean(tensor):
    """Average ``tensor`` over all processes.

    When the distributed package is unavailable or not initialised, the
    input tensor itself is returned. Otherwise a clone is divided by the
    world size, sum-reduced across processes and returned; the input is
    left untouched.

    Args:
        tensor (torch.Tensor): Value to average.

    Returns:
        torch.Tensor: The input itself, or the reduced clone.
    """
    distributed_ready = dist.is_available() and dist.is_initialized()
    if not distributed_ready:
        return tensor

    averaged = tensor.clone()
    # Divide before summing so the reduced value is the mean.
    averaged.div_(dist.get_world_size())
    dist.all_reduce(averaged, op=dist.ReduceOp.SUM)
    return averaged
示例12: _parse_losses
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def _parse_losses(self, losses):
    """Turn raw network outputs into a total loss and loggable scalars.

    Each entry is reduced to its mean (a list of tensors becomes the sum
    of the element means). Entries whose key contains 'loss' are summed
    into the total loss, stored under the key 'loss'. When distributed
    training is initialised, every logged value is averaged over the
    processes before being converted to a Python number.

    Args:
        losses (dict): Raw network output; every value must be a tensor
            or a list of tensors.

    Returns:
        tuple[Tensor, dict]: (loss, log_vars). ``loss`` is the summed loss
        tensor; ``log_vars`` is an ordered dict of Python numbers for the
        logger.

    Raises:
        TypeError: If a value is neither a tensor nor a list.
    """
    log_vars = OrderedDict()
    for loss_name, loss_value in losses.items():
        if isinstance(loss_value, torch.Tensor):
            log_vars[loss_name] = loss_value.mean()
            continue
        if isinstance(loss_value, list):
            log_vars[loss_name] = sum(item.mean() for item in loss_value)
            continue
        raise TypeError(
            f'{loss_name} is not a tensor or list of tensors')

    loss = sum(value for key, value in log_vars.items() if 'loss' in key)
    log_vars['loss'] = loss

    for name, value in log_vars.items():
        if dist.is_available() and dist.is_initialized():
            # Reduce a detached copy so the tensors behind ``loss`` are
            # not modified.
            value = value.data.clone()
            dist.all_reduce(value.div_(dist.get_world_size()))
        log_vars[name] = value.item()

    return loss, log_vars
示例13: get_current_train_batch_size
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def get_current_train_batch_size(self):
    """Return the training batch size this process should use.

    Outside distributed mode this is ``self.setting.train_batch_size``.
    In distributed mode that value is floor-divided by the world size,
    with a minimum of 1.
    """
    if not self.in_distributed_mode():
        return self.setting.train_batch_size

    per_process = self.setting.train_batch_size // dist.get_world_size()
    return max(per_process, 1)
示例14: average_gradients
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def average_gradients(model):
    """Average the gradients of ``model`` across all processes.

    Each gradient is sum-reduced over the process group and then divided
    by the world size. Parameters whose ``grad`` is ``None`` are skipped.
    A failure while reducing one parameter is logged and the loop
    continues with the next (best effort).

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs.
    """
    size = float(dist.get_world_size())
    for name, param in model.named_parameters():
        grad = param.grad
        if grad is None:
            # Nothing to reduce. Previously this raised inside the ``try``
            # and again inside the handler (which reads ``grad.data``),
            # so the error logging itself crashed.
            continue
        try:
            # ``dist.ReduceOp`` replaces the deprecated ``dist.reduce_op``.
            dist.all_reduce(grad.data, op=dist.ReduceOp.SUM)
            grad.data /= size
        except Exception as e:
            # Deliberately broad: log the failing parameter and carry on.
            logger.error('Error when all_reduce parameter {}, size={}, grad_type={}, error message {}'.format(
                name, param.size(), grad.data.dtype, repr(e)
            ))
示例15: get_world_size
# 需要導入模塊: from torch import distributed [as 別名]
# 或者: from torch.distributed import get_world_size [as 別名]
def get_world_size():
    """Return the number of processes in the current group.

    Returns 1 when the distributed package is unavailable or has not been
    initialised.
    """
    if dist.is_available() and dist.is_initialized():
        return dist.get_world_size()
    return 1