本文整理汇总了 Python 中 torch._utils._unflatten_dense_tensors 方法的典型用法代码示例。如果您正苦于以下问题:Python _utils._unflatten_dense_tensors 方法的具体用法?Python _utils._unflatten_dense_tensors 怎么用?Python _utils._unflatten_dense_tensors 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 torch._utils 的用法示例。
在下文中一共展示了 _utils._unflatten_dense_tensors 方法的 9 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: flat_dist_call
# 需要导入模块: from torch import _utils [as 别名]
# 或者: from torch._utils import _unflatten_dense_tensors [as 别名]
def flat_dist_call(tensors, call, extra_args=None):
    """Run a collective ``call`` over ``tensors`` after coalescing them by type.

    Tensors are grouped by the type-name string returned from
    ``tensor.type()``. Each group is flattened into a single buffer, handed to
    ``call`` (followed by ``extra_args`` when given), divided in place by
    ``dist.get_world_size()`` and finally copied back into the original
    tensors.

    Args:
        tensors: Iterable of dense tensors; they are updated in place.
        call: Callable invoked as ``call(coalesced, *extra_args)``.
        extra_args: Optional sequence of extra positional arguments for
            ``call``. ``None`` means ``call`` receives only the buffer.
    """
    buckets = {}
    for tensor in tensors:
        buckets.setdefault(tensor.type(), []).append(tensor)

    # The warn-once flag is stored on the function object. It is read with a
    # default rather than being reset to True on every call, otherwise the
    # warning would be repeated on each invocation.
    if getattr(flat_dist_call, "warn_on_half", True):
        # Bucket keys are type-name strings (see ``tensor.type()`` above), so
        # the membership test has to use the name, not the tensor class.
        if "torch.cuda.HalfTensor" in buckets:
            print("WARNING: gloo dist backend for half parameters may be extremely slow." +
                  " It is recommended to use the NCCL backend in this case.")
            flat_dist_call.warn_on_half = False

    for bucket in buckets.values():
        coalesced = _flatten_dense_tensors(bucket)
        if extra_args is not None:
            call(coalesced, *extra_args)
        else:
            call(coalesced)
        coalesced /= dist.get_world_size()
        for buf, synced in zip(bucket, _unflatten_dense_tensors(coalesced, bucket)):
            buf.copy_(synced)
示例2: _allreduce_coalesced
# 需要导入模块: from torch import _utils [as 别名]
# 或者: from torch._utils import _unflatten_dense_tensors [as 别名]
def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
if bucket_size_mb > 0:
bucket_size_bytes = bucket_size_mb * 1024 * 1024
buckets = _take_tensors(tensors, bucket_size_bytes)
else:
buckets = OrderedDict()
for tensor in tensors:
tp = tensor.type()
if tp not in buckets:
buckets[tp] = []
buckets[tp].append(tensor)
buckets = buckets.values()
for bucket in buckets:
flat_tensors = _flatten_dense_tensors(bucket)
dist.all_reduce(flat_tensors)
flat_tensors.div_(world_size)
for tensor, synced in zip(
bucket, _unflatten_dense_tensors(flat_tensors, bucket)
):
tensor.copy_(synced)
示例3: __init__
# 需要导入模块: from torch import _utils [as 别名]
# 或者: from torch._utils import _unflatten_dense_tensors [as 别名]
def __init__(self, module):
    """Wrap ``module``, broadcast its state and install gradient all-reduce hooks.

    Every tensor in ``module.state_dict()`` is broadcast with source 0. A hook
    is then registered on each parameter that requires grad; when it fires it
    queues ``allreduce_params`` on the autograd execution engine.

    Args:
        module: The module whose parameters are synchronised.
    """
    super(DistributedDataParallel, self).__init__()
    # Always warn here; a backend-dependent choice was disabled in the original.
    self.warn_on_half = True
    self.module = module

    for entry in self.module.state_dict().values():
        if torch.is_tensor(entry):
            dist.broadcast(entry, 0)

    def allreduce_params():
        # Only the first queued callback after ``needs_reduction`` is set does
        # any work; later ones return immediately.
        if not self.needs_reduction:
            return
        self.needs_reduction = False

        grouped = {}
        for candidate in self.module.parameters():
            if candidate.requires_grad and candidate.grad is not None:
                grouped.setdefault(type(candidate.data), []).append(candidate)

        if self.warn_on_half and torch.cuda.HalfTensor in grouped:
            print("WARNING: gloo dist backend for half parameters may be extremely slow." +
                  " It is recommended to use the NCCL backend in this case.")
            self.warn_on_half = False

        for members in grouped.values():
            grads = [member.grad.data for member in members]
            coalesced = _flatten_dense_tensors(grads)
            dist.all_reduce(coalesced)
            coalesced /= dist.get_world_size()
            restored = _unflatten_dense_tensors(coalesced, grads)
            for grad_buf, averaged in zip(grads, restored):
                grad_buf.copy_(averaged)

    for param in list(self.module.parameters()):
        if not param.requires_grad:
            continue

        def allreduce_hook(*unused):
            # ``param`` is looked up when the hook runs, not when it is defined.
            param._execution_engine.queue_callback(allreduce_params)

        param.register_hook(allreduce_hook)
示例4: master2model
# 需要导入模块: from torch import _utils [as 别名]
# 或者: from torch._utils import _unflatten_dense_tensors [as 别名]
def master2model(model_params, master_params, flat_master: bool = False) -> None:
    """Overwrite the values in `model_params` with those held in `master_params`.

    Both arguments are walked in lockstep as sequences of parameter groups.
    With `flat_master` set, the first entry of each master group is taken as a
    single flat tensor and unflattened to the shapes of the matching model
    group; empty model groups are skipped. Otherwise the parameters of each
    pair of groups are copied one by one.
    """
    for model_group, master_group in zip(model_params, master_params):
        if not flat_master:
            for target, source in zip(model_group, master_group):
                target.data.copy_(source.data)
            continue

        if len(model_group) == 0:
            continue
        unpacked = _unflatten_dense_tensors(master_group[0].data, model_group)
        for target, source in zip(model_group, unpacked):
            target.data.copy_(source)
示例5: __init__
# 需要导入模块: from torch import _utils [as 别名]
# 或者: from torch._utils import _unflatten_dense_tensors [as 别名]
def __init__(self, module):
    """Wrap ``module`` and install hooks that all-reduce its gradients.

    A hook is registered on every parameter that requires grad; when it fires
    it queues ``allreduce_params`` on the autograd execution engine.

    Args:
        module: The module whose gradients are synchronised.
    """
    super(DistributedDataParallel, self).__init__()
    self.module = module
    self.first_call = True

    def allreduce_params():
        # Only the first queued callback after ``needs_reduction`` is set does
        # any work; later ones return immediately.
        if not self.needs_reduction:
            return
        self.needs_reduction = False

        grouped = {}
        for candidate in self.module.parameters():
            if candidate.requires_grad and candidate.grad is not None:
                grouped.setdefault(type(candidate.data), []).append(candidate)

        for members in grouped.values():
            grads = [member.grad.data for member in members]
            coalesced = _flatten_dense_tensors(grads)
            dist.all_reduce(coalesced)
            coalesced /= dist.get_world_size()
            restored = _unflatten_dense_tensors(coalesced, grads)
            for grad_buf, averaged in zip(grads, restored):
                grad_buf.copy_(averaged)

    def allreduce_hook(*unused):
        Variable._execution_engine.queue_callback(allreduce_params)

    for param in list(self.module.parameters()):
        if param.requires_grad:
            param.register_hook(allreduce_hook)
示例6: synchronize
# 需要导入模块: from torch import _utils [as 别名]
# 或者: from torch._utils import _unflatten_dense_tensors [as 别名]
def synchronize(self):
    """Reduce pending gradients once the accumulation countdown reaches zero.

    When ``self.count_down`` is zero, parameters in ``self._requires_update``
    without an entry in ``self._handles`` are passed to
    ``self._allreduce_tensor`` first. The handles are then resolved (multi-node
    path) or coalesced by tensor type and all-reduced (single-node path), the
    handle table is cleared and the countdown is reset to
    ``self.accumulation_step``. The countdown is decremented on every call.

    Returns:
        True if a synchronisation was performed during this call, else False.
    """
    did_sync = False
    if self.count_down == 0:
        pending = self._requires_update - set(self._handles)
        for param in pending:
            self._allreduce_tensor(param)

        if self._multi_node:
            for param, (handle, ctx) in self._handles.items():
                # The bare name resolves to the module-level ``synchronize``,
                # not to this method.
                output = synchronize(handle)
                param.grad.set_(self._compression.decompress(output, ctx) / self.accumulation_step)
        else:
            by_type = OrderedDict()
            for tensor in self._handles.values():
                by_type.setdefault(tensor.type(), []).append(tensor)
            for group in by_type.values():
                coalesced = flatten(group) / self.world_size / self.accumulation_step
                torch.distributed.all_reduce_multigpu([coalesced])
                for target, piece in zip(group, unflatten(coalesced, group)):
                    target.copy_(piece)

        self._handles.clear()
        did_sync = True
        self.count_down = self.accumulation_step

    self.count_down -= 1
    return did_sync
示例7: _dist_broadcast_coalesced
# 需要导入模块: from torch import _utils [as 别名]
# 或者: from torch._utils import _unflatten_dense_tensors [as 别名]
def _dist_broadcast_coalesced(self, tensors, buffer_size):
for tensors in _take_tensors(tensors, buffer_size):
flat_tensors = _flatten_dense_tensors(tensors)
dist.broadcast(flat_tensors, 0)
for tensor, synced in zip(
tensors, _unflatten_dense_tensors(flat_tensors, tensors)
):
tensor.copy_(synced)
示例8: __init__
# 需要导入模块: from torch import _utils [as 别名]
# 或者: from torch._utils import _unflatten_dense_tensors [as 别名]
def __init__(self, module):
    """Wrap ``module``, broadcast its state and install gradient all-reduce hooks.

    Every tensor in ``module.state_dict()`` is broadcast with source 0; under
    the NCCL backend each such tensor must already be on the GPU. A hook is
    then registered on each parameter that requires grad; when it fires it
    queues ``allreduce_params`` on the autograd execution engine.

    Args:
        module: The module whose parameters are synchronised.

    Raises:
        AssertionError: If the backend is NCCL and a state tensor is not CUDA.
    """
    super(DistributedDataParallel, self).__init__()
    self.warn_on_half = True if dist._backend == dist.dist_backend.GLOO else False
    self.module = module

    for p in self.module.state_dict().values():
        if not torch.is_tensor(p):
            continue
        if dist._backend == dist.dist_backend.NCCL:
            assert p.is_cuda, "NCCL backend only supports model parameters to be on GPU."
        dist.broadcast(p, 0)

    def allreduce_params():
        # Only the first queued callback after ``needs_reduction`` is set does
        # any work; later ones return immediately.
        if not self.needs_reduction:
            return
        self.needs_reduction = False

        buckets = {}
        for param in self.module.parameters():
            if param.requires_grad and param.grad is not None:
                buckets.setdefault(param.data.type(), []).append(param)

        if self.warn_on_half:
            # Bucket keys are the type-name strings from ``.type()``, so the
            # membership test must use the name; testing the tensor class
            # against string keys can never match.
            if "torch.cuda.HalfTensor" in buckets:
                print("WARNING: gloo dist backend for half parameters may be extremely slow." +
                      " It is recommended to use the NCCL backend in this case.")
                self.warn_on_half = False

        for bucket in buckets.values():
            grads = [param.grad.data for param in bucket]
            coalesced = _flatten_dense_tensors(grads)
            dist.all_reduce(coalesced)
            coalesced /= dist.get_world_size()
            for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)):
                buf.copy_(synced)

    for param in list(self.module.parameters()):
        def allreduce_hook(*unused):
            # ``param`` is looked up when the hook runs, not when it is defined.
            param._execution_engine.queue_callback(allreduce_params)

        if param.requires_grad:
            param.register_hook(allreduce_hook)
示例9: apply_gradient_allreduce
# 需要导入模块: from torch import _utils [as 别名]
# 或者: from torch._utils import _unflatten_dense_tensors [as 别名]
def apply_gradient_allreduce(module):
    """Attach gradient all-reduce behaviour to ``module`` and return it.

    Every tensor in ``module.state_dict()`` is broadcast with source 0. A hook
    on each parameter that requires grad queues ``allreduce_params`` on the
    autograd execution engine, and a forward hook sets
    ``module.needs_reduction`` to True after each forward pass.

    Args:
        module: The module to modify in place.

    Returns:
        The same ``module`` object.
    """
    # Sync model parameters.
    for entry in module.state_dict().values():
        if torch.is_tensor(entry):
            dist.broadcast(entry, 0)

    def allreduce_params():
        # Only the first queued callback after ``needs_reduction`` is set does
        # any work; later ones return immediately.
        if not module.needs_reduction:
            return
        module.needs_reduction = False

        # Bucket parameters by the type of their data.
        grouped = {}
        for candidate in module.parameters():
            if candidate.requires_grad and candidate.grad is not None:
                grouped.setdefault(type(candidate.data), []).append(candidate)

        for members in grouped.values():
            grads = [member.grad.data for member in members]
            coalesced = _flatten_dense_tensors(grads)
            dist.all_reduce(coalesced, op=dist.reduce_op.SUM)
            coalesced /= dist.get_world_size()
            restored = _unflatten_dense_tensors(coalesced, grads)
            for grad_buf, averaged in zip(grads, restored):
                grad_buf.copy_(averaged)

    def allreduce_hook(*_):
        Variable._execution_engine.queue_callback(allreduce_params)

    for param in list(module.parameters()):
        if param.requires_grad:
            param.register_hook(allreduce_hook)

    def set_needs_reduction(self, *_):
        self.needs_reduction = True

    module.register_forward_hook(set_needs_reduction)
    return module