This article collects typical usage examples of the Python method torch.cuda.comm.gather. If you are wondering what exactly comm.gather does, how to call it, or how it is used in practice, the curated code examples below may help. You can also explore further usage examples of its containing module, torch.cuda.comm.
The following shows 9 code examples of comm.gather, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
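Before the examples, a minimal sketch of the call itself may help (this is illustrative, not taken from the examples below, and assumes a machine with at least two CUDA devices): comm.gather takes a sequence of tensors living on different GPUs and concatenates them along a dimension onto one destination device.

import torch
from torch.cuda import comm

if torch.cuda.device_count() >= 2:
    # One chunk per GPU, each on its own device
    chunks = [torch.randn(4, 8, device=f"cuda:{i}") for i in range(2)]
    # Concatenate along dim 0 onto cuda:0
    merged = comm.gather(chunks, dim=0, destination=0)
    print(merged.shape, merged.device)  # torch.Size([8, 8]) cuda:0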
Example 1: backward
# Required imports: from torch.cuda import comm [as alias]
# Or: from torch.cuda.comm import gather [as alias]
def backward(ctx, dz):
    x, _ex, _exs, gamma, beta = ctx.saved_tensors
    dz = dz.contiguous()
    # BN backward
    if dz.is_cuda:
        dx, _dex, _dexs, dgamma, dbeta = \
            lib.gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
    else:
        raise NotImplementedError
    if ctx.training:
        if ctx.sync:
            if ctx.is_master:
                _dex, _dexs = [_dex.unsqueeze(0)], [_dexs.unsqueeze(0)]
                for _ in range(ctx.master_queue.maxsize):
                    _dex_w, _dexs_w = ctx.master_queue.get()
                    ctx.master_queue.task_done()
                    _dex.append(_dex_w.unsqueeze(0))
                    _dexs.append(_dexs_w.unsqueeze(0))
                _dex = comm.gather(_dex).mean(0)
                _dexs = comm.gather(_dexs).mean(0)
                tensors = comm.broadcast_coalesced((_dex, _dexs), [_dex.get_device()] + ctx.worker_ids)
                for ts, queue in zip(tensors[1:], ctx.worker_queues):
                    queue.put(ts)
            else:
                ctx.master_queue.put((_dex, _dexs))
                _dex, _dexs = ctx.worker_queue.get()
                ctx.worker_queue.task_done()
        if x.is_cuda:
            dx_ = lib.gpu.expectation_backward(x, _dex, _dexs)
        else:
            raise NotImplementedError
        dx = dx + dx_
    return dx, dgamma, dbeta, None, None, None, None, None, None, None, None, None
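Examples 1-3 all follow the same master/worker handshake: each worker pushes its local statistics into a master queue, the master gathers and averages them with comm.gather, then broadcasts the reduced values back through per-worker queues. A hedged sketch of that pattern in isolation (the function name sync_reduce_master and the single-tensor simplification are illustrative, not this repository's API):

import torch
from torch.cuda import comm

def sync_reduce_master(stat, master_queue, worker_queues, worker_ids):
    # Master side: collect one statistic tensor from every worker GPU.
    stats = [stat.unsqueeze(0)]
    for _ in range(master_queue.maxsize):
        stat_w = master_queue.get()
        master_queue.task_done()
        stats.append(stat_w.unsqueeze(0))
    # Gather the per-device tensors onto the master and average them.
    reduced = comm.gather(stats).mean(0)
    # Hand each worker its own on-device copy of the reduced statistic.
    copies = comm.broadcast(reduced, [reduced.get_device()] + worker_ids)
    for t, queue in zip(copies[1:], worker_queues):
        queue.put(t)
    return reduced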
Example 2: backward
# Required imports: from torch.cuda import comm [as alias]
# Or: from torch.cuda.comm import gather [as alias]
def backward(ctx, dz):
    x, _ex, _exs, gamma, beta = ctx.saved_tensors
    dz = dz.contiguous()
    # BN backward
    if dz.is_cuda:
        dx, _dex, _dexs, dgamma, dbeta = lib.gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
    else:
        raise NotImplementedError
    if ctx.training:
        if ctx.sync:
            if ctx.is_master:
                _dex, _dexs = [_dex.unsqueeze(0)], [_dexs.unsqueeze(0)]
                for _ in range(ctx.master_queue.maxsize):
                    _dex_w, _dexs_w = ctx.master_queue.get()
                    ctx.master_queue.task_done()
                    _dex.append(_dex_w.unsqueeze(0))
                    _dexs.append(_dexs_w.unsqueeze(0))
                _dex = comm.gather(_dex).mean(0)
                _dexs = comm.gather(_dexs).mean(0)
                tensors = comm.broadcast_coalesced((_dex, _dexs), [_dex.get_device()] + ctx.worker_ids)
                for ts, queue in zip(tensors[1:], ctx.worker_queues):
                    queue.put(ts)
            else:
                ctx.master_queue.put((_dex, _dexs))
                _dex, _dexs = ctx.worker_queue.get()
                ctx.worker_queue.task_done()
        if x.is_cuda:
            dx_ = lib.gpu.expectation_backward(x, _dex, _dexs)
        else:
            raise NotImplementedError
        dx = dx + dx_
    return dx, dgamma, dbeta, None, None, None, None, None, None, None, None, None
Example 3: backward
# Required imports: from torch.cuda import comm [as alias]
# Or: from torch.cuda.comm import gather [as alias]
def backward(ctx, dz):
    x, _ex, _exs, gamma, beta = ctx.saved_tensors
    dz = dz.contiguous()
    # BN backward
    dx, _dex, _dexs, dgamma, dbeta = _C.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
    if ctx.training:
        if ctx.sync:
            if ctx.is_master:
                _dex, _dexs = [_dex.unsqueeze(0)], [_dexs.unsqueeze(0)]
                for _ in range(ctx.master_queue.maxsize):
                    _dex_w, _dexs_w = ctx.master_queue.get()
                    ctx.master_queue.task_done()
                    _dex.append(_dex_w.unsqueeze(0))
                    _dexs.append(_dexs_w.unsqueeze(0))
                _dex = comm.gather(_dex).mean(0)
                _dexs = comm.gather(_dexs).mean(0)
                tensors = comm.broadcast_coalesced((_dex, _dexs), [_dex.get_device()] + ctx.worker_ids)
                for ts, queue in zip(tensors[1:], ctx.worker_queues):
                    queue.put(ts)
            else:
                ctx.master_queue.put((_dex, _dexs))
                _dex, _dexs = ctx.worker_queue.get()
                ctx.worker_queue.task_done()
        dx_ = _C.expectation_backward(x, _dex, _dexs)
        dx = dx + dx_
    return dx, dgamma, dbeta, None, None, None, None, None, None, None, None, None
Example 4: forward
# Required imports: from torch.cuda import comm [as alias]
# Or: from torch.cuda.comm import gather [as alias]
def forward(ctx, target_device, dim, *inputs):
    assert all(map(lambda i: i.is_cuda, inputs))
    ctx.target_device = target_device
    ctx.dim = dim
    ctx.input_gpus = tuple(map(lambda i: i.get_device(), inputs))
    ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs))
    return comm.gather(inputs, ctx.dim, ctx.target_device)
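Example 4 only saves the per-input device ids and chunk sizes; the matching backward would split the incoming gradient back to those GPUs. A hedged sketch of what such a backward typically looks like (it mirrors the pattern of torch.nn.parallel._functions.Gather, not necessarily this repository's exact code):

def backward(ctx, grad_output):
    # Scatter the gradient back to the source GPUs, one chunk per input,
    # reusing the sizes and device ids recorded in forward.
    grads = comm.scatter(grad_output, ctx.input_gpus, ctx.input_sizes, ctx.dim)
    # target_device and dim receive no gradient; each tensor in *inputs
    # gets its own chunk.
    return (None, None) + tuple(grads)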
Example 5: forward
# Required imports: from torch.cuda import comm [as alias]
# Or: from torch.cuda.comm import gather [as alias]
def forward(cls, ctx, x, weight, bias, running_mean, running_var,
            extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
    # Save context
    cls._parse_extra(ctx, extra)
    ctx.training = training
    ctx.momentum = momentum
    ctx.eps = eps
    ctx.activation = activation
    ctx.slope = slope
    ctx.affine = weight is not None and bias is not None
    # Prepare inputs
    count = _count_samples(x) * (ctx.master_queue.maxsize + 1)
    x = x.contiguous()
    weight = weight.contiguous() if ctx.affine else x.new_empty(0)
    bias = bias.contiguous() if ctx.affine else x.new_empty(0)
    if ctx.training:
        mean, var = _backend.mean_var(x)
        if ctx.is_master:
            means, vars = [mean.unsqueeze(0)], [var.unsqueeze(0)]
            for _ in range(ctx.master_queue.maxsize):
                mean_w, var_w = ctx.master_queue.get()
                ctx.master_queue.task_done()
                means.append(mean_w.unsqueeze(0))
                vars.append(var_w.unsqueeze(0))
            means = comm.gather(means)
            vars = comm.gather(vars)
            mean = means.mean(0)
            var = (vars + (mean - means) ** 2).mean(0)
            tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids)
            for ts, queue in zip(tensors[1:], ctx.worker_queues):
                queue.put(ts)
        else:
            ctx.master_queue.put((mean, var))
            mean, var = ctx.worker_queue.get()
            ctx.worker_queue.task_done()
        # Update running stats
        running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
        running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))
        # Mark in-place modified tensors
        ctx.mark_dirty(x, running_mean, running_var)
    else:
        mean, var = running_mean.contiguous(), running_var.contiguous()
        ctx.mark_dirty(x)
    # BN forward + activation
    _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
    _act_forward(ctx, x)
    # Output
    ctx.var = var
    ctx.save_for_backward(x, var, weight, bias)
    return x
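The var * count / (count - 1) factor in the running-stat update above is Bessel's correction: the batch statistic is the biased (population) variance, while running_var conventionally stores the unbiased estimate. A quick illustrative check with plain torch:

import torch

x = torch.randn(1000)
n = x.numel()
biased = x.var(unbiased=False)    # what the batch statistic computes
unbiased = x.var(unbiased=True)   # what running_var stores
assert torch.allclose(biased * n / (n - 1), unbiased)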
Example 6: forward
# Required imports: from torch.cuda import comm [as alias]
# Or: from torch.cuda.comm import gather [as alias]
def forward(cls, ctx, x, weight, bias, running_mean, running_var,
            extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
    # Save context
    cls._parse_extra(ctx, extra)
    ctx.training = training
    ctx.momentum = momentum
    ctx.eps = eps
    ctx.activation = activation
    ctx.slope = slope
    n = _count_samples(x) * (ctx.master_queue.maxsize + 1)
    if ctx.training:
        mean = x.new().resize_(1, running_mean.size(0))
        var = x.new().resize_(1, running_var.size(0))
        _check_contiguous(x, mean, var)
        _check(_ext.bn_mean_var_cuda, x, mean, var)
        if ctx.is_master:
            means, vars = [mean], [var]
            for _ in range(ctx.master_queue.maxsize):
                mean_w, var_w = ctx.master_queue.get()
                ctx.master_queue.task_done()
                means.append(mean_w)
                vars.append(var_w)
            means = comm.gather(means)
            vars = comm.gather(vars)
            mean = means.mean(0)
            var = (vars + (mean - means) ** 2).mean(0)
            tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids)
            for ts, queue in zip(tensors[1:], ctx.worker_queues):
                queue.put(ts)
        else:
            ctx.master_queue.put((mean, var))
            mean, var = ctx.worker_queue.get()
            ctx.worker_queue.task_done()
        # Update running stats
        running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
        running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * n / (n - 1))
    else:
        mean, var = running_mean, running_var
    _check_contiguous(x, mean, var, weight, bias)
    _check(_ext.bn_forward_cuda,
           x, mean, var,
           weight if weight is not None else x.new(),
           bias if bias is not None else x.new(),
           x, x, ctx.eps)
    # Activation
    _act_forward(ctx, x)
    # Output
    ctx.var = var
    ctx.save_for_backward(x, weight, bias, running_mean, running_var)
    ctx.mark_dirty(x)
    return x
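The broadcast step in these examples relies on comm.broadcast_coalesced, which copies a sequence of tensors to every listed device in batched transfers and returns one tuple of copies per device; that is why the code iterates over tensors[1:], skipping the master's own copy. A minimal illustrative sketch, assuming at least two CUDA devices:

import torch
from torch.cuda import comm

if torch.cuda.device_count() >= 2:
    mean = torch.zeros(8, device="cuda:0")
    var = torch.ones(8, device="cuda:0")
    copies = comm.broadcast_coalesced((mean, var), [0, 1])
    # copies[i] is the (mean, var) pair materialized on device i
    assert copies[1][0].get_device() == 1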
Example 7: forward
# Required imports: from torch.cuda import comm [as alias]
# Or: from torch.cuda.comm import gather [as alias]
def forward(cls, ctx, x, gamma, beta, running_mean, running_var,
            extra, sync=True, training=True, momentum=0.1, eps=1e-05,
            activation="none", slope=0.01):
    # save context
    cls._parse_extra(ctx, extra)
    ctx.sync = sync
    ctx.training = training
    ctx.momentum = momentum
    ctx.eps = eps
    ctx.activation = activation
    ctx.slope = slope
    assert activation == 'none'
    # contiguous inputs
    x = x.contiguous()
    gamma = gamma.contiguous()
    beta = beta.contiguous()
    if ctx.training:
        _ex, _exs = _C.expectation_forward(x)
        if ctx.sync:
            if ctx.is_master:
                _ex, _exs = [_ex.unsqueeze(0)], [_exs.unsqueeze(0)]
                for _ in range(ctx.master_queue.maxsize):
                    _ex_w, _exs_w = ctx.master_queue.get()
                    ctx.master_queue.task_done()
                    _ex.append(_ex_w.unsqueeze(0))
                    _exs.append(_exs_w.unsqueeze(0))
                _ex = comm.gather(_ex).mean(0)
                _exs = comm.gather(_exs).mean(0)
                tensors = comm.broadcast_coalesced((_ex, _exs), [_ex.get_device()] + ctx.worker_ids)
                for ts, queue in zip(tensors[1:], ctx.worker_queues):
                    queue.put(ts)
            else:
                ctx.master_queue.put((_ex, _exs))
                _ex, _exs = ctx.worker_queue.get()
                ctx.worker_queue.task_done()
        # Update running stats
        _var = _exs - _ex ** 2
        running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * _ex)
        running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * _var)
        # Mark in-place modified tensors
        ctx.mark_dirty(running_mean, running_var)
    else:
        _ex, _var = running_mean.contiguous(), running_var.contiguous()
        _exs = _var + _ex ** 2
    # BN forward
    y = _C.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
    # Output
    ctx.save_for_backward(x, _ex, _exs, gamma, beta)
    return y
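Example 7 synchronizes the raw moments E[x] and E[x^2] across GPUs rather than mean and variance, then recovers the variance via Var[x] = E[x^2] - E[x]^2 (the _var = _exs - _ex ** 2 line). A quick illustrative check with plain torch:

import torch

x = torch.randn(8, 16)
ex, exs = x.mean(dim=0), (x * x).mean(dim=0)
assert torch.allclose(exs - ex ** 2, x.var(dim=0, unbiased=False), atol=1e-6)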
Example 8: forward
# Required imports: from torch.cuda import comm [as alias]
# Or: from torch.cuda.comm import gather [as alias]
def forward(cls, ctx, x, weight, bias, running_mean, running_var,
            extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
    # Save context
    cls._parse_extra(ctx, extra)
    ctx.training = training
    ctx.momentum = momentum
    ctx.eps = eps
    ctx.activation = activation
    ctx.slope = slope
    n = _count_samples(x) * (ctx.master_queue.maxsize + 1)
    if ctx.training:
        mean = x.new().resize_(1, running_mean.size(0))
        var = x.new().resize_(1, running_var.size(0))
        _check_contiguous(x, mean, var)
        _ext.bn_mean_var_cuda(x, mean, var)
        if ctx.is_master:
            means, vars = [mean], [var]
            for _ in range(ctx.master_queue.maxsize):
                mean_w, var_w = ctx.master_queue.get()
                ctx.master_queue.task_done()
                means.append(mean_w)
                vars.append(var_w)
            means = comm.gather(means)
            vars = comm.gather(vars)
            mean = means.mean(0)
            var = (vars + (mean - means) ** 2).mean(0)
            tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids)
            for ts, queue in zip(tensors[1:], ctx.worker_queues):
                queue.put(ts)
        else:
            ctx.master_queue.put((mean, var))
            mean, var = ctx.worker_queue.get()
            ctx.worker_queue.task_done()
        # Update running stats
        running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
        running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * n / (n - 1))
    else:
        mean, var = running_mean, running_var
    _check_contiguous(x, mean, var, weight, bias)
    _ext.bn_forward_cuda(
        x, mean, var,
        weight if weight is not None else x.new(),
        bias if bias is not None else x.new(),
        x, x, ctx.eps)
    # Activation
    _act_forward(ctx, x)
    # Output
    ctx.var = var
    ctx.save_for_backward(x, weight, bias, running_mean, running_var)
    ctx.mark_dirty(x)
    return x
Example 9: forward
# Required imports: from torch.cuda import comm [as alias]
# Or: from torch.cuda.comm import gather [as alias]
def forward(cls, ctx, x, weight, bias, running_mean, running_var,
            extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
    # Save context
    cls._parse_extra(ctx, extra)
    ctx.training = training
    ctx.momentum = momentum
    ctx.eps = eps
    ctx.activation = activation
    ctx.slope = slope
    ctx.affine = weight is not None and bias is not None
    # Prepare inputs
    count = _count_samples(x) * (ctx.master_queue.maxsize + 1)
    x = x.contiguous()
    weight = weight.contiguous() if ctx.affine else x.new_empty(0)
    bias = bias.contiguous() if ctx.affine else x.new_empty(0)
    if ctx.training:
        mean, var = backend.mean_var(x)
        if ctx.is_master:
            means, vars = [mean.unsqueeze(0)], [var.unsqueeze(0)]
            for _ in range(ctx.master_queue.maxsize):
                mean_w, var_w = ctx.master_queue.get()
                ctx.master_queue.task_done()
                means.append(mean_w.unsqueeze(0))
                vars.append(var_w.unsqueeze(0))
            means = comm.gather(means)
            vars = comm.gather(vars)
            mean = means.mean(0)
            var = (vars + (mean - means) ** 2).mean(0)
            tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids)
            for ts, queue in zip(tensors[1:], ctx.worker_queues):
                queue.put(ts)
        else:
            ctx.master_queue.put((mean, var))
            mean, var = ctx.worker_queue.get()
            ctx.worker_queue.task_done()
        # Update running stats
        running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
        running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))
        # Mark in-place modified tensors
        ctx.mark_dirty(x, running_mean, running_var)
    else:
        mean, var = running_mean.contiguous(), running_var.contiguous()
        ctx.mark_dirty(x)
    # BN forward + activation
    backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
    _act_forward(ctx, x)
    # Output
    ctx.var = var
    ctx.save_for_backward(x, var, weight, bias)
    return x