This page collects typical usage examples of the Python method detectron.utils.c2.CudaScope. If you are unsure what c2.CudaScope does or how to use it, the curated examples below should help; they also illustrate the containing module, detectron.utils.c2.
Four code examples of the c2.CudaScope method are shown below, sorted by popularity by default.
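Before the examples, a minimal sketch of the common pattern: CudaScope(gpu_id) is a context manager that enters a Caffe2 CUDA device scope for the given GPU, so workspace and net operations issued inside it target that device. The blob name and value below are illustrative assumptions, borrowed from Example 4.

import numpy as np
from caffe2.python import workspace
from detectron.utils import c2 as c2_utils

# Inside CudaScope(0), the fed blob is placed on GPU 0 (the current
# device scope is picked up by FeedBlob when no device_option is given).
with c2_utils.CudaScope(0):
    workspace.FeedBlob('gpu_0/lr', np.array([0.01], dtype=np.float32))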
Example 1: broadcast_parameters
# Required import: from detectron.utils import c2 as c2_utils
# Or: from detectron.utils.c2 import CudaScope
# (cfg, workspace, and logger are assumed to come from the surrounding Detectron module.)
def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter blobs
    on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # no-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            for i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])
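A hypothetical call site for the function above, assuming `model` is an already-built multi-GPU Detectron model with cfg.NUM_GPUS > 1 (for example, right after restoring checkpoint weights into the GPU 0 blobs):

# Sketch of a call site (assumed context): mirror GPU 0's weights and
# their '_momentum' history blobs to GPUs 1..NUM_GPUS-1.
broadcast_parameters(model)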
Example 2: _add_allreduce_graph
# Required import: from detectron.utils import c2 as c2_utils
# Or: from detectron.utils.c2 import CudaScope
# (muji is caffe2.python.muji.)
def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients."""
    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU; get the number of
    # distinct parameter blobs (i.e., the number of parameter blobs on
    # each GPU)
    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
        for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
            gradients = [
                model.param_to_grad[p] for p in all_params[i::params_per_gpu]
            ]
            if len(gradients) > 0:
                if cfg.USE_NCCL:
                    model.net.NCCLAllreduce(gradients, gradients)
                else:
                    muji.Allreduce(model.net, gradients, reduced_affix='')
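The slice `all_params[i::params_per_gpu]` is the heart of this function: TrainableParams() returns blobs GPU-major, so a stride of params_per_gpu collects the copies of one parameter across all GPUs. A pure-Python illustration with hypothetical blob names:

# Illustration (hypothetical names): 2 GPUs, 3 parameters per GPU.
all_params = ['gpu_0/conv1_w', 'gpu_0/fc_w', 'gpu_0/fc_b',
              'gpu_1/conv1_w', 'gpu_1/fc_w', 'gpu_1/fc_b']
params_per_gpu = len(all_params) // 2
for i in range(params_per_gpu):
    print(all_params[i::params_per_gpu])
# -> ['gpu_0/conv1_w', 'gpu_1/conv1_w']
# -> ['gpu_0/fc_w',    'gpu_1/fc_w']
# -> ['gpu_0/fc_b',    'gpu_1/fc_b']

Each printed group is one parameter replicated across GPUs, and each becomes one Allreduce in the graph.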
Example 3: _CorrectMomentum
# Required import: from detectron.utils import c2 as c2_utils
# Or: from detectron.utils.c2 import CudaScope
# (core and workspace are from caffe2.python.)
def _CorrectMomentum(self, correction):
    """The MomentumSGDUpdate op implements the update V as
        V := mu * V + lr * grad,
    where mu is the momentum factor, lr is the learning rate, and grad is
    the stochastic gradient. Since V is not defined independently of the
    learning rate (as it should ideally be), when the learning rate is
    changed we should scale the update history V in order to make it
    compatible in scale with lr * grad.
    """
    logger.info(
        'Scaling update history by {:.6f} (new lr / old lr)'.
        format(correction))
    for i in range(cfg.NUM_GPUS):
        with c2_utils.CudaScope(i):
            for param in self.TrainableParams(gpu_id=i):
                op = core.CreateOperator(
                    'Scale', [param + '_momentum'], [param + '_momentum'],
                    scale=correction)
                workspace.RunOperatorOnce(op)
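The docstring's point can be made concrete with assumed numbers: V was accumulated at a scale proportional to lr * grad under the old learning rate, so after a schedule step it must be multiplied by new_lr / old_lr to stay comparable with the new lr * grad terms.

# Worked sketch with assumed values: a 10x learning-rate drop.
old_lr, new_lr = 0.01, 0.001
correction = new_lr / old_lr   # 0.1
# _CorrectMomentum(0.1) then runs a Caffe2 'Scale' op in place over every
# '<param>_momentum' blob, one GPU at a time under CudaScope(i).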
Example 4: _SetNewLr
# Required import: from detectron.utils import c2 as c2_utils
# Or: from detectron.utils.c2 import CudaScope
# (np is numpy; _get_lr_change_ratio is a helper in the same module.)
def _SetNewLr(self, cur_lr, new_lr):
    """Do the actual work of updating the model and workspace blobs."""
    for i in range(cfg.NUM_GPUS):
        with c2_utils.CudaScope(i):
            workspace.FeedBlob(
                'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
    ratio = _get_lr_change_ratio(cur_lr, new_lr)
    if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
            ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
        self._CorrectMomentum(new_lr / cur_lr)
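For reference, _get_lr_change_ratio (not shown on this page) returns the larger of new_lr/cur_lr and cur_lr/new_lr, so the gate fires on any sufficiently large relative change in either direction. The sketch below plugs in assumed values; the 1.1 threshold is an assumption standing in for cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD.

# Sketch of the momentum-gating condition with assumed values.
cur_lr, new_lr = 0.01, 0.001
ratio = max(new_lr / cur_lr, cur_lr / new_lr)   # 10.0
scale_momentum_threshold = 1.1                  # assumed threshold value
if ratio > scale_momentum_threshold:
    correction = new_lr / cur_lr                # 0.1, passed to _CorrectMomentum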