This article collects typical usage examples of the Python attribute detectron.core.config.cfg.NUM_GPUS. If you have been wondering what cfg.NUM_GPUS is for and how it is used in practice, the curated examples below may help. You can also explore further usage examples of its containing config object, detectron.core.config.cfg.
Ten code examples of the cfg.NUM_GPUS attribute are shown below, sorted by popularity.
Example 1: broadcast_parameters
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def broadcast_parameters(model):
    """Copy parameter blobs from GPU 0 over the corresponding parameter blobs
    on GPUs 1 through cfg.NUM_GPUS - 1.
    """
    if cfg.NUM_GPUS == 1:
        # No-op if only running on a single GPU
        return

    def _do_broadcast(all_blobs):
        assert len(all_blobs) % cfg.NUM_GPUS == 0, \
            ('Unexpected value for NUM_GPUS. Make sure you are not '
             'running single-GPU inference with NUM_GPUS > 1.')
        blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS)
        for i in range(blobs_per_gpu):
            # The i-th parameter blob on every GPU, GPU 0 first
            blobs = [p for p in all_blobs[i::blobs_per_gpu]]
            data = workspace.FetchBlob(blobs[0])
            logger.debug('Broadcasting {} to'.format(str(blobs[0])))
            # Copy GPU 0's data into the corresponding blob on each other GPU
            for gpu_i, p in enumerate(blobs[1:]):
                logger.debug(' |-> {}'.format(str(p)))
                with c2_utils.CudaScope(gpu_i + 1):
                    workspace.FeedBlob(p, data)

    _do_broadcast(model.params)
    _do_broadcast([b + '_momentum' for b in model.TrainableParams()])
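The stride all_blobs[i::blobs_per_gpu] works because model.params lists all of GPU 0's blobs first, then GPU 1's, and so on; slicing with that step picks out the i-th parameter on every GPU. A pure-Python sketch of the grouping (the blob names are hypothetical, though they follow Detectron's gpu_<id>/ name-scope convention):

num_gpus = 3
blobs_per_gpu = 2
# Flat layout: all of GPU 0's blobs, then GPU 1's, then GPU 2's
all_blobs = [
    'gpu_0/conv1_w', 'gpu_0/conv1_b',
    'gpu_1/conv1_w', 'gpu_1/conv1_b',
    'gpu_2/conv1_w', 'gpu_2/conv1_b',
]
for i in range(blobs_per_gpu):
    # The i-th parameter blob on each GPU, GPU 0 first
    print(all_blobs[i::blobs_per_gpu])
# ['gpu_0/conv1_w', 'gpu_1/conv1_w', 'gpu_2/conv1_w']
# ['gpu_0/conv1_b', 'gpu_1/conv1_b', 'gpu_2/conv1_b']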
Example 2: GetStats
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def GetStats(self, cur_iter, lr):
    eta_seconds = self.iter_timer.average_time * (
        cfg.SOLVER.MAX_ITER - cur_iter
    )
    eta = str(datetime.timedelta(seconds=int(eta_seconds)))
    mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
    # Peak memory across the GPUs actually in use
    mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
    stats = dict(
        iter=cur_iter,
        lr=float(lr),
        time=self.iter_timer.average_time,
        loss=self.smoothed_total_loss.GetMedianValue(),
        eta=eta,
        mb_qsize=int(
            np.round(self.smoothed_mb_qsize.GetMedianValue())
        ),
        mem=int(np.ceil(mem_usage / 1024 / 1024))  # bytes -> MB
    )
    for k, v in self.smoothed_losses_and_metrics.items():
        stats[k] = v.GetMedianValue()
    return stats
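The ETA is simply the smoothed per-iteration time multiplied by the iterations remaining, and memory is the peak usage across the GPUs in use, converted from bytes to MB. A standalone sketch with made-up numbers standing in for the timer and the c2_py_utils stats:

import datetime
import numpy as np

average_time = 0.85                     # smoothed seconds per iteration
max_iter, cur_iter = 90000, 20000
max_by_gpu = np.array([5.1e9, 4.9e9])   # peak bytes per GPU (hypothetical)
num_gpus = 2

eta_seconds = average_time * (max_iter - cur_iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
mem_mb = int(np.ceil(np.max(max_by_gpu[:num_gpus]) / 1024 / 1024))
print(eta, mem_mb)  # 16:31:40 4864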
Example 3: _add_allreduce_graph
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def _add_allreduce_graph(model):
    """Construct the graph that performs Allreduce on the gradients."""
    # Need to all-reduce the per-GPU gradients if training with more than 1 GPU
    all_params = model.TrainableParams()
    assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU, so get the number of
    # distinct parameter blobs (i.e., the number of parameter blobs on
    # each GPU)
    params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
        for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
            gradients = [
                model.param_to_grad[p] for p in all_params[i::params_per_gpu]
            ]
            if len(gradients) > 0:
                if cfg.USE_NCCL:
                    model.net.NCCLAllreduce(gradients, gradients)
                else:
                    muji.Allreduce(model.net, gradients, reduced_affix='')
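Allreduce leaves every participating GPU holding the same reduced tensor (the elementwise sum of the per-GPU gradients), which is why the op can write its outputs in place over its inputs. A toy NumPy sketch of the semantics:

import numpy as np

# One gradient per GPU for the same parameter blob
per_gpu_grads = [np.array([0.1, -0.2]), np.array([0.3, 0.0]), np.array([-0.1, 0.4])]

# After allreduce, every GPU holds the elementwise sum across GPUs
reduced = np.sum(per_gpu_grads, axis=0)
per_gpu_grads = [reduced.copy() for _ in per_gpu_grads]
print(per_gpu_grads[0])  # [0.3 0.2]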
Example 4: __init__
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def __init__(self, **kwargs):
    # Handle args specific to the DetectionModelHelper; others pass through
    # to CNNModelHelper
    self.train = kwargs.get('train', False)
    self.num_classes = kwargs.get('num_classes', -1)
    assert self.num_classes > 0, 'num_classes must be > 0'
    for k in ('train', 'num_classes'):
        if k in kwargs:
            del kwargs[k]
    kwargs['order'] = 'NCHW'
    # Defensively set cudnn_exhaustive_search to False in case the default
    # changes in CNNModelHelper. The detection code uses variable-size
    # inputs that might not play nicely with cudnn_exhaustive_search.
    kwargs['cudnn_exhaustive_search'] = False
    super(DetectionModelHelper, self).__init__(**kwargs)
    self.roi_data_loader = None
    self.losses = []
    self.metrics = []
    self.do_not_update_params = []  # Params on this list are not updated
    self.net.Proto().type = cfg.MODEL.EXECUTION_TYPE
    self.net.Proto().num_workers = cfg.NUM_GPUS * 4
    self.prev_use_cudnn = self.use_cudnn
    self.gn_params = []  # Params on this list are GroupNorm parameters
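The constructor consumes its own keyword arguments before forwarding the rest to the parent class, a standard pattern when subclassing a helper that takes **kwargs. A minimal self-contained sketch of the pattern (Base is a stand-in, not a Caffe2 class):

class Base(object):
    def __init__(self, **kwargs):
        self.order = kwargs.get('order')

class Helper(Base):
    def __init__(self, **kwargs):
        # Pop subclass-specific args, then forward the remainder
        self.train = kwargs.pop('train', False)
        self.num_classes = kwargs.pop('num_classes', -1)
        assert self.num_classes > 0, 'num_classes must be > 0'
        kwargs['order'] = 'NCHW'
        super(Helper, self).__init__(**kwargs)

h = Helper(train=True, num_classes=81)
print(h.train, h.num_classes, h.order)  # True 81 NCHW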
Example 5: _CorrectMomentum
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def _CorrectMomentum(self, correction):
    """The MomentumSGDUpdate op implements the update V as

        V := mu * V + lr * grad,

    where mu is the momentum factor, lr is the learning rate, and grad is
    the stochastic gradient. Since V is not defined independently of the
    learning rate (as it should ideally be), when the learning rate is
    changed we should scale the update history V in order to make it
    compatible in scale with lr * grad.
    """
    logger.info(
        'Scaling update history by {:.6f} (new lr / old lr)'.
        format(correction))
    for i in range(cfg.NUM_GPUS):
        with c2_utils.CudaScope(i):
            for param in self.TrainableParams(gpu_id=i):
                op = core.CreateOperator(
                    'Scale', [param + '_momentum'], [param + '_momentum'],
                    scale=correction)
                workspace.RunOperatorOnce(op)
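Because V accumulates lr * grad terms, dropping the learning rate (say, 10x at a schedule step) without touching V would leave the history ten times too large relative to the new lr * grad. Scaling V by correction = new_lr / old_lr restores the balance, as this NumPy sketch shows:

import numpy as np

mu, old_lr, new_lr = 0.9, 0.01, 0.001
grad = np.array([1.0, -2.0])

# For a constant gradient, V converges to lr * grad / (1 - mu)
V = old_lr * grad / (1 - mu)
V *= new_lr / old_lr          # what _CorrectMomentum does via the Scale op

# After correction, one update leaves V at the new steady state
V = mu * V + new_lr * grad
print(V)  # [ 0.01 -0.02] == new_lr * grad / (1 - mu)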
Example 6: create
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def create(model_type_func, train=False, gpu_id=0):
    """Generic model creation function that dispatches to specific model
    building functions.

    By default, this function will generate a data parallel model configured
    to run on cfg.NUM_GPUS devices. However, you can restrict it to build a
    model targeted to a specific GPU by specifying gpu_id. This is used by
    optimizer.build_data_parallel_model() during test time.
    """
    model = DetectionModelHelper(
        name=model_type_func,
        train=train,
        num_classes=cfg.MODEL.NUM_CLASSES,
        init_params=train
    )
    model.only_build_forward_pass = False
    model.target_gpu_id = gpu_id
    return get_func(model_type_func)(model)
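In Detectron this is normally invoked with the model type string from the config. A hedged usage sketch, assuming a config has already been loaded (cfg.MODEL.TYPE is typically 'generalized_rcnn'):

from detectron.core.config import cfg
from detectron.modeling import model_builder

# Train-time: a data parallel model spanning cfg.NUM_GPUS devices
model = model_builder.create(cfg.MODEL.TYPE, train=True)

# Test-time: restrict the build to a single GPU
test_model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=0)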
Example 7: get_net
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def get_net(data_loader, name):
    logger = logging.getLogger(__name__)
    blob_names = data_loader.get_output_names()
    net = core.Net(name)
    net.type = 'dag'
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for blob_name in blob_names:
                    blob = core.ScopedName(blob_name)
                    workspace.CreateBlob(blob)
                net.DequeueBlobs(
                    data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))
    return net
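The NameScope/DeviceScope pair is what turns a plain blob name like 'data' into the per-GPU scoped name 'gpu_0/data', so each GPU dequeues into its own copies of the same logical blobs. A pure-Python illustration of the naming convention (no Caffe2 required):

blob_names = ['data', 'im_info']
num_gpus = 2

scoped = ['gpu_{}/{}'.format(g, b) for g in range(num_gpus) for b in blob_names]
print(scoped)
# ['gpu_0/data', 'gpu_0/im_info', 'gpu_1/data', 'gpu_1/im_info']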
Example 8: build_data_parallel_model
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def build_data_parallel_model(model, single_gpu_build_func):
    """Build a data parallel model given a function that builds the model on a
    single GPU.
    """
    if model.only_build_forward_pass:
        single_gpu_build_func(model)
    elif model.train:
        all_loss_gradients = _build_forward_graph(model, single_gpu_build_func)
        # Add backward pass on all GPUs
        model.AddGradientOperators(all_loss_gradients)
        if cfg.NUM_GPUS > 1:
            _add_allreduce_graph(model)
        for gpu_id in range(cfg.NUM_GPUS):
            # After allreduce, all GPUs perform SGD updates on their identical
            # params and gradients in parallel
            with c2_utils.NamedCudaScope(gpu_id):
                add_single_gpu_param_update_ops(model, gpu_id)
    else:
        # The test-time network operates on a single GPU;
        # test-time parallelism is implemented through multiprocessing
        with c2_utils.NamedCudaScope(model.target_gpu_id):
            single_gpu_build_func(model)
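The per-GPU SGD updates can run independently after the allreduce because every replica then holds identical parameters and identical reduced gradients, so applying the same update everywhere keeps the replicas in sync. A toy sketch of that invariant:

import numpy as np

lr = 0.1
params = [np.array([1.0, 2.0]) for _ in range(2)]  # identical replicas on 2 GPUs
reduced_grad = np.array([0.3, 0.2])                # identical after allreduce

# Each replica applies the same SGD step independently
params = [p - lr * reduced_grad for p in params]
assert np.array_equal(params[0], params[1])
print(params[0])  # [0.97 1.98]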
Example 9: __init__
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def __init__(self, **kwargs):
    # Handle args specific to the DetectionModelHelper; others pass through
    # to CNNModelHelper
    self.train = kwargs.get('train', False)
    self.num_classes = kwargs.get('num_classes', -1)
    assert self.num_classes > 0, 'num_classes must be > 0'
    for k in ('train', 'num_classes'):
        if k in kwargs:
            del kwargs[k]
    kwargs['order'] = 'NCHW'
    # Defensively set cudnn_exhaustive_search to False in case the default
    # changes in CNNModelHelper. The detection code uses variable-size
    # inputs that might not play nicely with cudnn_exhaustive_search.
    kwargs['cudnn_exhaustive_search'] = False
    super(DetectionModelHelper, self).__init__(**kwargs)
    self.roi_data_loader = None
    self.losses = []
    self.metrics = []
    self.do_not_update_params = []  # Params on this list are not updated
    self.net.Proto().type = cfg.MODEL.EXECUTION_TYPE
    self.net.Proto().num_workers = cfg.NUM_GPUS * 4
    self.prev_use_cudnn = self.use_cudnn
    self.gn_params = []  # Params on this list are GroupNorm parameters
    self.stage_params = {}  # Params in this dict are updated with scalars
Example 10: __init__
# Required import: from detectron.core.config import cfg [as alias]
# Or: from detectron.core.config.cfg import NUM_GPUS [as alias]
def __init__(
    self,
    roidb,
    num_loaders=4,
    minibatch_queue_size=64,
    blobs_queue_capacity=8
):
    self._roidb = roidb
    self._lock = threading.Lock()
    self._perm = deque(range(len(self._roidb)))
    self._cur = 0  # _perm cursor
    # The minibatch queue holds prepared training data in host (CPU) memory.
    # When training with N > 1 GPUs, each element in the minibatch queue
    # is actually a partial minibatch that contributes 1 / N of the
    # examples to the overall minibatch
    self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
    self._blobs_queue_capacity = blobs_queue_capacity
    # Random queue name in case one instantiates multiple RoIDataLoaders
    self._loader_id = uuid.uuid4()
    self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
    # Loader threads construct (partial) minibatches and put them on the
    # minibatch queue
    self._num_loaders = num_loaders
    self._num_gpus = cfg.NUM_GPUS
    self.coordinator = Coordinator()
    self._output_names = get_minibatch_blob_names()
    self._shuffle_roidb_inds()
    self.create_threads()
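With N GPUs, each element on the minibatch queue is a partial minibatch carrying 1/N of the examples, so one training step drains N elements (one per GPU). A small standalone sketch of that producer/consumer shape (sizes are hypothetical):

try:
    import Queue            # Python 2, as in Detectron
except ImportError:
    import queue as Queue   # Python 3

num_gpus = 2
ims_per_gpu = 2             # images in one partial minibatch

q = Queue.Queue(maxsize=64)
# Loader threads would normally produce these; filled inline here
for i in range(num_gpus):
    q.put({'blob': 'partial_minibatch_{}'.format(i), 'num_images': ims_per_gpu})

# One training step consumes one partial minibatch per GPU
step = [q.get() for _ in range(num_gpus)]
print(sum(b['num_images'] for b in step))  # num_gpus * ims_per_gpu = 4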