This page collects typical usage examples of the muji.OnGPU method from the Python module caffe2.python.muji. If you are trying to work out what muji.OnGPU does, how to call it, or want concrete examples of it in use, the curated snippets below should help. You can also browse further usage examples from the caffe2.python.muji module.
Seven code examples of the muji.OnGPU method are shown below, sorted by popularity by default.
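Before the examples: in the Caffe2 versions these snippets target, muji.OnGPU(gpu_id) simply builds a caffe2_pb2.DeviceOption that pins subsequent ops to the given CUDA device, which is why it always appears wrapped in core.DeviceScope. A minimal sketch of the pattern, assuming a CUDA-enabled Caffe2 build (the net and blob names are made up for illustration):

    from caffe2.python import core, muji

    net = core.Net('toy_net')
    for gpu_id in range(2):  # assumes at least two GPUs
        with core.NameScope('gpu_{}'.format(gpu_id)):
            # Ops created inside this scope carry the DeviceOption
            # returned by muji.OnGPU and run on that GPU.
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                net.ConstantFill([], 'ones', shape=[4], value=1.0)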
Example 1: get_net
# Required import: from caffe2.python import muji [as alias]
# Or: from caffe2.python.muji import OnGPU [as alias]
def get_net(data_loader, name):
    logger = logging.getLogger(__name__)
    blob_names = data_loader.get_output_names()
    net = core.Net(name)
    net.type = 'dag'
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for blob_name in blob_names:
                    blob = core.ScopedName(blob_name)
                    workspace.CreateBlob(blob)
                net.DequeueBlobs(
                    data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))
    return net
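For reference, a dequeue net like the one returned above would typically be instantiated and stepped through the workspace; a hedged usage sketch (assuming the loader's queues have already been created and filled):

    net = get_net(data_loader, 'dequeue_net')
    workspace.CreateNet(net)            # instantiate the 'dag' net once
    workspace.RunNet(net.Proto().name)  # one dequeue step per GPU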
Example 2: add_inputs
# Required import: from caffe2.python import muji [as alias]
# Or: from caffe2.python.muji import OnGPU [as alias]
def add_inputs(model, roidb=None):
    """Add network input ops. To be called *after* model_builder.create()."""
    # Implementation notes:
    #   Typically, one would create the input ops and then the rest of the net.
    #   However, creating the input ops depends on loading the dataset, which
    #   can take a few minutes for COCO.
    #   We prefer to avoid waiting, so debugging can fail fast.
    #   Thus, we create the net *without input ops* prior to loading the
    #   dataset, and then add the input ops after loading the dataset.
    #   Since we defer input op creation, we need to do a little bit of surgery
    #   to place the input ops at the start of the network op list.
    if roidb is not None:
        # Make debugging easier when NUM_GPUS is 1 by only using one worker
        # thread for loading mini-batches
        num_workers = 1 if cfg.NUM_GPUS == 1 else cfg.NUM_WORKERS
        model.roi_data_loader = RoIDataLoader(
            roidb, num_workers=num_workers, num_enqueuers=1,
            minibatch_queue_size=cfg.TRAIN.MINIBATCH_QUEUE_SIZE)
    orig_num_op = len(model.net._net.op)
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                if model.train:
                    add_train_inputs(model)
                else:
                    add_test_inputs(model)
    # A little op surgery to move input ops to the start of the net
    diff = len(model.net._net.op) - orig_num_op
    new_op = model.net._net.op[-diff:] + model.net._net.op[:-diff]
    del model.net._net.op[:]
    model.net._net.op.extend(new_op)
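The "op surgery" at the end is a plain list rotation: everything appended after orig_num_op is assumed to be an input op and is moved to the front. A self-contained toy illustration of the same idiom (names are made up):

    ops = ['conv1', 'relu1', 'fc']             # ops built before the inputs
    orig_num_op = len(ops)
    ops += ['dequeue_gpu_0', 'dequeue_gpu_1']  # input ops added afterwards
    diff = len(ops) - orig_num_op
    ops = ops[-diff:] + ops[:-diff]            # rotate new ops to the front
    assert ops == ['dequeue_gpu_0', 'dequeue_gpu_1', 'conv1', 'relu1', 'fc']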
Example 3: add_parameter_update_ops
# Required import: from caffe2.python import muji [as alias]
# Or: from caffe2.python.muji import OnGPU [as alias]
def add_parameter_update_ops(model, gpu_id):
    with core.DeviceScope(muji.OnGPU(gpu_id)):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            # Learning rate of 0 is a dummy value to be set properly at the
            # start of training
            lr = model.param_init_net.ConstantFill(
                [], 'lr', shape=[1], value=0.0)
            one = model.param_init_net.ConstantFill(
                [], 'one', shape=[1], value=1.0)
            wd = model.param_init_net.ConstantFill(
                [], 'wd', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY)
            for param in model.TrainableParams(gpu_id=gpu_id):
                logger.info('param ' + str(param) + ' will be updated')
                param_grad = model.param_to_grad[param]
                # Initialize momentum vector
                param_momentum = model.param_init_net.ConstantFill(
                    [param], param + '_momentum', value=0.0)
                if param in model.biases:
                    # Special treatment for biases (mainly to match historical
                    # impl. details):
                    # (1) Do not apply weight decay
                    # (2) Use a 2x higher learning rate
                    model.Scale(param_grad, param_grad, scale=2.0)
                elif cfg.SOLVER.WEIGHT_DECAY > 0:
                    # Apply weight decay to non-bias weights
                    model.WeightedSum([param_grad, one, param, wd], param_grad)
                # Update param_grad and param_momentum in place
                model.net.MomentumSGDUpdate(
                    [param_grad, param_momentum, lr, param],
                    [param_grad, param_momentum, param],
                    momentum=cfg.SOLVER.MOMENTUM)
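In scalar form, the update each non-bias weight w receives above is roughly the following (a hedged summary of how we read Caffe2's WeightedSum and MomentumSGDUpdate operators, not authoritative operator documentation):

    # g: gradient, m: momentum buffer, lr: learning rate, wd: weight decay
    g = g + wd * w             # model.WeightedSum([g, one, w, wd], g)
    m = momentum * m + lr * g  # MomentumSGDUpdate, written back in place
    w = w - m                  # parameter step taken by the same op
    # Biases skip the weight decay term and scale g by 2.0 instead.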
Example 4: main
# Required import: from caffe2.python import muji [as alias]
# Or: from caffe2.python.muji import OnGPU [as alias]
def main(opts):
    logger = logging.getLogger(__name__)
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    logger.info('{:d} roidb entries'.format(len(roidb)))
    roi_data_loader = RoIDataLoader(
        roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
    )
    blob_names = roi_data_loader.get_output_names()
    net = core.Net('dequeue_net')
    net.type = 'dag'
    all_blobs = []
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for blob_name in blob_names:
                    blob = core.ScopedName(blob_name)
                    all_blobs.append(blob)
                    workspace.CreateBlob(blob)
                    logger.info('Creating blob: {}'.format(blob))
                net.DequeueBlobs(
                    roi_data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))
    if opts.profiler:
        import cProfile
        cProfile.runctx(
            'loader_loop(roi_data_loader)', globals(), locals(),
            sort='cumulative')
    else:
        loader_loop(roi_data_loader)
    roi_data_loader.register_sigint_handler()
    roi_data_loader.start(prefill=True)
    total_time = 0
    for i in range(opts.num_batches):
        start_t = time.time()
        for _ in range(opts.x_factor):
            workspace.RunNetOnce(net)
        total_time += (time.time() - start_t) / opts.x_factor
        logger.info(
            '{:d}/{:d}: Average dequeue time: {:.3f}s [{:d}/{:d}]'.format(
                i + 1, opts.num_batches, total_time / (i + 1),
                roi_data_loader._minibatch_queue.qsize(),
                cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE
            )
        )
        # Sleep to simulate the time taken by running a little network
        time.sleep(opts.sleep_time)
    # To inspect:
    # blobs = workspace.FetchBlobs(all_blobs)
    # from IPython import embed; embed()
    logger.info('Shutting down data loader...')
    roi_data_loader.shutdown()
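Note that loader_loop is called but not defined in these snippets. As a hedged reconstruction (the helper lives elsewhere in the codebase these examples come from, and get_next_minibatch is an assumption about the RoIDataLoader API that may differ between versions), it is a small benchmark of the loader itself:

    import time

    def loader_loop(roi_data_loader, iters=100):
        # Time repeated minibatch fetches straight from the loader.
        total = 0.0
        for i in range(iters):
            start_t = time.time()
            roi_data_loader.get_next_minibatch()
            total += time.time() - start_t
            print('{:d}/{:d}: average minibatch time: {:.3f}s'.format(
                i + 1, iters, total / (i + 1)))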
Example 5: main
# Required import: from caffe2.python import muji [as alias]
# Or: from caffe2.python.muji import OnGPU [as alias]
def main(opts):
    logger = logging.getLogger(__name__)
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    logger.info('{:d} roidb entries'.format(len(roidb)))
    roi_data_loader = RoIDataLoader(
        roidb,
        num_loaders=opts.num_loaders,
        minibatch_queue_size=opts.minibatch_queue_size,
        blobs_queue_capacity=opts.blobs_queue_capacity)
    blob_names = roi_data_loader.get_output_names()
    net = core.Net('dequeue_net')
    net.type = 'dag'
    all_blobs = []
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for blob_name in blob_names:
                    blob = core.ScopedName(blob_name)
                    all_blobs.append(blob)
                    workspace.CreateBlob(blob)
                    logger.info('Creating blob: {}'.format(blob))
                net.DequeueBlobs(
                    roi_data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))
    if opts.profiler:
        import cProfile
        cProfile.runctx(
            'loader_loop(roi_data_loader)', globals(), locals(),
            sort='cumulative')
    else:
        loader_loop(roi_data_loader)
    roi_data_loader.register_sigint_handler()
    roi_data_loader.start(prefill=True)
    total_time = 0
    for i in range(opts.num_batches):
        start_t = time.time()
        for _ in range(opts.x_factor):
            workspace.RunNetOnce(net)
        total_time += (time.time() - start_t) / opts.x_factor
        logger.info(
            '{:d}/{:d}: Average dequeue time: {:.3f}s [{:d}/{:d}]'.format(
                i + 1, opts.num_batches, total_time / (i + 1),
                roi_data_loader._minibatch_queue.qsize(),
                opts.minibatch_queue_size))
        # Sleep to simulate the time taken by running a little network
        time.sleep(opts.sleep_time)
    # To inspect:
    # blobs = workspace.FetchBlobs(all_blobs)
    # from IPython import embed; embed()
    logger.info('Shutting down data loader...')
    roi_data_loader.shutdown()
Example 6: main
# Required import: from caffe2.python import muji [as alias]
# Or: from caffe2.python.muji import OnGPU [as alias]
def main(opts):
    logger = logging.getLogger(__name__)
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    logger.info('{:d} roidb entries'.format(len(roidb)))
    roi_data_loader = RoIDataLoader(
        roidb,
        num_loaders=opts.num_loaders,
        minibatch_queue_size=opts.minibatch_queue_size,
        blobs_queue_capacity=opts.blobs_queue_capacity)
    blob_names = roi_data_loader.get_output_names()
    net = core.Net('dequeue_net')
    net.type = 'dag'
    all_blobs = []
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for blob_name in blob_names:
                    blob = core.ScopedName(blob_name)
                    all_blobs.append(blob)
                    workspace.CreateBlob(blob)
                    logger.info('Creating blob: {}'.format(blob))
                net.DequeueBlobs(
                    roi_data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))
    if opts.profiler:
        import cProfile
        cProfile.runctx(
            'loader_loop(roi_data_loader)', globals(), locals(),
            sort='cumulative')
    else:
        loader_loop(roi_data_loader)
    roi_data_loader.register_sigint_handler()
    roi_data_loader.start(prefill=True)
    total_time = 0
    for i in range(opts.num_batches):
        start_t = time.time()
        for _ in range(opts.x_factor):
            workspace.RunNetOnce(net)
        total_time += (time.time() - start_t) / opts.x_factor
        logger.info(
            '{:d}/{:d}: Average dequeue time: {:.3f}s [{:d}/{:d}]'.format(
                i + 1, opts.num_batches, total_time / (i + 1),
                roi_data_loader._minibatch_queue.qsize(),
                opts.minibatch_queue_size))
        # Sleep to simulate the time taken by running a little network
        time.sleep(opts.sleep_time)
    # To inspect:
    # blobs = workspace.FetchBlobs(all_blobs)
    # from IPython import embed; embed()
    logger.info('Shutting down data loader (EnqueueBlob errors are ok)...')
    roi_data_loader.shutdown()
Example 7: build_data_parallel_model
# Required import: from caffe2.python import muji [as alias]
# Or: from caffe2.python.muji import OnGPU [as alias]
def build_data_parallel_model(model, single_gpu_build_func):
    if model.train:
        all_loss_gradients = {}  # Will include loss gradients from all GPUs
        # Build the model on each GPU with correct name and device scoping
        for gpu_id in range(cfg.NUM_GPUS):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                with core.DeviceScope(muji.OnGPU(gpu_id)):
                    all_loss_gradients.update(
                        single_gpu_build_func(model))
        # Add backward pass on all GPUs
        model.AddGradientOperators(all_loss_gradients)
        if cfg.NUM_GPUS > 1:
            # Need to all-reduce the per-GPU gradients if training with more
            # than 1 GPU
            all_params = model.TrainableParams()
            assert len(all_params) % cfg.NUM_GPUS == 0, \
                'This should not happen.'
            # The model parameters are replicated on each GPU; get the number
            # of distinct parameter blobs (i.e., the number of parameter
            # blobs on each GPU)
            params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
            with core.DeviceScope(muji.OnGPU(cfg.ROOT_GPU_ID)):
                # Iterate over distinct parameter blobs
                for i in range(params_per_gpu):
                    # Gradients from all GPUs for this parameter blob
                    gradients = [
                        model.param_to_grad[p]
                        for p in all_params[i::params_per_gpu]
                    ]
                    if len(gradients) > 0:
                        if cfg.USE_NCCL:
                            model.net.NCCLAllreduce(gradients, gradients)
                        else:
                            muji.Allreduce(
                                model.net, gradients, reduced_affix='')
        for gpu_id in range(cfg.NUM_GPUS):
            # After all-reduce, all GPUs perform SGD updates on their
            # identical params and gradients in parallel
            add_parameter_update_ops(model, gpu_id)
    else:
        # Testing only supports running on a single GPU
        with core.NameScope('gpu_{}'.format(cfg.ROOT_GPU_ID)):
            with core.DeviceScope(muji.OnGPU(cfg.ROOT_GPU_ID)):
                single_gpu_build_func(model)
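The all_params[i::params_per_gpu] slice works only because TrainableParams() lists each GPU's replica block in the same order, so a stride of params_per_gpu picks out every GPU's copy of one logical parameter. A toy check with made-up names:

    all_params = ['gpu_0/w', 'gpu_0/b', 'gpu_1/w', 'gpu_1/b']
    params_per_gpu = len(all_params) // 2   # 2 GPUs -> 2 distinct blobs
    for i in range(params_per_gpu):
        print(all_params[i::params_per_gpu])
    # -> ['gpu_0/w', 'gpu_1/w'] then ['gpu_0/b', 'gpu_1/b']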