This article collects typical usage examples of the Python method net.Net.compute_size. If you are wondering what Net.compute_size does, how to use it, or what real usage looks like, the hand-picked code examples of this method below may help. You can also read further about the usage of its containing class, net.Net.
The following presents 7 code examples of Net.compute_size, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
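All seven examples below share the same setup pattern: construct a Net, build it from a Caffe solver file, call compute_size for the desired phase, then load a snapshot. A minimal sketch of that shared pattern, assuming the imports shown in the examples (the CaffeNetBuilder import path, solver file name, and snapshot index are placeholders):

from net import Net
from netbuilder import CaffeNetBuilder  # hypothetical import path; see each example's import comments

owl_net = Net()
builder = CaffeNetBuilder('solver.prototxt')  # placeholder solver file
builder.build_net(owl_net)
owl_net.compute_size('TEST')  # infer layer/blob sizes for the TEST phase
builder.init_net_from_file(owl_net, builder.snapshot_dir, 10)  # placeholder snapshot index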
Example 1: FeatureExtractor
# Required module import: from net import Net [as alias]
# Or: from net.Net import compute_size [as alias]
class FeatureExtractor:
''' Class for extracting trained features
Features will be stored in a txt file as a matrix of size [num_img, feature_dimension].
Run it as::
>>> extractor = FeatureExtractor(solver_file, snapshot, gpu_idx)
>>> extractor.build_net()
>>> extractor.run(layer_name, feature_path)
:ivar str solver_file: path of the solver file in Caffe's proto format
:ivar int snapshot: the snapshot for testing
:ivar str layer_name: name of the layer that produces the feature
:ivar int gpu_idx: which GPU to perform the test on
'''
def __init__(self, solver_file, snapshot, gpu_idx = 0):
self.solver_file = solver_file
self.snapshot = snapshot
self.gpu = owl.create_gpu_device(gpu_idx)
owl.set_device(self.gpu)
def build_net(self):
self.owl_net = Net()
self.builder = CaffeNetBuilder(self.solver_file)
self.snapshot_dir = self.builder.snapshot_dir
self.builder.build_net(self.owl_net)
self.owl_net.compute_size('TEST')
self.builder.init_net_from_file(self.owl_net, self.snapshot_dir, self.snapshot)
def run(s, layer_name, feature_path):
''' Run feature extractor
:param str layer_name: the layer to extract feature from
:param str feature_path: feature output path
'''
feature_unit = s.owl_net.units[s.owl_net.name_to_uid[layer_name][0]]
feature_file = open(feature_path, 'w')
batch_dir = 0
for testiteridx in range(s.owl_net.solver.test_iter[0]):
s.owl_net.forward('TEST')
feature = feature_unit.out.to_numpy()
feature_shape = np.shape(feature)
img_num = feature_shape[0]
feature_length = np.prod(feature_shape[1:len(feature_shape)])
feature = np.reshape(feature, [img_num, feature_length])
for imgidx in range(img_num):
for feaidx in range(feature_length):
info ='%f ' % (feature[imgidx, feaidx])
feature_file.write(info)
feature_file.write('\n')
print "Finish One Batch %d" % (batch_dir)
batch_dir += 1
feature_file.close()
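Each row of the resulting text file holds one image's flattened feature vector as space-separated floats, so the matrix can be loaded back with NumPy. A small sketch (the file name is a placeholder):

import numpy as np

# One row per image, one space-separated float per feature dimension.
features = np.loadtxt('features.txt')
print features.shape  # (num_img, feature_dimension)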
Example 2: MultiviewTester
# Required module import: from net import Net [as alias]
# Or: from net.Net import compute_size [as alias]
class MultiviewTester:
''' Class for performing multi-view testing
Run it as::
>>> tester = MultiviewTester(solver_file, softmax_layer, snapshot, gpu_idx)
>>> tester.build_net()
>>> tester.run()
:ivar str solver_file: path of the solver file in Caffe's proto format
:ivar int snapshot: the snapshot for testing
:ivar str softmax_layer_name: name of the softmax layer that produces the prediction
:ivar int gpu_idx: which GPU to perform the test on
'''
def __init__(self, solver_file, softmax_layer_name, snapshot, gpu_idx = 0):
self.solver_file = solver_file
self.softmax_layer_name = softmax_layer_name
self.snapshot = snapshot
self.gpu = owl.create_gpu_device(gpu_idx)
owl.set_device(self.gpu)
def build_net(self):
self.owl_net = Net()
self.builder = CaffeNetBuilder(self.solver_file)
self.snapshot_dir = self.builder.snapshot_dir
self.builder.build_net(self.owl_net)
self.owl_net.compute_size('MULTI_VIEW')
self.builder.init_net_from_file(self.owl_net, self.snapshot_dir, self.snapshot)
def run(s):
#multi-view test
acc_num = 0
test_num = 0
loss_unit = s.owl_net.units[s.owl_net.name_to_uid[s.softmax_layer_name][0]]
for testiteridx in range(s.owl_net.solver.test_iter[0]):
for i in range(10):
s.owl_net.forward('MULTI_VIEW')
if i == 0:
softmax_val = loss_unit.ff_y
batch_size = softmax_val.shape[1]
softmax_label = loss_unit.y
else:
softmax_val = softmax_val + loss_unit.ff_y
test_num += batch_size
predict = softmax_val.argmax(0)
truth = softmax_label.argmax(0)
correct = (predict - truth).count_zero()
acc_num += correct
print "Accuracy the %d mb: %f, batch_size: %d" % (testiteridx, correct, batch_size)
sys.stdout.flush()
print "Testing Accuracy: %f" % (float(acc_num)/test_num)
Example 3: __init__
# Required module import: from net import Net [as alias]
# Or: from net.Net import compute_size [as alias]
class HeatmapVisualizer:
''' Class of heatmap visualizer.
A heat map reveals which parts of the activation are important; this information is useful for detection and segmentation tasks.
:ivar str solver_file: path of the solver file; it tells Minerva the network configuration and the model-saving path
:ivar snapshot: saved model snapshot index
:ivar str layer_name: name of the layer whose activation will be visualized as a heatmap
:ivar str result_path: path for the visualization results; HeatmapVisualizer generates a heatmap jpg for each test image and saves it under this path
:ivar gpu: the GPU to run testing on
'''
def __init__(self, solver_file, snapshot, gpu_idx = 0):
self.solver_file = solver_file
self.snapshot = snapshot
self.gpu = owl.create_gpu_device(gpu_idx)
owl.set_device(self.gpu)
def build_net(self):
self.owl_net = Net()
self.builder = CaffeNetBuilder(self.solver_file)
self.snapshot_dir = self.builder.snapshot_dir
self.builder.build_net(self.owl_net)
self.owl_net.compute_size('TEST')
self.builder.init_net_from_file(self.owl_net, self.snapshot_dir, self.snapshot)
def run(s, layer_name, result_path):
''' Run heatmap visualizer
:param str layer_name: the layer to visualize
:param str result_path: the path to save heatmap
'''
feature_unit = s.owl_net.units[s.owl_net.name_to_uid[layer_name][0]]
#We need the testing data unit
data_unit = None
for i in range(len(s.owl_net.name_to_uid['data'])):
if s.owl_net.units[s.owl_net.name_to_uid['data'][i]].params.include[0].phase == 1:
data_unit = s.owl_net.units[s.owl_net.name_to_uid['data'][i]]
assert(data_unit)
#get the mean data
bp = BlobProto()
#get mean file
if len(data_unit.params.transform_param.mean_file) == 0:
mean_data = np.ones([3, 256, 256], dtype=np.float32)
assert(len(data_unit.params.transform_param.mean_value) == 3)
mean_data[0] = data_unit.params.transform_param.mean_value[0]
mean_data[1] = data_unit.params.transform_param.mean_value[1]
mean_data[2] = data_unit.params.transform_param.mean_value[2]
h_w = 256
else:
with open(data_unit.params.transform_param.mean_file, 'rb') as f:
bp.ParseFromString(f.read())
mean_narray = np.array(bp.data, dtype=np.float32)
h_w = int(np.sqrt(np.shape(mean_narray)[0] / 3))  # side length of the square mean image; cast so reshape gets an int
mean_data = np.array(bp.data, dtype=np.float32).reshape([3, h_w, h_w])
#get the cropped img
crop_size = data_unit.params.transform_param.crop_size
crop_h_w = (h_w - crop_size) / 2
mean_data = mean_data[:, crop_h_w:crop_h_w + crop_size, crop_h_w:crop_h_w + crop_size]
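# Center-crop arithmetic: the mean image is h_w x h_w; removing
# (h_w - crop_size)/2 pixels from each border yields a crop_size x crop_size
# mean patch that aligns with the cropped network input.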
cur_img = 0
for testiteridx in range(s.owl_net.solver.test_iter[0]):
s.owl_net.forward('TEST')
feature = feature_unit.out.to_numpy()
feature_shape = np.shape(feature)
data = data_unit.out.to_numpy()
img_num = feature_shape[0]
#processing each image
for imgidx in range(img_num):
img_feature = feature[imgidx,:]
#get the image
gbr_img_data = data[imgidx,:] + mean_data
img_data = np.zeros([data_unit.crop_size, data_unit.crop_size, 3], dtype=np.float32)
img_data[:,:,0] = gbr_img_data[2,:,:]
img_data[:,:,1] = gbr_img_data[1,:,:]
img_data[:,:,2] = gbr_img_data[0,:,:]
img_data /= 256
#get the heatmap
f_h = feature_shape[2]
f_w = feature_shape[3]
f_c = feature_shape[1]
heatmap = np.zeros([f_h, f_w], dtype=np.float32)
for cidx in range(f_c):
feature_map = img_feature[cidx,:]
f = np.max(np.max(feature_map)) - np.mean(np.mean(feature_map))
heatmap = heatmap + f * f * feature_map
#resize
heatmap = scipy.misc.imresize(heatmap,[data_unit.crop_size, data_unit.crop_size])
#save
fig, ax = plt.subplots(1,2)
ax[0].axis('off')
ax[1].axis('off')
ax[0].imshow(img_data, aspect='equal')
ax[1].imshow(heatmap, aspect='equal')
#ax[1] = plt.pcolor(heatmap)
info = '%s/%d.jpg' % (result_path, cur_img)
print info
fig.savefig(info)
plt.close('all')
#......... remainder of the code omitted .........
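The weighting scheme in the channel loop above can be read in isolation: each channel's feature map is scored by the gap between its maximum and mean activation, and the heatmap accumulates the squared score times the map, so channels with sharply localized responses dominate. A standalone NumPy sketch with placeholder shapes:

import numpy as np

f_c, f_h, f_w = 256, 13, 13                   # placeholder feature dimensions
img_feature = np.random.rand(f_c, f_h, f_w)   # placeholder activations for one image

heatmap = np.zeros([f_h, f_w], dtype=np.float32)
for cidx in range(f_c):
    feature_map = img_feature[cidx]
    f = np.max(feature_map) - np.mean(feature_map)  # large when the response is sharply peaked
    heatmap += f * f * feature_map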
Example 4: __init__
# Required module import: from net import Net [as alias]
# Or: from net.Net import compute_size [as alias]
class FilterVisualizer:
''' Class of filter visualizer.
Finds the patches that most strongly activate a filter, to demonstrate the pattern the filter responds to. It first reads in several images, conducts a feed-forward pass, and finds the patches with the biggest activation values for a filter. Those patches usually contain the pattern of that filter.
:ivar str solver_file: path of the solver file; it tells Minerva the network configuration and the model-saving path
:ivar snapshot: saved model snapshot index
:ivar str layer_name: name of the layer to visualize; all the filters in that layer are visualized at once
:ivar str result_path: path for the visualization results; FilterVisualizer generates a jpg containing the nine selected patches for each filter in layer_name and saves the image under this path
:ivar gpu: the GPU to run testing on
'''
def __init__(self, solver_file, snapshot, layer_name, result_path, gpu_idx = 0):
self.solver_file = solver_file
self.snapshot = snapshot
self.layer_name = layer_name
self.result_path = result_path
self.gpu = owl.create_gpu_device(gpu_idx)
owl.set_device(self.gpu)
def build_net(self):
self.owl_net = Net()
self.builder = CaffeNetBuilder(self.solver_file)
self.snapshot_dir = self.builder.snapshot_dir
self.builder.build_net(self.owl_net)
self.owl_net.compute_size('TEST')
self.builder.init_net_from_file(self.owl_net, self.snapshot_dir, self.snapshot)
def run(s):
#Note: there may be multiple data layers here; choose the TEST-phase one
data_unit = None
for data_idx in range(len(s.owl_net.data_layers)):
for i in range(len(s.owl_net.name_to_uid[s.owl_net.data_layers[data_idx]])):
if s.owl_net.units[s.owl_net.name_to_uid[s.owl_net.data_layers[data_idx]][i]].params.include[0].phase == 1:
data_unit = s.owl_net.units[s.owl_net.name_to_uid[s.owl_net.data_layers[data_idx]][i]]
assert(data_unit)
bp = BlobProto()
#get mean file
if len(data_unit.params.transform_param.mean_file) == 0:
mean_data = np.ones([3, 256, 256], dtype=np.float32)
assert(len(data_unit.params.transform_param.mean_value) == 3)
mean_data[0] = data_unit.params.transform_param.mean_value[0]
mean_data[1] = data_unit.params.transform_param.mean_value[1]
mean_data[2] = data_unit.params.transform_param.mean_value[2]
h_w = 256
else:
with open(data_unit.params.transform_param.mean_file, 'rb') as f:
bp.ParseFromString(f.read())
mean_narray = np.array(bp.data, dtype=np.float32)
h_w = int(np.sqrt(np.shape(mean_narray)[0] / 3))  # side length of the square mean image; cast so reshape gets an int
mean_data = np.array(bp.data, dtype=np.float32).reshape([3, h_w, h_w])
#get the cropped img
crop_size = data_unit.params.transform_param.crop_size
crop_h_w = (h_w - crop_size) / 2
mean_data = mean_data[:, crop_h_w:crop_h_w + crop_size, crop_h_w:crop_h_w + crop_size]
feature_unit = s.owl_net.units[s.owl_net.name_to_uid[s.layer_name][0]]
batch_dir = 0
#we use 10000 images to conduct visualization
all_data = np.zeros([10000, 3, crop_size, crop_size], dtype=np.float32)
feature_shape = feature_unit.out_shape
all_feature = np.zeros([10000, feature_shape[2], feature_shape[1], feature_shape[0]], dtype=np.float32)
print 'Begin Generating Activations from Testing Set'
curimg = 0
for testiteridx in range(s.owl_net.solver.test_iter[0]):
s.owl_net.forward('TEST')
feature = feature_unit.out.to_numpy()
batch_size = np.shape(feature)[0]
all_feature[curimg:curimg+batch_size,:] = feature
data = data_unit.out.to_numpy()
all_data[curimg:curimg+batch_size,:] = data
curimg += batch_size
#HACK TODO: only take 10000 images
if curimg >= 10000:
break
info = 'Now Processed %d images' % (curimg)
print info
print 'Begin Selecting Patches'
#get the result
patch_shape = feature_unit.rec_on_ori
min_val = -float('inf')
#add back the mean file
for i in range(np.shape(all_data)[0]):
all_data[i,:,:,:] += mean_data
if len(feature_shape) == 4:
#iterate over the filters; for each filter, choose nine patches from different images
for i in range(feature_shape[2]):
#create the result image for nine patches
res_img = np.zeros([feature_unit.rec_on_ori * 3, feature_unit.rec_on_ori * 3, 3])
filter_feature = np.copy(all_feature[:,i,:,:])
for patchidx in range(9):
maxidx = np.argmax(filter_feature)
colidx = maxidx % feature_shape[0]
maxidx = (maxidx - colidx) / feature_shape[0]
rowidx = maxidx % feature_shape[1]
#......... remainder of the code omitted .........
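The modulo/division arithmetic at the end of the loop recovers multi-dimensional coordinates from the flat index returned by np.argmax. For a C-ordered array, np.unravel_index performs the same conversion in one call; a sketch with placeholder shapes:

import numpy as np

filter_feature = np.random.rand(100, 13, 13)   # placeholder: [num_img, f_h, f_w]
maxidx = np.argmax(filter_feature)             # flat index of the strongest activation
imgidx, rowidx, colidx = np.unravel_index(maxidx, filter_feature.shape)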
Example 5: NetTester
# Required module import: from net import Net [as alias]
# Or: from net.Net import compute_size [as alias]
class NetTester:
''' Class for performing testing; it supports single-view or multi-view testing and top-1 or top-5 accuracy.
Run it as::
>>> tester = NetTester(solver_file, softmax_layer, accuracy_layer, snapshot, gpu_idx)
>>> tester.build_net()
>>> tester.run(multiview)
:ivar str solver_file: path of the solver file in Caffe's proto format
:ivar int snapshot: the snapshot for testing
:ivar str softmax_layer_name: name of the softmax layer that produces the prediction
:ivar str accuracy_layer_name: name of the accuracy layer that computes the accuracy
:ivar int gpu_idx: which GPU to perform the test on
:ivar bool multiview: whether to use the multi-view tester
'''
def __init__(self, solver_file, softmax_layer_name, accuracy_layer_name, snapshot, gpu_idx = 0):
self.solver_file = solver_file
self.softmax_layer_name = softmax_layer_name
self.accuracy_layer_name = accuracy_layer_name
self.snapshot = snapshot
self.gpu = owl.create_gpu_device(gpu_idx)
owl.set_device(self.gpu)
def build_net(self):
self.owl_net = Net()
self.builder = CaffeNetBuilder(self.solver_file)
self.snapshot_dir = self.builder.snapshot_dir
self.builder.build_net(self.owl_net)
self.owl_net.compute_size('TEST')
self.builder.init_net_from_file(self.owl_net, self.snapshot_dir, self.snapshot)
def run(s, multiview):
#multi-view test
acc_num = 0
test_num = 0
loss_unit = s.owl_net.units[s.owl_net.name_to_uid[s.softmax_layer_name][0]]
accunit = s.owl_net.units[s.owl_net.name_to_uid[s.accuracy_layer_name][0]]
data_unit = None
for data_idx in range(len(s.owl_net.data_layers)):
for i in range(len(s.owl_net.name_to_uid[s.owl_net.data_layers[data_idx]])):
if s.owl_net.units[s.owl_net.name_to_uid[s.owl_net.data_layers[data_idx]][i]].params.include[0].phase == 1:
data_unit = s.owl_net.units[s.owl_net.name_to_uid[s.owl_net.data_layers[data_idx]][i]]
assert(data_unit)
if multiview == True:
data_unit.multiview = True
for testiteridx in range(s.owl_net.solver.test_iter[0]):
if multiview == True:
for i in range(10):
s.owl_net.forward('TEST')
if i == 0:
softmax_val = loss_unit.ff_y
batch_size = softmax_val.shape[1]
softmax_label = loss_unit.y
else:
softmax_val = softmax_val + loss_unit.ff_y
test_num += batch_size
if accunit.top_k == 5:
predict = softmax_val.to_numpy()
top_5 = np.argsort(predict, axis=1)[:,::-1]
ground_truth = softmax_label.max_index(0).to_numpy()
correct = 0
for i in range(batch_size):
for t in range(5):
if ground_truth[i] == top_5[i,t]:
correct += 1
break
acc_num += correct
else:
predict = softmax_val.max_index(0)
truth = softmax_label.max_index(0)
correct = (predict - truth).count_zero()
acc_num += correct
else:
s.owl_net.forward('TEST')
all_accunits = s.owl_net.get_accuracy_units()
batch_size = accunit.batch_size
test_num += batch_size
acc_num += (batch_size * accunit.acc)
correct = batch_size * accunit.acc
print "Accuracy of the %d mb: %f, batch_size: %d, current mean accuracy: %f" % (testiteridx, (correct * 1.0)/batch_size, batch_size, float(acc_num)/test_num)
sys.stdout.flush()
print "Testing Accuracy: %f" % (float(acc_num)/test_num)
Example 6: NetTrainer
# Required module import: from net import Net [as alias]
# Or: from net.Net import compute_size [as alias]
class NetTrainer:
''' Class for training neural network
Allows the user to train using Caffe's network configuration format but on multiple GPUs. One
can use NetTrainer as follows:
>>> trainer = NetTrainer(solver_file, snapshot, num_gpu)
>>> trainer.build_net()
>>> trainer.run()
:ivar str solver_file: path of the solver file in Caffe's proto format
:ivar int snapshot: the idx of snapshot to start with
:ivar int num_gpu: the number of gpu to use
:ivar int sync_freq: how often to stop lazy evaluation and print some information, i.e. the trainer calls
``owl.wait_for_all()`` once every ``sync_freq`` minibatches. Note that this influences training
speed: normally, the higher the value, the faster the training, but the more memory is used
during execution.
'''
def __init__(self, solver_file, snapshot = 0, gpu = 1, sync_freq=1, report=False, do_histogram=False):
self.solver_file = solver_file
self.snapshot = snapshot
self.num_gpu = gpu
self.sync_freq = sync_freq
self.report = report
self.do_histogram=do_histogram
if owl.has_mpi():
self.gpu = []
if gpu == 1:
#self.gpu += [owl.create_gpu_device(i) for i in range(owl.get_gpu_device_count())]
nodes = [owl.get_mpi_device_count(i) for i in range(1,owl.get_mpi_node_count())]
for n in range(len(nodes)):
print "using {} gpu's on node {}\n".format(nodes[n],n+1)
self.gpu += [owl.create_mpi_device(n+1,i+1) for i in range(nodes[n])]
self.num_gpu = len(self.gpu)
else:
self.gpu += [owl.create_cpu_device()]
self.gpu += [owl.create_mpi_device(n,0) for n in range(1,owl.get_mpi_node_count())]
self.num_gpu = len(self.gpu)
print "using {} cpu's over all nodes".format(self.num_gpu)
else:
if gpu == 1:
self.gpu = [owl.create_gpu_device(i) for i in range(self.num_gpu)]
self.num_gpu = len(self.gpu)
print "using {} gpu devices".format(len(self.gpu))
else:
self.gpu = [owl.create_cpu_device()]
self.num_gpu = len(self.gpu)
print "using {} cpus".format(len(self.gpu))
def build_net(self):
''' Build network structure using Caffe's proto definition. It will also initialize
the network either from a given snapshot or from scratch (using a proper initializer).
During initialization, it first tries to load the weights from the snapshot; if that
fails, it initializes the weights accordingly.
'''
self.owl_net = Net()
self.builder = CaffeNetBuilder(self.solver_file)
self.snapshot_dir = self.builder.snapshot_dir
self.builder.build_net(self.owl_net, self.num_gpu)
self.owl_net.compute_size()
self.builder.init_net_from_file(self.owl_net, self.snapshot_dir, self.snapshot)
def run(s):
''' Run the training algorithm on multiple GPUs
The basic logic is similar to the traditional single GPU training code as follows (pseudo-code)::
for epoch in range(MAX_EPOCH):
for i in range(NUM_MINI_BATCHES):
# load i^th minibatch
minibatch = loader.load(i, MINI_BATCH_SIZE)
net.ff(minibatch.data)
net.bp(minibatch.label)
grad = net.gradient()
net.update(grad, MINI_BATCH_SIZE)
With Minerva's lazy evaluation and dataflow engine, we are able to modify the above logic
to perform data parallelism on multiple GPUs (pseudo-code)::
for epoch in range(MAX_EPOCH):
for i in range(0, NUM_MINI_BATCHES, NUM_GPU):
gpu_grad = [None for i in range(NUM_GPU)]
for gpuid in range(NUM_GPU):
# specify which gpu following codes are running on
owl.set_device(gpuid)
# each minibatch is split among GPUs
minibatch = loader.load(i + gpuid, MINI_BATCH_SIZE / NUM_GPU)
net.ff(minibatch.data)
net.bp(minibatch.label)
gpu_grad[gpuid] = net.gradient()
net.accumulate_and_update(gpu_grad, MINI_BATCH_SIZE)
So each GPU takes charge of training one *mini-mini batch*, and since their ``ff``, ``bp`` and ``gradient``
calculations are independent of each other, they can be parallelized naturally by Minerva's DAG engine.
The only problem left is the ``accumulate_and_update`` of the gradients from all GPUs. If we do it on one GPU,
that GPU would become a bottleneck. The solution is to partition this workload across the GPUs as well (pseudo-code)::
def accumulate_and_update(gpu_grad, MINI_BATCH_SIZE):
num_layers = len(gpu_grad[0])
#......... remainder of the code omitted .........
Example 7: NetTrainer
# Required module import: from net import Net [as alias]
# Or: from net.Net import compute_size [as alias]
class NetTrainer:
''' Class for training neural network
Allows the user to train using Caffe's network configuration format but on multiple GPUs. One
can use NetTrainer as follows:
>>> trainer = NetTrainer(solver_file, snapshot, num_gpu)
>>> trainer.build_net()
>>> trainer.run()
:ivar str solver_file: path of the solver file in Caffe's proto format
:ivar int snapshot: the idx of snapshot to start with
:ivar int num_gpu: the number of gpu to use
'''
def __init__(self, solver_file, snapshot = 0, num_gpu = 1):
self.solver_file = solver_file
self.snapshot = snapshot
self.num_gpu = num_gpu
self.gpu = [owl.create_gpu_device(i) for i in range(num_gpu)]
def build_net(self):
''' Build network structure using Caffe's proto definition. It will also initialize
the network either from a given snapshot or from scratch (using a proper initializer).
During initialization, it first tries to load the weights from the snapshot; if that
fails, it initializes the weights accordingly.
'''
self.owl_net = Net()
self.builder = CaffeNetBuilder(self.solver_file)
self.snapshot_dir = self.builder.snapshot_dir
self.builder.build_net(self.owl_net, self.num_gpu)
self.owl_net.compute_size()
self.builder.init_net_from_file(self.owl_net, self.snapshot_dir, self.snapshot)
def run(s):
''' Run the training algorithm on multiple GPUs
The basic logic is similar to the traditional single GPU training code as follows (pseudo-code)::
for epoch in range(MAX_EPOCH):
for i in range(NUM_MINI_BATCHES):
# load i^th minibatch
minibatch = loader.load(i, MINI_BATCH_SIZE)
net.ff(minibatch.data)
net.bp(minibatch.label)
grad = net.gradient()
net.update(grad, MINI_BATCH_SIZE)
With Minerva's lazy evaluation and dataflow engine, we are able to modify the above logic
to perform data parallelism on multiple GPUs (pseudo-code)::
for epoch in range(MAX_EPOCH):
for i in range(0, NUM_MINI_BATCHES, NUM_GPU):
gpu_grad = [None for i in range(NUM_GPU)]
for gpuid in range(NUM_GPU):
# specify which gpu following codes are running on
owl.set_device(gpuid)
# each minibatch is split among GPUs
minibatch = loader.load(i + gpuid, MINI_BATCH_SIZE / NUM_GPU)
net.ff(minibatch.data)
net.bp(minibatch.label)
gpu_grad[gpuid] = net.gradient()
net.accumulate_and_update(gpu_grad, MINI_BATCH_SIZE)
So each GPU takes charge of training one *mini-mini batch*, and since their ``ff``, ``bp`` and ``gradient``
calculations are independent of each other, they can be parallelized naturally by Minerva's DAG engine.
The only problem left is the ``accumulate_and_update`` of the gradients from all GPUs. If we do it on one GPU,
that GPU would become a bottleneck. The solution is to partition this workload across the GPUs as well (pseudo-code)::
def accumulate_and_update(gpu_grad, MINI_BATCH_SIZE):
num_layers = len(gpu_grad[0])
for layer in range(num_layers):
upd_gpu = layer * NUM_GPU / num_layers
# specify which gpu to update the layer
owl.set_device(upd_gpu)
for gid in range(NUM_GPU):
if gid != upd_gpu:
gpu_grad[upd_gpu][layer] += gpu_grad[gid][layer]
net.update_layer(layer, gpu_grad[upd_gpu][layer], MINI_BATCH_SIZE)
Since the update of each layer is independent of the others, the updates can be parallelized efficiently. Minerva's
dataflow engine transparently handles dependency resolution, scheduling, and memory copying among different devices,
so users don't need to worry about that.
'''
wgrad = [[] for i in range(s.num_gpu)]
bgrad = [[] for i in range(s.num_gpu)]
last = time.time()
wunits = s.owl_net.get_weighted_unit_ids()
last_start = time.time()
for iteridx in range(s.snapshot * s.owl_net.solver.snapshot, s.owl_net.solver.max_iter):
# get the learning rate
if s.owl_net.solver.lr_policy == "poly":
s.owl_net.current_lr = s.owl_net.base_lr * pow(1 - float(iteridx) / s.owl_net.solver.max_iter, s.owl_net.solver.power)
elif s.owl_net.solver.lr_policy == "step":
s.owl_net.current_lr = s.owl_net.base_lr * pow(s.owl_net.solver.gamma, iteridx / s.owl_net.solver.stepsize)
# train on multi-gpu
for gpuid in range(s.num_gpu):
owl.set_device(s.gpu[gpuid])
#......... remainder of the code omitted .........
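The two learning-rate policies computed at the top of the training loop follow Caffe's definitions: "poly" decays as base_lr * (1 - iter/max_iter)^power and "step" as base_lr * gamma^(iter // stepsize). A standalone sketch of both schedules:

def poly_lr(base_lr, iteridx, max_iter, power):
    # polynomial decay: falls to zero exactly at max_iter
    return base_lr * pow(1.0 - float(iteridx) / max_iter, power)

def step_lr(base_lr, iteridx, stepsize, gamma):
    # multiply the rate by gamma once every stepsize iterations
    return base_lr * pow(gamma, iteridx // stepsize)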