This page collects typical usage examples of the Python method pylearn2.sandbox.cuda_convnet.filter_acts.FilterActs. If you are wondering what filter_acts.FilterActs does, how to call it, or how it is used in practice, the curated examples below may help. You can also browse the containing module, pylearn2.sandbox.cuda_convnet, for related functionality.
The following shows 14 code examples of filter_acts.FilterActs, ordered by popularity.
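Before the examples, here is a minimal, hedged sketch of direct usage (my own illustration, not one of the collected examples; it assumes a CUDA-enabled Theano installation with pylearn2 importable). The c01b axis order and the multiple-of-16 filter count are requirements of the underlying cuda-convnet kernels:

import numpy as np
from theano import function, shared
from theano.sandbox.cuda.basic_ops import gpu_from_host, host_from_gpu
from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs

rng = np.random.RandomState(0)
# c01b layout: (channels, rows, cols, batch)
images = shared(rng.uniform(-1., 1., (3, 8, 8, 5)).astype('float32'))
# (channels, filter_rows, filter_cols, num_filters); num_filters % 16 == 0
filters = shared(rng.uniform(-1., 1., (3, 3, 3, 16)).astype('float32'))

output = FilterActs(stride=1, partial_sum=1, pad=0)(gpu_from_host(images),
                                                    gpu_from_host(filters))
f = function([], host_from_gpu(output))
print(f().shape)  # (16, 6, 6, 5): num_filters, out_rows, out_cols, batch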
Example 1: grad

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def grad(self, inputs, g_outputs):
    """
    .. todo::

        WRITEME
    """
    hid_acts, filters, output_shape = inputs
    g_images, = g_outputs
    g_images = as_cuda_ndarray_variable(g_images)
    assert not isinstance(g_images, list)

    global FilterActs
    global WeightActs
    if FilterActs is None:
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        from pylearn2.sandbox.cuda_convnet.weight_acts import WeightActs

    g_filters = WeightActs(stride=self.stride,
                           partial_sum=self.partial_sum, pad=self.pad)(
                               g_images, hid_acts, filters.shape[1:3])[0]
    assert not isinstance(g_filters, list)
    g_hid_acts = FilterActs(stride=self.stride, pad=self.pad,
                            partial_sum=self.partial_sum)(g_images, filters)
    return [g_hid_acts, g_filters, DisconnectedType()()]
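For orientation: FilterActs, ImageActs, and WeightActs form a matched set, with FilterActs as the forward convolution and the other two as its gradients with respect to the images and the filters. A hedged sketch of that relationship (my own illustration; module paths and call signatures are as used in example 1 and the tests below):

import numpy as np
from theano import shared
from theano.tensor import as_tensor_variable
from theano.sandbox.cuda.basic_ops import gpu_from_host
from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
from pylearn2.sandbox.cuda_convnet.img_acts import ImageActs
from pylearn2.sandbox.cuda_convnet.weight_acts import WeightActs

rng = np.random.RandomState(0)
images = shared(rng.uniform(-1., 1., (3, 8, 8, 5)).astype('float32'))    # c01b
filters = shared(rng.uniform(-1., 1., (3, 3, 3, 16)).astype('float32'))  # c01b

hid_acts = FilterActs(stride=1, pad=0)(gpu_from_host(images),
                                       gpu_from_host(filters))
# "Transposed" pass: maps hidden activations back to image space;
# needs the original image rows/cols.
g_images = ImageActs(stride=1, pad=0)(hid_acts, gpu_from_host(filters),
                                      as_tensor_variable((8, 8)))
# Filter-gradient pass: pairs images with hidden activations;
# needs the filter rows/cols, and returns a list.
g_filters = WeightActs(stride=1, partial_sum=1, pad=0)(
    gpu_from_host(images), hid_acts, as_tensor_variable((3, 3)))[0]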
Example 2: __init__

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def __init__(self, input_layer, n_filters, filter_size, weights_std,
             init_bias_value, stride=1, nonlinearity=layers.rectify,
             dropout=0., partial_sum=None, pad=0, untie_biases=False):
    """
    Only the valid border mode is supported.

    n_filters should be a multiple of 16
    """
    self.input_layer = input_layer
    self.n_filters = n_filters
    self.filter_size = filter_size
    self.weights_std = np.float32(weights_std)
    self.init_bias_value = np.float32(init_bias_value)
    self.stride = stride
    self.nonlinearity = nonlinearity
    self.dropout = dropout
    self.partial_sum = partial_sum
    self.pad = pad
    # if untie_biases is True, each position in the output map has its own
    # bias (as opposed to having the same bias everywhere for a given filter)
    self.untie_biases = untie_biases

    self.mb_size = self.input_layer.mb_size
    self.input_shape = self.input_layer.get_output_shape()
    # c01b: (channels, filter_rows, filter_cols, num_filters)
    self.filter_shape = (self.input_shape[0], filter_size, filter_size,
                         n_filters)

    self.W = layers.shared_single(4)  # theano.shared(np.random.randn(*self.filter_shape).astype(np.float32) * self.weights_std)
    if self.untie_biases:
        self.b = layers.shared_single(3)
    else:
        self.b = layers.shared_single(1)  # theano.shared(np.ones(n_filters).astype(np.float32) * self.init_bias_value)
    self.params = [self.W, self.b]
    self.bias_params = [self.b]
    self.reset_params()

    self.filter_acts_op = FilterActs(stride=self.stride,
                                     partial_sum=self.partial_sum,
                                     pad=self.pad)
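The constructor above only builds filter_acts_op; a hedged sketch of how such a layer would typically apply it (the method name output_sketch and the exact bias handling are my assumptions based on the stored attributes, not code from the example):

from theano.sandbox.cuda.basic_ops import gpu_contiguous

def output_sketch(self, input):
    # input and self.W are both in c01b layout: (channels, rows, cols, batch)
    contiguous_input = gpu_contiguous(input)
    contiguous_filters = gpu_contiguous(self.W)
    conv_out = self.filter_acts_op(contiguous_input, contiguous_filters)
    if self.untie_biases:
        # one bias per filter and output position: b is (n_filters, rows, cols)
        conv_out = conv_out + self.b.dimshuffle(0, 1, 2, 'x')
    else:
        # one bias per filter: b is (n_filters,)
        conv_out = conv_out + self.b.dimshuffle(0, 'x', 'x', 'x')
    return self.nonlinearity(conv_out)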
Example 3: test_reject_rect

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def test_reject_rect():
    # Tests that running FilterActs (or ImageActs) with a non-square
    # kernel is an error
    for cls in (FilterActs, ImageActs):
        rng = np.random.RandomState([2012, 10, 9])
        batch_size = 5
        rows = 10
        cols = 9
        channels = 3
        filter_rows = 4
        filter_cols = filter_rows + 1  # rectangular kernel: rejected
        num_filters = 6
        images = shared(rng.uniform(-1., 1., (channels, rows, cols,
                        batch_size)).astype('float32'), name='images')
        filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
                         filter_cols, num_filters)).astype('float32'),
                         name='filters')
        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)
        if cls is ImageActs:
            output = cls()(gpu_images, gpu_filters,
                           as_tensor_variable((rows, cols)))
        else:
            output = cls()(gpu_images, gpu_filters)
        f = function([], output)
        try:
            output = f()
        except ValueError:
            continue
        assert False
Example 4: test_reject_bad_filt_number

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def test_reject_bad_filt_number():
    # Tests that running FilterActs (or ImageActs) with a number of filters
    # per group that is not a multiple of 16 is an error
    for cls in (FilterActs, ImageActs):
        rng = np.random.RandomState([2012, 10, 9])
        batch_size = 5
        rows = 10
        cols = 9
        channels = 3
        filter_rows = 4
        filter_cols = filter_rows
        num_filters = 6  # not a multiple of 16: rejected
        images = shared(rng.uniform(-1., 1., (channels, rows, cols,
                        batch_size)).astype('float32'), name='images')
        filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
                         filter_cols, num_filters)).astype('float32'),
                         name='filters')
        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)
        if cls is ImageActs:
            output = cls()(gpu_images, gpu_filters,
                           as_tensor_variable((rows, cols)))
        else:
            output = cls()(gpu_images, gpu_filters)
        f = function([], output)
        try:
            output = f()
        except ValueError:
            continue
        assert False
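Examples 3 and 4 both use the try/except/assert False idiom. Under pytest (an assumption on my part; the originals predate that style), the same check could be written more directly, sketched here for the rectangular-kernel case:

import numpy as np
import pytest
from theano import function, shared
from theano.sandbox.cuda.basic_ops import gpu_from_host
from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs

def test_reject_rect_pytest_style():
    rng = np.random.RandomState([2012, 10, 9])
    images = shared(rng.uniform(-1., 1., (3, 10, 9, 5)).astype('float32'))
    # 4x5 kernel is rectangular, which cuda-convnet rejects at run time
    filters = shared(rng.uniform(-1., 1., (3, 4, 5, 16)).astype('float32'))
    output = FilterActs()(gpu_from_host(images), gpu_from_host(filters))
    f = function([], output)
    with pytest.raises(ValueError):
        f()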
Example 5: make_funcs

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def make_funcs(batch_size, rows, cols, channels, filter_rows,
               num_filters):
    rng = np.random.RandomState([2012, 10, 9])
    filter_cols = filter_rows
    base_image_value = rng.uniform(-1., 1., (channels, rows, cols,
                                   batch_size)).astype('float32')
    base_filters_value = rng.uniform(-1., 1., (channels, filter_rows,
                                     filter_cols, num_filters)).astype('float32')
    images = shared(base_image_value)
    filters = shared(base_filters_value, name='filters')

    # bench.py should always be run in gpu mode so we should not need a
    # gpu_from_host here
    output = FilterActs()(images, filters)
    output_shared = shared(output.eval())
    cuda_convnet = function([], updates={output_shared: output})
    cuda_convnet.name = 'cuda_convnet'

    # conv2d expects bc01; FilterActs computes a correlation, so the filters
    # are pre-flipped to make conv2d's true convolution match it
    images_bc01v = base_image_value.transpose(3, 0, 1, 2)
    filters_bc01v = base_filters_value.transpose(3, 0, 1, 2)
    filters_bc01v = filters_bc01v[:, :, ::-1, ::-1]

    images_bc01 = shared(images_bc01v)
    filters_bc01 = shared(filters_bc01v)

    output_conv2d = conv2d(images_bc01, filters_bc01,
                           border_mode='valid', image_shape=images_bc01v.shape,
                           filter_shape=filters_bc01v.shape)
    output_conv2d_shared = shared(output_conv2d.eval())
    baseline = function([], updates={output_conv2d_shared: output_conv2d})
    baseline.name = 'baseline'
    return cuda_convnet, baseline
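A hedged sketch of how the two compiled functions returned above might be compared (my own harness, not part of the example; the shapes and loop count are arbitrary choices):

import time

def bench(fn, n_calls=10):
    # average wall-clock time per call
    t0 = time.time()
    for _ in range(n_calls):
        fn()
    return (time.time() - t0) / n_calls

cuda_convnet, baseline = make_funcs(batch_size=128, rows=32, cols=32,
                                    channels=3, filter_rows=5, num_filters=64)
print('cuda_convnet: %.5f s/call' % bench(cuda_convnet))
print('baseline:     %.5f s/call' % bench(baseline))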
Example 6: fprop

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def fprop(self, input):
    # we reduce the precision of parameters for the computations
    self.w_comp = apply_format(self.format, self.W, self.comp_precision,
                               self.w_range)
    self.b_comp = apply_format(self.format, self.b, self.comp_precision,
                               self.b_range)
    input = input.reshape(self.image_shape)

    # convolution
    input_shuffled = input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    filters_shuffled = self.w_comp.dimshuffle(1, 2, 3, 0) * self.scale  # bc01 to c01b
    conv_op = FilterActs(stride=self.filter_stride,
                         partial_sum=self.partial_sum, pad=self.zero_pad)
    contiguous_input = gpu_contiguous(input_shuffled)
    contiguous_filters = gpu_contiguous(filters_shuffled)
    conv_out_shuffled = conv_op(contiguous_input, contiguous_filters)

    # downsample each feature map individually, using maxpooling
    # pooled_out = downsample.max_pool_2d(input=conv_out,
    #                                     ds=poolsize, ignore_border=True)
    pool_op = MaxPool(ds=self.pool_shape, stride=self.pool_stride)
    pooled_out_shuffled = pool_op(conv_out_shuffled)
    pooled_out = pooled_out_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01

    # bias
    pooled_out = apply_format(self.format,
                              pooled_out +
                              self.b_comp.dimshuffle('x', 0, 'x', 'x') *
                              self.scale,
                              self.comp_precision, self.z_range)

    # activation
    pooled_out = self.activation(pooled_out)
    pooled_out = apply_format(self.format, pooled_out.flatten(2),
                              self.comp_precision, self.y_range)
    return pooled_out
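The bc01 <-> c01b dimshuffles are the recurring detail in these wrappers. A plain-numpy illustration of the axis bookkeeping (no GPU required; transpose here plays the role of Theano's dimshuffle):

import numpy as np

x_bc01 = np.zeros((128, 3, 32, 32), dtype='float32')  # (batch, channels, rows, cols)
x_c01b = x_bc01.transpose(1, 2, 3, 0)                 # same as dimshuffle(1, 2, 3, 0)
assert x_c01b.shape == (3, 32, 32, 128)               # (channels, rows, cols, batch)
x_back = x_c01b.transpose(3, 0, 1, 2)                 # same as dimshuffle(3, 0, 1, 2)
assert x_back.shape == x_bc01.shape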
Example 7: __init__

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def __init__(self, numpy_rng=None, input=None, filter_shape=(2, 1, 5, 5),
             poolsize=(1, 1), activation=T.nnet.sigmoid,
             flatten=False, use_fast=False):
    self.type = 'conv'
    self.input = input
    self.filter_shape = filter_shape
    self.poolsize = poolsize
    self.activation = activation
    self.flatten = flatten

    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))

    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    initial_W = numpy.asarray(numpy_rng.uniform(low=-W_bound, high=W_bound,
                                                size=filter_shape),
                              dtype=theano.config.floatX)
    if activation == T.nnet.sigmoid:
        initial_W *= 4
    W = theano.shared(value=initial_W, name='W')
    self.W = W

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, name='b')

    # convolve input feature maps with filters
    if use_fast:
        from theano.sandbox.cuda.basic_ops import gpu_contiguous
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        from pylearn2.sandbox.cuda_convnet.pool import MaxPool

        input_shuffled = self.input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        filters_shuffled = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        conv_op = FilterActs()
        contiguous_input = gpu_contiguous(input_shuffled)
        contiguous_filters = gpu_contiguous(filters_shuffled)
        conv_out_shuffled = conv_op(contiguous_input, contiguous_filters)
        y_out_shuffled = activation(conv_out_shuffled +
                                    self.b.dimshuffle(0, 'x', 'x', 'x'))
        pool_op = MaxPool(ds=poolsize[0], stride=poolsize[0])
        self.output = pool_op(y_out_shuffled).dimshuffle(3, 0, 1, 2)
    else:
        conv_out = conv.conv2d(input=self.input, filters=self.W,
                               filter_shape=filter_shape)
        y_out = activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        # downsample each feature map individually, using maxpooling
        self.output = downsample.max_pool_2d(input=y_out,
                                             ds=poolsize, ignore_border=True)

    if self.flatten:
        self.output = self.output.flatten(2)

    self.params = [self.W, self.b]
Example 8: make_funcs

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def make_funcs(batch_size, rows, cols, channels, filter_rows,
               num_filters):
    rng = np.random.RandomState([2012, 10, 9])
    filter_cols = filter_rows
    base_image_value = rng.uniform(-1., 1., (channels, rows, cols,
                                   batch_size)).astype('float32')
    base_filters_value = rng.uniform(-1., 1., (channels, filter_rows,
                                     filter_cols, num_filters)).astype('float32')
    images = shared(base_image_value)
    filters = shared(base_filters_value, name='filters')

    # bench.py should always be run in gpu mode so we should not need a
    # gpu_from_host here
    layer_1_detector = FilterActs()(images, filters)
    # fake 2x2 pooling by strided slicing of the row and column axes
    # (c01b layout, so those are axes 1 and 2)
    layer_1_pooled_fake = layer_1_detector[:, 0:layer_1_detector.shape[1]:2,
                                           0:layer_1_detector.shape[2]:2, :]

    base_filters2_value = rng.uniform(-1., 1., (num_filters, filter_rows,
                                      filter_cols, num_filters)).astype('float32')
    filters2 = shared(base_filters2_value, name='filters2')
    layer_2_detector = FilterActs()(layer_1_pooled_fake, filters2)
    output = layer_2_detector

    output_shared = shared(output.eval())
    cuda_convnet = function([], updates={output_shared: output})
    cuda_convnet.name = 'cuda_convnet'

    images_bc01 = base_image_value.transpose(3, 0, 1, 2)
    filters_bc01 = base_filters_value.transpose(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]  # conv2d flips; FilterActs does not

    images_bc01 = shared(images_bc01)
    filters_bc01 = shared(filters_bc01)

    output_conv2d = conv2d(images_bc01, filters_bc01,
                           border_mode='valid')
    output_conv2d_shared = shared(output_conv2d.eval())
    baseline = function([], updates={output_conv2d_shared: output_conv2d})
    baseline.name = 'baseline'
    return cuda_convnet, baseline
Example 9: test_filter_acts_strided

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def test_filter_acts_strided():
    # Tests FilterActs with all possible strides
    rng = np.random.RandomState([2012, 10, 9])

    # Each entry in shape_list is [img_shape, filter_shape], i.e.
    # [(channels, rows, cols, batch_size),
    #  (channels, filter_rows, filter_cols, num_filters)]
    shape_list = [[(1, 7, 8, 5), (1, 2, 2, 16)],
                  [(3, 7, 8, 5), (3, 3, 3, 16)],
                  [(16, 11, 11, 4), (16, 4, 4, 16)],
                  [(3, 20, 20, 3), (3, 5, 5, 16)],
                  [(3, 21, 21, 3), (3, 6, 6, 16)],
                  ]

    for test_idx in xrange(len(shape_list)):
        images = rng.uniform(-1., 1., shape_list[test_idx][0]).astype('float32')
        filters = rng.uniform(-1., 1., shape_list[test_idx][1]).astype('float32')
        gpu_images = float32_shared_constructor(images, name='images')
        gpu_filters = float32_shared_constructor(filters, name='filters')
        print("test case %d..." % (test_idx + 1))

        for ii in xrange(filters.shape[1]):
            stride = ii + 1
            output = FilterActs(stride=stride)(gpu_images, gpu_filters)
            output = host_from_gpu(output)
            f = function([], output)
            output_val = f()

            output_python = FilterActs_python(images, filters, stride)

            if np.abs(output_val - output_python).max() > 8.6e-6:
                assert type(output_val) == type(output_python)
                assert output_val.dtype == output_python.dtype
                if output_val.shape != output_python.shape:
                    print('cuda-convnet shape: ', output_val.shape)
                    print('python conv shape: ', output_python.shape)
                    assert False
                err = np.abs(output_val - output_python)
                print('stride %d' % stride)
                print('absolute error range: ', (err.min(), err.max()))
                print('mean absolute error: ', err.mean())
                print('cuda-convnet value range: ',
                      (output_val.min(), output_val.max()))
                print('python conv value range: ',
                      (output_python.min(), output_python.max()))
                # assert False
        # print "pass"
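FilterActs_python above is the test suite's CPU reference. A hedged numpy sketch of the same computation, i.e. a valid-mode, strided cross-correlation (no filter flipping) in c01b layout; it ignores cuda-convnet's edge handling when the stride does not divide the image evenly, which is exactly what the shape printouts above guard against:

import numpy as np

def filter_acts_reference(images, filters, stride=1):
    # images:  (channels, rows, cols, batch)
    # filters: (channels, filter_rows, filter_cols, num_filters)
    c, rows, cols, batch = images.shape
    _, fr, fc, nf = filters.shape
    out_r = (rows - fr) // stride + 1
    out_c = (cols - fc) // stride + 1
    out = np.zeros((nf, out_r, out_c, batch), dtype=images.dtype)
    for i in range(out_r):
        for j in range(out_c):
            patch = images[:, i * stride:i * stride + fr,
                           j * stride:j * stride + fc, :]
            # contract over channels and filter positions for every
            # (filter, batch) pair; no flipping, so this is a correlation
            out[:, i, j, :] = np.einsum('crsb,crsf->fb', patch, filters)
    return out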
Example 10: test_match_valid_conv_strided

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def test_match_valid_conv_strided():
    # Tests that running FilterActs with stride is the same as running
    # theano's conv2D in valid mode and then downsampling
    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 9
    cols = 9
    channels = 3
    filter_rows = 3
    filter_cols = filter_rows
    stride = 3
    num_filters = 16

    images = shared(rng.uniform(-1., 1., (channels, rows, cols,
                    batch_size)).astype('float32'), name='images')
    filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
                     filter_cols, num_filters)).astype('float32'),
                     name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs(stride=stride)(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]  # conv2d flips; FilterActs does not

    output_conv2d = conv2d(images_bc01, filters_bc01,
                           border_mode='valid', subsample=(stride, stride))
    output_conv2d_orig = output_conv2d.dimshuffle(1, 2, 3, 0)
    output_conv2d = output_conv2d_orig  # [:, ::stride, ::stride, :]
    f = function([], [output, output_conv2d, output_conv2d_orig])

    output, output_conv2d, output_conv2d_orig = f()

    warnings.warn("""test_match_valid_conv success criterion is not very
        strict. Can we verify that this is OK? One possibility is that theano
        is numerically unstable and Alex's code is better. Probably theano
        CPU 64 bit is OK but it's worth checking the others.""")

    if np.abs(output - output_conv2d).max() > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print('cuda-convnet shape: ', output.shape)
            print('theano shape: ', output_conv2d.shape)
            assert False
        err = np.abs(output - output_conv2d)
        print('absolute error range: ', (err.min(), err.max()))
        print('mean absolute error: ', err.mean())
        print('cuda-convnet value range: ', (output.min(), output.max()))
        print('theano value range: ',
              (output_conv2d.min(), output_conv2d.max()))
        assert False
Example 11: test_image_acts_strided

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def test_image_acts_strided():
    # Tests ImageActs with all possible strides
    rng = np.random.RandomState([2012, 10, 9])

    # Each entry in shape_list is [img_shape, filter_shape], i.e.
    # [(channels, rows, cols, batch_size),
    #  (channels, filter_rows, filter_cols, num_filters)]
    shape_list = [[(1, 7, 8, 5), (1, 2, 2, 16)],
                  [(3, 7, 8, 5), (3, 3, 3, 16)],
                  [(16, 11, 11, 4), (16, 4, 4, 16)],
                  [(3, 20, 20, 3), (3, 5, 5, 16)],
                  [(3, 21, 21, 3), (3, 6, 6, 16)],
                  ]

    for test_idx in xrange(len(shape_list)):
        images = rng.uniform(-1., 1., shape_list[test_idx][0]).astype('float32')
        filters = rng.uniform(-1., 1., shape_list[test_idx][1]).astype('float32')
        gpu_images = float32_shared_constructor(images, name='images')
        gpu_filters = float32_shared_constructor(filters, name='filters')
        print("test case %d..." % (test_idx + 1))

        for ii in xrange(filters.shape[1]):
            stride = ii + 1
            output_python = FilterActs_python(images, filters, stride)
            hidacts = rng.uniform(-1., 1., output_python.shape).astype('float32')
            gpu_hidacts = float32_shared_constructor(hidacts, name='hidacts')

            Img_output_python = ImageActs_python(filters, hidacts, stride,
                                                 (images.shape[1],
                                                  images.shape[2]))
            Img_output = ImageActs(stride=stride)(
                gpu_hidacts, gpu_filters,
                as_tensor_variable((images.shape[1], images.shape[2])))
            Img_output = host_from_gpu(Img_output)
            f = function([], Img_output)
            Img_output_val = f()

            warnings.warn("""test_image_acts_strided success criterion is not
                very strict.""")

            if np.abs(Img_output_val - Img_output_python).max() > 2.1e-5:
                assert type(Img_output_val) == type(Img_output_python)
                assert Img_output_val.dtype == Img_output_python.dtype
                if Img_output_val.shape != Img_output_python.shape:
                    print('cuda-convnet shape: ', Img_output_val.shape)
                    print('python conv shape: ', Img_output_python.shape)
                    assert False
                err = np.abs(Img_output_val - Img_output_python)
                print('stride %d' % stride)
                print('absolute error range: ', (err.min(), err.max()))
                print('mean absolute error: ', err.mean())
                print('cuda-convnet value range: ',
                      (Img_output_val.min(), Img_output_val.max()))
                print('python conv value range: ',
                      (Img_output_python.min(), Img_output_python.max()))
                # assert False
        # print "pass"
Example 12: lmul

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def lmul(self, x):
    """
    .. todo::

        WRITEME properly

    dot(x, A)

    aka, do convolution with input image x
    """
    check_cuda(str(type(self)) + ".lmul")

    cpu = 'Cuda' not in str(type(x))
    if cpu:
        x = gpu_from_host(x)

    # x must be formatted as channel, topo dim 0, topo dim 1, batch_index
    # for use with FilterActs
    assert x.ndim == 4
    x_axes = self.input_axes
    assert len(x_axes) == 4

    op_axes = ('c', 0, 1, 'b')
    if tuple(x_axes) != op_axes:
        x = x.dimshuffle(*[x_axes.index(axis) for axis in op_axes])

    x = gpu_contiguous(x)

    # Patch old pickle files.
    if not hasattr(self, 'kernel_stride'):
        self.kernel_stride = (1, 1)

    rval = FilterActs(self.pad, self.partial_sum, self.kernel_stride[0])(
        x,
        self._filters
    )

    # Format the output based on the output space
    rval_axes = self.output_axes
    assert len(rval_axes) == 4

    if cpu:
        rval = host_from_gpu(rval)

    if tuple(rval_axes) != op_axes:
        rval = rval.dimshuffle(*[op_axes.index(axis)
                                 for axis in rval_axes])

    return rval
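The dimshuffle pattern in lmul maps any input axis order onto the ('c', 0, 1, 'b') order FilterActs expects. The index arithmetic in isolation (plain Python, runnable as-is):

x_axes = ('b', 'c', 0, 1)   # e.g. a batch-first input space
op_axes = ('c', 0, 1, 'b')  # the order FilterActs requires
pattern = [x_axes.index(axis) for axis in op_axes]
print(pattern)              # [1, 2, 3, 0] -> x.dimshuffle(1, 2, 3, 0)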
Example 13: __init__

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def __init__(self,
             input_layer,
             n_filters,
             filter_size,
             weights_std,
             stride=1,
             nonlinearity=layers.rectify,
             dropout=0.,
             partial_sum=None,
             pad=0,
             trainable=True):
    """
    Only the valid border mode is supported.

    n_filters should be a multiple of 16
    """
    self.input_layer = input_layer
    self.input_shape = self.input_layer.get_output_shape()
    self.n_filters = n_filters
    n_channels = self.input_shape[0]
    self.n_channels = n_channels
    self.filter_size = filter_size
    self.weights_std = numpy.float32(weights_std)
    self.stride = stride
    self.nonlinearity = nonlinearity
    self.dropout = dropout
    self.partial_sum = partial_sum
    self.pad = pad
    self.mb_size = self.input_layer.mb_size

    self.data_order = layers.data_order.type2

    assert (len(self.input_layer.get_output_shape()) == 4), \
        'Input must have 4 dimensions.'
    assert (self.input_layer.data_order == self.data_order), \
        'Input data order does not match this layer\'s data order.'

    self.filter_shape = (n_channels, filter_size, filter_size, n_filters)

    self.trainable = trainable
    self.W = layers.shared_single(4)
    self.params = [self.W]
    self.reset_params()

    self.filter_acts_op = FilterActs(stride=self.stride,
                                     partial_sum=self.partial_sum,
                                     pad=self.pad)
Example 14: dropout_fprop

# Required import: from pylearn2.sandbox.cuda_convnet import filter_acts [as alias]
# Or: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as alias]
def dropout_fprop(self, input):
    # we reduce the precision of parameters for the computations
    self.fixed_W = apply_format(self.format, self.W, self.comp_precision,
                                self.w_range)
    self.fixed_b = apply_format(self.format, self.b, self.comp_precision,
                                self.b_range)

    # create the dropout mask
    # The cast is important because
    # int * float32 = float64 which pulls things off the gpu
    srng = T.shared_randomstreams.RandomStreams(self.rng.randint(999999))
    self.mask = T.cast(srng.binomial(n=1, p=self.p, size=T.shape(input)),
                       theano.config.floatX)
    input = input * self.mask
    self.fixed_x = input.reshape(self.image_shape)

    # convolution
    input_shuffled = self.fixed_x.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    filters_shuffled = self.fixed_W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    # increasing partial_sum uses less memory but is slower
    conv_op = FilterActs(stride=self.filter_stride,
                         partial_sum=self.partial_sum, pad=self.zero_pad)
    contiguous_input = gpu_contiguous(input_shuffled)
    contiguous_filters = gpu_contiguous(filters_shuffled)
    conv_out_shuffled = conv_op(contiguous_input, contiguous_filters)

    self.z = conv_out_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01
    self.fixed_z = apply_format(self.format, self.z, self.comp_precision,
                                self.z_range)

    conv_out_shuffled = self.fixed_z.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    conv_out_shuffled = gpu_contiguous(conv_out_shuffled)

    # downsample each feature map individually, using maxpooling
    # pooled_out = downsample.max_pool_2d(input=conv_out,
    #                                     ds=poolsize, ignore_border=True)
    pool_op = MaxPool(ds=self.pool_shape, stride=self.pool_stride)
    pooled_out_shuffled = pool_op(conv_out_shuffled)
    pooled_out = pooled_out_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01

    # bias
    self.u = pooled_out + self.fixed_b.dimshuffle('x', 0, 'x', 'x')
    self.fixed_u = apply_format(self.format, self.u, self.comp_precision,
                                self.z_range)

    # activation
    self.y = self.activation(self.fixed_u).flatten(2)
    self.fixed_y = apply_format(self.format, self.y, self.comp_precision,
                                self.y_range)
    return self.fixed_y
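The cast when building the dropout mask is load-bearing: binomial samples come back as integers, and int * float32 promotes to float64, which pulls the computation off the GPU. A minimal hedged illustration of just that step (assuming the same old-Theano RandomStreams API used above):

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=999999)
x = T.matrix('x')  # float32 when theano.config.floatX == 'float32'
mask = T.cast(srng.binomial(n=1, p=0.5, size=x.shape), theano.config.floatX)
dropped = x * mask  # stays float32, so the graph can remain on the GPU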