本文整理汇总了 Python 中 pylearn2.sandbox.cuda_convnet.filter_acts.FilterActs 类的典型用法代码示例。如果您正苦于以下问题:Python filter_acts.FilterActs 的具体用法?Python filter_acts.FilterActs 怎么用?Python filter_acts.FilterActs 使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 pylearn2.sandbox.cuda_convnet.filter_acts 的用法示例。
在下文中一共展示了filter_acts.FilterActs方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: grad
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def grad(self, inputs, g_outputs):
    """
    Build the symbolic gradients of this op with respect to its inputs.

    Parameters
    ----------
    inputs : sequence
        Exactly three items: ``(hid_acts, filters, output_shape)``.
    g_outputs : sequence
        Exactly one item: the gradient flowing back onto this op's output.

    Returns
    -------
    list
        ``[g_hid_acts, g_filters, <disconnected>]``; the third entry is a
        ``DisconnectedType`` variable, so no gradient is propagated to
        ``output_shape``.
    """
    global FilterActs, WeightActs

    # Unpacking doubles as an arity check on both sequences.
    hid_acts, filters, output_shape = inputs
    (upstream_grad,) = g_outputs
    upstream_grad = as_cuda_ndarray_variable(upstream_grad)
    assert not isinstance(upstream_grad, list)

    # The two op classes are bound lazily: the module-level names stay None
    # until the first call (presumably to avoid a circular import -- confirm).
    if FilterActs is None:
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        from pylearn2.sandbox.cuda_convnet.weight_acts import WeightActs

    # Gradient on the filters; only the first output of WeightActs is used.
    weight_grad_op = WeightActs(stride=self.stride,
                                partial_sum=self.partial_sum,
                                pad=self.pad)
    g_filters = weight_grad_op(upstream_grad, hid_acts,
                               filters.shape[1:3])[0]
    assert not isinstance(g_filters, list)

    # Gradient on the hidden activations.
    hid_grad_op = FilterActs(stride=self.stride, pad=self.pad,
                             partial_sum=self.partial_sum)
    g_hid_acts = hid_grad_op(upstream_grad, filters)

    return [g_hid_acts, g_filters, DisconnectedType()()]
示例2: __init__
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def __init__(self, input_layer, n_filters, filter_size, weights_std,
             init_bias_value, stride=1, nonlinearity=layers.rectify,
             dropout=0., partial_sum=None, pad=0, untie_biases=False):
    """
    Record the layer configuration, allocate parameters and build the op.

    Only the valid border mode is supported.
    n_filters should be a multiple of 16

    Parameters
    ----------
    input_layer : object
        Preceding layer; ``mb_size`` and ``get_output_shape()`` are read
        from it.
    n_filters : int
        Last entry of ``filter_shape``.
    filter_size : int
        Used for both middle entries of ``filter_shape``.
    weights_std, init_bias_value : float
        Stored as ``numpy.float32``.
    stride, partial_sum, pad
        Forwarded unchanged to ``FilterActs``.
    nonlinearity, dropout
        Stored on the instance; not used inside this constructor.
    untie_biases : bool
        If true, each position in the output map has its own bias (as
        opposed to one shared bias per filter).
    """
    self.input_layer = input_layer
    self.n_filters = n_filters
    self.filter_size = filter_size

    # Initialisation constants are kept in single precision.
    self.weights_std = np.float32(weights_std)
    self.init_bias_value = np.float32(init_bias_value)

    self.stride = stride
    self.nonlinearity = nonlinearity
    self.dropout = dropout
    self.partial_sum = partial_sum
    self.pad = pad
    self.untie_biases = untie_biases

    self.mb_size = self.input_layer.mb_size
    self.input_shape = self.input_layer.get_output_shape()

    first_input_dim = self.input_shape[0]
    self.filter_shape = (first_input_dim, filter_size, filter_size,
                         n_filters)

    # 4-D weights; the bias is 3-D when untied and 1-D otherwise.
    # NOTE(review): values are presumably filled in by reset_params() --
    # confirm.
    self.W = layers.shared_single(4)
    bias_ndim = 3 if self.untie_biases else 1
    self.b = layers.shared_single(bias_ndim)

    self.params = [self.W, self.b]
    self.bias_params = [self.b]
    self.reset_params()

    self.filter_acts_op = FilterActs(stride=self.stride,
                                     partial_sum=self.partial_sum,
                                     pad=self.pad)
示例3: test_reject_rect
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def test_reject_rect():
    """
    Tests that running FilterActs or ImageActs with a non-square kernel
    is an error (a ValueError when the compiled function is called).
    """
    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows + 1  # deliberately rectangular
    num_filters = 6

    image_shape = (channels, rows, cols, batch_size)
    filter_shape = (channels, filter_rows, filter_cols, num_filters)

    for op_cls in (FilterActs, ImageActs):
        # Re-seeded on every pass so both ops see the same random data.
        rng = np.random.RandomState([2012, 10, 9])

        image_value = rng.uniform(-1., 1., image_shape).astype('float32')
        images = shared(image_value, name='images')
        filter_value = rng.uniform(-1., 1., filter_shape).astype('float32')
        filters = shared(filter_value, name='filters')

        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)

        op = op_cls()
        if op_cls is ImageActs:
            # ImageActs takes the target (rows, cols) as a third input.
            output = op(gpu_images, gpu_filters,
                        as_tensor_variable((rows, cols)))
        else:
            output = op(gpu_images, gpu_filters)

        run = function([], output)
        try:
            run()
        except ValueError:
            # Expected rejection; move on to the next op class.
            pass
        else:
            assert False
示例4: test_reject_bad_filt_number
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def test_reject_bad_filt_number():
    """
    Tests that running FilterActs or ImageActs with a # of filters per
    group that is not 16 is an error (6 filters are used here, and a
    ValueError is expected when the compiled function is called).
    """
    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows  # square kernel; only the count is invalid
    num_filters = 6

    image_shape = (channels, rows, cols, batch_size)
    filter_shape = (channels, filter_rows, filter_cols, num_filters)

    for op_cls in (FilterActs, ImageActs):
        # Re-seeded on every pass so both ops see the same random data.
        rng = np.random.RandomState([2012, 10, 9])

        image_value = rng.uniform(-1., 1., image_shape).astype('float32')
        images = shared(image_value, name='images')
        filter_value = rng.uniform(-1., 1., filter_shape).astype('float32')
        filters = shared(filter_value, name='filters')

        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)

        op = op_cls()
        if op_cls is ImageActs:
            # ImageActs takes the target (rows, cols) as a third input.
            output = op(gpu_images, gpu_filters,
                        as_tensor_variable((rows, cols)))
        else:
            output = op(gpu_images, gpu_filters)

        run = function([], output)
        try:
            run()
        except ValueError:
            # Expected rejection; move on to the next op class.
            pass
        else:
            assert False
示例5: make_funcs
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def make_funcs(batch_size, rows, cols, channels, filter_rows,
               num_filters):
    """
    Build two argument-free functions that compute the same convolution.

    Both functions take no inputs and return nothing; each run writes the
    result into a shared variable via ``updates``.

    Parameters
    ----------
    batch_size, rows, cols, channels : int
        Image tensor is created with shape (channels, rows, cols, batch_size).
    filter_rows : int
        Filter height; the width is set equal to it.
    num_filters : int
        Last dimension of the filter tensor.

    Returns
    -------
    tuple
        ``(cuda_convnet, baseline)``: the FilterActs-based function and the
        ``conv2d``-based one, named 'cuda_convnet' and 'baseline'.
    """
    def as_update_function(expr, label):
        # Evaluate once to obtain a correctly shaped shared buffer, then
        # compile a function whose only effect is refreshing that buffer.
        holder = shared(expr.eval())
        fn = function([], updates={holder: expr})
        fn.name = label
        return fn

    rng = np.random.RandomState([2012, 10, 9])
    filter_cols = filter_rows

    image_shape = (channels, rows, cols, batch_size)
    filter_shape = (channels, filter_rows, filter_cols, num_filters)
    base_image_value = rng.uniform(-1., 1., image_shape).astype('float32')
    base_filters_value = rng.uniform(-1., 1., filter_shape).astype('float32')

    images = shared(base_image_value)
    filters = shared(base_filters_value, name='filters')

    # bench.py should always be run in gpu mode so we should not need a
    # gpu_from_host here
    cuda_convnet = as_update_function(FilterActs()(images, filters),
                                      'cuda_convnet')

    # Reference path: move the last axis to the front and reverse the two
    # spatial axes of the filters before handing them to conv2d.
    images_bc01v = base_image_value.transpose(3, 0, 1, 2)
    filters_bc01v = base_filters_value.transpose(3, 0, 1, 2)[:, :, ::-1, ::-1]

    output_conv2d = conv2d(shared(images_bc01v), shared(filters_bc01v),
                           border_mode='valid',
                           image_shape=images_bc01v.shape,
                           filter_shape=filters_bc01v.shape)
    baseline = as_update_function(output_conv2d, 'baseline')

    return cuda_convnet, baseline
示例6: fprop
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def fprop(self, input):
    """
    Forward pass: reduced-precision convolution, max-pooling, bias and
    activation.

    Side effects: sets ``self.w_comp`` and ``self.b_comp`` to the formatted
    copies of ``self.W`` and ``self.b``.

    Parameters
    ----------
    input : symbolic tensor
        Reshaped to ``self.image_shape`` before use.

    Returns
    -------
    symbolic tensor
        Activation output flattened to 2 dimensions and passed through
        ``apply_format`` with ``self.y_range``.
    """
    fmt = self.format
    precision = self.comp_precision

    # we reduce the precision of parameters for the computations
    self.w_comp = apply_format(fmt, self.W, precision, self.w_range)
    self.b_comp = apply_format(fmt, self.b, precision, self.b_range)

    images = input.reshape(self.image_shape)

    # convolution: both operands go from bc01 to c01b, the filters are
    # additionally multiplied by self.scale
    images_c01b = images.dimshuffle(1, 2, 3, 0)
    filters_c01b = self.w_comp.dimshuffle(1, 2, 3, 0) * self.scale
    conv_op = FilterActs(stride=self.filter_stride,
                         partial_sum=self.partial_sum,
                         pad=self.zero_pad)
    conv_out_c01b = conv_op(gpu_contiguous(images_c01b),
                            gpu_contiguous(filters_c01b))

    # downsample each feature map individually, using maxpooling
    pool_op = MaxPool(ds=self.pool_shape, stride=self.pool_stride)
    pooled_c01b = pool_op(conv_out_c01b)
    pooled = pooled_c01b.dimshuffle(3, 0, 1, 2)  # c01b to bc01

    # bias: broadcast along axis 1 and scaled like the filters
    scaled_bias = self.b_comp.dimshuffle('x', 0, 'x', 'x') * self.scale
    pre_activation = apply_format(fmt, pooled + scaled_bias, precision,
                                  self.z_range)

    # activation
    activated = self.activation(pre_activation)
    return apply_format(fmt, activated.flatten(2), precision, self.y_range)
示例7: __init__
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def __init__(self, numpy_rng=None, input = None, filter_shape=(2, 1, 5, 5),
             poolsize=(1, 1), activation=T.nnet.sigmoid,
             flatten = False, use_fast = False):
    """
    Build a convolution + activation + max-pooling layer.

    Parameters
    ----------
    numpy_rng : numpy.random.RandomState or None
        Generator used to draw the initial weights. When None, a fresh
        unseeded ``numpy.random.RandomState()`` is created, so pass an
        explicit generator for reproducible initialisation.
    input : symbolic 4-D tensor
        Layer input; the fast path treats it as bc01 and shuffles it to
        c01b.
    filter_shape : tuple of 4 ints
        Shape of ``W``. ``filter_shape[0]`` is the number of biases (one
        bias per output feature map).
    poolsize : tuple of 2 ints
        Pooling factors. NOTE: the fast path only uses ``poolsize[0]``
        (as both ``ds`` and ``stride`` of ``MaxPool``).
    activation : callable
        Applied to convolution output plus bias. When it is
        ``T.nnet.sigmoid`` the initial weights are multiplied by 4.
    flatten : bool
        If true, ``self.output`` is flattened to 2 dimensions.
    use_fast : bool
        If true, use the cuda-convnet ``FilterActs`` / ``MaxPool`` ops
        instead of ``conv.conv2d`` / ``downsample.max_pool_2d``.

    Attributes set: ``type``, ``input``, ``filter_shape``, ``poolsize``,
    ``activation``, ``flatten``, ``W``, ``b``, ``output``, ``params``.
    """
    if numpy_rng is None:
        # The signature advertises numpy_rng as optional, but the weight
        # initialisation below calls numpy_rng.uniform(); without this
        # fallback the default value fails with AttributeError.
        numpy_rng = numpy.random.RandomState()

    self.type = 'conv'

    self.input = input
    self.filter_shape = filter_shape
    self.poolsize = poolsize

    self.activation = activation
    self.flatten = flatten

    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))
    # initialize weights with random weights, drawn uniformly from
    # [-W_bound, W_bound]
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    initial_W = numpy.asarray(
        numpy_rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX)

    if activation == T.nnet.sigmoid:
        initial_W *= 4
    W = theano.shared(value=initial_W, name='W')

    self.W = W
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, name='b')

    # convolve input feature maps with filters
    if use_fast:
        # Imported here so the cuda-convnet ops are only needed when the
        # fast path is actually requested.
        from theano.sandbox.cuda.basic_ops import gpu_contiguous
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        from pylearn2.sandbox.cuda_convnet.pool import MaxPool
        input_shuffled = self.input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        filters_shuffled = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        conv_op = FilterActs()
        contiguous_input = gpu_contiguous(input_shuffled)
        contiguous_filters = gpu_contiguous(filters_shuffled)
        conv_out_shuffled = conv_op(contiguous_input, contiguous_filters)
        # In c01b layout the bias broadcasts along the leading axis.
        y_out_shuffled = activation(
            conv_out_shuffled + self.b.dimshuffle(0, 'x', 'x', 'x'))
        pool_op = MaxPool(ds=poolsize[0], stride=poolsize[0])
        # c01b back to bc01
        self.output = pool_op(y_out_shuffled).dimshuffle(3, 0, 1, 2)
    else:
        conv_out = conv.conv2d(input=self.input, filters=self.W,
                               filter_shape=filter_shape)
        y_out = activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        # downsample each feature map individually, using maxpooling
        self.output = downsample.max_pool_2d(input=y_out,
                                             ds=poolsize,
                                             ignore_border=True)
    if self.flatten:
        self.output = self.output.flatten(2)

    self.params = [self.W, self.b]
示例8: make_funcs
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def make_funcs(batch_size, rows, cols, channels, filter_rows,
               num_filters):
    """
    Build two argument-free functions for timing FilterActs against conv2d.

    Both functions take no inputs and return nothing; each run writes the
    result into a shared variable via ``updates``.

    NOTE(review): the "two layer" part of this function is not wired up.
    ``layer_1_pooled_fake`` and ``base_filters2_value`` are never used,
    ``filters2`` wraps ``base_filters_value`` and the second FilterActs is
    applied to ``images``, so the timed expression is a single FilterActs
    on the original images and filters. That matches what the baseline
    computes, so the behaviour is preserved as-is -- confirm the intent
    before changing it.

    Parameters
    ----------
    batch_size, rows, cols, channels : int
        Image tensor is created with shape (channels, rows, cols, batch_size).
    filter_rows : int
        Filter height; the width is set equal to it.
    num_filters : int
        Last dimension of the filter tensor.

    Returns
    -------
    tuple
        ``(cuda_convnet, baseline)``, named 'cuda_convnet' and 'baseline'.
    """
    rng = np.random.RandomState([2012, 10, 9])
    filter_cols = filter_rows

    image_shape = (channels, rows, cols, batch_size)
    filter_shape = (channels, filter_rows, filter_cols, num_filters)
    base_image_value = rng.uniform(-1., 1., image_shape).astype('float32')
    base_filters_value = rng.uniform(-1., 1., filter_shape).astype('float32')

    images = shared(base_image_value)
    filters = shared(base_filters_value, name='filters')

    # bench.py should always be run in gpu mode so we should not need a
    # gpu_from_host here
    layer_1_detector = FilterActs()(images, filters)

    # Every-other-element slice standing in for pooling (unused below).
    layer_1_pooled_fake = layer_1_detector[
        :,
        slice(0, layer_1_detector.shape[0], 2),
        slice(0, layer_1_detector.shape[1], 2),
        :,
    ]

    # Drawn but unused below; see the NOTE in the docstring.
    filter2_shape = (num_filters, filter_rows, filter_cols, num_filters)
    base_filters2_value = rng.uniform(-1., 1.,
                                      filter2_shape).astype('float32')
    filters2 = shared(base_filters_value, name='filters')

    layer_2_detector = FilterActs()(images, filters2)

    output = layer_2_detector

    # Evaluate once to get a correctly shaped buffer, then compile a
    # function whose only effect is refreshing that buffer.
    output_shared = shared(output.eval())
    cuda_convnet = function([], updates={output_shared: output})
    cuda_convnet.name = 'cuda_convnet'

    # Reference path: move the last axis to the front and reverse the two
    # spatial axes of the filters before handing them to conv2d.
    image_value_bc01 = base_image_value.transpose(3, 0, 1, 2)
    filter_value_bc01 = base_filters_value.transpose(3, 0, 1, 2)
    filter_value_bc01 = filter_value_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(shared(image_value_bc01),
                           shared(filter_value_bc01),
                           border_mode='valid')

    output_conv2d_shared = shared(output_conv2d.eval())
    baseline = function([], updates={output_conv2d_shared: output_conv2d})
    baseline.name = 'baseline'

    return cuda_convnet, baseline
示例9: test_filter_acts_strided
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def test_filter_acts_strided():
    """
    Compare FilterActs against FilterActs_python for every stride from 1
    up to the filter height, over several image/filter shapes.

    NOTE(review): a numeric mismatch above the tolerance only prints
    diagnostics; the final ``assert False`` is commented out in the
    original, so only a type, dtype or shape mismatch fails the test.
    """
    rng = np.random.RandomState([2012, 10, 9])

    # Each entry: [img_shape, filter_shape], i.e.
    # [(channels, rows, cols, batch_size),
    #  (channels, filter_rows, filter_cols, num_filters)]
    shape_list = [[(1, 7, 8, 5), (1, 2, 2, 16)],
                  [(3, 7, 8, 5), (3, 3, 3, 16)],
                  [(16, 11, 11, 4), (16, 4, 4, 16)],
                  [(3, 20, 20, 3), (3, 5, 5, 16)],
                  [(3, 21, 21, 3), (3, 6, 6, 16)],
                  ]

    for test_idx, (img_shape, filt_shape) in enumerate(shape_list):
        images = rng.uniform(-1., 1., img_shape).astype('float32')
        filters = rng.uniform(-1., 1., filt_shape).astype('float32')
        gpu_images = float32_shared_constructor(images, name='images')
        gpu_filters = float32_shared_constructor(filters, name='filters')
        print("test case %d..."%(test_idx+1))

        for stride in xrange(1, filters.shape[1] + 1):
            gpu_result = FilterActs(stride=stride)(gpu_images, gpu_filters)
            f = function([], host_from_gpu(gpu_result))
            output_val = f()

            output_python = FilterActs_python(images, filters, stride)

            err = np.abs(output_val - output_python)
            if not (err.max() > 8.6e-6):
                continue

            assert type(output_val) == type(output_python)
            assert output_val.dtype == output_python.dtype
            if output_val.shape != output_python.shape:
                print('cuda-convnet shape: ', output_val.shape)
                print('python conv shape: ', output_python.shape)
                assert False
            print('stride %d'%stride)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ',
                  (output_val.min(), output_val.max()))
            print('python conv value range: ',
                  (output_python.min(), output_python.max()))
示例10: test_match_valid_conv_strided
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def test_match_valid_conv_strided():
    """
    Tests that running FilterActs with stride gives the same result as
    theano's conv2d in valid mode with ``subsample=(stride, stride)``.

    Fails (after printing diagnostics) when the maximum absolute
    difference exceeds 2.4e-6.
    """
    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 9
    cols = 9
    channels = 3
    filter_rows = 3
    filter_cols = filter_rows
    stride = 3
    num_filters = 16

    image_value = rng.uniform(
        -1., 1., (channels, rows, cols, batch_size)).astype('float32')
    images = shared(image_value, name='images')
    filter_value = rng.uniform(
        -1., 1.,
        (channels, filter_rows, filter_cols, num_filters)).astype('float32')
    filters = shared(filter_value, name='filters')

    # cuda-convnet path
    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)
    output = host_from_gpu(FilterActs(stride=stride)(gpu_images, gpu_filters))

    # theano path: last axis moved to the front, the two spatial filter
    # axes reversed (presumably because conv2d flips its kernels while
    # FilterActs does not -- confirm), result shuffled back afterwards.
    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)[:, :, ::-1, ::-1]
    conv_bc01 = conv2d(images_bc01, filters_bc01,
                       border_mode='valid', subsample=(stride, stride))
    output_conv2d_orig = conv_bc01.dimshuffle(1, 2, 3, 0)
    # No extra strided slicing is applied; subsample already handles it.
    output_conv2d = output_conv2d_orig

    f = function([], [output, output_conv2d, output_conv2d_orig])
    output, output_conv2d, output_conv2d_orig = f()

    warnings.warn("""test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?
One possibility is that theano is numerically unstable and Alex's code is better.
Probably theano CPU 64 bit is OK but it's worth checking the others.""")

    err = np.abs(output - output_conv2d)
    if err.max() > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print('cuda-convnet shape: ', output.shape)
            print('theano shape: ', output_conv2d.shape)
            assert False
        print('absolute error range: ', (err.min(), err.max()))
        print('mean absolute error: ', err.mean())
        print('cuda-convnet value range: ', (output.min(), output.max()))
        print('theano value range: ',
              (output_conv2d.min(), output_conv2d.max()))
        assert False
示例11: test_image_acts_strided
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def test_image_acts_strided():
    """
    Compare ImageActs against ImageActs_python for every stride from 1 up
    to the filter height, over several image/filter shapes.

    The hidden activations fed to both implementations are random values
    with the shape of ``FilterActs_python(images, filters, stride)``.

    NOTE(review): a numeric mismatch above the tolerance only prints
    diagnostics; the final ``assert False`` is commented out in the
    original, so only a type, dtype or shape mismatch fails the test.
    """
    rng = np.random.RandomState([2012, 10, 9])

    # Each entry: [img_shape, filter_shape], i.e.
    # [(channels, rows, cols, batch_size),
    #  (channels, filter_rows, filter_cols, num_filters)]
    shape_list = [[(1, 7, 8, 5), (1, 2, 2, 16)],
                  [(3, 7, 8, 5), (3, 3, 3, 16)],
                  [(16, 11, 11, 4), (16, 4, 4, 16)],
                  [(3, 20, 20, 3), (3, 5, 5, 16)],
                  [(3, 21, 21, 3), (3, 6, 6, 16)],
                  ]

    for test_idx, (img_shape, filt_shape) in enumerate(shape_list):
        images = rng.uniform(-1., 1., img_shape).astype('float32')
        filters = rng.uniform(-1., 1., filt_shape).astype('float32')
        # The GPU copy of the images is created but not used below.
        gpu_images = float32_shared_constructor(images, name='images')
        gpu_filters = float32_shared_constructor(filters, name='filters')
        print("test case %d..."%(test_idx+1))

        for stride in xrange(1, filters.shape[1] + 1):
            # Only the shape of the forward result is needed here.
            output_python = FilterActs_python(images, filters, stride)
            hidacts = rng.uniform(-1., 1.,
                                  output_python.shape).astype('float32')
            gpu_hidacts = float32_shared_constructor(hidacts, name='hidacts')

            Img_output_python = ImageActs_python(
                filters, hidacts, stride,
                (images.shape[1], images.shape[2]))

            Img_output = ImageActs(stride=stride)(
                gpu_hidacts, gpu_filters,
                as_tensor_variable((images.shape[1], images.shape[2])))
            f = function([], host_from_gpu(Img_output))
            Img_output_val = f()

            warnings.warn("""test_image_acts_strided success criterion is not very strict.""")

            err = np.abs(Img_output_val - Img_output_python)
            if not (err.max() > 2.1e-5):
                continue

            assert type(Img_output_val) == type(Img_output_python)
            assert Img_output_val.dtype == Img_output_python.dtype
            if Img_output_val.shape != Img_output_python.shape:
                print('cuda-convnet shape: ', Img_output_val.shape)
                print('python conv shape: ', Img_output_python.shape)
                assert False
            print('stride %d'%stride)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ',
                  (Img_output_val.min(), Img_output_val.max()))
            print('python conv value range: ',
                  (Img_output_python.min(), Img_output_python.max()))
示例12: lmul
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def lmul(self, x):
    """
    dot(x, A), aka do convolution with input image x.

    Parameters
    ----------
    x : symbolic 4-D tensor
        Input whose axes are laid out as ``self.input_axes``. If its type
        name does not contain 'Cuda' it is moved to the GPU first and the
        result is moved back to the host.

    Returns
    -------
    symbolic 4-D tensor
        FilterActs output with axes arranged as ``self.output_axes``.
    """
    check_cuda(str(type(self)) + ".lmul")

    started_on_host = 'Cuda' not in str(type(x))
    if started_on_host:
        x = gpu_from_host(x)

    # x must be formatted as channel, topo dim 0, topo dim 1, batch_index
    # for use with FilterActs
    assert x.ndim == 4
    in_axes = self.input_axes
    assert len(in_axes) == 4

    op_axes = ('c', 0, 1, 'b')

    if tuple(in_axes) != op_axes:
        to_op_order = [in_axes.index(axis) for axis in op_axes]
        x = x.dimshuffle(*to_op_order)

    x = gpu_contiguous(x)

    # Patch old pickle files.
    if not hasattr(self, 'kernel_stride'):
        self.kernel_stride = (1, 1)

    # Only the first entry of kernel_stride is handed to FilterActs.
    conv_op = FilterActs(self.pad, self.partial_sum, self.kernel_stride[0])
    rval = conv_op(x, self._filters)

    # Format the output based on the output space
    out_axes = self.output_axes
    assert len(out_axes) == 4

    if started_on_host:
        rval = host_from_gpu(rval)

    if tuple(out_axes) != op_axes:
        from_op_order = [op_axes.index(axis) for axis in out_axes]
        rval = rval.dimshuffle(*from_op_order)

    return rval
示例13: __init__
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def __init__(self,
             input_layer,
             n_filters,
             filter_size,
             weights_std,
             stride=1,
             nonlinearity=layers.rectify,
             dropout=0.,
             partial_sum=None,
             pad=0,
             trainable=True):
    """
    Record the layer configuration, allocate the weights and build the op.

    Only the valid border mode is supported.
    n_filters should be a multiple of 16

    Parameters
    ----------
    input_layer : object
        Preceding layer; ``get_output_shape()``, ``mb_size`` and
        ``data_order`` are read from it. Its output shape must have 4
        entries and its data order must be ``layers.data_order.type2``.
    n_filters : int
        Last entry of ``filter_shape``.
    filter_size : int
        Used for both middle entries of ``filter_shape``.
    weights_std : float
        Stored as ``numpy.float32``.
    stride, partial_sum, pad
        Forwarded unchanged to ``FilterActs``.
    nonlinearity, dropout, trainable
        Stored on the instance; not used inside this constructor.
    """
    self.input_layer = input_layer
    self.input_shape = self.input_layer.get_output_shape()
    self.n_filters = n_filters

    # The first entry of the input shape becomes the first filter dimension.
    n_channels = self.input_shape[0]
    self.n_channels = n_channels

    self.filter_size = filter_size
    self.weights_std = numpy.float32(weights_std)
    self.stride = stride
    self.nonlinearity = nonlinearity
    self.dropout = dropout
    self.partial_sum = partial_sum
    self.pad = pad
    self.mb_size = self.input_layer.mb_size

    self.data_order = layers.data_order.type2

    # Sanity checks on the incoming layer.
    assert len(self.input_layer.get_output_shape()) == 4, \
        'Input must have 4 dimensions.'
    assert self.input_layer.data_order == self.data_order, \
        "Input data order does not match this layer's data order."

    self.filter_shape = (n_channels, filter_size, filter_size, n_filters)
    self.trainable = trainable

    # 4-D weights; this layer has no bias parameter.
    # NOTE(review): values are presumably filled in by reset_params() --
    # confirm.
    self.W = layers.shared_single(4)
    self.params = [self.W]
    self.reset_params()

    self.filter_acts_op = FilterActs(stride=self.stride,
                                     partial_sum=self.partial_sum,
                                     pad=self.pad)
示例14: dropout_fprop
# 需要导入模块: from pylearn2.sandbox.cuda_convnet import filter_acts [as 别名]
# 或者: from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs [as 别名]
def dropout_fprop(self, input):
    """
    Forward pass with an input dropout mask and reduced-precision values.

    Side effects: sets ``fixed_W``, ``fixed_b``, ``mask``, ``fixed_x``,
    ``z``, ``fixed_z``, ``u``, ``fixed_u``, ``y`` and ``fixed_y`` on the
    instance, and draws one integer from ``self.rng`` to seed the mask's
    random stream.

    Parameters
    ----------
    input : symbolic tensor
        Multiplied by the mask, then reshaped to ``self.image_shape``.

    Returns
    -------
    symbolic tensor
        ``self.fixed_y``: the activation output flattened to 2 dimensions
        and passed through ``apply_format`` with ``self.y_range``.
    """
    fmt = self.format
    precision = self.comp_precision

    # we reduce the precision of parameters for the computations
    self.fixed_W = apply_format(fmt, self.W, precision, self.w_range)
    self.fixed_b = apply_format(fmt, self.b, precision, self.b_range)

    # create the dropout mask: one Bernoulli draw per input element, equal
    # to 1 with probability self.p.
    # The cast is important because
    # int * float32 = float64 which pulls things off the gpu
    seed = self.rng.randint(999999)
    srng = T.shared_randomstreams.RandomStreams(seed)
    draws = srng.binomial(n=1, p=self.p, size=T.shape(input))
    self.mask = T.cast(draws, theano.config.floatX)
    masked_input = input * self.mask

    self.fixed_x = masked_input.reshape(self.image_shape)

    # convolution: both operands go from bc01 to c01b
    images_c01b = self.fixed_x.dimshuffle(1, 2, 3, 0)
    filters_c01b = self.fixed_W.dimshuffle(1, 2, 3, 0)
    # augment partial sum -> use less memory but slower
    conv_op = FilterActs(stride=self.filter_stride,
                         partial_sum=self.partial_sum,
                         pad=self.zero_pad)
    conv_c01b = conv_op(gpu_contiguous(images_c01b),
                        gpu_contiguous(filters_c01b))

    # The conv output is formatted in bc01 layout, then shuffled back to
    # c01b (and made contiguous again) for the pooling op.
    self.z = conv_c01b.dimshuffle(3, 0, 1, 2)  # c01b to bc01
    self.fixed_z = apply_format(fmt, self.z, precision, self.z_range)
    formatted_c01b = gpu_contiguous(
        self.fixed_z.dimshuffle(1, 2, 3, 0))  # bc01 to c01b

    # downsample each feature map individually, using maxpooling
    pool_op = MaxPool(ds=self.pool_shape, stride=self.pool_stride)
    pooled = pool_op(formatted_c01b).dimshuffle(3, 0, 1, 2)  # c01b to bc01

    # bias: broadcast along axis 1; z_range is reused for this value
    self.u = pooled + self.fixed_b.dimshuffle('x', 0, 'x', 'x')
    self.fixed_u = apply_format(fmt, self.u, precision, self.z_range)

    # activation
    self.y = self.activation(self.fixed_u).flatten(2)
    self.fixed_y = apply_format(fmt, self.y, precision, self.y_range)

    return self.fixed_y