本文整理汇总了Python中pyopencl.array.zeros函数的典型用法代码示例。如果您正苦于以下问题：Python zeros函数的具体用法？Python zeros怎么用？Python zeros使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了zeros函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_binned_data_stereographic
def get_binned_data_stereographic(self, limits=((-1, 1), (-1, 1)), points=500):
    """Stereographically project the measured ray endpoints and bin them.

    The endpoints are projected onto the xy plane by the
    ``stereograph_project`` OpenCL kernel. The projected coordinates are
    then copied back to the host and binned with ``np.histogram2d``, each
    ray being weighted by its power divided by the bin area.

    :param limits: histogram range as ``((xmin, xmax), (ymin, ymax))``.
    :param points: number of bins along each axis.
    :return: ``(H, x_coord, y_coord)`` as produced by ``np.histogram2d``;
        the same tuple is also stored in ``self.hist_data``.
    """
    pos0, pwr0 = self.get_measured_rays()

    # Kernel inputs, converted to single precision before upload.
    pos0_dev = cl_array.to_device(self.queue, pos0.astype(np.float32))
    pwr0_dev = cl_array.to_device(self.queue, pwr0.astype(np.float32))
    pivot = cl_array.to_device(self.queue, np.array([0, 0, 0, 0], dtype=np.float32))
    # 3x3 identity stored in a 4x4 matrix whose last row and column are zero.
    R_dev = cl_array.to_device(
        self.queue,
        np.array(
            [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]]
        ).astype(np.float32),
    )

    # Kernel outputs: one projected (x, y) position and one power per ray.
    x_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    y_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    pwr_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)

    evt = self.prg.stereograph_project(
        self.queue, pwr0.shape, None,
        pos0_dev.data, pwr0_dev.data, R_dev.data, pivot.data,
        x_dev.data, y_dev.data, pwr_dev.data,
    )
    evt.wait()

    x = x_dev.get()
    y = y_dev.get()
    pwr = np.float64(pwr_dev.get())

    # Divide by the bin area so the histogram holds power per unit area.
    dx = np.float64(limits[0][1] - limits[0][0]) / np.float64(points)
    dy = np.float64(limits[1][1] - limits[1][0]) / np.float64(points)
    pwr = pwr / (dx * dy)

    (H, x_coord, y_coord) = np.histogram2d(
        x=x.flatten(), y=y.flatten(), bins=points, range=limits,
        weights=pwr.flatten(),
    )
    self.hist_data = (H, x_coord, y_coord)
    return self.hist_data
示例2: test_rotate_grid3d
def test_rotate_grid3d(self):
    """Exercise the ``rotate_grid3d`` kernel with two rotation matrices.

    First an identity rotation of a grid filled with ones, then a
    90 degree rotation around the z-axis of a grid with two voxels set.
    Each rotation matrix is passed as nine values padded with seven zeros.
    """
    rotate = self.p.program.rotate_grid3d

    # Identity rotation
    identity = np.asarray([1, 0, 0, 0, 1, 0, 0, 0, 1] + [0] * 7, dtype=np.float32)
    self.cl_grid = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
    self.cl_grid.fill(1)
    self.cl_out = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
    side = 2 * self.values["llength"] + 1
    global_size = (side,) * 3
    rotate(self.queue, global_size, None,
           self.cl_grid.data, identity, self.cl_out.data)

    # Expected output: a (5, 4, 3) grid that is zero except for these voxels.
    expected = np.zeros((5, 4, 3))
    expected[0, 0, :] = 1.0
    expected[0, 1, 0] = 1.0
    expected[0, 3, 0] = 1.0
    expected[1, 0, 0] = 1.0
    expected[4, 0, 0] = 1.0
    self.assertTrue(np.allclose(expected, self.cl_out.get()))

    # 90 degree rotation around z-axis
    quarter_turn = np.asarray([0, -1, 0, 1, 0, 0, 0, 0, 1] + [0] * 7, dtype=np.float32)
    host_grid = np.zeros(self.shape, dtype=np.float32)
    for voxel in ((0, 0, 0), (0, 0, 1)):
        host_grid[voxel] = 1
    self.cl_grid = cl_array.to_device(self.queue, host_grid)
    self.cl_out.fill(0)
    rotate(self.queue, global_size, None,
           self.cl_grid.data, quarter_turn, self.cl_out.data)

    expected = np.zeros_like(host_grid)
    for voxel in ((0, 0, 0), (0, 1, 0)):
        expected[voxel] = 1
    self.assertTrue(np.allclose(expected, self.cl_out.get()))
示例3: get_binned_data_angular
def get_binned_data_angular(self, limits=((-1, 1), (-1, 1)), points=500):
    """Map the measured ray endpoints by azimuth/elevation and bin them.

    The mapping is done by the ``angular_project`` OpenCL kernel. According
    to the original description it maps elevation linearly to the radius of
    a circle and azimuth to phi, which is useful for cross-section plots of
    directivity. The mapped coordinates are then copied back to the host and
    binned with ``np.histogram2d``, each ray being weighted by its power
    divided by the bin area.

    :param limits: histogram range as ``((xmin, xmax), (ymin, ymax))``.
    :param points: number of bins along each axis.
    :return: ``(H, x_coord, y_coord)`` as produced by ``np.histogram2d``;
        the same tuple is also stored in ``self.hist_data``.
    """
    pos0, pwr0 = self.get_measured_rays()

    # Kernel inputs, converted to single precision before upload.
    pos0_dev = cl_array.to_device(self.queue, pos0.astype(np.float32))
    pwr0_dev = cl_array.to_device(self.queue, pwr0.astype(np.float32))
    pivot = cl_array.to_device(self.queue, np.array([0, 0, 0, 0], dtype=np.float32))
    # 3x3 identity stored in a 4x4 matrix whose last row and column are zero.
    R_dev = cl_array.to_device(
        self.queue,
        np.array(
            [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]]
        ).astype(np.float32),
    )

    # Kernel outputs: one mapped (x, y) position and one power per ray.
    x_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    y_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    pwr_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)

    evt = self.prg.angular_project(
        self.queue, pwr0.shape, None,
        pos0_dev.data, pwr0_dev.data, R_dev.data, pivot.data,
        x_dev.data, y_dev.data, pwr_dev.data,
    )
    evt.wait()

    x = x_dev.get()
    y = y_dev.get()
    pwr = np.float64(pwr_dev.get())

    # Divide by the bin area so the histogram holds power per unit area.
    dx = np.float64(limits[0][1] - limits[0][0]) / np.float64(points)
    dy = np.float64(limits[1][1] - limits[1][0]) / np.float64(points)
    pwr = pwr / (dx * dy)

    (H, x_coord, y_coord) = np.histogram2d(
        x=x.flatten(), y=y.flatten(), bins=points, range=limits,
        weights=pwr.flatten(),
    )
    self.hist_data = (H, x_coord, y_coord)
    return self.hist_data
示例4: test_clashvol
def test_clashvol(self):
    """Compare the GPU clash-volume correlation with a NumPy reference.

    A rotation is picked at random from ``self.rotations``. The rotated
    ``lsurf`` image is correlated with ``rcore`` through real FFTs, once on
    the CPU with ``numpy.fft`` and once on the GPU with the project kernels.
    The two results must agree within ``atol=0.8``.
    """
    # randint's upper bound is exclusive, so passing the number of rotations
    # always yields a valid index (the former "+ 1" could index past the end).
    rot_index = np.random.randint(self.rotations.shape[0])
    rotmat = self.rotations[rot_index]

    # CPU reference
    cpu_lsurf = np.zeros_like(self.im_lsurf.array)
    disvis.libdisvis.rotate_image3d(
        self.im_lsurf.array, self.vlength, np.linalg.inv(rotmat),
        self.im_center, cpu_lsurf)
    cpu_clashvol = numpy.fft.irfftn(
        numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rcore.array),
        s=self.shape)

    # GPU computation
    gpu_rcore = cl_array.to_device(
        self.queue, np.asarray(self.rcore.array, dtype=np.float32))
    gpu_im_lsurf = cl.image_from_array(
        self.queue.context, np.asarray(self.im_lsurf.array, dtype=np.float32))
    gpu_lsurf = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
    self.kernels.rotate_image3d(
        self.queue, self.sampler, gpu_im_lsurf, rotmat, gpu_lsurf,
        self.im_center)

    gpu_ft_lsurf = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_ft_rcore = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_ft_clashvol = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_clashvol = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

    self.kernels.rfftn(self.queue, gpu_rcore, gpu_ft_rcore)
    self.kernels.rfftn(self.queue, gpu_lsurf, gpu_ft_lsurf)
    self.kernels.c_conj_multiply(
        self.queue, gpu_ft_lsurf, gpu_ft_rcore, gpu_ft_clashvol)
    self.kernels.irfftn(self.queue, gpu_ft_clashvol, gpu_clashvol)

    self.assertTrue(np.allclose(cpu_clashvol, gpu_clashvol.get(), atol=0.8))
示例5: init_buffers
def init_buffers(self, kernels):
    """Allocate the host and device buffers used by the OpenCL convolution.

    Creates the source, temporary and filtered buffers on both the host
    (NumPy) and the device (pyopencl arrays). Then, for every ``cell`` and
    ``centre`` in *kernels*, it calls ``self.kernels_to_buffers`` and
    allocates a host back buffer for the filtered result.

    :param kernels: mapping of cell to a mapping keyed by centre.
    :raises Exception: if *kernels* is ``None`` or empty.
    """
    if not kernels:
        raise Exception('No kernels found for OpenCL convolution')

    # Source image: uint8 on both sides.
    self.source_host_buffer = numpy.zeros(
        self.image_width * self.image_height, dtype=numpy.uint8)
    self.source_gpu_buffer = cl_array.zeros(
        self.queue, self.array_size, numpy.uint8)

    # Two float32 scratch buffers on each side.
    self.temporal_host_buffers = {}
    self.temporal_host_buffers[TMP1] = numpy.zeros_like(
        self.source_host_buffer, dtype=numpy.float32)
    self.temporal_host_buffers[TMP2] = numpy.zeros_like(
        self.source_host_buffer, dtype=numpy.float32)

    self.temporal_gpu_buffers = {}
    self.temporal_gpu_buffers[TMP1] = cl_array.zeros(
        self.queue, self.array_size, numpy.float32)
    self.temporal_gpu_buffers[TMP2] = cl_array.zeros(
        self.queue, self.array_size, numpy.float32)

    # Filtered output.
    self.filtered_host_buffer = numpy.zeros_like(
        self.source_host_buffer, dtype=numpy.float32)
    self.filtered_gpu_buffer = cl_array.zeros(
        self.queue, self.array_size, numpy.float32)

    self.kernel_host_buffers = {}
    self.kernel_gpu_buffers = {}
    self.filtered_host_back_buffers = {}

    for cell in kernels:
        # The per-cell dictionaries must exist before kernels_to_buffers runs.
        self.kernel_host_buffers[cell] = {}
        self.kernel_gpu_buffers[cell] = {}
        self.filtered_host_back_buffers[cell] = {}

        for centre in kernels[cell]:
            self.kernels_to_buffers(kernels, cell, centre)
            self.filtered_host_back_buffers[cell][centre] = numpy.zeros_like(
                self.source_host_buffer, dtype=numpy.float32)
示例6: __init__
def __init__(self, shape, do_checks=False, ctx=None, devicetype="all",
             platformid=None, deviceid=None, profile=False):
    """Set up a "Linear Algebra" plan for images of a given shape.

    :param shape: image shape as (num_rows, num_columns)
    :param do_checks: optional; when True, memory and data type checks are
                      performed when possible
    :param ctx: working OpenCL context; leave as None to have it created
                from the device type or from platformid/deviceid
    :param devicetype: device type, one of "CPU", "GPU", "ACC" or "ALL"
    :param platformid: integer platform identifier, as given by clinfo
    :param deviceid: integer device identifier, as given by clinfo
    :param profile: enable profiling at the kernel level; profiling
                    elements are stored, which makes the code slightly slower
    """
    OpenclProcessing.__init__(
        self,
        ctx=ctx,
        devicetype=devicetype,
        platformid=platformid,
        deviceid=deviceid,
        profile=profile,
    )

    # Device arrays: a complex gradient and a real image of the same shape.
    self.d_gradient = parray.zeros(self.queue, shape, np.complex64)
    self.d_image = parray.zeros(self.queue, shape, np.float32)
    device_arrays = {
        "d_gradient": self.d_gradient,
        "d_image": self.d_image,
    }
    self.add_to_cl_mem(device_arrays)

    self.wg2D = None
    self.shape = shape
    # The 2D range is given as (columns, rows), the reverse of `shape`.
    n_columns = int(self.shape[1])
    n_rows = int(self.shape[0])
    self.ndrange2D = (n_columns, n_rows)
    self.do_checks = bool(do_checks)

    OpenclProcessing.compile_kernels(self, self.kernel_files)
示例7: _allocate_arrays
def _allocate_arrays(self):
    """Allocate all host and device work arrays from the target's shape.

    Sets ``self._shape`` and ``self._ft_shape``, two host arrays, a set of
    float32 device arrays, one int32 device array and a set of complex64
    device arrays with the ``_ft_`` prefix.
    """
    target_shape = self._target.shape

    # Shape of the complex arrays: first axis shrunk to n // 2 + 1.
    self._ft_shape = (target_shape[0] // 2 + 1,) + tuple(target_shape[1:])
    self._shape = target_shape

    # Host arrays
    self._lcc = np.zeros(target_shape, dtype=np.float32)
    self._rot = np.zeros(target_shape, dtype=np.int32)

    # Real-valued device arrays
    real_names = (
        '_target2', '_rot_template', '_rot_mask', '_rot_mask2',
        '_gcc', '_ave', '_ave2', '_glcc',
    )
    for attr_name in real_names:
        device_array = cl_array.zeros(self._queue, self._shape, dtype=np.float32)
        setattr(self, attr_name, device_array)
    self._grot = cl_array.zeros(self._queue, self._shape, dtype=np.int32)

    # Complex device arrays, uploaded from zero-filled host arrays
    ft_names = (
        'target', 'target2', 'template', 'mask', 'mask2',
        'gcc', 'ave', 'ave2', 'lcc',
    )
    for suffix in ft_names:
        host_zeros = np.zeros(self._ft_shape, dtype=np.complex64)
        setattr(self, '_ft_{}'.format(suffix),
                cl_array.to_device(self._queue, host_zeros))
示例8: _gpu_init
def _gpu_init(self):
    """Initialize all the data for the GPU-accelerated search.

    Everything is stored in the ``self.gpu_data`` dictionary: the uploaded
    input volumes, the zero-initialised work arrays, a few scalar settings
    copied from ``self.data`` and the kernel object under key ``'k'``.
    """
    gpu = {}
    self.gpu_data = gpu
    data = self.data
    queue = self.queue
    context = queue.context

    # Move data to the GPU. All should be float32, as this is the native
    # length for GPUs.
    gpu['rcore'] = cl_array.to_device(queue, float32array(data['rcore'].array))
    gpu['rsurf'] = cl_array.to_device(queue, float32array(data['rsurf'].array))

    # Make the scanning chain object an Image, as this is faster to rotate.
    gpu['im_lsurf'] = cl.image_from_array(
        context, float32array(data['lsurf'].array))
    gpu['sampler'] = cl.Sampler(
        context, False, cl.addressing_mode.CLAMP, cl.filter_mode.LINEAR)

    if self.distance_restraints:
        gpu['restraints'] = cl_array.to_device(
            queue, float32array(data['restraints']))

    # Allocate arrays on the GPU: float32 volumes shaped like rcore ...
    for key in ('lsurf', 'clashvol', 'intervol'):
        gpu[key] = cl_array.zeros_like(gpu['rcore'])
    # ... and int32 volumes shaped like interspace.
    gpu['interspace'] = cl_array.zeros(queue, data['shape'], dtype=np.int32)
    for key in ('restspace', 'access_interspace', 'best_access_interspace'):
        gpu[key] = cl_array.zeros_like(gpu['interspace'])

    # Arrays for counting.
    # Reductions are typically tedious on GPU, and we need to define the
    # workgroup size to allocate the correct amount of data.
    WORKGROUPSIZE = 32
    nsubhists = int(np.ceil(gpu['rcore'].size/WORKGROUPSIZE))
    nrestraints = data['nrestraints']
    gpu['subhists'] = cl_array.zeros(
        queue, (nsubhists, nrestraints + 1), dtype=np.float32)
    gpu['viol_counter'] = cl_array.zeros(
        queue, (nsubhists, nrestraints, nrestraints), dtype=np.float32)

    # Complex arrays: same shape as the data except for a halved first axis.
    ft_shape = list(data['shape'])
    ft_shape[0] = data['shape'][0]//2 + 1
    gpu['ft_shape'] = ft_shape
    gpu['ft_rcore'] = cl_array.zeros(queue, gpu['ft_shape'], dtype=np.complex64)
    for key in ('ft_rsurf', 'ft_lsurf', 'ft_clashvol', 'ft_intervol'):
        gpu[key] = cl_array.zeros_like(gpu['ft_rcore'])

    # Other miscellaneous data, copied over unchanged.
    for key in ('nrot', 'max_clash', 'min_interaction'):
        gpu[key] = data[key]

    # Kernels
    gpu['k'] = Kernels(context)
    gpu['k'].rfftn = pyclfft.RFFTn(context, data['shape'])
    gpu['k'].irfftn = pyclfft.iRFFTn(context, data['shape'])

    # Initial calculations: transform the two fixed volumes once up front.
    gpu['k'].rfftn(queue, gpu['rcore'], gpu['ft_rcore'])
    gpu['k'].rfftn(queue, gpu['rsurf'], gpu['ft_rsurf'])
示例9: initArrays
def initArrays(self):
    """Allocate the zero-initialised per-cell arrays.

    Creates the ``specLevel`` and ``specRate`` device arrays of shape
    ``(maxCells, nSpecies)``, plus host/device pairs for ``celltype``
    (int32) and ``effgrow`` (float32) of length ``maxCells``.
    """
    queue = self.queue
    per_cell = (self.maxCells,)
    per_cell_species = (self.maxCells, self.nSpecies)

    # Device-only arrays, one row per cell and one column per species.
    self.specLevel_dev = cl_array.zeros(queue, per_cell_species, dtype=numpy.float32)
    self.specRate_dev = cl_array.zeros(queue, per_cell_species, dtype=numpy.float32)

    # Host arrays, each with a device counterpart.
    self.celltype = numpy.zeros(per_cell, dtype=numpy.int32)
    self.celltype_dev = cl_array.zeros(queue, per_cell, dtype=numpy.int32)
    self.effgrow = numpy.zeros(per_cell, dtype=numpy.float32)
    self.effgrow_dev = cl_array.zeros(queue, per_cell, dtype=numpy.float32)
示例10: test_touch
def test_touch(self):
    """Compare the GPU ``touch`` kernel with a NumPy reference.

    For the first rotation in ``self.rotations``, the clash volume
    (``lsurf`` correlated with ``rcore``) and the interaction volume
    (``lsurf`` correlated with ``rsurf``) are computed on both CPU and GPU.
    The accessible interaction space is the set of voxels where the clash
    volume is below ``MAX_CLASH`` and the interaction volume is above
    ``MIN_INTER``. CPU and GPU results must match.

    The test also writes ``cpu_interspace.mrc``, ``gpu_interspace.mrc`` and
    ``diff.mrc`` to the working directory and prints summary counts.
    """
    MAX_CLASH = 100 + 0.9
    MIN_INTER = 300 + 0.9
    rotmat = self.rotations[0]

    # --- CPU reference ---
    cpu_lsurf = np.zeros_like(self.im_lsurf.array)
    disvis.libdisvis.rotate_image3d(
        self.im_lsurf.array, self.vlength, np.linalg.inv(rotmat),
        self.im_center, cpu_lsurf)
    # Pass the output shape explicitly, as test_clashvol does, so the inverse
    # transform also restores volumes whose last axis has odd length.
    cpu_clashvol = numpy.fft.irfftn(
        numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rcore.array),
        s=self.shape)
    cpu_intervol = numpy.fft.irfftn(
        numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rsurf.array),
        s=self.shape)
    cpu_interspace = np.zeros(self.shape, dtype=np.int32)

    # --- GPU: rotate lsurf ---
    gpu_rcore = cl_array.to_device(
        self.queue, np.asarray(self.rcore.array, dtype=np.float32))
    gpu_im_lsurf = cl.image_from_array(
        self.queue.context, np.asarray(self.im_lsurf.array, dtype=np.float32))
    gpu_lsurf = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
    self.kernels.rotate_image3d(
        self.queue, self.sampler, gpu_im_lsurf, rotmat, gpu_lsurf,
        self.im_center)

    # --- GPU: clash volume ---
    gpu_ft_lsurf = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_ft_rcore = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_ft_clashvol = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_clashvol = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
    self.kernels.rfftn(self.queue, gpu_rcore, gpu_ft_rcore)
    self.kernels.rfftn(self.queue, gpu_lsurf, gpu_ft_lsurf)
    self.kernels.c_conj_multiply(
        self.queue, gpu_ft_lsurf, gpu_ft_rcore, gpu_ft_clashvol)
    self.kernels.irfftn(self.queue, gpu_ft_clashvol, gpu_clashvol)

    # --- GPU: interaction volume ---
    gpu_rsurf = cl_array.to_device(
        self.queue, np.asarray(self.rsurf.array, dtype=np.float32))
    gpu_ft_rsurf = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_ft_intervol = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_intervol = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
    gpu_interspace = cl_array.zeros(self.queue, self.shape, dtype=np.int32)
    self.kernels.rfftn(self.queue, gpu_rsurf, gpu_ft_rsurf)
    self.kernels.rfftn(self.queue, gpu_lsurf, gpu_ft_lsurf)
    self.kernels.c_conj_multiply(
        self.queue, gpu_ft_lsurf, gpu_ft_rsurf, gpu_ft_intervol)
    self.kernels.irfftn(self.queue, gpu_ft_intervol, gpu_intervol)

    # --- accessible interaction space on both sides ---
    self.kernels.touch(
        self.queue, gpu_clashvol, MAX_CLASH, gpu_intervol, MIN_INTER,
        gpu_interspace)
    # The third positional argument is the output array.
    np.logical_and(
        cpu_clashvol < MAX_CLASH, cpu_intervol > MIN_INTER, cpu_interspace)

    # Debugging output: dump both volumes and their difference to disk.
    voxelspacing = self.im_lsurf.voxelspacing
    origin = self.im_lsurf.origin
    disvis.volume.Volume(
        cpu_interspace, voxelspacing, origin).tofile('cpu_interspace.mrc')
    disvis.volume.Volume(
        gpu_interspace.get(), voxelspacing, origin).tofile('gpu_interspace.mrc')
    disvis.volume.Volume(
        cpu_interspace - gpu_interspace.get(), voxelspacing,
        origin).tofile('diff.mrc')

    print()
    print(cpu_interspace.sum(), gpu_interspace.get().sum())
    print(np.abs(cpu_interspace - gpu_interspace.get()).sum())

    self.assertTrue(np.allclose(gpu_interspace.get(), cpu_interspace))
示例11: test_2d_real_to_complex
def test_2d_real_to_complex(self, ctx):
    """Check a 2D real-to-complex FFT against ``np.fft.rfft2``.

    A 64 x 32 float32 ramp is transformed on the device into a
    64 x (32 // 2 + 1) complex64 array and compared with the NumPy result
    using ``rtol=1e-3, atol=1e-3``.

    :param ctx: OpenCL context to run the transform in.
    """
    queue = cl.CommandQueue(ctx)

    M = 64
    N = 32

    nd_data = np.arange(M*N, dtype=np.float32)
    nd_data.shape = (M, N)

    cl_data = cla.to_device(queue, nd_data)
    # A real-to-complex transform keeps only N // 2 + 1 columns.
    cl_data_transformed = cla.zeros(queue, (M, N//2+1), dtype=np.complex64)

    transform = FFT(ctx, queue,
                    cl_data,
                    cl_data_transformed,
                    axes=(1, 0),
                    )
    transform.enqueue()

    # Fetch the device result once; the original printed the bound method
    # ``get`` instead of calling it, so no data was shown.
    result = cl_data_transformed.get()
    expected = np.fft.rfft2(nd_data)
    print(result)
    print(expected)

    assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
示例12: compute_slices
def compute_slices(self, shape, pixel_size, queue=None, out=None, offset=None):
    """Compute slices of *shape* given as (z, y, x) for the given *pixel_size*.

    *queue* is the OpenCL command queue (the configured default is used when
    None). *out* is the uint8 output array; it is created when None and is
    always returned. *offset* is the starting point offset as (x, y, z); no
    offset is applied when it is None.
    """
    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl_array.zeros(queue, shape, dtype=np.uint8)

    pixel_size = make_tuple(pixel_size, num_dims=2)
    v_1, v_2, v_3 = self._make_inputs(queue, pixel_size)
    psm = pixel_size.simplified.magnitude
    max_dx = self.max_triangle_x_diff.simplified.magnitude / psm[1]

    # Express the offset in pixels: x and z are scaled by psm[1], y by psm[0].
    if offset is not None:
        off = offset.simplified.magnitude
        offset = gutil.make_vfloat3(off[0] / psm[1], off[1] / psm[0], off[2] / psm[1])
    else:
        offset = gutil.make_vfloat3(0, 0, 0)

    # The global work size is (x, z); the y extent is passed as an argument.
    global_size = (shape[2], shape[0])
    kernel_args = (
        v_1.data,
        v_2.data,
        v_3.data,
        out.data,
        np.int32(shape[1]),
        np.int32(self.num_triangles),
        offset,
        cfg.PRECISION.np_float(max_dx),
    )
    cfg.OPENCL.programs['mesh'].compute_slices(queue, global_size, None, *kernel_args)

    return out
示例13: allocate_arrays
def allocate_arrays(self):
    """
    Allocate the host arrays, device arrays and references used by the tests
    """
    image_shape = self.image.shape
    queue = self.la.queue

    # Host-side (numpy) arrays
    self.grad = np.zeros(image_shape, dtype=np.complex64)
    self.grad2 = np.zeros((2,) + image_shape, dtype=np.float32)
    self.grad_ref = gradient(self.image)
    self.div_ref = divergence(self.grad_ref)
    self.image2 = np.zeros_like(self.image)

    # Device-side gradient.
    # A plain cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE,
    # size=self.image.nbytes*2) would be the natural choice, but platforms
    # without OpenCL 1.2 support have trouble with enqueue_fill_buffer, so
    # the parray "fill" utility is used instead.
    self.gradient_parray = parray.zeros(queue, image_shape, np.complex64)
    self.gradient_buffer = self.gradient_parray.data

    # Device-side image, handled the same way
    self.image_parray = parray.to_device(queue, self.image)
    self.image_buffer = self.image_parray.data

    # Reference gradient packed as complex: component 0 -> real, 1 -> imag
    packed_ref = np.zeros(image_shape, dtype=np.complex64)
    packed_ref.real = np.copy(self.grad_ref[0])
    packed_ref.imag = np.copy(self.grad_ref[1])
    self.grad_ref_parray = parray.to_device(queue, packed_ref)
    self.grad_ref_buffer = self.grad_ref_parray.data
示例14: transfer_many
def transfer_many(objects, shape, pixel_size, energy, exponent=False, offset=None, queue=None,
                  out=None, t=None, check=True, block=False):
    """Compute the combined transmission of several *objects*.

    The exponents of all objects are summed. If *exponent* is True only that
    summed exponent is returned, otherwise the exponential of the sum is
    returned. *shape* is (y, x), *offset* is (y, x); *pixel_size*, *energy*
    and time *t* are forwarded to every object. *queue* is the OpenCL command
    queue and *out* the accumulation array (created when None). The sampling
    is checked if *check* is True, and *block* is forwarded to the objects to
    wait for their OpenCL kernels. The returned array differs from the input
    *out* when the exponential is applied, because of the pyopencl.clmath
    behavior.
    """
    queue = cfg.OPENCL.queue if queue is None else queue
    if out is None:
        out = cl_array.zeros(queue, shape, cfg.PRECISION.np_cplx)

    # Scratch array handed to every object as its own output buffer.
    u_sample = cl_array.Array(queue, shape, cfg.PRECISION.np_cplx)
    # NOTE(review): the wavelength is never used below; the call is kept
    # because it may validate *energy* - confirm and drop if it does not.
    lam = energy_to_wavelength(energy)

    for sample in objects:
        try:
            contribution = sample.transfer(shape, pixel_size, energy, exponent=True,
                                           offset=offset, t=t, queue=queue, out=u_sample,
                                           check=False, block=block)
            out += contribution
        except NotImplementedError:
            # Objects without a real space transfer are skipped.
            LOG.debug('%s does not support real space transfer', sample)

    if check:
        if not is_wavefield_sampling_ok(out, queue=queue):
            LOG.error('Insufficient transmission function sampling')

    if exponent:
        return out

    # Apply the exponent
    return clmath.exp(out, queue=queue)
示例15: pad
def pad(image, region=None, out=None, value=0, queue=None, block=False):
    """Pad a 2D *image*.

    *region* is the region to pad as (y_0, x_0, height, width): the final
    image dimensions are height x width and the original image is placed with
    its top-left corner at (y_0, x_0). If *region* is not specified, both
    dimensions are rounded up to the next power of two and the image is
    centered in the padded one. *out* is the pyopencl Array instance to fill;
    it is created and filled with *value* when not specified, and it is
    always returned. *queue* is the OpenCL command queue (the configured
    default when None). If *block* is True, wait for the copy to finish.
    """
    if region is None:
        shape = tuple(next_power_of_two(n) for n in image.shape)
        # Floor division keeps the offsets integral: they end up in the byte
        # origin handed to the rectangular copy, which true division would
        # turn into floats under Python 3.
        y_0 = (shape[0] - image.shape[0]) // 2
        x_0 = (shape[1] - image.shape[1]) // 2
        region = (y_0, x_0) + shape
    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl_array.zeros(queue, (region[2], region[3]), dtype=image.dtype) + value
    image = g_util.get_array(image, queue=queue)

    n_bytes = image.dtype.itemsize
    y_0, x_0, height, width = region
    # Origins and region are (x in bytes, y in rows, z).
    src_origin = (0, 0, 0)
    dst_origin = (n_bytes * x_0, y_0, 0)
    region = (n_bytes * image.shape[1], image.shape[0], 1)
    LOG.debug('pad, shape: %s, src_origin: %s, dst_origin: %s, region: %s', image.shape,
              src_origin, dst_origin, region)

    _copy_rect(image, out, src_origin, dst_origin, region, queue, block=block)

    return out