本文整理汇总了 Python 中 pyopencl.array.to_device 函数的典型用法代码示例。如果您正苦于以下问题:Python to_device 函数的具体用法?Python to_device 怎么用?Python to_device 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 to_device 函数的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: computeEnergy
def computeEnergy(self, x, y, z, q):
    """Run the ``calc_potential_energy`` kernel and return the summed output.

    The four host arrays are uploaded to the device, a zero-filled output
    array shaped like ``x`` is allocated, and the single- or double-precision
    program is launched depending on ``x.dtype``.

    Args:
        x, y, z, q: host arrays; ``x`` determines the precision and the
            launch size (``x.size`` work items).

    Returns:
        ``numpy.sum`` of the device output array. For a dtype other than
        float32/float64 a message is printed, no kernel is launched, and the
        sum of the zero-initialised output is returned.
    """
    x_dev, y_dev, z_dev, q_dev = (
        cl_array.to_device(self.queue, host) for host in (x, y, z, q)
    )
    energy_dev = cl_array.zeros_like(x_dev)

    # Select the program and the matching scalar constructor by precision.
    dtype = x.dtype
    if dtype == numpy.float32:
        program, real = self.compEnergyF, numpy.float32
    elif dtype == numpy.float64:
        program, real = self.compEnergyD, numpy.float64
    else:
        print("Unknown float type.")
        return numpy.sum(energy_dev.get(self.queue))

    program.calc_potential_energy(
        self.queue, (x.size,), None,
        x_dev.data, y_dev.data, z_dev.data,
        q_dev.data, energy_dev.data, numpy.int32(len(x)),
        real(self.k), real(self.impactFact),
        g_times_l=False)
    return numpy.sum(energy_dev.get(self.queue))
示例2: get_binned_data_angular
def get_binned_data_angular(self, limits=((-1, 1), (-1, 1)), points=500):
    """Azimuth/elevation-map measured ray endpoints to a circle and bin them.

    The projection is done on the CL device by the ``angular_project``
    kernel. Per the original author's description it linearly maps elevation
    to the circle's radius and azimuth to phi, which is useful for
    cross-section plots of directivity. The projected points are then binned
    on the host with ``np.histogram2d``.

    Args:
        limits: ``((xmin, xmax), (ymin, ymax))`` histogram range.
        points: number of bins along each axis.

    Returns:
        ``(H, x_coord, y_coord)`` as returned by ``np.histogram2d``; the same
        tuple is stored in ``self.hist_data``.
    """
    (pos0, pwr0) = self.get_measured_rays()
    queue = self.queue

    # Kernel inputs and float32 output buffers, one output element per ray.
    pos0_dev = cl_array.to_device(queue, pos0.astype(np.float32))
    x_dev = cl_array.zeros(queue, pwr0.shape, dtype=np.float32)
    y_dev = cl_array.zeros(queue, pwr0.shape, dtype=np.float32)
    pwr0_dev = cl_array.to_device(queue, pwr0.astype(np.float32))
    pwr_dev = cl_array.zeros(queue, pwr0.shape, dtype=np.float32)
    pivot = cl_array.to_device(queue, np.array([0, 0, 0, 0], dtype=np.float32))
    R_dev = cl_array.to_device(
        queue,
        np.array([[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 0]]).astype(np.float32))

    evt = self.prg.angular_project(
        queue, pwr0.shape, None,
        pos0_dev.data, pwr0_dev.data, R_dev.data, pivot.data,
        x_dev.data, y_dev.data, pwr_dev.data)
    evt.wait()

    x = x_dev.get()
    y = y_dev.get()
    pwr = np.float64(pwr_dev.get())

    # Divide the weights by the bin area so the histogram is a density.
    dx = np.float64(limits[0][1] - limits[0][0]) / np.float64(points)
    dy = np.float64(limits[1][1] - limits[1][0]) / np.float64(points)
    pwr = pwr / (dx * dy)

    self.hist_data = np.histogram2d(
        x=x.flatten(), y=y.flatten(), bins=points, range=limits,
        weights=pwr.flatten())
    return self.hist_data
示例3: test_nan_arithmetic
def test_nan_arithmetic(ctx_getter):
    """Check that NaNs propagate through device multiplication as on the host.

    Two NaN-contaminated float32 vectors are multiplied on the host and on
    the device; the NaN positions of both products must coincide.

    Args:
        ctx_getter: callable returning an OpenCL context.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    def make_nan_contaminated_vector(size):
        """Return a random float32 vector with NaN written ``size // 10`` times at random indices."""
        from random import randrange

        a = numpy.random.randn(size).astype(numpy.float32)
        for _ in range(size // 10):
            a[randrange(0, size)] = float('nan')
        return a

    size = 1 << 20

    a = make_nan_contaminated_vector(size)
    # to_device takes (queue, ary); the context is implied by the queue.
    a_gpu = cl_array.to_device(queue, a)
    b = make_nan_contaminated_vector(size)
    b_gpu = cl_array.to_device(queue, b)

    ab = a * b
    ab_gpu = (a_gpu * b_gpu).get()

    # Compare the NaN masks in one vectorised step, not element by element.
    assert (numpy.isnan(ab) == numpy.isnan(ab_gpu)).all()
示例4: test_fancy_indexing
def test_fancy_indexing(ctx_factory):
    """Compare index-array get/set on device arrays against numpy.

    A shuffled permutation is used as the index array, first for scattered
    assignment (``dest[idx] = src``) and then for gathered reads
    (``src[idx]``); the device result must equal the numpy result each time.
    """
    if _PYPY:
        pytest.xfail("numpypy: multi value setting is not supported")

    queue = cl.CommandQueue(ctx_factory())

    n = 2 ** 20 + 2 ** 18 + 22
    host_dest = np.zeros(n, dtype=np.int32)
    host_perm = np.arange(n, dtype=np.int32)
    np.random.shuffle(host_perm)
    host_src = 20000 + np.arange(n, dtype=np.int32)

    dev_dest = cl_array.to_device(queue, host_dest)
    dev_perm = cl_array.to_device(queue, host_perm)
    dev_src = cl_array.to_device(queue, host_src)

    # Scatter: write src into dest at the permuted positions.
    host_dest[host_perm] = host_src
    dev_dest[dev_perm] = dev_src
    assert np.array_equal(host_dest, dev_dest.get())

    # Gather: read src at the permuted positions.
    host_gathered = host_src[host_perm]
    dev_gathered = dev_src[dev_perm]
    assert np.array_equal(host_gathered, dev_gathered.get())
示例5: test_pthomas
def test_pthomas():
    """Check ``pthomas.PThomas.solve`` against ``scipy_solve_banded``.

    A random right-hand side of shape ``(nz, ny, nx)`` is solved on the
    device; every line along the last axis of the result is compared with the
    reference solution computed on the host from an untouched copy.

    Relies on module-level ``context`` and ``queue``.
    """
    nz = 3
    ny = 4
    nx = 5

    a = np.random.rand(nx)
    b = np.random.rand(nx)
    c = np.random.rand(nx)
    d = np.random.rand(nz, ny, nx)
    # Keep the original right-hand side for the host reference solve.
    d_copy = d.copy()

    solver = pthomas.PThomas(context, queue, (nz, ny, nx))
    a_d = cl_array.to_device(queue, a)
    b_d = cl_array.to_device(queue, b)
    c_d = cl_array.to_device(queue, c)
    c2_d = cl_array.to_device(queue, c)
    d_d = cl_array.to_device(queue, d)

    solver.solve(a_d, b_d, c_d, c2_d, d_d)
    # The result is read back from d_d (presumably solved in place; confirm against PThomas).
    d = d_d.get()

    for i, j in np.ndindex(nz, ny):
        x_true = scipy_solve_banded(a, b, c, d_copy[i, j, :])
        assert_allclose(x_true, d[i, j, :])

    # Function-call form is valid on both Python 2 and Python 3.
    print('pass')
示例6: allocate_arrays
def allocate_arrays(self):
    """
    Allocate the host (numpy) and device (parray) arrays used by the tests.
    """
    img = self.image
    queue = self.la.queue

    # Host-side arrays and reference results
    self.grad = np.zeros(img.shape, dtype=np.complex64)
    self.grad2 = np.zeros((2,) + img.shape, dtype=np.float32)
    self.grad_ref = gradient(img)
    self.div_ref = divergence(self.grad_ref)
    self.image2 = np.zeros_like(img)

    # Device-side gradient. A raw cl.Buffer(self.la.ctx, READ_WRITE,
    # size=self.image.nbytes*2) would be the natural choice, but platforms
    # without OpenCL 1.2 have trouble with enqueue_fill_buffer, so a
    # zero-filled parray is allocated and its buffer is used.
    self.gradient_parray = parray.zeros(queue, img.shape, np.complex64)
    self.gradient_buffer = self.gradient_parray.data

    # Device-side image, handled the same way
    self.image_parray = parray.to_device(queue, img)
    self.image_buffer = self.image_parray.data

    # Reference gradient packed as complex: component 0 -> real, 1 -> imag
    packed_ref = np.zeros(img.shape, dtype=np.complex64)
    packed_ref.real = np.copy(self.grad_ref[0])
    packed_ref.imag = np.copy(self.grad_ref[1])
    self.grad_ref_parray = parray.to_device(queue, packed_ref)
    self.grad_ref_buffer = self.grad_ref_parray.data
示例7: _make_inputs
def _make_inputs(self, queue, pixel_size):
    """Build three vertex arrays and upload them to the device.

    Args:
        queue: OpenCL command queue used for the uploads.
        pixel_size: indexable; element 1 is used for the first and third
            vertex arrays, element 0 for the second.

    Returns:
        Tuple ``(v_1, v_2, v_3)`` of device arrays.
    """
    # (first argument to _make_vertices, index into pixel_size) for v_1..v_3
    # NOTE(review): indices 0 and 2 both use pixel_size[1]; confirm this is intended.
    v_1, v_2, v_3 = (
        cl_array.to_device(queue, self._make_vertices(index, pixel_size[size_index]))
        for index, size_index in ((0, 1), (1, 0), (2, 1))
    )
    return v_1, v_2, v_3
示例8: computeEnergy
def computeEnergy(self, x, y, z, q):
    """Run the ``calc_potential_energy`` kernel and return the summed output.

    The four host arrays are uploaded to the device and the single- or
    double-precision program is launched depending on ``x.dtype``.

    Args:
        x, y, z, q: host arrays; ``x`` determines the precision and the
            launch size (``x.size`` work items).

    Returns:
        ``numpy.sum`` of the device output array. For a dtype other than
        float32/float64 a message is printed, no kernel is launched, and the
        sum of the zero-initialised output is returned.
    """
    xd = cl_array.to_device(self.queue, x)
    yd = cl_array.to_device(self.queue, y)
    zd = cl_array.to_device(self.queue, z)
    qd = cl_array.to_device(self.queue, q)
    # zeros_like needs a device array, so the output is allocated after the
    # upload and modelled on qd rather than on the host array q.
    coulombEnergy = cl_array.zeros_like(qd)

    prec = x.dtype
    if prec == numpy.float32:
        program = self.compEnergyF
    elif prec == numpy.float64:
        program = self.compEnergyD
    else:
        print("Unknown float type.")
        return numpy.sum(coulombEnergy.get(self.queue))

    # NOTE(review): unlike similar code, no size/scalar arguments are passed
    # to the kernel here; confirm against the kernel's signature.
    program.calc_potential_energy(
        self.queue, (x.size,),
        None,
        xd.data,
        yd.data,
        zd.data,
        qd.data,
        coulombEnergy.data,
        g_times_l=False)
    return numpy.sum(coulombEnergy.get(self.queue))
示例9: compute_preconditioners
def compute_preconditioners(self):
    """
    Build the diagonal preconditioners for the projection and backprojection
    operators.

    Each diagonal term is the reciprocal of the projector/backprojector
    summed along rows [1], i.e. of the projection/backprojection of an array
    of ones.

    [1] Jens Gregor and Thomas Benson,
        Computational Analysis and Improvement of SIRT,
        IEEE transactions on medical imaging, vol. 27, no. 7, 2008
    """
    # r_{i,i} = 1/(sum_j a_{i,j})
    # Computed on the host on purpose, to allow extra checks.
    ones_slice = np.ones(self.backprojector.slice_shape, dtype=np.float32)
    R = 1. / self.projector.projection(ones_slice)
    # Non-finite entries (rotation axis off-centre) are reset to 1.
    R[~np.isfinite(R)] = 1.
    self.d_R = parray.to_device(self.queue, R)

    # c_{j,j} = 1/(sum_i a_{i,j})
    ones_sino = np.ones(self.sino_shape, dtype=np.float32)
    C = 1. / self.backprojector.backprojection(ones_sino)
    # Same treatment for non-finite entries.
    C[~np.isfinite(C)] = 1.
    self.d_C = parray.to_device(self.queue, C)

    device_arrays = {"d_R": self.d_R, "d_C": self.d_C}
    self.add_to_cl_mem(device_arrays)
示例10: get_array
def get_array(data, queue=None):
    """Get pyopencl.array.Array from *data* which can be a numpy array, a
    pyopencl.array.Array or a pyopencl.Image.

    Args:
        data: input array or image.
        queue: OpenCL command queue; ``cfg.OPENCL.queue`` is used when None.

    Returns:
        A pyopencl.array.Array. An Array input is returned unchanged. A numpy
        array is converted to ``cfg.PRECISION.np_cplx`` when it is complex
        and to ``cfg.PRECISION.np_float`` otherwise, then uploaded. An Image
        is copied into a float32 Array and converted to
        ``cfg.PRECISION.np_float`` if its item size differs from
        ``cfg.PRECISION.cl_float``.

    Raises:
        TypeError: if *data* is none of the supported types.
    """
    if queue is None:
        queue = cfg.OPENCL.queue

    if isinstance(data, cl_array.Array):
        return data

    if isinstance(data, np.ndarray):
        # A single astype yields the target dtype as a fresh array; the
        # earlier conditional pre-conversion only added a redundant copy.
        if data.dtype.kind == 'c':
            target = cfg.PRECISION.np_cplx
        else:
            target = cfg.PRECISION.np_float
        return cl_array.to_device(queue, data.astype(target))

    if isinstance(data, cl.Image):
        # The Array shape is the Image shape reversed, and the copy region
        # is the Array shape reversed again.
        result = cl_array.empty(queue, data.shape[::-1], np.float32)
        cl.enqueue_copy(queue, result.data, data, offset=0, origin=(0, 0),
                        region=result.shape[::-1])
        if result.dtype.itemsize != cfg.PRECISION.cl_float:
            result = result.astype(cfg.PRECISION.np_float)
        return result

    raise TypeError('Unsupported data type {}'.format(type(data)))
示例11: __init__
def __init__(self, ctx, queue, dtype=np.float32):
    """Set up the Sobel filter taps, separable correlators and magnitude kernel.

    Args:
        ctx: OpenCL context.
        queue: OpenCL command queue.
        dtype: element type of the filter taps and of the image arguments of
            the magnitude kernel. float32, uint8 and uint16 map to a C type
            name; any other value leaves the C type name empty.
    """
    self.ctx = ctx
    self.queue = queue

    # 1-D Sobel taps: derivative part and smoothing part.
    taps_c = np.array([1., 0., -1.]).astype(dtype)
    taps_r = np.array([1., 2., 1.]).astype(dtype)
    self.sobel_c = cl_array.to_device(self.queue, taps_c)
    self.sobel_r = cl_array.to_device(self.queue, taps_r)

    self.scratch = None

    # The correlators receive the host-side taps, in both orders.
    self.sepconv_rc = LocalMemorySeparableCorrelation(self.ctx, self.queue, taps_r, taps_c)
    self.sepconv_cr = LocalMemorySeparableCorrelation(self.ctx, self.queue, taps_c, taps_r)

    # Pick the C type name for the kernel arguments; first match wins.
    c_type = ""
    for np_type, c_name in ((np.float32, "float"),
                            (np.uint8, "unsigned char"),
                            (np.uint16, "unsigned short")):
        if dtype == np_type:
            c_type = c_name
            break

    self.mag = ElementwiseKernel(
        ctx,
        "float *result, %s *imgx, %s *imgy" % (c_type, c_type),
        "result[i] = sqrt((float)imgx[i]*imgx[i] + (float)imgy[i]*imgy[i])",
        "mag")
示例12: get_binned_data_stereographic
def get_binned_data_stereographic(self, limits=((-1, 1), (-1, 1)), points=500):
    """Stereographically project measured ray endpoints onto the xy plane and bin them.

    The projection is done on the CL device by the ``stereograph_project``
    kernel, which the original author notes is a lot faster when there is a
    lot of data. The projected points are then binned on the host with
    ``np.histogram2d``.

    Args:
        limits: ``((xmin, xmax), (ymin, ymax))`` histogram range.
        points: number of bins along each axis.

    Returns:
        ``(H, x_coord, y_coord)`` as returned by ``np.histogram2d``; the same
        tuple is stored in ``self.hist_data``.
    """
    (pos0, pwr0) = self.get_measured_rays()
    queue = self.queue

    # Kernel inputs and float32 output buffers, one output element per ray.
    pos0_dev = cl_array.to_device(queue, pos0.astype(np.float32))
    x_dev = cl_array.zeros(queue, pwr0.shape, dtype=np.float32)
    y_dev = cl_array.zeros(queue, pwr0.shape, dtype=np.float32)
    pwr0_dev = cl_array.to_device(queue, pwr0.astype(np.float32))
    pwr_dev = cl_array.zeros(queue, pwr0.shape, dtype=np.float32)
    pivot = cl_array.to_device(queue, np.array([0, 0, 0, 0], dtype=np.float32))
    R_dev = cl_array.to_device(
        queue,
        np.array([[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 0]]).astype(np.float32))

    evt = self.prg.stereograph_project(
        queue, pwr0.shape, None,
        pos0_dev.data, pwr0_dev.data, R_dev.data, pivot.data,
        x_dev.data, y_dev.data, pwr_dev.data)
    evt.wait()

    x = x_dev.get()
    y = y_dev.get()
    pwr = np.float64(pwr_dev.get())

    # Divide the weights by the bin area so the histogram is a density.
    dx = np.float64(limits[0][1] - limits[0][0]) / np.float64(points)
    dy = np.float64(limits[1][1] - limits[1][0]) / np.float64(points)
    pwr = pwr / (dx * dy)

    self.hist_data = np.histogram2d(
        x=x.flatten(), y=y.flatten(), bins=points, range=limits,
        weights=pwr.flatten())
    return self.hist_data
示例13: test_count_1
def test_count_1(self):
    """Check the ``count`` kernel against a numpy reference.

    The expected result starts at one everywhere; ``weight`` is added to
    slice 0 where the interspace equals 1 and to slice ``i`` (for i >= 1)
    where the access interspace equals ``i``.
    """
    nrepeats = 3
    shape = [5, 5, 5]
    weight = 0.5

    np_interspace = randint(2, size=shape).astype(np.int32)
    np_access_interspace = randint(nrepeats, size=shape).astype(np.int32)
    np_count = np.ones([nrepeats] + shape, dtype=np.float32)

    # Host reference; expected[k] is a view, so the masked += writes through.
    expected = np.ones_like(np_count)
    expected[0][np_interspace == 1] += weight
    for repeat in range(1, nrepeats):
        expected[repeat][np_access_interspace == repeat] += weight

    cl_interspace = cl_array.to_device(self.queue, np_interspace)
    cl_access_interspace = cl_array.to_device(self.queue, np_access_interspace)
    cl_count = cl_array.to_device(self.queue, np_count)

    self.kernels.count(self.queue, cl_interspace, cl_access_interspace, weight, cl_count)

    self.assertTrue(np.allclose(expected, cl_count.get()))
示例14: CalcF
def CalcF(ctx, queue, m2, r2):
    """Run the ``CalcF`` kernel on *m2* and *r2* and return the result on the host.

    Args:
        ctx: OpenCL context, passed to ``get_kernel``.
        queue: OpenCL command queue.
        m2, r2: host arrays uploaded unchanged; ``m2.shape[0]`` sets both
            dimensions of the launch grid and of the result.

    Returns:
        float32 array of shape ``(m2.shape[0], m2.shape[0])``.
    """
    # The grid is square: both dimensions come from the first axis of m2.
    side = m2.shape[0]
    grid = (side, side)

    program = get_kernel(ctx, side)

    # Upload the inputs and allocate a zeroed float32 output.
    m2_dev = cl_array.to_device(queue, m2)
    r2_dev = cl_array.to_device(queue, r2)
    out_dev = cl_array.zeros(queue, grid, np.float32)

    # Fixed 16x16 work-group size.
    work_group = (16, 16)

    done = program.CalcF(queue, grid, work_group,
                         out_dev.data, m2_dev.data, r2_dev.data)
    done.wait()

    host_result = out_dev.get()
    queue.finish()
    return host_result
示例15: __init__
def __init__(self, target, queue, laplace=False):
    """Initialise the GPU correlator and precompute the target transforms.

    Args:
        target: passed to the base-class initialiser; the rest of this
            method works from ``self._target``.
        queue: OpenCL command queue; the context and device are derived
            from it.
        laplace: passed to the base-class initialiser; when
            ``self._laplace`` is true the target is run through
            ``self._laplace_filter`` before upload.
    """
    super(GPUCorrelator, self).__init__(target, laplace=laplace)
    self._queue = queue
    self._ctx = self._queue.context
    self._gpu = self._queue.device

    # Order matters: the arrays, FFT plans and kernels are used below.
    self._allocate_arrays()
    self._build_ffts()
    self._generate_kernels()

    target = self._target
    if self._laplace:
        target = self._laplace_filter(self._target)

    # move some arrays to the GPU
    self._gtarget = cl_array.to_device(self._queue, target.astype(np.float32))
    self._lcc_mask = cl_array.to_device(self._queue, self._lcc_mask.astype(np.int32))

    # Do some one-time precalculations: the transform of the target and of
    # the element-wise product of the target with itself.
    self._rfftn(self._gtarget, self._ft_target)
    self._k.multiply(self._gtarget, self._gtarget, self._target2)
    self._rfftn(self._target2, self._ft_target2)

    # Centre padded with a trailing 0; shape padded with the element count.
    self._gcenter = np.asarray(list(self._center) + [0], dtype=np.float32)
    # np.prod replaces np.product, which was removed in NumPy 2.0.
    self._gshape = np.asarray(
        list(self._target.shape) + [np.prod(self._target.shape)],
        dtype=np.int32)