本文整理汇总了Python中pyopencl.array.empty_like函数的典型用法代码示例。如果您正苦于以下问题：Python empty_like函数的具体用法？Python empty_like怎么用？Python empty_like使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了empty_like函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: gs_mod_gpu
def gs_mod_gpu(idata,itera=10,osize=256):
    """Iteratively refine a Fourier-domain phase on the first available GPU.

    The loop alternates forward/inverse FFTs (``plan.execute``) with the
    ``norm`` and ``norm1`` kernels compiled from ``KERNEL``.  Judging by the
    function name this is a modified Gerchberg-Saxton scheme -- TODO confirm
    against the kernel source, which is not part of this block.

    Args:
        idata: 2-D array with the target data.  A random unit-modulus mask
            is added to it everywhere except inside a centred window of
            ``osize`` x ``osize`` samples.
        itera: Number of refinement iterations.
        osize: Side length of the centred window left untouched by the
            random mask; ``osize // 2`` is also handed to ``norm1``.

    Returns:
        Complex array of unit modulus, ``exp(1j * angle(F))``, where ``F`` is
        the final frequency-domain buffer after ``ifftshift``.
    """
    cut = osize // 2

    # Context and queue on the first GPU of the first platform.
    pl = cl.get_platforms()[0]
    devices = pl.get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[devices[0]])
    queue = cl.CommandQueue(ctx)

    # NOTE(review): the original (Spanish) comment on this line read
    # 'does not work with "complex128"' even though complex128 is what is
    # requested -- confirm which precision the FFT plan really supports.
    plan = Plan(idata.shape, queue=queue, dtype=complex128)

    src = str(Template(KERNEL).render(
        double_support=all(has_double_support(dev) for dev in devices),
        amd_double_support=all(has_amd_double_support(dev) for dev in devices),
    ))
    prg = cl.Program(ctx, src).build()

    # Random phase mask, zeroed inside the centred window.  The centre is
    # taken from the input shape instead of the former hard-coded 512, which
    # was only correct for 1024 x 1024 input.
    mask = exp(2.j * pi * random(idata.shape))
    row_mid = idata.shape[0] // 2
    col_mid = idata.shape[1] // 2
    mask[row_mid - cut:row_mid + cut, col_mid - cut:col_mid + cut] = 0

    # The original uploaded and transformed the un-masked data first and then
    # threw the result away; only the masked upload is kept.
    idata_gpu = cl_array.to_device(
        queue, ifftshift(idata + mask).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    # to_device takes the queue first; the context-first form is not accepted
    # by current PyOpenCL and was inconsistent with the calls above.
    error_gpu = cl_array.to_device(
        queue, zeros(idata_gpu.shape).astype("double"))
    plan.execute(idata_gpu.data, fdata_gpu.data)

    # Fetch the kernel once so the scalar dtypes are set on the same kernel
    # object that is launched in the loop.
    norm1 = prg.norm1
    norm1.set_scalar_arg_dtypes([None, None, None, int32])

    for _ in range(itera):
        prg.norm(queue, fdata_gpu.shape, None, fdata_gpu.data)
        plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)
        # norm1 also fills error_gpu.  The original computed
        # sqrt(sum(error)) / (2 * cut) here for a convergence check that was
        # commented out; the value was never used, so it is no longer read.
        norm1(queue, rdata_gpu.shape, None,
              rdata_gpu.data, idata_gpu.data, error_gpu.data, int32(cut))
        plan.execute(rdata_gpu.data, fdata_gpu.data)

    fdata = ifftshift(fdata_gpu.get())
    return exp(1.j * angle(fdata))
示例2: cl_test_sobel
def cl_test_sobel(im):
    """Run the ``Sobel`` operator on *im* through OpenCL.

    The image is uploaded to the device, three output buffers shaped like it
    are allocated, and the operator is invoked once.

    Returns:
        A 3-tuple of host arrays in the order (mag, imgx, imgy) -- presumably
        gradient magnitude and the x/y derivative images; confirm against the
        ``Sobel`` implementation.
    """
    context = cl.create_some_context()
    cmd_queue = cl.CommandQueue(context)
    operator = Sobel(context, cmd_queue)

    source = cl_array.to_device(cmd_queue, im)
    # Allocation order matches the result order: mag, x, y.
    magnitude, grad_x, grad_y = (cl_array.empty_like(source) for _ in range(3))

    operator(source, grad_x, grad_y, magnitude)
    return tuple(buf.get() for buf in (magnitude, grad_x, grad_y))
示例3: __call__
def __call__(self,
             input_buf,
             row_buf,
             col_buf,
             output_buf,
             intermed_buf=None):
    """Apply the row kernel and then the column kernel to *input_buf*.

    Args:
        input_buf: 2-D device array; its shape ``(h, w)`` is the global size
            of both kernel launches.
        row_buf: Device array whose first-axis length is passed to the row
            kernel together with its data.
        col_buf: Device array whose first-axis length is passed to the
            column kernel together with its data.
        output_buf: Device array written by the column kernel.
        intermed_buf: Optional device array written by the row kernel and
            read by the column kernel; allocated like *input_buf* when
            omitted.
    """
    height, width = input_buf.shape
    row_len = row_buf.shape[0]
    col_len = col_buf.shape[0]

    if intermed_buf is None:
        intermed_buf = cl_array.empty_like(input_buf)

    global_size = (height, width)

    # First pass: input -> intermediate.
    self.program.separable_correlation_row(
        self.queue, global_size, None,
        intermed_buf.data, input_buf.data,
        np.int32(width), np.int32(height),
        row_buf.data, np.int32(row_len))

    # Second pass: intermediate -> output.
    self.program.separable_correlation_col(
        self.queue, global_size, None,
        output_buf.data, intermed_buf.data,
        np.int32(width), np.int32(height),
        col_buf.data, np.int32(col_len))
示例4: genindices
def genindices(self, arrayin):
    """Generate indices for splitting array.

    ``arrayin`` is processed in row chunks of at most ``CLIDT.indexmaxsize``
    rows.  Each chunk is uploaded as int32, run through the ``trim`` kernel
    together with ``self.split``, and the rows of the kernel output are used
    as grouping keys.

    Args:
        arrayin: Array with at least two dimensions (required by
            ``np.vsplit``).

    Returns:
        dict mapping ``tuple(output_row)`` to the list of row indices that
        produced that key.  The index offset advances by
        ``CLIDT.indexmaxsize`` per chunk.

    Raises:
        cl.RuntimeError: Re-raised when waiting on the kernel fails for any
            reason other than the "out of resources" time-out, which instead
            prints a hint and exits the process with status 1.
    """
    retval = dict()
    # run the 'trim' program
    # need to split if it's too long!
    splitlist = tuple(
        range(CLIDT.indexmaxsize, arrayin.shape[0], CLIDT.indexmaxsize))
    indexinc = 0
    for chunk in np.vsplit(arrayin, splitlist):
        chunkarr = cla.to_device(
            self.queue, np.asarray(chunk, dtype=np.int32))
        template = cla.empty_like(chunkarr)
        event = self.program.trim(
            self.queue, chunkarr.shape, None,
            chunkarr.data, template.data, np.int32(self.split)
        )
        try:
            event.wait()
        except cl.RuntimeError as inst:
            if str(inst) != "clWaitForEvents failed: out of resources":
                # Bare raise keeps the original traceback.
                raise
            print("OpenCL timed out, probably due to the display manager.")
            print("Disable your display manager and try again!")
            print("If that does not work, rerun with OpenCL disabled.")
            sys.exit(1)
        for index, elem in enumerate(template.get()):
            retval.setdefault(tuple(elem), []).append(index + indexinc)
        indexinc += CLIDT.indexmaxsize
    # The mapping was previously built and then dropped; hand it back.
    return retval
示例5: square
def square():
    """Square the 'moving' volume element-wise on an OpenCL device.

    Loads the default volume when no node named 'moving' exists, flattens
    the volume, squares it with a hand-written kernel, and prints the norm
    of the difference to the same product computed with PyOpenCL array
    arithmetic.
    """
    if not slicer.util.getNode('moving'):
        load_default_volume()
    a = slicer.util.array('moving').flatten()

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a_dev = cl_array.to_device(queue, a)
    dest_dev = cl_array.empty_like(a_dev)

    # NOTE(review): the kernel is hard-wired to `short`; this assumes the
    # volume's element type is a 16-bit integer -- confirm.
    kernel_src = (
        "\n"
        "__kernel void square(__global const short *a, __global short *c)\n"
        "{\n"
        "int gid = get_global_id(0);\n"
        "c[gid] = a[gid] * a[gid];\n"
        "}\n"
    )
    prg = cl.Program(ctx, kernel_src).build()

    prg.square(queue, a.shape, None, a_dev.data, dest_dev.data)

    diff = (dest_dev - (a_dev * a_dev)).get()
    norm = la.norm(diff)
    print(norm)
示例6: test_elwise_kernel_with_options
def test_elwise_kernel_with_options(ctx_factory):
    """Check that build options reach an ``ElementwiseKernel``.

    The kernel body adds one only when ``ADD_ONE`` is defined; the define is
    supplied through ``options`` and the output is compared with
    ``input + 1`` computed on the host.
    """
    from pyopencl.clrandom import rand as clrand
    from pyopencl.elementwise import ElementwiseKernel

    context = ctx_factory()
    queue = cl.CommandQueue(context)
    in_gpu = clrand(queue, (50,), np.float32)

    build_flags = ['-D', 'ADD_ONE']
    operation = (
        "\n"
        "out[i] = in[i]\n"
        "#ifdef ADD_ONE\n"
        "+1\n"
        "#endif\n"
        ";\n"
    )
    add_one = ElementwiseKernel(
        context,
        "float* out, const float *in",
        operation,
        options=build_flags,
    )

    out_gpu = cl_array.empty_like(in_gpu)
    add_one(out_gpu, in_gpu)

    expected = in_gpu.get() + 1
    actual = out_gpu.get()
    assert la.norm(actual - expected) < 1e-5
示例7: setup_arrays
def setup_arrays(self, nrays, nsamples, cutoff):
    """Return the device work arrays for ``(nrays, nsamples, cutoff)``.

    Array sets are cached in ``self.array_cache`` under that triple, so a
    repeated request returns the same ``ArraySet`` without reallocating.

    Args:
        nrays: Sizes ``result`` and one axis of ``scratch``/``pre_cutoff``.
        nsamples: Size of the first axis of ``scratch``.
        cutoff: Size of the second axis of ``pre_cutoff``.

    Returns:
        ``ArraySet`` with ``scratch``, ``result``, ``pre_cutoff``,
        ``pre_cutoff_squared`` and ``idx`` attributes.
    """
    cache_key = (nrays, nsamples, cutoff)
    if cache_key in self.array_cache:
        return self.array_cache[cache_key]

    def pooled_empty(shape, dtype):
        # Uninitialised array drawn from this object's memory pool.
        return cla.empty(self.queue, shape,
                         dtype=dtype, allocator=self.memory_pool)

    arrays = ArraySet()
    arrays.scratch = pooled_empty((nsamples, nrays), np.float32)
    arrays.result = pooled_empty((nrays,), np.int32)
    arrays.pre_cutoff = pooled_empty((nrays, cutoff), np.float32)
    arrays.pre_cutoff_squared = cla.empty_like(arrays.pre_cutoff)
    arrays.idx = cla.arange(self.queue, 0, cutoff * nrays, 1,
                            dtype=np.int32,
                            allocator=self.memory_pool)

    self.array_cache[cache_key] = arrays
    return arrays
示例8: __call__
def __call__(self, input_ary, output_ary=None, allocator=None,
        stream=None):
    """Scan *input_ary* in three kernel launches and return the output array.

    Args:
        input_ary: One-dimensional, contiguous device array.
        output_ary: Destination array.  ``None`` writes the result over the
            input; the string ``"new"`` allocates a fresh array shaped like
            the input.
        allocator: Callable used for the two scratch allocations (called
            with ``itemsize * count``); defaults to ``input_ary.allocator``.
        stream: Passed unchanged to every ``prepared_async_call``.

    Returns:
        The array holding the result (the input itself when scanning in
        place).  An empty input is returned without launching anything.

    Raises:
        ValueError: If input and output shapes differ.
        RuntimeError: If ``input_ary.flags.forc`` is false.
    """
    allocator = allocator or input_ary.allocator

    if output_ary is None:
        output_ary = input_ary
    if isinstance(output_ary, (str, unicode)) and output_ary == "new":
        output_ary = cl_array.empty_like(input_ary, allocator=allocator)

    if input_ary.shape != output_ary.shape:
        raise ValueError("input and output must have the same shape")
    if not input_ary.flags.forc:
        raise RuntimeError("ScanKernel cannot deal with non-contiguous arrays")

    (n,) = input_ary.shape
    if not n:
        return output_ary

    unit_size = self.scan_wg_size * self.scan_wg_seq_batches
    device = driver.Context.get_device()
    processor_count = device.get_attribute(
        driver.device_attribute.MULTIPROCESSOR_COUNT)
    max_groups = 3 * processor_count

    from pytools import uniform_interval_splitting
    interval_size, num_groups = uniform_interval_splitting(
        n, unit_size, max_groups)

    itemsize = self.dtype.itemsize
    block_results = allocator(itemsize * num_groups)
    dummy_results = allocator(itemsize)

    scan_block = (self.scan_wg_size, 1, 1)
    launch_scan = self.scan_intervals_knl.prepared_async_call

    # Level 1: scan each interval (one interval per block).
    launch_scan(
        (num_groups, 1), scan_block, stream,
        input_ary.gpudata,
        n, interval_size,
        output_ary.gpudata,
        block_results)

    # Level 2: inclusive scan over the per-block results, in place.
    launch_scan(
        (1, 1), scan_block, stream,
        block_results,
        num_groups, interval_size,
        block_results,
        dummy_results)

    # Fold the level-2 results back into every interval.
    self.final_update_knl.prepared_async_call(
        (num_groups, 1), (self.update_wg_size, 1, 1), stream,
        output_ary.gpudata,
        n, interval_size,
        block_results)

    return output_ary
示例9: setup_device
def setup_device(self, imshape):
    """Allocate device buffers and build kernels for images of *imshape*.

    Stores the shape in ``self.cached_shape``, creates the float32 image
    array ``self.clIm`` plus a set of same-shaped work arrays, compiles the
    radial program and the element-wise helper kernels, and sets up the
    image object and sampler used by the ray-boundary search.

    Args:
        imshape: Shape used for ``self.clIm`` and for the ``cl.Image``.
    """
    print('Setting up with imshape = %s' % (str(imshape)))
    self.cached_shape = imshape

    # Base image array; every work buffer below copies its shape and dtype.
    self.clIm = cla.Array(self.q, imshape, np.float32)
    base = self.clIm

    for attr in ('clm', 'clx', 'cly'):
        setattr(self, attr, cla.empty_like(base))
    # These two start out zero-filled.
    for attr in ('clO', 'clM'):
        setattr(self, attr, cla.zeros_like(base))
    for attr in ('clF', 'clS', 'clThisS', 'clScratch'):
        setattr(self, attr, cla.empty_like(base))

    self.radial_prg = pyopencl.Program(self.ctx, RADIAL_PROGRAM).build()

    self.sobel = Sobel(self.ctx, self.q)

    # NaiveSeparableCorrelation(self.ctx, self.q) is the alternative the
    # original kept commented out next to this line.
    self.sepcorr2d = LocalMemorySeparableCorrelation(self.ctx, self.q)

    # Element-wise helper kernels.
    self.accum = ElementwiseKernel(
        self.ctx,
        'float *a, float *b',
        'a[i] += b[i]')
    self.norm_s = ElementwiseKernel(
        self.ctx,
        'float *s, const float nRadii',
        's[i] = -1 * s[i] / nRadii',
        'norm_s')
    self.accum_s = ElementwiseKernel(
        self.ctx,
        'float *a, float *b, const float nr',
        'a[i] -= b[i] / nr')

    self.gaussians = {}
    self.gaussian_prgs = {}

    self.minmax = MinMaxKernel(self.ctx, self.q)

    # Starburst storage: a read-only single-channel float image.
    image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    self.clIm2D = cl.Image(self.ctx, mf.READ_ONLY, image_format, imshape)

    # Sampler for reading the image object.
    self.imSampler = cl.Sampler(
        self.ctx,
        False,  # non-normalized coordinates
        cl.addressing_mode.CLAMP_TO_EDGE,
        cl.filter_mode.LINEAR)

    self.cl_find_ray_boundaries = FindRayBoundaries(self.ctx, self.q)
示例10: test
def test(self):
    """Upload three random 4x4 float32 matrices and allocate a destination.

    NOTE(review): ``queue`` is not defined in this block; it is assumed to
    be a module-level command queue -- confirm.  The snippet stops after the
    allocation, so ``b_gpu``, ``c_gpu`` and ``dest_gpu`` are not used here.
    """
    a = numpy.random.randn(4, 4).astype(numpy.float32)
    b = numpy.random.randn(4, 4).astype(numpy.float32)
    c = numpy.random.randn(4, 4).astype(numpy.float32)

    # to_device takes the queue first; the context-first form used before
    # is not accepted by current PyOpenCL.
    a_gpu = cl_array.to_device(queue, a)
    b_gpu = cl_array.to_device(queue, b)
    c_gpu = cl_array.to_device(queue, c)

    dest_gpu = cl_array.empty_like(a_gpu)
示例11: test_elwise_kernel
def test_elwise_kernel(ctx_getter):
    """Check an ``ElementwiseKernel`` computing ``z = a*x + b*y``.

    Two random float32 vectors of length 50 are combined on the device and
    the result is compared with the same expression written with PyOpenCL
    array arithmetic.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    # rand() takes the queue first; the context-first form used before is
    # not accepted by current PyOpenCL.
    a_gpu = clrand(queue, (50,), numpy.float32)
    b_gpu = clrand(queue, (50,), numpy.float32)

    from pyopencl.elementwise import ElementwiseKernel
    lin_comb = ElementwiseKernel(
        context,
        "float a, float *x, float b, float *y, float *z",
        "z[i] = a*x[i] + b*y[i]",
        "linear_combination")

    c_gpu = cl_array.empty_like(a_gpu)
    lin_comb(5, a_gpu, 6, b_gpu, c_gpu)

    assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5
示例12: callback_post
def callback_post(self, context):
    """Run an out-of-place single-precision FFT with a post-callback.

    A 2x4 complex64 array is transformed with a plan that has
    ``self.callback_kernel_src_postset`` registered as 'post' callback and a
    2x4 float32 array as its user data.  The device result is checked
    against ``np.fft.fftn(nd_data) * nd_user_data``.

    Args:
        context: OpenCL context used for the queue and the FFT plan.
    """
    print("context:", context)
    queue = cl.CommandQueue(context)

    nd_data = np.array([[1, 2, 3, 4],
                        [5, 6, 5, 2]],
                       dtype=np.complex64)
    nd_user_data = np.array([[2, 2, 2, 2],
                             [3, 4, 5, 6]],
                            dtype=np.float32)

    cl_data = cla.to_device(queue, nd_data)
    cl_user_data = cla.to_device(queue, nd_user_data)
    cl_data_transformed = cla.empty_like(cl_data)

    G = GpyFFT(debug=False)
    plan = G.create_plan(context, cl_data.shape)
    # Strides are given in elements, hence the division by the item size.
    itemsize = cl_data.dtype.itemsize
    plan.strides_in = tuple(x // itemsize for x in cl_data.strides)
    plan.strides_out = tuple(
        x // itemsize for x in cl_data_transformed.strides)
    plan.inplace = False
    plan.precision = CLFFT_SINGLE
    plan.set_callback(b'postset',
                      self.callback_kernel_src_postset,
                      'post',
                      user_data=cl_user_data.data)
    plan.bake(queue)

    plan.enqueue_transform((queue,),
                           (cl_data.data,),
                           (cl_data_transformed.data,))
    queue.finish()

    # Computed once so the printed reference is exactly what is asserted;
    # the original printed the unscaled FFT under this label.
    expected = np.fft.fftn(nd_data) * nd_user_data

    print('cl_data_transformed:')
    print(cl_data_transformed)
    print('fft(nd_data) * nd_user_data')
    print(expected)

    assert np.allclose(cl_data_transformed.get(), expected)

    del plan
示例13: test_spirv
def test_spirv(ctx_factory):
    """Add two random vectors with a kernel loaded from a SPIR-V file.

    Skipped unless both the context and the CL headers report OpenCL 2.1 or
    newer.  The binary is read from ``add-vectors-<address_bits>.spv`` in
    the current directory.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    required = (2, 1)
    too_old = (ctx._get_cl_version() < required
               or cl.get_cl_header_version() < required)
    if too_old:
        from pytest import skip
        skip("SPIR-V program creation only available in OpenCL 2.1 and higher")

    n = 50000
    a_dev = cl.clrandom.rand(queue, n, np.float32)
    b_dev = cl.clrandom.rand(queue, n, np.float32)
    dest_dev = cl_array.empty_like(a_dev)

    spv_path = "add-vectors-%d.spv" % queue.device.address_bits
    with open(spv_path, "rb") as spv_file:
        spv = spv_file.read()

    # NOTE(review): the program is used without an explicit build() call --
    # confirm that this is accepted for SPIR-V input.
    prg = cl.Program(ctx, spv)
    prg.sum(queue, a_dev.shape, None, a_dev.data, b_dev.data, dest_dev.data)

    residual = (dest_dev - (a_dev + b_dev)).get()
    assert la.norm(residual) < 1e-7
示例14: Arrays
# Use OpenCL to add two random arrays (using PyOpenCL arrays and an
# elementwise kernel)

import numpy  # NumPy number tools
import pyopencl as cl  # the OpenCL GPU computing API
import pyopencl.array as cl_array  # PyOpenCL Array (a NumPy array plus an OpenCL buffer object)
# Imported explicitly so the script does not depend on another module having
# loaded the pyopencl.elementwise submodule as a side effect.
from pyopencl.elementwise import ElementwiseKernel

context = cl.create_some_context()  # Initialize the context
queue = cl.CommandQueue(context)  # Instantiate a queue

# Two random float32 device arrays and an uninitialised destination
a = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32))
b = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32))
c = cl_array.empty_like(a)

# Create an elementwise kernel object
#  - Arguments: a string formatted as a C argument list
#  - Operation: a snippet of C that carries out the desired map operation
#  - Name: the function name as which the kernel is compiled
# Bound to `add_arrays` rather than `sum` so the builtin sum() stays usable.
add_arrays = ElementwiseKernel(
    context,
    "float *a, float *b, float *c",
    "c[i] = a[i] + b[i]",
    "sum")

add_arrays(a, b, c)  # Call the elementwise kernel

# Print all three arrays, to show the kernel worked
print("a: {}".format(a))
print("b: {}".format(b))
print("c: {}".format(c))
示例15: get_val
# NOTE(review): this fragment relies on names defined outside of it:
# `ctx`, `queue`, `length`, `vals`, `ReductionKernel` and `ElementwiseKernel`.

# Reduction kernel: sums get_val(x[i]) * length over the input, where
# get_val squares its argument.
krnlRed = ReductionKernel(
    ctx, numpy.float32, neutral="0",
    reduce_expr="a+b",
    map_expr="get_val(x[i])*%10.3f" % length,
    arguments="__global float *x",
    preamble=(
        "\n"
        "float get_val(float x)\n"
        "{\n"
        "return x*x;\n"
        "}\n"
    ))

# Generation of an array where each element is an evaluated integral.
tonum = 1000000  # Number of elements.

# Array to send to the GPU.  to_device takes the queue first (the
# context-first form is not accepted by current PyOpenCL), and
# numpy.linspace replaces the deprecated scipy alias of the same function.
p_gpu = cl_array.to_device(
    queue, numpy.linspace(0, tonum, tonum + 1).astype(numpy.float32))
res = cl_array.empty_like(p_gpu)  # The resulting array

# Elementwise (mapping) kernel: res[i] = integrate(param[i]).
krnlMap = ElementwiseKernel(
    ctx,
    "float *param, float *res",
    "res[i]=integrate(param[i])",
    preamble=(
        "\n"
        "float integrate(float param)\n"
        "{\n"
        "float sum=0;\n"
        "for (float f=0.0;f<10.0;f+=0.001)\n"
        "{\n"
        "sum+=(f*f-10*f-param);\n"
        "}\n"
        "return sum/1000.0;\n"
        "}\n"
    ))

integrand = krnlRed(vals).get()  # Calculate the first integral.
krnlMap(p_gpu, res)  # Generate the large array.