This article collects typical usage examples of the pyopencl.Buffer method in Python. If you are wondering what exactly pyopencl.Buffer does and how to use it, the curated code examples below may help. You can also explore other usage examples from the pyopencl module.
Below, 15 code examples of the pyopencl.Buffer method are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
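Before the examples, a minimal self-contained sketch of the basic pyopencl.Buffer workflow may help for orientation; the context setup and array names below are illustrative and not taken from any of the examples:

import numpy
import pyopencl as cl

ctx = cl.create_some_context()   # select an OpenCL platform/device
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags

a = numpy.arange(16, dtype=numpy.float32)
# allocate device memory and copy the host array into it in one call
a_dev = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a)

out = numpy.empty_like(a)
cl.enqueue_copy(queue, out, a_dev)   # device-to-host copy
assert (out == a).all()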
Example 1: ready_argument_list
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def ready_argument_list(self, arguments):
    """Ready the argument list to be passed to the kernel; allocates GPU memory.

    :param arguments: List of arguments to be passed to the kernel.
        The order should match the argument list of the OpenCL kernel.
        Allowed values are numpy.ndarray, and/or numpy.int32, numpy.float32, and so on.
    :type arguments: list(numpy objects)

    :returns: A list of arguments that can be passed to an OpenCL kernel.
    :rtype: list( pyopencl.Buffer, numpy.int32, ... )
    """
    gpu_args = []
    for arg in arguments:
        # if the argument is a numpy array, copy it to a device buffer
        if isinstance(arg, numpy.ndarray):
            gpu_args.append(cl.Buffer(self.ctx, self.mf.READ_WRITE | self.mf.COPY_HOST_PTR, hostbuf=arg))
        # otherwise it is a scalar: pass the argument along unchanged
        else:
            gpu_args.append(arg)
    return gpu_args
Example 2: run_kernel
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def run_kernel(self, func, gpu_args, threads, grid):
    """Run the OpenCL kernel passed as 'func'.

    :param func: An OpenCL kernel.
    :type func: pyopencl.Kernel

    :param gpu_args: A list of arguments to the kernel; the order should match
        the order in the code. Allowed values are either variables in global
        memory or single values passed by value.
    :type gpu_args: list( pyopencl.Buffer, numpy.int32, ... )

    :param threads: A tuple listing the number of work items in each dimension
        of the work group.
    :type threads: tuple(int, int, int)

    :param grid: A tuple listing the number of work groups in each dimension
        of the NDRange.
    :type grid: tuple(int, int, int)
    """
    global_size = (grid[0]*threads[0], grid[1]*threads[1], grid[2]*threads[2])
    local_size = threads
    event = func(self.queue, global_size, local_size, *gpu_args)
    event.wait()
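To make the NDRange arithmetic above concrete, a small worked example with illustrative numbers (not taken from the original code):

# threads = work items per work group, grid = work groups per dimension
threads = (32, 4, 1)
grid = (8, 2, 1)
global_size = (grid[0]*threads[0], grid[1]*threads[1], grid[2]*threads[2])
print(global_size)   # (256, 8, 1): the total number of work items enqueued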
Example 3: memset
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def memset(self, buffer, value, size):
    """Set the memory in the allocation to the given value.

    :param buffer: An OpenCL Buffer to fill.
    :type buffer: pyopencl.Buffer

    :param value: The value to set the memory to.
    :type value: a single 32-bit int

    :param size: The size of the allocation unit in bytes.
    :type size: int
    """
    if isinstance(buffer, cl.Buffer):
        try:
            cl.enqueue_fill_buffer(self.queue, buffer, numpy.uint32(value), 0, size)
        except AttributeError:
            # enqueue_fill_buffer requires OpenCL 1.2; on older versions,
            # fall back to copying a pre-filled host array to the device
            src = numpy.zeros(size, dtype='uint8') + numpy.uint8(value)
            cl.enqueue_copy(self.queue, buffer, src)
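As a standalone illustration of the same fill pattern (buffer size and fill value here are illustrative; enqueue_fill_buffer is available from OpenCL 1.2 onward):

import numpy
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
buf = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, size=1024)
# fill all 1024 bytes with the repeated 32-bit pattern 0
cl.enqueue_fill_buffer(queue, buf, numpy.uint32(0), 0, 1024)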
Example 4: test_ready_argument_list
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def test_ready_argument_list():
    size = 1000
    a = np.int32(75)
    b = np.random.randn(size).astype(np.float32)
    c = np.zeros_like(b)
    arguments = [c, a, b]
    dev = opencl.OpenCLFunctions(0)
    gpu_args = dev.ready_argument_list(arguments)
    assert isinstance(gpu_args[0], pyopencl.Buffer)
    assert isinstance(gpu_args[1], np.int32)
    assert isinstance(gpu_args[2], pyopencl.Buffer)
    gpu_args[0].release()
    gpu_args[2].release()
Example 5: create_internal_buffer
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def create_internal_buffer(self, ctx):
    cityxy = [(self.city_info[idx][0], self.city_info[idx][1]) for idx in range(len(self.city_info))]
    self.__np_cityxy = numpy.array(cityxy, dtype=numpy.float32)
    mf = cl.mem_flags
    self.__dev_cityxy = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                  hostbuf=self.__np_cityxy)
    self.__np_iterations = numpy.int32(self.iterations)
    self.__np_temperature = numpy.float32(self.temperature)
    self.__np_terminate_temperature = numpy.float32(self.terminate_temperature)
    self.__np_alpha = numpy.float32(self.alpha)
Example 6: __init_cl_member
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def __init_cl_member(self):
    self.__np_costs = self.sas.get_cost_buffer()
    num_of_solution, self.__np_solution = self.sas.get_solution_info()
    mf = cl.mem_flags
    # Random numbers must be supplied by the host program, because OpenCL has no
    # built-in random number generator. A simple one, Noise.cl, is included.
    rnum = [random.randint(0, 4294967295) for i in range(num_of_solution)]
    ## Note: numpy.random.rand() would give us float32 values; casting them to uint32
    ## at the kernel call merely reinterprets the raw bytes as uint32.
    self.__dev_rnum = cl.Buffer(self.__ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                hostbuf=numpy.array(rnum, dtype=numpy.uint32))
    self.__dev_costs = cl.Buffer(self.__ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                 hostbuf=self.__np_costs)
    self.__dev_solution = cl.Buffer(self.__ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                    hostbuf=self.__np_solution)
    self.sas.create_internal_buffer(self.__ctx)
    ## To save the annealing state
Example 7: __prepare_fitness_args
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def __prepare_fitness_args(self):
    mf = cl.mem_flags
    self.__fitness_args_list = [self.__dev_chromosomes, self.__dev_fitnesses]
    self.__extra_fitness_args_list = []
    if self.__fitness_args is not None:
        ## create buffers for extra fitness arguments
        for arg in self.__fitness_args:
            cl_buffer = cl.Buffer(self.__ctx,
                                  mf.READ_WRITE | mf.COPY_HOST_PTR,
                                  hostbuf=numpy.array(arg['v'],
                                                      dtype=self.__type_to_numpy_type(arg['t'])))
            self.__extra_fitness_args_list.append(cl_buffer)
    # concatenate the two fitness args lists
    self.__fitness_args_list = self.__fitness_args_list + self.__extra_fitness_args_list
Example 8: benchmark
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def benchmark(self, func, gpu_args, threads, grid):
    """Run the kernel repeatedly, measure the time, and return the average.

    Runs the kernel and measures kernel execution time repeatedly; the number
    of iterations is set during the creation of OpenCLFunctions. Benchmark
    returns a robust average: the fastest and slowest runs are discarded and
    the rest are included in the returned average. The reason for this is to
    be robust against initialization artifacts and other exceptional cases.

    :param func: A PyOpenCL kernel compiled for this specific kernel configuration.
    :type func: pyopencl.Kernel

    :param gpu_args: A list of arguments to the kernel; the order should match
        the order in the code. Allowed values are either variables in global
        memory or single values passed by value.
    :type gpu_args: list( pyopencl.Buffer, numpy.int32, ... )

    :param threads: A tuple listing the number of work items in each dimension
        of the work group.
    :type threads: tuple(int, int, int)

    :param grid: A tuple listing the number of work groups in each dimension
        of the NDRange.
    :type grid: tuple(int, int, int)

    :returns: All benchmark results.
    :rtype: dict()
    """
    result = dict()
    result["times"] = []
    global_size = (grid[0]*threads[0], grid[1]*threads[1], grid[2]*threads[2])
    local_size = threads
    for _ in range(self.iterations):
        event = func(self.queue, global_size, local_size, *gpu_args)
        event.wait()
        # event.profile requires a queue created with PROFILING_ENABLE
        result["times"].append((event.profile.end - event.profile.start)*1e-6)
    result["time"] = numpy.mean(result["times"])
    return result
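Note that event.profile, used above and again in Example 13, only works when the command queue was created with profiling enabled; a minimal sketch:

import pyopencl as cl

ctx = cl.create_some_context()
# without PROFILING_ENABLE, reading event.profile.start/.end raises an error
queue = cl.CommandQueue(ctx,
                        properties=cl.command_queue_properties.PROFILING_ENABLE)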
Example 9: memcpy_dtoh
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def memcpy_dtoh(self, dest, src):
    """Perform a device-to-host memory copy.

    :param dest: A numpy array in host memory to store the data.
    :type dest: numpy.ndarray

    :param src: An OpenCL Buffer to copy data from.
    :type src: pyopencl.Buffer
    """
    if isinstance(src, cl.Buffer):
        cl.enqueue_copy(self.queue, dest, src)
Example 10: change_params
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def change_params(self, cl_platform_index=None, cl_device_index=None, **kargs):
    BasePeakDetector.change_params(self, **kargs)
    OpenCL_Helper.initialize_opencl(self, cl_platform_index=cl_platform_index, cl_device_index=cl_device_index)

    if self.chunksize > self.max_wg_size:
        # round the global size up to a multiple of the maximum work-group size
        n = int(np.ceil(self.chunksize / self.max_wg_size))
        self.global_size = (self.max_wg_size * n, )
        self.local_size = (self.max_wg_size, )
    else:
        self.global_size = (self.chunksize, )
        self.local_size = (self.chunksize, )

    chunksize2 = self.chunksize + 2*self.n_span

    self.sum_rectified = np.zeros((self.chunksize), dtype=self.dtype)
    self.peak_mask = np.zeros((self.chunksize), dtype='uint8')
    ring_sum = np.zeros((chunksize2), dtype=self.dtype)

    # GPU buffers
    self.sigs_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE, size=self.nb_channel*self.chunksize*self.dtype.itemsize)
    self.ring_sum_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=ring_sum)
    self.peak_mask_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.peak_mask)

    kernel = self.kernel % dict(chunksize=self.chunksize, nb_channel=self.nb_channel, n_span=self.n_span,
                                relative_threshold=self.relative_threshold, peak_sign={'+': 1, '-': -1}[self.peak_sign])
    prg = pyopencl.Program(self.ctx, kernel)
    self.opencl_prg = prg.build(options='-cl-mad-enable')
    self.kern_detect_peaks = getattr(self.opencl_prg, 'detect_peaks')
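The rounding step above reflects a general OpenCL rule: the global size must be an exact multiple of the work-group size. A worked example with illustrative numbers:

import numpy as np

chunksize, max_wg_size = 1000, 256           # illustrative values
n = int(np.ceil(chunksize / max_wg_size))    # 4
global_size = (max_wg_size * n,)             # (1024,): next multiple of 256
local_size = (max_wg_size,)                  # (256,)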
Example 11: _make_gpu_buffer
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def _make_gpu_buffer(self):
    # TODO fifo size should be: chunksize + 2*self.n_span
    self.fifo_size = self.chunksize + 2*self.n_span
    self.fifo_sigs = FifoBuffer((self.fifo_size, self.nb_channel), self.dtype)
    self.mask_peaks = np.zeros((self.chunksize, self.nb_channel), dtype='uint8')  # bool

    # GPU buffers
    self.fifo_sigs_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.fifo_sigs.buffer)
    self.neighbours_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.neighbours)
    self.mask_peaks_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.mask_peaks)
Example 12: printfullinfo
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def printfullinfo(self):
    print('\n' + '=' * 60 + '\nOpenCL Platforms and Devices')
    i = 0
    for platform in cl.get_platforms():
        print('=' * 60)
        print('Platform %d - Name: ' % i + platform.name)
        print('Platform %d - Vendor: ' % i + platform.vendor)
        print('Platform %d - Version: ' % i + platform.version)
        print('Platform %d - Profile: ' % i + platform.profile)
        for device in platform.get_devices():
            print(' ' + '-' * 56)
            print(' Device - Name: ' + device.name)
            print(' Device - Type: ' + cl.device_type.to_string(device.type))
            print(' Device - Max Clock Speed: {0} MHz'.format(device.max_clock_frequency))
            print(' Device - Compute Units: {0}'.format(device.max_compute_units))
            print(' Device - Local Memory: {0:.0f} KB'.format(device.local_mem_size / 1024.0))
            print(' Device - Constant Memory: {0:.0f} KB'.format(device.max_constant_buffer_size / 1024.0))
            print(' Device - Global Memory: {0:.0f} GB'.format(device.global_mem_size / 1073741824.0))
            print(' Device - Max Buffer/Image Size: {0:.0f} MB'.format(device.max_mem_alloc_size / 1048576.0))
            print(' Device - Max Work Group Size: {0:.0f}'.format(device.max_work_group_size))
        print('\n')
        i += 1
Example 13: test_gpu_vector_sum
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def test_gpu_vector_sum(a, b):
    # define the PyOpenCL context
    platform = cl.get_platforms()[0]
    device = platform.get_devices()[0]
    context = cl.Context([device])
    queue = cl.CommandQueue(context,
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
    # prepare the data structures
    a_buffer = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=a)
    b_buffer = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=b)
    c_buffer = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, b.nbytes)
    program = cl.Program(context, """
    __kernel void sum(__global const float *a, __global const float *b, __global float *c)
    {
        int i = get_global_id(0);
        int j;
        for(j = 0; j < 10000; j++)
        {
            c[i] = a[i] + b[i];
        }
    }""").build()
    # start the GPU test
    gpu_start_time = time()
    event = program.sum(queue, a.shape, None, a_buffer, b_buffer, c_buffer)
    event.wait()
    elapsed = 1e-9*(event.profile.end - event.profile.start)
    print("GPU Kernel evaluation Time: {0} s".format(elapsed))
    c_gpu = np.empty_like(a)
    # cl._enqueue_read_buffer is a private, removed API; use enqueue_copy instead
    cl.enqueue_copy(queue, c_gpu, c_buffer).wait()
    gpu_end_time = time()
    print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time))
    return c_gpu

# start the test
Developer: PacktPublishing, Project: Python-Parallel-Programming-Cookbook-Second-Edition, Lines of code: 39, Source: testApplicationPyopencl.py
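A hedged usage sketch for this example (the input size is illustrative; the function assumes numpy as np, pyopencl as cl, and time are already imported):

import numpy as np
from time import time   # test_gpu_vector_sum relies on time()

# float32 inputs to match the kernel's float arguments
a = np.random.rand(100000).astype(np.float32)
b = np.random.rand(100000).astype(np.float32)
c = test_gpu_vector_sum(a, b)
assert np.allclose(c, a + b)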
Example 14: create_bytearray
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def create_bytearray(ctx, size):
    mf = cl.mem_flags
    # note: despite the name, this allocates int32 elements, so the device
    # buffer holds size*4 bytes
    py_buffer = numpy.zeros(size, dtype=numpy.int32)
    cl_buffer = cl.Buffer(ctx,
                          mf.READ_WRITE | mf.COPY_HOST_PTR,
                          hostbuf=py_buffer)
    return py_buffer, cl_buffer
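A minimal sketch of how such a pair might be used; the context and queue setup here is added for illustration:

import numpy
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
py_buffer, cl_buffer = create_bytearray(ctx, 16)
# ... run a kernel that writes into cl_buffer ...
cl.enqueue_copy(queue, py_buffer, cl_buffer)   # read results back into the host array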
Example 15: preexecute_kernels
# Required module import: import pyopencl [as alias]
# Or: from pyopencl import Buffer [as alias]
def preexecute_kernels(self, ctx, queue, population):
    ## initialize global variables for kernel execution
    total_dna_size = population * self.dna_total_length

    other_chromosomes = numpy.zeros(total_dna_size, dtype=numpy.int32)
    cross_map = numpy.zeros(total_dna_size, dtype=numpy.int32)
    ratios = numpy.zeros(population, dtype=numpy.float32)

    mf = cl.mem_flags
    self.__dev_ratios = cl.Buffer(ctx, mf.WRITE_ONLY, ratios.nbytes)
    self.__dev_other_chromosomes = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                             hostbuf=other_chromosomes)
    self.__dev_cross_map = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                     hostbuf=cross_map)