本文整理汇总了Golang中gocl/cl.CLReleaseContext函数的典型用法代码示例。如果您正苦于以下问题:Golang CLReleaseContext函数的具体用法?Golang CLReleaseContext怎么用?Golang CLReleaseContext使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了CLReleaseContext函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: TestContext
func TestContext(t *testing.T) {
/* Host/device data structures */
var platform [1]cl.CL_platform_id
var device [1]cl.CL_device_id
var context cl.CL_context
var err cl.CL_int
var paramValueSize cl.CL_size_t
var ref_count interface{}
user_data := []byte("Hello, I am callback")
/* Access the first installed platform */
err = cl.CLGetPlatformIDs(1, platform[:], nil)
if err != cl.CL_SUCCESS {
t.Errorf("Couldn't find any platforms")
}
/* Access the first available device */
err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
if err == cl.CL_DEVICE_NOT_FOUND {
err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
}
if err != cl.CL_SUCCESS {
t.Errorf("Couldn't find any devices")
}
/* Create the context */
context = cl.CLCreateContext(nil, 1, device[:], my_contex_notify, unsafe.Pointer(&user_data), &err)
if err != cl.CL_SUCCESS {
t.Errorf("Couldn't create a context")
}
/* Determine the reference count */
err = cl.CLGetContextInfo(context, cl.CL_CONTEXT_REFERENCE_COUNT,
0, nil, ¶mValueSize)
if err != cl.CL_SUCCESS {
t.Errorf("Failed to find context %s.\n", "CL_CONTEXT_REFERENCE_COUNT")
}
err = cl.CLGetContextInfo(context, cl.CL_CONTEXT_REFERENCE_COUNT,
paramValueSize, &ref_count, nil)
if err != cl.CL_SUCCESS {
t.Errorf("Couldn't read the reference count.")
}
t.Logf("Initial reference count: %d\n", ref_count.(cl.CL_uint))
/* Update and display the reference count */
cl.CLRetainContext(context)
cl.CLGetContextInfo(context, cl.CL_CONTEXT_REFERENCE_COUNT,
paramValueSize, &ref_count, nil)
t.Logf("Reference count: %d\n", ref_count.(cl.CL_uint))
cl.CLReleaseContext(context)
cl.CLGetContextInfo(context, cl.CL_CONTEXT_REFERENCE_COUNT,
paramValueSize, &ref_count, nil)
t.Logf("Reference count: %d\n", ref_count.(cl.CL_uint))
cl.CLReleaseContext(context)
}
示例2: main
//.........这里部分代码省略.........
// Associate the input and output buffers with the
// kernel
// using clSetKernelArg()
status = cl.CLSetKernelArg(kernel,
0,
cl.CL_size_t(unsafe.Sizeof(bufferA)),
unsafe.Pointer(&bufferA))
status |= cl.CLSetKernelArg(kernel,
1,
cl.CL_size_t(unsafe.Sizeof(bufferB)),
unsafe.Pointer(&bufferB))
status |= cl.CLSetKernelArg(kernel,
2,
cl.CL_size_t(unsafe.Sizeof(bufferC)),
unsafe.Pointer(&bufferC))
if status != cl.CL_SUCCESS {
println("CLSetKernelArg status!=cl.CL_SUCCESS")
return
}
//-----------------------------------------------------
// STEP 10: Configure the work-item structure
//-----------------------------------------------------
// Define an index space (global work size) of work
// items for
// execution. A workgroup size (local work size) is not
// required,
// but can be used.
var globalWorkSize [1]cl.CL_size_t
// There are 'elements' work-items
globalWorkSize[0] = elements
//-----------------------------------------------------
// STEP 11: Enqueue the kernel for execution
//-----------------------------------------------------
// Execute the kernel by using
// clEnqueueNDRangeKernel().
// 'globalWorkSize' is the 1D dimension of the
// work-items
status = cl.CLEnqueueNDRangeKernel(cmdQueue,
kernel,
1,
nil,
globalWorkSize[:],
nil,
0,
nil,
nil)
if status != cl.CL_SUCCESS {
println("CLEnqueueNDRangeKernel status!=cl.CL_SUCCESS")
return
}
//-----------------------------------------------------
// STEP 12: Read the output buffer back to the host
//-----------------------------------------------------
// Use clEnqueueReadBuffer() to read the OpenCL output
// buffer (bufferC)
// to the host output array (C)
cl.CLEnqueueReadBuffer(cmdQueue,
bufferC,
cl.CL_TRUE,
0,
datasize,
unsafe.Pointer(&C[0]),
0,
nil,
nil)
if status != cl.CL_SUCCESS {
println("CLEnqueueReadBuffer status!=cl.CL_SUCCESS")
return
}
// Verify the output
result := true
for i := cl.CL_int(0); i < cl.CL_int(elements); i++ {
if C[i] != i+i {
result = false
break
}
}
if result {
println("Output is correct\n")
} else {
println("Output is incorrect\n")
}
//-----------------------------------------------------
// STEP 13: Release OpenCL resources
//-----------------------------------------------------
// Free OpenCL resources
cl.CLReleaseKernel(kernel)
cl.CLReleaseProgram(program)
cl.CLReleaseCommandQueue(cmdQueue)
cl.CLReleaseMemObject(bufferA)
cl.CLReleaseMemObject(bufferB)
cl.CLReleaseMemObject(bufferC)
cl.CLReleaseContext(context)
}
示例3: main
// main builds the program in PROGRAM_FILE, runs the kernel KERNEL_FUNC as a
// single task with two output buffers as its arguments, reads both buffers
// back and prints them.
//
// Every OpenCL object is released through defer as soon as it has been
// created, so the early returns on error paths no longer leak the objects
// created before the failure. All status codes are checked. PROGRAM_FILE and
// KERNEL_FUNC are defined elsewhere in this package.
func main() {
	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and buffers */
	var select1 [4]float32
	var select2 [2]cl.CL_uchar
	var select1_buffer, select2_buffer cl.CL_mem

	/* Create a context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}
	defer cl.CLReleaseContext(context)

	/* Create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	// Guard the dereference below; NOTE(review): confirm whether
	// Build_program can actually return nil on failure.
	if program == nil {
		println("Couldn't build the program")
		return
	}
	defer cl.CLReleaseProgram(*program)

	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}
	defer cl.CLReleaseKernel(kernel)

	/* Create write-only buffers to hold the output data */
	select1_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(select1)), nil, &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}
	defer cl.CLReleaseMemObject(select1_buffer)

	select2_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(select2)), nil, &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}
	defer cl.CLReleaseMemObject(select2_buffer)

	/* Create kernel arguments */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(select1_buffer)), unsafe.Pointer(&select1_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}
	err = cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(select2_buffer)), unsafe.Pointer(&select2_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}
	defer cl.CLReleaseCommandQueue(queue)

	/* Enqueue kernel */
	err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read and print the result (CL_TRUE makes both reads blocking) */
	err = cl.CLEnqueueReadBuffer(queue, select1_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(select1)), unsafe.Pointer(&select1), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}
	err = cl.CLEnqueueReadBuffer(queue, select2_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(select2)), unsafe.Pointer(&select2), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}

	fmt.Printf("select: ")
	for i := 0; i < 3; i++ {
		fmt.Printf("%.2f, ", select1[i])
	}
	fmt.Printf("%.2f\n", select1[3])
	fmt.Printf("bitselect: %X, %X\n", select2[0], select2[1])

	/* Resources are deallocated by the deferred release calls above. */
}
示例4: main
//.........这里部分代码省略.........
var err cl.CL_int
var err1 error
var global_size [2]cl.CL_size_t
/* Image data */
var pixels []uint16
var png_format cl.CL_image_format
var input_image, output_image cl.CL_mem
var origin, region [3]cl.CL_size_t
var width, height cl.CL_size_t
/* Open input file and read image data */
pixels, width, height, err1 = utils.Read_image_data(INPUT_FILE)
if err1 != nil {
return
} else {
fmt.Printf("width=%d, height=%d", width, height)
}
/* Create a device and context */
device = utils.Create_device()
context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
if err < 0 {
println("Couldn't create a context")
return
}
/* Build the program and create a kernel */
program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
if err < 0 {
fmt.Printf("Couldn't create a kernel: %d", err)
return
}
/* Create image object */
png_format.Image_channel_order = cl.CL_LUMINANCE
png_format.Image_channel_data_type = cl.CL_UNORM_INT16
input_image = cl.CLCreateImage2D(context,
cl.CL_MEM_READ_ONLY|cl.CL_MEM_COPY_HOST_PTR,
&png_format, width, height, 0, unsafe.Pointer(&pixels[0]), &err)
output_image = cl.CLCreateImage2D(context,
cl.CL_MEM_WRITE_ONLY, &png_format, width, height, 0, nil, &err)
if err < 0 {
println("Couldn't create the image object")
return
}
/* Create kernel arguments */
err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(input_image)), unsafe.Pointer(&input_image))
err |= cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(output_image)), unsafe.Pointer(&output_image))
if err < 0 {
println("Couldn't set a kernel argument")
return
}
/* Create a command queue */
queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
if err < 0 {
println("Couldn't create a command queue")
return
}
/* Enqueue kernel */
global_size[0] = width
global_size[1] = height
err = cl.CLEnqueueNDRangeKernel(queue, kernel, 2, nil, global_size[:],
nil, 0, nil, nil)
if err < 0 {
println("Couldn't enqueue the kernel")
return
}
/* Read the image object */
origin[0] = 0
origin[1] = 0
origin[2] = 0
region[0] = width
region[1] = height
region[2] = 1
err = cl.CLEnqueueReadImage(queue, output_image, cl.CL_TRUE, origin,
region, 0, 0, unsafe.Pointer(&pixels[0]), 0, nil, nil)
if err < 0 {
println("Couldn't read from the image object")
return
}
/* Create output PNG file and write data */
utils.Write_image_data(OUTPUT_FILE, pixels, width, height)
/* Deallocate resources */
cl.CLReleaseMemObject(input_image)
cl.CLReleaseMemObject(output_image)
cl.CLReleaseKernel(kernel)
cl.CLReleaseCommandQueue(queue)
cl.CLReleaseProgram(*program)
cl.CLReleaseContext(context)
}
示例5: main
// main uploads four radius/angle pairs to the device, runs the kernel
// KERNEL_FUNC from PROGRAM_FILE as a single task, and prints the x/y pairs
// the kernel writes into the two output buffers.
//
// Every OpenCL object is released through defer right after creation so the
// early returns on error paths do not leak, and the status of every buffer
// creation, argument binding and read-back is checked. PROGRAM_FILE,
// KERNEL_FUNC and M_PI are defined elsewhere in this package.
func main() {
	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and buffers */
	var r_coords = [4]float32{2, 1, 3, 4}
	var angles = [4]float32{3 * M_PI / 8, 3 * M_PI / 4, 4 * M_PI / 3, 11 * M_PI / 6}
	var x_coords, y_coords [4]float32
	var r_coords_buffer, angles_buffer,
		x_coords_buffer, y_coords_buffer cl.CL_mem

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}
	defer cl.CLReleaseContext(context)

	/* Create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	// Guard the dereference below; NOTE(review): confirm whether
	// Build_program can actually return nil on failure.
	if program == nil {
		println("Couldn't build the program")
		return
	}
	defer cl.CLReleaseProgram(*program)

	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}
	defer cl.CLReleaseKernel(kernel)

	/* Create read-only input buffers initialised from the host arrays */
	r_coords_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|cl.CL_MEM_COPY_HOST_PTR,
		cl.CL_size_t(unsafe.Sizeof(r_coords)), unsafe.Pointer(&r_coords[0]), &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}
	defer cl.CLReleaseMemObject(r_coords_buffer)

	angles_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|cl.CL_MEM_COPY_HOST_PTR,
		cl.CL_size_t(unsafe.Sizeof(angles)), unsafe.Pointer(&angles[0]), &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}
	defer cl.CLReleaseMemObject(angles_buffer)

	/* Create read-write buffers to hold the output data */
	x_coords_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_WRITE,
		cl.CL_size_t(unsafe.Sizeof(x_coords)), nil, &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}
	defer cl.CLReleaseMemObject(x_coords_buffer)

	y_coords_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_WRITE,
		cl.CL_size_t(unsafe.Sizeof(y_coords)), nil, &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}
	defer cl.CLReleaseMemObject(y_coords_buffer)

	/* Create kernel arguments, in the order r, angle, x, y */
	buffers := [4]*cl.CL_mem{&r_coords_buffer, &angles_buffer, &x_coords_buffer, &y_coords_buffer}
	for idx, buf := range buffers {
		err = cl.CLSetKernelArg(kernel, cl.CL_uint(idx), cl.CL_size_t(unsafe.Sizeof(*buf)), unsafe.Pointer(buf))
		if err < 0 {
			println("Couldn't set a kernel argument")
			return
		}
	}

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}
	defer cl.CLReleaseCommandQueue(queue)

	/* Enqueue kernel */
	err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read the results (CL_TRUE makes both reads blocking) */
	err = cl.CLEnqueueReadBuffer(queue, x_coords_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(x_coords)), unsafe.Pointer(&x_coords), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}
	err = cl.CLEnqueueReadBuffer(queue, y_coords_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(y_coords)), unsafe.Pointer(&y_coords), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}

	/* Display the results */
	for i := 0; i < 4; i++ {
		fmt.Printf("(%6.3f, %6.3f)\n", x_coords[i], y_coords[i])
	}

	/* Resources are deallocated by the deferred release calls above. */
}
示例6: main
func main() {
// Use this to check the output of each API call
var status cl.CL_int
//-----------------------------------------------------
// STEP 1: Discover and initialize the platforms
//-----------------------------------------------------
var numPlatforms cl.CL_uint
var platforms []cl.CL_platform_id
// Use clGetPlatformIDs() to retrieve the number of
// platforms
status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)
// Allocate enough space for each platform
platforms = make([]cl.CL_platform_id, numPlatforms)
// Fill in platforms with clGetPlatformIDs()
status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")
//-----------------------------------------------------
// STEP 2: Discover and initialize the GPU devices
//-----------------------------------------------------
var numDevices cl.CL_uint
var devices []cl.CL_device_id
// Use clGetDeviceIDs() to retrieve the number of
// devices present
status = cl.CLGetDeviceIDs(platforms[0],
cl.CL_DEVICE_TYPE_GPU,
0,
nil,
&numDevices)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")
// Allocate enough space for each device
devices = make([]cl.CL_device_id, numDevices)
// Fill in devices with clGetDeviceIDs()
status = cl.CLGetDeviceIDs(platforms[0],
cl.CL_DEVICE_TYPE_GPU,
numDevices,
devices,
nil)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")
//-----------------------------------------------------
// STEP 3: Create a context
//-----------------------------------------------------
var context cl.CL_context
// Create a context using clCreateContext() and
// associate it with the devices
context = cl.CLCreateContext(nil,
numDevices,
devices,
nil,
nil,
&status)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
defer cl.CLReleaseContext(context)
//-----------------------------------------------------
// STEP 4: Create a command queue
//-----------------------------------------------------
var commandQueue [MAX_COMMAND_QUEUE]cl.CL_command_queue
// Create a command queue using clCreateCommandQueueWithProperties(),
// and associate it with the device you want to execute
for i := 0; i < MAX_COMMAND_QUEUE; i++ {
commandQueue[i] = cl.CLCreateCommandQueueWithProperties(context,
devices[0],
nil,
&status)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
defer cl.CLReleaseCommandQueue(commandQueue[i])
}
//-----------------------------------------------------
// STEP 5: Create device buffers
//-----------------------------------------------------
producerGroupSize := cl.CL_size_t(PRODUCER_GROUP_SIZE)
producerGlobalSize := cl.CL_size_t(PRODUCER_GLOBAL_SIZE)
consumerGroupSize := cl.CL_size_t(CONSUMER_GROUP_SIZE)
consumerGlobalSize := cl.CL_size_t(CONSUMER_GLOBAL_SIZE)
var samplePipePkt [2]cl.CL_float
szPipe := cl.CL_uint(PIPE_SIZE)
szPipePkt := cl.CL_uint(unsafe.Sizeof(samplePipePkt))
if szPipe%PRNG_CHANNELS != 0 {
szPipe = (szPipe/PRNG_CHANNELS)*PRNG_CHANNELS + PRNG_CHANNELS
}
consumerGlobalSize = cl.CL_size_t(szPipe)
pipePktPerThread := cl.CL_int(szPipe) / PRNG_CHANNELS
seed := cl.CL_int(SEED)
rngType := cl.CL_int(RV_GAUSSIAN)
var histMin cl.CL_float
var histMax cl.CL_float
//.........这里部分代码省略.........
示例7: main
func main() {
// Use this to check the output of each API call
var status cl.CL_int
//-----------------------------------------------------
// STEP 1: Discover and initialize the platforms
//-----------------------------------------------------
var numPlatforms cl.CL_uint
var platforms []cl.CL_platform_id
// Use clGetPlatformIDs() to retrieve the number of
// platforms
status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)
// Allocate enough space for each platform
platforms = make([]cl.CL_platform_id, numPlatforms)
// Fill in platforms with clGetPlatformIDs()
status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")
//-----------------------------------------------------
// STEP 2: Discover and initialize the GPU devices
//-----------------------------------------------------
var numDevices cl.CL_uint
var devices []cl.CL_device_id
// Use clGetDeviceIDs() to retrieve the number of
// devices present
status = cl.CLGetDeviceIDs(platforms[0],
cl.CL_DEVICE_TYPE_GPU,
0,
nil,
&numDevices)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")
// Allocate enough space for each device
devices = make([]cl.CL_device_id, numDevices)
// Fill in devices with clGetDeviceIDs()
status = cl.CLGetDeviceIDs(platforms[0],
cl.CL_DEVICE_TYPE_GPU,
numDevices,
devices,
nil)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")
//-----------------------------------------------------
// STEP 3: Create a context
//-----------------------------------------------------
var context cl.CL_context
// Create a context using clCreateContext() and
// associate it with the devices
context = cl.CLCreateContext(nil,
numDevices,
devices,
nil,
nil,
&status)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
defer cl.CLReleaseContext(context)
//-----------------------------------------------------
// STEP 4: Create a command queue
//-----------------------------------------------------
var cmdQueue cl.CL_command_queue
// Create a command queue using clCreateCommandQueueWithProperties(),
// and associate it with the device you want to execute
cmdQueue = cl.CLCreateCommandQueueWithProperties(context,
devices[0],
nil,
&status)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
defer cl.CLReleaseCommandQueue(cmdQueue)
//-----------------------------------------------------
// STEP 5: Create device buffers
//-----------------------------------------------------
// initialize any device/SVM memory here.
/* svm buffer for binary tree */
svmTreeBuf := cl.CLSVMAlloc(context,
cl.CL_MEM_READ_WRITE,
cl.CL_size_t(NUMBER_OF_NODES*unsafe.Sizeof(sampleNode)),
0)
if nil == svmTreeBuf {
println("clSVMAlloc(svmTreeBuf) failed.")
return
}
defer cl.CLSVMFree(context, svmTreeBuf)
/* svm buffer for search keys */
svmSearchBuf := cl.CLSVMAlloc(context,
cl.CL_MEM_READ_WRITE,
cl.CL_size_t(NUMBER_OF_SEARCH_KEY*unsafe.Sizeof(sampleKey)),
0)
if nil == svmSearchBuf {
println("clSVMAlloc(svmSearchBuf) failed.")
//.........这里部分代码省略.........
示例8: main
func main() {
// Use this to check the output of each API call
var status cl.CL_int
//-----------------------------------------------------
// STEP 1: Discover and initialize the platforms
//-----------------------------------------------------
var numPlatforms cl.CL_uint
// Use clGetPlatformIDs() to retrieve the number of
// platforms
status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")
// Allocate enough space for each platform
platforms := make([]cl.CL_platform_id, numPlatforms)
// Fill in platforms with clGetPlatformIDs()
status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")
//-----------------------------------------------------
// STEP 2: Discover and initialize the GPU devices
//-----------------------------------------------------
var numDevices cl.CL_uint
// Use clGetDeviceIDs() to retrieve the number of
// devices present
status = cl.CLGetDeviceIDs(platforms[0],
cl.CL_DEVICE_TYPE_GPU,
0,
nil,
&numDevices)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")
// Allocate enough space for each device
devices := make([]cl.CL_device_id, numDevices)
// Fill in devices with clGetDeviceIDs()
status = cl.CLGetDeviceIDs(platforms[0],
cl.CL_DEVICE_TYPE_GPU,
numDevices,
devices,
nil)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")
var caps cl.CL_device_svm_capabilities
var caps_value interface{}
status = cl.CLGetDeviceInfo(
devices[0],
cl.CL_DEVICE_SVM_CAPABILITIES,
cl.CL_size_t(unsafe.Sizeof(caps)),
&caps_value,
nil)
caps = caps_value.(cl.CL_device_svm_capabilities)
// Coarse-grained buffer SVM should be available on any OpenCL 2.0 device.
// So it is either not an OpenCL 2.0 device or it must support coarse-grained buffer SVM:
if !(status == cl.CL_SUCCESS && (caps&cl.CL_DEVICE_SVM_FINE_GRAIN_BUFFER) != 0) {
fmt.Printf("Cannot detect fine-grained buffer SVM capabilities on the device. The device seemingly doesn't support fine-grained buffer SVM. caps=%x\n", caps)
println("")
return
}
//-----------------------------------------------------
// STEP 3: Create a context
//-----------------------------------------------------
// Create a context using clCreateContext() and
// associate it with the devices
context := cl.CLCreateContext(nil,
numDevices,
devices,
nil,
nil,
&status)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
defer cl.CLReleaseContext(context)
//-----------------------------------------------------
// STEP 4: Create a command queue
//-----------------------------------------------------
// Create a command queue using clCreateCommandQueueWithProperties(),
// and associate it with the device you want to execute
queue := cl.CLCreateCommandQueueWithProperties(context,
devices[0],
nil,
&status)
utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
defer cl.CLReleaseCommandQueue(queue)
//-----------------------------------------------------
// STEP 5: Create and compile the program
//-----------------------------------------------------
programSource, programeSize := utils.Load_programsource("svmfg.cl")
// Create a program using clCreateProgramWithSource()
program := cl.CLCreateProgramWithSource(context,
1,
programSource[:],
//.........这里部分代码省略.........
示例9: main
//.........这里部分代码省略.........
/* Extension data */
var sizeofuint cl.CL_uint
var addr_data interface{}
var ext_data interface{}
fp64_ext := "cl_khr_fp64"
var ext_size cl.CL_size_t
var options []byte
/* Create a device and context */
device = utils.Create_device()
context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
if err < 0 {
println("Couldn't create a context")
return
}
/* Obtain the device data */
if cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_ADDRESS_BITS,
cl.CL_size_t(unsafe.Sizeof(sizeofuint)), &addr_data, nil) < 0 {
println("Couldn't read extension data")
return
}
fmt.Printf("Address width: %v\n", addr_data.(cl.CL_uint))
/* Define "FP_64" option if doubles are supported */
cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_EXTENSIONS,
0, nil, &ext_size)
// ext_data = (char*)malloc(ext_size + 1);
// ext_data[ext_size] = '\0';
cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_EXTENSIONS,
ext_size, &ext_data, nil)
if strings.Contains(ext_data.(string), fp64_ext) {
fmt.Printf("The %s extension is supported.\n", fp64_ext)
options = []byte("-DFP_64 ")
} else {
fmt.Printf("The %s extension is not supported. %s\n", fp64_ext, ext_data.(string))
}
/* Build the program and create the kernel */
program = utils.Build_program(context, device[:], PROGRAM_FILE, options)
kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
if err < 0 {
println("Couldn't create a kernel")
return
}
/* Create CL buffers to hold input and output data */
a_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(a)), unsafe.Pointer(&a), &err)
if err < 0 {
println("Couldn't create a memory object")
return
}
b_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(b)), unsafe.Pointer(&b), nil)
output_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
cl.CL_size_t(unsafe.Sizeof(b)), nil, nil)
/* Create kernel arguments */
err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(a_buffer)), unsafe.Pointer(&a_buffer))
if err < 0 {
println("Couldn't set a kernel argument")
return
}
cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(b_buffer)), unsafe.Pointer(&b_buffer))
cl.CLSetKernelArg(kernel, 2, cl.CL_size_t(unsafe.Sizeof(output_buffer)), unsafe.Pointer(&output_buffer))
/* Create a command queue */
queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
if err < 0 {
println("Couldn't create a command queue")
return
}
/* Enqueue kernel */
err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
if err < 0 {
println("Couldn't enqueue the kernel")
return
}
/* Read and print the result */
err = cl.CLEnqueueReadBuffer(queue, output_buffer, cl.CL_TRUE, 0,
cl.CL_size_t(unsafe.Sizeof(result)), unsafe.Pointer(&result), 0, nil, nil)
if err < 0 {
println("Couldn't read the output buffer")
return
}
fmt.Printf("The kernel result is %f\n", result)
/* Deallocate resources */
cl.CLReleaseMemObject(a_buffer)
cl.CLReleaseMemObject(b_buffer)
cl.CLReleaseMemObject(output_buffer)
cl.CLReleaseKernel(kernel)
cl.CLReleaseCommandQueue(queue)
cl.CLReleaseProgram(*program)
cl.CLReleaseContext(context)
}
示例10: main
//.........这里部分代码省略.........
/* Initialize arrays */
for i := 0; i < 100; i++ {
data_one[i] = 1.0 * float32(i)
data_two[i] = -1.0 * float32(i)
result_array[i] = 0.0
}
/* Create a device and context */
device = utils.Create_device()
context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
if err < 0 {
println("Couldn't create a context")
return
}
/* Build the program and create the kernel */
program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
kernel = cl.CLCreateKernel(*program, []byte(KERNEL_FUNC), &err)
if err < 0 {
println("Couldn't create a kernel")
return
}
/* Create buffers */
buffer_one = cl.CLCreateBuffer(context, cl.CL_MEM_READ_WRITE|
cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(data_one)), unsafe.Pointer(&data_one[0]), &err)
if err < 0 {
println("Couldn't create buffer object 1")
return
}
buffer_two = cl.CLCreateBuffer(context, cl.CL_MEM_READ_WRITE|
cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(data_two)), unsafe.Pointer(&data_two), &err)
if err < 0 {
println("Couldn't create buffer object 2")
return
}
/* Set buffers as arguments to the kernel */
err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(buffer_one)), unsafe.Pointer(&buffer_one))
err |= cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(buffer_two)), unsafe.Pointer(&buffer_two))
if err < 0 {
println("Couldn't set the buffer as the kernel argument")
return
}
/* Create a command queue */
queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
if err < 0 {
println("Couldn't create a command queue")
return
}
/* Enqueue kernel */
err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
if err < 0 {
println("Couldn't enqueue the kernel")
return
}
/* Enqueue command to copy buffer one to buffer two */
err = cl.CLEnqueueCopyBuffer(queue, buffer_one, buffer_two, 0, 0,
cl.CL_size_t(unsafe.Sizeof(data_one)), 0, nil, nil)
if err < 0 {
println("Couldn't perform the buffer copy")
return
}
/* Enqueue command to map buffer two to host memory */
mapped_memory = cl.CLEnqueueMapBuffer(queue, buffer_two, cl.CL_TRUE,
cl.CL_MAP_READ, 0, cl.CL_size_t(unsafe.Sizeof(data_two)), 0, nil, nil, &err)
if err < 0 {
println("Couldn't map the buffer to host memory")
return
}
/* Transfer memory and unmap the buffer */
C.memcpy(unsafe.Pointer(&result_array[0]), mapped_memory, C.size_t(unsafe.Sizeof(data_two)))
err = cl.CLEnqueueUnmapMemObject(queue, buffer_two, mapped_memory,
0, nil, nil)
if err < 0 {
println("Couldn't unmap the buffer")
return
}
/* Display updated buffer */
for i := 0; i < 10; i++ {
for j := 0; j < 10; j++ {
fmt.Printf("%6.1f", result_array[j+i*10])
}
fmt.Printf("\n")
}
/* Deallocate resources */
cl.CLReleaseMemObject(buffer_one)
cl.CLReleaseMemObject(buffer_two)
cl.CLReleaseKernel(kernel)
cl.CLReleaseCommandQueue(queue)
cl.CLReleaseProgram(*program)
cl.CLReleaseContext(context)
}
示例11: main
// main profiles the kernel KERNEL_FUNC from PROGRAM_FILE: it runs the kernel
// NUM_ITERATIONS times on a profiling-enabled command queue and prints the
// average of (COMMAND_END - COMMAND_START) reported by the profiling events.
//
// Changes from the earlier version of this example:
//   - the status returned by CLEnqueueNDRangeKernel is now actually checked
//     (previously the check looked at a stale err value);
//   - the profiling event of every iteration is released, not only the last;
//   - failed or unexpectedly typed profiling queries are reported instead of
//     panicking in a type assertion;
//   - OpenCL objects are released through defer so error paths do not leak.
//
// NUM_INTS, NUM_ITEMS, NUM_ITERATIONS, PROGRAM_FILE and KERNEL_FUNC are
// defined elsewhere in this package.
func main() {
	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and events */
	var num_ints cl.CL_int
	var num_items [1]cl.CL_size_t
	var data [NUM_INTS]cl.CL_int
	var data_buffer cl.CL_mem
	var total_time cl.CL_ulong

	/* Initialize data */
	for i := 0; i < NUM_INTS; i++ {
		data[i] = cl.CL_int(i)
	}

	/* Set number of data points and work-items */
	num_ints = NUM_INTS
	num_items[0] = NUM_ITEMS

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}
	defer cl.CLReleaseContext(context)

	/* Build the program and create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	// Guard the dereference below; NOTE(review): confirm whether
	// Build_program can actually return nil on failure.
	if program == nil {
		println("Couldn't build the program")
		return
	}
	defer cl.CLReleaseProgram(*program)

	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}
	defer cl.CLReleaseKernel(kernel)

	/* Create a buffer to hold data */
	data_buffer = cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_WRITE|cl.CL_MEM_COPY_HOST_PTR,
		cl.CL_size_t(unsafe.Sizeof(data[0]))*NUM_INTS, unsafe.Pointer(&data[0]), &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}
	defer cl.CLReleaseMemObject(data_buffer)

	/* Create kernel arguments */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(data_buffer)), unsafe.Pointer(&data_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}
	err = cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(num_ints)), unsafe.Pointer(&num_ints))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}

	/* Create a command queue with profiling enabled */
	queue = cl.CLCreateCommandQueue(context, device[0],
		cl.CL_QUEUE_PROFILING_ENABLE, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}
	defer cl.CLReleaseCommandQueue(queue)

	// profileOnce runs the kernel once and returns its execution time as
	// reported by the profiling event. It is a separate function so that the
	// deferred event release fires at the end of each iteration rather than
	// at the end of main.
	profileOnce := func() (cl.CL_ulong, bool) {
		var prof_event cl.CL_event
		var time_start, time_end interface{}

		/* Enqueue kernel */
		status := cl.CLEnqueueNDRangeKernel(queue, kernel, 1, nil, num_items[:],
			nil, 0, nil, &prof_event)
		if status < 0 {
			println("Couldn't enqueue the kernel")
			return 0, false
		}
		defer cl.CLReleaseEvent(prof_event)

		/* Finish processing the queue and get profiling information */
		if status = cl.CLFinish(queue); status < 0 {
			println("Couldn't finish the command queue")
			return 0, false
		}
		status = cl.CLGetEventProfilingInfo(prof_event, cl.CL_PROFILING_COMMAND_START,
			cl.CL_size_t(unsafe.Sizeof(total_time)), &time_start, nil)
		if status < 0 {
			println("Couldn't read the profiling information")
			return 0, false
		}
		status = cl.CLGetEventProfilingInfo(prof_event, cl.CL_PROFILING_COMMAND_END,
			cl.CL_size_t(unsafe.Sizeof(total_time)), &time_end, nil)
		if status < 0 {
			println("Couldn't read the profiling information")
			return 0, false
		}

		start, startOK := time_start.(cl.CL_ulong)
		end, endOK := time_end.(cl.CL_ulong)
		if !startOK || !endOK {
			println("Couldn't read the profiling information")
			return 0, false
		}
		return end - start, true
	}

	total_time = 0.0
	for i := 0; i < NUM_ITERATIONS; i++ {
		elapsed, ok := profileOnce()
		if !ok {
			return
		}
		total_time += elapsed
	}
	fmt.Printf("Average time = %v\n", total_time/NUM_ITERATIONS)

	/* Remaining resources are deallocated by the deferred release calls above. */
}
示例12: main
func main() {
/* Host/device data structures */
var device []cl.CL_device_id
var context cl.CL_context
var err cl.CL_int
/* Data and buffers */
var main_data [100]float32
var main_buffer, sub_buffer cl.CL_mem
var main_buffer_mem, sub_buffer_mem interface{}
var main_buffer_size, sub_buffer_size interface{}
var buffer_size cl.CL_size_t
var buffer_mem cl.CL_ulong
var region cl.CL_buffer_region
/* Create device and context */
device = utils.Create_device()
context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
if err < 0 {
println("Couldn't create a context")
return
}
/* Create a buffer to hold 100 floating-point values */
main_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(main_data)), unsafe.Pointer(&main_data[0]), &err)
if err < 0 {
println("Couldn't create a buffer")
return
}
/* Create a sub-buffer containing values 30-49 */
region.Origin = 30 * cl.CL_size_t(unsafe.Sizeof(main_data[0]))
region.Size = 20 * cl.CL_size_t(unsafe.Sizeof(main_data[0]))
fmt.Printf("origin=%d, size=%d\n", region.Origin, region.Size)
sub_buffer = cl.CLCreateSubBuffer(main_buffer, cl.CL_MEM_READ_ONLY|
cl.CL_MEM_COPY_HOST_PTR, cl.CL_BUFFER_CREATE_TYPE_REGION, unsafe.Pointer(®ion), &err)
if err < 0 {
fmt.Printf("Couldn't create a sub-buffer, errcode=%d\n", err)
return
}
/* Obtain size information about the buffers */
cl.CLGetMemObjectInfo(main_buffer, cl.CL_MEM_SIZE,
cl.CL_size_t(unsafe.Sizeof(buffer_size)), &main_buffer_size, nil)
cl.CLGetMemObjectInfo(sub_buffer, cl.CL_MEM_SIZE,
cl.CL_size_t(unsafe.Sizeof(buffer_size)), &sub_buffer_size, nil)
fmt.Printf("Main buffer size: %v\n", main_buffer_size.(cl.CL_size_t))
fmt.Printf("Sub-buffer size: %v\n", sub_buffer_size.(cl.CL_size_t))
/* Obtain the host pointers */
cl.CLGetMemObjectInfo(main_buffer, cl.CL_MEM_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(buffer_mem)),
&main_buffer_mem, nil)
cl.CLGetMemObjectInfo(sub_buffer, cl.CL_MEM_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(buffer_mem)),
&sub_buffer_mem, nil)
fmt.Printf("Main buffer memory address: %v\n", main_buffer_mem.(cl.CL_ulong))
fmt.Printf("Sub-buffer memory address: %v\n", sub_buffer_mem.(cl.CL_ulong))
/* Print the address of the main data */
fmt.Printf("Main array address: %v\n", main_data)
/* Deallocate resources */
cl.CLReleaseMemObject(main_buffer)
cl.CLReleaseMemObject(sub_buffer)
cl.CLReleaseContext(context)
}
示例13: TestProgram
// TestProgram reads every source file listed in file_name, creates a single
// OpenCL program from all of them, and builds it for the first GPU (falling
// back to the first CPU) of the first platform. When the build fails, the
// build log is fetched and reported through t.Errorf.
//
// Setup failures stop the test at once with t.Fatalf, because every later
// step would otherwise run with an invalid handle or a nil file.
func TestProgram(t *testing.T) {
	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var device [1]cl.CL_device_id
	var context cl.CL_context
	var i, err cl.CL_int

	/* Program data structures */
	var program cl.CL_program
	var program_buffer [NUM_FILES][]byte
	var program_log interface{}
	var file_name = []string{"bad.cl", "good.cl"}
	options := "-cl-finite-math-only -cl-no-signed-zeros"
	var program_size [NUM_FILES]cl.CL_size_t
	var log_size cl.CL_size_t

	/* Access the first installed platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err < 0 {
		t.Fatalf("Couldn't find any platforms")
	}

	/* Access the first GPU/CPU */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
	if err == cl.CL_DEVICE_NOT_FOUND {
		err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
	}
	if err < 0 {
		t.Fatalf("Couldn't find any devices")
	}

	/* Create a context */
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		t.Fatalf("Couldn't create a context")
	}
	// Deferred calls still run when t.Fatalf ends the test, so the context is
	// released on every path from here on.
	defer cl.CLReleaseContext(context)

	/* Read each program file and place content into buffer array */
	for i = 0; i < NUM_FILES; i++ {
		program_handle, open_err := os.Open(file_name[i])
		if open_err != nil {
			// Stop here: the handle is nil and must not be used.
			t.Fatalf("Couldn't find the program file")
		}
		fi, stat_err := program_handle.Stat()
		if stat_err != nil {
			program_handle.Close()
			t.Fatalf("Couldn't find the program stat")
		}
		program_size[i] = cl.CL_size_t(fi.Size())
		program_buffer[i] = make([]byte, program_size[i])
		read_size, read_err := program_handle.Read(program_buffer[i])
		// Close right after reading instead of deferring inside the loop, so
		// files are not kept open until the test function returns.
		program_handle.Close()
		if read_err != nil || cl.CL_size_t(read_size) != program_size[i] {
			t.Fatalf("read file error or file size wrong")
		}
	}

	/* Create a program containing all program content */
	program = cl.CLCreateProgramWithSource(context, NUM_FILES,
		program_buffer[:], program_size[:], &err)
	if err < 0 {
		t.Fatalf("Couldn't create the program")
	}
	// Deferred after the context so the program is released first.
	defer cl.CLReleaseProgram(program)

	/* Build program */
	err = cl.CLBuildProgram(program, 1, device[:], []byte(options), nil, nil)
	if err < 0 {
		/* Find size of log and print to std output */
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		t.Errorf("%s\n", program_log)
	}

	/* Resources are deallocated by the deferred releases above. */
}
示例14: main
//.........这里部分代码省略.........
var program *cl.CL_program
var kernel cl.CL_kernel
var err cl.CL_int
/* Data and buffers */
var full_matrix, zero_matrix [80]float32
var sizeoffloat32 = cl.CL_size_t(unsafe.Sizeof(full_matrix[0]))
var buffer_origin = [3]cl.CL_size_t{5 * sizeoffloat32, 3, 0}
var host_origin = [3]cl.CL_size_t{1 * sizeoffloat32, 1, 0}
var region = [3]cl.CL_size_t{4 * sizeoffloat32, 4, 1}
var matrix_buffer cl.CL_mem
/* Initialize data */
for i := 0; i < 80; i++ {
full_matrix[i] = float32(i) * 1.0
zero_matrix[i] = 0.0
}
/* Create a device and context */
device = utils.Create_device()
context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
if err < 0 {
println("Couldn't create a context")
return
}
/* Build the program and create the kernel */
program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
if program == nil {
println("Couldn't build program")
return
}
kernel = cl.CLCreateKernel(*program, []byte(KERNEL_FUNC), &err)
if err < 0 {
println("Couldn't create a kernel")
return
}
/* Create a buffer to hold 80 floats */
matrix_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_WRITE|
cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(full_matrix)), unsafe.Pointer(&full_matrix[0]), &err)
if err < 0 {
println("Couldn't create a buffer object")
return
}
/* Set buffer as argument to the kernel */
err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(matrix_buffer)), unsafe.Pointer(&matrix_buffer))
if err < 0 {
println("Couldn't set the buffer as the kernel argument")
return
}
/* Create a command queue */
queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
if err < 0 {
println("Couldn't create a command queue")
return
}
/* Enqueue kernel */
err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
if err < 0 {
println("Couldn't enqueue the kernel")
return
}
/* Enqueue command to write to buffer */
err = cl.CLEnqueueWriteBuffer(queue, matrix_buffer, cl.CL_TRUE, 0,
cl.CL_size_t(unsafe.Sizeof(full_matrix)), unsafe.Pointer(&full_matrix[0]), 0, nil, nil)
if err < 0 {
println("Couldn't write to the buffer object")
return
}
/* Enqueue command to read rectangle of data */
err = cl.CLEnqueueReadBufferRect(queue, matrix_buffer, cl.CL_TRUE,
buffer_origin, host_origin, region, 10*sizeoffloat32, 0,
10*sizeoffloat32, 0, unsafe.Pointer(&zero_matrix[0]), 0, nil, nil)
if err < 0 {
println("Couldn't read the rectangle from the buffer object")
return
}
/* Display updated buffer */
for i := 0; i < 8; i++ {
for j := 0; j < 10; j++ {
fmt.Printf("%6.1f", zero_matrix[j+i*10])
}
fmt.Printf("\n")
}
/* Deallocate resources */
cl.CLReleaseMemObject(matrix_buffer)
cl.CLReleaseKernel(kernel)
cl.CLReleaseCommandQueue(queue)
cl.CLReleaseProgram(*program)
cl.CLReleaseContext(context)
}
示例15: main
//.........这里部分代码省略.........
if err < 0 {
println("Couldn't create a context")
return
}
/* Build the program and create a kernel */
program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
if err < 0 {
println("Couldn't create a kernel")
return
}
/* Create a buffer to hold data */
data_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
cl.CL_size_t(unsafe.Sizeof(data[0]))*NUM_BYTES, nil, &err)
if err < 0 {
println("Couldn't create a buffer")
return
}
/* Create kernel argument */
err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(data_buffer)), unsafe.Pointer(&data_buffer))
if err < 0 {
println("Couldn't set a kernel argument")
return
}
/* Tell kernel number of char16 vectors */
num_vectors = NUM_BYTES / 16
cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(num_vectors)), unsafe.Pointer(&num_vectors))
/* Create a command queue */
queue = cl.CLCreateCommandQueue(context, device[0],
cl.CL_QUEUE_PROFILING_ENABLE, &err)
if err < 0 {
println("Couldn't create a command queue")
return
}
total_time = 0.0
for i := 0; i < NUM_ITERATIONS; i++ {
/* Enqueue kernel */
err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
if err < 0 {
println("Couldn't enqueue the kernel")
return
}
if PROFILE_READ == 1 {
/* Read the buffer */
err = cl.CLEnqueueReadBuffer(queue, data_buffer, cl.CL_TRUE, 0,
cl.CL_size_t(unsafe.Sizeof(data[0]))*NUM_BYTES, unsafe.Pointer(&data[0]), 0, nil, &prof_event)
if err < 0 {
println("Couldn't read the buffer")
return
}
} else {
/* Create memory map */
mapped_memory = cl.CLEnqueueMapBuffer(queue, data_buffer, cl.CL_TRUE,
cl.CL_MAP_READ, 0, cl.CL_size_t(unsafe.Sizeof(data[0]))*NUM_BYTES, 0, nil, &prof_event, &err)
if err < 0 {
println("Couldn't map the buffer to host memory")
return
}
}
/* Get profiling information */
cl.CLGetEventProfilingInfo(prof_event, cl.CL_PROFILING_COMMAND_START,
cl.CL_size_t(unsafe.Sizeof(total_time)), &time_start, nil)
cl.CLGetEventProfilingInfo(prof_event, cl.CL_PROFILING_COMMAND_END,
cl.CL_size_t(unsafe.Sizeof(total_time)), &time_end, nil)
total_time += time_end.(cl.CL_ulong) - time_start.(cl.CL_ulong)
if PROFILE_READ == 0 {
/* Unmap the buffer */
err = cl.CLEnqueueUnmapMemObject(queue, data_buffer, mapped_memory,
0, nil, nil)
if err < 0 {
println("Couldn't unmap the buffer")
return
}
}
}
if PROFILE_READ == 1 {
fmt.Printf("Average read time: %v\n", total_time/NUM_ITERATIONS)
} else {
fmt.Printf("Average map time: %v\n", total_time/NUM_ITERATIONS)
}
/* Deallocate resources */
cl.CLReleaseEvent(prof_event)
cl.CLReleaseMemObject(data_buffer)
cl.CLReleaseKernel(kernel)
cl.CLReleaseCommandQueue(queue)
cl.CLReleaseProgram(*program)
cl.CLReleaseContext(context)
}