本文整理汇总了C++中cl::CommandQueue::finish方法的典型用法代码示例。如果您正苦于以下问题：C++ CommandQueue::finish方法的具体用法？C++ CommandQueue::finish怎么用？C++ CommandQueue::finish使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cl::CommandQueue的用法示例。
在下文中一共展示了CommandQueue::finish方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: run_kernel
/**
 * Launches `kernel` on `queue` `iters` times and returns the mean time per launch.
 *
 * Two untimed warm-up launches are issued and drained first. The measurement
 * then follows one of two paths, selected by the `useEventTimer` member:
 *  - event timer: every launch gets its own cl::Event, the queue is drained
 *    after each launch, and the per-event result of timeInUS() is summed;
 *  - host timer: all launches are queued back to back, the queue is drained
 *    once, and Timer::stopAndTime() supplies the total.
 *
 * @param queue       command queue the kernel is submitted to
 * @param kernel      kernel to launch (arguments must already be set)
 * @param globalSize  global work size for each launch
 * @param localSize   work-group size for each launch
 * @param iters       number of timed launches; the total is divided by this
 *                    value (NOTE(review): 0 is not guarded against)
 * @return accumulated time divided by `iters`
 */
float clPeak::run_kernel(cl::CommandQueue &queue, cl::Kernel &kernel, cl::NDRange &globalSize, cl::NDRange &localSize, int iters)
{
    // Warm-up: two launches that are never timed, fully drained before measuring.
    for (int warm = 0; warm < 2; ++warm)
    {
        queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
    }
    queue.finish();

    float total = 0;

    if (!useEventTimer)
    {
        // Host-side timer around the whole batch of launches.
        Timer wallClock;
        wallClock.start();
        for (int launch = 0; launch < iters; ++launch)
        {
            queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
        }
        queue.finish();
        total = wallClock.stopAndTime();
        return total / iters;
    }

    // Event timer: one event per launch, drained before its timing is read.
    for (int launch = 0; launch < iters; ++launch)
    {
        cl::Event launchEvent;
        queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize, NULL, &launchEvent);
        queue.finish();
        total += timeInUS(launchEvent);
    }
    return total / iters;
}
示例2: procOCL_I2I
void procOCL_I2I(int texIn, int texOut, int w, int h)
{
if(!haveOpenCL) return;
LOGD("procOCL_I2I(%d, %d, %d, %d)", texIn, texOut, w, h);
cl::ImageGL imgIn (theContext, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, texIn);
cl::ImageGL imgOut(theContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, texOut);
std::vector < cl::Memory > images;
images.push_back(imgIn);
images.push_back(imgOut);
int64_t t = getTimeMs();
theQueue.enqueueAcquireGLObjects(&images);
theQueue.finish();
LOGD("enqueueAcquireGLObjects() costs %d ms", getTimeInterval(t));
t = getTimeMs();
cl::Kernel Laplacian(theProgI2I, "Laplacian"); //TODO: may be done once
Laplacian.setArg(0, imgIn);
Laplacian.setArg(1, imgOut);
theQueue.finish();
LOGD("Kernel() costs %d ms", getTimeInterval(t));
t = getTimeMs();
theQueue.enqueueNDRangeKernel(Laplacian, cl::NullRange, cl::NDRange(w, h), cl::NullRange);
theQueue.finish();
LOGD("enqueueNDRangeKernel() costs %d ms", getTimeInterval(t));
t = getTimeMs();
theQueue.enqueueReleaseGLObjects(&images);
theQueue.finish();
LOGD("enqueueReleaseGLObjects() costs %d ms", getTimeInterval(t));
}
示例3: runKernelLatency
/**
 * Measures the average kernel launch latency on `queue` and reports it via `log`.
 *
 * The latency of one launch is the gap between the event's
 * CL_PROFILING_COMMAND_QUEUED and CL_PROFILING_COMMAND_START timestamps, each
 * divided by 1000 before subtracting (the result is reported with unit "us").
 * The kernel "global_bandwidth_v1_local_offset" from `prog` is launched twice
 * as an untimed warm-up and then `devInfo.kernelLatencyIters` times, draining
 * the queue after every timed launch.
 *
 * NOTE(review): reading profiling info presumably requires the queue to have
 * been created with profiling enabled - confirm at the call site.
 *
 * @param queue    command queue to launch on; its context is used for the buffers
 * @param prog     program that contains the benchmark kernel
 * @param devInfo  device limits (maxWGSize, numCUs) and the iteration count
 * @return 0 on success or when the test is disabled (isKernelLatency is false),
 *         -1 if a cl::Error was caught
 */
int clPeak::runKernelLatency(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
    if(!isKernelLatency)
        return 0;

    cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
    cl_uint numItems = (devInfo.maxWGSize) * (devInfo.numCUs) * FETCH_PER_WI;
    cl::NDRange globalSize = (numItems / FETCH_PER_WI);
    cl::NDRange localSize = devInfo.maxWGSize;
    int iters = devInfo.kernelLatencyIters;
    float latency;

    try
    {
        log->print(NEWLINE TAB TAB "Kernel launch latency : ");
        log->xmlOpenTag("kernel_launch_latency");
        log->xmlAppendAttribs("unit", "us");

        cl::Buffer inputBuf = cl::Buffer(ctx, CL_MEM_READ_ONLY, (numItems * sizeof(float)));
        cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (numItems * sizeof(float)));

        cl::Kernel kernel_v1(prog, "global_bandwidth_v1_local_offset");
        kernel_v1.setArg(0, inputBuf);
        kernel_v1.setArg(1, outputBuf);

        // Dummy calls
        queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
        queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
        queue.finish();

        latency = 0;
        for(int i=0; i<iters; i++)
        {
            cl::Event timeEvent;
            queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
            queue.finish();

            const cl_ulong queuedUs = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() / 1000;
            const cl_ulong startUs = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() / 1000;

            // Subtract at full 64-bit width. Narrowing each timestamp to int first
            // discards the high bits and can overflow the signed subtraction.
            latency += static_cast<float>(static_cast<cl_long>(startUs - queuedUs));
        }
        latency /= iters;

        log->print(latency); log->print(" us" NEWLINE);
        log->xmlSetContent(latency);
        log->xmlCloseTag();
    }
    catch(const cl::Error &error)
    {
        // `error.err() + NEWLINE` added an integer to a string-literal pointer
        // (pointer arithmetic), so the message is printed as text instead.
        // NOTE(review): the numeric error.err() is not printed because only the
        // string and float print() overloads are visible here; add it if an
        // integer overload exists.
        log->print(error.what());
        log->print(NEWLINE);
        log->print(TAB TAB "Tests skipped" NEWLINE);
        return -1;
    }

    return 0;
}
示例4: updateParticles
void updateParticles(float timeDelta)
{
try
{
vector<cl::Memory> glBuffers;
glBuffers.push_back(m_positions);
glBuffers.push_back(m_colors);
//this will update our system by calculating new velocity and updating the positions of our particles
//Make sure OpenGL is done using our VBOs
glFinish();
// map OpenGL buffer object for writing from OpenCL
// this passes in the vector of VBO buffer objects (position and color)
m_queue.enqueueAcquireGLObjects(&glBuffers);
m_particleKernel.setArg(5, timeDelta); //pass in the timestep
//execute the kernel
m_queue.enqueueNDRangeKernel(m_particleKernel, cl::NullRange, cl::NDRange(m_numParticles),
cl::NullRange);
//Release the VBOs so OpenGL can play with them
m_queue.enqueueReleaseGLObjects(&glBuffers, NULL);
m_queue.finish();
}
catch(cl::Error &error)
{
LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")";
}
}
示例5: procOCL_OCV
void procOCL_OCV(int tex, int w, int h)
{
int64_t t = getTimeMs();
cl::ImageGL imgIn (theContext, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, tex);
std::vector < cl::Memory > images(1, imgIn);
theQueue.enqueueAcquireGLObjects(&images);
theQueue.finish();
cv::UMat uIn, uOut, uTmp;
cv::ocl::convertFromImage(imgIn(), uIn);
LOGD("loading texture data to OpenCV UMat costs %d ms", getTimeInterval(t));
theQueue.enqueueReleaseGLObjects(&images);
t = getTimeMs();
//cv::blur(uIn, uOut, cv::Size(5, 5));
cv::Laplacian(uIn, uTmp, CV_8U);
cv:multiply(uTmp, 10, uOut);
cv::ocl::finish();
LOGD("OpenCV processing costs %d ms", getTimeInterval(t));
t = getTimeMs();
cl::ImageGL imgOut(theContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, tex);
images.clear();
images.push_back(imgOut);
theQueue.enqueueAcquireGLObjects(&images);
cl_mem clBuffer = (cl_mem)uOut.handle(cv::ACCESS_READ);
cl_command_queue q = (cl_command_queue)cv::ocl::Queue::getDefault().ptr();
size_t offset = 0;
size_t origin[3] = { 0, 0, 0 };
size_t region[3] = { w, h, 1 };
CV_Assert(clEnqueueCopyBufferToImage (q, clBuffer, imgOut(), offset, origin, region, 0, NULL, NULL) == CL_SUCCESS);
theQueue.enqueueReleaseGLObjects(&images);
cv::ocl::finish();
LOGD("uploading results to texture costs %d ms", getTimeInterval(t));
}
示例6: simulationStep
void simulationStep() {
try {
// copy
auto buffer = cl::Buffer(context, CL_MEM_READ_ONLY,
sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
nullptr, nullptr);
queue.enqueueWriteBuffer(buffer, CL_TRUE, 0,
sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
visualizationBufferCPU, NULL, NULL);
// enque
stepKernel.setArg(2, buffer);
cl::NDRange global((size_t) (fieldWidth * fieldHeight));
queue.enqueueNDRangeKernel(stepKernel, cl::NullRange, global, cl::NullRange);
// read back
queue.enqueueReadBuffer(visualizationBufferGPU, CL_TRUE, 0,
sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
visualizationBufferCPU, NULL, NULL);
// finish
queue.finish();
} catch (cl::Error err) {
std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std::endl;
exit(3);
}
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, fieldWidth, fieldHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE,
visualizationBufferCPU);
}
示例7: draw
/**
 * Renders one frame: updates the pinhole camera, runs the kernel into the
 * GL-shared image, prints the kernel's device time and draws the texture.
 *
 * The camera basis (u, v, w) and the view-plane vectors are derived from the
 * camera's field of view (converted from degrees to radians below), aspect
 * ratio, eye point and view direction, then written to cam_buffer through
 * cl_set_pinhole_cam_arg. The kernel runs over img_width x img_height with
 * local_width x local_height work-groups while img_buffer is acquired from
 * OpenGL. Kernel time is END - START of the profiling event, printed in ms.
 */
void PTWeekend::draw()
{
    /*
     * BEGIN - each frame part
     */

    /* Camera set-up */
    glm::vec3 origin, lower_left, hor, ver;
    float theta = camera.getFov() * M_PI / 180.0f;
    float half_height = tan(theta / 2.0f);
    float half_width = camera.getAspectRatio() * half_height;
    origin = camera.getEyePoint();

    glm::vec3 u, v, w;
    w = -glm::normalize(camera.getViewDirection()); //odd...
    u = glm::normalize(glm::cross(glm::vec3(0,1,0), w));
    v = glm::cross(w, u);

    lower_left = origin - half_width * u - half_height * v - w;
    hor = 2.0f * half_width * u;
    ver = 2.0f * half_height * v;

    pt_assert(cl_set_pinhole_cam_arg(origin, lower_left, hor, ver, cam_buffer, cmd_queue), "Could not fill camera buffer");

    /* Enqueue kernel for execution */
    clStatus = cmd_queue.enqueueAcquireGLObjects(&img_buffer, NULL, NULL);
    pt_assert(clStatus, "Could not acquire gl objects");

    cl::Event profiling_evt;
    clStatus = cmd_queue.enqueueNDRangeKernel(kernel,
                                              cl::NDRange(0,0),
                                              cl::NDRange(img_width, img_height),
                                              cl::NDRange(local_width,local_height),
                                              NULL,
                                              &profiling_evt);
    // Check the enqueue status BEFORE waiting: if the enqueue failed, the
    // event was never set and waiting on it is invalid.
    pt_assert(clStatus, "Could not enqueue the kernel");
    profiling_evt.wait();

    clStatus = cmd_queue.enqueueReleaseGLObjects(&img_buffer, NULL, NULL);
    pt_assert(clStatus, "Could not release gl objects");
    cmd_queue.finish();

    // Device-side kernel duration; the two 0.001 factors scale the raw
    // difference down by 1e6 for the "ms" printout.
    cl_ulong time_start = profiling_evt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
    cl_ulong time_end = profiling_evt.getProfilingInfo<CL_PROFILING_COMMAND_END>();
    cl_ulong total_time = time_end - time_start;
    std::cout << "Total time: " << total_time * 0.001 * 0.001 << " ms \n";

    /*
     * END - each frame part
     */
    gl::draw(imgTex, Rectf(0, 0, getWindowWidth(), getWindowHeight()));
}
示例8: enqueueNewMarker
/**
 * Enqueues a marker command on `queue`, waits for the queue to drain, and
 * returns the marker's event.
 *
 * Uses the deprecated cl::CommandQueue::enqueueMarker when OpenCL 1.2 is not
 * available or the deprecated 1.1 APIs are explicitly enabled, and
 * enqueueMarkerWithWaitList otherwise.
 *
 * @param queue  command queue to place the marker on (taken by value)
 * @return the event associated with the marker; the queue has been finished
 *         by the time it is returned
 */
cl::Event RuntimeMeasurementsManager::enqueueNewMarker(cl::CommandQueue queue) {
    cl::Event event;
#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
    // Use deprecated API
    queue.enqueueMarker(&event);
#else
    // The terminating semicolon was missing here, which broke the build
    // whenever this branch was selected.
    queue.enqueueMarkerWithWaitList(NULL, &event);
#endif
    queue.finish();
    return event;
}
示例9: runTransferBandwidthTest
int clPeak::runTransferBandwidthTest(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
if(!isTransferBW)
return 0;
float timed, gbps;
cl::NDRange globalSize, localSize;
cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
int iters = devInfo.transferBWIters;
Timer timer;
float *arr = NULL;
cl_uint maxItems = devInfo.maxAllocSize / sizeof(float) / 2;
cl_uint numItems;
// Set an upper-limit for cpu devies
if(devInfo.deviceType & CL_DEVICE_TYPE_CPU) {
numItems = roundToPowOf2(maxItems, 26);
} else {
numItems = roundToPowOf2(maxItems);
}
try
{
arr = new float[numItems];
cl::Buffer clBuffer = cl::Buffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), (numItems * sizeof(float)));
log->print(NEWLINE TAB TAB "Transfer bandwidth (GBPS)" NEWLINE);
log->xmlOpenTag("transfer_bandwidth");
log->xmlAppendAttribs("unit", "gbps");
///////////////////////////////////////////////////////////////////////////
// enqueueWriteBuffer
log->print(TAB TAB TAB "enqueueWriteBuffer : ");
// Dummy warm-up
queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
queue.finish();
timed = 0;
if(useEventTimer)
{
for(int i=0; i<iters; i++)
{
cl::Event timeEvent;
queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
queue.finish();
timed += timeInUS(timeEvent);
}
} else
{
Timer timer;
timer.start();
for(int i=0; i<iters; i++)
{
queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
}
queue.finish();
timed = timer.stopAndTime();
}
timed /= iters;
gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
log->print(gbps); log->print(NEWLINE);
log->xmlRecord("enqueuewritebuffer", gbps);
///////////////////////////////////////////////////////////////////////////
// enqueueReadBuffer
log->print(TAB TAB TAB "enqueueReadBuffer : ");
// Dummy warm-up
queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
queue.finish();
timed = 0;
if(useEventTimer)
{
for(int i=0; i<iters; i++)
{
cl::Event timeEvent;
queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
queue.finish();
timed += timeInUS(timeEvent);
}
} else
{
Timer timer;
timer.start();
for(int i=0; i<iters; i++)
{
queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
}
queue.finish();
timed = timer.stopAndTime();
}
timed /= iters;
gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
//.........这里部分代码省略.........
示例10: maxValueCL
int MaxValueSimple::maxValueCL(int* values, size_t len) {
try {
cl_int status = CL_SUCCESS;
/*** Ausgabe von Informationen ueber gewaehltes OpenCL-Device ***/
/* TODO logging
Logger::logDebug(
METHOD,
Logger::sStream << "max compute units: " << devices[0].getInfo<
CL_DEVICE_MAX_COMPUTE_UNITS> ());
Logger::logDebug(
METHOD,
Logger::sStream << "max work item sizes: "
<< devices[0].getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES> ()[0]);
Logger::logDebug(
METHOD,
Logger::sStream << "max work group sizes: "
<< devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE> ());
Logger::logDebug(
METHOD,
Logger::sStream << "max global mem size (KB): "
<< devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE> ()
/ 1024);
Logger::logDebug(
METHOD,
Logger::sStream << "max local mem size (KB): "
<< devices[0].getInfo<CL_DEVICE_LOCAL_MEM_SIZE> ()
/ 1024);
*/
/*** Erstellen und Vorbereiten der Daten ***/
cl::Buffer vBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_int) * len, &values[0], &status);
if (status != CL_SUCCESS) {
throw cl::Error(status, "cl::Buffer values");
}
cmdQ.finish();
/*** Arbeitsgroeszen berechnen ***/
// Anzahl der Work-Items = globalSize
// Work-Items pro Work-Group = localSize
const size_t MAX_GROUP_SIZE = devices[0].getInfo<
CL_DEVICE_MAX_WORK_GROUP_SIZE> ();
size_t globalSize;
size_t localSize;
do {
globalSize = len;
localSize = MaxValueSimple::calcWorkGroupSize(globalSize,
MAX_GROUP_SIZE);
if (localSize == 1) {
globalSize = ceil((double) len / WG_FAC) * WG_FAC;
localSize = MaxValueSimple::calcWorkGroupSize(globalSize,
MAX_GROUP_SIZE);
/* TODO logging
Logger::logDebug(
METHOD,
Logger::sStream << "GlobalSize has been extended to "
<< globalSize);
*/
}
/* TODO logging
Logger::logDebug(METHOD,
Logger::sStream << "globalSize: " << globalSize);
Logger::logDebug(METHOD,
Logger::sStream << "localSize: " << localSize);
*/
/*** Kernel-Argumente setzen ***/
status = kernel.setArg(0, vBuffer);
if (status != CL_SUCCESS) {
throw cl::Error(status, "Kernel.SetArg");
}
status = kernel.setArg(1, sizeof(cl_int) * localSize, NULL);
if (status != CL_SUCCESS) {
throw cl::Error(status, "Kernel.SetArg");
}
/*** Kernel ausfuehren und auf Abarbeitung warten ***/
cl::KernelFunctor func = kernel.bind(cmdQ, cl::NDRange(globalSize),
cl::NDRange(localSize));
event = func();
event.wait();
cmdQ.finish();
/*
runtimeKernel
+= event.getProfilingInfo<CL_PROFILING_COMMAND_END> ();
runtimeKernel
-= event.getProfilingInfo<CL_PROFILING_COMMAND_START> ();
*/
len = globalSize / localSize;
} while (globalSize > localSize && localSize > 1);
/*** Daten vom OpenCL-Device holen ***/
// TODO nur 1. element auslesen
status = cmdQ.enqueueReadBuffer(vBuffer, true, 0, sizeof(cl_int) * 1,
//.........这里部分代码省略.........
示例11: runTransferBandwidthTest
int clPeak::runTransferBandwidthTest(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
if(!isTransferBW)
return 0;
float timed, gbps;
cl::NDRange globalSize, localSize;
cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
int iters = devInfo.transferBWIters;
Timer timer;
cl_uint maxItems = devInfo.maxAllocSize / sizeof(float) / 2;
cl_uint numItems;
// Set an upper-limit for cpu devies
if(devInfo.deviceType & CL_DEVICE_TYPE_CPU) {
numItems = roundToPowOf2(maxItems, 26);
} else {
numItems = roundToPowOf2(maxItems);
}
float *arr = new float[numItems];
try
{
cl::Buffer clBuffer = cl::Buffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), (numItems * sizeof(float)));
cout << NEWLINE TAB TAB "Transfer bandwidth (GBPS)" << endl;
cout << setprecision(2) << fixed;
///////////////////////////////////////////////////////////////////////////
// enqueueWriteBuffer
cout << TAB TAB TAB "enqueueWriteBuffer : "; cout.flush();
// Dummy warm-up
queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
queue.finish();
timed = 0;
if(useEventTimer)
{
for(int i=0; i<iters; i++)
{
cl::Event timeEvent;
queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
queue.finish();
timed += timeInUS(timeEvent);
}
} else
{
Timer timer;
timer.start();
for(int i=0; i<iters; i++)
{
queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
}
queue.finish();
timed = timer.stopAndTime();
}
timed /= iters;
gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
cout << gbps << endl;
///////////////////////////////////////////////////////////////////////////
// enqueueReadBuffer
cout << TAB TAB TAB "enqueueReadBuffer : "; cout.flush();
// Dummy warm-up
queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
queue.finish();
timed = 0;
if(useEventTimer)
{
for(int i=0; i<iters; i++)
{
cl::Event timeEvent;
queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
queue.finish();
timed += timeInUS(timeEvent);
}
} else
{
Timer timer;
timer.start();
for(int i=0; i<iters; i++)
{
queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
}
queue.finish();
timed = timer.stopAndTime();
}
timed /= iters;
gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
cout << gbps << endl;
///////////////////////////////////////////////////////////////////////////
//.........这里部分代码省略.........
示例12: ComputeResidual
/**
 * Residual callback: evaluates `kernel` on the OpenCL device using the PETSc
 * vectors' own storage as the device buffers.
 *
 * The raw arrays behind Prim, dPrim_dt and F are wrapped in cl::Buffer
 * objects created with CL_MEM_USE_HOST_PTR (inputs read-only, F write-only),
 * bound as kernel arguments 0-2, and the kernel is launched over an
 * N1 x N2 range with TILE_SIZE_X1 x TILE_SIZE_X2 work-groups. The output
 * buffer is then mapped for reading, the queue is drained, and the arrays are
 * handed back to PETSc.
 *
 * OpenCL status codes are stored in clErr but not inspected here. PETSc
 * errors return early through CHKERRQ.
 *
 * @param ts        time stepper (unused in this body)
 * @param t         current time (unused in this body)
 * @param Prim      input vector bound as kernel argument 0
 * @param dPrim_dt  input vector bound as kernel argument 1
 * @param F         output vector bound as kernel argument 2
 * @param ptr       user context (unused in this body)
 * @return 0 on success, or the PETSc error propagated by CHKERRQ
 */
PetscErrorCode ComputeResidual(TS ts,
                               PetscScalar t,
                               Vec Prim, Vec dPrim_dt,
                               Vec F, void *ptr)
{
    PetscErrorCode petscStatus;

    // Borrow the raw storage of the three PETSc vectors.
    PetscScalar *primData;
    petscStatus = VecGetArray(Prim, &primData); CHKERRQ(petscStatus);
    PetscScalar *dprimData;
    petscStatus = VecGetArray(dPrim_dt, &dprimData); CHKERRQ(petscStatus);
    PetscScalar *residualData;
    petscStatus = VecGetArray(F, &residualData); CHKERRQ(petscStatus);

    // Byte size shared by all three arrays.
    PetscInt numBytes = DOF*N1*N2*sizeof(PetscScalar);

    // Wrap the host arrays in OpenCL buffers without copying them.
    cl::Buffer primBuffer(context,
                          CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
                          numBytes, primData, &clErr);
    cl::Buffer dprimBuffer_dt(context,
                              CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
                              numBytes, dprimData, &clErr);
    cl::Buffer residualBuffer(context,
                              CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
                              numBytes, residualData, &clErr);

    // Bind the buffers to kernel arguments 0..2.
    clErr = kernel.setArg(0, primBuffer);
    clErr = kernel.setArg(1, dprimBuffer_dt);
    clErr = kernel.setArg(2, residualBuffer);

    // Launch over the N1 x N2 grid in TILE_SIZE_X1 x TILE_SIZE_X2 tiles.
    const cl::NDRange gridRange(N1, N2);
    const cl::NDRange tileRange(TILE_SIZE_X1, TILE_SIZE_X2);
    clErr = queue.enqueueNDRangeKernel(kernel,
                                       cl::NullRange,
                                       gridRange, tileRange,
                                       NULL, NULL);

    // Non-blocking map of the output buffer for reading. Per the original
    // author's note this is what syncs the data back when the device is a GPU,
    // and is free on CPU devices with CL_MEM_USE_HOST_PTR buffers.
    residualData = (PetscScalar*)queue.enqueueMapBuffer(residualBuffer,
                                                        CL_FALSE,
                                                        CL_MAP_READ,
                                                        0, numBytes,
                                                        NULL, NULL, &clErr);

    // Wait until everything queued above has completed.
    clErr = queue.finish();

    // Return the arrays to PETSc.
    petscStatus = VecRestoreArray(Prim, &primData); CHKERRQ(petscStatus);
    petscStatus = VecRestoreArray(dPrim_dt, &dprimData); CHKERRQ(petscStatus);
    petscStatus = VecRestoreArray(F, &residualData); CHKERRQ(petscStatus);

    return(0);
}
示例13: main
/**
 * Entry point: runs an inclusive scan on the OpenCL device and checks it on the CPU.
 *
 * Selects a platform and device, creates the global context/queue/program
 * from "inclusive_scan.cl", reads "input.txt" (an element count followed by
 * that many floats), runs inclusive_scan() on the device, validates the result
 * with cpu_check() and writes it to "output.txt".
 *
 * @return 1 if the kernel source or the input file cannot be opened/read
 *         (checks added here); 0 otherwise, including when a cl::Error is
 *         caught and reported, as before
 */
int main()
{
    try {
        std::vector<cl::Device> devices;

        // select platform
        cl::Platform platform = selectPlatform();

        // select device
        platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
        cl::Device device = selectDevice(devices);

        // create context
        context = cl::Context(devices);

        // create command queue
        queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);

        // load opencl source
        std::ifstream cl_file("inclusive_scan.cl");
        if (!cl_file) {
            std::cout << "Error: cannot open inclusive_scan.cl" << std::endl;
            return 1;
        }
        std::string cl_string{std::istreambuf_iterator<char>(cl_file),
                              std::istreambuf_iterator<char>()};
        cl::Program::Sources source(1,
                                    std::make_pair(cl_string.c_str(),
                                                   cl_string.length() + 1));

        // create program
        program = cl::Program(context, source);

        // compile opencl source and run the scan
        try {
            program.build(devices);

            // Zero-initialised and checked: when input.txt is missing the
            // extraction does not write to the variable, so it was previously
            // read uninitialised and used as a vector size.
            size_t input_size = 0;
            std::ifstream input_file("input.txt");
            if (!(input_file >> input_size)) {
                std::cout << "Error: cannot read element count from input.txt" << std::endl;
                return 1;
            }

            std::vector<float> input(input_size);
            for (size_t i = 0; i < input_size; ++i) {
                input_file >> input[i];
            }

            std::vector<float> output(input_size, 0);

            cl::Buffer dev_input (context, CL_MEM_READ_ONLY, sizeof(float) * input_size);
            queue.enqueueWriteBuffer(dev_input, CL_TRUE, 0, sizeof(float) * input_size, input.data());

            cl::Buffer dev_output = inclusive_scan(dev_input, input_size);

            queue.enqueueReadBuffer(dev_output, CL_TRUE, 0, sizeof(float) * input_size, output.data());
            queue.finish();

            cpu_check(input, output);

            std::ofstream output_file("output.txt");
            for (size_t i = 0; i < input_size; ++i) {
                output_file << output[i] << " ";
            }
        }
        catch (cl::Error const & e) {
            // Any OpenCL failure in the block above is reported together with the build log.
            std::string log_str = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
            std::cout << std::endl << e.what() << " : " << e.err() << std::endl;
            std::cout << log_str;
            return 0;
        }
    }
    catch (cl::Error const & e) {
        std::cout << "Error: " << e.what() << " #" << e.err() << std::endl;
    }

    return 0;
}
示例14: mainLoop
/**
 * One iteration of the render loop: run the kernel into the GL-shared target,
 * blit the result to the accumulator and the default framebuffer, process
 * input, and present the frame.
 *
 * Sequence:
 *  1. drain OpenGL and upload the global imgDesc to clImgDesc;
 *  2. acquire vSharedUnits, run `kernel` over WIDTH x HEIGHT, drain the queue,
 *     release vSharedUnits (each step waits on its own event);
 *  3. bump imgDesc.numSamples by SAMPLES and blit pCLTarget twice (once into
 *     pAccumulator, once into framebuffer 0);
 *  4. poll GLFW events and pass the CPU clock time spent waiting on the
 *     kernel, in seconds, to the camera's cursor handler;
 *  5. if the scene changed, reset the sample count and upload a fresh camera;
 *  6. swap buffers and print the sample count every 10th sample.
 *
 * @param queue      command queue used for all OpenCL work
 * @param context    OpenCL context (unused in this body)
 * @param kernel     kernel to execute
 * @param clImgDesc  device buffer receiving the ImageDescriptor
 * @param clCamera   device buffer receiving the CLCamera when the scene changes
 */
void mainLoop( cl::CommandQueue& queue, cl::Context& context, cl::Kernel kernel, cl::Buffer clImgDesc, cl::Buffer clCamera ){
    cl_int status;

    // OpenGL must be idle before OpenCL takes over the shared objects.
    glFinish();
    checkGLErr( "glFinish()" );

    // Blocking upload of the current image descriptor.
    queue.enqueueWriteBuffer( clImgDesc, CL_TRUE, 0, 1 * sizeof(ImageDescriptor), (const void*)&imgDesc);

    // Acquire the shared GL objects and wait until OpenCL owns them.
    cl::Event acquireDone;
    status = queue.enqueueAcquireGLObjects( vSharedUnits, NULL, &acquireDone );
    checkErr(status, "CommandQueue::enqueueAcquireGLObjects()");
    acquireDone.wait();

    // Launch the kernel and time the wait for its completion with clock().
    cl::Event kernelDone;
    status = queue.enqueueNDRangeKernel( kernel, cl::NullRange, cl::NDRange(WIDTH, HEIGHT), cl::NullRange, NULL, &kernelDone);
    checkErr(status, "CommandQueue::enqueueNDRangeKernel()");

    const clock_t waitBegin = clock();
    kernelDone.wait();
    const clock_t waitEnd = clock();
    queue.finish();

    // Return the shared objects to OpenGL.
    cl::Event releaseDone;
    status = queue.enqueueReleaseGLObjects( vSharedUnits, NULL, &releaseDone );
    checkErr(status, "CommandQueue::enqueueReleaseGLObjects()");
    releaseDone.wait();

    imgDesc.numSamples += SAMPLES;

    // Blit 1: CL target -> accumulator.
    pAccumulator->glBind( GL_DRAW_FRAMEBUFFER );
    checkGLErr( "glBind GL_DRAW_FRAMEBUFFER, Accumulator " );
    pCLTarget->glBind( GL_READ_FRAMEBUFFER );
    checkGLErr( "glBind GL_READ_FRAMEBUFFER, Main Target " );
    glBlitFramebuffer( 0, 0, WIDTH, HEIGHT, 0, 0, WIDTH, HEIGHT, GL_COLOR_BUFFER_BIT, GL_NEAREST );
    checkGLErr( "glBlitFramebuffer" );

    // Blit 2: CL target -> default framebuffer.
    glBindFramebuffer( GL_DRAW_FRAMEBUFFER, 0 );
    checkGLErr( "glBind GL_DRAW_FRAMEBUFFER, 0 " );
    pCLTarget->glBind( GL_READ_FRAMEBUFFER );
    checkGLErr( "glBind GL_READ_FRAMEBUFFER, something " );
    glBlitFramebuffer( 0, 0, WIDTH, HEIGHT, 0, 0, WIDTH, HEIGHT, GL_COLOR_BUFFER_BIT, GL_NEAREST );
    checkGLErr( "glBlitFramebuffer" );

    // Input handling; the camera receives the kernel wait time in seconds.
    glfwPollEvents();
    const float waitSeconds = ((float)(waitEnd - waitBegin))/(CLOCKS_PER_SEC * 1.0f);
    pCamera->glfwHandleCursor( waitSeconds );

    if( sceneChanged() ){
        // Restart accumulation and push the updated camera to the device.
        imgDesc.numSamples = 0;
        CLCamera* freshCamera = pCamera->getCLCamera();
        queue.enqueueWriteBuffer( clCamera, CL_TRUE, 0, 1 * sizeof(CLCamera), (const void*)freshCamera );
        delete freshCamera;
    }

    glfwSwapBuffers( window );
    checkGLErr( "glSwapBuffers" );

    // Progress report on every 10th accumulated sample.
    if( imgDesc.numSamples % 10 == 0 ){
        std::cout<<"numSamples: "<<imgDesc.numSamples<<std::endl;
    }
}