本文整理汇总了C++中cl::CommandQueue类的典型用法代码示例。如果您正苦于以下问题：C++ CommandQueue类的具体用法？C++ CommandQueue怎么用？C++ CommandQueue使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了CommandQueue类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: EnqueueAdvancePathsKernel
/// Enqueues the ten path-advance micro kernels on oclQueue, in the order
/// listed below.
///
/// Every launch uses the same geometry: a global size equal to the render
/// engine's taskCount and a work-group size of advancePathsWorkGroupSize.
void PathOCLRenderThread::EnqueueAdvancePathsKernel(cl::CommandQueue &oclQueue) {
    PathOCLRenderEngine *engine = (PathOCLRenderEngine *)renderEngine;
    const u_int workItemCount = engine->taskCount;

    // The launch geometry is identical for all micro kernels, so build it once.
    const cl::NDRange globalRange(workItemCount);
    const cl::NDRange localRange(advancePathsWorkGroupSize);

    // Micro kernels version
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_RT_NEXT_VERTEX,
            cl::NullRange, globalRange, localRange);
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_HIT_NOTHING,
            cl::NullRange, globalRange, localRange);
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_HIT_OBJECT,
            cl::NullRange, globalRange, localRange);
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_RT_DL,
            cl::NullRange, globalRange, localRange);
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_DL_ILLUMINATE,
            cl::NullRange, globalRange, localRange);
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_DL_SAMPLE_BSDF,
            cl::NullRange, globalRange, localRange);
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_GENERATE_NEXT_VERTEX_RAY,
            cl::NullRange, globalRange, localRange);
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_SPLAT_SAMPLE,
            cl::NullRange, globalRange, localRange);
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_NEXT_SAMPLE,
            cl::NullRange, globalRange, localRange);
    oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_GENERATE_CAMERA_RAY,
            cl::NullRange, globalRange, localRange);
}
示例2: update
void MetaBallsApp::update()
{
std::vector<cl::Memory> acquire( { mClParticleBuf, mClMarchingRenderBuffer, mClMarchingDebugBuffer } );
mClCommandQueue.enqueueAcquireGLObjects( &acquire );
updateParticles();
updateMarching();
mClCommandQueue.enqueueReleaseGLObjects( &acquire );
}
示例3: runKernelLatency
/**
 * Measures the average kernel launch latency and reports it in microseconds.
 *
 * For each of devInfo.kernelLatencyIters iterations one kernel is enqueued and
 * the gap between its CL_PROFILING_COMMAND_QUEUED and
 * CL_PROFILING_COMMAND_START timestamps is accumulated; the mean is printed
 * and written to the XML log.
 *
 * NOTE(review): reading profiling info presumably requires `queue` to have
 * been created with profiling enabled -- confirm at the call site.
 *
 * @param queue   command queue the kernels are enqueued on
 * @param prog    program providing "global_bandwidth_v1_local_offset"
 * @param devInfo device limits (maxWGSize, numCUs) and the iteration count
 * @return 0 on success or when the test is disabled (isKernelLatency is
 *         false), -1 if a cl::Error was caught.
 */
int clPeak::runKernelLatency(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
    if(!isKernelLatency)
        return 0;

    cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
    cl_uint numItems = (devInfo.maxWGSize) * (devInfo.numCUs) * FETCH_PER_WI;
    cl::NDRange globalSize = (numItems / FETCH_PER_WI);
    cl::NDRange localSize = devInfo.maxWGSize;
    int iters = devInfo.kernelLatencyIters;
    float latency;

    try
    {
        log->print(NEWLINE TAB TAB "Kernel launch latency : ");
        log->xmlOpenTag("kernel_launch_latency");
        log->xmlAppendAttribs("unit", "us");

        cl::Buffer inputBuf = cl::Buffer(ctx, CL_MEM_READ_ONLY, (numItems * sizeof(float)));
        cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (numItems * sizeof(float)));

        cl::Kernel kernel_v1(prog, "global_bandwidth_v1_local_offset");
        kernel_v1.setArg(0, inputBuf);
        kernel_v1.setArg(1, outputBuf);

        // Dummy calls
        queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
        queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
        queue.finish();

        latency = 0;
        for(int i=0; i<iters; i++)
        {
            cl::Event timeEvent;
            queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
            queue.finish();

            // Timestamps are divided by 1000 to report microseconds.
            const cl_ulong queuedUs = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() / 1000;
            const cl_ulong startUs = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() / 1000;

            // Subtract in 64 bits: narrowing each timestamp to int first can
            // overflow once the device timer exceeds the 32-bit range.
            latency += static_cast<float>(static_cast<cl_long>(startUs) - static_cast<cl_long>(queuedUs));
        }
        latency /= iters;

        log->print(latency); log->print(" us" NEWLINE);
        log->xmlSetContent(latency);
        log->xmlCloseTag();
    }
    catch(const cl::Error &error)
    {
        // error.err() is an integer: adding it to the NEWLINE literal would be
        // pointer arithmetic, so print each piece separately.
        log->print(error.what());
        log->print(" (");
        log->print(error.err());
        log->print(")" NEWLINE);
        log->print(TAB TAB "Tests skipped" NEWLINE);
        return -1;
    }

    return 0;
}
示例4: enqueueNewMarker
/// Enqueues a marker command on `queue`, waits for the queue to finish, and
/// returns the event associated with that marker.
///
/// The deprecated enqueueMarker() is used when CL_VERSION_1_2 is not defined
/// or the deprecated 1.1 APIs are explicitly enabled; otherwise
/// enqueueMarkerWithWaitList() is called with no wait list.
///
/// @param queue command queue to place the marker on (taken by value)
/// @return the marker's event
cl::Event RuntimeMeasurementsManager::enqueueNewMarker(cl::CommandQueue queue) {
    cl::Event event;
#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
    // Use deprecated API
    queue.enqueueMarker(&event);
#else
    // The terminating semicolon was missing here, which broke this branch at compile time.
    queue.enqueueMarkerWithWaitList(NULL, &event);
#endif
    queue.finish();
    return event;
}
示例5: copyFromDevice
/// Reads m_cb units of m_buffer (offset 0) into m_pElts using a blocking read
/// on `queue`.
///
/// @throws cl::Error with CL_INVALID_MEM_OBJECT when m_pElts is NULL.
void copyFromDevice(cl::CommandQueue &queue)
{
    const bool hostStorageReady = (m_pElts != NULL);
    if (!hostStorageReady) {
        throw cl::Error(CL_INVALID_MEM_OBJECT, "copyFromDevice - Buffer is not initialised.");
    }

    // CL_TRUE: the call returns only after the host memory has been filled.
    const cl_bool blockingRead = CL_TRUE;
    queue.enqueueReadBuffer(m_buffer, blockingRead, 0, m_cb, m_pElts);
}
示例6: kernel
/// Launches the "julia" kernel over a DIMX x DIMY domain, writing to devOut.
///
/// The program and kernel objects are function-local statics that are built
/// exactly once (guarded by std::call_once) from fractal_ocl_kernel, using the
/// context of the queue passed on the first call.
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue)
{
    static std::once_flag buildOnce;
    static cl::Program juliaProgram;
    static cl::Kernel juliaKernel;

    std::call_once(buildOnce, [queue]() {
        const cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
        juliaProgram = cl::Program(ctx, fractal_ocl_kernel, true);
        juliaKernel = cl::Kernel(juliaProgram, "julia");
    });

    // 8x8 work-groups; the global size is each dimension rounded up via
    // divup() and scaled back by the work-group extent.
    static const cl::NDRange workGroup(8, 8);
    const cl::NDRange launchGrid(workGroup[0] * divup(DIMX, workGroup[0]),
                                 workGroup[1] * divup(DIMY, workGroup[1]));

    juliaKernel.setArg(0, devOut);
    juliaKernel.setArg(1, DIMX);
    juliaKernel.setArg(2, DIMY);

    queue.enqueueNDRangeKernel(juliaKernel, cl::NullRange, launchGrid, workGroup);
}
示例7: runTestType
/// Runs the scan functor in place over 1024 * 2 + 15 elements of type T
/// (values 1, 2, 3, ...) and checks the result with testOutput().
///
/// T comes from the enclosing template declaration, which is not part of this
/// excerpt.
///
/// @return true when the check FAILED, false when it passed.
bool runTestType(cl::Context context, cl::CommandQueue queue)
{
    const cl_uint elementCount = 1024 * 2 + 15;

    std::vector<T> input(elementCount);

    std::cout << "##Testing scan for " << input.size() << " elements and type "
              << magnet::CL::detail::traits<T>::kernel_type();

    for (size_t idx = 0; idx != input.size(); ++idx)
        input[idx] = idx+1;

    const size_t byteCount = sizeof(T) * input.size();

    // Device buffer initialised from the host data (host-allocated, copy-on-create flags).
    const cl_mem_flags bufferFlags =
        CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE;
    cl::Buffer deviceData(context, bufferFlags, byteCount, &input[0]);

    magnet::CL::scan<T> scanFunctor;
    scanFunctor.build(queue, context);
    // Same buffer is passed for both arguments.
    scanFunctor(deviceData, deviceData);

    std::vector<T> output(elementCount);
    queue.enqueueReadBuffer(deviceData, CL_TRUE, 0, byteCount, &output[0]);

    const bool failed = !testOutput(input, output);
    std::cout << (failed ? " FAILED" : " PASSED") << std::endl;
    return failed;
}
示例8: runKernel
/// Enqueues `kernel` on `queue`, making it wait on every event currently in
/// `events`, then appends the new kernel's event to `events`.
///
/// @param queue      command queue to enqueue on
/// @param kernel     kernel to launch
/// @param globalSize global work size
/// @param groupSize  work-group size
/// @param events     in/out: events to wait on; receives the launch event
/// @return the event of the kernel launch that was just enqueued
cl::Event runKernel(const cl::CommandQueue& queue, const cl::Kernel& kernel, const cl::NDRange& globalSize, const cl::NDRange& groupSize, std::vector<cl::Event>& events)
{
    cl::Event event;

    // The OpenCL C API requires a NULL wait list when there is nothing to wait
    // on, and not every version of the C++ wrapper guards against an empty
    // vector, so pass NULL explicitly in that case.
    const std::vector<cl::Event>* waitList = events.empty() ? NULL : &events;

    queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, groupSize, waitList, &event);
    events.push_back(event);
    return event;
}
示例9: runTestType
/// Runs the bitonic sort functor over 2 << 10 elements of type T, initialised
/// in descending order, and throws via M_throw() if testOutput() rejects the
/// result.
///
/// T comes from the enclosing template declaration, which is not part of this
/// excerpt.
void runTestType(cl::Context context, cl::CommandQueue queue)
{
    const cl_uint elementCount = 2 << 10;

    std::vector<T> input(elementCount);

    std::cout << "##Testing bitonic sort for " << input.size() << " elements and type "
              << magnet::CL::detail::traits<T>::kernel_type()
              << std::endl;

    // Fill with size-1, size-2, ..., 0.
    for (size_t idx = 0; idx != input.size(); ++idx)
        input[idx] = input.size() - idx - 1;

    const size_t byteCount = sizeof(T) * input.size();

    // Device buffer initialised from the host data (host-allocated, copy-on-create flags).
    const cl_mem_flags bufferFlags =
        CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE;
    cl::Buffer deviceData(context, bufferFlags, byteCount, &input[0]);

    magnet::CL::bitonicSort<T> bitonicSortFunctor;
    bitonicSortFunctor.build(queue, context);
    bitonicSortFunctor(deviceData);

    std::vector<T> output(elementCount);
    queue.enqueueReadBuffer(deviceData, CL_TRUE, 0, byteCount, &output[0]);

    const bool outputAccepted = testOutput(input, output);
    if (!outputAccepted)
        M_throw() << "Incorrect output for size "
                  << input.size()
                  << " and type "
                  << magnet::CL::detail::traits<T>::kernel_type();
}
示例10: addkernelarg
inline void OpenCL::addkernelarg(std::size_t i, std::vector<T> const & arg, cl::Kernel & kernel,cl::CommandQueue &quene) const
{
cl::Buffer buffer(this->context,CL_MEM_READ_WRITE,arg.size()*sizeof(T));
// std::cout << "enqeue\n";
quene.enqueueWriteBuffer(buffer,CL_FALSE,0,sizeof(T)*arg.size(),&(arg[0]));
kernel.setArg(i,buffer);
}
示例11: addkernelarg
/// Uploads the N-element array `arg` to a freshly created read/write device
/// buffer and binds that buffer as kernel argument `i`.
///
/// The write is enqueued non-blocking (CL_FALSE).
/// NOTE(review): `arg` presumably has to stay alive until the queued write
/// has executed -- verify against callers.
/// NOTE(review): the cl::Buffer handle is local to this function; confirm the
/// kernel keeps the underlying memory object alive after it goes out of scope.
///
/// T and N come from the enclosing template declaration, which is not part of
/// this excerpt.
inline void OpenCL::addkernelarg(std::size_t i, T const (& arg)[N], cl::Kernel & kernel,cl::CommandQueue &quene) const
{
    const std::size_t byteCount = N*sizeof(T);
    cl::Buffer deviceCopy(this->context,CL_MEM_READ_WRITE,byteCount);

    // The array decays to a pointer to its first element, which is the same
    // address as the array object itself.
    const T *hostData = arg;
    quene.enqueueWriteBuffer(deviceCopy,CL_FALSE,0,byteCount,hostData);

    kernel.setArg(i,deviceCopy);
}
示例12: release
/// Hands the framebuffer back after CL work that depends on `evt`.
///
/// When _shared is set, a GL release of the CL buffer is enqueued and its
/// event is returned. Otherwise the CL buffer is read into _local, the read is
/// waited for, _local is loaded into _tex_buffer, and a default-constructed
/// event is returned.
CL::Event OGLSharedFramebuffer::release(CL::CommandQueue& queue, const CL::Event& evt)
{
    if (!_shared) {
        // Non-shared path: copy through the host-side staging memory.
        assert(_local);

        CL::Event readDone = queue.enq_read_buffer(*_cl_buffer, _local, _tex_buffer.get_size(),
                                                   "read framebuffer", evt);
        queue.wait_for_events(readDone);

        _tex_buffer.load(_local);

        return CL::Event();
    }

    CL::Event released = queue.enq_GL_release(_cl_buffer->get(),
                                              "release framebuffer", evt);
    return released;
}
示例13: simulationStep
void simulationStep() {
try {
// copy
auto buffer = cl::Buffer(context, CL_MEM_READ_ONLY,
sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
nullptr, nullptr);
queue.enqueueWriteBuffer(buffer, CL_TRUE, 0,
sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
visualizationBufferCPU, NULL, NULL);
// enque
stepKernel.setArg(2, buffer);
cl::NDRange global((size_t) (fieldWidth * fieldHeight));
queue.enqueueNDRangeKernel(stepKernel, cl::NullRange, global, cl::NullRange);
// read back
queue.enqueueReadBuffer(visualizationBufferGPU, CL_TRUE, 0,
sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
visualizationBufferCPU, NULL, NULL);
// finish
queue.finish();
} catch (cl::Error err) {
std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std::endl;
exit(3);
}
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, fieldWidth, fieldHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE,
visualizationBufferCPU);
}
示例14: updateParticles
void updateParticles(float timeDelta)
{
try
{
vector<cl::Memory> glBuffers;
glBuffers.push_back(m_positions);
glBuffers.push_back(m_colors);
//this will update our system by calculating new velocity and updating the positions of our particles
//Make sure OpenGL is done using our VBOs
glFinish();
// map OpenGL buffer object for writing from OpenCL
// this passes in the vector of VBO buffer objects (position and color)
m_queue.enqueueAcquireGLObjects(&glBuffers);
m_particleKernel.setArg(5, timeDelta); //pass in the timestep
//execute the kernel
m_queue.enqueueNDRangeKernel(m_particleKernel, cl::NullRange, cl::NDRange(m_numParticles),
cl::NullRange);
//Release the VBOs so OpenGL can play with them
m_queue.enqueueReleaseGLObjects(&glBuffers, NULL);
m_queue.finish();
}
catch(cl::Error &error)
{
LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")";
}
}
示例15: procOCL_OCV
/// Processes GL texture `tex` through OpenCV's OpenCL path and writes the
/// result back into the same texture.
///
/// Steps: wrap the texture as a read-only CL image and convert it to a UMat;
/// apply a Laplacian and multiply the result by 10; wrap the texture as a
/// write-only CL image and copy the output buffer into it. The duration of
/// each stage is logged with LOGD.
///
/// @param tex GL texture name used for both input and output
/// @param w   width of the copied region
/// @param h   height of the copied region
void procOCL_OCV(int tex, int w, int h)
{
    int64_t t = getTimeMs();
    cl::ImageGL imgIn (theContext, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, tex);
    std::vector < cl::Memory > images(1, imgIn);
    theQueue.enqueueAcquireGLObjects(&images);
    theQueue.finish();

    cv::UMat uIn, uOut, uTmp;
    cv::ocl::convertFromImage(imgIn(), uIn);
    LOGD("loading texture data to OpenCV UMat costs %d ms", getTimeInterval(t));
    theQueue.enqueueReleaseGLObjects(&images);

    t = getTimeMs();
    //cv::blur(uIn, uOut, cv::Size(5, 5));
    cv::Laplacian(uIn, uTmp, CV_8U);
    // Was "cv:multiply": a label named cv followed by an unqualified call that
    // only resolved through argument-dependent lookup.
    cv::multiply(uTmp, 10, uOut);
    cv::ocl::finish();
    LOGD("OpenCV processing costs %d ms", getTimeInterval(t));

    t = getTimeMs();
    cl::ImageGL imgOut(theContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, tex);
    images.clear();
    images.push_back(imgOut);
    theQueue.enqueueAcquireGLObjects(&images);

    cl_mem clBuffer = (cl_mem)uOut.handle(cv::ACCESS_READ);
    // The copy is issued on OpenCV's default queue rather than theQueue.
    cl_command_queue q = (cl_command_queue)cv::ocl::Queue::getDefault().ptr();
    size_t offset = 0;
    size_t origin[3] = { 0, 0, 0 };
    // Explicit casts: int -> size_t inside a braced initializer is a narrowing conversion.
    size_t region[3] = { static_cast<size_t>(w), static_cast<size_t>(h), 1 };
    CV_Assert(clEnqueueCopyBufferToImage (q, clBuffer, imgOut(), offset, origin, region, 0, NULL, NULL) == CL_SUCCESS);
    theQueue.enqueueReleaseGLObjects(&images);
    cv::ocl::finish();
    LOGD("uploading results to texture costs %d ms", getTimeInterval(t));
}