当前位置: 首页>>代码示例>>C++>>正文


C++ CommandQueue::enqueueNDRangeKernel方法代码示例

本文整理汇总了C++中cl::CommandQueue::enqueueNDRangeKernel方法的典型用法代码示例。如果您正苦于以下问题:C++ CommandQueue::enqueueNDRangeKernel方法的具体用法?C++ CommandQueue::enqueueNDRangeKernel怎么用?C++ CommandQueue::enqueueNDRangeKernel使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在cl::CommandQueue的用法示例。


在下文中一共展示了CommandQueue::enqueueNDRangeKernel方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: EnqueueAdvancePathsKernel

void PathOCLRenderThread::EnqueueAdvancePathsKernel(cl::CommandQueue &oclQueue) {
	PathOCLRenderEngine *engine = (PathOCLRenderEngine *)renderEngine;
	const u_int taskCount = engine->taskCount;

	// Micro kernels version
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_RT_NEXT_VERTEX, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_HIT_NOTHING, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_HIT_OBJECT, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_RT_DL, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_DL_ILLUMINATE, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_DL_SAMPLE_BSDF, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_GENERATE_NEXT_VERTEX_RAY, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_SPLAT_SAMPLE, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_NEXT_SAMPLE, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
	oclQueue.enqueueNDRangeKernel(*advancePathsKernel_MK_GENERATE_CAMERA_RAY, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(advancePathsWorkGroupSize));
}
开发者ID:yangzhengxing,项目名称:luxrender,代码行数:26,代码来源:pathoclthread.cpp

示例2: run_kernel

float clPeak::run_kernel(cl::CommandQueue &queue, cl::Kernel &kernel, cl::NDRange &globalSize, cl::NDRange &localSize, int iters)
{
    float timed = 0;

    // Dummy calls
    queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
    queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
    queue.finish();

    if(useEventTimer)
    {
        for(int i=0; i<iters; i++)
        {
            cl::Event timeEvent;

            queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
            queue.finish();
            timed += timeInUS(timeEvent);
        }
    } else      // std timer
    {
        Timer timer;

        timer.start();
        for(int i=0; i<iters; i++)
        {
            queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
        }
        queue.finish();
        timed = timer.stopAndTime();
    }

    return (timed / iters);
}
开发者ID:JiniusResearch,项目名称:clpeak,代码行数:34,代码来源:clpeak.cpp

示例3: runKernelLatency

int clPeak::runKernelLatency(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
    if(!isKernelLatency)
        return 0;

    cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
    cl_uint numItems = (devInfo.maxWGSize) * (devInfo.numCUs) * FETCH_PER_WI;
    cl::NDRange globalSize = (numItems / FETCH_PER_WI);
    cl::NDRange localSize = devInfo.maxWGSize;
    int iters = devInfo.kernelLatencyIters;
    float latency;

    try
    {
        log->print(NEWLINE TAB TAB "Kernel launch latency : ");
        log->xmlOpenTag("kernel_launch_latency");
        log->xmlAppendAttribs("unit", "us");

        cl::Buffer inputBuf = cl::Buffer(ctx, CL_MEM_READ_ONLY, (numItems * sizeof(float)));
        cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (numItems * sizeof(float)));

        cl::Kernel kernel_v1(prog, "global_bandwidth_v1_local_offset");
        kernel_v1.setArg(0, inputBuf), kernel_v1.setArg(1, outputBuf);

        // Dummy calls
        queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
        queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
        queue.finish();

        latency = 0;
        for(int i=0; i<iters; i++)
        {
            cl::Event timeEvent;
            queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
            queue.finish();
            cl_ulong start = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() / 1000;
            cl_ulong end = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() / 1000;
            latency += (float)((int)end - (int)start);
        }
        latency /= iters;

        log->print(latency);    log->print(" us" NEWLINE);
        log->xmlSetContent(latency);
        log->xmlCloseTag();
    }
    catch(cl::Error error)
    {
        log->print(error.err() + NEWLINE);
        log->print(TAB TAB "Tests skipped" NEWLINE);
        return -1;
    }

    return 0;
}
开发者ID:nivertech,项目名称:clpeak,代码行数:54,代码来源:kernel_latency.cpp

示例4: helper

void helper(uint32_t* out, int osize, uint8_t* in, int isize, int w, int h, int choice)
{
	int set_size=8;
    try {
        cl::Buffer bufferIn = cl::Buffer(gContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                isize*sizeof(cl_uchar), in, NULL);
        cl::Buffer bufferOut = cl::Buffer(gContext, CL_MEM_READ_WRITE, osize*sizeof(cl_uchar4));
        cl::Buffer bufferOut2= cl::Buffer(gContext, CL_MEM_READ_WRITE, osize*sizeof(cl_uchar4));
        gNV21Kernel.setArg(2,w);
        gNV21Kernel.setArg(3,h);
        gNV21Kernel.setArg(1,bufferIn);
        gNV21Kernel.setArg(0,bufferOut);
        gQueue.enqueueNDRangeKernel(gNV21Kernel,
                cl::NullRange,
                cl::NDRange( (int)ceil((float)w/16.0f)*16,(int)ceil((float)h/16.0f)*16),
                cl::NDRange(set_size,set_size),
                NULL,
                NULL);
        if (choice==1) {
            gLaplacianK.setArg(2,w);
            gLaplacianK.setArg(3,h);
            gLaplacianK.setArg(1,bufferOut);
            gLaplacianK.setArg(0,bufferOut2);
            gQueue.enqueueNDRangeKernel(gLaplacianK,
                    cl::NullRange,
                    cl::NDRange( (int)ceil((float)w/16.0f)*16,(int)ceil((float)h/16.0f)*16),
                    cl::NDRange(set_size,set_size),
                    NULL,
                    NULL);
        }
        else if (choice>1) {
        	gNegative.setArg(2,w);
        	gNegative.setArg(3,h);
        	gNegative.setArg(1,bufferOut);
        	gNegative.setArg(0,bufferOut2);
        	gQueue.enqueueNDRangeKernel(gNegative,
        	                    cl::NullRange,
        	                    cl::NDRange( (int)ceil((float)w/16.0f)*16,(int)ceil((float)h/16.0f)*16),
        	                    cl::NDRange(set_size,set_size),
        	                    NULL,
        	                    NULL);

        }

        gQueue.enqueueReadBuffer(bufferOut2, CL_TRUE, 0, osize*sizeof(cl_uchar4), out);
    }
    catch (cl::Error e) {
        LOGI("@oclDecoder: %s %d \n",e.what(),e.err());
    }
}
开发者ID:khaled777b,项目名称:AndroidOpenCL-Filter,代码行数:50,代码来源:processor.cpp

示例5: updateParticles

    void updateParticles(float timeDelta)
    {
        try
        {
            vector<cl::Memory> glBuffers;
            glBuffers.push_back(m_positions);
            glBuffers.push_back(m_colors);
            
            //this will update our system by calculating new velocity and updating the positions of our particles
            //Make sure OpenGL is done using our VBOs
            glFinish();
            
            // map OpenGL buffer object for writing from OpenCL
            // this passes in the vector of VBO buffer objects (position and color)
            m_queue.enqueueAcquireGLObjects(&glBuffers);
            
            m_particleKernel.setArg(5, timeDelta); //pass in the timestep
            
            //execute the kernel
            m_queue.enqueueNDRangeKernel(m_particleKernel, cl::NullRange, cl::NDRange(m_numParticles),
                                         cl::NullRange);
            //Release the VBOs so OpenGL can play with them
            m_queue.enqueueReleaseGLObjects(&glBuffers, NULL);

            m_queue.finish();
        }
        catch(cl::Error &error)
        {
            LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")";
        }
    }
开发者ID:mojovski,项目名称:KinskiGL,代码行数:31,代码来源:main.cpp

示例6: simulationStep

void simulationStep() {
    try {
        // copy
        auto buffer = cl::Buffer(context, CL_MEM_READ_ONLY,
                                 sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                 nullptr, nullptr);
        queue.enqueueWriteBuffer(buffer, CL_TRUE, 0,
                                 sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                 visualizationBufferCPU, NULL, NULL);

        // enque
        stepKernel.setArg(2, buffer);
        cl::NDRange global((size_t) (fieldWidth * fieldHeight));
        queue.enqueueNDRangeKernel(stepKernel, cl::NullRange, global, cl::NullRange);

        // read back
        queue.enqueueReadBuffer(visualizationBufferGPU, CL_TRUE, 0,
                                sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                visualizationBufferCPU, NULL, NULL);

        // finish
        queue.finish();
    } catch (cl::Error err) {
        std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std::endl;
        exit(3);
    }

    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, fieldWidth, fieldHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE,
                 visualizationBufferCPU);
}
开发者ID:steindani,项目名称:gpgpu_gameoflife,代码行数:30,代码来源:GameOfLife.cpp

示例7: procOCL_I2I

void procOCL_I2I(int texIn, int texOut, int w, int h)
{
    if(!haveOpenCL) return;

    LOGD("procOCL_I2I(%d, %d, %d, %d)", texIn, texOut, w, h);
    cl::ImageGL imgIn (theContext, CL_MEM_READ_ONLY,  GL_TEXTURE_2D, 0, texIn);
    cl::ImageGL imgOut(theContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, texOut);
    std::vector < cl::Memory > images;
    images.push_back(imgIn);
    images.push_back(imgOut);

    int64_t t = getTimeMs();
    theQueue.enqueueAcquireGLObjects(&images);
    theQueue.finish();
    LOGD("enqueueAcquireGLObjects() costs %d ms", getTimeInterval(t));

    t = getTimeMs();
    cl::Kernel Laplacian(theProgI2I, "Laplacian"); //TODO: may be done once
    Laplacian.setArg(0, imgIn);
    Laplacian.setArg(1, imgOut);
    theQueue.finish();
    LOGD("Kernel() costs %d ms", getTimeInterval(t));

    t = getTimeMs();
    theQueue.enqueueNDRangeKernel(Laplacian, cl::NullRange, cl::NDRange(w, h), cl::NullRange);
    theQueue.finish();
    LOGD("enqueueNDRangeKernel() costs %d ms", getTimeInterval(t));

    t = getTimeMs();
    theQueue.enqueueReleaseGLObjects(&images);
    theQueue.finish();
    LOGD("enqueueReleaseGLObjects() costs %d ms", getTimeInterval(t));
}
开发者ID:adrians,项目名称:opencv,代码行数:33,代码来源:CLprocessor.cpp

示例8: runKernel

cl::Event runKernel(const cl::CommandQueue& queue, const cl::Kernel& kernel, const cl::NDRange& globalSize, const cl::NDRange& groupSize, std::vector<cl::Event>& events)
{
	cl::Event event;
	queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, groupSize, &events, &event);
	events.push_back(event);
	return event;
}
开发者ID:Jereq,项目名称:Raytracer,代码行数:7,代码来源:CLHelper.cpp

示例9: kernel

void kernel(cl::Buffer& devOut, cl::CommandQueue& queue)
{
    static std::once_flag   compileFlag;
    static cl::Program      prog;
    static cl::Kernel       kern;

    std::call_once(compileFlag,
        [queue]() {
        prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), fractal_ocl_kernel, true);
            kern = cl::Kernel(prog, "julia");
        });

    //auto juliaOp = cl::make_kernel<Buffer, unsigned, unsigned>(kern);

    static const NDRange local(8, 8);
    NDRange global(local[0] * divup(DIMX, local[0]),
                   local[1] * divup(DIMY, local[1]));

    kern.setArg(0, devOut);
    kern.setArg(1, DIMX);
    kern.setArg(2, DIMY);
    queue.enqueueNDRangeKernel(kern, cl::NullRange, global, local);

    //juliaOp(EnqueueArgs(queue, global, local), devOut, DIMX, DIMY);
}
开发者ID:syurkevi,项目名称:forge,代码行数:25,代码来源:fractal.cpp

示例10: draw

void PTWeekend::draw()
{
    /*
     * BEGIN - each frame part
     */
    
    /* Enqueue kernel for execution */
    
    glm::vec3 origin,lower_left, hor, ver;
    
    float theta = camera.getFov() * M_PI / 180.0f;
    float half_height = tan(theta / 2.0f);
    float half_width = camera.getAspectRatio() * half_height;
    
    origin = camera.getEyePoint();
    glm::vec3 u, v, w;
    
    w = -glm::normalize(camera.getViewDirection()); //odd...
    u = glm::normalize(glm::cross(glm::vec3(0,1,0), w));
    v = glm::cross(w, u);
    
    lower_left = origin - half_width * u - half_height * v - w;
    hor = 2.0f * half_width * u;
    ver = 2.0f * half_height * v;
    
    pt_assert(cl_set_pinhole_cam_arg(origin, lower_left, hor, ver, cam_buffer, cmd_queue), "Could not fill camera buffer");
    
    clStatus = cmd_queue.enqueueAcquireGLObjects(&img_buffer, NULL, NULL);
    pt_assert(clStatus, "Could not acquire gl objects");
    
    cl::Event profiling_evt;
    
    
    
    clStatus = cmd_queue.enqueueNDRangeKernel(kernel,
                                              cl::NDRange(0,0),
                                              cl::NDRange(img_width, img_height),
                                              cl::NDRange(local_width,local_height),
                                              NULL,
                                              &profiling_evt);
    profiling_evt.wait();
    
    pt_assert(clStatus, "Could not enqueue the kernel");
    clStatus = cmd_queue.enqueueReleaseGLObjects(&img_buffer, NULL, NULL);
    pt_assert(clStatus, "Could not release gl objects");
    cmd_queue.finish();
    
    cl_ulong time_start = profiling_evt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
    cl_ulong time_end = profiling_evt.getProfilingInfo<CL_PROFILING_COMMAND_END>();
    cl_ulong total_time = time_end - time_start;
    std::cout << "Total time: " << total_time * 0.001 * 0.001 << " ms \n";
    
    /*
     * END - each frame part
     */
    
    gl::draw(imgTex, Rectf(0, 0, getWindowWidth(), getWindowHeight()));
}
开发者ID:AndreaMelle,项目名称:graphics-playground,代码行数:58,代码来源:PTWeekend.cpp

示例11: sumTest

void sumTest(cl::Buffer queue_data, cl::Buffer queue_metadata,
             cl::Buffer& device_result, int iterations,
             ProgramCache& cache,
             cl::CommandQueue& queue)
{
    cl::Context context = queue.getInfo<CL_QUEUE_CONTEXT>();

    std::vector<std::string> sources;
    sources.push_back("ParallelQueue");
    sources.push_back("ParallelQueueTests");

    cl::Program& program = cache.getProgram(sources);

    cl::Kernel sum_test_kernel(program, "sum_test");

    cl::Device device = queue.getInfo<CL_QUEUE_DEVICE>();

    int warp_size = sum_test_kernel
        .getWorkGroupInfo<CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE>(device);

    std::cout << "warp size: " << warp_size << std::endl;

    int max_group_size = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0];
    int queue_num_threads = 512;

    if(queue_num_threads > max_group_size)
        queue_num_threads = max_group_size;

    cl::LocalSpaceArg local_queue
            = cl::__local(sizeof(int) * queue_num_threads * 2);
    cl::LocalSpaceArg reduction_buffer
            = cl::__local(sizeof(int) * queue_num_threads);
    cl::LocalSpaceArg got_work
            = cl::__local(sizeof(int));
    cl::LocalSpaceArg prefix_sum_input
            = cl::__local(sizeof(int) * queue_num_threads);
    cl::LocalSpaceArg prefix_sum_output
            = cl::__local(sizeof(int) * queue_num_threads);

    sum_test_kernel.setArg(0, queue_data);
    sum_test_kernel.setArg(1, queue_metadata);
    sum_test_kernel.setArg(2, device_result);
    sum_test_kernel.setArg(3, iterations);
    sum_test_kernel.setArg(4, local_queue);
    sum_test_kernel.setArg(5, reduction_buffer);
    sum_test_kernel.setArg(6, got_work);
    sum_test_kernel.setArg(7, prefix_sum_input);
    sum_test_kernel.setArg(8, prefix_sum_output);

    cl::NDRange nullRange;
    cl::NDRange global(queue_num_threads, 1);
    cl::NDRange local(queue_num_threads, 1);

    cl_int status = queue.enqueueNDRangeKernel(sum_test_kernel,
                                               nullRange, global, local);
}
开发者ID:sharmaashish,项目名称:emory-cci-fast-ia-gsoc,代码行数:56,代码来源:parallelQueueTest.cpp

示例12: findMinSeamVert

    void findMinSeamVert(cl::Context &ctx,
                         cl::CommandQueue &cmdQueue,
                         cl::Event &event,
                         std::vector<cl::Event> &deps,
                         cl::Buffer &energyMatrix,
                         cl::Buffer &vertMinEnergy,
                         cl::Buffer &vertMinIdx,
                         int width,
                         int height,
                         int pitch,
                         int colsRemoved) {

        cl_int errNum;
        errNum = findMinSeamVertKernel.setArg(0, energyMatrix);
        errNum |= findMinSeamVertKernel.setArg(1, vertMinEnergy);
        errNum |= findMinSeamVertKernel.setArg(2, vertMinIdx);
        errNum |= findMinSeamVertKernel.setArg(3, cl::__local(256 * sizeof(float)));
        errNum |= findMinSeamVertKernel.setArg(4, cl::__local(256 * sizeof(float)));
        errNum |= findMinSeamVertKernel.setArg(5, width);
        errNum |= findMinSeamVertKernel.setArg(6, height);
        errNum |= findMinSeamVertKernel.setArg(7, pitch);
        errNum |= findMinSeamVertKernel.setArg(8, colsRemoved);

        if (errNum != CL_SUCCESS) {
            std::cerr << "Error setting findMinSeamVert arguments." << std::endl;
            exit(-1);
        }

        // This kernel could be written to use more than one work group, but its probably not worth it.

        cl::NDRange offset = cl::NDRange(0);
        cl::NDRange localWorkSize = cl::NDRange(256);
        cl::NDRange globalWorkSize = cl::NDRange(256);

        errNum = cmdQueue.enqueueNDRangeKernel(findMinSeamVertKernel,
                                               offset,
                                               globalWorkSize,
                                               localWorkSize,
                                               &deps,
                                               &event);
        if (errNum != CL_SUCCESS) {
            std::cerr << "Error enqueuing computeSeams kernel for execution." << std::endl;
            exit(-1);
        }

        /** DEBUG **/
        // int deviceResultIdx[1];
        // float deviceResultEnergy[1];

        // mem::read(ctx, cmdQueue, deviceResultIdx, vertMinIdx);
        // mem::read(ctx, cmdQueue, deviceResultEnergy, vertMinEnergy);

        // std::cout << "deviceResultIdx = " << deviceResultIdx[0] << std::endl;
        // std::cout << "deviceResultEnergy = " << deviceResultEnergy[0] << std::endl;
    }
开发者ID:amidvidy,项目名称:seamcarve-opencl,代码行数:55,代码来源:kernel.hpp

示例13: enqueue

void Reduce::enqueue(
    const cl::CommandQueue &commandQueue,
    const cl::Buffer &inBuffer,
    const cl::Buffer &outBuffer,
    ::size_t first,
    ::size_t elements,
    ::size_t outPosition,
    const VECTOR_CLASS<cl::Event> *events,
    cl::Event *event)
{
    
    /* Validate parameters */
    if (first + elements < first)
    {
        // Only happens if first + elements overflows. size_t is unsigned so behaviour
        // is well-defined.
        throw cl::Error(CL_INVALID_VALUE, "clogs::Reduce::enqueue: range out of input buffer bounds");
    }
    if (inBuffer.getInfo<CL_MEM_SIZE>() / elementSize < first + elements)
        throw cl::Error(CL_INVALID_VALUE, "clogs::Reduce::enqueue: range out of input buffer bounds");
    if (outBuffer.getInfo<CL_MEM_SIZE>() / elementSize <= outPosition)
        throw cl::Error(CL_INVALID_VALUE, "clogs::Reduce::enqueue: output position out of buffer bounds");
    if (!(inBuffer.getInfo<CL_MEM_FLAGS>() & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY)))
    {
        throw cl::Error(CL_INVALID_VALUE, "clogs::Reduce::enqueue: input buffer is not readable");
    }
    if (!(outBuffer.getInfo<CL_MEM_FLAGS>() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)))
    {
        throw cl::Error(CL_INVALID_VALUE, "clogs::Reduce::enqueue: output buffer is not writable");
    }
    if (elements == 0)
        throw cl::Error(CL_INVALID_GLOBAL_WORK_SIZE, "clogs::Reduce::enqueue: elements is zero");

    const ::size_t blockSize = roundUp(elements, reduceWorkGroupSize * reduceBlocks) / reduceBlocks;
    
    reduceKernel.setArg(1, outBuffer);
    reduceKernel.setArg(2, (cl_uint) outPosition);
    reduceKernel.setArg(3, inBuffer);
    reduceKernel.setArg(4, (cl_uint) first);
    reduceKernel.setArg(5, (cl_uint) elements);
    reduceKernel.setArg(7, (cl_uint) blockSize);

    cl::Event reduceEvent;
    commandQueue.enqueueNDRangeKernel(
        reduceKernel,
        cl::NullRange,
        cl::NDRange(reduceWorkGroupSize * reduceBlocks),
        cl::NDRange(reduceWorkGroupSize),
        events, &reduceEvent);
    doEventCallback(reduceEvent);

    if (event != NULL)
        *event = reduceEvent;
}
开发者ID:ResearchDaniel,项目名称:Correlated-Photon-Mapping-for-Interactive-Global-Illumination-of-Time-Varying-Volumetric-Data,代码行数:54,代码来源:reduce.cpp

示例14: zeroBuffer

void zeroBuffer(::cl::Context &context, ::cl::CommandQueue &commandQueue, ::cl::Buffer &buffer, size_t size, std::vector<::cl::Event> *events, ::cl::Event &event)
{
    cl_int status;
    ::cl::Kernel kernel=getKernel(context, "zeroMemory", "utils.cl", utils_cl);

    status=kernel.setArg(0, buffer);

    ::cl::NDRange globalThreads(size);

    status=commandQueue.enqueueNDRangeKernel(kernel, ::cl::NullRange, globalThreads, ::cl::NullRange, events, &event);
}
开发者ID:InfiniteInteractive,项目名称:LimitlessSDK,代码行数:11,代码来源:utils_cl.cpp

示例15: backtrack

    void backtrack(cl::Context &ctx,
                   cl::CommandQueue &cmdQueue,
                   cl::Event &event,
                   std::vector<cl::Event> &deps,
                   cl::Buffer &energyMatrix,
                   cl::Buffer &vertSeamPath,
                   cl::Buffer &vertMinIdx,
                   int width,
                   int height,
                   int pitch,
                   int colsRemoved) {

        cl_int errNum;

        // Set kernel arguments
        errNum = backtrackKernel.setArg(0, energyMatrix);
        errNum |= backtrackKernel.setArg(1, vertSeamPath);
        errNum |= backtrackKernel.setArg(2, vertMinIdx);
        errNum |= backtrackKernel.setArg(3, width);
        errNum |= backtrackKernel.setArg(4, height);
        errNum |= backtrackKernel.setArg(5, pitch);
        errNum |= backtrackKernel.setArg(6, colsRemoved);

        if (errNum != CL_SUCCESS) {
            std::cerr << "Error setting backtrack kernel arguments." << std::endl;
            exit(-1);
        }

        cl::NDRange offset = cl::NDRange(0);
        cl::NDRange localWorkSize = cl::NDRange(1);
        cl::NDRange globalWorkSize = cl::NDRange(256);

        errNum = cmdQueue.enqueueNDRangeKernel(backtrackKernel,
                                               offset,
                                               globalWorkSize,
                                               localWorkSize,
                                               &deps,
                                               &event);


        if (errNum != CL_SUCCESS) {
            std::cerr << "Error enqueueing backTrack kernel for execution." << std::endl;
            exit(-1);
        }

        // /** DEBUGGING **/
        // int deviceResult[height];

        // mem::read(ctx, cmdQueue, deviceResult, vertSeamPath, height);
        // for (int i = height - 5; i < height; ++i) {
        //     std::cout << "deviceResult[" << i << "]=\t" << deviceResult[i] << std::endl;
        // }

    }
开发者ID:amidvidy,项目名称:seamcarve-opencl,代码行数:54,代码来源:kernel.hpp


注:本文中的cl::CommandQueue::enqueueNDRangeKernel方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。