C++ clWaitForEvents函数代码示例

本文整理汇总了C++中clWaitForEvents函数的典型用法代码示例。如果您正苦于以下问题：C++ clWaitForEvents函数的具体用法？C++ clWaitForEvents怎么用？C++ clWaitForEvents使用的例子？那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了clWaitForEvents函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: oclLaunchKernel

/*
 * Enqueue kernel `k` on queue `q` as a 1D NDRange sized for `nbobj` items,
 * block until the kernel's event has completed, and return whatever
 * oclChronoElaps() reports for that event (presumably the elapsed kernel
 * time -- confirm against oclChronoElaps).
 *
 * nbthread    upper bound on the work-group size requested by the caller.
 * fname/line  call-site location forwarded to oclCheckErrF for diagnostics.
 */
double
oclLaunchKernel(cl_kernel k, cl_command_queue q, int nbobj, int nbthread, const char *fname, const int line)
{
    dim3 gws, lws;
    cl_event kernelDone;
    cl_int status = 0;
    size_t wgMultiple = 32;   /* fallback value, overwritten by the query below */
    cl_device_id device = oclGetDeviceOfCQueue(q);

    /* Start from the kernel/device limit, capped by the caller's request. */
    int threads = oclGetMaxWorkSize(k, device);
    threads = MIN(threads, nbthread);

    /* Ask the runtime which work-group size multiple the hardware prefers. */
    status = clGetKernelWorkGroupInfo(k, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
                                      sizeof(wgMultiple), &wgMultiple, NULL);
    oclCheckErr(status, "clGetKernelWorkGroupInfo CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE");

    /* Make sure we use a proper multiple: the AMD 7970 crashes if this is not met. */
    threads = oclMultiple(threads, wgMultiple);

    /* Fill in the global/local work sizes for a 1D launch. */
    oclMkNDrange(nbobj, threads, NDR_1D, gws, lws);

    status = clEnqueueNDRangeKernel(q, k, NDR_1D, NULL, gws, lws, 0, NULL, &kernelDone);
    oclCheckErrF(status, "clEnqueueNDRangeKernel", fname, line);

    /* Block until the kernel has finished so the event can be inspected. */
    status = clWaitForEvents(1, &kernelDone);
    oclCheckErrF(status, "clWaitForEvents", fname, line);

    const double elapsed = oclChronoElaps(kernelDone);

    status = clReleaseEvent(kernelDone);
    oclCheckErrF(status, "clReleaseEvent", fname, line);

    return elapsed;
}

开发者ID:kghoracle，项目名称:Hydro，代码行数:39，代码来源:ocltools.c

示例2: RunRoutine

 // Runs the CLBlast HBMV routine with the supplied arguments and buffers and
 // returns the routine's status code.
 //  - OpenCL build: when the routine succeeds, blocks until its completion
 //    event has fired and then releases that event.
 //  - CUDA build: synchronizes on the stream behind `queue` after the call.
 static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
   #ifdef OPENCL_API
     auto raw_queue = queue();
     cl_event done = nullptr;
     auto status = Hbmv(args.layout, args.triangle,
                        args.n, args.kl, args.alpha,
                        buffers.a_mat(), args.a_offset, args.a_ld,
                        buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
                        buffers.y_vec(), args.y_offset, args.y_inc,
                        &raw_queue, &done);
     // Only wait on and release the event when the routine succeeded.
     if (status == StatusCode::kSuccess) {
       clWaitForEvents(1, &done);
       clReleaseEvent(done);
     }
   #elif CUDA_API
     auto status = Hbmv(args.layout, args.triangle,
                        args.n, args.kl, args.alpha,
                        buffers.a_mat(), args.a_offset, args.a_ld,
                        buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
                        buffers.y_vec(), args.y_offset, args.y_inc,
                        queue.GetContext()(), queue.GetDevice()());
     cuStreamSynchronize(queue());
   #endif
   return status;
 }

开发者ID:gpu，项目名称:CLBlast，代码行数:23，代码来源:xhbmv.hpp

示例3: PerformanceAnalyser::analyzeEvent

// Waits for `event` to complete and turns its OpenCL profiling timestamps
// into a TimelineEntry. Each timestamp is divided by 1e9 before being stored
// (OpenCL reports profiling counters in nanoseconds, so the fields are in
// seconds).
PerformanceAnalyser::TimelineEntry PerformanceAnalyser::analyzeEvent(cl_event &event) {
    const double ns_per_second = 1000000000.0;

    // Profiling data is only available once the command has finished.
    clWaitForEvents(1, &event);

    TimelineEntry entry;
    cl_ulong stamp;  // reused for every profiling query below

    // Moment the command was queued.
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, sizeof(stamp), &stamp, NULL);
    entry.start_time = static_cast<double>(stamp) / ns_per_second;

    // Moment the command finished executing.
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(stamp), &stamp, NULL);
    entry.end_time = static_cast<double>(stamp) / ns_per_second;

    // Moment the command actually started executing.
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(stamp), &stamp, NULL);
    const double exec_start = static_cast<double>(stamp) / ns_per_second;

    entry.total_time     = entry.end_time - entry.start_time;  // queued -> end
    entry.api_overhead   = exec_start - entry.start_time;      // queued -> start
    entry.execution_time = entry.end_time - exec_start;        // start  -> end

    // Time since m_time (per getTime()) that the event's total time does not account for.
    entry.cpu_time = (getTime()-m_time)-entry.total_time;

    return entry;
}

开发者ID:babrodtk，项目名称:ocls-core，代码行数:22，代码来源:PerformanceAnalyser.cpp

示例4: read_value

void read_value(){

    int err;
    cl_event readevent;
    err = clEnqueueReadBuffer(commands, d_output, CL_TRUE, 0,
                              REC_N * sizeof(cl_int),
                              h_output, 0, NULL, &readevent);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to read output array! %d\n", err);
        printf("Test failed\n");
        exit(1);
    }

    clWaitForEvents(1, &readevent);

    printf("\n[host] outputs:\n");
    for (int i = 0; i < REC_N; ++i) {
        printf("%d ", h_output[i]);
    }
    printf("\n");
}

开发者ID:ericfukuda，项目名称:sdaccel_samples，代码行数:22，代码来源:host.cpp

示例5: mat_mul_cl_clblas

/*
 * Multiply two n x n row-major matrices with clBLAS: C = 1.0 * A * B + 0.0 * C.
 *
 * A and B are uploaded with blocking writes into cache->buf_a / cache->buf_b,
 * clblasSgemm is enqueued on cache->common.command_queue, and the result is
 * read back from cache->buf_c into C with a blocking read.
 * The buffers in `cache` are assumed to hold at least n*n elements of F --
 * TODO confirm against the code that allocates them.
 */
void mat_mul_cl_clblas(const F *A, const F *B, F *C, size_t n, Cache *cache) {
    /* NULL-initialised so a failed GEMM never leaves us with a garbage handle. */
    cl_event event = NULL;
    clblasStatus status;
    size_t mat_sizeof;

    mat_sizeof = n * n * sizeof(F);
    clEnqueueWriteBuffer(cache->common.command_queue, cache->buf_a, CL_TRUE, 0, mat_sizeof, (F*)A, 0, NULL, NULL);
    clEnqueueWriteBuffer(cache->common.command_queue, cache->buf_b, CL_TRUE, 0, mat_sizeof, (F*)B, 0, NULL, NULL);
    status = clblasSgemm(
        clblasRowMajor,
        clblasNoTrans,
        clblasNoTrans,

        /* M, N, K and alpha. */
        n,
        n,
        n,
        1.0,

        /* A: buffer, offset, leading dimension. */
        cache->buf_a,
        0,
        n,

        /* B: buffer, offset, leading dimension. */
        cache->buf_b,
        0,
        n,

        /* beta, then C: buffer, offset, leading dimension. */
        0.0,
        cache->buf_c,
        0,
        n,

        /* One command queue, no wait list, one output event. */
        1,
        &(cache->common.command_queue),
        0,
        NULL,
        &event
    );
    /* Only wait on an event that was actually produced, and release it
       afterwards so it does not leak on every call. */
    if (status == clblasSuccess && event != NULL) {
        clWaitForEvents(1, &event);
        clReleaseEvent(event);
    }
    clEnqueueReadBuffer(cache->common.command_queue, cache->buf_c, CL_TRUE, 0, mat_sizeof, C, 0, NULL, NULL);
}

开发者ID:cirosantilli，项目名称:cpp-cheat，代码行数:39，代码来源:matmul.c

示例6: mwWaitReleaseEvent

/*
 * Block until *ev has completed, then release it.
 *
 * Returns CL_SUCCESS when both steps succeed. Otherwise the failing call's
 * error is reported through mwPerrorCL and returned; if the wait fails the
 * event is not released.
 */
cl_int mwWaitReleaseEvent(cl_event* ev)
{
    cl_int rc;

    assert(ev);

    rc = clWaitForEvents(1, ev);
    if (rc == CL_SUCCESS)
    {
        rc = clReleaseEvent(*ev);
        if (rc != CL_SUCCESS)
        {
            mwPerrorCL(rc, "Failed to release event");
        }
    }
    else
    {
        mwPerrorCL(rc, "Failed to wait for event");
    }

    return rc;
}

开发者ID:LocutusOfBorg，项目名称:milkywayathome_client，代码行数:23，代码来源:milkyway_cl_util.c

示例7: QCLBuffer::copyTo

/*!
    Copies the contents of this buffer, starting at \a offset to
    \a rect within \a dest.  Returns true if the copy was successful;
    false otherwise.

    This function will block until the request finishes.
    The request is executed on the active command queue for context().

    \sa copyToAsync()
*/
bool QCLBuffer::copyTo
(size_t offset, const QCLImage2D &dest, const QRect &rect)
{
    const size_t dst_origin[3] = {static_cast<size_t>(rect.x()),
                                  static_cast<size_t>(rect.y()), 0
                                 };
    const size_t region[3] = {static_cast<size_t>(rect.width()),
                              static_cast<size_t>(rect.height()), 1
                             };
    cl_event event;
    cl_int error = clEnqueueCopyBufferToImage
                   (context()->activeQueue(), memoryId(), dest.memoryId(),
                    offset, dst_origin, region, 0, 0, &event);
    context()->reportError("QCLBuffer::copyTo(QCLImage2D):", error);
    if (error == CL_SUCCESS) {
        clWaitForEvents(1, &event);
        clReleaseEvent(event);
        return true;
    } else {
        return false;
    }
}

开发者ID:radrad350，项目名称:QtOpenCL，代码行数:32，代码来源:qclbuffer.cpp

示例8: testScanImpl

// Test driver for scanImpl on rLen ints: fills a host array via
// generateRandInt, copies it to the device, runs scanImpl, reads the result
// back, waits on the most recently recorded event and frees everything.
// The helpers (cl_writebuffer, scanImpl, cl_readbuffer) all receive &index
// and eventList; presumably each one records its event in eventList and
// advances index -- TODO confirm against their definitions.
void testScanImpl(int rLen)
{
	int _CPU_GPU=0;
	cl_event eventList[2];
	int index=0;
	cl_kernel Kernel; 
	int CPU_GPU;
	double burden;	
	// NOTE(review): `result` is never read in this function.
	int result=0;
	int memSize=sizeof(int)*rLen;
	int outSize=sizeof(int)*rLen;
	// Host-side input and output arrays.
	void *Rin;
	HOST_MALLOC(Rin, memSize);
	generateRandInt((int*)Rin, rLen,rLen,0);
	void *Rout;
	HOST_MALLOC(Rout, outSize);
	// Device-side input and output buffers.
	cl_mem d_Rin;
	CL_MALLOC(&d_Rin, memSize);
	cl_mem d_Rout;
	CL_MALLOC(&d_Rout, outSize);
	cl_writebuffer(d_Rin, Rin, memSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
	// NOTE(review): SP is malloc'ed here and not freed in this function --
	// confirm that closeScan() releases it.
	ScanPara *SP;
	SP=(ScanPara*)malloc(sizeof(ScanPara));
	initScan(rLen,SP);
	scanImpl(d_Rin,rLen,d_Rout,&index,eventList,&Kernel,&CPU_GPU,&burden,SP,_CPU_GPU);	
	cl_readbuffer(Rout, d_Rout, outSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
	// Wait on the slot selected by (index-1)%2. This assumes index >= 1 by
	// now; with index == 0 the expression evaluates to -1 and would read
	// outside eventList.
	clWaitForEvents(1,&eventList[(index-1)%2]);
	closeScan(SP);
	deschedule(CPU_GPU,burden);
	//validateScan( (int*)Rin, rLen, (int*)Rout );
	HOST_FREE(Rin);
	HOST_FREE(Rout);
	CL_FREE(d_Rin);
	CL_FREE(d_Rout);
	// NOTE(review): Kernel and both eventList slots are released
	// unconditionally; this assumes the helpers above initialised all of them.
	clReleaseKernel(Kernel);  
	clReleaseEvent(eventList[0]);
	clReleaseEvent(eventList[1]);
}

开发者ID:johnspaul92，项目名称:omnidb-paralleldbonapu，代码行数:38，代码来源:testScan.cpp

示例9: acc_event_synchronize

/*
 * Block until the OpenCL event behind `event` has completed.
 *
 * `event` is treated as a pointer to a cl_event. The result of the wait is
 * stored in the file-level `cl_error`; returns 0 on success and -1 when
 * acc_opencl_error_check() flags that result as an error.
 * Trace messages are written to stdout when `verbose_print` is set.
 */
int acc_event_synchronize (void* event){
  // view the opaque handle as an OpenCL event pointer
  cl_event *pending = (cl_event *) event;

  // debug info
  if (verbose_print){
    fprintf(stdout, "\n ... EVENT SYNCHRONIZATION ... \n");
    fprintf(stdout, " ---> Entering: acc_event_synchronize.\n");
  }

  // wait for the event ( !!! need to share the same ctx !!! )
  cl_error = clWaitForEvents(1, pending);
  if (acc_opencl_error_check(cl_error, __LINE__)){
    return -1;
  }

  // debug info
  if (verbose_print){
    fprintf(stdout, " ---> Leaving: acc_event_synchronize.\n");
  }

  return 0;
}

开发者ID:rzk1，项目名称:cp2k-mcgill，代码行数:23，代码来源:acc_opencl_event.c

示例10: pclu_call_kernel

/*
 * Create the kernel called `name` from pgm->program, bind its arguments,
 * enqueue it on pgm->pclu->queue over `range`, and release the kernel object.
 *
 * The variadic tail must hold `argc` pairs of (size_t size, void* value),
 * which are forwarded in order to clSetKernelArg.
 *
 * With NO_CL_EVENTS defined (the default here) the kernel is enqueued without
 * requesting an event and this function does not wait for completion.
 * Otherwise it waits for the kernel's completion event and releases it.
 * Every OpenCL status is passed to pclu_check_call.
 */
void
pclu_call_kernel(pclu_program* pgm, const char* name, pclu_range range, size_t argc, ...)
{
    cl_int errcode;
    cl_kernel kern = clCreateKernel(pgm->program, name, &errcode);
    pclu_check_call("clCreateKernel", errcode);

    va_list ap;
    va_start(ap, argc);

    for (cl_uint ii = 0; ii < argc; ++ii) {
        size_t size = va_arg(ap, size_t);
        void*  arg  = va_arg(ap, void*);
        pclu_check_call("clSetKernelArg", clSetKernelArg(kern, ii, size, arg));
    }

    va_end(ap);

#define NO_CL_EVENTS 1

#ifdef NO_CL_EVENTS
    /* No event is requested: an event returned by the enqueue call would
       have to be released by us, and nothing here would ever do so. */
    errcode = clEnqueueNDRangeKernel(pgm->pclu->queue, kern, range.nd, 0,
                                     range.global, 0, 0, 0, NULL);
    pclu_check_call("clEnqueueNDRangeKernel", errcode);
#else
    /* The enqueue call creates the completion event itself; pre-creating a
       user event here would only be overwritten and leaked. */
    cl_event kernel_done = 0;
    errcode = clEnqueueNDRangeKernel(pgm->pclu->queue, kern, range.nd, 0,
                                     range.global, 0, 0, 0, &kernel_done);
    pclu_check_call("clEnqueueNDRangeKernel", errcode);

    pclu_check_call("clWaitForEvents", clWaitForEvents(1, &kernel_done));
    pclu_check_call("clReleaseEvent", clReleaseEvent(kernel_done));
#endif

    pclu_check_call("clReleaseKernel", clReleaseKernel(kern));
}

开发者ID:NatTuck，项目名称:pocl-0.7x，代码行数:37，代码来源:pclu.c

示例11: CL_GroupBy

// C-linkage entry point that groups rLen records on the device.
//
// h_Rin      input records on the host (rLen entries).
// h_Rout     host buffer receiving rLen records read back from d_Rout.
// h_startPos out-parameter: set to a malloc'ed array of numGroup ints read
//            back from the device; nothing in this function frees it, so the
//            caller presumably owns it -- TODO confirm.
// numThread, numBlock, _CPU_GPU are forwarded unchanged to the helpers.
//
// Returns the group count reported by groupByImpl.
// The helpers all receive &index and eventList; presumably each records its
// event there and advances index -- TODO confirm against their definitions.
extern "C" int CL_GroupBy(Record * h_Rin, int rLen, Record* h_Rout, int** h_startPos,
                          int numThread, int numBlock , int _CPU_GPU)
{
    cl_mem d_Rin;
    cl_mem d_Rout;
    cl_mem d_startPos;
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////
    cl_event eventList[2];
    int index=0;
    cl_kernel Kernel;
    int CPU_GPU;
    double burden;
    int memSize = sizeof(Record)*rLen;


    // Device input/output buffers; d_startPos is handed to groupByImpl by
    // address and is presumably allocated there.
    CL_MALLOC( &d_Rin, memSize );
    CL_MALLOC(&d_Rout, memSize );
    cl_writebuffer( d_Rin, h_Rin, memSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
    int numGroup = 0;


    numGroup= groupByImpl(d_Rin, rLen, d_Rout, &d_startPos, numThread, numBlock,&index,eventList,&Kernel,&CPU_GPU,&burden,_CPU_GPU);
    // NOTE(review): the malloc result is not checked before it is used below.
    (*h_startPos) = (int*)malloc( sizeof(int)*numGroup );

    cl_readbuffer( *h_startPos, d_startPos, sizeof(int)*numGroup,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
    cl_readbuffer( h_Rout, d_Rout, sizeof(Record)*rLen,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
    // Wait on the slot selected by (index-1)%2. This assumes index >= 1 by
    // now; with index == 0 the expression evaluates to -1 and would read
    // outside eventList.
    clWaitForEvents(1,&eventList[(index-1)%2]);
    deschedule(CPU_GPU,burden);
    CL_FREE( d_Rin );
    CL_FREE( d_Rout );
    CL_FREE( d_startPos );
    // NOTE(review): Kernel and both eventList slots are released
    // unconditionally; this assumes the helpers above initialised all of them.
    clReleaseKernel(Kernel);
    clReleaseEvent(eventList[0]);
    clReleaseEvent(eventList[1]);
    printf("CL_GroupBy\n");
    return numGroup;
}

开发者ID:johnspaul92，项目名称:omnidb-paralleldbonapu，代码行数:37，代码来源:GroupBy.cpp

示例12: deathray::SingleFrameExecute

// Runs the single-frame filter for the enabled planes.
//
// A plane is processed when its temporal radius is 0 and its h value is
// positive: Y on its own, U and V together. For each enabled plane the source
// is copied to the device, the kernel is executed, and a copy back to the
// host is started whose event is stored in wait_list. The function then
// blocks until all collected events have completed.
// Any step that does not return FILTER_OK is reported via env_->ThrowError.
void deathray::SingleFrameExecute() {	
	cl_uint wait_list_length = 0;
	cl_event wait_list[3];
	result status = FILTER_OK;

	// Upload the source planes.
	if (temporal_radius_Y_ == 0 && h_Y_ > 0.f) {
		status = g_SingleFrame_Y.CopyTo(srcpY_);
		if (status != FILTER_OK) env_->ThrowError("Deathray: Copy Y to device status=%d and OpenCL status=%d", status, g_last_cl_error);
	}
	if (temporal_radius_UV_ == 0 && h_UV_ > 0.f) {
		status = g_SingleFrame_U.CopyTo(srcpU_);
		if (status != FILTER_OK) env_->ThrowError("Deathray: Copy U to device status=%d and OpenCL status=%d", status, g_last_cl_error);
		status = g_SingleFrame_V.CopyTo(srcpV_);
		if (status != FILTER_OK) env_->ThrowError("Deathray: Copy V to device status=%d and OpenCL status=%d", status, g_last_cl_error);
	}

	// Run the kernels and start the read-backs, collecting one event per plane.
	if (temporal_radius_Y_ == 0 && h_Y_ > 0.f) {
		status = g_SingleFrame_Y.Execute();
		if (status != FILTER_OK) env_->ThrowError("Deathray: Execute Y kernel status=%d and OpenCL status=%d", status, g_last_cl_error);
		status = g_SingleFrame_Y.CopyFrom(dstpY_, wait_list);
		if (status != FILTER_OK) env_->ThrowError("Deathray: Copy Y to host status=%d and OpenCL status=%d", status, g_last_cl_error);
		++wait_list_length;
	}

	if (temporal_radius_UV_ == 0 && h_UV_ > 0.f) {
		// Each result must be captured in `status`; otherwise the checks
		// below would test a stale value from an earlier step.
		status = g_SingleFrame_U.Execute();
		if (status != FILTER_OK) env_->ThrowError("Deathray: Execute U kernel status=%d and OpenCL status=%d", status, g_last_cl_error);
		status = g_SingleFrame_U.CopyFrom(dstpU_, wait_list + wait_list_length++);
		if (status != FILTER_OK) env_->ThrowError("Deathray: Copy U to host status=%d and OpenCL status=%d", status, g_last_cl_error);
		status = g_SingleFrame_V.Execute();
		if (status != FILTER_OK) env_->ThrowError("Deathray: Execute V kernel status=%d and OpenCL status=%d", status, g_last_cl_error);
		status = g_SingleFrame_V.CopyFrom(dstpV_, wait_list + wait_list_length++);
		if (status != FILTER_OK) env_->ThrowError("Deathray: Copy V to host status=%d and OpenCL status=%d", status, g_last_cl_error);
	}

	// clWaitForEvents rejects an empty list (CL_INVALID_VALUE), so only wait
	// when at least one read-back was started.
	if (wait_list_length > 0) {
		clWaitForEvents(wait_list_length, wait_list);
	}
}

开发者ID:chappjc，项目名称:Deathray，代码行数:37，代码来源:deathray.cpp

示例13: copyhostptr_roundtrip_func

    // Timed round trip using CL_MEM_COPY_HOST_PTR buffers: creates the A and B
    // device buffers from the host arrays, runs xTrsm_Function(false), and
    // reads B back into buffer_.b_ with a blocking read. The whole sequence
    // runs between timer.Start(timer_id) and timer.Stop(timer_id).
    void copyhostptr_roundtrip_func()
    {
        timer.Start(timer_id);

        // Create device buffers initialised from the host data.
        cl_int err;
        const size_t a_bytes =
            (buffer_.lda_ * buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T);
        const size_t b_bytes =
            (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T);

        buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                        a_bytes, buffer_.a_, &err);
        buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                        b_bytes, buffer_.b_, &err);

        // Run the routine under test.
        xTrsm_Function(false);

        // Read the result back from the device, skipping the leading offset.
        const size_t read_offset = buffer_.offB_ * sizeof(T);
        const size_t read_bytes = buffer_.ldb_ * buffer_.b_num_vectors_ * sizeof(T);
        err = clEnqueueReadBuffer(queue_, buffer_.buf_b_, CL_TRUE,
                                  read_offset, read_bytes,
                                  buffer_.b_, 0, NULL, &event_);
        clWaitForEvents(1, &event_);

        timer.Stop(timer_id);
    }

开发者ID:nagyist，项目名称:clBLAS，代码行数:24，代码来源:clfunc_xtrsm.hpp

示例14: write_to_buffer

// Appends cCandidate to e's candidate buffer and bumps the PSM counter.
//
// When the buffer is empty on entry, the function first waits on
// e->clEventSent, optionally (Tempest::config.profile) adds that event's
// start-to-end profiling interval to e->device->totalSendTime, and releases
// the event. When the buffer reaches candidateBufferSize it is handed to
// e->device->scoreCandidates(e).
void write_to_buffer(eObj* e, cObj cCandidate) {
    Tempest::data.lNumPSMs += 1;

    const bool bufferWasEmpty = (e->iNumBufferedCandidates == 0);
    if (bufferWasEmpty) {
        // Wait for the event recorded in clEventSent before writing into the buffer.
        clWaitForEvents(1, &(e->clEventSent));

        if (Tempest::config.profile) {
            cl_ulong sendStart;
            cl_ulong sendEnd;
            // Both queries always run; the interval is only used if both succeed.
            int status = clGetEventProfilingInfo(e->clEventSent, CL_PROFILING_COMMAND_START,
                                                 sizeof(cl_ulong), &sendStart, NULL);
            status |= clGetEventProfilingInfo(e->clEventSent, CL_PROFILING_COMMAND_END,
                                              sizeof(cl_ulong), &sendEnd, NULL);
            if (status == 0) {
                e->device->totalSendTime += (sendEnd - sendStart);
            }
        }

        clReleaseEvent(e->clEventSent);
    }

    // Store the candidate in the next free slot and update both counters.
    e->candidateBuffer[e->iNumBufferedCandidates] = cCandidate;
    e->iNumCandidates++;
    e->iNumBufferedCandidates++;

    // Buffer full: hand it to the device for scoring.
    if (e->iNumBufferedCandidates == e->candidateBufferSize) {
        e->device->scoreCandidates(e);
    }
}

开发者ID:markadamo，项目名称:tempest，代码行数:24，代码来源:theoretical.cpp

示例15: Dsyrk_internal

/*
 * Run clblasDsyrk (column-major) on host matrices `a` and `c` and read the
 * result back into `c`.
 *
 * a, size_a   input matrix and the size passed to create_mem for it.
 * c, size_c   in/out matrix and the size passed to create_mem/read_mem.
 * alpha, beta scalars forwarded to clblasDsyrk; when beta == 0 the contents
 *             of `c` are not uploaded (create_mem receives NULL).
 * ar, ac      row/column counts of `a`; k is ar for clblasNoTrans, else ac.
 *
 * Returns CL_SUCCESS at the end; failures go through CHECK, whose behaviour
 * is defined outside this block (presumably an early return -- TODO confirm).
 */
cl_int Dsyrk_internal(
  cl_env *env, double *a, double *c, double alpha, double beta,
  clblasTranspose transA, clblasUplo uplo, int ar, int ac, int n, int size_a, int size_c)
{
  CHECK(clblasSetup());
  /* events[0..nevent-1] collect the upload events handed to create_mem. */
  cl_event events[NEVENTS];
  int nevent = 0;
  cl_mem mem_a = create_mem(env, a, size_a, CL_MEM_READ_ONLY, &(events[nevent++]));
  cl_mem mem_c;
  if (beta != 0) mem_c = create_mem(env, c, size_c, CL_MEM_READ_WRITE, &(events[nevent++]));
  else mem_c = create_mem(env, NULL, size_c, CL_MEM_READ_WRITE, NULL);
  
  int k = transA == clblasNoTrans ? ar : ac;
  /* The first `nevent` events form the wait list; the routine's own event is
     written to events[nevent]. */
  cl_int err = clblasDsyrk(clblasColumnMajor, uplo, transA, 
    n, k, alpha, mem_a, 0, ac, beta, mem_c, 0, n,
    1, &(env->queues[0]), nevent, events, &(events[nevent]));
  CHECK(err);
  /* Read c back, passing the Dsyrk event to read_mem; the event it returns
     is stored in the next slot and waited on.
     NOTE(review): this uses indices up to nevent+1 (at most 3), so NEVENTS
     must be at least 4 -- confirm. */
  events[nevent+1] = *read_mem(env, mem_c, c, size_c, 1, &(events[nevent]));
  CHECK(clWaitForEvents(1, &(events[nevent+1])));
  /* NOTE(review): none of the events are released in this function, and if
     CHECK returns early the mem objects and clblasTeardown() are skipped --
     confirm whether that is intended. */
  CHECK(clReleaseMemObject(mem_a));
  CHECK(clReleaseMemObject(mem_c));
  clblasTeardown();
  return CL_SUCCESS;
}

开发者ID:yeomii，项目名称:RclBLAS，代码行数:24，代码来源:blas3-wrapper.c

注：本文中的clWaitForEvents函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。