本文整理汇总了C++中clWaitForEvents函数的典型用法代码示例。如果您正苦于以下问题：C++ clWaitForEvents函数的具体用法？C++ clWaitForEvents怎么用？C++ clWaitForEvents使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了clWaitForEvents函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: oclLaunchKernel
/*
 * Enqueues kernel `k` on queue `q` as a 1-D NDRange sized for `nbobj` items,
 * blocks until the kernel's event completes, and returns the value that
 * oclChronoElaps() reports for that event.
 *
 * k, q      kernel and command queue to use
 * nbobj     number of objects handed to oclMkNDrange() to size the range
 * nbthread  upper bound on the work-group size requested by the caller
 * fname     caller's file name, forwarded to oclCheckErrF() for diagnostics
 * line      caller's line number, forwarded to oclCheckErrF()
 */
double
oclLaunchKernel(cl_kernel k, cl_command_queue q, int nbobj, int nbthread, const char *fname, const int line)
{
    cl_int status = 0;
    dim3 globalSize, localSize;
    cl_event done;
    cl_device_id device = oclGetDeviceOfCQueue(q);
    size_t preferredMultiple = 32;

    // Start from what the kernel supports on this device, capped by the request.
    int groupSize = oclGetMaxWorkSize(k, device);
    groupSize = MIN(groupSize, nbthread);

    // Ask the runtime which work-group size multiple the hardware prefers.
    status = clGetKernelWorkGroupInfo(k, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
                                      sizeof(preferredMultiple), &preferredMultiple, NULL);
    oclCheckErr(status, "clGetKernelWorkGroupInfo CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE");

    // Adjust the group size with oclMultiple(); the original author notes that
    // an AMD 7970 crashes when the preferred multiple is not respected.
    groupSize = oclMultiple(groupSize, preferredMultiple);
    oclMkNDrange(nbobj, groupSize, NDR_1D, globalSize, localSize);

    status = clEnqueueNDRangeKernel(q, k, NDR_1D, NULL, globalSize, localSize, 0, NULL, &done);
    oclCheckErrF(status, "clEnqueueNDRangeKernel", fname, line);

    status = clWaitForEvents(1, &done);
    oclCheckErrF(status, "clWaitForEvents", fname, line);

    // Read the timing before the event handle is given back to the runtime.
    const double elapsed = oclChronoElaps(done);

    status = clReleaseEvent(done);
    oclCheckErrF(status, "clReleaseEvent", fname, line);

    return elapsed;
}
示例2: RunRoutine
// Runs the CLBlast Hbmv routine with the given arguments and buffers, then
// synchronises with the device before returning the routine's status code.
// The OpenCL build waits on (and releases) the routine's event only when the
// call succeeded; the CUDA build synchronises the stream unconditionally.
static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
#ifdef OPENCL_API
  cl_event done = nullptr;
  auto raw_queue = queue();
  const auto status = Hbmv(args.layout, args.triangle,
                           args.n, args.kl, args.alpha,
                           buffers.a_mat(), args.a_offset, args.a_ld,
                           buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
                           buffers.y_vec(), args.y_offset, args.y_inc,
                           &raw_queue, &done);
  if (status == StatusCode::kSuccess) {
    clWaitForEvents(1, &done);
    clReleaseEvent(done);
  }
#elif CUDA_API
  const auto status = Hbmv(args.layout, args.triangle,
                           args.n, args.kl, args.alpha,
                           buffers.a_mat(), args.a_offset, args.a_ld,
                           buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
                           buffers.y_vec(), args.y_offset, args.y_inc,
                           queue.GetContext()(), queue.GetDevice()());
  cuStreamSynchronize(queue());
#endif
  return status;
}
示例3: analyzeEvent
/**
 * Builds a TimelineEntry from the profiling counters of `event`.
 *
 * Blocks until the event has completed, then queries its QUEUED, START and END
 * profiling counters. Each counter is divided by 1e9 (OpenCL reports them in
 * nanoseconds, so the entry holds seconds).
 *
 * The counters are zero-initialised and read into separate variables, so a
 * failed clGetEventProfilingInfo() call (for example when the queue was not
 * created with profiling enabled) yields a defined zero timestamp instead of
 * an indeterminate value.
 *
 * @param event  Event to analyse; it is waited on but not released.
 * @return       Entry with start/end times and the derived durations.
 */
PerformanceAnalyser::TimelineEntry PerformanceAnalyser::analyzeEvent(cl_event &event) {
    // Wait for event information to be ready
    clWaitForEvents(1, &event);

    cl_ulong queued = 0;
    cl_ulong started = 0;
    cl_ulong ended = 0;
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, NULL);
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ended, NULL);
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &started, NULL);

    TimelineEntry entry;
    entry.start_time = (double) queued / 1000000000.0;
    entry.end_time = (double) ended / 1000000000.0;
    const double exec_start = (double) started / 1000000000.0;

    entry.execution_time = entry.end_time - exec_start;    // START  -> END
    entry.api_overhead = exec_start - entry.start_time;    // QUEUED -> START
    entry.total_time = entry.end_time - entry.start_time;  // QUEUED -> END
    // Wall-clock time since m_time that is not accounted for by the command.
    entry.cpu_time = (getTime()-m_time)-entry.total_time;
    return entry;
}
示例4: read_value
void read_value(){
int err;
cl_event readevent;
err = clEnqueueReadBuffer(commands, d_output, CL_TRUE, 0,
REC_N * sizeof(cl_int),
h_output, 0, NULL, &readevent);
if (err != CL_SUCCESS)
{
printf("Error: Failed to read output array! %d\n", err);
printf("Test failed\n");
exit(1);
}
clWaitForEvents(1, &readevent);
printf("\n[host] outputs:\n");
for (int i = 0; i < REC_N; ++i) {
printf("%d ", h_output[i]);
}
printf("\n");
}
示例5: mat_mul_cl_clblas
void mat_mul_cl_clblas(const F *A, const F *B, F *C, size_t n, Cache *cache) {
cl_event event;
size_t mat_sizeof;
mat_sizeof = n * n * sizeof(F);
clEnqueueWriteBuffer(cache->common.command_queue, cache->buf_a, CL_TRUE, 0, mat_sizeof, (F*)A, 0, NULL, NULL);
clEnqueueWriteBuffer(cache->common.command_queue, cache->buf_b, CL_TRUE, 0, mat_sizeof, (F*)B, 0, NULL, NULL);
clblasSgemm(
clblasRowMajor,
clblasNoTrans,
clblasNoTrans,
n,
n,
n,
1.0,
cache->buf_a,
0,
n,
cache->buf_b,
0,
n,
0.0,
cache->buf_c,
0,
n,
1,
&(cache->common.command_queue),
0,
NULL,
&event
);
clWaitForEvents(1, &event);
clEnqueueReadBuffer(cache->common.command_queue, cache->buf_c, CL_TRUE, 0, mat_sizeof, C, 0, NULL, NULL);
}
示例6: mwWaitReleaseEvent
/*
 * Blocks until *ev has completed, then releases it.
 *
 * Returns CL_SUCCESS when both steps succeed. Otherwise the failing call's
 * error code is reported through mwPerrorCL() and returned; the event is not
 * released when the wait itself fails.
 */
cl_int mwWaitReleaseEvent(cl_event* ev)
{
    cl_int status;

    assert(ev);

    status = clWaitForEvents(1, ev);
    if (status == CL_SUCCESS)
    {
        status = clReleaseEvent(*ev);
        if (status != CL_SUCCESS)
        {
            mwPerrorCL(status, "Failed to release event");
        }
    }
    else
    {
        mwPerrorCL(status, "Failed to wait for event");
    }

    return status;
}
示例7: copyTo
/*!
Copies the contents of this buffer, starting at \a offset to
\a rect within \a dest. Returns true if the copy was successful;
false otherwise.
This function will block until the request finishes.
The request is executed on the active command queue for context().
\sa copyToAsync()
*/
bool QCLBuffer::copyTo
(size_t offset, const QCLImage2D &dest, const QRect &rect)
{
const size_t dst_origin[3] = {static_cast<size_t>(rect.x()),
static_cast<size_t>(rect.y()), 0
};
const size_t region[3] = {static_cast<size_t>(rect.width()),
static_cast<size_t>(rect.height()), 1
};
cl_event event;
cl_int error = clEnqueueCopyBufferToImage
(context()->activeQueue(), memoryId(), dest.memoryId(),
offset, dst_origin, region, 0, 0, &event);
context()->reportError("QCLBuffer::copyTo(QCLImage2D):", error);
if (error == CL_SUCCESS) {
clWaitForEvents(1, &event);
clReleaseEvent(event);
return true;
} else {
return false;
}
}
示例8: testScanImpl
// Test driver for scanImpl(): fills a host array with rLen generated ints,
// copies it to the device, runs the scan, reads the result back, waits for
// the last recorded event and then frees everything it allocated.
//
// rLen: number of int elements in the input and output arrays.
//
// NOTE(review): HOST_MALLOC/CL_MALLOC/HOST_FREE/CL_FREE and the cl_* helpers
// are defined elsewhere; the comments below describe only how they are used
// here, not what they do internally.
void testScanImpl(int rLen)
{
// Passed unchanged as the last argument of every helper call below.
int _CPU_GPU=0;
// Two-slot event array plus a running index, shared by all helper calls.
// Presumably each helper stores its event in eventList and advances index
// -- TODO confirm against cl_writebuffer/cl_readbuffer.
cl_event eventList[2];
int index=0;
// Filled in by scanImpl() and released at the end of this function.
cl_kernel Kernel;
// Out-parameters of the helpers, later handed to deschedule().
int CPU_GPU;
double burden;
// NOTE(review): `result` is never read in this function.
int result=0;
// Input and output both hold rLen ints.
int memSize=sizeof(int)*rLen;
int outSize=sizeof(int)*rLen;
void *Rin;
HOST_MALLOC(Rin, memSize);
generateRandInt((int*)Rin, rLen,rLen,0);
void *Rout;
HOST_MALLOC(Rout, outSize);
cl_mem d_Rin;
CL_MALLOC(&d_Rin, memSize);
cl_mem d_Rout;
CL_MALLOC(&d_Rout, outSize);
// Host -> device transfer of the input.
cl_writebuffer(d_Rin, Rin, memSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
ScanPara *SP;
// NOTE(review): the malloc result is not checked, and SP is not freed in
// this function -- confirm whether closeScan() takes care of it.
SP=(ScanPara*)malloc(sizeof(ScanPara));
initScan(rLen,SP);
scanImpl(d_Rin,rLen,d_Rout,&index,eventList,&Kernel,&CPU_GPU,&burden,SP,_CPU_GPU);
// Device -> host transfer of the result.
cl_readbuffer(Rout, d_Rout, outSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
// Wait on slot (index-1)%2, presumably the most recently recorded event.
// NOTE(review): if index were still 0 this would index eventList[-1].
clWaitForEvents(1,&eventList[(index-1)%2]);
closeScan(SP);
deschedule(CPU_GPU,burden);
//validateScan( (int*)Rin, rLen, (int*)Rout );
HOST_FREE(Rin);
HOST_FREE(Rout);
CL_FREE(d_Rin);
CL_FREE(d_Rout);
clReleaseKernel(Kernel);
// Both slots are released unconditionally; this assumes the helpers filled
// both of them -- TODO confirm.
clReleaseEvent(eventList[0]);
clReleaseEvent(eventList[1]);
}
示例9: acc_event_synchronize
int acc_event_synchronize (void* event){
// debug info
if (verbose_print){
fprintf(stdout, "\n ... EVENT SYNCHRONIZATION ... \n");
fprintf(stdout, " ---> Entering: acc_event_synchronize.\n");
}
// local event and queue pointers
cl_event *clevent = (cl_event *) event;
// wait for an event ( !!! need to share the same ctx !!! )
cl_error = clWaitForEvents((cl_uint) 1, clevent);
if (acc_opencl_error_check(cl_error, __LINE__))
return -1;
// debug info
if (verbose_print){
fprintf(stdout, " ---> Leaving: acc_event_synchronize.\n");
}
// assign return value
return 0;
}
示例10: pclu_call_kernel
/*
 * Creates the kernel called `name` from pgm->program, binds its arguments and
 * enqueues it on pgm->pclu->queue over `range`.
 *
 * The variadic tail must contain exactly `argc` pairs of
 * (size_t size, void* value), which are passed to clSetKernelArg() in order.
 *
 * Every OpenCL status is handed to pclu_check_call().
 *
 * Fixes over the previous version:
 *  - the event returned by clEnqueueNDRangeKernel() is released; it used to
 *    be leaked on every call;
 *  - the disabled code path no longer creates a user event that the enqueue
 *    call would immediately overwrite (and therefore leak).
 *
 * With NO_CL_EVENTS defined (as it is here) the function does not wait for
 * the kernel to finish, exactly as before.
 */
void
pclu_call_kernel(pclu_program* pgm, const char* name, pclu_range range, size_t argc, ...)
{
    cl_int errcode;
    cl_kernel kern = clCreateKernel(pgm->program, name, &errcode);
    pclu_check_call("clCreateKernel", errcode);

    va_list ap;
    va_start(ap, argc);

    for (cl_uint ii = 0; ii < argc; ++ii) {
        size_t size = va_arg(ap, size_t);
        void*  arg  = va_arg(ap, void*);
        pclu_check_call("clSetKernelArg", clSetKernelArg(kern, ii, size, arg));
    }

    va_end(ap);

    /* Kept as in the original: defining this disables the blocking wait below. */
#define NO_CL_EVENTS 1

    cl_event kernel_done = 0;
    errcode = clEnqueueNDRangeKernel(pgm->pclu->queue, kern, range.nd, 0,
                                     range.global, 0, 0, 0, &kernel_done);
    pclu_check_call("clEnqueueNDRangeKernel", errcode);

#ifndef NO_CL_EVENTS
    pclu_check_call("clWaitForEvents", clWaitForEvents(1, &kernel_done));
#endif

    /* Releasing the handle does not cancel the command; it only drops our
       reference so the runtime can free the event once the kernel is done. */
    if (kernel_done != 0) {
        pclu_check_call("clReleaseEvent", clReleaseEvent(kernel_done));
    }

    pclu_check_call("clReleaseKernel", clReleaseKernel(kern));
}
示例11: CL_GroupBy
// C-linkage entry point that runs groupByImpl() on rLen host records.
//
// h_Rin:      input records (rLen elements), copied to the device.
// rLen:       number of records.
// h_Rout:     receives rLen records read back from the device.
// h_startPos: *h_startPos is set to a malloc'ed array of numGroup ints read
//             back from the device; nothing in this function frees it.
// numThread, numBlock: forwarded unchanged to groupByImpl().
// _CPU_GPU:   forwarded unchanged to every helper call.
//
// Returns the group count reported by groupByImpl().
//
// NOTE(review): CL_MALLOC/CL_FREE and the cl_* helpers are defined elsewhere;
// the comments below describe only how they are used here.
extern "C" int CL_GroupBy(Record * h_Rin, int rLen, Record* h_Rout, int** h_startPos,
int numThread, int numBlock , int _CPU_GPU)
{
cl_mem d_Rin;
cl_mem d_Rout;
// Not allocated here; its address is passed to groupByImpl(), which
// presumably creates the buffer -- TODO confirm.
cl_mem d_startPos;
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Two-slot event array plus a running index, shared by all helper calls.
cl_event eventList[2];
int index=0;
// Filled in by groupByImpl() and released at the end of this function.
cl_kernel Kernel;
// Out-parameters of the helpers, later handed to deschedule().
int CPU_GPU;
double burden;
int memSize = sizeof(Record)*rLen;
CL_MALLOC( &d_Rin, memSize );
CL_MALLOC(&d_Rout, memSize );
// Host -> device transfer of the input records.
cl_writebuffer( d_Rin, h_Rin, memSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
int numGroup = 0;
numGroup= groupByImpl(d_Rin, rLen, d_Rout, &d_startPos, numThread, numBlock,&index,eventList,&Kernel,&CPU_GPU,&burden,_CPU_GPU);
// NOTE(review): the malloc result is not checked before it is used.
(*h_startPos) = (int*)malloc( sizeof(int)*numGroup );
// Device -> host transfers of the group start positions and the records.
cl_readbuffer( *h_startPos, d_startPos, sizeof(int)*numGroup,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
cl_readbuffer( h_Rout, d_Rout, sizeof(Record)*rLen,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
// Wait on slot (index-1)%2, presumably the most recently recorded event.
clWaitForEvents(1,&eventList[(index-1)%2]);
deschedule(CPU_GPU,burden);
CL_FREE( d_Rin );
CL_FREE( d_Rout );
CL_FREE( d_startPos );
clReleaseKernel(Kernel);
// Both slots are released unconditionally; this assumes the helpers filled
// both of them -- TODO confirm.
clReleaseEvent(eventList[0]);
clReleaseEvent(eventList[1]);
printf("CL_GroupBy\n");
return numGroup;
}
示例12: SingleFrameExecute
// Filters one frame in single-frame mode.
//
// A plane is processed only when its temporal radius is 0 and its h value is
// positive: Y on its own, U and V together. All enabled planes are first
// copied to the device; then each plane's kernel is executed and its
// copy-back is started, with the copy-back's event stored in wait_list.
// Finally the function blocks on all collected events.
//
// Any step that does not return FILTER_OK is reported through
// env_->ThrowError() together with g_last_cl_error.
//
// Fixes over the previous version:
//  - the U and V Execute()/CopyFrom() results are now stored in `status`;
//    before, they were discarded and the checks tested a stale value (or an
//    uninitialised one when Y processing was disabled);
//  - clWaitForEvents() is skipped when no event was collected, since a zero
//    count is an invalid argument for that call.
void deathray::SingleFrameExecute() {
  cl_uint wait_list_length = 0;
  cl_event wait_list[3];  // at most one copy-back event per plane (Y, U, V)
  result status = FILTER_OK;

  const bool process_Y = (temporal_radius_Y_ == 0 && h_Y_ > 0.f);
  const bool process_UV = (temporal_radius_UV_ == 0 && h_UV_ > 0.f);

  // Upload phase.
  if (process_Y) {
    status = g_SingleFrame_Y.CopyTo(srcpY_);
    if (status != FILTER_OK) env_->ThrowError("Deathray: Copy Y to device status=%d and OpenCL status=%d", status, g_last_cl_error);
  }

  if (process_UV) {
    status = g_SingleFrame_U.CopyTo(srcpU_);
    if (status != FILTER_OK) env_->ThrowError("Deathray: Copy U to device status=%d and OpenCL status=%d", status, g_last_cl_error);
    status = g_SingleFrame_V.CopyTo(srcpV_);
    if (status != FILTER_OK) env_->ThrowError("Deathray: Copy V to device status=%d and OpenCL status=%d", status, g_last_cl_error);
  }

  // Execute + download phase.
  if (process_Y) {
    status = g_SingleFrame_Y.Execute();
    if (status != FILTER_OK) env_->ThrowError("Deathray: Execute Y kernel status=%d and OpenCL status=%d", status, g_last_cl_error);
    status = g_SingleFrame_Y.CopyFrom(dstpY_, wait_list);
    if (status != FILTER_OK) env_->ThrowError("Deathray: Copy Y to host status=%d and OpenCL status=%d", status, g_last_cl_error);
    ++wait_list_length;
  }

  if (process_UV) {
    status = g_SingleFrame_U.Execute();
    if (status != FILTER_OK) env_->ThrowError("Deathray: Execute U kernel status=%d and OpenCL status=%d", status, g_last_cl_error);
    status = g_SingleFrame_U.CopyFrom(dstpU_, wait_list + wait_list_length++);
    if (status != FILTER_OK) env_->ThrowError("Deathray: Copy U to host status=%d and OpenCL status=%d", status, g_last_cl_error);

    status = g_SingleFrame_V.Execute();
    if (status != FILTER_OK) env_->ThrowError("Deathray: Execute V kernel status=%d and OpenCL status=%d", status, g_last_cl_error);
    status = g_SingleFrame_V.CopyFrom(dstpV_, wait_list + wait_list_length++);
    if (status != FILTER_OK) env_->ThrowError("Deathray: Copy V to host status=%d and OpenCL status=%d", status, g_last_cl_error);
  }

  if (wait_list_length > 0) {
    clWaitForEvents(wait_list_length, wait_list);
  }
}
示例13: copyhostptr_roundtrip_func
void copyhostptr_roundtrip_func()
{
timer.Start(timer_id);
//set up buffer
cl_int err;
buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
(buffer_.lda_ * buffer_.a_num_vectors_ +
buffer_.offA_) * sizeof(T),
buffer_.a_, &err);
buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
(buffer_.ldb_ * buffer_.b_num_vectors_ +
buffer_.offB_) * sizeof(T),
buffer_.b_, &err);
//call func
xTrsm_Function(false);
//read gpu buffer
err = clEnqueueReadBuffer(queue_, buffer_.buf_b_, CL_TRUE,
buffer_.offB_ * sizeof(T), buffer_.ldb_ * buffer_.b_num_vectors_ *
sizeof(T),
buffer_.b_, 0, NULL, &event_);
clWaitForEvents(1, &event_);
timer.Stop(timer_id);
}
示例14: write_to_buffer
// Appends one candidate to e's candidate buffer and hands the buffer to
// e->device->scoreCandidates() once it holds candidateBufferSize entries.
//
// Before the first candidate is written into an empty buffer, the function
// waits on e->clEventSent and then releases that event. When profiling is
// enabled and both profiling queries succeed, the START-to-END duration of
// the event is added to e->device->totalSendTime.
void write_to_buffer(eObj* e, cObj cCandidate) {
    ++Tempest::data.lNumPSMs;

    const bool bufferIsEmpty = (e->iNumBufferedCandidates == 0);
    if (bufferIsEmpty) {
        clWaitForEvents(1, &(e->clEventSent));

        if (Tempest::config.profile) {
            cl_ulong sendStart;
            cl_ulong sendEnd;
            const int startStatus = clGetEventProfilingInfo(e->clEventSent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &sendStart, NULL);
            const int endStatus = clGetEventProfilingInfo(e->clEventSent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &sendEnd, NULL);
            // Only account for the transfer when both timestamps are valid.
            if ((startStatus | endStatus) == 0) {
                e->device->totalSendTime += (sendEnd - sendStart);
            }
        }

        clReleaseEvent(e->clEventSent);
    }

    // Store the candidate in the next free slot and bump both counters.
    e->candidateBuffer[e->iNumBufferedCandidates++] = cCandidate;
    e->iNumCandidates++;

    // A full buffer is scored right away.
    if (e->iNumBufferedCandidates == e->candidateBufferSize) {
        e->device->scoreCandidates(e);
    }
}
示例15: Dsyrk_internal
/*
 * Runs clblasDsyrk() on host matrices `a` and `c` and copies the result back
 * into `c`.
 *
 * Device buffers are created through create_mem(); `c` is uploaded only when
 * beta is non-zero, otherwise the C buffer is created without host data. The
 * upload events form the wait list of the DSYRK call, the DSYRK event is the
 * wait list of the read-back, and the function blocks on the read-back event.
 *
 * Every status goes through the project's CHECK macro (presumably an early
 * return on error -- TODO confirm). Returns CL_SUCCESS when all steps pass.
 */
cl_int Dsyrk_internal(
    cl_env *env, double *a, double *c, double alpha, double beta,
    clblasTranspose transA, clblasUplo uplo, int ar, int ac, int n, int size_a, int size_c)
{
    CHECK(clblasSetup());

    cl_event events[NEVENTS];
    int pending = 0;  /* number of upload events recorded so far */

    cl_mem mem_a = create_mem(env, a, size_a, CL_MEM_READ_ONLY, &(events[pending++]));
    cl_mem mem_c = (beta != 0)
        ? create_mem(env, c, size_c, CL_MEM_READ_WRITE, &(events[pending++]))
        : create_mem(env, NULL, size_c, CL_MEM_READ_WRITE, NULL);

    /* k is taken from a's row or column count, depending on transA. */
    int k;
    if (transA == clblasNoTrans) {
        k = ar;
    } else {
        k = ac;
    }

    /* Slots right after the upload events: DSYRK first, then the read-back. */
    cl_event *kernel_done = &(events[pending]);
    cl_event *read_done = &(events[pending + 1]);

    cl_int err = clblasDsyrk(clblasColumnMajor, uplo, transA,
                             n, k, alpha, mem_a, 0, ac, beta, mem_c, 0, n,
                             1, &(env->queues[0]), pending, events, kernel_done);
    CHECK(err);

    *read_done = *read_mem(env, mem_c, c, size_c, 1, kernel_done);
    CHECK(clWaitForEvents(1, read_done));

    CHECK(clReleaseMemObject(mem_a));
    CHECK(clReleaseMemObject(mem_c));
    clblasTeardown();
    return CL_SUCCESS;
}