本文整理汇总了 C++ 中 cudaEventCreate 函数的典型用法代码示例。如果您正苦于以下问题：C++ cudaEventCreate 函数的具体用法？C++ cudaEventCreate 怎么用？C++ cudaEventCreate 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cudaEventCreate函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: cudaEventCreate
// Constructs the timer by creating its two CUDA events, `inicio` and `fin`.
// The status returned by each cudaEventCreate call is stored in a local but
// is not inspected afterwards.
cudaTimer::cudaTimer()
{
    cudaError_t status = cudaEventCreate(&inicio);
    status = cudaEventCreate(&fin);
}
示例2: time_invocation_cuda
double time_invocation_cuda(std::size_t num_trials, Function f, Arg1 arg1, Arg2 arg2, Arg3 arg3)
{
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start);
for(std::size_t i = 0;
i < num_trials;
++i)
{
f(arg1,arg2,arg3);
}
cudaEventRecord(stop);
cudaThreadSynchronize();
float msecs = 0;
cudaEventElapsedTime(&msecs, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
// return mean msecs
return msecs / num_trials;
}
示例3: check
float bench::ClockBenchmark::_determineCycleTime() {
cudaEvent_t start, end;
check( cudaEventCreate(&start) );
check( cudaEventCreate(&end) );
unsigned long long elapsedCycles;
unsigned long long* deviceElapsedCycles;
long long int* deviceDummyMem;
const dim3 grid(1,1,1), block(1,1,1);
check( cudaMalloc((void**)&deviceElapsedCycles, sizeof(unsigned long long)) );
check( cudaMalloc((void**)&deviceDummyMem, sizeof(long long int)) );
check( cudaEventRecord(start) );
cudaDetermineCycleTimeWrapper(deviceElapsedCycles, deviceDummyMem, grid, block);
check( cudaEventRecord(end) );
check( cudaDeviceSynchronize() );
check( cudaMemcpy(&elapsedCycles, deviceElapsedCycles, sizeof(unsigned long long), cudaMemcpyDeviceToHost) );
float elapsedTime = 0;
check( cudaEventElapsedTime(&elapsedTime, start, end) );
report(util::Indents(2) << "elapsed time: " << elapsedTime << "ms");
report(util::Indents(2) << "elapsed cycles: " << elapsedCycles);
return elapsedTime * 1000000.0 / (float)elapsedCycles;
}
示例4: main
int main()
{
cudaEvent_t start;
cudaEvent_t end;
float duration;
const float overestimateRate = 0.01f;
const float errorRate = 0.01f;
Tokenizer tokenizer( overestimateRate, errorRate );
/************** Test counting string tokens *************/
TextReader reader;
cudaEventCreate( &start );
cudaEventRecord( start, 0 );
reader.Read();
tokenizer.StartTokenizing(
reader.GetCharBuffer(),
reader.GetOffsetBuffer(),
reader.GetCharBufferSize(),
reader.GetOffsetBufferSize() );
cudaEventCreate( &end );
cudaEventRecord( end, 0 );
cudaEventSynchronize( end );
cudaEventElapsedTime( &duration, start, end );
printf( "Time taken: %.3lf milliseconds\n", duration );
tokenizer.GetFrequency( "a" );
}
示例5: trainMethodsSpeedTestGPU
// Trains a copy of `ann` on `train` for `epochCount` epochs with the given
// training algorithm, timing the training call with CUDA events on stream 0
// and printing the elapsed milliseconds. The copy is destroyed afterwards;
// `ann` itself is only read by fann_copy.
void trainMethodsSpeedTestGPU(fann *ann, fann_train_data* train, unsigned int trainingAlgorithm, unsigned int epochCount)
{
    fann *gpuNet = fann_copy(ann);
    gpuNet->training_algorithm = static_cast<fann_train_enum>(trainingAlgorithm);

    cudaEvent_t evBegin;
    cudaEvent_t evEnd;
    float elapsedMs;

    cudaEventCreate(&evBegin);
    cudaEventCreate(&evEnd);

    // Bracket the training call with the two events.
    cudaEventRecord(evBegin, 0);
    gpuann_fann_parallel_train_on_data(gpuNet, train, epochCount);
    cudaEventRecord(evEnd, 0);

    // Wait for the closing event before reading the interval.
    cudaEventSynchronize(evEnd);
    cudaEventElapsedTime(&elapsedMs, evBegin, evEnd);

    cudaEventDestroy(evBegin);
    cudaEventDestroy(evEnd);

    printf("%10.5f ", elapsedMs);

    fann_destroy(gpunNetPlaceholderGuard(gpuNet));
}
示例6: StartTimer
// Creates the timerStart and timerStop events and records timerStart on
// stream 0. Always returns 0.
unsigned int StartTimer()
{
    cudaEventCreate(&timerStart);
    cudaEventCreate(&timerStop);

    cudaEventRecord(timerStart, 0);

    return 0u;
}
示例7: runCuda
void runCuda()
{
//////////////////////
// Timing cuda call //
//////////////////////
float time;
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
// Map OpenGL buffer object for writing from CUDA on a single GPU
// No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
dptr=NULL;
vbo = mesh->getVBO();
vbosize = mesh->getVBOsize();
nbo = mesh->getNBO();
nbosize = mesh->getNBOsize();
#if RGBONLY == 1
float newcbo[] = {0.0, 1.0, 0.0,
0.0, 0.0, 1.0,
1.0, 0.0, 0.0};
cbo = newcbo;
cbosize = 9;
#elif RGBONLY == 0
vec3 defaultColor(0.5f, 0.5f, 0.5f);
mesh->changeColor(defaultColor);
cbo = mesh->getCBO();
cbosize = mesh->getCBOsize();
#endif
ibo = mesh->getIBO();
ibosize = mesh->getIBOsize();
cudaGLMapBufferObject((void**)&dptr, pbo);
updateCamera();
cudaRasterizeCore(cam, dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize, nbo, nbosize, lights, lightsize, alpha, beta, displayMode);
cudaGLUnmapBufferObject(pbo);
vbo = NULL;
cbo = NULL;
ibo = NULL;
frame++;
fpstracker++;
//////////////////////
// Timing cuda call //
//////////////////////
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
printf("runCuda runtime: %3.1f ms \n", time);
}
示例8: start
// Builds a timer associated with `stream`. Both event handles are first set
// to null and then created; each cudaEventCreate result is passed through
// CUDA_CHECK.
CudaTimer::CudaTimer(cudaStream_t stream)
    : start(nullptr),
      end(nullptr),
      stream(stream)
{
    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&end));
}
示例9: CUDATimer
// Default constructor: clears the start/stop/elapsed values and the running
// flag, creates the three CUDA events, and records cubase_ right away.
CUDATimer()
{
    start_ = 0.0;
    stop_ = 0.0;
    elapsed_ = 0.0;
    is_running_ = false;

    cudaEventCreate(&custart_);
    cudaEventCreate(&custop_);
    cudaEventCreate(&cubase_);

    cudaEventRecord(cubase_);
} // CUDATimer()
示例10: contractTT
// Runs the full TT1 x TT2 contraction loop, prints timing, and exits.
//
// For every pair (a, b) with a < TT1[0].size[0] and b < TT2[0].size[0], the
// first and last entries of TT1 / TT2 are rebuilt with prepareTensorStart /
// prepareTensorEnd from host copies taken before the loop, contractTensor is
// chained across all n entries, and one `type` value is copied back from the
// device and added to a running total.
//
// The loop is bracketed by CUDA events. Afterwards the elapsed time, `bops`
// and the derived GFlop/s figure are printed, the cuBLAS handle is destroyed,
// the device is reset, the total is printed, and the process terminates via
// exit(0) -- this function does not return to its caller.
void contractTT(sTensorGPU *TT1, sTensorGPU *TT2, const int n, const int size)
{
    cublasHandle_t blas;
    cublasCreate(&blas);

    type total = 0;

    // Scratch tensors that the contraction chain ping-pongs between.
    sTensorGPU scratchA = emptyTensor(size*size, 2);
    sTensorGPU scratchB = emptyTensor(size*size*2, 3);

    cudaEvent_t evBegin;
    cudaEventCreate(&evBegin);
    cudaEvent_t evEnd;
    cudaEventCreate(&evEnd);

    //printf("Start contractTT\n");
    cudaEventRecord(evBegin, NULL);

    const int countA = TT1[0].size[0];
    const int countB = TT2[0].size[0];

    // Host copies of the boundary entries, taken before they are overwritten.
    sTensorCPU firstA = copyToCPU(TT1[0]);
    sTensorCPU firstB = copyToCPU(TT2[0]);
    sTensorCPU lastA = copyToCPU(TT1[n - 1]);
    sTensorCPU lastB = copyToCPU(TT2[n - 1]);

    for (int a = 0; a < countA; a++) {
        TT1[0] = prepareTensorStart(firstA, a);
        TT1[n - 1] = prepareTensorEnd(lastA, a);

        for (int b = 0; b < countB; b++) {
            TT2[0] = prepareTensorStart(firstB, b);
            TT2[n - 1] = prepareTensorEnd(lastB, b);

            contractTensor(blas, TT1[0], TT2[0], scratchA);

            // Entries 1 .. n-1 (the original reused the name `i` here,
            // shadowing the outer index).
            for (int k = 1; k < n; k++) {
                contractTensor(blas, scratchA, TT1[k], scratchB);
                contractTensor(blas, scratchB, TT2[k], scratchA, 2);
            }

            type add = 0;
            cudaMemcpy(&add, scratchA.deviceData, sizeof(type), cudaMemcpyDeviceToHost);
            //printf("%e ", add);
            total += add;
        }
    }

    cudaEventRecord(evEnd, NULL);
    cudaEventSynchronize(evEnd);

    float msecTotal = 0.0f;
    cudaEventElapsedTime(&msecTotal, evBegin, evEnd);

    printf("Time: %.3fms\n", msecTotal);
    printf("Ops: %.0f\n", bops);

    double gigaFlops = (bops * 1.0e-9f) / (msecTotal / 1000.0f);
    printf("Perf= %.2f GFlop/s\n", gigaFlops);

    cublasDestroy(blas);
    cudaDeviceReset();

    printf("%.5e \n", total);
    exit(0);
}
示例11: CUDA_CHECK
// One-time initialisation. Does nothing if the timer is already initialised;
// otherwise creates the GPU start/stop events when Caffe is in GPU mode and
// then marks the timer as initialised.
void Timer::Init() {
    if (initted()) {
        return;
    }

    const bool gpuMode = (Caffe::mode() == Caffe::GPU);
    if (gpuMode) {
        CUDA_CHECK(cudaEventCreate(&start_gpu_));
        CUDA_CHECK(cudaEventCreate(&stop_gpu_));
    }

    initted_ = true;
}
示例12: startTest
// startTest ------------------------------------------------------------------
// Creates both CUDA timer events, prints a message, and starts the timer by
// recording the start event on stream 0.
// @param start - Start time event (created here, then recorded)
// @param stop  - Stop time event (created here, not recorded)
// @param msg   - Text printed, followed by a newline, before timing starts
//-----------------------------------------------------------------------------
void startTest(cudaEvent_t &start, cudaEvent_t &stop, char* msg)
{
    // Set up the event pair.
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    // Announce the test, then begin timing.
    printf("%s\n", msg);
    cudaEventRecord(start, 0);
}
示例13: sobel1
void sobel1(int *h_result, unsigned int *h_pic, int xsize, int ysize, int thresh)
{
int *d_result;
unsigned int *d_pic;
int resultSize = xsize * ysize * 3 * sizeof(int);
int picSize = xsize * ysize * sizeof(int);
cudaMalloc( (void**)&d_result, resultSize);
if( !d_result) {
exit(-1);
}
cudaMalloc( (void**)&d_pic, picSize);
if( !d_pic) {
exit(-1);
}
cudaMemcpy(d_result, h_result, resultSize, cudaMemcpyHostToDevice);
cudaMemcpy(d_pic, h_pic, picSize, cudaMemcpyHostToDevice);
dim3 threadsPerBlock(BLOCKSIZE, BLOCKSIZE);
dim3 numBlocks(ceil((float)ysize/(float)threadsPerBlock.x), ceil((float)xsize/(float)threadsPerBlock.y));
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
{ __set_CUDAConfig(numBlocks, threadsPerBlock );
d_sobel1 (d_result, d_pic, xsize, ysize, thresh);}
cudaEventSynchronize(stop);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaMemcpy(h_result, d_result, resultSize, cudaMemcpyDeviceToHost);
cudaMemcpy(h_pic, d_pic, picSize, cudaMemcpyDeviceToHost);
cudaFree(d_result);
cudaFree(d_pic);
}
示例14: startTimer_GPU
// Marks `mtime` as a GPU timer, lazily creates whichever of its two events is
// still unset (== 0), and records the start event.
//
// Each event is created independently: previously, if only one handle was
// unset, both were recreated and the already-valid one was overwritten
// without being destroyed.
//
// @param mtime timer state to update; must be non-null (it is dereferenced
//              unconditionally)
void startTimer_GPU( MTime* mtime )
{
    mtime->type = GPU_TIME;

    if (mtime->gpustart == 0)
    {
        CHECK_ERROR(cudaEventCreate(&mtime->gpustart));
    }
    if (mtime->gpustop == 0)
    {
        CHECK_ERROR(cudaEventCreate(&mtime->gpustop));
    }

    CHECK_ERROR( cudaEventRecord(mtime->gpustart) );
}
示例15: m_ceStartEvent
// Constructs the measurement object for the given stream: nulls both event
// handles, creates them, and immediately records the start event on that
// stream. Every CUDA call result is passed through cudaCheckError.
CCudaTimeMeasure::CCudaTimeMeasure(cudaStream_t csStreamID/* = 0*/)
    : m_ceStartEvent(NULL),
      m_ceStopEvent(NULL),
      m_csStreamID(csStreamID)
{
    cudaCheckError(cudaEventCreate(&m_ceStartEvent));
    cudaCheckError(cudaEventCreate(&m_ceStopEvent));

    // Timing begins as soon as the object exists.
    cudaCheckError(cudaEventRecord(m_ceStartEvent, m_csStreamID));
}