本文整理汇总了 C++ 中 CUDA_SAFE_CALL 函数的典型用法代码示例。如果您正苦于以下问题：C++ CUDA_SAFE_CALL 函数的具体用法？C++ CUDA_SAFE_CALL 怎么用？C++ CUDA_SAFE_CALL 使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 CUDA_SAFE_CALL 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: CUDA_SAFE_CALL
/**
 * Releases the four buffers exposed by a GPUUsg through cudaFree.
 *
 * The buffers returned by getElemList(), getTypeList(), getConnList() and
 * getVertices() are freed in that order; each call is wrapped in
 * CUDA_SAFE_CALL. The GPUUsg object itself is not deleted here.
 *
 * @param usg  Grid whose buffers are released. A null pointer is treated as
 *             "nothing to free" instead of being dereferenced.
 */
void CUDAResourceManager::deallocUSG(GPUUsg *usg)
{
    // Mirror the free(NULL)/cudaFree(NULL) convention: nothing to release.
    if (!usg) {
        return;
    }

    CUDA_SAFE_CALL(cudaFree(usg->getElemList()));
    CUDA_SAFE_CALL(cudaFree(usg->getTypeList()));
    CUDA_SAFE_CALL(cudaFree(usg->getConnList()));
    CUDA_SAFE_CALL(cudaFree(usg->getVertices()));
}
示例2: sizeof
void
LiGL2D::setVbo(int spaceVect)
{
GLuint oldVbo = 0;
GLuint newVbo = 0;
if(vbo != 0){
oldVbo = vbo;
vbo = 0;
}
if(iw != 0 && ih !=0){
GLint bsize;
// create buffer object
unsigned int size = ((int)iw/(spaceVect+1))*((int)ih/(spaceVect+1)) * 6 * sizeof(float2);
glGenBuffers( 1, &newVbo);
glBindBuffer( GL_ARRAY_BUFFER, newVbo);
// initialize buffer object
glBufferData( GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
glGetBufferParameterivARB(GL_ARRAY_BUFFER_ARB, GL_BUFFER_SIZE_ARB, &bsize);
glBindBuffer( GL_ARRAY_BUFFER, 0);
// register buffer object with CUDA
CUDA_SAFE_CALL(cudaGLRegisterBufferObject(newVbo));
sVbo = ((int)iw/(spaceVect+1))*((int)ih/(spaceVect+1))*6;
vbo = newVbo;
emit sendVbo(vbo);
}
if(oldVbo != 0){
CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(oldVbo));
glDeleteBuffers(1, &oldVbo);
}
}
示例3: DBG
/**
 * Writes a human-readable summary of the simulation run to `fp`.
 *
 * Stops the timer that matches the current mode and, in GPU mode, refreshes
 * the host-side spike counters from device symbols before printing:
 *   - "secD1fireCnt"/"secD2fireCnt" are summed into spikeCountAll1sec;
 *   - "spikeCountD1"/"spikeCountD2" then overwrite spikeCountD1/spikeCountD2
 *     and are summed into spikeCountAll.
 * In CPU mode the counters are printed as they currently are.
 *
 * @param fp  Open stream that receives the summary; it is flushed on exit.
 */
void CpuSNN::printSimSummary(FILE *fp)
{
    DBG(2, fpLog, AT, "printSimSummary()");

    float etime;
    if (currentMode == GPU_MODE) {
        stopGPUTiming();
        etime = gpuExecutionTime;

        // NOTE(review): these calls name device symbols by string, a form
        // that newer CUDA toolkits no longer accept — confirm the toolkit
        // version this file is built against.
        // Per-second counters: only their sum is kept.
        CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD2, "secD2fireCnt", sizeof(int), 0, cudaMemcpyDeviceToHost));
        CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD1, "secD1fireCnt", sizeof(int), 0, cudaMemcpyDeviceToHost));
        spikeCountAll1sec = spikeCountD1 + spikeCountD2;

        // Overall counters: these are the values printed below.
        CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD2, "spikeCountD2", sizeof(int), 0, cudaMemcpyDeviceToHost));
        CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD1, "spikeCountD1", sizeof(int), 0, cudaMemcpyDeviceToHost));
        spikeCountAll = spikeCountD1 + spikeCountD2;
    }
    else {
        stopCPUTiming();
        etime = cpuExecutionTime;
    }

    // Adjacent literals instead of a backslash continuation, so source
    // indentation can never end up inside the printed text.
    fprintf(fp, "\n*** Network configuration dumped in %s.dot file...\n"
                "Use graphViz to see the network connectivity...\n\n", networkName.c_str());
    fprintf(fp, "*********** %s Simulation Summary **********\n", (currentMode == GPU_MODE)?("GPU"):"CPU");
    fprintf(fp, "Network Parameters: \n\tN = %d (numNExcReg:numNInhReg=%2.1f:%2.1f), numPostSynapses = %d, D = %d\n", numN, 100.0*numNExcReg/numN, 100.0*numNInhReg/numN, numPostSynapses, D);
    fprintf(fp, "Random Seed: %d\n", randSeed);
    // %llu matches the unsigned long long cast (the original used %lld).
    // etime is divided by 1000 before being printed as seconds.
    fprintf(fp, "Timing: \n\tModel Simulation Time = %llu sec \n\tActual Execution Time = %4.2f sec\n", (unsigned long long)simTimeSec, etime/1000.0);
    fprintf(fp, "Average Firing Rate \n\t2+ms delay = %3.3f Hz \n\t1ms delay = %3.3f Hz \n\tOverall = %3.3f Hz\n",
            spikeCountD2/(1.0*simTimeSec*numNExcReg), spikeCountD1/(1.0*simTimeSec*numNInhReg), spikeCountAll/(1.0*simTimeSec*numN));
    fprintf(fp, "Overall Firing Count: \n\t2+ms delay = %d \n\t1ms delay = %d \n\tTotal = %d\n",
            spikeCountD2, spikeCountD1, spikeCountAll );
    fprintf(fp, "**************************************\n\n");
    fflush(fp);
}
示例4: CUDA_SAFE_CALL
bool MultivalueHashTable::Initialize(const unsigned max_table_entries,
const float space_usage,
const unsigned num_hash_functions)
{
bool success = HashTable::Initialize(max_table_entries, space_usage,
num_hash_functions);
target_space_usage_ = space_usage;
// + 2N 32-bit entries
CUDA_SAFE_CALL(cudaMalloc( (void**)&d_scratch_offsets_,
sizeof(unsigned) * max_table_entries ));
CUDA_SAFE_CALL(cudaMalloc( (void**)&d_scratch_is_unique_,
sizeof(unsigned) * max_table_entries ));
success &= (d_scratch_offsets_ != NULL);
success &= (d_scratch_is_unique_ != NULL);
// Allocate memory for the scan.
// + Unknown memory usage
CUDPPConfiguration config;
config.op = CUDPP_ADD;
config.datatype = CUDPP_UINT;
config.algorithm = CUDPP_SCAN;
config.options = CUDPP_OPTION_FORWARD | CUDPP_OPTION_INCLUSIVE;
CUDPPResult result = cudppPlan(theCudpp, &scanplan_, config,
max_table_entries, 1, 0);
if (CUDPP_SUCCESS != result) {
fprintf(stderr, "Failed to create plan.");
return false;
}
return success;
}
示例5: makeCurrent
/**
 * Replaces the pixel buffer object and its texture for a new image size.
 *
 * Stores the new dimensions in iw/ih, detaches the current PBO and remembers
 * the current texture. When both dimensions are non-zero a new buffer of
 * width * height * 4 bytes is created, registered with CUDA, a texture is
 * created through createTexture(), and the new PBO is announced through the
 * sendPbo signal. The previous PBO and texture are then released.
 *
 * @param image_width   New image width; 0 means "no image".
 * @param image_height  New image height; 0 means "no image".
 */
void
LiGL2D::setPbo(int image_width, int image_height)
{
    makeCurrent();
    iw = image_width;
    ih = image_height;

    GLuint oldPbo = 0;
    GLuint newPbo = 0;
    GLuint oldTex = 0;
    if (pbo != 0) {
        // Defer destruction of the old resources until the new ones exist.
        oldPbo = pbo;
        pbo = 0;
        oldTex = tex;
    }

    if (iw != 0 && ih != 0) {
        glGenBuffers(1, &newPbo);
        glBindBuffer(GL_ARRAY_BUFFER, newPbo);
        // sizeof comes first so the whole product is evaluated in size_t
        // rather than overflowing in int for very large images.
        glBufferData(GL_ARRAY_BUFFER, sizeof(GLubyte) * 4 * image_width * image_height, NULL, GL_DYNAMIC_DRAW);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
        CUDA_SAFE_CALL(cudaGLRegisterBufferObject(newPbo));
        createTexture(&tex, iw, ih);
        glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
        pbo = newPbo;
        emit sendPbo(pbo);
    }

    if (oldPbo != 0) {
        // The buffer must be unregistered from CUDA before GL deletes it.
        CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(oldPbo));
        glDeleteBuffers(1, &oldPbo);
    }

    if (oldTex != 0) {
        glDeleteTextures(1, &oldTex);
        // If no replacement texture was created above, `tex` would still hold
        // the id that was just deleted; clear it so nothing uses a dead handle.
        if (tex == oldTex) {
            tex = 0;
        }
    }
}
示例6: memcpyFromDeviceAsync
/**
 * Copies the device firing buffer to the host and rebuilds the sparse view.
 *
 * The copy of partitionCount() * m_pitch units is issued asynchronously on
 * `stream`; the m_copyDone event is recorded behind it and waited on, so the
 * host buffer is complete before populateSparse() reads it.
 *
 * @param stream  CUDA stream on which the copy and the event are enqueued.
 */
void
FiringBuffer::sync(cudaStream_t stream)
{
    // 1. Enqueue the device-to-host transfer.
    memcpyFromDeviceAsync(
        mh_buffer.get(),
        md_buffer.get(),
        m_mapper.partitionCount() * m_pitch,
        stream);

    // 2. Block the host until everything enqueued up to the event has finished.
    CUDA_SAFE_CALL( cudaEventRecord(m_copyDone, stream) );
    CUDA_SAFE_CALL( cudaEventSynchronize(m_copyDone) );

    // 3. The host copy is now valid.
    populateSparse( mh_buffer.get() );
}
示例7: runbench_warmup
void runbench_warmup(double *cd, long size){
const long reduced_grid_size = size/(UNROLLED_MEMORY_ACCESSES)/32;
const int BLOCK_SIZE = 256;
const int TOTAL_REDUCED_BLOCKS = reduced_grid_size/BLOCK_SIZE;
dim3 dimBlock(BLOCK_SIZE, 1, 1);
dim3 dimReducedGrid(TOTAL_REDUCED_BLOCKS, 1, 1);
hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< short, BLOCK_SIZE, 0 >), dim3(dimReducedGrid), dim3(dimBlock ), 0, 0, (short)1, (short*)cd);
CUDA_SAFE_CALL( hipGetLastError() );
CUDA_SAFE_CALL( hipDeviceSynchronize() );
}
示例8: CUDA_SAFE_CALL
//---------------------------------------------
//GPU memory operations
//---------------------------------------------
/**
 * Allocates `size` bytes with cudaMalloc and zero-fills them with cudaMemset.
 *
 * When __DEBUG__ is defined the running total d_dmemUsage is increased by
 * `size`; when __ALLOC__ is defined as well, the allocation is logged.
 *
 * @param size  Number of bytes to allocate.
 * @return Pointer written by cudaMalloc (initialised to NULL beforehand).
 */
char *D_MALLOC(size_t size)
{
    char *buf = NULL;
    CUDA_SAFE_CALL(cudaMalloc((void**)&buf, size));
    CUDA_SAFE_CALL(cudaMemset(buf, 0, size));
#ifdef __DEBUG__
# ifdef __ALLOC__
    // size_t must not be passed for %d (different width on 64-bit targets);
    // cast explicitly and use the matching conversion.
    // Assumes BenLog takes printf-style formats — TODO confirm.
    BenLog("+d%lu bytes\n", (unsigned long)size);
# endif //__ALLOC__
    d_dmemUsage += size;
#endif
    return buf;
}
示例9: memcpy
void ParticleListCPUSorted::copy_from(const ParticleList* list_in)
{
ispecies = list_in -> ispecies;
// Free realkind arrays
if(list_in -> device_type == 0){
for(int i=0;i<ParticleList_nfloats;i++)
{
memcpy(*get_float(i),*(list_in->get_float(i)),nptcls*sizeof(realkind));
}
// Allocate int arrays
for(int i=0;i<ParticleList_nints;i++)
{
memcpy(*get_int(i),*(list_in->get_int(i)),nptcls*sizeof(int));
}
// allocate short ints for cluster id's
memcpy(cluster_id,list_in->cluster_id,nptcls*sizeof(int));
// memcpy(num_subcycles,list_in->num_subcycles,nptcls*sizeof(int));
//
// memcpy(num_piccard,list_in->num_piccard,nptcls*sizeof(double));
// memcpy(num_piccard2,list_in->num_piccard2,nptcls*sizeof(double));
}
else if(list_in->device_type == 1)
{
#ifndef NO_CUDA
enum cudaMemcpyKind kind = cudaMemcpyDeviceToHost;
// Free realkind arrays
for(int i=0;i<ParticleList_nfloats;i++)
{
CUDA_SAFE_CALL(cudaMemcpyAsync(*get_float(i),*(list_in->get_float(i)),nptcls*sizeof(realkind),kind));
}
// Allocate int arrays
for(int i=0;i<ParticleList_nints;i++)
{
CUDA_SAFE_CALL(cudaMemcpyAsync(*get_int(i),*(list_in->get_int(i)),nptcls*sizeof(int),kind));
}
// allocate short ints for cluster id's
CUDA_SAFE_CALL(cudaMemcpyAsync(cluster_id,(list_in->cluster_id),nptcls*sizeof(int),kind));
CUDA_SAFE_CALL(cudaDeviceSynchronize());
#endif
}
}
示例10: stream
/**
 * Builds a grid map from double-precision input energies.
 *
 * Allocates the padded grid with cudaMalloc and a page-locked staging buffer
 * for the unpadded grid with cudaMallocHost, converts the input energies to
 * float into that staging buffer, and issues the padded host-to-device copy.
 *
 * @param numGridPoints        Dimensions of the unpadded (input) grid.
 * @param numGridPointsPadded  Dimensions of the padded grid.
 * @param inputEnergies        numGridPoints.Cube() energies to convert and upload.
 * @param stream               CUDA stream stored in the object.
 */
CudaGridMap::CudaGridMap(const Vec3i &numGridPoints, const Vec3i &numGridPointsPadded, const double *inputEnergies, cudaStream_t stream)
    : stream(stream), numGridPoints(numGridPoints), numGridPointsPadded(numGridPointsPadded)
{
    // Padded grid storage.
    const size_t paddedBytes = sizeof(float) * numGridPointsPadded.Cube();
    CUDA_SAFE_CALL(cudaMalloc((void**)&energiesDevice, paddedBytes));

    // Page-locked staging buffer holding the float version of the input.
    const int inputCount = numGridPoints.Cube();
    const size_t stagingBytes = sizeof(float) * inputCount;
    CUDA_SAFE_CALL(cudaMallocHost((void**)&energiesHost, stagingBytes));

    const double *const inputEnd = inputEnergies + inputCount;
    std::transform(inputEnergies, inputEnd, energiesHost, typecast<float, double>);

    // Upload the original grid into the padded one. Cells that exist only in
    // the padding are not written and therefore stay uninitialised.
    copyGridMapPadded(energiesDevice, numGridPointsPadded,
                      energiesHost, numGridPoints,
                      cudaMemcpyHostToDevice);
}
示例11: CUDA_SAFE_CALL
/**
 * Frees an allocation with cudaFree and decrements the UVM allocation count.
 *
 * Any exception raised inside is caught and discarded, so this function
 * never propagates an exception to its caller.
 *
 * @param arg_alloc_ptr  Pointer to release.
 * (second parameter)    Allocation size; unused.
 */
void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
  try
  {
    // The counter is adjusted before the free is attempted.
    --Kokkos::Impl::num_uvm_allocations;
    CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
  }
  catch( ... )
  {
    // Errors are swallowed.
  }
}
示例12: attach_texture_object
/**
 * Creates a texture object over a linear allocation.
 *
 * The texture element type is chosen from `sizeof_alias`: 4 -> int,
 * 8 -> int2, anything else -> int4. Resource and texture descriptors are
 * zero-filled apart from the linear-resource fields set here.
 *
 * Original note: only valid for 300 <= __CUDA_ARCH__.
 *
 * @param sizeof_alias  Element size in bytes used to pick the channel format.
 * @param alloc_ptr     Start of the allocation to bind.
 * @param alloc_size    Size of the allocation in bytes.
 * @return The created texture object; zero if cudaCreateTextureObject did
 *         not write a handle.
 */
::cudaTextureObject_t
SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( const unsigned sizeof_alias
                     , void * const   alloc_ptr
                     , size_t const   alloc_size )
{
  // Zero-initialised so the function never returns an indeterminate handle,
  // in line with the "return zero" contract stated for this function.
  ::cudaTextureObject_t tex_obj = 0 ;

  struct cudaResourceDesc resDesc ;
  struct cudaTextureDesc  texDesc ;

  memset( & resDesc , 0 , sizeof(resDesc) );
  memset( & texDesc , 0 , sizeof(texDesc) );

  resDesc.resType = cudaResourceTypeLinear ;

  switch ( sizeof_alias ) {
  case 4 :
    resDesc.res.linear.desc = cudaCreateChannelDesc< int >() ;
    break ;
  case 8 :
    resDesc.res.linear.desc = cudaCreateChannelDesc< ::int2 >() ;
    break ;
  default : // treated as sizeof_alias == 16
    resDesc.res.linear.desc = cudaCreateChannelDesc< ::int4 >() ;
    break ;
  }

  resDesc.res.linear.sizeInBytes = alloc_size ;
  resDesc.res.linear.devPtr      = alloc_ptr ;

  CUDA_SAFE_CALL( cudaCreateTextureObject( & tex_obj , & resDesc, & texDesc, NULL ) );

  return tex_obj ;
}
示例13: main
/**
 * Program entry point: runs do_main(), then resets the CUDA device.
 *
 * Command-line arguments are ignored.
 *
 * @return Always 0 when control reaches the end.
 */
int main( int /* argc */ , char ** /* argv */ )
{
  do_main();

  // Reset the device once do_main() has returned.
  CUDA_SAFE_CALL( cudaDeviceReset() );

  return 0;
}
示例14: Vec3i
void CudaGridMap::copyGridMapPadded(float *dst, const Vec3i &numGridPointsDst,
const float *src, const Vec3i &numGridPointsSrc,
cudaMemcpyKind kind)
{
Vec3i numGridPointsMin = Vec3i(Mathi::Min(numGridPointsDst.x, numGridPointsSrc.x),
Mathi::Min(numGridPointsDst.y, numGridPointsSrc.y),
Mathi::Min(numGridPointsDst.z, numGridPointsSrc.z));
int numGridPointsDstXMulY = numGridPointsDst.x * numGridPointsDst.y;
int numGridPointsSrcXMulY = numGridPointsSrc.x * numGridPointsSrc.y;
for (int z = 0; z < numGridPointsMin.z; z++)
{
// Set the base of output indices from z
int outputIndexZBaseDst = z * numGridPointsDstXMulY;
int outputIndexZBaseSrc = z * numGridPointsSrcXMulY;
for (int y = 0; y < numGridPointsMin.y; y++)
{
// Set the base of output indices from (z,y)
int outputIndexZYBaseDst = outputIndexZBaseDst + y * numGridPointsDst.x;
int outputIndexZYBaseSrc = outputIndexZBaseSrc + y * numGridPointsSrc.x;
// Copy one row in axis X
CUDA_SAFE_CALL(cudaMemcpyAsync(dst + outputIndexZYBaseDst, src + outputIndexZYBaseSrc, sizeof(float) * numGridPointsMin.x, kind, stream));
}
}
}
示例15: FieldDataCPU
/**
 * Allocates the field storage for this node.
 *
 * Caches the grid dimensions from `_pdata`, always creates the CPU field
 * object, creates one GPU field object per GPU (when nGPU > 0) and a single
 * MIC field object (when nMIC > 0), and finally creates the broadcast timer.
 *
 * @param _pdata  Simulation description; stored in `pdata` and forwarded to
 *                each field object's allocate().
 */
void NodeFieldData::allocate(PlasmaData* _pdata)
{
    pdata = _pdata;

    nx = pdata->nx;
    ny = pdata->ny;
    nz = pdata->nz;

    // Host-side fields are always present.
    cpu_fields = new FieldDataCPU();
    cpu_fields->allocate(pdata);

    if (pdata->node_info->nGPU > 0)
    {
        gpu_fields = static_cast<FieldDataGPU*>(
            malloc(pdata->node_info->nGPU * sizeof(FieldDataGPU)));

        // One loop iteration per GPU; each iteration selects its device
        // before allocating on it.
#pragma omp parallel for
        for (int i = 0; i < pdata->node_info->nGPU; ++i)
        {
            // GPU entries in thread_info are indexed after the nspecies entries.
            const int device_id =
                pdata->thread_info[pdata->node_info->nspecies + i]->gpu_info->igpu;
            CUDA_SAFE_CALL(cudaSetDevice(device_id));

            // NOTE(review): the heap object created here is copied into the
            // malloc'ed slot and never deleted, so it is leaked; the slot
            // itself is assigned to without having been constructed.
            // Placement new into &gpu_fields[i] looks like the intended form
            // — confirm FieldDataGPU's copy/destructor semantics first.
            gpu_fields[i] = *(new FieldDataGPU());
            gpu_fields[i].allocate(pdata);
        }
    }

    if (pdata->node_info->nMIC > 0)
    {
        mic_fields = new FieldDataMIC();
        mic_fields->allocate(pdata);
    }

    bcast_timer = new CPUTimer();
}