当前位置: 首页>>代码示例>>C++>>正文


C++ CUDA_SAFE_CALL函数代码示例

本文整理汇总了C++中CUDA_SAFE_CALL函数的典型用法代码示例。如果您正苦于以下问题:C++ CUDA_SAFE_CALL函数的具体用法?C++ CUDA_SAFE_CALL怎么用?C++ CUDA_SAFE_CALL使用的例子?那么,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了CUDA_SAFE_CALL函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: CUDA_SAFE_CALL

/**
 * Releases the buffers held by a GPU unstructured grid.
 *
 * Each pointer returned by getElemList(), getTypeList(), getConnList() and
 * getVertices() is handed to cudaFree(); any CUDA error is reported through
 * CUDA_SAFE_CALL.
 *
 * @param usg  Grid whose buffers are released. A null pointer is accepted and
 *             ignored, mirroring the behaviour of cudaFree(NULL).
 */
void CUDAResourceManager::deallocUSG(GPUUsg *usg)
{
    // Nothing to release for a null grid; avoids dereferencing a null pointer.
    if (!usg)
        return;

    CUDA_SAFE_CALL(cudaFree(usg->getElemList()));
    CUDA_SAFE_CALL(cudaFree(usg->getTypeList()));
    CUDA_SAFE_CALL(cudaFree(usg->getConnList()));
    CUDA_SAFE_CALL(cudaFree(usg->getVertices()));
}
开发者ID:nixz,项目名称:covise,代码行数:7,代码来源:CUDAResourceManager.cpp

示例2: sizeof

void
LiGL2D::setVbo(int spaceVect)
{
	GLuint oldVbo = 0;
	GLuint newVbo = 0;
	if(vbo != 0){
		oldVbo = vbo;
		vbo = 0;
	}
	if(iw != 0 && ih !=0){
		GLint bsize;
		// create buffer object
		unsigned int size = ((int)iw/(spaceVect+1))*((int)ih/(spaceVect+1)) * 6 *  sizeof(float2);
		glGenBuffers( 1, &newVbo);
		glBindBuffer( GL_ARRAY_BUFFER, newVbo);
		// initialize buffer object
		glBufferData( GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
		glGetBufferParameterivARB(GL_ARRAY_BUFFER_ARB, GL_BUFFER_SIZE_ARB, &bsize); 
		glBindBuffer( GL_ARRAY_BUFFER, 0);
		// register buffer object with CUDA
		CUDA_SAFE_CALL(cudaGLRegisterBufferObject(newVbo));
		sVbo = ((int)iw/(spaceVect+1))*((int)ih/(spaceVect+1))*6;
		vbo = newVbo;
		emit sendVbo(vbo);
	}
	if(oldVbo != 0){
		CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(oldVbo));
		glDeleteBuffers(1, &oldVbo);
	}
}
开发者ID:dongmingdmdm,项目名称:camnavi,代码行数:30,代码来源:LiGL2Dwidget.cpp

示例3: DBG

	/**
	 * Writes a human-readable summary of the finished simulation to a stream.
	 *
	 * Stops the GPU or CPU timer (depending on currentMode). In GPU mode the
	 * spike counters are first fetched from the device symbols "secD2fireCnt",
	 * "secD1fireCnt", "spikeCountD2" and "spikeCountD1". The summary lists
	 * network parameters, the random seed, timing, average firing rates and
	 * total firing counts, and the stream is flushed before returning.
	 *
	 * NOTE(review): the device symbols are addressed by string name; CUDA 5.0
	 * and later no longer accept string symbol names -- confirm the targeted
	 * toolkit version before upgrading.
	 *
	 * @param fp  Open output stream that receives the summary.
	 */
	void CpuSNN::printSimSummary(FILE *fp)
	{
		DBG(2, fpLog, AT, "printSimSummary()");
		float etime;
		if(currentMode == GPU_MODE)	 {
			stopGPUTiming();
			etime = gpuExecutionTime;
			// Per-second counters are read first and summed ...
			CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD2, "secD2fireCnt", sizeof(int), 0, cudaMemcpyDeviceToHost));
			CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD1, "secD1fireCnt", sizeof(int), 0, cudaMemcpyDeviceToHost));
			spikeCountAll1sec = spikeCountD1 + spikeCountD2;
			// ... then the same host variables are overwritten with the overall counters.
			CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD2, "spikeCountD2", sizeof(int), 0, cudaMemcpyDeviceToHost));
			CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD1, "spikeCountD1", sizeof(int), 0, cudaMemcpyDeviceToHost));
			spikeCountAll      = spikeCountD1 + spikeCountD2;
		}
		else {
			stopCPUTiming();
			etime = cpuExecutionTime;
		}

		fprintf(fp, "\n*** Network configuration dumped in %s.dot file...\n\
				Use graphViz to see the network connectivity...\n\n", networkName.c_str());
		fprintf(fp, "*********** %s Simulation Summary **********\n", (currentMode == GPU_MODE)?("GPU"):"CPU");
		fprintf(fp, "Network Parameters: \n\tN = %d (numNExcReg:numNInhReg=%2.1f:%2.1f), numPostSynapses = %d, D = %d\n", numN, 100.0*numNExcReg/numN, 100.0*numNInhReg/numN, numPostSynapses, D);
		fprintf(fp, "Random Seed: %d\n", randSeed);
		// The simulated time is passed as unsigned long long, so it must be
		// printed with %llu (the signed %lld does not match the argument type).
		// etime is divided by 1000 before being reported in seconds.
		fprintf(fp, "Timing: \n\tModel Simulation Time = %llu sec \n\tActual Execution Time = %4.2f sec\n",  (unsigned long long)simTimeSec, etime/1000.0);
		fprintf(fp, "Average Firing Rate \n\t2+ms delay = %3.3f Hz \n\t1ms delay = %3.3f Hz \n\tOverall = %3.3f Hz\n",
			spikeCountD2/(1.0*simTimeSec*numNExcReg), spikeCountD1/(1.0*simTimeSec*numNInhReg), spikeCountAll/(1.0*simTimeSec*numN));
		fprintf(fp, "Overall Firing Count: \n\t2+ms delay = %d \n\t1ms delay = %d \n\tTotal = %d\n",
			spikeCountD2, spikeCountD1, spikeCountAll );
		fprintf(fp, "**************************************\n\n");

		fflush(fp);
	}
开发者ID:drghirlanda,项目名称:mint,代码行数:33,代码来源:printSNNInfo.cpp

示例4: CUDA_SAFE_CALL

bool MultivalueHashTable::Initialize(const unsigned   max_table_entries,
                                     const float      space_usage,
                                     const unsigned   num_hash_functions)
{                                    
    bool success = HashTable::Initialize(max_table_entries, space_usage,
                                             num_hash_functions);
    target_space_usage_ = space_usage;

    // + 2N 32-bit entries
    CUDA_SAFE_CALL(cudaMalloc( (void**)&d_scratch_offsets_, 
                               sizeof(unsigned) * max_table_entries ));
    CUDA_SAFE_CALL(cudaMalloc( (void**)&d_scratch_is_unique_,
                               sizeof(unsigned) * max_table_entries ));

    success &= (d_scratch_offsets_ != NULL);
    success &= (d_scratch_is_unique_ != NULL);

    // Allocate memory for the scan.
    // + Unknown memory usage
    CUDPPConfiguration config;
    config.op            = CUDPP_ADD;
    config.datatype      = CUDPP_UINT;
    config.algorithm     = CUDPP_SCAN;
    config.options       = CUDPP_OPTION_FORWARD | CUDPP_OPTION_INCLUSIVE;
    CUDPPResult result   = cudppPlan(theCudpp, &scanplan_, config, 
                                     max_table_entries, 1, 0);
    if (CUDPP_SUCCESS != result) {
        fprintf(stderr, "Failed to create plan.");
        return false;
    }
    return success;
}
开发者ID:DeepCV,项目名称:cudpp,代码行数:32,代码来源:hash_multivalue.cpp

示例5: makeCurrent

void
LiGL2D::setPbo(int image_width, int image_height)
{
	makeCurrent();
	iw = image_width;
	ih = image_height;
	GLuint oldPbo = 0;
	GLuint newPbo = 0;
	GLuint oldTex = 0;

	if(pbo != 0){
		oldPbo = pbo;
		pbo = 0;
		oldTex = tex;
	}
	if(iw != 0 && ih !=0){
		glGenBuffers(1, &newPbo);
		glBindBuffer(GL_ARRAY_BUFFER, newPbo);
		glBufferData(GL_ARRAY_BUFFER, image_height*image_width* 4*sizeof(GLubyte),NULL, GL_DYNAMIC_DRAW);
		glBindBuffer(GL_ARRAY_BUFFER, 0);
		CUDA_SAFE_CALL(cudaGLRegisterBufferObject(newPbo));
		createTexture(&tex, iw, ih);
		glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);	
		pbo = newPbo;
		emit sendPbo(pbo);
	}
	if(oldPbo != 0){
		CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(oldPbo));
		glDeleteBuffers(1, &oldPbo);
	}
	if(oldTex != 0){
		glDeleteTextures(1, &oldTex);
	}
}
开发者ID:dongmingdmdm,项目名称:camnavi,代码行数:34,代码来源:LiGL2Dwidget.cpp

示例6: memcpyFromDeviceAsync

/**
 * Fetches the firing buffer from the device and rebuilds the sparse view.
 *
 * The call blocks the host until the copy issued on `stream` has completed.
 *
 * @param stream  CUDA stream on which the copy and the completion event are
 *                enqueued.
 */
void
FiringBuffer::sync(cudaStream_t stream)
{
	// Enqueue the copy of partitionCount() * m_pitch units from md_buffer into mh_buffer
	// (presumably device -> host, judging by the helper's name -- confirm).
	memcpyFromDeviceAsync(mh_buffer.get(), md_buffer.get(),
			m_mapper.partitionCount() * m_pitch, stream);
	// Mark the point in the stream right after the copy ...
	CUDA_SAFE_CALL(cudaEventRecord(m_copyDone, stream));
	// ... and wait on the host until that point has been reached.
	CUDA_SAFE_CALL(cudaEventSynchronize(m_copyDone));
	// The host buffer is only read after the wait above.
	populateSparse(mh_buffer.get());
}
开发者ID:MogeiWang,项目名称:nemosim,代码行数:9,代码来源:FiringBuffer.cpp

示例7: runbench_warmup

void runbench_warmup(double *cd, long size){
	const long reduced_grid_size = size/(UNROLLED_MEMORY_ACCESSES)/32;
	const int BLOCK_SIZE = 256;
	const int TOTAL_REDUCED_BLOCKS = reduced_grid_size/BLOCK_SIZE;

	dim3 dimBlock(BLOCK_SIZE, 1, 1);
	dim3 dimReducedGrid(TOTAL_REDUCED_BLOCKS, 1, 1);

	hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< short, BLOCK_SIZE, 0 >), dim3(dimReducedGrid), dim3(dimBlock ), 0, 0, (short)1, (short*)cd);
	CUDA_SAFE_CALL( hipGetLastError() );
	CUDA_SAFE_CALL( hipDeviceSynchronize() );
}
开发者ID:sunway513,项目名称:mixbench,代码行数:12,代码来源:mix_kernels_hip.cpp

示例8: CUDA_SAFE_CALL

//---------------------------------------------
//GPU memory operations
//---------------------------------------------
/**
 * Allocates a zero-filled buffer with cudaMalloc.
 *
 * In __DEBUG__ builds the running total d_dmemUsage is increased by `size`,
 * and with __ALLOC__ additionally defined the allocation is logged.
 * NOTE(review): the log format uses %d for a size_t argument -- confirm
 * BenLog's formatting rules.
 *
 * @param size  Number of bytes to allocate and clear.
 * @return Pointer produced by cudaMalloc, typed as char*.
 */
char *D_MALLOC(size_t size)
{
	void *raw = NULL;
	CUDA_SAFE_CALL(cudaMalloc(&raw, size));
	CUDA_SAFE_CALL(cudaMemset(raw, 0, size));
#ifdef __DEBUG__
#	ifdef __ALLOC__
	BenLog("+d%d bytes\n", size);
#	endif //__ALLOC__
	d_dmemUsage += size;
#endif
	return static_cast<char*>(raw);
}
开发者ID:xiaobaidemu,项目名称:GPU_Apriori,代码行数:16,代码来源:BenCUDAMem.cpp

示例9: memcpy

void ParticleListCPUSorted::copy_from(const ParticleList* list_in)
{
	ispecies = list_in -> ispecies;
	// Free realkind arrays

	if(list_in -> device_type == 0){
		for(int i=0;i<ParticleList_nfloats;i++)
		{
			memcpy(*get_float(i),*(list_in->get_float(i)),nptcls*sizeof(realkind));
		}

		// Allocate int arrays
		for(int i=0;i<ParticleList_nints;i++)
		{
			memcpy(*get_int(i),*(list_in->get_int(i)),nptcls*sizeof(int));
		}

		// allocate short ints for cluster id's
		memcpy(cluster_id,list_in->cluster_id,nptcls*sizeof(int));

//		memcpy(num_subcycles,list_in->num_subcycles,nptcls*sizeof(int));
//
//		memcpy(num_piccard,list_in->num_piccard,nptcls*sizeof(double));
//		memcpy(num_piccard2,list_in->num_piccard2,nptcls*sizeof(double));
	}
	else if(list_in->device_type == 1)
	{

#ifndef NO_CUDA
		enum cudaMemcpyKind kind = cudaMemcpyDeviceToHost;
		// Free realkind arrays
		for(int i=0;i<ParticleList_nfloats;i++)
		{
			CUDA_SAFE_CALL(cudaMemcpyAsync(*get_float(i),*(list_in->get_float(i)),nptcls*sizeof(realkind),kind));
		}

		// Allocate int arrays
		for(int i=0;i<ParticleList_nints;i++)
		{
			CUDA_SAFE_CALL(cudaMemcpyAsync(*get_int(i),*(list_in->get_int(i)),nptcls*sizeof(int),kind));
		}

		// allocate short ints for cluster id's
		CUDA_SAFE_CALL(cudaMemcpyAsync(cluster_id,(list_in->cluster_id),nptcls*sizeof(int),kind));

		CUDA_SAFE_CALL(cudaDeviceSynchronize());
#endif
	}
}
开发者ID:cocomans,项目名称:plasma,代码行数:49,代码来源:ParticleListCPUSorted.cpp

示例10: stream

/**
 * Builds a grid map on the GPU from double-precision input energies.
 *
 * Allocates a float array for the padded grid with cudaMalloc and a
 * page-locked float array for the unpadded grid with cudaMallocHost,
 * converts the input to float into the latter and then calls
 * copyGridMapPadded() to transfer it into the padded array.
 *
 * @param numGridPoints        Dimensions of the unpadded grid.
 * @param numGridPointsPadded  Dimensions of the padded grid.
 * @param inputEnergies        numGridPoints.Cube() double values to convert.
 * @param stream               CUDA stream stored for later transfers.
 */
CudaGridMap::CudaGridMap(const Vec3i &numGridPoints, const Vec3i &numGridPointsPadded, const double *inputEnergies, cudaStream_t stream)
    : stream(stream), numGridPoints(numGridPoints), numGridPointsPadded(numGridPointsPadded)
{
    // Padded destination array.
    CUDA_SAFE_CALL(cudaMalloc((void**)&energiesDevice, sizeof(float) * numGridPointsPadded.Cube()));

    // Page-locked staging array holding the float version of the input.
    const int unpaddedCount = numGridPoints.Cube();
    CUDA_SAFE_CALL(cudaMallocHost((void**)&energiesHost, sizeof(float) * unpaddedCount));

    const double *const inputEnd = inputEnergies + unpaddedCount;
    std::transform(inputEnergies, inputEnd, energiesHost, typecast<float, double>);

    // Only the overlapping region is written; the padding keeps whatever
    // cudaMalloc returned.
    copyGridMapPadded(energiesDevice, numGridPointsPadded, energiesHost, numGridPoints, cudaMemcpyHostToDevice);
}
开发者ID:marekolsak,项目名称:fastgrid,代码行数:15,代码来源:CudaGridMap.cpp

示例11: CUDA_SAFE_CALL

/**
 * Releases an allocation with cudaFree and updates the UVM allocation counter.
 *
 * Never propagates an exception: anything thrown inside is swallowed.
 *
 * @param arg_alloc_ptr  Pointer passed to cudaFree.
 * (second parameter)    Allocation size; unused.
 */
void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
  try {
    // The counter is decremented before the free, so it drops even if the free fails.
    Kokkos::Impl::num_uvm_allocations -= 1;
    CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
  } catch(...) {} // deliberately best-effort: failures during deallocation are ignored
}
开发者ID:gmackey,项目名称:kokkos,代码行数:7,代码来源:Kokkos_CudaSpace.cpp

示例12: attach_texture_object

/**
 * Creates a CUDA texture object over a linear allocation.
 *
 * The channel format is chosen from the alias size: int for 4 bytes, int2
 * for 8 bytes and int4 for every other value (expected to be 16).
 *
 * @param sizeof_alias  Size in bytes of the element type the texture aliases.
 * @param alloc_ptr     Start of the allocation bound to the texture.
 * @param alloc_size    Size of the allocation in bytes.
 * @return The texture object filled in by cudaCreateTextureObject.
 */
::cudaTextureObject_t
SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( const unsigned sizeof_alias
                     , void *   const alloc_ptr
                     , size_t   const alloc_size )
{
  struct cudaResourceDesc resDesc ;
  struct cudaTextureDesc  texDesc ;

  // Start from all-zero descriptors; only the linear-resource fields are set.
  memset( & resDesc , 0 , sizeof(resDesc) );
  memset( & texDesc , 0 , sizeof(texDesc) );

  resDesc.resType                = cudaResourceTypeLinear ;
  resDesc.res.linear.devPtr      = alloc_ptr ;
  resDesc.res.linear.sizeInBytes = alloc_size ;

  switch ( sizeof_alias ) {
    case 4 :
      resDesc.res.linear.desc = cudaCreateChannelDesc< int >() ;
      break ;
    case 8 :
      resDesc.res.linear.desc = cudaCreateChannelDesc< ::int2 >() ;
      break ;
    default : /* sizeof_alias == 16 */
      resDesc.res.linear.desc = cudaCreateChannelDesc< ::int4 >() ;
      break ;
  }

  ::cudaTextureObject_t tex_obj ;
  CUDA_SAFE_CALL( cudaCreateTextureObject( & tex_obj , & resDesc, & texDesc, NULL ) );

  return tex_obj ;
}
开发者ID:gurkih,项目名称:lammps,代码行数:28,代码来源:Kokkos_CudaSpace.cpp

示例13: main

/**
 * Program entry point: runs do_main() and then resets the CUDA device.
 *
 * Command-line arguments are ignored.
 *
 * @return Always 0 when this point is reached.
 */
int main( int, char ** )
{
  do_main();

  // Reset the device before exiting; errors are reported by CUDA_SAFE_CALL.
  CUDA_SAFE_CALL( cudaDeviceReset() );
  return 0;
}
开发者ID:ABHISKUMAR,项目名称:nsight-gtc2013,代码行数:7,代码来源:main.cpp

示例14: Vec3i

void CudaGridMap::copyGridMapPadded(float *dst,       const Vec3i &numGridPointsDst,
                                    const float *src, const Vec3i &numGridPointsSrc,
                                    cudaMemcpyKind kind)
{
    Vec3i numGridPointsMin = Vec3i(Mathi::Min(numGridPointsDst.x, numGridPointsSrc.x),
                                   Mathi::Min(numGridPointsDst.y, numGridPointsSrc.y),
                                   Mathi::Min(numGridPointsDst.z, numGridPointsSrc.z));
    int numGridPointsDstXMulY = numGridPointsDst.x * numGridPointsDst.y;
    int numGridPointsSrcXMulY = numGridPointsSrc.x * numGridPointsSrc.y;

    for (int z = 0; z < numGridPointsMin.z; z++)
    {
        // Set the base of output indices from z
        int outputIndexZBaseDst = z * numGridPointsDstXMulY;
        int outputIndexZBaseSrc = z * numGridPointsSrcXMulY;

        for (int y = 0; y < numGridPointsMin.y; y++)
        {
            // Set the base of output indices from (z,y)
            int outputIndexZYBaseDst = outputIndexZBaseDst + y * numGridPointsDst.x;
            int outputIndexZYBaseSrc = outputIndexZBaseSrc + y * numGridPointsSrc.x;

            // Copy one row in axis X
            CUDA_SAFE_CALL(cudaMemcpyAsync(dst + outputIndexZYBaseDst, src + outputIndexZYBaseSrc, sizeof(float) * numGridPointsMin.x, kind, stream));
        }
    }
}
开发者ID:marekolsak,项目名称:fastgrid,代码行数:27,代码来源:CudaGridMap.cpp

示例15: FieldDataCPU

/**
 * Allocates the per-node field storage for every enabled back end.
 *
 * Always creates and allocates the CPU fields and a broadcast timer; creates
 * one FieldDataGPU per GPU when nGPU > 0 and a FieldDataMIC when nMIC > 0.
 *
 * @param _pdata  Simulation description; stored in pdata and used for the
 *                grid dimensions and the node/thread information.
 */
void NodeFieldData::allocate(PlasmaData* _pdata)
{
	pdata = _pdata;

	// Cache the grid dimensions locally.
	nx = pdata->nx;
	ny = pdata->ny;
	nz = pdata->nz;


	cpu_fields = new FieldDataCPU();
	cpu_fields -> allocate(pdata);


	if(pdata->node_info->nGPU > 0)
	{
		// Raw storage for nGPU objects; no constructors run here.
		gpu_fields = (FieldDataGPU*)malloc(pdata->node_info->nGPU * sizeof(FieldDataGPU));
		// One loop iteration per GPU, distributed over OpenMP threads.
#pragma omp parallel for
		for(int i=0;i<pdata->node_info->nGPU;i++)
		{
			// Select the GPU listed for thread_info entry (nspecies + i) before allocating on it.
			CUDA_SAFE_CALL(cudaSetDevice(pdata->thread_info[pdata->node_info->nspecies+i]->gpu_info->igpu));
			// NOTE(review): the heap object created here is copied into the malloc'ed slot
			// and its pointer is then lost, so it is never deleted. The assignment also
			// targets storage in which no object was constructed -- consider constructing
			// in place instead; confirm FieldDataGPU's copy/destructor semantics first.
			gpu_fields[i] = *(new FieldDataGPU());
			gpu_fields[i].allocate(pdata);
		}
	}

	if(pdata->node_info->nMIC > 0)
	{
		mic_fields = new FieldDataMIC();
		mic_fields -> allocate(pdata);
	}
	bcast_timer = new CPUTimer();
}
开发者ID:cocomans,项目名称:plasma,代码行数:32,代码来源:NodeFieldData.cpp


注:本文中的CUDA_SAFE_CALL函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。