本文整理汇总了C++中cuDeviceGet函数的典型用法代码示例。如果您正苦于以下问题:C++ cuDeviceGet函数的具体用法?C++ cuDeviceGet怎么用?C++ cuDeviceGet使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cuDeviceGet函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: sarafft_init
extern "C" void sarafft_init() {
printf( "Cuda is about to be initialized!\n" );
fflush ( stdout );
char *OMPI_COMM_WORLD_LOCAL_RANK = getenv( "OMPI_COMM_WORLD_LOCAL_RANK" );
if ( NULL == OMPI_COMM_WORLD_LOCAL_RANK ) {
printf( "OMPI_COMM_WORLD_LOCAL_RANK not set!\n" );
fflush ( stdout );
exit( 80 );
}
int localRank = atoi( OMPI_COMM_WORLD_LOCAL_RANK );
printf( "Local rank is %d\n", localRank );
fflush ( stdout );
if ( CUDA_SUCCESS != cuInit( 0 ) ) {
printf( "cuInit failed!\n" );
fflush ( stdout );
exit( 81 );
}
CUdevice device;
if ( CUDA_SUCCESS != cuDeviceGet( &device, localRank ) ) {
printf( "cuDeviceGet failed!\n" );
fflush ( stdout );
exit( 82 );
}
if ( CUDA_SUCCESS != cuCtxCreate( &cuda_context, CU_CTX_SCHED_YIELD, device ) ) {
printf( "cuCtxCreate failed!\n" );
fflush ( stdout );
exit( 83 );
}
printf( "Cuda was initialized successfully!\n" );
fflush ( stdout );
}
示例2: cuda_driver_api_init
/*
 * Initialize the CUDA driver API, create a context on device 0 and load
 * the module file `f` into it.
 *
 * pctx  receives the created context
 * pmod  receives the loaded module
 * f     file name handed to cuModuleLoad
 *
 * Returns CUDA_SUCCESS, or the CUresult of the first call that failed
 * (a diagnostic is printed on stdout). If only the module load fails, the
 * context that was just created is destroyed again before returning.
 */
CUresult cuda_driver_api_init(CUcontext *pctx, CUmodule *pmod, const char *f)
{
    CUdevice device;
    CUresult rc;

    if ((rc = cuInit(0)) != CUDA_SUCCESS) {
        printf("cuInit failed: res = %lu\n", (unsigned long)rc);
        return rc;
    }

    if ((rc = cuDeviceGet(&device, 0)) != CUDA_SUCCESS) {
        printf("cuDeviceGet failed: res = %lu\n", (unsigned long)rc);
        return rc;
    }

    if ((rc = cuCtxCreate(pctx, 0, device)) != CUDA_SUCCESS) {
        printf("cuCtxCreate failed: res = %lu\n", (unsigned long)rc);
        return rc;
    }

    rc = cuModuleLoad(pmod, f);
    if (rc == CUDA_SUCCESS)
        return CUDA_SUCCESS;

    /* Do not hand back a context without a usable module. */
    printf("cuModuleLoad() failed\n");
    cuCtxDestroy(*pctx);
    return rc;
}
示例3: main
int main() {
int ngpu;
CUdevice cuDevice;
CUcontext cuContext;
cuInit(0);
cuDeviceGetCount(&ngpu);
//printf("ngpu = %d\n", ngpu);
size_t *totals, *frees ;
totals = (size_t *) calloc (ngpu, sizeof(size_t));
frees = (size_t *) calloc (ngpu, sizeof(size_t));
int tid;
omp_set_num_threads(ngpu);
#pragma omp parallel private(tid, cuDevice, cuContext) shared(frees, totals)
{
tid = omp_get_thread_num();
//printf("nthreads = %d, tid = %d\n", omp_get_num_threads(), tid);
cuDeviceGet(&cuDevice, tid);
cuCtxCreate(&cuContext, tid, cuDevice);
cuMemGetInfo((size_t*)&frees[tid], (size_t*)&totals[tid]);
}
printf ("\ttotal\t\tfree\t\tused\n");
for(int i=0; i<ngpu; i++) {
printf("GPU %d\t%lu\t%lu\t%lu\n", i, (size_t)totals[i], (size_t)frees[i], (size_t)totals[i]-(size_t)frees[i]);
}
return 0;
}
示例4: GPUInterface::GetDeviceDescription
/*
 * Write a one-line, human readable summary of a device into
 * deviceDescription: global memory in MB, clock speed in GHz and an
 * estimated core count.
 *
 * deviceNumber       index into resourceMap; the mapped value is passed to
 *                    cuDeviceGet as the device ordinal
 * deviceDescription  caller-provided output buffer; its size is not known
 *                    here, so the caller must make it large enough for the
 *                    formatted text
 *
 * Every driver call is wrapped in SAFE_CUDA (defined elsewhere).
 */
void GPUInterface::GetDeviceDescription(int deviceNumber,
                                        char* deviceDescription) {
#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr, "\t\t\tEntering GPUInterface::GetDeviceDescription\n");
#endif

    CUdevice tmpCudaDevice;
    SAFE_CUDA(cuDeviceGet(&tmpCudaDevice, (*resourceMap)[deviceNumber]));

    /* cuDeviceTotalMem takes a size_t* from CUDA 3.2 on, unsigned int* before. */
#if CUDA_VERSION >= 3020
    size_t totalGlobalMemory = 0;
#else
    unsigned int totalGlobalMemory = 0;
#endif
    int major = 0, minor = 0;
    int clockSpeed = 0;
    int mpCount = 0;

    SAFE_CUDA(cuDeviceComputeCapability(&major, &minor, tmpCudaDevice));
    SAFE_CUDA(cuDeviceTotalMem(&totalGlobalMemory, tmpCudaDevice));
    SAFE_CUDA(cuDeviceGetAttribute(&clockSpeed, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, tmpCudaDevice));
    SAFE_CUDA(cuDeviceGetAttribute(&mpCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, tmpCudaDevice));

    /* Bytes -> megabytes, rounded to the nearest integer. */
    const int memoryMB = int(totalGlobalMemory / 1024.0 / 1024.0 + 0.5);
    /* The reported clock rate divided by 1e6 is what gets labelled GHz. */
    const double clockGHz = clockSpeed / 1000000.0;

    /* Cores = per-multiprocessor core count looked up by major version,
     * times the number of multiprocessors. */
    sprintf(deviceDescription,
            "Global memory (MB): %d | Clock speed (Ghz): %1.2f | Number of cores: %d",
            memoryMB,
            clockGHz,
            nGpuArchCoresPerSM[major] * mpCount);

#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr, "\t\t\tLeaving GPUInterface::GetDeviceDescription\n");
#endif
}
示例5: CUDADevice
/*
 * Set up the CUDA device handle and context for the device described by
 * info.
 *
 * info         info.num is used as the CUDA device ordinal
 * stats        forwarded to the Device base class
 * background_  when true a plain context is created; when false an
 *              OpenGL-interop context is tried first and, if that fails,
 *              a plain context is created and `background` is set to true
 *
 * The constructor returns early, leaving the object partially set up, if
 * cuda_error / cuda_error_ report a failure.
 */
CUDADevice(DeviceInfo& info, Stats &stats, bool background_) : Device(stats)
{
    cuDevice = 0;
    cuContext = 0;
    cuDevId = info.num;
    background = background_;

    /* initialize */
    if(cuda_error(cuInit(0)))
        return;

    /* setup device and context */
    if(cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
        return;

    CUresult result;

    /* Foreground use: try a GL-interop context, switch to background mode
     * if that is not available. */
    if(!background) {
        result = cuGLCtxCreate(&cuContext, 0, cuDevice);
        if(result != CUDA_SUCCESS)
            background = true;
    }

    /* Background mode (requested or fallen back to): plain context. */
    if(background)
        result = cuCtxCreate(&cuContext, 0, cuDevice);

    if(cuda_error_(result, "cuCtxCreate"))
        return;

    cuda_pop_context();
}
示例6: main
/*
 * List every CUDA device visible to the driver together with its compute
 * capability.
 *
 * Each driver result is handed to MY_CUDA_CHECK (defined elsewhere) along
 * with the name of the call that produced it. Exits with status 1 when no
 * device is found; returns 0 otherwise.
 */
int main()
{
    /* initialize CUDA */
    CUresult res;
    res = cuInit(0);
    MY_CUDA_CHECK(res, "cuInit()");

    /* check whether any GPU is present */
    int device_num;
    res = cuDeviceGetCount(&device_num);
    MY_CUDA_CHECK(res, "cuDeviceGetCount()");
    if (device_num == 0) {  // no GPU is detected
        fprintf(stderr, "no CUDA capable GPU is detected...\n");
        exit(1);
    }
    printf("%d GPUs are detected\n", device_num);

    for (int i=0; i<device_num; i++)
    {
        /* get device handle of GPU No.i */
        CUdevice dev;
        res = cuDeviceGet(&dev, i);
        MY_CUDA_CHECK(res, "cuDeviceGet()");

        /* query compute capability of GPU No.i */
        int major=0, minor=0;
        res = cuDeviceComputeCapability(&major, &minor, dev);
        MY_CUDA_CHECK(res, "cuDeviceComputeCapability()");

        /* major and minor are printed back to back, e.g. 3 and 5 -> "35" */
        printf("GPU[%d] : actual compute capability is : %d%d\n", i, major, minor);
    }

    return 0;
}
示例7: printout_devices
void printout_devices( )
{
int ndevices;
cuDeviceGetCount( &ndevices );
for( int idevice = 0; idevice < ndevices; idevice++ )
{
char name[200];
#if CUDA_VERSION > 3010
size_t totalMem;
#else
unsigned int totalMem;
#endif
int clock;
CUdevice dev;
cuDeviceGet( &dev, idevice );
cuDeviceGetName( name, sizeof(name), dev );
cuDeviceTotalMem( &totalMem, dev );
cuDeviceGetAttribute( &clock,
CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev );
printf( "device %d: %s, %.1f MHz clock, %.1f MB memory\n",
idevice, name, clock/1000.f, totalMem/1024.f/1024.f );
}
}
示例8: main
int main(int argc, char* argv[])
{
cuInit(0);
int devs = 0;
cuDeviceGetCount(&devs);
assert(devs > 0);
CUdevice dev;
CUresult status;
CUcontext ctx = 0;
cuDeviceGet(&dev, 0);
cuCtxCreate(&ctx, 0, dev);
{
size_t f = 0, t = 0;
CUresult r = cuMemGetInfo(&f, &t);
fprintf( stderr, "Do cuMemGetInfo: %d, %zu/%zu\n", r, f, t );
}
__init("\n");
printf("\nPress any key to exit...");
char c;
scanf("%c", &c);
return 0;
}
示例9: init_cuda_context
/*
 * Create the CUDA context stored in `cuContext` (declared outside this
 * function).
 *
 * When the environment variable LOCAL_RANK is set, the device ordinal is
 * LOCAL_RANK modulo the number of CUDA devices; otherwise device 0 is used.
 *
 * Returns 0 on success, or when built without _ENABLE_CUDA_ (in which case
 * nothing is done). Returns 1 if the device count cannot be obtained or is
 * not positive, or if cuInit, cuDeviceGet or cuCtxCreate fails.
 */
int
init_cuda_context (void)
{
#ifdef _ENABLE_CUDA_
    CUresult curesult = CUDA_SUCCESS;
    CUdevice cuDevice;
    int local_rank;
    int dev_count = 0;
    int dev_id = 0;
    char * str;

    if ((str = getenv("LOCAL_RANK")) != NULL) {
        /* Guard the modulo below: a failed query or zero devices would
         * otherwise divide by an uninitialized or zero count. */
        if (cudaGetDeviceCount(&dev_count) != cudaSuccess || dev_count <= 0) {
            return 1;
        }
        local_rank = atoi(str);
        dev_id = local_rank % dev_count;
    }

    curesult = cuInit(0);
    if (curesult != CUDA_SUCCESS) {
        return 1;
    }

    curesult = cuDeviceGet(&cuDevice, dev_id);
    if (curesult != CUDA_SUCCESS) {
        return 1;
    }

    curesult = cuCtxCreate(&cuContext, 0, cuDevice);
    if (curesult != CUDA_SUCCESS) {
        return 1;
    }
#endif
    return 0;
}
示例10: mcopy_gpu_init
/*
 * Initialize the CUDA driver API and create a context on device 0.
 *
 * device_info  receives the device handle in ->dev and the new context in
 *              ->context; must not be NULL
 *
 * Returns 0 on success, -1 if device_info is NULL or a driver call fails
 * (driver failures are reported on stdout with the numeric result code).
 */
int mcopy_gpu_init(struct device_info *device_info)
{
    CUresult res;

    if (device_info == NULL) {
        printf("mcopy_gpu_init failed: device_info is NULL\n");
        return -1;
    }

    /* Initialization */
    if ((res = cuInit(0)) != CUDA_SUCCESS) {
        printf("cuInit failed: res = %lu\n", (unsigned long)res);
        return -1;
    }

    if ((res = cuDeviceGet(&device_info->dev, 0)) != CUDA_SUCCESS) {
        printf("cuDeviceGet failed: res = %lu\n", (unsigned long)res);
        return -1;
    }

    if ((res = cuCtxCreate(&device_info->context, 0, device_info->dev)) !=
        CUDA_SUCCESS) {
        printf("cuCtxCreate failed: res = %lu\n", (unsigned long)res);
        return -1;
    }

    return 0;
}
示例11: initCuda
/*
 * Create a CUDA context on device 0 and store it in cuContext.
 *
 * The context is created with CU_CTX_MAP_HOST | CU_CTX_BLOCKING_SYNC.
 * Driver results are passed to checkError (defined elsewhere).
 *
 * Returns false when no device is available (also when the device count
 * cannot be queried) or the selected ordinal is out of range; returns true
 * once the context has been created.
 */
bool initCuda(CUcontext & cuContext)
{
    // Initialize Cuda
    CUresult cerr;

    // Treat a failed count query like "no devices" instead of reading an
    // uninitialized value.
    int deviceCount = 0;
    if (cudaGetDeviceCount(&deviceCount) != cudaSuccess)
        deviceCount = 0;

    if (deviceCount == 0)
    {
        fprintf(stderr, "Sorry, no CUDA device found");
        return false;
    }

    int selectedDevice = 0;
    if (selectedDevice >= deviceCount)
    {
        fprintf(stderr, "Choose device ID between 0 and %d\n", deviceCount-1);
        return false;
    }

    // Initialize the CUDA device
    CUdevice cuDevice;
    cerr = cuDeviceGet(&cuDevice,selectedDevice);
    checkError(cerr);

    cerr = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST|CU_CTX_BLOCKING_SYNC, cuDevice);
    checkError(cerr);

    // Falling off the end of a bool function is undefined behaviour.
    return true;
}
示例12: cuda_over_map
/*
 * Run a CUDA kernel over a set of float arrays and return the result array.
 *
 * argcv[0]          number of input arrays
 * argv[0..n-1]      the input arrays (treated as struct CudaFloatArray *)
 * argv[argcv[0]]    module file name, converted with grcstring(); the
 *                   kernel name is "block" + the file name with its first
 *                   strlen("_cuda/") characters and everything from the
 *                   first '.' removed
 * self, nparts, flags are not used here.
 *
 * Kernel arguments, in order: result buffer, one buffer per input array,
 * element count (the smallest input size). The launch uses 256 threads per
 * block and enough blocks to cover the element count.
 *
 * The context and module created here are released before returning on the
 * success path. What happens to them when errcheck/raiseError fire depends
 * on those helpers, which are defined elsewhere.
 *
 * NOTE(review): with argcv[0] == 0 the element count stays INT_MAX and the
 * zero-length array below is not valid C - confirm callers always pass at
 * least one array.
 */
Object cuda_over_map(Object self, int nparts, int *argcv,
        Object *argv, int flags) {
    cuInit(0);

    /* Stays 0 unless the query stores a count, so a failed init or query
     * ends up in the "no devices" error below. */
    int deviceCount = 0;
    cuDeviceGetCount(&deviceCount);
    if (deviceCount == 0) {
        raiseError("No CUDA devices found");
    }

    CUdevice cuDevice;
    CUcontext cuContext;
    CUmodule cuModule;
    CUfunction cuFunc;
    errcheck(cuDeviceGet(&cuDevice, 0));
    errcheck(cuCtxCreate(&cuContext, 0, cuDevice));

    CUdeviceptr d_res;
    errcheck(cuModuleLoad(&cuModule, grcstring(argv[argcv[0]])));

    CUdeviceptr dps[argcv[0]];
    void *args[argcv[0]+2];
    int size = INT_MAX;
    for (int i=0; i<argcv[0]; i++) {
        struct CudaFloatArray *a = (struct CudaFloatArray *)argv[i];
        /* size is the running minimum, so array i is allocated and copied
         * with the smallest size seen so far. */
        if (a->size < size)
            size = a->size;
        errcheck(cuMemAlloc(&dps[i], size * sizeof(float)));
        errcheck(cuMemcpyHtoD(dps[i], &a->data, size * sizeof(float)));
        args[i+1] = &dps[i];
    }

    struct CudaFloatArray *r =
        (struct CudaFloatArray *)(alloc_CudaFloatArray(size));
    int fsize = sizeof(float) * size;
    errcheck(cuMemAlloc(&d_res, fsize));
    errcheck(cuMemcpyHtoD(d_res, &r->data, fsize));
    args[0] = &d_res;
    args[argcv[0]+1] = &size;

    int threadsPerBlock = 256;
    int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;

    /* Build the kernel name; the append is bounded so a long module path
     * cannot overflow name[]. */
    char name[256];
    strcpy(name, "block");
    strncat(name, grcstring(argv[argcv[0]]) + strlen("_cuda/"),
            sizeof(name) - strlen(name) - 1);
    for (int i=0; name[i] != 0; i++)
        if (name[i] == '.') {
            name[i] = 0;
            break;
        }

    errcheck(cuModuleGetFunction(&cuFunc, cuModule, name));
    errcheck(cuLaunchKernel(cuFunc, blocksPerGrid, 1, 1,
                threadsPerBlock, 1, 1,
                0,
                NULL, args, NULL));
    errcheck(cuMemcpyDtoH(&r->data, d_res, fsize));

    /* Release everything created by this call: device buffers, module,
     * and the context itself. */
    cuMemFree(d_res);
    for (int i=0; i<argcv[0]; i++)
        cuMemFree(dps[i]);
    cuModuleUnload(cuModule);
    cuCtxDestroy(cuContext);

    return (Object)r;
}
示例13: device::device
/*
 * Initialize the CUDA driver API, fetch the handle of device 0 into
 * cu_device, and copy props.name into device_name.
 *
 * The results of cuInit and cuDeviceGet are not inspected directly;
 * checkCudaError (defined elsewhere) is called afterwards.
 *
 * NOTE(review): props is not filled in by this constructor - confirm it is
 * populated before device_name is read from it.
 */
device::device() {
    const unsigned int init_flags = 0;
    const int ordinal = 0;

    cuInit(init_flags);
    cuDeviceGet(&cu_device, ordinal);
    checkCudaError("device::device Init");

    // Context creation is currently disabled:
    //cuCtxCreate(&cu_context, 0, cu_device);
    //checkCudaError("device::device Create context");

    device_name = props.name;
}
示例14: get_suitable_block_num
/*
 * Estimate how many thread blocks to launch on a device.
 *
 * device         CUDA device ordinal
 * max_block_num  out: blocks per multiprocessor (the smaller of the
 *                shared-memory limit and the thread-count limit)
 * mp_num         out: number of multiprocessors
 * word_size      bytes per word; with large_size it gives the shared
 *                memory needed per block (large_size * word_size + 16)
 * thread_num     threads per block
 * large_size     words per block, see word_size
 *
 * Returns blocks-per-multiprocessor times the multiprocessor count, or -1
 * if thread_num is not positive or the compute capability is reported as
 * 9999.9999.
 *
 * The whole computation is compiled only when TODO is defined; otherwise
 * the function returns 0 and leaves the output parameters untouched.
 */
int get_suitable_block_num(int device,
                           int *max_block_num,
                           int *mp_num,
                           int word_size,
                           int thread_num,
                           int large_size)
{
#ifdef TODO
    cudaDeviceProp dev;
    CUdevice cuDevice;
    int max_thread_dev;
    int max_block, max_block_mem, max_block_dev;
    int major, minor, ver;
    //int regs, max_block_regs;

    /* thread_num is used as a divisor below. */
    if (thread_num <= 0) {
        return -1;
    }

    cudaGetDeviceProperties(&dev, device);
    cuDeviceGet(&cuDevice, device);
    cuDeviceComputeCapability(&major, &minor, cuDevice);
    //cudaFuncGetAttributes()
#if 0
    if (word_size == 4) {
        regs = 14;
    } else {
        regs = 16;
    }
    max_block_regs = dev.regsPerBlock / (regs * thread_num);
#endif
    /* Blocks per multiprocessor allowed by shared memory. */
    max_block_mem = dev.sharedMemPerBlock / (large_size * word_size + 16);
    if (major == 9999 && minor == 9999) {
        return -1;
    }

    /* Thread limit chosen by compute capability (major * 100 + minor). */
    ver = major * 100 + minor;
    if (ver <= 101) {
        max_thread_dev = 768;
    } else if (ver <= 103) {
        max_thread_dev = 1024;
    } else if (ver <= 200) {
        max_thread_dev = 1536;
    } else {
        max_thread_dev = 1536;
    }
    max_block_dev = max_thread_dev / thread_num;

    if (max_block_mem < max_block_dev) {
        max_block = max_block_mem;
    } else {
        max_block = max_block_dev;
    }
#if 0
    if (max_block_regs < max_block) {
        max_block = max_block_regs;
    }
#endif
    *max_block_num = max_block;
    *mp_num = dev.multiProcessorCount;
    return max_block * dev.multiProcessorCount;
#endif
    return 0;
}
示例15: GPUInterface::GetSupportsDoublePrecision
/*
 * Report whether a device's compute capability is at least 1.3, which this
 * code treats as "supports double precision".
 *
 * deviceNumber  index into resourceMap; the mapped value is passed to
 *               cuDeviceGet as the device ordinal
 *
 * Both driver calls are wrapped in SAFE_CUDA (defined elsewhere).
 */
bool GPUInterface::GetSupportsDoublePrecision(int deviceNumber) {
    CUdevice tmpCudaDevice;
    SAFE_CUDA(cuDeviceGet(&tmpCudaDevice, (*resourceMap)[deviceNumber]));

    int major = 0, minor = 0;
    SAFE_CUDA(cuDeviceComputeCapability(&major, &minor, tmpCudaDevice));

    /* Every 2.x-or-later device qualifies. */
    if (major >= 2)
        return true;

    /* Within the 1.x family only 1.3 and up qualify. */
    return major == 1 && minor >= 3;
}