当前位置: 首页>>代码示例>>C++>>正文


C++ cudaGetDevice函数代码示例

本文整理汇总了C++中cudaGetDevice函数的典型用法代码示例。如果您正苦于以下问题:C++ cudaGetDevice函数的具体用法?C++ cudaGetDevice怎么用?C++ cudaGetDevice使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了cudaGetDevice函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: switch

// 内部使用的  
// 如果当前未初始化直接在GPU分配内存  
// 如果当前在CPU,则在GPU上分配内存并且复制到GPU  
// 如果数据已经在GPU则啥也不做  
// Internal helper: make sure the data lives on the GPU.
// - UNINITIALIZED: allocate a zero-filled GPU buffer.
// - HEAD_AT_CPU: allocate GPU memory if not yet present, then copy host data over.
// - HEAD_AT_GPU / SYNCED: data is already on the GPU; nothing to do.
inline void SyncedMemory::to_gpu() {
#ifndef CPU_ONLY
  switch (head_) {
  case UNINITIALIZED:
    // Record the device the buffer is allocated on so it can later be
    // released on the same device.
    CUDA_CHECK(cudaGetDevice(&gpu_device_));
    // Allocate size_ bytes of device memory.
    CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    // Zero-initialize the fresh buffer.
    caffe_gpu_memset(size_, 0, gpu_ptr_);
    head_ = HEAD_AT_GPU;
    // We own this allocation and are responsible for freeing it.
    own_gpu_data_ = true;
    break;
  case HEAD_AT_CPU:
    // Lazily allocate the device buffer on first CPU->GPU sync.
    if (gpu_ptr_ == NULL) {
      CUDA_CHECK(cudaGetDevice(&gpu_device_));
      CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
      own_gpu_data_ = true;
    }
    // Copy host data to the device; both copies are now in sync.
    caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
    head_ = SYNCED;
    break;
  case HEAD_AT_GPU:
  case SYNCED:
    break;
  }
#else
  // CPU-only build: NO_GPU presumably reports a fatal "no GPU" error
  // (macro defined elsewhere in the project).
  NO_GPU;
#endif
}
开发者ID:runaway,项目名称:studycaffe,代码行数:34,代码来源:syncedmem.cpp

示例2: cuda_assert

        // Destructor: release all device-side buffers owned by this object.
        // Frees must happen on the device the buffers were allocated on, so
        // the destructor switches to device_id first if a different device is
        // current on this thread.
        ~cuda_pattern_data()
        {
            int current_id;
            cuda_assert( cudaGetDevice(&current_id) );
            if ( current_id != device_id ) cuda_assert( cudaSetDevice( device_id ) );

            if ( ar ) cuda_assert( cudaFree(ar) );
            if ( dim ) cuda_assert( cudaFree(dim) );
            if ( I_diff ) cuda_assert( cudaFree(I_diff) );
            if ( I_exp ) cuda_assert( cudaFree(I_exp) );
            // FIX: this guard previously tested I_exp while freeing I_zigmoid,
            // so I_zigmoid could leak (or a stale pointer could be freed)
            // whenever the two pointers' states diverged.
            if ( I_zigmoid ) cuda_assert( cudaFree(I_zigmoid) );
            if ( diag ) cuda_assert( cudaFree(diag) );
            if ( ug ) cuda_assert( cudaFree(ug) );
            if ( cache ) cuda_assert( cudaFree(cache) );
            if ( beams ) cuda_assert( cudaFree(beams) );
            if ( kt_factor ) cuda_assert( cudaFree(kt_factor) );
            if ( gvec ) cuda_assert( cudaFree(gvec) );
            if ( tilt ) cuda_assert( cudaFree(tilt) );

            // Null out every freed pointer so the guards above stay correct
            // on any later cleanup path.
            // FIX: beams and kt_factor were previously freed but never reset,
            // unlike every other member.
            ar = 0;
            dim = 0;
            I_diff = 0;
            I_exp = 0;
            I_zigmoid = 0;
            diag = 0;
            ug = 0;
            cache = 0;
            beams = 0;
            kt_factor = 0;
            gvec = 0;
            tilt = 0;
        }
开发者ID:fengwang,项目名称:larbed-refinement,代码行数:30,代码来源:cuda_rotated_pattern_data.hpp

示例3: THCState_getCurrentDeviceProperties

// Return the cached cudaDeviceProp entry for the device that is current on
// the calling thread. The properties array is owned by the THCState.
struct cudaDeviceProp* THCState_getCurrentDeviceProperties(THCState* state)
{
  int device = -1;
  // Query the current device; THCudaCheck aborts on any CUDA error.
  THCudaCheck(cudaGetDevice(&device));
  return state->deviceProperties + device;
}
开发者ID:ASAPPinc,项目名称:cutorch,代码行数:7,代码来源:THCGeneral.c

示例4: printf

void Engine::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  LOG(INFO) << "Device id:                     " << device;
  LOG(INFO) << "Major revision number:         " << prop.major;
  LOG(INFO) << "Minor revision number:         " << prop.minor;
  LOG(INFO) << "Name:                          " << prop.name;
  LOG(INFO) << "Total global memory:           " << prop.totalGlobalMem;
  LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
  LOG(INFO) << "Total registers per block:     " << prop.regsPerBlock;
  LOG(INFO) << "Warp size:                     " << prop.warpSize;
  LOG(INFO) << "Maximum memory pitch:          " << prop.memPitch;
  LOG(INFO) << "Maximum threads per block:     " << prop.maxThreadsPerBlock;
  LOG(INFO) << "Maximum dimension of block:    "
      << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
      << prop.maxThreadsDim[2];
  LOG(INFO) << "Maximum dimension of grid:     "
      << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
      << prop.maxGridSize[2];
  LOG(INFO) << "Clock rate:                    " << prop.clockRate;
  LOG(INFO) << "Total constant memory:         " << prop.totalConstMem;
  LOG(INFO) << "Texture alignment:             " << prop.textureAlignment;
  LOG(INFO) << "Concurrent copy and execution: "
      << (prop.deviceOverlap ? "Yes" : "No");
  LOG(INFO) << "Number of multiprocessors:     " << prop.multiProcessorCount;
  LOG(INFO) << "Kernel execution timeout:      "
      << (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
  return;
}
开发者ID:airxcy,项目名称:ITF_Tracker,代码行数:34,代码来源:common.cpp

示例5: THCudaMemGetInfoCached

/* Report free/total device memory for the current device, counting bytes the
   caching allocator is holding (but could reuse) as free. Also writes an
   optimistic upper bound for the largest single allocatable block.
   Returns the first CUDA error encountered, cudaSuccess otherwise. */
cudaError_t THCudaMemGetInfoCached(THCState *state,  size_t* freeBytes, size_t* totalBytes, size_t* largestBlock)
{
  size_t cachedBytes = 0;
  THCDeviceAllocator* allocator = state->cudaDeviceAllocator;

  *largestBlock = 0;
  /* get info from CUDA first */
  cudaError_t ret = cudaMemGetInfo(freeBytes, totalBytes);
  if (ret!= cudaSuccess)
    return ret;

  int device;
  ret = cudaGetDevice(&device);
  if (ret!= cudaSuccess)
    return ret;

  /* not always true - our optimistic guess here */
  *largestBlock = *freeBytes;

  /* Ask the caching allocator how many bytes it holds for this device; it
     may also refine largestBlock. cacheInfo is an optional callback. */
  if (allocator->cacheInfo != NULL)
    allocator->cacheInfo(allocator->state, device, &cachedBytes, largestBlock);

  /* Adjust resulting free bytes number. largestBlock unused for now */
  *freeBytes += cachedBytes;
  return cudaSuccess;
}
开发者ID:HustlehardInc,项目名称:pytorch,代码行数:26,代码来源:THCGeneral.cpp

示例6: printf

void Caffe::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  printf("Device id:                     %d\n", device);
  printf("Major revision number:         %d\n", prop.major);
  printf("Minor revision number:         %d\n", prop.minor);
  printf("Name:                          %s\n", prop.name);
  printf("Total global memory:           %lu\n", prop.totalGlobalMem);
  printf("Total shared memory per block: %lu\n", prop.sharedMemPerBlock);
  printf("Total registers per block:     %d\n", prop.regsPerBlock);
  printf("Warp size:                     %d\n", prop.warpSize);
  printf("Maximum memory pitch:          %lu\n", prop.memPitch);
  printf("Maximum threads per block:     %d\n", prop.maxThreadsPerBlock);
  printf("Maximum dimension of block:    %d, %d, %d\n",
      prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
  printf("Maximum dimension of grid:     %d, %d, %d\n",
      prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
  printf("Clock rate:                    %d\n", prop.clockRate);
  printf("Total constant memory:         %lu\n", prop.totalConstMem);
  printf("Texture alignment:             %lu\n", prop.textureAlignment);
  printf("Concurrent copy and execution: %s\n",
      (prop.deviceOverlap ? "Yes" : "No"));
  printf("Number of multiprocessors:     %d\n", prop.multiProcessorCount);
  printf("Kernel execution timeout:      %s\n",
      (prop.kernelExecTimeoutEnabled ? "Yes" : "No"));
  return;
}
开发者ID:FuchenUSTC,项目名称:caffe-c3d,代码行数:32,代码来源:common.cpp

示例7: cudaGetDeviceCount

// Build one tab page per CUDA device and wire the signals that keep the pages
// informed about the currently selected main compute device.
void GpuDeviceInformationDialog::setupGpuDeviceTabPages()
{
	// NOTE(review): CUDA return codes are not checked anywhere in this
	// method; if cudaGetDeviceCount fails, numDevs stays 0 and no tabs
	// are created.
	int numDevs = 0;
	cudaGetDeviceCount(&numDevs);

	this->setWindowTitle(QString("GPU Device Information (") + QString::number(numDevs) + QString(" devices found)"));

	for(int i = 0; i < numDevs; i++)
	{
		cudaDeviceProp devProp;
		cudaGetDeviceProperties(&devProp, i);

		// One tab per device, labelled with the device name CUDA reports.
		QWidget* deviceTabPage = new GpuDeviceInformationDialogTabPage(devProp, i);
		
		this->tabWidget->addTab(deviceTabPage, devProp.name);

		// Two-way wiring: a page can request a device switch, and the dialog
		// notifies every page after the switch has happened.
		connect(deviceTabPage, SIGNAL(setMainComputeDevice(int)), this, SLOT(on_setMainComputeDevice(int)));
		connect(this, SIGNAL(hasChangedMainComputeDevice(int)), deviceTabPage, SLOT(on_hasChangedMainComputeDevice(int)));

	}

	// Broadcast the current device so all pages start in a consistent state.
	int currentComputeDevice;
	cudaGetDevice(&currentComputeDevice);

	emit hasChangedMainComputeDevice(currentComputeDevice);
}
开发者ID:apartridge,项目名称:GpuRayTracer,代码行数:27,代码来源:GpuDeviceInformationDialog.cpp

示例8: __declspec

// DLL-exported accessor: returns the CUDA device currently selected for the
// calling host thread. If the query fails, dev keeps its initial value 0.
__declspec(dllexport) int __stdcall GetDevice()
{
    int dev = 0;
    cudaGetDevice(&dev);
    return dev;
}
开发者ID:dtegunov,项目名称:warp,代码行数:7,代码来源:Device.cpp

示例9: rcrackiThreadEntryPoint

// Thread entry point: start processing of jobs. When GPU support is compiled
// in, probe for a usable CUDA device and size the GPU work buffers by compute
// capability; otherwise (or on failure) mark CPU-only mode via cudaDevId = -1.
void rcrackiThread::rcrackiThreadEntryPoint()
{
#if GPU
	// FIX: compare against cudaSuccess (the runtime-API cudaError_t value)
	// instead of the driver-API CUDA_SUCCESS (CUresult). Both are 0, so the
	// behavior is unchanged, but mixing the two enum families is incorrect.
	if(gpu != 0 && cudaGetDevice(&cudaDevId) == cudaSuccess) {
		// Conservative defaults, raised below for newer devices.
		cudaBuffCount = 0x2000;
		cudaChainSize = 100;

		cudaDeviceProp deviceProp;
		if(cudaGetDeviceProperties(&deviceProp, cudaDevId) == cudaSuccess) {
			switch(deviceProp.major) {
			case 1: ; break;  // compute 1.x: keep the defaults
			case 2:
				// Compute 2.x: larger buffers and longer chains.
				cudaBuffCount = 0x4000;
				cudaChainSize = 200;
				break;
			}
		}
		// Let the CUDA helper round the buffer size to what it can handle.
		cudaBuffCount = rcuda::GetChainsBufferSize(cudaBuffCount);
	}
	else
#endif
		cudaDevId = -1;  // no GPU requested or detected: CPU-only mode

	// Dispatch this thread's job type.
	if (falseAlarmChecker) {
		if (falseAlarmCheckerO) {
			CheckAlarmO();
		}
		else {
			CheckAlarm();
		}
	}
	else {
		PreCalculate();
	}
}
开发者ID:ChunHungLiu,项目名称:ctf-writeup,代码行数:36,代码来源:rcrackiThread.cpp

示例10: TryToAddSingleFitStream

// Try to create one more SingleFitStream on the current CUDA device and
// register it with the stream manager (vpsM, passed as void* and cast back).
// Returns false only when not even a single stream exists, which tells the
// caller to fall back to the CPU fitting path; returns true otherwise
// (including in non-CUDA builds, where this is a no-op).
bool TryToAddSingleFitStream(void * vpsM, WorkerInfoQueue* q){
#ifdef ION_COMPILE_CUDA
  int dev_id = 0;
  cudaStreamManager * psM = (cudaStreamManager *) vpsM;
  SingleFitStream * temp;
  // NOTE(review): return value ignored; on failure dev_id stays 0 and is
  // only used in the log messages below.
  cudaGetDevice( &dev_id );
  int i;
    try{ // exception handling to allow fallback to CPU Fit if not a single stream could be created
      temp =  new SingleFitStream(q);
      i = psM->addStreamUnit( temp);
      std::cout <<"CUDA: Device " <<  dev_id <<  " Single Fit stream " << i <<" created " << std::endl;
      psM->printMemoryUsage();
    }
    catch(cudaException& e)
    {
      cout << e.what() << endl;
      // Creation failed: OK if we already have streams, fatal-for-GPU if not.
      if(psM->getNumStreams() > 0){ 
        cout << "CUDA: Device " << dev_id<< " could not create more than " << psM->getNumStreams() << " Single Fit streams" << std::endl;       
        psM->printMemoryUsage();
      }else{
        std::cout << "CUDA: Device " << dev_id << " no Single Fit streams could be created >>>>>>>>>>>>>>>>> FALLING BACK TO CPU!"<< std::endl;
        return false;
      }
    }

#endif
  return true;
}
开发者ID:bdiegel,项目名称:TS,代码行数:28,代码来源:cudaWrapper.cpp

示例11: getCurrentDeviceProperties

// Return the (cached) properties of the CUDA device that is current on the
// calling thread. Errors from the device query are routed through checkCuda.
const cudaDeviceProp& getCurrentDeviceProperties() {
  int device = 0;
  auto err = cudaGetDevice(&device);
  // FIX: the error message previously named cudaGetDeviceCount, but the call
  // that can fail here is cudaGetDevice.
  checkCuda(err, std::string("CUDA ERROR: cudaGetDevice "));

  return getDeviceProperties(device);
}
开发者ID:JohnJPS,项目名称:fbcuda,代码行数:7,代码来源:CachedDeviceProperties.cpp

示例12: m_initialized

  // Constructor: verify a CUDA device with at least compute capability 1.1
  // is present, cache its properties, and allocate the min/max candidate
  // buffer. All failures are reported through the ASRL_ASSERT_* macros.
  GpuSurfDetectorInternal::GpuSurfDetectorInternal(GpuSurfConfiguration config) : 
    m_initialized(false),
    m_config(config)
  {
    int deviceCount;
    int device;
    cudaError_t err;
    cudaGetDeviceCount(&deviceCount);
    ASRL_ASSERT_GT(deviceCount,0,"There are no CUDA capable devices present");
    
	
    err = cudaGetDevice(&device);
    ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device: " << cudaGetErrorString(err));		
    //std::cout << "Found device " << device << std::endl;
    err = cudaGetDeviceProperties(&m_deviceProp,device);
    ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device properties: " << cudaGetErrorString(err));		

    // FIX: the old check asserted minor >= 1 unconditionally, which wrongly
    // rejected capabilities such as 2.0 or 3.0 (major > 1, minor == 0).
    // Capability >= 1.1 means: major > 1, or major == 1 with minor >= 1.
    ASRL_ASSERT_GE(m_deviceProp.major,1,"Minimum compute capability 1.1 is necessary");
    if (m_deviceProp.major == 1) {
      ASRL_ASSERT_GE(m_deviceProp.minor,1,"Minimum compute capability 1.1 is necessary");
    }

    // Reserve and zero the candidate buffer used by the detector.
    m_maxmin.init(ASRL_SURF_MAX_CANDIDATES,false);
    m_maxmin.memset(0);

  }
开发者ID:dongmingdmdm,项目名称:camnavi,代码行数:25,代码来源:GpuSurfDetectorInternal.cpp

示例13: cudaGetDevice

// Return the maximum supported 2D texture height (maxTexture2D[1]) of the
// CUDA device that is current on the calling thread.
int CUDADevicesService::getMaximumTexture2DHeight() {
	int device;
	cudaGetDevice(&device);
	// FIX: query into a stack object. The original heap-allocated the
	// cudaDeviceProp with `new` and never deleted it, leaking on every call.
	cudaDeviceProp devProperties;
	cudaGetDeviceProperties(&devProperties, device);
	return devProperties.maxTexture2D[1];
}
开发者ID:christiantinauer,项目名称:relaxometry,代码行数:7,代码来源:CUDADevicesService.cpp

示例14: cutorch_streamWaitFor

/*
   Usage:
   cutorch.streamWaitFor(waiterStream, {waitForStream1, ..., waitForStreamN})
   for streams on the current device. Creates a one-way barrier where
   waiterStream waits for waitForStream1-N to reach the current point.
*/
static int cutorch_streamWaitFor(lua_State *L)
{
  THCState *state = cutorch_getstate(L);

  /* All stream ids are resolved against the device current on this thread. */
  int curDev = -1;
  THCudaCheck(cudaGetDevice(&curDev));

  /* Check that the waiting stream is in bounds; this will error out if not */
  int waitingId = (int) luaL_checknumber(L, 1);
  cudaStream_t streamWaiting =
    THCState_getDeviceStream(state, curDev, waitingId);

  /* Validate the streams that we are waiting on */
  int streams = checkAndCountListOfStreams(L, state, 2, curDev);

  if (streams < 1) {
    /* nothing to synchronize */
    return 0;
  }
  /* One-way dependency; streamWaiting will wait for the list of streams to
     wait on to complete execution of pending scheduled kernels/events */
  /* NOTE(review): malloc result is not checked; a failed allocation would
     be dereferenced inside createSingleDeviceEvents. */
  cudaEvent_t * events = (cudaEvent_t*)malloc(sizeof(cudaEvent_t) * streams);
  createSingleDeviceEvents(L, state, 2, curDev, events);
  /* Then, wait on them */
  for (int i = 0; i < streams; i++) {
    /* Enqueue the wait on streamWaiting, then release the event handle. */
    THCudaCheck(cudaStreamWaitEvent(streamWaiting, events[i], 0));
    THCudaCheck(cudaEventDestroy(events[i]));
  }
  free(events);
  /* No Lua return values. */
  return 0;
}
开发者ID:ASAPPinc,项目名称:cutorch,代码行数:37,代码来源:init.c

示例15: gpu_print_properties

// Print a formatted report of the current CUDA device's properties to the
// given output stream (e.g. stdout or a log file).
void gpu_print_properties(FILE* out){
  int device = -1;
  gpu_safe( cudaGetDevice(&device) );
  
  cudaDeviceProp prop;
  gpu_safe( cudaGetDeviceProperties(&prop, device) ); 
  
  // Unit divisors for readable sizes.
  int MiB = 1024 * 1024;
  int kiB = 1024;
  
  fprintf(out, "     Device number: %d\n", device);
  fprintf(out, "       Device name: %s\n", prop.name);
  fprintf(out, "     Global Memory: %d MiB\n", (int)(prop.totalGlobalMem/MiB));
  fprintf(out, "     Shared Memory: %d kiB/block\n", (int)(prop.sharedMemPerBlock/kiB));
  fprintf(out, "   Constant memory: %d kiB\n", (int)(prop.totalConstMem/kiB));
  // NOTE(review): regsPerBlock is a count of registers, so dividing by kiB
  // here reports "registers / 1024", not bytes — confirm this is intended.
  fprintf(out, "         Registers: %d per block\n", (int)(prop.regsPerBlock/kiB));
  fprintf(out, "         Warp size: %d threads\n", (int)(prop.warpSize));
  //fprintf(out, "  Max memory pitch: %d bytes\n", (int)(prop.memPitch));
  fprintf(out, " Texture alignment: %d bytes\n", (int)(prop.textureAlignment));
  fprintf(out, " Max threads/block: %d\n", prop.maxThreadsPerBlock);
  // X, Y, Z are index constants defined elsewhere in this project.
  fprintf(out, "    Max block size: %d x %d x %d threads\n", prop.maxThreadsDim[X], prop.maxThreadsDim[Y], prop.maxThreadsDim[Z]);
  fprintf(out, "     Max grid size: %d x %d x %d blocks\n", prop.maxGridSize[X], prop.maxGridSize[Y], prop.maxGridSize[Z]);
  fprintf(out, "Compute capability: %d.%d\n", prop.major, prop.minor);
  fprintf(out, "        Clock rate: %d MHz\n", prop.clockRate/1000);
  fprintf(out, "   Multiprocessors: %d\n", prop.multiProcessorCount);
  fprintf(out, "   Timeout enabled: %d\n", prop.kernelExecTimeoutEnabled);
  fprintf(out, "      Compute mode: %d\n", prop.computeMode);
  fprintf(out, "    Device overlap: %d\n", prop.deviceOverlap);
  fprintf(out, "Concurrent kernels: %d\n", prop.concurrentKernels);
  fprintf(out, "        Integrated: %d\n", prop.integrated);
  fprintf(out, "  Can map host mem: %d\n", prop.canMapHostMemory);
  
}
开发者ID:LStoleriu,项目名称:hotspin,代码行数:33,代码来源:gpu_properties.cpp


注:本文中的cudaGetDevice函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。