本文整理汇总了C++中cudaGetDevice函数的典型用法代码示例。如果您正苦于以下问题:C++ cudaGetDevice函数的具体用法?C++ cudaGetDevice怎么用?C++ cudaGetDevice使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cudaGetDevice函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: switch
// 内部使用的
// 如果当前未初始化直接在GPU分配内存
// 如果当前在CPU,则在GPU上分配内存并且复制到GPU
// 如果数据已经在GPU则啥也不做
// Internal use only.
// If currently uninitialized, allocate memory directly on the GPU.
// If data currently lives on the CPU, allocate GPU memory (if needed) and copy it over.
// If data is already on the GPU (or synced), do nothing.
inline void SyncedMemory::to_gpu() {
#ifndef CPU_ONLY
switch (head_) {
case UNINITIALIZED:
// Record which device owns the allocation so it can be freed there later.
CUDA_CHECK(cudaGetDevice(&gpu_device_));
// Allocate device memory.
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
// Zero-initialize the fresh allocation.
caffe_gpu_memset(size_, 0, gpu_ptr_);
head_ = HEAD_AT_GPU;
own_gpu_data_ = true;
break;
case HEAD_AT_CPU:
// Lazily allocate the device buffer on first CPU->GPU sync.
if (gpu_ptr_ == NULL) {
CUDA_CHECK(cudaGetDevice(&gpu_device_));
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
own_gpu_data_ = true;
}
caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
head_ = SYNCED;
break;
case HEAD_AT_GPU:
case SYNCED:
// Data already up to date on the GPU; nothing to do.
break;
}
#else
NO_GPU;
#endif
}
示例2: cuda_assert
// Frees every device-side buffer owned by this pattern-data object.
// cudaFree must run with the owning device current, so switch to
// device_id first if another device is active on this thread.
~cuda_pattern_data()
{
    int current_id;
    // BUG FIX: the address-of operator was mojibake-mangled ("¤t_id");
    // pass &current_id.
    cuda_assert( cudaGetDevice(&current_id) );
    if ( current_id != device_id ) cuda_assert( cudaSetDevice( device_id ) );
    if ( ar ) cuda_assert( cudaFree(ar) );
    if ( dim ) cuda_assert( cudaFree(dim) );
    if ( I_diff ) cuda_assert( cudaFree(I_diff) );
    if ( I_exp ) cuda_assert( cudaFree(I_exp) );
    // BUG FIX: this guard previously tested I_exp, so a null I_zigmoid could
    // be freed (harmless) but a live I_zigmoid leaked whenever I_exp was null.
    if ( I_zigmoid ) cuda_assert( cudaFree(I_zigmoid) );
    if ( diag ) cuda_assert( cudaFree(diag) );
    if ( ug ) cuda_assert( cudaFree(ug) );
    if ( cache ) cuda_assert( cudaFree(cache) );
    if ( beams ) cuda_assert( cudaFree(beams) );
    if ( kt_factor ) cuda_assert( cudaFree(kt_factor) );
    if ( gvec ) cuda_assert( cudaFree(gvec) );
    if ( tilt ) cuda_assert( cudaFree(tilt) );
    // Null all members so any accidental later use fails fast instead of
    // touching freed device memory.
    ar = 0;
    dim = 0;
    I_diff = 0;
    I_exp = 0;
    I_zigmoid = 0;
    diag = 0;
    ug = 0;
    cache = 0;
    // BUG FIX: beams and kt_factor were freed above but never cleared,
    // leaving dangling pointers; null them like the other members.
    beams = 0;
    kt_factor = 0;
    gvec = 0;
    tilt = 0;
}
示例3: THCState_getCurrentDeviceProperties
// Returns the cached cudaDeviceProp for the device that is current on the
// calling thread. The pointer aliases state-owned storage; callers must not
// free it.
struct cudaDeviceProp* THCState_getCurrentDeviceProperties(THCState* state)
{
int curDev = -1;
THCudaCheck(cudaGetDevice(&curDev));
// Assumes state->deviceProperties was populated for every device during
// THC initialization -- TODO confirm against THCState setup code.
return &(state->deviceProperties[curDev]);
}
示例4: printf
void Engine::DeviceQuery() {
cudaDeviceProp prop;
int device;
if (cudaSuccess != cudaGetDevice(&device)) {
printf("No cuda device present.\n");
return;
}
CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
LOG(INFO) << "Device id: " << device;
LOG(INFO) << "Major revision number: " << prop.major;
LOG(INFO) << "Minor revision number: " << prop.minor;
LOG(INFO) << "Name: " << prop.name;
LOG(INFO) << "Total global memory: " << prop.totalGlobalMem;
LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
LOG(INFO) << "Total registers per block: " << prop.regsPerBlock;
LOG(INFO) << "Warp size: " << prop.warpSize;
LOG(INFO) << "Maximum memory pitch: " << prop.memPitch;
LOG(INFO) << "Maximum threads per block: " << prop.maxThreadsPerBlock;
LOG(INFO) << "Maximum dimension of block: "
<< prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
<< prop.maxThreadsDim[2];
LOG(INFO) << "Maximum dimension of grid: "
<< prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
<< prop.maxGridSize[2];
LOG(INFO) << "Clock rate: " << prop.clockRate;
LOG(INFO) << "Total constant memory: " << prop.totalConstMem;
LOG(INFO) << "Texture alignment: " << prop.textureAlignment;
LOG(INFO) << "Concurrent copy and execution: "
<< (prop.deviceOverlap ? "Yes" : "No");
LOG(INFO) << "Number of multiprocessors: " << prop.multiProcessorCount;
LOG(INFO) << "Kernel execution timeout: "
<< (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
return;
}
示例5: THCudaMemGetInfoCached
/* Reports free/total device memory, folding in bytes currently held by the
   THC caching allocator (which it could hand back on demand).
   largestBlock is an optimistic guess unless the allocator refines it. */
cudaError_t THCudaMemGetInfoCached(THCState *state, size_t* freeBytes, size_t* totalBytes, size_t* largestBlock)
{
  *largestBlock = 0;
  /* Start from what the CUDA runtime reports. */
  cudaError_t err = cudaMemGetInfo(freeBytes, totalBytes);
  if (err != cudaSuccess) {
    return err;
  }
  int curDevice;
  err = cudaGetDevice(&curDevice);
  if (err != cudaSuccess) {
    return err;
  }
  /* Optimistic default: assume all free memory forms one contiguous block. */
  *largestBlock = *freeBytes;
  /* Ask the caching allocator how much it is holding for this device. */
  size_t cached = 0;
  THCDeviceAllocator* alloc = state->cudaDeviceAllocator;
  if (alloc->cacheInfo != NULL) {
    alloc->cacheInfo(alloc->state, curDevice, &cached, largestBlock);
  }
  /* Cached bytes are reclaimable, so count them as free. */
  *freeBytes += cached;
  return cudaSuccess;
}
示例6: printf
// Prints a human-readable summary of the current CUDA device to stdout.
// If no device (or driver) is available, prints a notice and returns.
void Caffe::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  printf("Device id: %d\n", device);
  printf("Major revision number: %d\n", prop.major);
  printf("Minor revision number: %d\n", prop.minor);
  printf("Name: %s\n", prop.name);
  // BUG FIX: totalGlobalMem, sharedMemPerBlock, memPitch, totalConstMem and
  // textureAlignment are size_t; "%lu" mismatches size_t on LLP64 platforms
  // (64-bit Windows), which is undefined behavior. "%zu" is the portable
  // size_t specifier.
  printf("Total global memory: %zu\n", prop.totalGlobalMem);
  printf("Total shared memory per block: %zu\n", prop.sharedMemPerBlock);
  printf("Total registers per block: %d\n", prop.regsPerBlock);
  printf("Warp size: %d\n", prop.warpSize);
  printf("Maximum memory pitch: %zu\n", prop.memPitch);
  printf("Maximum threads per block: %d\n", prop.maxThreadsPerBlock);
  printf("Maximum dimension of block: %d, %d, %d\n",
      prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
  printf("Maximum dimension of grid: %d, %d, %d\n",
      prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
  printf("Clock rate: %d\n", prop.clockRate);
  printf("Total constant memory: %zu\n", prop.totalConstMem);
  printf("Texture alignment: %zu\n", prop.textureAlignment);
  printf("Concurrent copy and execution: %s\n",
      (prop.deviceOverlap ? "Yes" : "No"));
  printf("Number of multiprocessors: %d\n", prop.multiProcessorCount);
  printf("Kernel execution timeout: %s\n",
      (prop.kernelExecTimeoutEnabled ? "Yes" : "No"));
}
示例7: cudaGetDeviceCount
// Builds one tab per detected CUDA device and wires each tab to the
// main-compute-device selection signals, then broadcasts the device that is
// currently active so tabs can highlight it.
void GpuDeviceInformationDialog::setupGpuDeviceTabPages()
{
    int numDevs = 0;
    cudaGetDeviceCount(&numDevs);
    this->setWindowTitle(QString("GPU Device Information (") + QString::number(numDevs) + QString(" devices found)"));
    for(int i = 0; i < numDevs; i++)
    {
        cudaDeviceProp devProp;
        cudaGetDeviceProperties(&devProp, i);
        // Tab pages are parented to the tab widget, which owns and deletes them.
        QWidget* deviceTabPage = new GpuDeviceInformationDialogTabPage(devProp, i);
        this->tabWidget->addTab(deviceTabPage, devProp.name);
        connect(deviceTabPage, SIGNAL(setMainComputeDevice(int)), this, SLOT(on_setMainComputeDevice(int)));
        connect(this, SIGNAL(hasChangedMainComputeDevice(int)), deviceTabPage, SLOT(on_hasChangedMainComputeDevice(int)));
    }
    // BUG FIX: the address-of operator was mojibake-mangled ("¤t..."),
    // which does not compile; pass &currentComputeDevice. Also initialize the
    // variable so an ignored cudaGetDevice failure cannot emit garbage.
    int currentComputeDevice = 0;
    cudaGetDevice(&currentComputeDevice);
    emit hasChangedMainComputeDevice(currentComputeDevice);
}
示例8: __declspec
// Exported C-style accessor: returns the CUDA device id current on the
// calling thread. Initialized to 0 so an ignored cudaGetDevice failure still
// yields a sane default.
__declspec(dllexport) int __stdcall GetDevice()
{
    int current_device = 0;
    cudaGetDevice(&current_device);
    return current_device;
}
示例9: rcrackiThreadEntryPoint
// Thread entry point: start processing of jobs. Optionally binds this worker
// to a CUDA device and sizes its buffers by compute capability, then
// dispatches to false-alarm checking or precalculation.
void rcrackiThread::rcrackiThreadEntryPoint()
{
#if GPU
// NOTE(review): the runtime call cudaGetDevice() is compared against
// CUDA_SUCCESS (a driver-API name) -- presumably aliased to cudaSuccess in
// this project's headers; verify.
if(gpu != 0 && cudaGetDevice(&cudaDevId) == CUDA_SUCCESS) {
// Defaults for compute capability 1.x devices.
cudaBuffCount = 0x2000;
cudaChainSize = 100;
cudaDeviceProp deviceProp;
if(cudaGetDeviceProperties(&deviceProp, cudaDevId) == CUDA_SUCCESS) {
switch(deviceProp.major) {
case 1: ; break;
case 2:
// Compute capability 2.x devices get larger buffers and chains.
cudaBuffCount = 0x4000;
cudaChainSize = 200;
break;
}
}
// Let the CUDA backend round the buffer count to its preferred size.
cudaBuffCount = rcuda::GetChainsBufferSize(cudaBuffCount);
}
else
#endif
// With GPU enabled this is the dangling-else branch (no usable device);
// with GPU disabled this assignment always runs.
cudaDevId = -1;
if (falseAlarmChecker) {
if (falseAlarmCheckerO) {
CheckAlarmO();
}
else {
CheckAlarm();
}
}
else {
PreCalculate();
}
}
示例10: TryToAddSingleFitStream
// Attempts to create one additional SingleFitStream fed by queue q and
// register it with the cudaStreamManager passed (type-erased) via vpsM.
// Returns false only when not even a single stream exists, signalling the
// caller to fall back to the CPU fit path. Compiles to a no-op returning
// true when ION_COMPILE_CUDA is not defined.
bool TryToAddSingleFitStream(void * vpsM, WorkerInfoQueue* q){
#ifdef ION_COMPILE_CUDA
int dev_id = 0;
cudaStreamManager * psM = (cudaStreamManager *) vpsM;
SingleFitStream * temp;
// Device id is only used for log messages below; return status unchecked.
cudaGetDevice( &dev_id );
int i;
try{ // exception handling to allow fallback to CPU fit if not a single stream could be created
temp = new SingleFitStream(q);
i = psM->addStreamUnit( temp);
std::cout <<"CUDA: Device " << dev_id << " Single Fit stream " << i <<" created " << std::endl;
psM->printMemoryUsage();
}
catch(cudaException& e)
{
cout << e.what() << endl;
// Creation failed: acceptable if at least one stream already exists.
if(psM->getNumStreams() > 0){
cout << "CUDA: Device " << dev_id<< " could not create more than " << psM->getNumStreams() << " Single Fit streams" << std::endl;
psM->printMemoryUsage();
}else{
std::cout << "CUDA: Device " << dev_id << " no Single Fit streams could be created >>>>>>>>>>>>>>>>> FALLING BACK TO CPU!"<< std::endl;
return false;
}
}
#endif
return true;
}
示例11: getCurrentDeviceProperties
// Returns the properties of the device current on the calling thread.
// The reference aliases storage owned by getDeviceProperties().
const cudaDeviceProp& getCurrentDeviceProperties() {
  int device = 0;
  auto err = cudaGetDevice(&device);
  // BUG FIX: the error message previously named cudaGetDeviceCount, but the
  // call being checked is cudaGetDevice.
  checkCuda(err, std::string("CUDA ERROR: cudaGetDevice "));
  return getDeviceProperties(device);
}
示例12: m_initialized
// Constructor: verifies a CUDA device exists and meets the library's minimum
// compute capability (1.1), fetches its properties into m_deviceProp, and
// initializes the max/min candidate buffer. Asserts (ASRL_ASSERT_*) on any
// failure.
GpuSurfDetectorInternal::GpuSurfDetectorInternal(GpuSurfConfiguration config) :
m_initialized(false),
m_config(config)
{
int deviceCount;
int device;
cudaError_t err;
cudaGetDeviceCount(&deviceCount);
ASRL_ASSERT_GT(deviceCount,0,"There are no CUDA capable devices present");
err = cudaGetDevice(&device);
ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device: " << cudaGetErrorString(err));
//std::cout << "Found device " << device << std::endl;
err = cudaGetDeviceProperties(&m_deviceProp,device);
ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device properties: " << cudaGetErrorString(err));
// BUG FIX: previously asserted major >= 1 AND minor >= 1, which rejects
// valid newer devices whose minor revision is 0 (e.g. 2.0, 3.0). Compare
// the combined capability value against 1.1 instead.
ASRL_ASSERT_GE(m_deviceProp.major * 10 + m_deviceProp.minor, 11, "Minimum compute capability 1.1 is necessary");
m_maxmin.init(ASRL_SURF_MAX_CANDIDATES,false);
m_maxmin.memset(0);
}
示例13: cudaGetDevice
// Returns the maximum 2D texture height (maxTexture2D[1]) of the device
// current on the calling thread.
int CUDADevicesService::getMaximumTexture2DHeight() {
  int device = 0;
  cudaGetDevice(&device);
  // BUG FIX: the properties struct was heap-allocated with new and never
  // deleted, leaking on every call; a stack object needs no cleanup.
  cudaDeviceProp devProperties;
  cudaGetDeviceProperties(&devProperties, device);
  return devProperties.maxTexture2D[1];
}
示例14: cutorch_streamWaitFor
/*
Usage:
cutorch.streamWaitFor(waiterStream, {waitForStream1, ..., waitForStreamN})
for streams on the current device. Creates a one-way barrier where
waiterStream waits for waitForStream1-N to reach the current point.
*/
static int cutorch_streamWaitFor(lua_State *L)
{
THCState *state = cutorch_getstate(L);
int curDev = -1;
THCudaCheck(cudaGetDevice(&curDev));
/* Check that the waiting stream is in bounds; this will error out if not */
int waitingId = (int) luaL_checknumber(L, 1);
cudaStream_t streamWaiting =
THCState_getDeviceStream(state, curDev, waitingId);
/* Validate the streams that we are waiting on */
int streams = checkAndCountListOfStreams(L, state, 2, curDev);
if (streams < 1) {
/* nothing to synchronize */
return 0;
}
/* One-way dependency; streamWaiting will wait for the list of streams to
wait on to complete execution of pending scheduled kernels/events */
/* NOTE(review): malloc result is not checked, and the events array leaks if
THCudaCheck raises a Lua error mid-loop -- presumably acceptable in this
codebase's error model; verify. */
cudaEvent_t * events = (cudaEvent_t*)malloc(sizeof(cudaEvent_t) * streams);
/* Record one event per stream we must wait on (all on the current device) */
createSingleDeviceEvents(L, state, 2, curDev, events);
/* Then, wait on them */
for (int i = 0; i < streams; i++) {
THCudaCheck(cudaStreamWaitEvent(streamWaiting, events[i], 0));
THCudaCheck(cudaEventDestroy(events[i]));
}
free(events);
return 0;
}
示例15: gpu_print_properties
void gpu_print_properties(FILE* out){
int device = -1;
gpu_safe( cudaGetDevice(&device) );
cudaDeviceProp prop;
gpu_safe( cudaGetDeviceProperties(&prop, device) );
int MiB = 1024 * 1024;
int kiB = 1024;
fprintf(out, " Device number: %d\n", device);
fprintf(out, " Device name: %s\n", prop.name);
fprintf(out, " Global Memory: %d MiB\n", (int)(prop.totalGlobalMem/MiB));
fprintf(out, " Shared Memory: %d kiB/block\n", (int)(prop.sharedMemPerBlock/kiB));
fprintf(out, " Constant memory: %d kiB\n", (int)(prop.totalConstMem/kiB));
fprintf(out, " Registers: %d per block\n", (int)(prop.regsPerBlock/kiB));
fprintf(out, " Warp size: %d threads\n", (int)(prop.warpSize));
//fprintf(out, " Max memory pitch: %d bytes\n", (int)(prop.memPitch));
fprintf(out, " Texture alignment: %d bytes\n", (int)(prop.textureAlignment));
fprintf(out, " Max threads/block: %d\n", prop.maxThreadsPerBlock);
fprintf(out, " Max block size: %d x %d x %d threads\n", prop.maxThreadsDim[X], prop.maxThreadsDim[Y], prop.maxThreadsDim[Z]);
fprintf(out, " Max grid size: %d x %d x %d blocks\n", prop.maxGridSize[X], prop.maxGridSize[Y], prop.maxGridSize[Z]);
fprintf(out, "Compute capability: %d.%d\n", prop.major, prop.minor);
fprintf(out, " Clock rate: %d MHz\n", prop.clockRate/1000);
fprintf(out, " Multiprocessors: %d\n", prop.multiProcessorCount);
fprintf(out, " Timeout enabled: %d\n", prop.kernelExecTimeoutEnabled);
fprintf(out, " Compute mode: %d\n", prop.computeMode);
fprintf(out, " Device overlap: %d\n", prop.deviceOverlap);
fprintf(out, "Concurrent kernels: %d\n", prop.concurrentKernels);
fprintf(out, " Integrated: %d\n", prop.integrated);
fprintf(out, " Can map host mem: %d\n", prop.canMapHostMemory);
}