本文整理汇总了C++中cudaGetDeviceProperties函数的典型用法代码示例。如果您正苦于以下问题:C++ cudaGetDeviceProperties函数的具体用法?C++ cudaGetDeviceProperties怎么用?C++ cudaGetDeviceProperties使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cudaGetDeviceProperties函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
// Example: enumerate every CUDA device visible to the runtime and log its
// key properties (compute capability, memory sizes, block/grid limits,
// warp size) through the wb* course logging/timing API.
// Returns 0 on success, -1 when no real CUDA GPU is detected.
int main(int argc, char ** argv) {
int deviceCount;
wbArg_read(argc, argv);
cudaGetDeviceCount(&deviceCount);
wbTime_start(GPU, "Getting GPU Data."); //@@ start a timer
for (int dev = 0; dev < deviceCount; dev++) {
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
if (dev == 0) {
// major/minor == 9999 is the sentinel reported when only the
// device-emulation stub is present, i.e. no real GPU.
if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
wbLog(TRACE, "No CUDA GPU has been detected");
return -1;
} else if (deviceCount == 1) {
//@@ WbLog is a provided logging API (similar to Log4J).
//@@ The logging function wbLog takes a level which is either
//@@ OFF, FATAL, ERROR, WARN, INFO, DEBUG, or TRACE and a
//@@ message to be printed.
wbLog(TRACE, "There is 1 device supporting CUDA");
} else {
wbLog(TRACE, "There are ", deviceCount, " devices supporting CUDA");
}
}
// Per-device property dump (sizes are in bytes).
wbLog(TRACE, "Device ", dev, " name: ", deviceProp.name);
wbLog(TRACE, " Computational Capabilities: ", deviceProp.major, ".", deviceProp.minor);
wbLog(TRACE, " Maximum global memory size: ", deviceProp.totalGlobalMem);
wbLog(TRACE, " Maximum constant memory size: ", deviceProp.totalConstMem);
wbLog(TRACE, " Maximum shared memory size per block: ", deviceProp.sharedMemPerBlock);
wbLog(TRACE, " Maximum block dimensions: ", deviceProp.maxThreadsDim[0], " x ",
deviceProp.maxThreadsDim[1], " x ",
deviceProp.maxThreadsDim[2]);
wbLog(TRACE, " Maximum grid dimensions: ", deviceProp.maxGridSize[0], " x ",
deviceProp.maxGridSize[1], " x ",
deviceProp.maxGridSize[2]);
wbLog(TRACE, " Warp size: ", deviceProp.warpSize);
}
wbTime_stop(GPU, "Getting GPU Data."); //@@ stop the timer
return 0;
}
示例2: initializeCUDA
// Binds the process to CUDA device 0 and derives a kernel tile width from
// its compute capability. On any CUDA error the error code and source line
// are printed and the whole process exits with EXIT_FAILURE.
// Side effect: writes `block_size` (presumably a file-scope global defined
// elsewhere in this file — TODO confirm).
void initializeCUDA() {
cudaError_t error;
int devID = 0;
error = cudaSetDevice(devID); if (error != cudaSuccess){printf("cudaSetDevice returned error code %d, line(%d)\n", error, __LINE__);exit(EXIT_FAILURE);}
error = cudaGetDevice(&devID); if (error != cudaSuccess){printf("cudaGetDevice returned error code %d, line(%d)\n", error, __LINE__);exit(EXIT_FAILURE);}
// printf("Device ID is %d\n",devID);
cudaDeviceProp deviceProp;
error = cudaGetDeviceProperties(&deviceProp,devID); if (error != cudaSuccess){printf("cudaGetDeviceProperties returned error code %d, line(%d)\n", error, __LINE__);exit(EXIT_FAILURE);}
// printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
// use larger block size for Fermi and above
// (SM 2.x allows 1024 threads/block, so 32x32 tiles fit; older GPUs
// are limited to 512 threads/block, hence 16x16)
block_size = (deviceProp.major < 2) ? 16 : 32;
}
示例3: XDEVL_MODULE_ERROR
// XdevL module initialization: parse the mediator's XML configuration file,
// read this module's settings from it, then cache the current CUDA device id
// and its properties in the members m_devID / m_prop.
// Returns ERR_OK on success, ERR_ERROR when the XML file cannot be parsed
// or the module information cannot be read.
xdl_int XdevLCudaImpl::init() {
TiXmlDocument xmlDocument;
if (!xmlDocument.LoadFile(getMediator()->getXmlFilename())) {
XDEVL_MODULE_ERROR("Could not parse xml file: " << getMediator()->getXmlFilename() << std::endl);
return ERR_ERROR;
}
if (readModuleInformation(&xmlDocument) != ERR_OK)
return ERR_ERROR;
// NOTE(review): the return codes of these two CUDA calls are ignored;
// on failure m_devID/m_prop are left unspecified — consider checking them.
cudaGetDevice(&m_devID);
cudaGetDeviceProperties(&m_prop, m_devID);
return ERR_OK;
}
示例4: cuda_safe_call
// Refreshes this configuration object from the CUDA runtime: queries the
// driver/runtime versions and the properties of the device selected by the
// member `device_id`, copies the fields of interest into members, then makes
// that device current and creates the cuBLAS and cuSPARSE handles.
// Throws neural_network_exception when no CUDA device exists or device_id is
// out of range; other CUDA/cuBLAS/cuSPARSE failures surface through the
// *_safe_call wrapper macros.
void cuda_running_configuration::update_parameters()
{
cuda_safe_call(cudaDriverGetVersion(&driver_version));
cuda_safe_call(cudaRuntimeGetVersion(&runtime_version));
int device_count;
cuda_safe_call(cudaGetDeviceCount(&device_count));
if (device_count <= 0)
throw neural_network_exception("No CUDA capable devices are found");
if (device_id >= device_count)
throw neural_network_exception((boost::format("Device ID %1% specified while %2% devices are available") % device_id % device_count).str());
cudaDeviceProp device_prop;
cuda_safe_call(cudaGetDeviceProperties(&device_prop, device_id));
// Cache the device properties in members for cheap repeated access.
device_name = device_prop.name;
compute_capability_major = device_prop.major;
compute_capability_minor = device_prop.minor;
clock_rate = device_prop.clockRate;
memory_clock_rate = device_prop.memoryClockRate;
memory_bus_width = device_prop.memoryBusWidth;
global_memory_size = device_prop.totalGlobalMem;
ecc_enabled = (device_prop.ECCEnabled != 0);
l2_cache_size = device_prop.l2CacheSize;
multiprocessor_count = device_prop.multiProcessorCount;
smem_per_block = device_prop.sharedMemPerBlock;
max_threads_per_multiprocessor = device_prop.maxThreadsPerMultiProcessor;
max_threads_per_block = device_prop.maxThreadsPerBlock;
// Copy the 3-element dimension-limit arrays element by element.
for(int i = 0; i < sizeof(max_threads_dim) / sizeof(max_threads_dim[0]); ++i)
max_threads_dim[i] = device_prop.maxThreadsDim[i];
for(int i = 0; i < sizeof(max_grid_size) / sizeof(max_grid_size[0]); ++i)
max_grid_size[i] = device_prop.maxGridSize[i];
max_texture_1d_linear = device_prop.maxTexture1DLinear;
texture_alignment = device_prop.textureAlignment;
pci_bus_id = device_prop.pciBusID;
pci_device_id = device_prop.pciDeviceID;
#ifdef _WIN32
tcc_mode = (device_prop.tccDriver != 0); // TCC driver mode exists only on Windows
#endif
// Select the device before creating the library handles so they are
// associated with the configured device.
cuda_safe_call(cudaSetDevice(device_id));
cublas_safe_call(cublasCreate(&cublas_handle));
cusparse_safe_call(cusparseCreate(&cusparse_handle));
}
示例5: cudaGetDeviceCount
// Counts the CUDA devices, announces the count in a message box (Spanish UI
// text), allocates the member `deviceProp` buffer reused by ChangeText(),
// and fills the device combo box with one "Device N : <name>" entry each.
// NOTE(review): `deviceProp = new cudaDeviceProp` leaks the previous buffer
// if this method is called more than once — TODO confirm single-call usage
// or that the destructor frees it.
void DialogSelectHardware::setListDevice()
{
cudaGetDeviceCount(&deviceCount);
QString text("Detectados "+QString::number(deviceCount)+" Dispositivos Compatibles con CUDA");
QMessageBox::information(0,"Dispositivos Detectados",text,QMessageBox::Ok);
deviceProp = new cudaDeviceProp;
for (int dev = 0; dev < deviceCount; ++dev)
{
cudaGetDeviceProperties(deviceProp, dev);
QString text("Device "+QString::number(dev).append(" : ")+ deviceProp->name);
ui->deviceComboBox->addItem(text);
}
}
示例6: cudaSetDevice
// Repopulates the detail table for the device at `indexDevice`: makes that
// device current, re-queries its properties into the member `deviceProp`
// (allocated in setListDevice), and writes one labelled row per property.
// Row layout is fixed: row 0 name/selection marker, rows 1-14 the individual
// properties; column 0 holds the label, column 1 the value.
void DialogSelectHardware::ChangeText(int indexDevice)
{
int driverVersion = 0, runtimeVersion = 0;
cudaSetDevice(indexDevice);
cudaGetDeviceProperties(deviceProp, indexDevice);
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
// Global memory formatted as "X MBytes (Y bytes)"; 1048576 = 1024*1024.
char msg[256];
SPRINTF(msg,"%.0f MBytes (%llu bytes)\n",
(float)deviceProp->totalGlobalMem/1048576.0f, (unsigned long long) deviceProp->totalGlobalMem);
ui->tableWidget->clear();
addItem(QString ("Device "+QString::number(indexDevice).append(" : ")+ deviceProp->name),0,0);
addItem((selectDevice == indexDevice) ? "Dispositivo Seleccionado " : " ",0,1);
// Versions are encoded as 1000*major + 10*minor, hence the /1000 and (%100)/10.
addItem("CUDA Driver Version / Runtime Version",1,0);
addItem(QString ("%1.%2 / %3.%4").arg(driverVersion/1000).arg((driverVersion%100)/10).arg( runtimeVersion/1000).arg((runtimeVersion%100)/10),1,1);
addItem("CUDA Capability Major/Minor version number: ",2,0);
addItem(QString ("%1.%2").arg(deviceProp->major).arg(deviceProp->minor),2,1);
addItem("Total amount of global memory:",3,0);
addItem(msg,3,1);
addItem(QString ("(%1) Multiprocessors, (%2) CUDA Cores/MP:%3 CUDA Cores").arg( deviceProp->multiProcessorCount).arg( _ConvertSMVer2Cores(deviceProp->major, deviceProp->minor)).arg( _ConvertSMVer2Cores(deviceProp->major, deviceProp->minor) * deviceProp->multiProcessorCount),4,0);
addItem("Total amount of constant memory:",5,0);
addItem(QString ("%1 bytes").arg(deviceProp->totalConstMem),5,1);
addItem("Total amount of shared memory per block:",6,0);
addItem(QString ("%1 bytes").arg(deviceProp->sharedMemPerBlock),6,1);
addItem("Total number of registers available per block:",7,0);
addItem(QString ("%1").arg(deviceProp->regsPerBlock),7,1);
addItem("Warp size:",8,0);
addItem(QString ("%1").arg(deviceProp->warpSize),8,1);
addItem("Maximum number of threads per multiprocessor:",9,0);
addItem(QString ("%1").arg(deviceProp->maxThreadsPerMultiProcessor),9,1);
addItem("Maximum number of threads per block:",10,0);
addItem(QString ("%1").arg(deviceProp->maxThreadsPerBlock),10,1);
addItem("Max dimension size of a thread block (x,y,z):",11,0);
addItem(QString ("(%1, %2, %3)").arg(deviceProp->maxThreadsDim[0]).arg( deviceProp->maxThreadsDim[1]).arg( deviceProp->maxThreadsDim[2]),11,1);
addItem("Max dimension size of a grid size (x,y,z):",12,0);
addItem(QString ("(%1, %2, %3)\n").arg(deviceProp->maxGridSize[0]).arg(deviceProp->maxGridSize[1]).arg(deviceProp->maxGridSize[2]),12,1);
addItem("Run time limit on kernels: ",13,0);
addItem(QString ("%1\n").arg(deviceProp->kernelExecTimeoutEnabled ? "Yes" : "No"),13,1);
addItem("Integrated GPU sharing Host Memory: ",14,0);
addItem( QString ("%1\n").arg(deviceProp->integrated ? "Yes" : "No"),14,1);
ui->tableWidget->resizeColumnsToContents();
ui->tableWidget->resizeRowsToContents();
}
示例7: cutilDeviceInit
// Minimal CUDA bootstrap: verifies that at least one CUDA-capable device
// exists, prints the name of device 0, and makes device 0 the active device.
// Exits the process with -1 when no CUDA device is present.
// argc/argv are accepted for call-site compatibility but are not used to
// select a device — device 0 is always chosen.
void
cutilDeviceInit ( int argc, char ** argv )
{
    int deviceCount;
    cutilSafeCall ( cudaGetDeviceCount ( &deviceCount ) );
    if ( deviceCount == 0 )
    {
        printf ( "cutil error: no devices supporting CUDA\n" );
        exit ( -1 );
    }
    // Bug fix: the CUDA runtime declares 'struct cudaDeviceProp'; there is
    // no 'cudaDeviceProp_t' typedef, so the original did not compile.
    cudaDeviceProp deviceProp;
    cutilSafeCall ( cudaGetDeviceProperties ( &deviceProp, 0 ) );
    printf ( "\n Using CUDA device: %s\n", deviceProp.name );
    cutilSafeCall ( cudaSetDevice ( 0 ) );
}
示例8: runAutoTest
////////////////////////////////////////////////////////////////////////////////
//! Run test
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int argc, char** argv)
{
printf("[%s]\n", sSDKsample);
// Cuda init
int dev = cutilChooseCudaDevice(argc, argv);
cudaDeviceProp deviceProp;
cutilSafeCall(cudaGetDeviceProperties(&deviceProp, dev));
printf("Compute capability %d.%d\n", deviceProp.major, deviceProp.minor);
int version = deviceProp.major*10 + deviceProp.minor;
g_hasDouble = (version >= 13);
if (inEmulationMode()) {
// workaround since SM13 kernel doesn't produce correct output in emulation mode
g_hasDouble = false;
}
// create FFT plan
CUFFT_SAFE_CALL(cufftPlan2d(&fftPlan, meshW, meshH, CUFFT_C2R) );
// allocate memory
fftInputW = (meshW / 2)+1;
fftInputH = meshH;
fftInputSize = (fftInputW*fftInputH)*sizeof(float2);
cutilSafeCall(cudaMalloc((void **)&d_h0, fftInputSize) );
cutilSafeCall(cudaMalloc((void **)&d_ht, fftInputSize) );
h_h0 = (float2 *) malloc(fftInputSize);
generate_h0();
cutilSafeCall(cudaMemcpy(d_h0, h_h0, fftInputSize, cudaMemcpyHostToDevice) );
cutilSafeCall(cudaMalloc((void **)&d_slope, meshW*meshH*sizeof(float2)) );
cutCreateTimer(&timer);
cutStartTimer(timer);
prevTime = cutGetTimerValue(timer);
// Creating the Auto-Validation Code
g_CheckRender = new CheckBackBuffer(windowH, windowH, 4, false);
g_CheckRender->setPixelFormat(GL_RGBA);
g_CheckRender->setExecPath(argv[0]);
g_CheckRender->EnableQAReadback(true);
runCudaTest(g_hasDouble);
cudaThreadExit();
}
示例9: QDialog
// Constructor: builds the dialog UI, fills the combo box with the name of
// every CUDA device the runtime reports, wires selection changes to
// updateInfo(), and shows the details of device 0 initially.
CudaDeviceDialog::CudaDeviceDialog(QWidget *parent)
    : QDialog(parent)
{
    m = new Ui_CudaDeviceDialog;
    m->setupUi(this);

    // Only enumerate devices when the count query itself succeeds.
    int deviceCount = 0;
    const bool countOk = (cudaGetDeviceCount(&deviceCount) == cudaSuccess);
    if (countOk) {
        for (int device = 0; device < deviceCount; ++device) {
            cudaDeviceProp properties;
            cudaGetDeviceProperties(&properties, device);
            m->comboBox->addItem(properties.name);
        }
    }

    connect(m->comboBox, SIGNAL(currentIndexChanged(int)), this, SLOT(updateInfo(int)));
    updateInfo(0);
}
示例10: PreRunJacobi
/**
* @brief This function is called immediately before the main Jacobi loop
*
* @param[in] cartComm The carthesian communicator
* @param[in] rank The rank of the calling MPI process
* @param[in] size The total number of MPI processes available
* @param[out] timerStart The Jacobi loop starting moment (measured as wall-time)
*/
void PreRunJacobi(MPI_Comm cartComm, int rank, int size, double * timerStart)
{
struct cudaDeviceProp devProps;
int crtDevice = 0, enabledECC = 0;
// We get the properties of the current device, assuming all other devices are the same
SafeCudaCall(cudaGetDevice(&crtDevice));
SafeCudaCall(cudaGetDeviceProperties(&devProps, crtDevice));
// Determine how many devices have ECC enabled (assuming exactly one process per device)
MPI_Reduce(&devProps.ECCEnabled, &enabledECC, 1, MPI_INT, MPI_SUM, MPI_MASTER_RANK, cartComm);
MPI_Barrier(cartComm);
OnePrintf(rank == MPI_MASTER_RANK, "Starting Jacobi run with %d processes using \"%s\" GPUs (ECC enabled: %d / %d):\n",
size, devProps.name, enabledECC, size);
* timerStart = MPI_Wtime();
}
示例11: gpuDeviceInit
// General GPU Device CUDA Initialization
inline int gpuDeviceInit(int devID)
{
int device_count;
checkCudaErrors(cudaGetDeviceCount(&device_count));
if (device_count == 0)
{
fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
if (devID < 0)
{
devID = 0;
}
if (devID > device_count-1)
{
fprintf(stderr, "\n");
fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
fprintf(stderr, "\n");
return -devID;
}
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
if (deviceProp.computeMode == cudaComputeModeProhibited)
{
fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
return -1;
}
if (deviceProp.major < 1)
{
fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
exit(EXIT_FAILURE);
}
checkCudaErrors(cudaSetDevice(devID));
printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
return devID;
}
示例12: THCudaInit
// Initializes the global Torch CUDA state: RNG and BLAS sub-states, a
// cudaDeviceProp cache for every visible device, and a per-device stream
// table whose slot 0 is the default (NULL) stream. Iterates over all devices
// with cudaSetDevice to query properties, enables peer-to-peer access where
// possible, and restores the originally current device before returning.
void THCudaInit(THCState* state)
{
int count = 0;
THCudaCheck(cudaGetDeviceCount(&count));
// Remember the caller's current device so it can be restored at the end.
int device = 0;
THCudaCheck(cudaGetDevice(&device));
state->rngState = (THCRNGState*)malloc(sizeof(THCRNGState));
THCRandom_init(state, count, device);
state->blasState = (THCBlasState*)malloc(sizeof(THCBlasState));
THCudaBlas_init(state, count, device);
state->numDevices = count;
state->deviceProperties =
(struct cudaDeviceProp*)malloc(count * sizeof(struct cudaDeviceProp));
THCState_setDeviceMode(state, THCStateDeviceModeManual);
state->numUserStreams = 0;
// One stream array per device; each initially holds only the default stream.
state->streamsPerDevice =
(cudaStream_t**)malloc(count * sizeof(cudaStream_t*));
/* Enable P2P access between all pairs, if possible */
THCudaEnablePeerToPeerAccess(state);
for (int i = 0; i < count; ++i)
{
THCudaCheck(cudaSetDevice(i));
THCudaCheck(cudaGetDeviceProperties(&state->deviceProperties[i], i));
/* Stream index 0 will be the default stream for convenience; by
default no user streams are reserved */
state->streamsPerDevice[i] =
(cudaStream_t*)malloc(sizeof(cudaStream_t));
state->streamsPerDevice[i][0] = NULL;
}
/* Restore to previous device */
THCudaCheck(cudaSetDevice(device));
/* Start in the default stream on the current device */
state->currentPerDeviceStream = 0;
state->currentStream = NULL;
}
示例13: device
// Creates a CudaDevice for the given ordinal, makes it the active device on
// the calling thread, builds its default allocator, and returns a context
// bound either to a new named stream (stream == true) or the default stream.
// Terminates the process when the device properties cannot be queried.
ContextPtr CudaDevice::Create(int ordinal, bool stream) {
    // Create the device.
    DevicePtr device(new CudaDevice);
    cudaError_t error = cudaGetDeviceProperties(&device->_prop, ordinal);
    if(cudaSuccess != error) {
        fprintf(stderr, "FAILURE TO CREATE DEVICE %d\n", ordinal);
        // Bug fix: exit with a failure status. The original exit(0) reported
        // success to the parent process even though device creation failed.
        exit(1);
    }

    // Set this device as the active one on the thread.
    device->_ordinal = ordinal;
    cudaSetDevice(ordinal);
    AllocPtr alloc = device->CreateDefaultAlloc();

    // Create the context.
    return device->CreateStream(stream, alloc.get());
}
示例14: checkCUDAProfile
// Reports device `dev`'s name, the CUDA runtime version, and its compute
// capability on stderr, then returns true iff the runtime version is at
// least min_runtime AND the capability, packed as (major << 4) + minor to
// match the caller's min_compute encoding, is at least min_compute.
bool checkCUDAProfile(int dev, int min_runtime, int min_compute)
{
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, dev);
    fprintf(stderr,"\nDevice %d: \"%s\"\n", dev, deviceProp.name);

    int runtimeVersion = 0;
    cudaRuntimeGetVersion(&runtimeVersion);
    fprintf(stderr," CUDA Runtime Version :\t%d.%d\n", runtimeVersion/1000, (runtimeVersion%100)/10);
    fprintf(stderr," CUDA Compute Capability :\t%d.%d\n", deviceProp.major, deviceProp.minor);

    const int packedCapability = (deviceProp.major << 4) + deviceProp.minor;
    return (runtimeVersion >= min_runtime) && (packedCapability >= min_compute);
}
示例15: checkCUDAProfile
// Reports device `dev`'s name, the CUDA runtime version, and its SM
// capability on stderr, then returns true iff the runtime is at least 3.1
// and the device has compute capability 2.0 (Fermi) or newer.
bool checkCUDAProfile(int dev)
{
    int runtimeVersion = 0;
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, dev);
    fprintf(stderr,"\nDevice %d: \"%s\"\n", dev, deviceProp.name);
    cudaRuntimeGetVersion(&runtimeVersion);
    fprintf(stderr," CUDA Runtime Version:\t%d.%d\n", runtimeVersion/1000, (runtimeVersion%100)/10);
    fprintf(stderr," CUDA SM Capability :\t%d.%d\n", deviceProp.major, deviceProp.minor);

    // Bug fix: the old test `runtimeVersion/1000 >= 3 && runtimeVersion%100 >= 1`
    // rejected every x.0 runtime newer than 3.1 (e.g. 4.0 -> 4000, and
    // 4000 % 100 == 0 fails the second clause). cudaRuntimeGetVersion encodes
    // the version as 1000*major + 10*minor, so "at least 3.1" is >= 3010.
    return (runtimeVersion >= 3010) && (deviceProp.major >= 2);
}