本文整理汇总了C++中CUDNN_CHECK函数的典型用法代码示例。如果您正苦于以下问题：C++ CUDNN_CHECK函数的具体用法？C++ CUDNN_CHECK怎么用？C++ CUDNN_CHECK使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了CUDNN_CHECK函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: createPoolingDesc
// Creates a cuDNN pooling descriptor and configures it for 2-D pooling.
//
// pool               - out: receives the newly created descriptor.
// mode               - pooling mode, forwarded unchanged to cuDNN.
// h, w               - forwarded as the two window-size arguments.
// pad_h, pad_w       - forwarded as the two padding arguments.
// stride_h, stride_w - forwarded as the two stride arguments.
//
// Both cuDNN calls are wrapped in CUDNN_CHECK, so a failing status is handled
// by that macro rather than returned to the caller.
inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool,
cudnnPoolingMode_t mode, int h, int w, int pad_h, int pad_w,
int stride_h, int stride_w) {
// The descriptor must exist before it can be configured.
CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool));
CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool, mode, h, w, pad_h, pad_w,
stride_h, stride_w));
}
示例2: createTensor4dDesc
// Creates a cuDNN tensor descriptor in *desc and configures it as a 4-D
// tensor with explicit strides.
//
// desc   - out: receives the newly created descriptor.
// size   - supplies the four extents via num()/channels()/height()/width().
// stride - supplies the four strides via nstride()/cstride()/hstride()/
//          wstride().
//
// The element type is taken from dataType<Dtype>::type. Failures are handled
// by CUDNN_CHECK.
inline void createTensor4dDesc(cudnnTensorDescriptor_t* desc, Size size,
    Stride stride) {
  CUDNN_CHECK(cudnnCreateTensorDescriptor(desc));

  // Extents, in the order the cuDNN call takes them.
  const int n = size.num();
  const int c = size.channels();
  const int h = size.height();
  const int w = size.width();

  // Matching per-dimension strides.
  const int n_stride = stride.nstride();
  const int c_stride = stride.cstride();
  const int h_stride = stride.hstride();
  const int w_stride = stride.wstride();

  CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, dataType<Dtype>::type,
      n, c, h, w, n_stride, c_stride, h_stride, w_stride));
}
示例3: ConvBC01CuDNN（构造函数）
// Constructs the convolution operator.
//
// Stores the given padding (pad_y, pad_x) and stride (stride_y, stride_x),
// zero-initialises the cached image/filter dimensions and workspace_size, and
// creates the four cuDNN descriptors (image tensor, output tensor, filter,
// convolution). The descriptors are only created here, not configured.
//
// Each create call is wrapped in CUDNN_CHECK, so a failing status is handled
// by that macro.
ConvBC01CuDNN<T>::ConvBC01CuDNN(int pad_y, int pad_x, int stride_y,
int stride_x) : pad_y(pad_y), pad_x(pad_x), stride_y(stride_y),
stride_x(stride_x), n_imgs(0), n_channels(0), n_filters(0), img_h(0),
img_w(0), filter_h(0), filter_w(0), workspace_size(0) {
// Allocate the (still unconfigured) descriptors.
CUDNN_CHECK(cudnnCreateTensorDescriptor(&imgs_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&convout_desc));
CUDNN_CHECK(cudnnCreateFilterDescriptor(&filters_desc));
CUDNN_CHECK(cudnnCreateConvolutionDescriptor(&conv_desc));
}
示例4: createFilterDesc
// Creates a cuDNN filter descriptor in *desc and configures it as a 4-D
// filter in CUDNN_TENSOR_NCHW layout.
//
// desc       - out: receives the newly created descriptor.
// n, c, h, w - forwarded, in this order, as the four dimension arguments.
//
// The element type is taken from dataType<Dtype>::type. Failures are handled
// by CUDNN_CHECK.
inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
int n, int c, int h, int w) {
CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
// The setter name depends on the cuDNN version: from 5.0.0 on the unsuffixed
// function is called, otherwise the _v4 variant with the same arguments.
#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType<Dtype>::type,
CUDNN_TENSOR_NCHW, n, c, h, w));
#else
CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType<Dtype>::type,
CUDNN_TENSOR_NCHW, n, c, h, w));
#endif
}
示例5: CuDNNConvolutionLayer::LayerSetUp
// One-time setup of the cuDNN convolution layer.
//
// After running the base-class setup this:
//   1. allocates the stream and handle arrays (group_ *
//      CUDNN_STREAMS_PER_GROUP entries each) and creates one CUDA stream and
//      one cuDNN handle per entry, binding each handle to its stream;
//   2. resets the workspace bookkeeping (no workspace is allocated here);
//   3. computes the per-group weight and bias offsets;
//   4. creates the filter descriptor, one bottom/top/convolution descriptor
//      triple per bottom blob, and, if bias_term_ is set, the bias descriptor.
//
// bottom - input blobs; only their count is used directly in this function.
// top    - output blobs; forwarded to the base-class setup.
//
// NOTE(review): stream_ and handle_ are raw new[] allocations; they are
// presumably released elsewhere (e.g. the destructor) - confirm.
void CuDNNConvolutionLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  ConvolutionLayer<Dtype>::LayerSetUp(bottom, top);

  // Initialize CUDA streams and cuDNN.
  const int stream_count = this->group_ * CUDNN_STREAMS_PER_GROUP;
  stream_ = new cudaStream_t[stream_count];
  handle_ = new cudnnHandle_t[stream_count];

  // Start without a workspace.
  workspaceSizeInBytes = 0;
  workspace = NULL;

  for (int g = 0; g < stream_count; g++) {
    CUDA_CHECK(cudaStreamCreate(&stream_[g]));
    CUDNN_CHECK(cudnnCreate(&handle_[g]));
    CUDNN_CHECK(cudnnSetStream(handle_[g], stream_[g]));
  }

  // Set the indexing parameters.
  weight_offset_ = (this->num_output_ / this->group_)
      * (this->channels_ / this->group_) * this->kernel_h_ * this->kernel_w_;
  bias_offset_ = (this->num_output_ / this->group_);

  // Create filter descriptor.
  cudnn::createFilterDesc<Dtype>(&filter_desc_,
      this->num_output_ / this->group_, this->channels_ / this->group_,
      this->kernel_h_, this->kernel_w_);

  // Create tensor descriptor(s) for data and corresponding convolution(s).
  // size_t index avoids a signed/unsigned comparison against size().
  for (size_t i = 0; i < bottom.size(); i++) {
    cudnnTensorDescriptor_t bottom_desc;
    cudnn::createTensor4dDesc<Dtype>(&bottom_desc);
    bottom_descs_.push_back(bottom_desc);

    cudnnTensorDescriptor_t top_desc;
    cudnn::createTensor4dDesc<Dtype>(&top_desc);
    top_descs_.push_back(top_desc);

    cudnnConvolutionDescriptor_t conv_desc;
    cudnn::createConvolutionDesc<Dtype>(&conv_desc);
    conv_descs_.push_back(conv_desc);
  }

  // Tensor descriptor for bias.
  if (this->bias_term_) {
    cudnn::createTensor4dDesc<Dtype>(&bias_desc_);
  }

  // Records that the handles/descriptors above have been created.
  handles_setup_ = true;
}
示例6: setConvolutionDesc
// Configures an existing 2-D convolution descriptor as a cross-correlation
// with the given padding and stride.
//
// conv               - descriptor to configure (must already be created).
// bottom, filter     - accepted but not read by this function.
// pad_h, pad_w       - forwarded as the two padding arguments.
// stride_h, stride_w - forwarded as the two stride arguments.
//
// From cuDNN 6.0.0 on the call additionally receives dataType<Dtype>::type.
// Failures are handled by CUDNN_CHECK.
inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
    cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter,
    int pad_h, int pad_w, int stride_h, int stride_w) {
  // Passed for both arguments that follow the strides (per the cuDNN API these
  // are the dilation factors in newer versions and the upscale factors in
  // older ones); 1 leaves either unchanged.
  const int unit = 1;
#if CUDNN_VERSION_MIN(6, 0, 0)
  CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv,
      pad_h, pad_w,
      stride_h, stride_w,
      unit, unit,
      CUDNN_CROSS_CORRELATION,
      dataType<Dtype>::type));
#else
  CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv,
      pad_h, pad_w,
      stride_h, stride_w,
      unit, unit,
      CUDNN_CROSS_CORRELATION));
#endif
}
示例7: CUDNN_CHECK
// Forward convolution: convout = conv(imgs, filters).
//
// imgs      - input images; described to cuDNN as an NCHW tensor of shape
//             (n_imgs, n_channels, img_h, img_w).
// filters   - filter bank; described to cuDNN with dimensions
//             (n_filters, n_channels, filter_h, filter_w).
// convout   - output buffer; its shape is whatever
//             cudnnGetConvolution2dForwardOutputDim reports.
//
// The cuDNN descriptors, the forward algorithm and the workspace size are
// cached between calls and only refreshed when a relevant dimension differs
// from the cached one. Failures are handled by CUDNN_CHECK.
//
// NOTE(review): descriptors are configured with CUDNN_DATA_FLOAT regardless
// of T - confirm T is only ever instantiated as float.
void ConvBC01CuDNN<T>::fprop(const T *imgs, const T *filters, int n_imgs,
    int n_channels, int n_filters, int img_h, int img_w, int filter_h,
    int filter_w, T *convout) {
  // Evaluate both staleness conditions BEFORE mutating any cached value.
  // n_channels is shared by the image and the filter descriptor; if the image
  // branch updated this->n_channels first, a pure channel-count change would
  // no longer be visible to the filter check and filters_desc would go stale.
  const bool imgs_changed =
      n_imgs != this->n_imgs || n_channels != this->n_channels ||
      img_h != this->img_h || img_w != this->img_w;
  const bool filters_changed =
      n_filters != this->n_filters || n_channels != this->n_channels ||
      filter_h != this->filter_h || filter_w != this->filter_w;

  if (imgs_changed) {
    CUDNN_CHECK(cudnnSetTensor4dDescriptor(
        imgs_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n_imgs, n_channels,
        img_h, img_w
    ));
    this->n_imgs = n_imgs;
    this->img_h = img_h;
    this->img_w = img_w;
  }

  if (filters_changed) {
    CUDNN_CHECK(cudnnSetFilter4dDescriptor(
        filters_desc, CUDNN_DATA_FLOAT, n_filters, n_channels, filter_h,
        filter_w
    ));
    this->n_filters = n_filters;
    this->filter_h = filter_h;
    this->filter_w = filter_w;
  }

  // Anything derived from the image or filter shape must be recomputed when
  // either of them changed.
  const bool set_conv_desc = imgs_changed || filters_changed;
  if (set_conv_desc) {
    // Safe to update the shared channel count now that both descriptors that
    // depend on it have been refreshed.
    this->n_channels = n_channels;

    CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
        conv_desc, pad_y, pad_x, stride_y, stride_x, 1, 1, CUDNN_CONVOLUTION
    ));

    // Ask cuDNN for the output shape and describe the output tensor with it.
    int n, c, h, w;
    CUDNN_CHECK(cudnnGetConvolution2dForwardOutputDim(
        conv_desc, imgs_desc, filters_desc, &n, &c, &h, &w
    ));
    CUDNN_CHECK(cudnnSetTensor4dDescriptor(
        convout_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w
    ));

    // Pick an algorithm within WORKSPACE_LIMIT and cache its workspace need.
    CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(
        CUDNN::handle(), imgs_desc, filters_desc, conv_desc, convout_desc,
        CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, WORKSPACE_LIMIT,
        &fwd_algo
    ));
    CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(
        CUDNN::handle(), imgs_desc, filters_desc, conv_desc, convout_desc,
        fwd_algo, &workspace_size
    ));
  }

  // Only request a buffer when the chosen algorithm needs one.
  void *workspace = NULL;
  if (workspace_size > 0) {
    workspace = CUDA::buffer(workspace_size);
  }

  CUDNN_CHECK(cudnnConvolutionForward(
      CUDNN::handle(), &CUDNN::one, imgs_desc, imgs, filters_desc, filters,
      conv_desc, fwd_algo, workspace, workspace_size, &CUDNN::zero,
      convout_desc, convout
  ));
}
示例8: CUDNN_CHECK
// Backward pass of the pooling operator.
//
// imgs      - the forward-pass input.
// poolout   - the forward-pass output.
// poolout_d - gradient with respect to poolout.
// imgs_d    - out: receives the gradient with respect to imgs.
//
// Uses the member descriptors pool_desc, poolout_desc and imgs_desc as they
// currently are. Failures are handled by CUDNN_CHECK.
void PoolBC01CuDNN<T>::bprop(const T *imgs, const T* poolout,
    const T *poolout_d, T *imgs_d) {
  // Scaling factors handed to cuDNN: CUDNN::one for the computed result and
  // CUDNN::zero for the previous contents of imgs_d.
  const void *alpha = &CUDNN::one;
  const void *beta = &CUDNN::zero;

  CUDNN_CHECK(cudnnPoolingBackward(
      CUDNN::handle(), pool_desc,
      alpha,
      poolout_desc, poolout,
      poolout_desc, poolout_d,
      imgs_desc, imgs,
      beta,
      imgs_desc, imgs_d
  ));
}
示例9: setTensorNdDesc
// Configures an existing cuDNN tensor descriptor as an N-d tensor.
//
// desc       - descriptor to configure (must already be created).
// total_dims - number of entries in shape and stride.
// shape      - extent of each dimension.
// stride     - stride of each dimension.
//
// The descriptor is always given at least 4 dimensions. When total_dims < 4,
// the missing dimensions are added at the front with extent 1 and a stride
// equal to extent * stride of the dimension that follows them. The element
// type is dataType<Dtype>::type. Failures are handled by CUDNN_CHECK.
inline void setTensorNdDesc(cudnnTensorDescriptor_t* desc,
    const int_tp total_dims,
    const int_tp* shape, const int_tp* stride) {
  // Pad to at least 4 dimensions
  const int_tp cudnn_dims = std::max(total_dims, static_cast<int_tp>(4));
  const int_tp padding =
      std::max(static_cast<int_tp>(0), cudnn_dims - total_dims);

  std::vector<int> shape_int(cudnn_dims);
  std::vector<int> stride_int(cudnn_dims);

  // Walk from the innermost dimension outwards so that a padded dimension can
  // derive its stride from the already-filled dimension to its right.
  for (int_tp i = cudnn_dims - 1; i >= 0; --i) {
    if (i < padding) {
      shape_int[i] = 1;
      // With total_dims == 0 even the innermost dimension is padding and has
      // no right-hand neighbour; reading index i + 1 would be out of bounds,
      // so give it unit stride instead.
      stride_int[i] = (i + 1 < cudnn_dims)
          ? shape_int[i + 1] * stride_int[i + 1]
          : 1;
    } else {
      // cuDNN takes plain int arrays; narrow explicitly.
      shape_int[i] = static_cast<int>(shape[i - padding]);
      stride_int[i] = static_cast<int>(stride[i - padding]);
    }
  }

  const int* shape_ptr = &shape_int[0];
  const int* stride_ptr = &stride_int[0];

  CUDNN_CHECK(
      cudnnSetTensorNdDescriptor(*desc, dataType<Dtype>::type,
                                 static_cast<int>(cudnn_dims),
                                 shape_ptr, stride_ptr));
}
示例10: CUDNN_CHECK
void Activation::compute_gpu(const vector<bool>& add) {
DTYPE alpha = 1.;
DTYPE beta = add[0] ? 1. : 0.;
CUDNN_CHECK(cudnnActivationForward(cudnn_handle(), activation_mode_,
&alpha, bottom_desc_, inputs_[0]->gpu_data(), &beta, top_desc_,
outputs_[0]->mutable_gpu_data()));
}
示例11: CUDNN_CHECK
// One-time setup of the cuDNN pooling layer.
//
// After running the base-class setup this copies the kernel shape, stride and
// padding (num_spatial_axes_ entries each) into local int buffers, creates the
// cuDNN handle, the bottom/top tensor descriptors and the N-d pooling
// descriptor, and finally sets handles_setup_.
//
// bottom, top - forwarded to the base-class setup; not otherwise used here.
//
// Failures of cudnnCreate are handled by CUDNN_CHECK.
void CuDNNPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  PoolingLayer<Dtype>::LayerSetUp(bottom, top);

  // kernel shape
  const int* kernel_shape_data = this->kernel_shape_.cpu_data();
  // stride
  const int* stride_data = this->stride_.cpu_data();
  // padding
  const int* pad_data = this->pad_.cpu_data();

  // Local, mutable copies handed to the descriptor helper. std::vector is
  // used instead of runtime-sized stack arrays, which are a compiler
  // extension rather than standard C++.
  std::vector<int> kernel_shape(kernel_shape_data,
                                kernel_shape_data + this->num_spatial_axes_);
  std::vector<int> stride(stride_data,
                          stride_data + this->num_spatial_axes_);
  std::vector<int> pad(pad_data, pad_data + this->num_spatial_axes_);

  CUDNN_CHECK(cudnnCreate(&handle_));
  cudnn::createTensorDesc<Dtype>(&bottom_desc_);
  cudnn::createTensorDesc<Dtype>(&top_desc_);
  cudnn::createPoolingNdDesc<Dtype>(&pooling_desc_,
      this->layer_param_.pooling_param().pool(), &mode_,
      this->num_spatial_axes_, kernel_shape.data(),
      pad.data(), stride.data());

  // Records that the handle/descriptors above have been created.
  handles_setup_ = true;
}
示例12: CUDNN_CHECK
void Softmax::compute_gpu(const vector<bool>& add) {
DTYPE alpha = 1.;
DTYPE beta = add[0] ? 1. : 0.;
CUDNN_CHECK(cudnnSoftmaxForward(cudnn_handle(), CUDNN_SOFTMAX_ACCURATE,
softmax_mode_, &alpha, bottom_desc_, inputs_[0]->gpu_data(), &beta,
top_desc_, outputs_[0]->mutable_gpu_data()));
}
示例13: Context::Init
// Binds this context to a device and lazily creates the library handles it
// owns.
//
// device_id - stored in device_id_ before SwitchDevice() is called.
//
// Each handle is only created while it is still null, so calling Init again
// does not recreate existing handles. Which handles exist depends on the
// USE_CUDA / USE_CUDNN / USE_NNPACK build flags. Failures are handled by the
// respective CHECK macros.
void Context::Init(int device_id) {
device_id_ = device_id;
// Switch to the requested device before any handle is created.
SwitchDevice();
#if defined(USE_CUDA)
// cuBLAS handle.
if (blas_handle_ == nullptr) {
CUBLAS_CHECK(cublasCreate((cublasHandle_t*)&blas_handle_));
CHECK_NOTNULL(blas_handle_);
}
#endif
#if defined(USE_CUDNN)
// cuDNN handle.
if (cudnn_handle_ == nullptr) {
CUDNN_CHECK(cudnnCreate((cudnnHandle_t*)&cudnn_handle_));
CHECK_NOTNULL(cudnn_handle_);
}
#endif
#if defined(USE_NNPACK)
// NNPACK: initialise the library, then create the thread pool that serves as
// this context's NNPACK handle.
if (nnpack_handle_ == nullptr) {
CHECK_EQ(nnp_initialize(), nnp_status_success);
nnpack_handle_ = pthreadpool_create(0);
CHECK_NOTNULL(nnpack_handle_);
}
#endif
}
示例14: CUDNN_CHECK
// One-time setup of the cuDNN LRN layer.
//
// After running the base-class setup this creates the cuDNN handle, the LRN
// descriptor and the bottom/top tensor descriptors, sets handles_setup_, and
// caches local_size, alpha, beta and k from the layer's lrn_param.
//
// bottom, top - forwarded to the base-class setup; not otherwise used here.
//
// Failures of the direct cuDNN calls are handled by CUDNN_CHECK.
void CuDNNLRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
LRNLayer<Dtype>::LayerSetUp(bottom, top);
// Create the cuDNN handle and the LRN descriptor.
CUDNN_CHECK(cudnnCreate(&handle_));
CUDNN_CHECK(cudnnCreateLRNDescriptor(&norm_desc_));
// Create the input/output tensor descriptors.
cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
cudnn::createTensor4dDesc<Dtype>(&top_desc_);
// Record that the handle and descriptors above have been created.
handles_setup_ = true;
// Cache the LRN hyper-parameters from the layer configuration.
size_ = this->layer_param().lrn_param().local_size();
alpha_ = this->layer_param().lrn_param().alpha();
beta_ = this->layer_param().lrn_param().beta();
k_ = this->layer_param().lrn_param().k();
}
示例15: createPoolingDesc
// Translates a PoolingParameter pool method into a cuDNN pooling mode, then
// creates and configures a pooling descriptor with that mode.
//
// conv               - out: receives the newly created pooling descriptor.
// poolmethod         - MAX or AVE; any other value is reported through
//                      LOG(FATAL).
// mode               - out: receives the selected cuDNN pooling mode.
// h, w               - forwarded as the two window-size arguments.
// stride_h, stride_w - forwarded as the two stride arguments.
//
// Failures of the cuDNN calls are handled by CUDNN_CHECK.
inline void createPoolingDesc(cudnnPoolingDescriptor_t* conv,
    PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
    int h, int w, int stride_h, int stride_w) {
  if (poolmethod == PoolingParameter_PoolMethod_MAX) {
    *mode = CUDNN_POOLING_MAX;
  } else if (poolmethod == PoolingParameter_PoolMethod_AVE) {
    *mode = CUDNN_POOLING_AVERAGE;
  } else {
    LOG(FATAL) << "Unknown pooling method.";
  }

  CUDNN_CHECK(cudnnCreatePoolingDescriptor(conv));
  CUDNN_CHECK(cudnnSetPoolingDescriptor(*conv, *mode, h, w,
      stride_h, stride_w));
}