本文整理汇总了C++中Tensor::MoveToGPU方法的典型用法代码示例。如果您正苦于以下问题:C++ Tensor::MoveToGPU方法的具体用法?C++ Tensor::MoveToGPU怎么用?C++ Tensor::MoveToGPU使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Tensor
的用法示例。
在下文中一共展示了Tensor::MoveToGPU方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: SMS
void TensorMath::SMS(const Tensor& source, Tensor& target)
{
#ifdef BUILD_OPENCL
if(source.cl_gpu_ || target.cl_gpu_) {
((Tensor&)source).MoveToGPU();
target.MoveToGPU(true);
const int width = target.width();
const int height = target.height();
const int maps = target.maps();
const int samples = target.samples();
cl_uint error = 0;
error |= clSetKernelArg (CLHelper::k_sms, 0, sizeof (cl_mem), &(((Tensor&)source).cl_data_ptr_));
error |= clSetKernelArg (CLHelper::k_sms, 1, sizeof (cl_mem), &(target.cl_data_ptr_));
error |= clSetKernelArg (CLHelper::k_sms, 2, sizeof (cl_uint), &width);
error |= clSetKernelArg (CLHelper::k_sms, 3, sizeof (cl_uint), &height);
error |= clSetKernelArg (CLHelper::k_sms, 4, sizeof (cl_uint), &maps);
error |= clSetKernelArg (CLHelper::k_sms, 5, sizeof (cl_uint), &samples);
if (error != CL_SUCCESS) {
FATAL("Error setting kernel args: " << (signed int) error);
}
size_t global_work_size[] = {(size_t)target.elements()};
error = clEnqueueNDRangeKernel (CLHelper::queue, CLHelper::k_sms, 1, NULL,
global_work_size, NULL, 0, NULL, NULL);
if (error != CL_SUCCESS) {
FATAL("Error enqueueing kernel: " << (signed int) error);
}
#ifdef BRUTAL_FINISH
error = clFinish (CLHelper::queue);
if (error != CL_SUCCESS) {
FATAL("Error finishing command queue: " << (signed int) error);
}
#endif
} else {
#endif
const int width = target.width();
const int height = target.height();
const int maps = target.maps();
const int samples = target.samples();
for(int sample = 0; sample < samples; sample++) {
for(int map = 0; map < maps; map++) {
const datum* src = source.data_ptr_const(0, 0, sample, map);
datum* tgt = target.data_ptr(0, 0, map, sample);
std::memcpy(tgt, src, sizeof(datum) * width * height);
}
}
#ifdef BUILD_OPENCL
}
#endif
target.hint_ignore_content_ = false;
}
示例2: UP
void TensorMath::UP(const Tensor& source, Tensor& target, const int region_width, const int region_height, const datum target_factor)
{
#ifdef BUILD_OPENCL
if(source.cl_gpu_ || target.cl_gpu_) {
((Tensor&)source).MoveToGPU();
target.MoveToGPU(true);
const int target_width = target.width();
const int target_height = target.height();
const int source_width = source.width();
const int source_height = source.height();
cl_uint error = 0;
error |= clSetKernelArg (CLHelper::k_up, 0, sizeof (cl_mem), &(((Tensor&)source).cl_data_ptr_));
error |= clSetKernelArg (CLHelper::k_up, 1, sizeof (cl_mem), &(target.cl_data_ptr_));
error |= clSetKernelArg (CLHelper::k_up, 2, sizeof (cl_uint), &target_width);
error |= clSetKernelArg (CLHelper::k_up, 3, sizeof (cl_uint), &target_height);
error |= clSetKernelArg (CLHelper::k_up, 4, sizeof (cl_uint), &source_width);
error |= clSetKernelArg (CLHelper::k_up, 5, sizeof (cl_uint), &source_height);
error |= clSetKernelArg (CLHelper::k_up, 6, sizeof (cl_uint), ®ion_width);
error |= clSetKernelArg (CLHelper::k_up, 7, sizeof (cl_uint), ®ion_height);
error |= clSetKernelArg (CLHelper::k_up, 8, sizeof (cl_float), &target_factor);
if (error != CL_SUCCESS) {
FATAL("Error setting kernel args: " << (signed int) error);
}
size_t global_work_size[] = {(size_t)target.width(), (size_t)target.height(), (size_t)(target.maps() * target.samples())};
error = clEnqueueNDRangeKernel (CLHelper::queue, CLHelper::k_up, 3, NULL,
global_work_size, NULL, 0, NULL, NULL);
if (error != CL_SUCCESS) {
FATAL("Error enqueueing kernel: " << (signed int) error);
}
#ifdef BRUTAL_FINISH
error = clFinish (CLHelper::queue);
if (error != CL_SUCCESS) {
FATAL("Error finishing command queue: " << (signed int) error);
}
#endif
} else {
#endif
const int width = source.width();
const int height = source.height();
const int maps = source.maps();
const int samples = source.samples();
for(int sample = 0; sample < samples; sample++) {
for(int map = 0; map < maps; map++) {
for(unsigned int y = 0; y < height; y++) {
const unsigned int iy = region_height * y;
for(unsigned int x = 0; x < width; x++) {
const unsigned int ix = region_width * x;
const datum* src = source.data_ptr_const(x, y, map, sample);
datum sum = *src;
for(unsigned int ry = 0; ry < region_height; ry++) {
for(unsigned int rx = 0; rx < region_width; rx++) {
datum* tgt = target.data_ptr(ix + rx, iy + ry, map, sample);
*tgt = sum * target_factor;
}
}
}
}
}
}
#ifdef BUILD_OPENCL
}
#endif
target.hint_ignore_content_ = false;
}
示例3: clSetKernelArg
void TensorMath::COL2IM(Tensor& source, const int source_width, const int source_height, const int maps, const int samples, const int kernel_width, const int kernel_height, const int stride_width, const int stride_height, const int pad_width, const int pad_height, const Tensor& target)
{
#ifdef BUILD_OPENCL
if(source.cl_gpu_ || target.cl_gpu_) {
((Tensor&)target).MoveToGPU();
source.MoveToGPU(true);
cl_uint error = 0;
const int target_width = (2 * pad_width + source_width - kernel_width) / stride_width + 1;
const int target_height = (2 * pad_height + source_height - kernel_height) / stride_height + 1;
const int target_maps = kernel_width * kernel_height * maps;
error |= clSetKernelArg (CLHelper::k_col2im, 0, sizeof (cl_mem), &(((Tensor&)source).cl_data_ptr_));
error |= clSetKernelArg (CLHelper::k_col2im, 1, sizeof (cl_mem), &(target.cl_data_ptr_));
error |= clSetKernelArg (CLHelper::k_col2im, 2, sizeof (cl_int), &source_width);
error |= clSetKernelArg (CLHelper::k_col2im, 3, sizeof (cl_int), &source_height);
error |= clSetKernelArg (CLHelper::k_col2im, 4, sizeof (cl_int), &maps);
error |= clSetKernelArg (CLHelper::k_col2im, 5, sizeof (cl_int), &samples);
error |= clSetKernelArg (CLHelper::k_col2im, 6, sizeof (cl_int), &target_width);
error |= clSetKernelArg (CLHelper::k_col2im, 7, sizeof (cl_int), &target_height);
error |= clSetKernelArg (CLHelper::k_col2im, 8, sizeof (cl_int), &target_maps);
error |= clSetKernelArg (CLHelper::k_col2im, 9, sizeof (cl_int), &kernel_width);
error |= clSetKernelArg (CLHelper::k_col2im, 10, sizeof (cl_int), &kernel_height);
error |= clSetKernelArg (CLHelper::k_col2im, 11, sizeof (cl_int), &stride_width);
error |= clSetKernelArg (CLHelper::k_col2im, 12, sizeof (cl_int), &stride_height);
error |= clSetKernelArg (CLHelper::k_col2im, 13, sizeof (cl_int), &pad_width);
error |= clSetKernelArg (CLHelper::k_col2im, 14, sizeof (cl_int), &pad_height);
if (error != CL_SUCCESS) {
FATAL("Error setting kernel args: " << (signed int) error);
}
size_t global_work_size[] = {(size_t)(source_width * source_height), (size_t)maps, (size_t)samples};
error = clEnqueueNDRangeKernel (CLHelper::queue, CLHelper::k_col2im, 3, NULL,
global_work_size, NULL, 0, NULL, NULL);
if (error != CL_SUCCESS) {
FATAL("Error enqueueing kernel: " << (signed int) error);
}
#ifdef BRUTAL_FINISH
error = clFinish (CLHelper::queue);
if (error != CL_SUCCESS) {
FATAL("Error finishing command queue: " << (signed int) error);
}
#endif
} else {
((Tensor&)target).MoveToCPU();
source.MoveToCPU(true);
#endif
SETSAMPLE(source, -1, 0.0);
const int target_width = (2 * pad_width + source_width - kernel_width) / stride_width + 1;
const int target_height = (2 * pad_height + source_height - kernel_height) / stride_height + 1;
const int target_maps = kernel_width * kernel_height * maps;
const int target_size = samples * target_width * target_height * target_maps;
const int actual_target_size = target.samples() * target.width()* target.height() * target.maps();
if(target_size != actual_target_size)
FATAL("Target size wrong!");
for(int sample = 0; sample < samples; sample++) {
datum* source_ptr = source.data_ptr(0, 0, 0, sample);
for(int target_map = 0; target_map < target_maps; target_map++) {
const datum* target_ptr = target.data_ptr_const(0, 0, 0, target_map);
int kx = target_map % kernel_width;
int ky = (target_map / kernel_width) % kernel_height;
int imap = target_map / (kernel_width * kernel_height);
for(int oy = 0; oy < target_height; oy++) {
int iy = oy * stride_height - pad_height + ky;
if(iy >= 0 && iy < source_height) {
for(int ox = 0; ox < target_width; ox++) {
int ix = ox * stride_width - pad_width + kx;
if(ix >= 0 && iy < source_width) {
source_ptr[(imap * source_height + iy) * source_width + ix] +=
target_ptr[(sample * target_height + oy) * target_width + ox];
}
}
}
}
}
}
#ifdef BUILD_OPENCL
}
#endif
source.hint_ignore_content_ = false;
}