本文整理汇总了 C++ 中 Coordinates::set_num_dimensions 方法的典型用法代码示例。如果您正苦于以下问题：C++ Coordinates::set_num_dimensions 方法的具体用法？C++ Coordinates::set_num_dimensions 怎么用？C++ Coordinates::set_num_dimensions 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Coordinates 的用法示例。
下文共展示了 Coordinates::set_num_dimensions 方法的 1 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: configure
//.........这里部分代码省略.........
if(is_interleaved_transposed)
{
build_opts.add_option("-DCOLS_B=" + support::cpp11::to_string(input1->info()->dimension(0)));
if(data_type == DataType::F32)
{
kernel_name = "gemm_mm_interleaved_transposed_f32_" + string_from_target(arch_target);
}
else
{
kernel_name = "gemm_mm_interleaved_transposed_" + lower_string(string_from_data_type(data_type));
}
// Configure kernel window
const unsigned int num_elems_processed_per_iteration_x = max_cl_vector_width / data_size_from_type(data_type);
constexpr unsigned int num_elems_processed_per_iteration_y = 4;
Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
AccessWindowRectangle input0_access(input0->info(), 0, 0, num_elems_processed_per_iteration_y, 1, 1.f, 0.25f);
AccessWindowTranspose input1_access(input1->info(), 0, 0, num_elems_processed_per_iteration_x, 1, 0.f, 0.25f);
AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
update_window_and_padding(win, input0_access, input1_access, output_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape()));
ICLKernel::configure(win);
}
else // The input tensors have not been reshaped
{
build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(input0->info()->dimension(0)));
// Special case for 1xN, 2xN, 3xN and 4xN input0 tensor. num_elems_processed_per_iteration_x is set up for the default case.
unsigned int num_elems_processed_per_iteration_x = max_cl_vector_width / data_size_from_type(data_type);
const unsigned int num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->info()->dimension(1)), 4);
// Create kernels according to the architecture, data type and input size.
if(arch_target == GPUTarget::BIFROST && data_type == DataType::F32)
{
// The first kernel is optimized for the case of 1000 or less output elements (e.g. FC8 of AlexNet and VGG-16, and
// FC1 of Inception v3). The second kernel is optimized for the case of greater than 1000 output elements (e.g.
// FC6 and FC7 of AlexNet and VGG-16).
if(input1->info()->dimension(0) <= 1000)
{
// Each work-item processes 2 elements in the X dimension.
num_elems_processed_per_iteration_x = 2;
kernel_name = "gemm_mm_floating_point_f32_bifrost_1000";
}
else
{
// Each work-item processes 4 elements in the X dimension (as in the default case).
num_elems_processed_per_iteration_x = 4;
kernel_name = "gemm_mm_floating_point_f32_bifrost";
}
// The work-group size equal to the Bifrost quad size has been proved to be optimal for these kernels
// via exhaustive autotuning over a range of representative layer configurations.
_lws_hint = cl::NDRange(4);
}
else if(is_data_type_fixed_point(data_type))
{
kernel_name = "gemm_mm_" + lower_string(string_from_data_type(data_type));
}
else // (MIDGARD and F32) or (F16)
{
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
kernel_name = "gemm_mm_floating_point";
}
build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=" + support::cpp11::to_string(num_elems_processed_per_iteration_y));
build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_X=" + support::cpp11::to_string(num_elems_processed_per_iteration_x));
// Configure window
Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
AccessWindowStatic input0_access(input0->info(), 0, 0, input0->info()->dimension(0), ceil_to_multiple(input0->info()->dimension(1), num_elems_processed_per_iteration_y));
AccessWindowStatic input1_access(input1->info(), 0, 0, ceil_to_multiple(input1->info()->dimension(0), num_elems_processed_per_iteration_x), input1->info()->dimension(1));
AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
update_window_and_padding(win, input0_access, input1_access, output_access);
Coordinates coord;
coord.set_num_dimensions(output->info()->num_dimensions());
output_access.set_valid_region(win, ValidRegion(coord, output->info()->tensor_shape()));
ICLKernel::configure(win);
}
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
// Set config_id for enabling LWS tuning
_config_id = "gemm_";
_config_id += (is_interleaved_transposed ? "reshaped_" : "");
_config_id += lower_string(string_from_data_type(input0->info()->data_type()));
_config_id += "_";
_config_id += support::cpp11::to_string(output->info()->dimension(1));
_config_id += "_";
_config_id += support::cpp11::to_string(output->info()->dimension(0));
_config_id += "_";
_config_id += (is_interleaved_transposed ? support::cpp11::to_string(input1->info()->dimension(0)) : support::cpp11::to_string(input1->info()->dimension(1)));
}