本文整理汇总了 C++ 中 Task::create_workblock 方法的典型用法代码示例。如果您正苦于以下问题：C++ Task::create_workblock 方法的具体用法？C++ Task::create_workblock 怎么用？C++ Task::create_workblock 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Task 的用法示例。
在下文中一共展示了 Task::create_workblock 方法的 2 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: fft
// Split-complex FFT.
//
// Fills in an Fft_split_params command block describing one transform and
// submits it as a single workblock, then calls task.sync() before returning.
//
//   in_re, in_im   - real / imaginary input arrays; each must satisfy
//                    is_dma_addr_ok().
//   out_re, out_im - real / imaginary output arrays; each must satisfy
//                    is_dma_addr_ok().
//   length         - stored as the transform size; the assertion below
//                    requires 2 * sizeof(complex<T>) * length to fit in
//                    VSIP_IMPL_OVERLAY_BUFFER_SIZE.
//   scale          - forwarded unchanged in the parameter block.
//   exponent       - -1 selects fwd_fft; any other value selects inv_fft.
void
fft(T const* in_re, T const* in_im, T* out_re, T* out_im,
    length_type length, T scale, int exponent)
{
  assert(is_dma_addr_ok(in_re));
  assert(is_dma_addr_ok(in_im));
  assert(is_dma_addr_ok(out_re));
  assert(is_dma_addr_ok(out_im));

  // Cached across calls so load_plugin() runs only while code_ea is still 0.
  static char* code_ea = 0;
  static int code_size;

  Overlay_params params;
  // The parameter block is filled in through a pointer to its zfft member.
  Fft_split_params* fftp = &params.zfft;

  if (code_ea == 0) load_plugin(code_ea, code_size, "plugin", "zhalfast_f");

  fftp->code_ea = (uintptr_t)code_ea;
  fftp->code_size = code_size;
  fftp->cmd = overlay_zfft_f;
  fftp->direction = (exponent == -1 ? fwd_fft : inv_fft);
  fftp->size = length;
  fftp->scale = scale;
  fftp->ea_input_re = ea_from_ptr(in_re);
  fftp->ea_input_im = ea_from_ptr(in_im);
  fftp->ea_output_re = ea_from_ptr(out_re);
  fftp->ea_output_im = ea_from_ptr(out_im);
  fftp->in_blk_stride = 0;  // not applicable in the single FFT case
  fftp->out_blk_stride = 0;
  fftp->chunks_per_wb = 1;
  fftp->chunks_per_spe = 1;

  Task_manager *mgr = Task_manager::instance();
  Task task = mgr->reserve_iobuf<Plugin_tag, void>
    (VSIP_IMPL_OVERLAY_STACK_SIZE,
     sizeof(Overlay_params),
     VSIP_IMPL_OVERLAY_BUFFER_SIZE, VSIP_IMPL_OVERLAY_DTL_SIZE);

  assert(2*sizeof(complex<T>)*length <= VSIP_IMPL_OVERLAY_BUFFER_SIZE);

  // A single transform is dispatched as exactly one workblock.
  Workblock block = task.create_workblock(1);
  block.set_parameters(params);
  block.enqueue();
  task.sync();
}
示例2: fftm
//.........这里部分代码省略.........
assert(is_dma_addr_ok(in_im + (axis != 0 ? in_r_stride : in_c_stride)));
assert(is_dma_addr_ok(out_im + (axis != 0 ? out_r_stride : out_c_stride)));
static char* code_ea = 0;
static int code_size;
Overlay_params params;
Fft_split_params* fftp = ¶ms.zfft;
if (code_ea == 0) load_plugin(code_ea, code_size, "plugin", "zhalfast_f");
fftp->code_ea = (uintptr_t)code_ea;
fftp->code_size = code_size;
fftp->cmd = overlay_zfft_f;
fftp->direction = (exponent == -1 ? fwd_fft : inv_fft);
fftp->scale = scale;
length_type num_ffts;
length_type in_stride;
length_type out_stride;
if (axis != 0)
{
num_ffts = rows;
in_stride = in_r_stride;
out_stride = out_r_stride;
fftp->size = cols;
}
else
{
num_ffts = cols;
in_stride = in_c_stride;
out_stride = out_c_stride;
fftp->size = rows;
}
fftp->ea_input_re = ea_from_ptr(in_re);
fftp->ea_input_im = ea_from_ptr(in_im);
fftp->ea_output_re = ea_from_ptr(out_re);
fftp->ea_output_im = ea_from_ptr(out_im);
fftp->in_blk_stride = in_stride;
fftp->out_blk_stride = out_stride;
Task_manager *mgr = Task_manager::instance();
length_type spes = mgr->num_spes();
length_type chunks_per_wb;
// A chunk is the amount of data to perform 1 FFT.
//
// If the chunk size is less than 16 KB, send multiple chunks per
// workblock to amortize transfer costs.
if (fftp->size * sizeof(float) < 16384)
chunks_per_wb = std::min<length_type>(
16384 / (fftp->size * sizeof(float)),
VSIP_IMPL_OVERLAY_DTL_SIZE / 4);
else
chunks_per_wb = 1;
length_type num_wb = num_ffts / chunks_per_wb;
length_type wb_per_spe = num_wb / spes;
length_type extra_wb = (num_ffts % chunks_per_wb) ? 1 : 0;
Task task = mgr->reserve_iobuf<Plugin_tag, void>
(VSIP_IMPL_OVERLAY_STACK_SIZE,
sizeof(Overlay_params),
VSIP_IMPL_OVERLAY_BUFFER_SIZE,
VSIP_IMPL_OVERLAY_DTL_SIZE);
assert(2*sizeof(complex<T>)*chunks_per_wb*fftp->size <=
VSIP_IMPL_OVERLAY_BUFFER_SIZE);
assert(4*chunks_per_wb <= VSIP_IMPL_OVERLAY_DTL_SIZE);
for (length_type i = 0; i < spes && i < num_wb + extra_wb; ++i)
{
// If wbs don't divide evenly, give the first SPEs one extra.
length_type spe_wb = (i < num_wb % spes) ? wb_per_spe + 1
: wb_per_spe;
length_type spe_ffts = spe_wb * chunks_per_wb;
if (extra_wb && (i == spes-1 || i >= num_wb))
{
spe_wb += 1;
spe_ffts += num_ffts % chunks_per_wb;
}
fftp->chunks_per_wb = chunks_per_wb;
fftp->chunks_per_spe = spe_ffts;
Workblock block = task.create_workblock(spe_wb);
block.set_parameters(params);
block.enqueue();
fftp->ea_input_re += sizeof(T) * spe_ffts * in_stride;
fftp->ea_input_im += sizeof(T) * spe_ffts * in_stride;
fftp->ea_output_re += sizeof(T) * spe_ffts * out_stride;
fftp->ea_output_im += sizeof(T) * spe_ffts * out_stride;
}
task.sync();
}