本文整理汇总了C++中Func::add_custom_lowering_pass方法的典型用法代码示例。如果您正苦于以下问题：C++ Func::add_custom_lowering_pass方法的具体用法？C++ Func::add_custom_lowering_pass怎么用？C++ Func::add_custom_lowering_pass使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Func的用法示例。
在下文中一共展示了Func::add_custom_lowering_pass方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: perform_test
// Runs a single test case against `f` and reports the outcome on stderr.
//
// The function prints `label`, empties the externally declared `varyings`
// collection, attaches a freshly allocated CountVarying lowering pass to `f`,
// and realizes `f` into an 8x8x3 float buffer for `target`.
//
// Parameters:
//   label             - name of the test case, used in every log line.
//   target            - target passed to realize().
//   f                 - the pipeline under test.
//   expected_nvarying - required size of `varyings` after realization.
//   tol               - tolerance forwarded to Testing::check_result.
//   expected_val      - reference value callback forwarded to
//                       Testing::check_result.
//
// Returns true only when the size of `varyings` equals `expected_nvarying`
// and Testing::check_result accepts the realized buffer.
// NOTE(review): presumably CountVarying is what fills `varyings` during
// lowering; that class is not visible here, so confirm against its definition.
bool perform_test(const char *label, const Target target, Func f, int expected_nvarying, float tol, std::function<float(int x, int y, int c)> expected_val) {
    fprintf(stderr, "%s\n", label);

    Buffer<float> out(8, 8, 3);

    // Start from an empty collection so earlier test cases cannot leak in.
    varyings.clear();
    f.add_custom_lowering_pass(new CountVarying);
    f.realize(out, target);

    // First gate: the number of varying attributes must match.
    const int found_nvarying = static_cast<int>(varyings.size());
    const bool count_ok = (found_nvarying == expected_nvarying);
    if (!count_ok) {
        fprintf(stderr, "%s: Error: wrong number of varying attributes: %d should be %d\n", label, found_nvarying, expected_nvarying);
        return false;
    }

    // Second gate: the result values, compared on the host side.
    out.copy_to_host();
    if (Testing::check_result<float>(out, tol, expected_val)) {
        fprintf(stderr, "%s Passed!\n", label);
        return true;
    }
    return false;
}
示例2: main
int main(int argc, char **argv) {
ImageParam input(UInt(8), 1);
input.dim(0).set_bounds(0, size);
{
Func f;
Var x;
f(x) = input(x);
// Output must have the same size as the input.
f.output_buffer().dim(0).set_bounds(input.dim(0).min(), input.dim(0).extent());
f.add_custom_lowering_pass(new Validator);
f.compile_jit();
Buffer<uint8_t> dummy(size);
dummy.fill(42);
input.set(dummy);
Buffer<uint8_t> out = f.realize(size);
if (!out.all_equal(42)) {
std::cerr << "wrong output" << std::endl;
exit(-1);
}
}
{
Func f;
Var x;
f(x) = undef(UInt(8));
RDom r(input);
f(r.x) = cast<uint8_t>(42);
f.add_custom_lowering_pass(new Validator);
f.compile_jit();
Buffer<uint8_t> dummy(size);
input.set(dummy);
Buffer<uint8_t> out = f.realize(size);
if (!out.all_equal(42)) {
std::cerr << "wrong output" << std::endl;
exit(-1);
}
}
std::cout << "Success!" << std::endl;
return 0;
}
示例3: main
// GPU test driver with two cases.
//
// Returns 0 immediately (after printing a notice) when the JIT target taken
// from the environment has no GPU feature. Otherwise:
//   1. Builds a Func with 10 passes of four update definitions each, schedules
//      those updates across GPU threads, realizes a 100x100 result and returns
//      -1 on the first pixel that differs from the expected constant.
//   2. Builds a Func whose definitions include undef stages, attaches a
//      CheckBarrierCount(2) lowering pass to its consumer and realizes it.
// Prints "Success!" and returns 0 when neither case fails.
int main(int argc, char **argv) {
if (!get_jit_target_from_environment().has_gpu_feature()) {
printf("Not running test because no gpu target enabled\n");
return 0;
}
{
Func f;
Var x, y, z;
// Construct a Func with lots of potential race conditions, and
// then run it in thread blocks on the gpu.
f(x, y) = x + 100 * y;
const int passes = 10;
// Each pass appends four update definitions, in this order: two for the
// row flip, then two for the column flip. The schedule loop further down
// relies on that ordering.
for (int i = 0; i < passes; i++) {
RDom rx(0, 10);
// Flip each row, using spots 10-19 as temporary storage
f(rx + 10, y) = f(9 - rx, y);
f(rx, y) = f(rx + 10, y);
// Flip each column the same way
RDom ry(0, 8);
f(x, ry + 8) = f(x, 7 - ry);
f(x, ry) = f(x, ry + 8);
}
Func g;
g(x, y) = f(0, 0)+ f(9, 7);
g.gpu_tile(x, y, 16, 8);
f.compute_at(g, Var::gpu_blocks());
// Updates i*4+0 and i*4+1 are the row-flip stages (pure in y), and
// i*4+2 and i*4+3 are the column-flip stages (pure in x); each is spread
// over GPU threads along its pure variable.
for (int i = 0; i < passes; i++) {
f.update(i*4 + 0).gpu_threads(y);
f.update(i*4 + 1).gpu_threads(y);
f.update(i*4 + 2).gpu_threads(x);
f.update(i*4 + 3).gpu_threads(x);
}
Image<int> out = g.realize(100, 100);
for (int y = 0; y < out.height(); y++) {
for (int x = 0; x < out.width(); x++) {
// The rows and columns are each flipped an even number of times (10), so
// without races f holds its initial x + 100*y values again:
// f(0, 0) + f(9, 7) = 0 + (9 + 100*7).
int correct = 7*100 + 9;
if (out(x, y) != correct) {
printf("out(%d, %d) = %d instead of %d\n",
x, y, out(x, y), correct);
return -1;
}
}
}
}
{
// Construct a Func with undef stages, then run it in thread
// blocks and make sure the right number of syncthreads are
// added.
Func f;
Var x, y, z;
f(x, y) = undef<int>();
f(x, y) += x + 100 * y;
// This next line is dubious, because it entirely masks the
// effect of the previous definition. If you add an undefined
// value to the previous def, then Halide can evaluate this to
// whatever it likes. Currently we'll just elide this update
// definition.
f(x, y) += undef<int>();
f(x, y) += y * 100 + x;
Func g;
g(x, y) = f(0, 0) + f(7, 7);
g.gpu_tile(x, y, 8, 8);
f.compute_at(g, Var::gpu_blocks());
// Schedule the pure definition and all three update definitions across
// GPU threads in both x and y.
f.gpu_threads(x, y);
f.update(0).gpu_threads(x, y);
f.update(1).gpu_threads(x, y);
f.update(2).gpu_threads(x, y);
// There should be two thread barriers: one in between the
// non-undef definitions, and one between f and g.
g.add_custom_lowering_pass(new CheckBarrierCount(2));
// The realized values are not inspected in this case.
// NOTE(review): presumably the CheckBarrierCount pass performs the actual
// verification during lowering - confirm against its definition.
Image<int> out = g.realize(100, 100);
}
printf("Success!\n");
return 0;
}