本文整理汇总了C++中Func::in方法的典型用法代码示例。如果您正苦于以下问题：C++ Func::in方法的具体用法？C++ Func::in怎么用？C++ Func::in使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Func的用法示例。
在下文中一共展示了Func::in方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
int main(int argc, char **argv) {
Target t = get_jit_target_from_environment();
if (!t.features_any_of({Target::CUDACapability50,
Target::CUDACapability61})) {
printf("This test requires cuda enabled with cuda capability 5.0 or greater\n");
return 0;
}
{
// Shuffle test to do a small convolution
Func f, g;
Var x, y;
f(x, y) = x + y;
g(x, y) = f(x-1, y) + f(x+1, y);
Var xo, xi, yi, yo;
g.gpu_tile(x, y, xi, yi, 32, 2, TailStrategy::RoundUp).gpu_lanes(xi);
f.compute_root();
f.in(g).compute_at(g, yi).split(x, xo, xi, 32, TailStrategy::RoundUp).gpu_lanes(xi).unroll(xo);
Buffer<int> out = g.realize(32, 4);
for (int y = 0; y < out.height(); y++) {
for (int x = 0; x < out.width(); x++) {
int correct = 2*(x + y);
int actual = out(x, y);
if (correct != actual) {
printf("out(%d, %d) = %d instead of %d\n",
x, y, actual, correct);
return -1;
}
}
}
}
{
// Broadcast test - an outer product access pattern
Func a, b, c;
Var x, y;
a(x) = cast<float>(x);
b(y) = cast<float>(y);
c(x, y) = a(x) + 100 * b(y);
a.compute_root();
b.compute_root();
Var xi, yi, yii;
c.tile(x, y, xi, yi, 32, 32, TailStrategy::RoundUp)
.gpu_blocks(x, y)
.gpu_lanes(xi);
// We're going to be computing 'a' and 'b' at block level, but
// we want them in register, not shared, so we explicitly call
// store_in.
a.in(c).compute_at(c, x)
.gpu_lanes(x)
.store_in(MemoryType::Register);
b.in(c).compute_at(c, x)
.gpu_lanes(y)
.store_in(MemoryType::Register);
Buffer<float> out = c.realize(32, 32);
for (int y = 0; y < out.height(); y++) {
for (int x = 0; x < out.width(); x++) {
float correct = x + 100 * y;
float actual = out(x, y);
// The floats are small integers, so they should be exact.
if (correct != actual) {
printf("out(%d, %d) = %f instead of %f\n",
x, y, actual, correct);
return -1;
}
}
}
}
{
// Vectorized broadcast test. Each lane is responsible for a
// 2-vector from 'a' and a 2-vector from 'b' instead of a single
// value.
Func a, b, c;
Var x, y;
a(x) = cast<float>(x);
b(y) = cast<float>(y);
c(x, y) = a(x) + 100 * b(y);
a.compute_root();
b.compute_root();
Var xi, yi, yii;
c.tile(x, y, xi, yi, 64, 64, TailStrategy::RoundUp)
.gpu_blocks(x, y)
.split(yi, yi, yii, 64).unroll(yii, 2).gpu_threads(yi)
.vectorize(xi, 2).gpu_lanes(xi);
a.in(c).compute_at(c, yi).vectorize(x, 2).gpu_lanes(x);
b.in(c).compute_at(c, yi).vectorize(y, 2).gpu_lanes(y);
Buffer<float> out = c.realize(64, 64);
//.........这里部分代码省略.........