本文整理汇总了C++中Func::compute_at方法的典型用法代码示例。如果您正苦于以下问题:C++ Func::compute_at方法的具体用法?C++ Func::compute_at怎么用?C++ Func::compute_at使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Func的用法示例。
在下文中一共展示了Func::compute_at方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: process
// Chains hot-pixel suppression, deinterleaving, demosaicing, color correction
// and the tone curve on top of `raw`, stores the result in `processed`,
// schedules every stage, and returns `processed`.
//
// `processed`, `target` and the Vars x, y, c, yi, yo are not declared here;
// they are taken from the enclosing scope.
Func process(Func raw, Type result_type,
             ImageParam matrix_3200, ImageParam matrix_7000, Param<float> color_temp,
             Param<float> gamma, Param<float> contrast, Param<int> blackLevel, Param<int> whiteLevel) {
    Var y_pair, x_lane;

    // Algorithm: each stage consumes the previous one.
    Func denoised = hot_pixel_suppression(raw);
    Func deinterleaved = deinterleave(denoised);
    Func demosaiced = demosaic(deinterleaved);
    Func corrected = color_correct(demosaiced, matrix_3200, matrix_7000, color_temp);
    Func curved = apply_curve(corrected, result_type, gamma, contrast, blackLevel, whiteLevel);

    processed(x, y, c) = curved(x, y, c);

    // Schedule
    Expr full_width = processed.output_buffer().width();
    Expr full_height = processed.output_buffer().height();

    const int rows_per_strip = 32;

    // Start from the target's natural width for 16-bit values and override
    // it with a fixed width when an HVX feature is present.
    int lanes = target.natural_vector_size(UInt(16));
    if (target.has_feature(Target::HVX_64)) {
        lanes = 32;
    } else if (target.has_feature(Target::HVX_128)) {
        lanes = 64;
    }
    const int wide_lanes = 2 * lanes;

    denoised.compute_at(processed, yi);
    denoised.store_at(processed, yo);
    denoised.fold_storage(y, 8);
    denoised.vectorize(x, lanes);

    deinterleaved.compute_at(processed, yi);
    deinterleaved.store_at(processed, yo);
    deinterleaved.fold_storage(y, 4);
    deinterleaved.vectorize(x, wide_lanes, TailStrategy::RoundUp);
    deinterleaved.reorder(c, x, y);
    deinterleaved.unroll(c);

    corrected.compute_at(processed, x);
    corrected.vectorize(x, lanes);
    corrected.reorder(c, x, y);
    corrected.unroll(c);

    // The output is split into strips of rows (parallel over yo), each strip
    // into pairs of rows, and each row into vectors of wide_lanes.
    processed.compute_root();
    processed.split(y, yo, yi, rows_per_strip);
    processed.split(yi, yi, y_pair, 2);
    processed.split(x, x, x_lane, wide_lanes, TailStrategy::RoundUp);
    processed.reorder(x_lane, c, y_pair, x, yi, yo);
    processed.vectorize(x_lane, wide_lanes);
    processed.parallel(yo);

    const bool has_hvx = target.features_any_of({Target::HVX_64, Target::HVX_128});
    if (has_hvx) {
        processed.hexagon();
        denoised.align_storage(x, lanes);
        deinterleaved.align_storage(x, lanes);
        corrected.align_storage(x, lanes);
    }

    // Promise that the extents are whole multiples of the split factors;
    // slightly better code can be generated when the splits divide evenly.
    processed.bound(c, 0, 3);
    processed.bound(x, 0, (full_width / wide_lanes) * wide_lanes);
    processed.bound(y, 0, (full_height / rows_per_strip) * rows_per_strip);

    return processed;
}
示例2: main
int main(int argc, char **argv) {
Func source;
source.define_extern("make_data",
std::vector<ExternFuncArgument>(),
Float(32), 2);
Func sink;
Var x, y;
sink(x, y) = source(x, y) - sin(x + y);
Var xi, yi;
sink.tile(x, y, xi, yi, 32, 32);
// Compute the source per tile of sink
source.compute_at(sink, x);
Image<float> output = sink.realize(100, 100);
// Should be all zeroes.
RDom r(output);
float error = evaluate_may_gpu<float>(sum(abs(output(r.x, r.y))));
if (error != 0) {
printf("Something went wrong\n");
return -1;
}
Func multi;
std::vector<Type> types;
types.push_back(Float(32));
types.push_back(Float(32));
multi.define_extern("make_data_multi",
std::vector<ExternFuncArgument>(),
types, 2);
Func sink_multi;
sink_multi(x, y) = multi(x, y)[0] - sin(x + y) +
multi(x, y)[1] - cos(x + y);
sink_multi.tile(x, y, xi, yi, 32, 32);
// Compute the source per tile of sink
multi.compute_at(sink_multi, x);
Image<float> output_multi = sink_multi.realize(100, 100);
// Should be all zeroes.
float error_multi = evaluate<float>(sum(abs(output_multi(r.x, r.y))));
if (error_multi != 0) {
printf("Something went wrong in multi case\n");
return -1;
}
printf("Success!\n");
return 0;
}
示例3: blur_then_transpose
// Blurs `f` with performBlur, transposes the result, and divides out the
// attenuation introduced by the zero boundary condition. Returns the
// scheduled output Func. The Vars x and y come from the enclosing scope.
Func blur_then_transpose(Func f, Func coeff, Expr size, Expr sigma) {
    Func blurred = performBlur(f, coeff, size, sigma);

    // Blurring an all-ones image the same way measures how much the zero
    // boundary condition attenuates the signal; dividing by it is
    // equivalent to reweighting the Gaussian near the edge.
    // (TODO: add a generator param to select different boundary conditions).
    Func ones;
    ones(x, y) = 1.0f;
    Func attenuation = performBlur(ones, coeff, size, sigma);

    // Store the reciprocal so the correction is a multiply. The attenuation
    // is identical for every row/channel, so a single column suffices.
    Func inverse_attenuation;
    inverse_attenuation(y) = 1.0f / attenuation(0, y);

    // Transpose.
    Func transposed;
    transposed(x, y) = blurred(y, x);

    // Apply the attenuation correction.
    Func out;
    out(x, y) = transposed(x, y) * inverse_attenuation(x);

    // ---- Schedule ----
    Var y_tile, x_tile, y_sub, x_sub;

    attenuation.compute_root();

    inverse_attenuation.compute_root();
    inverse_attenuation.vectorize(y, 8);

    out.compute_root();
    out.tile(x, y, x_tile, y_tile, 8, 32);
    out.tile(x_tile, y_tile, x_sub, y_sub, 8, 8);
    out.vectorize(x_sub);
    out.unroll(y_sub);
    out.parallel(y);

    blurred.compute_at(out, y);

    transposed.compute_at(out, x_tile);
    transposed.vectorize(y);
    transposed.unroll(x);

    // Schedule every update stage of the blur: put x innermost when the
    // stage has a reduction domain, then vectorize and unroll over x.
    const int update_count = blurred.num_update_definitions();
    for (int stage = 0; stage < update_count; ++stage) {
        RDom dom = blurred.reduction_domain(stage);
        if (dom.defined()) {
            blurred.update(stage).reorder(x, dom);
        }
        blurred.update(stage).vectorize(x, 8).unroll(x);
    }

    return out;
}
示例4: main
int main(int argc, char **argv) {
Func mandelbrot;
Var x, y;
Param<float> x_min, x_max, y_min, y_max, c_real, c_imag;
Param<int> w, h, iters;
Complex initial(lerp(x_min, x_max, cast<float>(x)/w),
lerp(y_min, y_max, cast<float>(y)/h));
Complex c(c_real, c_imag);
Var z;
mandelbrot(x, y, z) = initial;
RDom t(1, iters);
Complex current = mandelbrot(x, y, t-1);
mandelbrot(x, y, t) = current*current + c;
// How many iterations until something escapes a circle of radius 2?
Func count;
Tuple escape = argmin(magnitude(mandelbrot(x, y, t)) < 4);
// If it never escapes, use the value 0
count(x, y) = select(escape[1], 0, escape[0]);
Var xi, yi, xo, yo;
count.tile(x, y, xo, yo, xi, yi, 8, 8);
count.parallel(yo).vectorize(xi, 4).unroll(xi).unroll(yi, 2);
mandelbrot.compute_at(count, xo);
Argument args[] = {x_min, x_max, y_min, y_max, c_real, c_imag, iters, w, h};
count.compile_to_file("mandelbrot", std::vector<Argument>(args, args + 9));
return 0;
}
示例5: main
int main(int argc, char **argv)
{
Image<uint8_t> input = load<uint8_t>("P1070046.png");
timeval t1, t2;
gettimeofday(&t1, NULL);
Var x,y,c;
Func toFloat;
toFloat(c,x,y) = cast<float>(input(x,y,c))/255.0;
Func toHSV;
toHSV = hsv(toFloat);
Func saturated;
saturated(c,x,y) = select(c != 1,
toHSV(c,x,y),
clamp(1*fast_pow(toHSV(c,x,y),0.5),
0,1));
Func toRGB,toInt;
toRGB = rgb(saturated);
toInt(x,y,c) = cast<uint8_t>(toRGB(c,x,y)*255.0);
Var y_outer,y_inner;
toInt.reorder(c,x,y);
toInt.split(y,y_outer, y_inner, 256);
toInt.parallel(y_outer);
toHSV.compute_at(toInt,x);
Halide::Image<uint8_t> output = toInt.realize(input.width(),input.height(),input.channels());
gettimeofday(&t2, NULL);
save(output,"vibSat.png");
std::cout<<float(t2.tv_sec - t1.tv_sec) + float(t2.tv_usec - t1.tv_usec)/1000000.0f << std::endl;
return 0;
}
示例6: schedule_for_cpu
// Now we define methods that give our pipeline several different
// schedules.
void schedule_for_cpu() {
// Compute the look-up-table ahead of time.
lut.compute_root();
// Compute color channels innermost. Promise that there will
// be three of them and unroll across them.
curved.reorder(c, x, y)
.bound(c, 0, 3)
.unroll(c);
// Look-up-tables don't vectorize well, so just parallelize
// curved in slices of 16 scanlines.
Var yo, yi;
curved.split(y, yo, yi, 16)
.parallel(yo);
// Compute sharpen as needed per scanline of curved.
sharpen.compute_at(curved, yi);
// Vectorize the sharpen. It's 16-bit so we'll vectorize it 8-wide.
sharpen.vectorize(x, 8);
// Compute the padded input as needed per scanline of curved,
// reusing previous values computed within the same strip of
// 16 scanlines.
padded.store_at(curved, yo)
.compute_at(curved, yi);
// Also vectorize the padding. It's 8-bit, so we'll vectorize
// 16-wide.
padded.vectorize(x, 16);
// JIT-compile the pipeline for the CPU.
curved.compile_jit();
}
示例7: main
int main(int argc, char **argv) {
Func source;
source.define_extern("make_data",
std::vector<ExternFuncArgument>(),
Float(32), 2);
Func sink;
Var x, y;
sink(x, y) = source(x, y) - sin(x + y);
Var xi, yi;
sink.tile(x, y, xi, yi, 32, 32);
// Compute the source per tile of sink
source.compute_at(sink, x);
Image<float> output = sink.realize(100, 100);
// Should be all zeroes.
RDom r(output);
float error = evaluate<float>(sum(abs(output(r.x, r.y))));
if (error != 0) {
printf("Something went wrong\n");
return -1;
}
printf("Success!\n");
return 0;
}
示例8: Test
// Builds the inner/outer Func pair for test case `i`; both Func names carry
// `i` as a suffix. The members inner, outer, inner_compute_at,
// inner_store_at and the Vars x, y, c are declared outside this constructor.
explicit Test(int i) {
    const std::string suffix = std::to_string(i);

    // sin() marks the inner function and cos() the outer one, so the place
    // where compute_at put each of them can be identified afterwards. These
    // calls are used as markers mainly because the lowering code never emits
    // them on its own as part of general code structure.
    inner = Func("inner" + suffix);
    inner(x, y, c) = sin(cast<float>(x + y + c));
    inner.compute_at(inner_compute_at);
    inner.store_at(inner_store_at);

    outer = Func("outer" + suffix);
    outer(x, y, c) = cos(cast<float>(inner(x, y, c)));
}
示例9: global_wrap_test
// Wraps an ImageParam with a global wrapper (img.in()), schedules the
// wrapper inside the consumer's tiles, and verifies both the resulting call
// graph and the realized values. Returns 0 on success, -1 on failure.
int global_wrap_test() {
    Func source("source"), g("g"), h("h"), i("i");
    Var x("x"), y("y");

    // Fill the ImageParam with x + y.
    source(x, y) = x + y;
    ImageParam img(Int(32), 2, "img");
    Buffer<int> buf = source.realize(200, 200);
    img.set(buf);

    g(x, y) = img(x, y);
    h(x, y) = g(x, y) + img(x, y);

    Var xi("xi"), yi("yi"), t("t");
    Func wrapper = img.in();
    Func img_f = img;
    img_f.compute_root();

    h.compute_root();
    h.tile(x, y, xi, yi, 16, 16);
    h.fuse(x, y, t);
    h.parallel(t);

    g.compute_at(h, yi);

    wrapper.compute_at(h, yi);
    wrapper.tile(_0, _1, xi, yi, 8, 8);
    wrapper.fuse(xi, yi, t);
    wrapper.vectorize(t, 4);

    // Check the call graphs.
    // Expected: 'g' calls 'wrapper', 'wrapper' calls 'img_f', 'img_f' calls
    // 'img', and 'h' calls both 'wrapper' and 'g'.
    Module m = h.compile_to_module({h.infer_arguments()});
    CheckCalls c;
    m.functions().front().body.accept(&c);

    CallGraphs expected = {
        {h.name(), {g.name(), wrapper.name()}},
        {g.name(), {wrapper.name()}},
        {wrapper.name(), {img_f.name()}},
        {img_f.name(), {img.name()}},
    };
    if (check_call_graphs(c.calls, expected) != 0) {
        return -1;
    }

    // h adds two copies of x + y.
    Buffer<int> im = h.realize(200, 200);
    auto expected_value = [](int col, int row) { return 2*(col + row); };
    if (check_image(im, expected_value)) {
        return -1;
    }

    return 0;
}
示例10: blur_cols_transpose
// Builds a Func that applies a first order low pass IIR filter along the
// columns of `input` (one pass down, one pass back up) and then transposes
// the result. The Vars x, y, c come from the enclosing scope.
Func blur_cols_transpose(Func input, Expr height, Expr alpha) {
    Func blur;

    // Pure definition: leave everything undefined.
    blur(x, y, c) = undef<float>();

    // Update 0: the top row is copied straight from the input.
    blur(x, 0, c) = input(x, 0, c);

    // Update 1: filter downwards along each column.
    RDom ry(1, height - 1);
    blur(x, ry, c) =
        (1 - alpha)*blur(x, ry - 1, c) + alpha*input(x, ry, c);

    // Update 2: filter back upwards along each column.
    Expr flip_ry = height - ry - 1;
    blur(x, flip_ry, c) =
        (1 - alpha)*blur(x, flip_ry + 1, c) + alpha*blur(x, flip_ry, c);

    // Transpose the blurred result.
    Func transpose;
    transpose(x, y, c) = blur(y, x, c);

    // Schedule:
    // The transpose is split into tiles of rows and parallelized over both
    // channels and strips (Halide supports nested parallelism).
    Var xo, yo;
    transpose.compute_root();
    transpose.tile(x, y, xo, yo, x, y, 8, 8);
    transpose.vectorize(x);
    transpose.parallel(yo);
    transpose.parallel(c);

    // The filter runs once per row of tiles, which corresponds to a strip
    // of columns in the input.
    blur.compute_at(transpose, yo);

    // Vectorize across x within the strips for both filtering passes
    // (update stages 1 and 2).
    for (int stage = 1; stage <= 2; ++stage) {
        blur.update(stage)
            .reorder(x, ry)
            .vectorize(x);
    }

    return transpose;
}
示例11: global_wrap_test
// Wraps Func f with a global wrapper (f.in()), schedules the wrapper inside
// the consumer's tiles, and verifies both the resulting call graph and the
// realized values. Returns 0 on success, -1 on failure.
int global_wrap_test() {
    Func f("f"), g("g"), h("h"), i("i");
    Var x("x"), y("y");

    f(x, y) = x + y;
    g(x, y) = f(x, y);
    h(x, y) = g(x, y) + f(x, y);

    Var xi("xi"), yi("yi"), t("t");
    Func wrapper = f.in();
    f.compute_root();

    h.compute_root();
    h.tile(x, y, xi, yi, 16, 16);
    h.fuse(x, y, t);
    h.parallel(t);

    g.compute_at(h, yi);

    wrapper.compute_at(h, yi);
    wrapper.tile(x, y, xi, yi, 8, 8);
    wrapper.fuse(xi, yi, t);
    wrapper.vectorize(t, 4);

    // Check the call graphs.
    // Expected: 'g' calls 'wrapper', 'wrapper' calls 'f', 'f' calls nothing,
    // and 'h' calls both 'wrapper' and 'g'.
    Module m = h.compile_to_module({});
    CheckCalls c;
    m.functions().front().body.accept(&c);

    CallGraphs expected = {
        {h.name(), {g.name(), wrapper.name()}},
        {g.name(), {wrapper.name()}},
        {wrapper.name(), {f.name()}},
        {f.name(), {}},
    };
    if (check_call_graphs(c.calls, expected) != 0) {
        return -1;
    }

    // h adds two copies of x + y.
    Image<int> im = h.realize(200, 200);
    auto expected_value = [](int col, int row) { return 2*(col + row); };
    if (check_image(im, expected_value)) {
        return -1;
    }

    return 0;
}
示例12: main
int main(int argc, char **argv) {
Var x, y;
Func mandelbrot;
// Use a different scale on x and y because terminal characters
// are not square. Arbitrarily chosen to fit the set nicely.
Complex initial(x/20.0f, y/8.0f);
Var z;
mandelbrot(x, y, z) = Complex(0.0f, 0.0f);
RDom t(1, 40);
Complex current = mandelbrot(x, y, t-1);
mandelbrot(x, y, t) = current*current + initial;
// How many iterations until something escapes a circle of radius 2?
Func count;
Tuple escape = argmin(magnitude(mandelbrot(x, y, t)) < 4);
// If it never escapes, use the value 0
count(x, y) = select(escape[1], 0, escape[0]);
RDom r(-45, 71, -10, 21);
Func render;
render() = 0;
render() = draw_pixel(r.x, r.y, count(r.x, r.y));
mandelbrot.compute_at(render, r.x);
render.realize();
printf("\n");
// Check draw_pixel was called the right number of times.
if (call_count != 71*21) {
printf("Something went wrong\n");
return -1;
}
printf("Success!\n");
return 0;
}
示例13: process
// Chains hot-pixel suppression, deinterleaving, demosaicing, color correction
// and the tone curve on top of `raw`, stores the result in `processed`, and
// applies one of three schedules selected by `schedule`.
//
// `processed`, `schedule` and the Vars x, y, c, tx, ty are not declared
// here; they are taken from the enclosing scope.
Func process(Func raw, Type result_type,
             ImageParam matrix_3200, ImageParam matrix_7000, Param<float> color_temp,
             Param<float> gamma, Param<float> contrast) {
    Var xi, yi;

    // Algorithm: each stage consumes the previous one.
    Func denoised = hot_pixel_suppression(raw);
    Func deinterleaved = deinterleave(denoised);
    Func demosaiced = demosaic(deinterleaved);
    Func corrected = color_correct(demosaiced, matrix_3200, matrix_7000, color_temp);
    Func curved = apply_curve(corrected, result_type, gamma, contrast);

    processed(tx, ty, c) = curved(tx, ty, c);

    // Schedule
    // The color loop is bounded to exactly channels 0..2.
    processed.bound(c, 0, 3);

    if (schedule == 0) {
        // Compute in chunks over 32x32 tiles, with vectorized stages.
        denoised.compute_at(processed, tx);
        denoised.vectorize(x, 8);

        deinterleaved.compute_at(processed, tx);
        deinterleaved.vectorize(x, 8);
        deinterleaved.reorder(c, x, y);
        deinterleaved.unroll(c);

        corrected.compute_at(processed, tx);
        corrected.vectorize(x, 4);
        corrected.reorder(c, x, y);
        corrected.unroll(c);

        processed.tile(tx, ty, xi, yi, 32, 32);
        processed.reorder(xi, yi, c, tx, ty);
        processed.parallel(ty);
    } else if (schedule == 1) {
        // Same structure with 128x128 tiles and no vectorization
        // (sse is bad at interleaved 16-bit ops).
        denoised.compute_at(processed, tx);
        deinterleaved.compute_at(processed, tx);
        corrected.compute_at(processed, tx);

        processed.tile(tx, ty, xi, yi, 128, 128);
        processed.reorder(xi, yi, c, tx, ty);
        processed.parallel(ty);
    } else {
        // Any other value: every stage is computed at root.
        denoised.compute_root();
        deinterleaved.compute_root();
        corrected.compute_root();
        processed.compute_root();
    }

    return processed;
}
示例14: main
// GPU test with two parts: (1) a Func with many update stages that would
// race without proper synchronization, run in thread blocks and checked for
// the expected value; (2) a Func with undef stages, checked for the expected
// number of thread barriers via CheckBarrierCount. Returns 0 on success (or
// when no GPU target is enabled) and -1 on a wrong output value.
int main(int argc, char **argv) {
    if (!get_jit_target_from_environment().has_gpu_feature()) {
        printf("Not running test because no gpu target enabled\n");
        return 0;
    }

    {
        Func f;
        Var x, y, z;

        // Build a Func with lots of potential race conditions, then run it
        // in thread blocks on the gpu.
        f(x, y) = x + 100 * y;

        const int passes = 10;
        for (int pass = 0; pass < passes; pass++) {
            RDom rx(0, 10);
            // Flip each row, with spots 10-19 serving as temporary storage.
            f(rx + 10, y) = f(9 - rx, y);
            f(rx, y) = f(rx + 10, y);

            // Flip each column in the same manner.
            RDom ry(0, 8);
            f(x, ry + 8) = f(x, 7 - ry);
            f(x, ry) = f(x, ry + 8);
        }

        Func g;
        g(x, y) = f(0, 0)+ f(9, 7);

        g.gpu_tile(x, y, 16, 8);
        f.compute_at(g, Var::gpu_blocks());

        // Each pass contributed four update stages: the two row-flip stages
        // run across threads in y, the two column-flip stages across x.
        for (int pass = 0; pass < passes; pass++) {
            const int first_stage = pass * 4;
            f.update(first_stage).gpu_threads(y);
            f.update(first_stage + 1).gpu_threads(y);
            f.update(first_stage + 2).gpu_threads(x);
            f.update(first_stage + 3).gpu_threads(x);
        }

        Image<int> out = g.realize(100, 100);

        const int correct = 7*100 + 9;
        for (int row = 0; row < out.height(); row++) {
            for (int col = 0; col < out.width(); col++) {
                if (out(col, row) != correct) {
                    printf("out(%d, %d) = %d instead of %d\n",
                           col, row, out(col, row), correct);
                    return -1;
                }
            }
        }
    }

    {
        // Build a Func with undef stages, run it in thread blocks, and make
        // sure the right number of syncthreads are added.
        Func f;
        Var x, y, z;
        f(x, y) = undef<int>();
        f(x, y) += x + 100 * y;
        // The following definition is dubious because it entirely masks the
        // effect of the previous one: once an undefined value is added,
        // Halide may evaluate the result to whatever it likes. Currently
        // this update definition is simply elided.
        f(x, y) += undef<int>();
        f(x, y) += y * 100 + x;

        Func g;
        g(x, y) = f(0, 0) + f(7, 7);

        g.gpu_tile(x, y, 8, 8);
        f.compute_at(g, Var::gpu_blocks());

        f.gpu_threads(x, y);
        for (int stage = 0; stage < 3; stage++) {
            f.update(stage).gpu_threads(x, y);
        }

        // Two thread barriers are expected: one between the non-undef
        // definitions, and one between f and g.
        g.add_custom_lowering_pass(new CheckBarrierCount(2));

        Image<int> out = g.realize(100, 100);
    }

    printf("Success!\n");
    return 0;
}
示例15: demosaic
//.........这里部分代码省略.........
g_r(x, y) = select(ghd_r < gvd_r, gh_r, gv_r);
Expr gv_b = avg(g_gr(x, y+1), g_gr(x, y));
Expr gvd_b = absd(g_gr(x, y+1), g_gr(x, y));
Expr gh_b = avg(g_gb(x-1, y), g_gb(x, y));
Expr ghd_b = absd(g_gb(x-1, y), g_gb(x, y));
g_b(x, y) = select(ghd_b < gvd_b, gh_b, gv_b);
// Next interpolate red at gr by first interpolating, then
// correcting using the error green would have had if we had
// interpolated it in the same way (i.e. add the second derivative
// of the green channel at the same place).
Expr correction;
correction = g_gr(x, y) - avg(g_r(x, y), g_r(x-1, y));
r_gr(x, y) = correction + avg(r_r(x-1, y), r_r(x, y));
// Do the same for other reds and blues at green sites
correction = g_gr(x, y) - avg(g_b(x, y), g_b(x, y-1));
b_gr(x, y) = correction + avg(b_b(x, y), b_b(x, y-1));
correction = g_gb(x, y) - avg(g_r(x, y), g_r(x, y+1));
r_gb(x, y) = correction + avg(r_r(x, y), r_r(x, y+1));
correction = g_gb(x, y) - avg(g_b(x, y), g_b(x+1, y));
b_gb(x, y) = correction + avg(b_b(x, y), b_b(x+1, y));
// Now interpolate diagonally to get red at blue and blue at
// red. Hold onto your hats; this gets really fancy. We do the
// same thing as for interpolating green where we try both
// directions (in this case the positive and negative diagonals),
// and use the one with the lowest absolute difference. But we
// also use the same trick as interpolating red and blue at green
// sites - we correct our interpolations using the second
// derivative of green at the same sites.
correction = g_b(x, y) - avg(g_r(x, y), g_r(x-1, y+1));
Expr rp_b = correction + avg(r_r(x, y), r_r(x-1, y+1));
Expr rpd_b = absd(r_r(x, y), r_r(x-1, y+1));
correction = g_b(x, y) - avg(g_r(x-1, y), g_r(x, y+1));
Expr rn_b = correction + avg(r_r(x-1, y), r_r(x, y+1));
Expr rnd_b = absd(r_r(x-1, y), r_r(x, y+1));
r_b(x, y) = select(rpd_b < rnd_b, rp_b, rn_b);
// Same thing for blue at red
correction = g_r(x, y) - avg(g_b(x, y), g_b(x+1, y-1));
Expr bp_r = correction + avg(b_b(x, y), b_b(x+1, y-1));
Expr bpd_r = absd(b_b(x, y), b_b(x+1, y-1));
correction = g_r(x, y) - avg(g_b(x+1, y), g_b(x, y-1));
Expr bn_r = correction + avg(b_b(x+1, y), b_b(x, y-1));
Expr bnd_r = absd(b_b(x+1, y), b_b(x, y-1));
b_r(x, y) = select(bpd_r < bnd_r, bp_r, bn_r);
// Interleave the resulting channels
Func r = interleave_y(interleave_x(r_gr, r_r),
interleave_x(r_b, r_gb));
Func g = interleave_y(interleave_x(g_gr, g_r),
interleave_x(g_b, g_gb));
Func b = interleave_y(interleave_x(b_gr, b_r),
interleave_x(b_b, b_gb));
Func output;
output(x, y, c) = select(c == 0, r(x, y),
c == 1, g(x, y),
b(x, y));
/* THE SCHEDULE */
int vec = target.natural_vector_size(UInt(16));
if (target.has_feature(Target::HVX_64)) {
vec = 32;
} else if (target.has_feature(Target::HVX_128)) {
vec = 64;
}
g_r.compute_at(processed, yi)
.store_at(processed, yo)
.vectorize(x, vec, TailStrategy::RoundUp)
.fold_storage(y, 2);
g_b.compute_at(processed, yi)
.store_at(processed, yo)
.vectorize(x, vec, TailStrategy::RoundUp)
.fold_storage(y, 2);
output.compute_at(processed, x)
.vectorize(x)
.unroll(y)
.reorder(c, x, y)
.unroll(c);
if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
g_r.align_storage(x, vec);
g_b.align_storage(x, vec);
}
return output;
}