本文整理汇总了C++中Func::compute_root方法的典型用法代码示例。如果您正苦于以下问题：C++ Func::compute_root方法的具体用法？C++ Func::compute_root怎么用？C++ Func::compute_root使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Func的用法示例。
在下文中一共展示了Func::compute_root方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: ImageConverter
// Builds a pipeline that stretches `image` to the range 0..255 and packs each
// rescaled value into a 32-bit integer by replicating it into the three
// low-order bytes (value * 0x010101).
//
// The global minimum and maximum are reductions over the whole image and are
// scheduled with compute_root().
//
// A constant image (max == min) is mapped to 0 instead of dividing by zero.
Halide::Func ImageConverter(Halide::ImageParam image) {
    // Reduction domain covering every pixel of the input.
    RDom r(0, image.width(), 0, image.height());

    // First get the min and max of the image.
    Func imgmin;
    imgmin() = minimum(image(r.x, r.y));
    Func imgmax;
    imgmax() = maximum(image(r.x, r.y));

    // Guard the reciprocal: when the image is flat the range is zero and the
    // unguarded 1 / range would feed inf/NaN into the integer cast below.
    Expr range = imgmax() - imgmin();
    Expr inv_range = 1.0f / range;
    Expr scale = select(range > 0, inv_range, cast(inv_range.type(), 0));

    // Now rescale the image to the range 0..255 (rounding to nearest) and
    // project the value to a packed integer value.
    Func rescaled;
    Var x, y;
    Expr val = cast<uint32_t>(255.0f * (image(x, y) - imgmin()) * scale + 0.5f);
    Expr scaled = val * cast<uint32_t>(0x010101);
    rescaled(x, y) = scaled;

    imgmin.compute_root();
    imgmax.compute_root();

    // Disabled schedule kept for reference (needs `Var xo, yo, xi, yi;`):
    //rescaled.tile(x, y, xo, yo, xi, yi, 32, 8);
    //rescaled.vectorize(xi);
    //rescaled.unroll(yi);
    return rescaled;
}
示例2: blur_then_transpose
// Blurs `f` with performBlur(), transposes the result, and divides out the
// attenuation that the zero boundary condition introduces near the edges.
// Uses the Vars `x` and `y` declared outside this function.
Func blur_then_transpose(Func f, Func coeff, Expr size, Expr sigma) {
    Func blurred = performBlur(f, coeff, size, sigma);

    // Blur an all-ones image the same way to measure how much the zero
    // boundary condition attenuates the signal. Dividing by this is
    // equivalent to reweighting the Gaussian near the edge.
    // (TODO: add a generator param to select different boundary conditions).
    Func ones;
    ones(x, y) = 1.0f;
    Func attenuation = performBlur(ones, coeff, size, sigma);

    // The attenuation is identical for every row/channel, so a single column
    // is enough. Store its reciprocal so the correction is a multiply.
    Func inverse_attenuation;
    inverse_attenuation(y) = 1.0f / attenuation(0, y);

    // Swap the axes of the blurred image.
    Func transposed;
    transposed(x, y) = blurred(y, x);

    // Apply the attenuation correction to the transposed image.
    Func out;
    out(x, y) = transposed(x, y) * inverse_attenuation(x);

    // ---- Schedule ----
    Var y_inner, x_inner, y_lane, x_lane;

    attenuation.compute_root();

    inverse_attenuation.compute_root();
    inverse_attenuation.vectorize(y, 8);

    out.compute_root();
    out.tile(x, y, x_inner, y_inner, 8, 32);
    out.tile(x_inner, y_inner, x_lane, y_lane, 8, 8);
    out.vectorize(x_lane);
    out.unroll(y_lane);
    out.parallel(y);

    blurred.compute_at(out, y);

    transposed.compute_at(out, x_inner);
    transposed.vectorize(y);
    transposed.unroll(x);

    // Schedule every update stage of the blur: put x innermost relative to
    // the reduction (when there is one), then vectorize and unroll across x.
    const int update_count = blurred.num_update_definitions();
    for (int stage = 0; stage < update_count; ++stage) {
        RDom rdom = blurred.reduction_domain(stage);
        if (rdom.defined()) {
            blurred.update(stage).reorder(x, rdom);
        }
        blurred.update(stage).vectorize(x, 8).unroll(x);
    }

    return out;
}
示例3: color_correct
// Applies a colour-correction matrix to the three channels of `input`.
// The matrix is a blend of two calibrated matrices (3200K and 7000K),
// interpolated linearly in inverse colour temperature, and is stored as
// Q8.8 fixed point. Uses the Vars `x`, `y`, `c` declared outside this function.
Func color_correct(Func input, ImageParam matrix_3200, ImageParam matrix_7000, Param<float> kelvin) {
    // Interpolation weight between the two calibrated matrices, computed in
    // inverse-kelvin space.
    Func matrix;
    Expr weight = (1.0f/kelvin - 1.0f/3200) / (1.0f/7000 - 1.0f/3200);
    Expr blended = (matrix_3200(x, y) * weight + matrix_7000(x, y) * (1 - weight));
    matrix(x, y) = cast<int16_t>(blended * 256.0f); // Q8.8 fixed point
    matrix.compute_root();

    Func corrected;

    // Widen the input channels so the fixed-point products do not overflow.
    Expr in_r = cast<int32_t>(input(x, y, 0));
    Expr in_g = cast<int32_t>(input(x, y, 1));
    Expr in_b = cast<int32_t>(input(x, y, 2));

    // One output channel: matrix column 3 is the additive term and columns
    // 0..2 weight the input channels; dividing by 256 undoes the Q8.8 scale.
    auto apply_row = [&](int row) -> Expr {
        Expr sum = matrix(3, row) + matrix(0, row) * in_r + matrix(1, row) * in_g + matrix(2, row) * in_b;
        return cast<int16_t>(sum/256);
    };
    Expr out_r = apply_row(0);
    Expr out_g = apply_row(1);
    Expr out_b = apply_row(2);

    corrected(x, y, c) = select(c == 0, out_r,
                                c == 1, out_g,
                                out_b);
    return corrected;
}
示例4: schedule_for_cpu
// Now we define methods that give our pipeline several different
// schedules.
void schedule_for_cpu() {
// Compute the look-up-table ahead of time.
lut.compute_root();
// Compute color channels innermost. Promise that there will
// be three of them and unroll across them.
curved.reorder(c, x, y)
.bound(c, 0, 3)
.unroll(c);
// Look-up-tables don't vectorize well, so just parallelize
// curved in slices of 16 scanlines.
Var yo, yi;
curved.split(y, yo, yi, 16)
.parallel(yo);
// Compute sharpen as needed per scanline of curved.
sharpen.compute_at(curved, yi);
// Vectorize the sharpen. It's 16-bit so we'll vectorize it 8-wide.
sharpen.vectorize(x, 8);
// Compute the padded input as needed per scanline of curved,
// reusing previous values computed within the same strip of
// 16 scanlines.
padded.store_at(curved, yo)
.compute_at(curved, yi);
// Also vectorize the padding. It's 8-bit, so we'll vectorize
// 16-wide.
padded.vectorize(x, 16);
// JIT-compile the pipeline for the CPU.
curved.compile_jit();
}
示例5: build
// Builds a chain of `stages` stencil Funcs that all read from `staged`, a
// copy of a compute_root'd host Func, and schedules the last one on the GPU.
//
// NOTE(review): this excerpt is truncated - it ends after the gpu_tile call
// with no return statement or closing brace, and `use_shared` is not
// referenced in the visible part. Presumably the missing tail selects how
// `staged` is scheduled; confirm against the full source.
Func build(bool use_shared) {
// Source data: computed at root before anything that reads it.
Func host;
Var x, y;
host(x, y) = x + y;
host.compute_root();
// We'll either inline this (and hopefully use the GPU's L1 cache)
// or stage it into shared.
Func staged;
staged(x, y) = host(x, y);
// Now we just need to access the Func staged a bunch.
const int stages = 10;
Func f[stages];
for (int i = 0; i < stages; i++) {
// Value of the previous stage at this pixel (constant 0 for the first
// stage). It only feeds the select() conditions below.
Expr prev = (i == 0) ? Expr(0) : Expr(f[i-1](x, y));
// Sum of nine reads of staged. Each read uses the neighbour offset
// (dx, dy) unless prev > 0, in which case it reads (x, y) itself, so the
// coordinates depend on the previous stage's value.
Expr stencil = 0;
for (int dy = -1; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++) {
stencil += staged(select(prev > 0, x, x+dx),
select(prev > 0, y, y+dy));
}
}
// Each stage accumulates its stencil on top of the previous stage.
if (i == 0) {
f[i](x, y) = stencil;
} else {
f[i](x, y) = f[i-1](x, y) + stencil;
}
}
// Schedule the last stage at root with an 8x8 gpu_tile over x and y.
Func final = f[stages-1];
final.compute_root().gpu_tile(x, y, 8, 8);
示例6: ColorMgetfilter
// Builds a rotated basis Func at spatial order (iXo, iYo), temporal order iTo
// and colour index iCo by combining elements of the tuple-valued `stBasis`
// with weights derived from `angle`.
//
// The result is a Func over (x, y, t), scheduled compute_root. `x`, `y`, `t`,
// combination() and Mgetfilterindex() are declared outside this function.
//
// The weight table is held in a std::vector so it is released on every exit
// path, including when one of the Halide calls below throws.
Func ColorMgetfilter(Func stBasis, float angle, uint8_t iXo, uint8_t iYo, uint8_t iTo, uint8_t iCo ) {
    // Temporary setting: basis sizes handed to Mgetfilterindex().
    uint8_t numSTB = 63;
    uint8_t numSB = 21;

    angle = -1*angle - M_PI/2;

    // work: rotated basis at a particular spatio-temporal order.
    Func work;
    work(x,y,t) = cast<float>(0.0f);

    // One weight per total spatial order 0..iXo+iYo, zero-initialised.
    const int total_order = iXo + iYo;
    std::vector<float> weights(total_order + 1, 0.0f);

    // Compute weights for all possible orders.
    for (int i = 0; i <= iXo; i++) {
        for (int j = 0; j <= iYo; j++) {
            weights[total_order-i-j] += float(combination(iXo,i))*float(combination(iYo,j))*pow((-1.0f),float(i))*pow(cos(angle),float(iXo-i+j))*pow(sin(angle),float(iYo+i-j));
        }
    }

    // Accumulate the weighted basis elements. Terms with a zero weight are
    // skipped so they add no update stage.
    // NOTE(review): index 0 is also skipped (index > 0) - confirm whether 0 is
    // a valid tuple index or Mgetfilterindex()'s "not found" value.
    for (int k = 0; k <= total_order; k++) {
        int index = Mgetfilterindex(total_order-k,k,iTo,numSTB,numSB);
        if ((index > 0) && (weights[total_order-k] != 0)) {
            work(x,y,t) += weights[total_order-k]*stBasis(x,y,iCo,t)[index];
        }
    }

    work.compute_root();
    return work;
}
示例7: process
// Assembles the processing chain
//   raw -> hot_pixel_suppression -> deinterleave -> demosaic
//       -> color_correct -> apply_curve
// writes the result into `processed`, schedules every stage, and returns
// `processed`.
//
// `processed`, `target`, the Vars `x`, `y`, `c`, `yi`, `yo` and the helper
// stages are declared outside this excerpt.
Func process(Func raw, Type result_type,
ImageParam matrix_3200, ImageParam matrix_7000, Param<float> color_temp,
Param<float> gamma, Param<float> contrast, Param<int> blackLevel, Param<int> whiteLevel) {
// Extra loop variables introduced by the splits on `processed` below.
Var yii, xi;
Func denoised = hot_pixel_suppression(raw);
Func deinterleaved = deinterleave(denoised);
Func demosaiced = demosaic(deinterleaved);
Func corrected = color_correct(demosaiced, matrix_3200, matrix_7000, color_temp);
Func curved = apply_curve(corrected, result_type, gamma, contrast, blackLevel, whiteLevel);
processed(x, y, c) = curved(x, y, c);
// Schedule
Expr out_width = processed.output_buffer().width();
Expr out_height = processed.output_buffer().height();
// Number of scanlines per strip (the unit of parallelism, see parallel(yo)).
int strip_size = 32;
// Vector width: the target's natural width for 16-bit values, overridden
// to 32 for HVX_64 and 64 for HVX_128.
int vec = target.natural_vector_size(UInt(16));
if (target.has_feature(Target::HVX_64)) {
vec = 32;
} else if (target.has_feature(Target::HVX_128)) {
vec = 64;
}
// denoised and deinterleaved are computed per yi iteration of processed
// but stored per strip (yo), with their y storage folded to 8 and 4 rows.
denoised.compute_at(processed, yi).store_at(processed, yo)
.fold_storage(y, 8)
.vectorize(x, vec);
deinterleaved.compute_at(processed, yi).store_at(processed, yo)
.fold_storage(y, 4)
.vectorize(x, 2*vec, TailStrategy::RoundUp)
.reorder(c, x, y)
.unroll(c);
// corrected is computed inside the x loop of processed, channels unrolled.
corrected.compute_at(processed, x)
.vectorize(x, vec)
.reorder(c, x, y)
.unroll(c);
// Output: strips of strip_size rows run in parallel; within a strip rows
// are handled in pairs (yii) and x is split into vectors of 2*vec.
processed.compute_root()
.split(y, yo, yi, strip_size)
.split(yi, yi, yii, 2)
.split(x, x, xi, 2*vec, TailStrategy::RoundUp)
.reorder(xi, c, yii, x, yi, yo)
.vectorize(xi, 2*vec)
.parallel(yo);
// When targeting HVX, run the output stage via hexagon() and align the
// x storage of the intermediates to the vector width.
if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
processed.hexagon();
denoised.align_storage(x, vec);
deinterleaved.align_storage(x, vec);
corrected.align_storage(x, vec);
}
// We can generate slightly better code if we know the splits divide the extent.
// The bounds below round the output width down to a multiple of 2*vec and
// the height down to a multiple of strip_size, and fix c to 3 channels.
processed
.bound(c, 0, 3)
.bound(x, 0, ((out_width)/(2*vec))*(2*vec))
.bound(y, 0, (out_height/strip_size)*strip_size);
return processed;
}
示例8: simple_rfactor_with_specialize_test
// Tests rfactor() applied to a specialized update stage.
//
// With compile_module == true the pipeline is compiled to a Module and its
// call graph is compared against the expected one. Otherwise the pipeline is
// realized once with the specialization condition false (p = 0) and once with
// it true (p = 20), and the output is compared against a reference.
// Returns 0 on success, -1 on any mismatch.
int simple_rfactor_with_specialize_test(bool compile_module) {
    Func f("f"), g("g");
    Var x("x"), y("y");

    f(x, y) = x + y;
    f.compute_root();

    g(x, y) = 40;
    RDom r(10, 20, 30, 40);
    g(r.x, r.y) = min(f(r.x, r.y) + 2, g(r.x, r.y));

    Param<int> p;
    Var u("u");

    // Factor the reduction over r.y out of the specialized update stage.
    Func intm = g.update(0).specialize(p >= 10).rfactor(r.y, u);
    intm.compute_root();
    intm.vectorize(u, 8);
    intm.update(0).vectorize(r.x, 2);

    if (!compile_module) {
        // Reference: inside the reduction domain the result is
        // min(x + y + 2, 40); everywhere else it keeps the initial 40.
        auto reference = [](int col, int row, int /*unused*/) {
            const bool in_domain = (10 <= col && col <= 29) && (30 <= row && row <= 69);
            return in_domain ? std::min(col + row + 2, 40) : 40;
        };

        // Exercise both sides of the specialization.
        const int param_values[] = {0, 20};
        for (const int value : param_values) {
            p.set(value);
            Image<int> im = g.realize(80, 80);
            if (check_image(im, reference)) {
                return -1;
            }
        }
        return 0;
    }

    p.set(20);

    // Check the call graphs.
    Module m = g.compile_to_module({g.infer_arguments()});
    CheckCalls checker;
    m.functions().front().body.accept(&checker);

    CallGraphs expected = {
        {g.name(), {}},
        {g.update(0).name(), {f.name(), intm.name(), g.name()}},
        {intm.name(), {}},
        {intm.update(0).name(), {f.name(), intm.name()}},
        {f.name(), {}},
    };
    return (check_call_graphs(checker.calls, expected) != 0) ? -1 : 0;
}
示例9: count_host_alignment_asserts
// Lowers `f` (scheduled compute_root) for the JIT target with bounds queries
// disabled, walks the resulting statement with a CountHostAlignmentAsserts
// visitor constructed from `m`, and returns the visitor's count.
int count_host_alignment_asserts(Func f, std::map<string, int> m) {
    Target target = get_jit_target_from_environment();
    target.set_feature(Target::NoBoundsQuery);

    f.compute_root();
    Stmt lowered = Internal::lower({f.function()}, f.name(), target);

    CountHostAlignmentAsserts counter(m);
    lowered.accept(&counter);
    return counter.count;
}
示例10: build
// Returns a compute_root'd identity ramp passed through upsample() twice.
// `x` and upsample() are declared outside this function.
Func build() {
    Func ramp;
    ramp(x) = x;
    ramp.compute_root();

    return upsample(upsample(ramp));
}
示例11: count_interleaves
// Lowers `f` (scheduled compute_root) for the JIT target with bounds queries
// and asserts disabled, walks the resulting statement with a CountInterleaves
// visitor, and returns the visitor's result.
int count_interleaves(Func f) {
    Target target = get_jit_target_from_environment();
    target.set_feature(Target::NoBoundsQuery);
    target.set_feature(Target::NoAsserts);

    f.compute_root();
    Stmt lowered = Internal::lower({f.function()}, f.name(), target);

    CountInterleaves counter;
    lowered.accept(&counter);
    return counter.result;
}
示例12: main
int main(int argc, char **argv) {
// Define a pipeline that dumps some squares to a file using an
// external consumer stage.
Func source;
Var x;
source(x) = x*x;
Param<int> min, extent;
Param<const char *> filename;
Func sink;
std::vector<ExternFuncArgument> args;
args.push_back(source);
args.push_back(filename);
args.push_back(min);
args.push_back(extent);
sink.define_extern("dump_to_file", args, Int(32), 0);
source.compute_root();
sink.compile_jit();
// Dump the first 10 squares to a file
filename.set("halide_test_extern_consumer.txt");
min.set(0);
extent.set(10);
sink.realize();
if (!check_result())
return -1;
// Test ImageParam ExternFuncArgument via passed in image.
Image<int32_t> buf = source.realize(10);
ImageParam passed_in(Int(32), 1);
passed_in.set(buf);
Func sink2;
std::vector<ExternFuncArgument> args2;
args2.push_back(passed_in);
args2.push_back(filename);
args2.push_back(min);
args2.push_back(extent);
sink2.define_extern("dump_to_file", args2, Int(32), 0);
sink2.realize();
if (!check_result())
return -1;
printf("Success!\n");
return 0;
}
示例13: process
// Assembles the processing chain
//   raw -> hot_pixel_suppression -> deinterleave -> demosaic
//       -> color_correct -> apply_curve
// writes it into `processed`, and applies one of three schedules selected by
// `schedule`. `processed`, `schedule` and the Vars `x`, `y`, `c`, `tx`, `ty`
// are declared outside this function.
Func process(Func raw, Type result_type,
             ImageParam matrix_3200, ImageParam matrix_7000, Param<float> color_temp,
             Param<float> gamma, Param<float> contrast) {
    Var xi, yi;

    Func denoised = hot_pixel_suppression(raw);
    Func deinterleaved = deinterleave(denoised);
    Func demosaiced = demosaic(deinterleaved);
    Func corrected = color_correct(demosaiced, matrix_3200, matrix_7000, color_temp);
    Func curved = apply_curve(corrected, result_type, gamma, contrast);

    processed(tx, ty, c) = curved(tx, ty, c);

    // Schedule
    processed.bound(c, 0, 3); // bound color loop 0-3, properly

    if (schedule == 0 || schedule == 1) {
        // Both tiled schedules compute the intermediates per tile of the
        // output. Schedule 0 additionally vectorizes them and uses 32x32
        // tiles; schedule 1 leaves them scalar (sse is bad at interleaved
        // 16-bit ops) and uses 128x128 tiles.
        const bool vectorized = (schedule == 0);
        const int tile_size = vectorized ? 32 : 128;

        denoised.compute_at(processed, tx);
        deinterleaved.compute_at(processed, tx);
        corrected.compute_at(processed, tx);

        if (vectorized) {
            denoised.vectorize(x, 8);
            deinterleaved.vectorize(x, 8).reorder(c, x, y).unroll(c);
            corrected.vectorize(x, 4).reorder(c, x, y).unroll(c);
        }

        processed.tile(tx, ty, xi, yi, tile_size, tile_size)
                 .reorder(xi, yi, c, tx, ty)
                 .parallel(ty);
    } else {
        // Fallback: every stage is computed at root.
        denoised.compute_root();
        deinterleaved.compute_root();
        corrected.compute_root();
        processed.compute_root();
    }

    return processed;
}
示例14: main
// Runs a 3x3 dilation (maximum over each pixel's 3x3 neighbourhood) on a
// random 8-bit image and checks the interior of the output against a plain
// C++ reference. Returns 0 on success and -1 on the first mismatch.
// max3() is declared outside this function.
int main(int argc, char **argv) {
    // Generate random input image.
    const int W = 128, H = 48;
    Buffer<uint8_t> in(W, H);
    for (int y = 0; y < H; y++) {
        for (int x = 0; x < W; x++) {
            in(x, y) = rand() & 0xff;
        }
    }

    Var x("x"), y("y");

    // Apply the boundary condition up-front.
    Func input = BoundaryConditions::repeat_edge(in);
    input.compute_root();

    // Define the dilate algorithm: a horizontal 3-tap maximum followed by a
    // vertical 3-tap maximum.
    Func max_x("max_x");
    Func dilate3x3("dilate3x3");
    max_x(x, y) = max3(input(x-1, y), input(x, y), input(x+1, y));
    dilate3x3(x, y) = max3(max_x(x, y-1), max_x(x, y), max_x(x, y+1));

    // Schedule: pick GPU, Hexagon HVX or plain CPU vectorization depending
    // on the JIT target.
    Target target = get_jit_target_from_environment();
    if (target.has_gpu_feature()) {
        dilate3x3.gpu_tile(x, y, 16, 16);
    } else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
        dilate3x3.hexagon().vectorize(x, 64);
    } else {
        dilate3x3.vectorize(x, target.natural_vector_size<uint8_t>());
    }

    // Run the pipeline and verify the results are correct. Only interior
    // pixels are checked, so the reference never reads outside `in`.
    Buffer<uint8_t> out = dilate3x3.realize(W, H, target);
    for (int y = 1; y < H-1; y++) {
        for (int x = 1; x < W-1; x++) {
            uint16_t correct = max3(max3(in(x-1, y-1), in(x, y-1), in(x+1, y-1)),
                                    max3(in(x-1, y  ), in(x, y  ), in(x+1, y  )),
                                    max3(in(x-1, y+1), in(x, y+1), in(x+1, y+1)));
            if (out(x, y) != correct) {
                // Widen to int before streaming: a uint8_t is printed as a
                // character, not as a number.
                std::cout << "out(" << x << ", " << y << ") = " << static_cast<int>(out(x, y))
                          << " instead of " << correct << "\n";
                return -1;
            }
        }
    }

    std::cout << "Success!\n";
    return 0;
}
示例15: rdom_with_predicate_rfactor_test
// Tests rfactor() on a reduction whose RDom carries where() predicates.
//
// With compile_module == true the pipeline is compiled to a Module and its
// call graph is compared against the expected one; otherwise the pipeline is
// realized and compared against a reference. Returns 0 on success, -1 on any
// mismatch.
int rdom_with_predicate_rfactor_test(bool compile_module) {
    Func f("f"), g("g");
    Var x("x"), y("y"), z("z");

    f(x, y, z) = x + y + z;
    f.compute_root();

    g(x, y, z) = 1;
    RDom r(5, 10, 5, 10, 0, 20);
    r.where(r.x < r.y);
    r.where(r.x + 2*r.y <= r.z);
    g(r.x, r.y, r.z) += f(r.x, r.y, r.z);

    // Factor the reduction over r.y and r.x into an intermediate Func.
    Var u("u"), v("v");
    Func intm = g.update(0).rfactor({{r.y, u}, {r.x, v}});
    intm.compute_root();

    Var ui("ui"), vi("vi"), t("t");
    intm.tile(u, v, ui, vi, 2, 2).fuse(u, v, t).parallel(t);
    intm.update(0).vectorize(r.z, 2);

    if (!compile_module) {
        Image<int> im = g.realize(20, 20, 20);

        // Reference: points inside the RDom box that satisfy both predicates
        // get x + y + z added to the initial 1; all others stay at 1.
        auto reference = [](int px, int py, int pz) {
            const bool in_box = (5 <= px && px <= 14) && (5 <= py && py <= 14) &&
                                (0 <= pz && pz <= 19);
            const bool passes_predicates = (px < py) && (px + 2*py <= pz);
            return (in_box && passes_predicates) ? px + py + pz + 1 : 1;
        };
        return check_image(im, reference) ? -1 : 0;
    }

    // Check the call graphs.
    Module m = g.compile_to_module({g.infer_arguments()});
    CheckCalls checker;
    m.functions().front().body.accept(&checker);

    CallGraphs expected = {
        {g.name(), {}},
        {g.update(0).name(), {intm.name(), g.name()}},
        {intm.name(), {}},
        {intm.update(0).name(), {f.name(), intm.name()}},
        {f.name(), {}},
    };
    return (check_call_graphs(checker.calls, expected) != 0) ? -1 : 0;
}