当前位置: 首页>>代码示例>>C++>>正文


C++ Func::compute_at方法代码示例

本文整理汇总了C++中Func::compute_at方法的典型用法代码示例。如果您正苦于以下问题:C++ Func::compute_at方法的具体用法?C++ Func::compute_at怎么用?C++ Func::compute_at使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Func的用法示例。


在下文中一共展示了Func::compute_at方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: process

// Chains the pipeline stages (hot-pixel suppression -> deinterleave ->
// demosaic -> color correction -> tone curve) on top of `raw`, writes the
// result into `processed`, applies the schedule and returns `processed`.
//
// `processed`, `target` and the Vars x, y, c, yi and yo are not declared in
// this function; they are taken from the enclosing scope.
Func process(Func raw, Type result_type,
             ImageParam matrix_3200, ImageParam matrix_7000, Param<float> color_temp,
             Param<float> gamma, Param<float> contrast, Param<int> blackLevel, Param<int> whiteLevel) {

    // Inner loop variables created by the splits of `processed` below.
    Var yii, xi;

    // The algorithm: each stage consumes the output of the previous one.
    Func denoised = hot_pixel_suppression(raw);
    Func deinterleaved = deinterleave(denoised);
    Func demosaiced = demosaic(deinterleaved);
    Func corrected = color_correct(demosaiced, matrix_3200, matrix_7000, color_temp);
    Func curved = apply_curve(corrected, result_type, gamma, contrast, blackLevel, whiteLevel);

    processed(x, y, c) = curved(x, y, c);

    // ---- Schedule ----

    // The output is split into strips of this many scanlines.
    const int strip_size = 32;

    // Vector width: the target's natural width for 16-bit values, overridden
    // with fixed widths when one of the HVX features is enabled.
    const int natural_lanes = target.natural_vector_size(UInt(16));
    const int vec = target.has_feature(Target::HVX_64)    ? 32
                    : target.has_feature(Target::HVX_128) ? 64
                                                          : natural_lanes;
    // Several stages work on twice the base vector width.
    const int wide = 2 * vec;

    // Computed per yi iteration of `processed`, stored per yo strip, with
    // the y storage folded to 8.
    denoised.compute_at(processed, yi);
    denoised.store_at(processed, yo);
    denoised.fold_storage(y, 8);
    denoised.vectorize(x, vec);

    // Same placement as `denoised`; channels are innermost and unrolled.
    deinterleaved.compute_at(processed, yi);
    deinterleaved.store_at(processed, yo);
    deinterleaved.fold_storage(y, 4);
    deinterleaved.vectorize(x, wide, TailStrategy::RoundUp);
    deinterleaved.reorder(c, x, y);
    deinterleaved.unroll(c);

    corrected.compute_at(processed, x);
    corrected.vectorize(x, vec);
    corrected.reorder(c, x, y);
    corrected.unroll(c);

    // Output: strips of `strip_size` rows run in parallel; inside a strip,
    // rows are handled in pairs and x is vectorized `wide` lanes at a time.
    processed.compute_root();
    processed.split(y, yo, yi, strip_size);
    processed.split(yi, yi, yii, 2);
    processed.split(x, x, xi, wide, TailStrategy::RoundUp);
    processed.reorder(xi, c, yii, x, yi, yo);
    processed.vectorize(xi, wide);
    processed.parallel(yo);

    if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
        processed.hexagon();
        denoised.align_storage(x, vec);
        deinterleaved.align_storage(x, vec);
        corrected.align_storage(x, vec);
    }

    // We can generate slightly better code if we know the splits divide the
    // extent, so promise extents rounded down to multiples of the split
    // factors.
    Expr out_width = processed.output_buffer().width();
    Expr out_height = processed.output_buffer().height();
    processed.bound(c, 0, 3);
    processed.bound(x, 0, (out_width / wide) * wide);
    processed.bound(y, 0, (out_height / strip_size) * strip_size);

    return processed;
}
开发者ID:alinas,项目名称:Halide,代码行数:60,代码来源:camera_pipe.cpp

示例2: main

int main(int argc, char **argv) {
    Func source;
    source.define_extern("make_data",
                         std::vector<ExternFuncArgument>(),
                         Float(32), 2);
    Func sink;
    Var x, y;
    sink(x, y) = source(x, y) - sin(x + y);

    Var xi, yi;
    sink.tile(x, y, xi, yi, 32, 32);

    // Compute the source per tile of sink
    source.compute_at(sink, x);

    Image<float> output = sink.realize(100, 100);

    // Should be all zeroes.
    RDom r(output);
    float error = evaluate_may_gpu<float>(sum(abs(output(r.x, r.y))));
    if (error != 0) {
        printf("Something went wrong\n");
        return -1;
    }

    Func multi;
    std::vector<Type> types;
    types.push_back(Float(32));
    types.push_back(Float(32));
    multi.define_extern("make_data_multi",
                        std::vector<ExternFuncArgument>(),
			types, 2);
    Func sink_multi;
    sink_multi(x, y) = multi(x, y)[0] - sin(x + y) +
                       multi(x, y)[1] - cos(x + y);

    sink_multi.tile(x, y, xi, yi, 32, 32);

    // Compute the source per tile of sink
    multi.compute_at(sink_multi, x);

    Image<float> output_multi = sink_multi.realize(100, 100);

    // Should be all zeroes.
    float error_multi = evaluate<float>(sum(abs(output_multi(r.x, r.y))));
    if (error_multi != 0) {
        printf("Something went wrong in multi case\n");
        return -1;
    }

    printf("Success!\n");
    return 0;

}
开发者ID:AheadIO,项目名称:Halide,代码行数:54,代码来源:extern_producer.cpp

示例3: blur_then_transpose

// Blurs `f` with performBlur, transposes the result and multiplies it by the
// inverse of the attenuation that the zero boundary condition causes near the
// edges. Returns the scheduled, corrected and transposed Func.
// The Vars x and y come from the enclosing scope.
Func blur_then_transpose(Func f, Func coeff, Expr size, Expr sigma) {

    Func blurred = performBlur(f, coeff, size, sigma);

    // Blurring an image of ones in exactly the same way measures how much
    // the zero boundary condition attenuates the signal. Dividing by it is
    // equivalent to reweighting the Gaussian near the edge.
    // (TODO: add a generator param to select different boundary conditions).
    Func ones;
    ones(x, y) = 1.0f;
    Func attenuation = performBlur(ones, coeff, size, sigma);

    // Store the reciprocal so the correction is a multiply. The attenuation
    // is the same for every row/channel, so a single column (x == 0) is
    // enough.
    Func inverse_attenuation;
    inverse_attenuation(y) = 1.0f / attenuation(0, y);

    // Swap the coordinates of the blurred image.
    Func transposed;
    transposed(x, y) = blurred(y, x);

    // After the transpose the attenuation is indexed by x.
    Func out;
    out(x, y) = transposed(x, y) * inverse_attenuation(x);

    // ---- Schedule ----
    Var yi, xi, yii, xii;

    attenuation.compute_root();

    inverse_attenuation.compute_root();
    inverse_attenuation.vectorize(y, 8);

    // 8x32 tiles, each subdivided into 8x8 blocks that are vectorized along
    // x and unrolled along y; rows of tiles run in parallel.
    out.compute_root();
    out.tile(x, y, xi, yi, 8, 32);
    out.tile(xi, yi, xii, yii, 8, 8);
    out.vectorize(xii);
    out.unroll(yii);
    out.parallel(y);

    blurred.compute_at(out, y);

    transposed.compute_at(out, xi);
    transposed.vectorize(y);
    transposed.unroll(x);

    // Apply the same treatment to every update stage of the blur: move x
    // inside the reduction (when the stage has one), then vectorize x by 8
    // and unroll what remains of it.
    const int update_count = blurred.num_update_definitions();
    for (int stage = 0; stage < update_count; ++stage) {
        RDom r = blurred.reduction_domain(stage);
        if (r.defined()) {
            blurred.update(stage).reorder(x, r);
        }
        blurred.update(stage).vectorize(x, 8).unroll(x);
    }

    return out;
}
开发者ID:CarVac,项目名称:filmulator-gui,代码行数:49,代码来源:diffuse.cpp

示例4: main

int main(int argc, char **argv) {
    Func mandelbrot;
    Var x, y;

    Param<float> x_min, x_max, y_min, y_max, c_real, c_imag;
    Param<int> w, h, iters;
    Complex initial(lerp(x_min, x_max, cast<float>(x)/w),
                    lerp(y_min, y_max, cast<float>(y)/h));
    Complex c(c_real, c_imag);

    Var z;
    mandelbrot(x, y, z) = initial;
    RDom t(1, iters);
    Complex current = mandelbrot(x, y, t-1);
    mandelbrot(x, y, t) = current*current + c;

    // How many iterations until something escapes a circle of radius 2?
    Func count;
    Tuple escape = argmin(magnitude(mandelbrot(x, y, t)) < 4);

    // If it never escapes, use the value 0
    count(x, y) = select(escape[1], 0, escape[0]);

    Var xi, yi, xo, yo;
    count.tile(x, y, xo, yo, xi, yi, 8, 8);
    count.parallel(yo).vectorize(xi, 4).unroll(xi).unroll(yi, 2);
    mandelbrot.compute_at(count, xo);

    Argument args[] = {x_min, x_max, y_min, y_max, c_real, c_imag, iters, w, h};

    count.compile_to_file("mandelbrot", std::vector<Argument>(args, args + 9));

    return 0;
}
开发者ID:AheadIO,项目名称:Halide,代码行数:34,代码来源:mandelbrot_generate.cpp

示例5: main

int main(int argc, char **argv)
{
  Image<uint8_t> input = load<uint8_t>("P1070046.png");

  timeval t1, t2;
  gettimeofday(&t1, NULL);
  Var x,y,c;
  Func toFloat;
  toFloat(c,x,y) = cast<float>(input(x,y,c))/255.0;
  Func toHSV;
  toHSV = hsv(toFloat);
  Func saturated;
  saturated(c,x,y) = select(c != 1,
                            toHSV(c,x,y),
                            clamp(1*fast_pow(toHSV(c,x,y),0.5),
                                  0,1));
  Func toRGB,toInt;
  toRGB = rgb(saturated);
  toInt(x,y,c) = cast<uint8_t>(toRGB(c,x,y)*255.0);
  Var y_outer,y_inner;
  toInt.reorder(c,x,y);
  toInt.split(y,y_outer, y_inner, 256);
  toInt.parallel(y_outer);
  toHSV.compute_at(toInt,x);
  Halide::Image<uint8_t> output = toInt.realize(input.width(),input.height(),input.channels());
  gettimeofday(&t2, NULL);
  save(output,"vibSat.png");
  std::cout<<float(t2.tv_sec - t1.tv_sec) + float(t2.tv_usec - t1.tv_usec)/1000000.0f << std::endl;
  return 0;
}
开发者ID:CarVac,项目名称:filmulator-gui,代码行数:30,代码来源:vibranceSaturation.cpp

示例6: schedule_for_cpu

    // Now we define methods that give our pipeline several different
    // schedules.
    // Applies a CPU schedule to the pipeline members (lut, padded, sharpen,
    // curved) and JIT-compiles it.
    void schedule_for_cpu() {
        // The look-up-table is computed once, ahead of everything else.
        lut.compute_root();

        // Color channels are the innermost loop. We promise there are
        // exactly three of them so the loop can be fully unrolled.
        curved.reorder(c, x, y);
        curved.bound(c, 0, 3);
        curved.unroll(c);

        // Look-up-tables don't vectorize well, so curved is only
        // parallelized, in slices of 16 scanlines.
        Var yo, yi;
        curved.split(y, yo, yi, 16);
        curved.parallel(yo);

        // sharpen is produced as needed per scanline of curved. It's
        // 16-bit, so it is vectorized 8-wide.
        sharpen.compute_at(curved, yi)
               .vectorize(x, 8);

        // The padded input is also produced per scanline of curved, but
        // its storage lives at the strip level so that values computed
        // earlier within the same strip of 16 scanlines are reused.
        // It's 8-bit, so it is vectorized 16-wide.
        padded.store_at(curved, yo)
              .compute_at(curved, yi)
              .vectorize(x, 16);

        // JIT-compile the pipeline for the CPU.
        curved.compile_jit();
    }
开发者ID:Mengke-Yuan,项目名称:Halide,代码行数:37,代码来源:lesson_12_using_the_gpu.cpp

示例7: main

int main(int argc, char **argv) {
    Func source;
    source.define_extern("make_data",
                         std::vector<ExternFuncArgument>(),
                         Float(32), 2);
    Func sink;
    Var x, y;
    sink(x, y) = source(x, y) - sin(x + y);

    Var xi, yi;
    sink.tile(x, y, xi, yi, 32, 32);

    // Compute the source per tile of sink
    source.compute_at(sink, x);

    Image<float> output = sink.realize(100, 100);

    // Should be all zeroes.
    RDom r(output);
    float error = evaluate<float>(sum(abs(output(r.x, r.y))));
    if (error != 0) {
        printf("Something went wrong\n");
        return -1;
    }

    printf("Success!\n");
    return 0;

}
开发者ID:202198,项目名称:Halide,代码行数:29,代码来源:extern_producer.cpp

示例8: Test

    // Builds the inner/outer pair of Funcs for test case `i`; the index is
    // appended to both Func names.
    explicit Test(int i) {
        const std::string suffix = std::to_string(i);

        // Specific calls serve as proxies for verifying that compute_at
        // happens where we expect: sin() marks the inner function and cos()
        // the outer one. They are chosen mainly because the lowering code
        // will never generate them incidentally as part of general code
        // structure.
        inner = Func("inner" + suffix);
        inner(x, y, c) = sin(cast<float>(x + y + c));

        // The compute/store locations are given by the members
        // inner_compute_at and inner_store_at rather than spelled out here.
        inner.compute_at(inner_compute_at);
        inner.store_at(inner_store_at);

        outer = Func("outer" + suffix);
        outer(x, y, c) = cos(cast<float>(inner(x, y, c)));
    }
开发者ID:adityaatluri,项目名称:Halide,代码行数:14,代码来源:deferred_loop_level.cpp

示例9: global_wrap_test

// Wraps an ImageParam globally with in() and checks two things:
//   1. the call graph of the compiled module matches the expected
//      h -> {g, wrapper}, g -> wrapper, wrapper -> img_f, img_f -> img chain;
//   2. the realized output equals 2*(x + y).
// Returns 0 on success and -1 if either check fails.
int global_wrap_test() {
    Func source("source"), g("g"), h("h");
    Var x("x"), y("y");

    // Fill the ImageParam with x + y so the expected output is known.
    source(x, y) = x + y;
    ImageParam img(Int(32), 2, "img");
    Buffer<int> buf = source.realize(200, 200);
    img.set(buf);

    // Both g and h read the image, so both go through the global wrapper.
    g(x, y) = img(x, y);
    h(x, y) = g(x, y) + img(x, y);

    Var xi("xi"), yi("yi"), t("t");
    Func wrapper = img.in();
    Func img_f = img;
    img_f.compute_root();
    h.compute_root().tile(x, y, xi, yi, 16, 16).fuse(x, y, t).parallel(t);
    g.compute_at(h, yi);
    // The wrapper's dimensions are addressed with the implicit variables
    // _0 and _1.
    wrapper.compute_at(h, yi).tile(_0, _1, xi, yi, 8, 8).fuse(xi, yi, t).vectorize(t, 4);

    // Check the call graphs.
    // Expect 'g' to call 'wrapper', 'wrapper' to call 'img_f', 'img_f' to call 'img',
    // 'h' to call 'wrapper' and 'g'
    Module m = h.compile_to_module({h.infer_arguments()});
    CheckCalls c;
    m.functions().front().body.accept(&c);

    CallGraphs expected = {
        {h.name(), {g.name(), wrapper.name()}},
        {g.name(), {wrapper.name()}},
        {wrapper.name(), {img_f.name()}},
        {img_f.name(), {img.name()}},
    };
    if (check_call_graphs(c.calls, expected) != 0) {
        return -1;
    }

    // h = g + img = 2 * img = 2 * (x + y).
    Buffer<int> im = h.realize(200, 200);
    auto func = [](int x, int y) { return 2*(x + y); };
    if (check_image(im, func)) {
        return -1;
    }
    return 0;
}
开发者ID:bleibig,项目名称:Halide,代码行数:44,代码来源:image_wrap.cpp

示例10: blur_cols_transpose

// Builds a Func that runs a first order low pass IIR filter along the
// columns of `input` (one pass down, one pass back up) and then transposes
// the result. `height` is the number of rows filtered and `alpha` the filter
// coefficient. The Vars x, y and c come from the enclosing scope.
Func blur_cols_transpose(Func input, Expr height, Expr alpha) {
    Func blur;

    // Pure definition: leave everything undefined; the update stages below
    // write the actual values.
    blur(x, y, c) = undef<float>();

    // Update 0: seed the top row with the input.
    blur(x, 0, c) = input(x, 0, c);

    // Update 1: run the filter down the columns, from row 1 onwards.
    RDom ry(1, height - 1);
    blur(x, ry, c) =
        (1 - alpha)*blur(x, ry - 1, c) + alpha*input(x, ry, c);

    // Update 2: run the filter back up the columns, using the same
    // reduction domain mirrored so that it walks upwards.
    Expr row_from_bottom = height - ry - 1;
    blur(x, row_from_bottom, c) =
        (1 - alpha)*blur(x, row_from_bottom + 1, c) + alpha*blur(x, row_from_bottom, c);

    // Transpose the blur.
    Func transpose;
    transpose(x, y, c) = blur(y, x, c);

    // Schedule:
    // Split the transpose into 8x8 tiles. Parallelize over channels and
    // over rows of tiles (Halide supports nested parallelism).
    Var xo, yo;
    transpose.compute_root();
    transpose.tile(x, y, xo, yo, x, y, 8, 8);
    transpose.vectorize(x);
    transpose.parallel(yo);
    transpose.parallel(c);

    // Run the filter on each row of tiles (which corresponds to a strip of
    // columns in the input).
    blur.compute_at(transpose, yo);

    // Vectorize computations within the strips: in both filter passes x
    // becomes the innermost loop and is vectorized.
    blur.update(1).reorder(x, ry).vectorize(x);
    blur.update(2).reorder(x, ry).vectorize(x);

    return transpose;
}
开发者ID:JayHuangYC,项目名称:Halide,代码行数:46,代码来源:iir_blur.cpp

示例11: global_wrap_test

// Wraps the Func f globally with in() and checks two things:
//   1. the call graph of the compiled module matches the expected
//      h -> {g, wrapper}, g -> wrapper, wrapper -> f chain, with f calling
//      nothing;
//   2. the realized output equals 2*(x + y).
// Returns 0 on success and -1 if either check fails.
int global_wrap_test() {
    Func f("f"), g("g"), h("h");
    Var x("x"), y("y");

    // Both g and h read f, so both go through the global wrapper.
    f(x, y) = x + y;
    g(x, y) = f(x, y);
    h(x, y) = g(x, y) + f(x, y);

    Var xi("xi"), yi("yi"), t("t");
    Func wrapper = f.in();
    f.compute_root();
    h.compute_root().tile(x, y, xi, yi, 16, 16).fuse(x, y, t).parallel(t);
    g.compute_at(h, yi);
    wrapper.compute_at(h, yi).tile(x, y, xi, yi, 8, 8).fuse(xi, yi, t).vectorize(t, 4);

    // Check the call graphs.
    // Expect 'g' to call 'wrapper', 'wrapper' to call 'f', 'f' to call nothing,
    // 'h' to call 'wrapper' and 'g'
    Module m = h.compile_to_module({});
    CheckCalls c;
    m.functions().front().body.accept(&c);

    CallGraphs expected = {
        {h.name(), {g.name(), wrapper.name()}},
        {g.name(), {wrapper.name()}},
        {wrapper.name(), {f.name()}},
        {f.name(), {}},
    };
    if (check_call_graphs(c.calls, expected) != 0) {
        return -1;
    }

    // h = g + f = 2 * f = 2 * (x + y).
    Image<int> im = h.realize(200, 200);
    auto func = [](int x, int y) {
        return 2*(x + y);
    };
    if (check_image(im, func)) {
        return -1;
    }
    return 0;
}
开发者ID:cyanjc321,项目名称:Halide,代码行数:41,代码来源:wrap.cpp

示例12: main

// Renders a Mandelbrot set through draw_pixel (defined outside this block),
// calling it once per point of a 71x21 reduction domain, and then checks via
// call_count (also defined outside this block) that it ran exactly 71*21
// times. Returns 0 on success and -1 on a count mismatch.
int main(int argc, char **argv) {
    Var x, y;

    Func mandelbrot;
    // Use a different scale on x and y because terminal characters
    // are not square. Arbitrarily chosen to fit the set nicely.
    Complex initial(x/20.0f, y/8.0f);

    // The third dimension indexes the iteration: slice 0 is zero and slice t
    // is the square of slice t-1 plus the per-pixel constant.
    Var z;
    mandelbrot(x, y, z) = Complex(0.0f, 0.0f);
    RDom t(1, 40);
    Complex current = mandelbrot(x, y, t-1);
    mandelbrot(x, y, t) = current*current + initial;

    // How many iterations until something escapes a circle of radius 2?
    // (The comparison below is against 4.)
    Func count;
    Tuple escape = argmin(magnitude(mandelbrot(x, y, t)) < 4);
    // If it never escapes, use the value 0
    count(x, y) = select(escape[1], 0, escape[0]);

    // Extents of the rendered region; the success check below depends on
    // them.
    const int columns = 71;
    const int rows = 21;

    // render is a zero-dimensional Func whose update stage walks the region
    // and calls draw_pixel once per point.
    RDom r(-45, columns, -10, rows);
    Func render;
    render() = 0;
    render() = draw_pixel(r.x, r.y, count(r.x, r.y));

    mandelbrot.compute_at(render, r.x);

    render.realize();

    printf("\n");

    // Check draw_pixel was called the right number of times.
    if (call_count != columns*rows) {
        printf("Something went wrong\n");
        return -1;
    }

    printf("Success!\n");
    return 0;
}
开发者ID:drtpig,项目名称:Halide,代码行数:39,代码来源:side_effects.cpp

示例13: process

// Chains the pipeline stages (hot-pixel suppression -> deinterleave ->
// demosaic -> color correction -> tone curve) on top of `raw`, writes the
// result into `processed`, applies the schedule selected by `schedule` and
// returns `processed`.
//
// `processed`, `schedule` and the Vars x, y, c, tx and ty are not declared in
// this function; they are taken from the enclosing scope.
Func process(Func raw, Type result_type,
             ImageParam matrix_3200, ImageParam matrix_7000, Param<float> color_temp,
             Param<float> gamma, Param<float> contrast) {

    // Inner tile variables for `processed`.
    Var xi, yi;

    // The algorithm: each stage consumes the output of the previous one.
    Func denoised = hot_pixel_suppression(raw);
    Func deinterleaved = deinterleave(denoised);
    Func demosaiced = demosaic(deinterleaved);
    Func corrected = color_correct(demosaiced, matrix_3200, matrix_7000, color_temp);
    Func curved = apply_curve(corrected, result_type, gamma, contrast);

    processed(tx, ty, c) = curved(tx, ty, c);

    // ---- Schedule ----
    processed.bound(c, 0, 3); // bound color loop 0-3, properly

    // schedule 0: tiled, intermediates vectorized.
    // schedule 1: tiled, no vectorization (sse is bad at interleaved 16-bit ops).
    // otherwise:  every stage computed at root.
    const bool vectorized = (schedule == 0);
    const bool tiled = vectorized || (schedule == 1);

    if (!tiled) {
        denoised.compute_root();
        deinterleaved.compute_root();
        corrected.compute_root();
        processed.compute_root();
        return processed;
    }

    // Both tiled schedules compute the intermediates in chunks over tiles of
    // the output.
    denoised.compute_at(processed, tx);
    deinterleaved.compute_at(processed, tx);
    corrected.compute_at(processed, tx);

    if (vectorized) {
        denoised.vectorize(x, 8);
        deinterleaved.vectorize(x, 8).reorder(c, x, y).unroll(c);
        corrected.vectorize(x, 4).reorder(c, x, y).unroll(c);
    }

    // The vectorized schedule uses 32x32 tiles, the scalar one 128x128.
    const int tile_size = vectorized ? 32 : 128;
    processed.tile(tx, ty, xi, yi, tile_size, tile_size).reorder(xi, yi, c, tx, ty);
    processed.parallel(ty);

    return processed;
}
开发者ID:DoDNet,项目名称:Halide,代码行数:39,代码来源:camera_pipe.cpp

示例14: main

// GPU thread-barrier test. It is skipped (returning 0) when the JIT target
// has no GPU feature enabled. Otherwise it runs two cases:
//   1. a Func with many mutually dependent update stages computed inside
//      GPU thread blocks, whose output is checked against a known value;
//   2. a Func containing undef stages, where a custom lowering pass
//      (CheckBarrierCount, defined outside this block) is given the expected
//      number of thread barriers.
// Returns 0 on success and -1 if the first case produces a wrong value.
int main(int argc, char **argv) {
    if (!get_jit_target_from_environment().has_gpu_feature()) {
        printf("Not running test because no gpu target enabled\n");
        return 0;
    }

    {
        Func f;
        Var x, y;

        // Construct a Func with lots of potential race conditions, and
        // then run it in thread blocks on the gpu.

        f(x, y) = x + 100 * y;

        // Each pass adds four update stages: two that flip rows and two that
        // flip columns.
        const int passes = 10;
        for (int i = 0; i < passes; i++) {
            RDom rx(0, 10);
            // Flip each row, using spots 10-19 as temporary storage
            f(rx + 10, y) = f(9 - rx, y);
            f(rx, y) = f(rx + 10, y);
            // Flip each column the same way
            RDom ry(0, 8);
            f(x, ry + 8) = f(x, 7 - ry);
            f(x, ry) = f(x, ry + 8);
        }

        Func g;
        g(x, y) = f(0, 0)+ f(9, 7);

        g.gpu_tile(x, y, 16, 8);
        f.compute_at(g, Var::gpu_blocks());

        // The row flips reduce over x, so they are spread across threads in
        // y; the column flips reduce over y, so they are spread across
        // threads in x.
        for (int i = 0; i < passes; i++) {
            f.update(i*4 + 0).gpu_threads(y);
            f.update(i*4 + 1).gpu_threads(y);
            f.update(i*4 + 2).gpu_threads(x);
            f.update(i*4 + 3).gpu_threads(x);
        }

        // With an even number of passes every flip is undone, so
        // f(0, 0) + f(9, 7) = 0 + (9 + 100*7) at every output pixel.
        const int correct = 7*100 + 9;

        Image<int> out = g.realize(100, 100);
        for (int row = 0; row < out.height(); row++) {
            for (int col = 0; col < out.width(); col++) {
                if (out(col, row) != correct) {
                    printf("out(%d, %d) = %d instead of %d\n",
                           col, row, out(col, row), correct);
                    return -1;
                }
            }
        }

    }

    {
        // Construct a Func with undef stages, then run it in thread
        // blocks and make sure the right number of syncthreads are
        // added.

        Func f;
        Var x, y;
        f(x, y) = undef<int>();
        f(x, y) += x + 100 * y;
        // This next line is dubious, because it entirely masks the
        // effect of the previous definition. If you add an undefined
        // value to the previous def, then Halide can evaluate this to
        // whatever it likes. Currently we'll just elide this update
        // definition.
        f(x, y) += undef<int>();
        f(x, y) += y * 100 + x;

        Func g;
        g(x, y) = f(0, 0) + f(7, 7);

        g.gpu_tile(x, y, 8, 8);
        f.compute_at(g, Var::gpu_blocks());

        f.gpu_threads(x, y);
        f.update(0).gpu_threads(x, y);
        f.update(1).gpu_threads(x, y);
        f.update(2).gpu_threads(x, y);

        // There should be two thread barriers: one in between the
        // non-undef definitions, and one between f and g.
        g.add_custom_lowering_pass(new CheckBarrierCount(2));

        // Only realized so that lowering runs; the pixel values are not
        // inspected in this case.
        Image<int> out = g.realize(100, 100);
    }

    printf("Success!\n");
    return 0;
}
开发者ID:josephsieh,项目名称:Halide,代码行数:92,代码来源:gpu_thread_barrier.cpp

示例15: demosaic


//.........这里部分代码省略.........
    g_r(x, y)  = select(ghd_r < gvd_r, gh_r, gv_r);

    Expr gv_b  = avg(g_gr(x, y+1), g_gr(x, y));
    Expr gvd_b = absd(g_gr(x, y+1), g_gr(x, y));
    Expr gh_b  = avg(g_gb(x-1, y), g_gb(x, y));
    Expr ghd_b = absd(g_gb(x-1, y), g_gb(x, y));

    g_b(x, y)  = select(ghd_b < gvd_b, gh_b, gv_b);

    // Next interpolate red at gr by first interpolating, then
    // correcting using the error green would have had if we had
    // interpolated it in the same way (i.e. add the second derivative
    // of the green channel at the same place).
    Expr correction;
    correction = g_gr(x, y) - avg(g_r(x, y), g_r(x-1, y));
    r_gr(x, y) = correction + avg(r_r(x-1, y), r_r(x, y));

    // Do the same for other reds and blues at green sites
    correction = g_gr(x, y) - avg(g_b(x, y), g_b(x, y-1));
    b_gr(x, y) = correction + avg(b_b(x, y), b_b(x, y-1));

    correction = g_gb(x, y) - avg(g_r(x, y), g_r(x, y+1));
    r_gb(x, y) = correction + avg(r_r(x, y), r_r(x, y+1));

    correction = g_gb(x, y) - avg(g_b(x, y), g_b(x+1, y));
    b_gb(x, y) = correction + avg(b_b(x, y), b_b(x+1, y));

    // Now interpolate diagonally to get red at blue and blue at
    // red. Hold onto your hats; this gets really fancy. We do the
    // same thing as for interpolating green where we try both
    // directions (in this case the positive and negative diagonals),
    // and use the one with the lowest absolute difference. But we
    // also use the same trick as interpolating red and blue at green
    // sites - we correct our interpolations using the second
    // derivative of green at the same sites.

    correction = g_b(x, y)  - avg(g_r(x, y), g_r(x-1, y+1));
    Expr rp_b  = correction + avg(r_r(x, y), r_r(x-1, y+1));
    Expr rpd_b = absd(r_r(x, y), r_r(x-1, y+1));

    correction = g_b(x, y)  - avg(g_r(x-1, y), g_r(x, y+1));
    Expr rn_b  = correction + avg(r_r(x-1, y), r_r(x, y+1));
    Expr rnd_b = absd(r_r(x-1, y), r_r(x, y+1));

    r_b(x, y)  = select(rpd_b < rnd_b, rp_b, rn_b);


    // Same thing for blue at red
    correction = g_r(x, y)  - avg(g_b(x, y), g_b(x+1, y-1));
    Expr bp_r  = correction + avg(b_b(x, y), b_b(x+1, y-1));
    Expr bpd_r = absd(b_b(x, y), b_b(x+1, y-1));

    correction = g_r(x, y)  - avg(g_b(x+1, y), g_b(x, y-1));
    Expr bn_r  = correction + avg(b_b(x+1, y), b_b(x, y-1));
    Expr bnd_r = absd(b_b(x+1, y), b_b(x, y-1));

    b_r(x, y)  =  select(bpd_r < bnd_r, bp_r, bn_r);

    // Interleave the resulting channels
    Func r = interleave_y(interleave_x(r_gr, r_r),
                          interleave_x(r_b, r_gb));
    Func g = interleave_y(interleave_x(g_gr, g_r),
                          interleave_x(g_b, g_gb));
    Func b = interleave_y(interleave_x(b_gr, b_r),
                          interleave_x(b_b, b_gb));

    Func output;
    output(x, y, c) = select(c == 0, r(x, y),
                             c == 1, g(x, y),
                                     b(x, y));


    /* THE SCHEDULE */
    int vec = target.natural_vector_size(UInt(16));
    if (target.has_feature(Target::HVX_64)) {
        vec = 32;
    } else if (target.has_feature(Target::HVX_128)) {
        vec = 64;
    }
    g_r.compute_at(processed, yi)
        .store_at(processed, yo)
        .vectorize(x, vec, TailStrategy::RoundUp)
        .fold_storage(y, 2);
    g_b.compute_at(processed, yi)
        .store_at(processed, yo)
        .vectorize(x, vec, TailStrategy::RoundUp)
        .fold_storage(y, 2);
    output.compute_at(processed, x)
        .vectorize(x)
        .unroll(y)
        .reorder(c, x, y)
        .unroll(c);

    if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
        g_r.align_storage(x, vec);
        g_b.align_storage(x, vec);
    }

    return output;
}
开发者ID:alinas,项目名称:Halide,代码行数:101,代码来源:camera_pipe.cpp


注:本文中的Func::compute_at方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。