当前位置: 首页>>代码示例>>C++>>正文


C++ Func::hexagon方法代码示例

本文整理汇总了C++中Func::hexagon方法的典型用法代码示例。如果您正苦于以下问题:C++ Func::hexagon方法的具体用法?C++ Func::hexagon怎么用?C++ Func::hexagon使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Func的用法示例。


在下文中一共展示了Func::hexagon方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: process

// Assembles the processing chain for `raw`, schedules it, and returns the
// final stage.
//
// Stages are applied in this order: hot_pixel_suppression -> deinterleave ->
// demosaic -> color_correct -> apply_curve. The last stage is copied into
// `processed`, which is the Func that is returned.
//
//   raw                        input handed to hot_pixel_suppression
//   result_type                forwarded to apply_curve
//   matrix_3200, matrix_7000,
//   color_temp                 forwarded to color_correct
//   gamma, contrast,
//   blackLevel, whiteLevel     forwarded to apply_curve
//
// NOTE(review): `processed`, `target`, `x`, `y`, `c`, `yi` and `yo` are not
// declared inside this function; presumably they live at file scope -- confirm.
Func process(Func raw, Type result_type,
             ImageParam matrix_3200, ImageParam matrix_7000, Param<float> color_temp,
             Param<float> gamma, Param<float> contrast, Param<int> blackLevel, Param<int> whiteLevel) {
    // Additional loop variables introduced by the splits on `processed` below.
    Var row_in_pair, lane;

    // ---- Algorithm ----
    Func denoised = hot_pixel_suppression(raw);
    Func deinterleaved = deinterleave(denoised);
    Func demosaiced = demosaic(deinterleaved);
    Func corrected = color_correct(demosaiced, matrix_3200, matrix_7000, color_temp);
    Func curved = apply_curve(corrected, result_type, gamma, contrast, blackLevel, whiteLevel);

    processed(x, y, c) = curved(x, y, c);

    // ---- Schedule ----
    Expr width = processed.output_buffer().width();
    Expr height = processed.output_buffer().height();

    // Rows handled by one parallel task.
    const int rows_per_strip = 32;

    // Vector width: the target's natural width for 16-bit values, overridden
    // with fixed widths when one of the HVX features is enabled.
    const int native_lanes = target.natural_vector_size(UInt(16));
    const int lanes = target.has_feature(Target::HVX_64)    ? 32
                      : target.has_feature(Target::HVX_128) ? 64
                                                            : native_lanes;
    const int wide_lanes = 2 * lanes;

    // Computed per `yi` iteration, with storage hoisted to the strip (`yo`)
    // level and folded over 8 rows.
    denoised.compute_at(processed, yi);
    denoised.store_at(processed, yo);
    denoised.fold_storage(y, 8);
    denoised.vectorize(x, lanes);

    // Same placement as `denoised`, folded over 4 rows, channels unrolled.
    deinterleaved.compute_at(processed, yi);
    deinterleaved.store_at(processed, yo);
    deinterleaved.fold_storage(y, 4);
    deinterleaved.vectorize(x, wide_lanes, TailStrategy::RoundUp);
    deinterleaved.reorder(c, x, y);
    deinterleaved.unroll(c);

    corrected.compute_at(processed, x);
    corrected.vectorize(x, lanes);
    corrected.reorder(c, x, y);
    corrected.unroll(c);

    // Output stage: strips of rows run in parallel; each strip is walked in
    // pairs of rows and in vectors of `wide_lanes` pixels.
    processed.compute_root();
    processed.split(y, yo, yi, rows_per_strip);
    processed.split(yi, yi, row_in_pair, 2);
    processed.split(x, x, lane, wide_lanes, TailStrategy::RoundUp);
    processed.reorder(lane, c, row_in_pair, x, yi, yo);
    processed.vectorize(lane, wide_lanes);
    processed.parallel(yo);

    // With either HVX feature enabled, offload the output stage to Hexagon
    // and align the intermediates' storage to the vector width.
    const bool hvx_enabled = target.features_any_of({Target::HVX_64, Target::HVX_128});
    if (hvx_enabled) {
        processed.hexagon();
        denoised.align_storage(x, lanes);
        deinterleaved.align_storage(x, lanes);
        corrected.align_storage(x, lanes);
    }

    // Slightly better code is generated when the splits are known to divide
    // the extents exactly, so round both extents down to a multiple of the
    // corresponding split factor.
    processed.bound(c, 0, 3);
    processed.bound(x, 0, (width / wide_lanes) * wide_lanes);
    processed.bound(y, 0, (height / rows_per_strip) * rows_per_strip);

    return processed;
}
开发者ID:alinas,项目名称:Halide,代码行数:60,代码来源:camera_pipe.cpp

示例2: schedule

// Applies a vectorized schedule to `f`, chosen from the features of target `t`.
//
// When `t` has either HVX feature, `f` is offloaded with hexagon() and
// vectorized over `x` by 32; otherwise it is vectorized over `x` by 16.
//
// NOTE(review): `x` is not declared in this function; presumably a Var at
// file scope -- confirm.
void schedule(Func f, const Target &t) {
    // TODO: Add GPU schedule where supported.
    const bool has_hvx = t.features_any_of({Target::HVX_64, Target::HVX_128});

    if (!has_hvx) {
        f.vectorize(x, 16);
        return;
    }

    f.hexagon();
    f.vectorize(x, 32);
}
开发者ID:csachs,项目名称:Halide,代码行数:8,代码来源:bit_counting.cpp

示例3: main


//.........这里部分代码省略.........
        f.set_custom_print(halide_print);
        Buffer<float> imf = f.realize(N);

        assert(messages.size() == (size_t)N);

        char correct[1024];
        for (int i = 0; i < N; i++) {
            snprintf(correct, sizeof(correct), "%f\n", imf(i));
            // Some versions of the std library can emit some NaN patterns
            // as "-nan", due to sloppy conversion (or not) of the sign bit.
            // Halide considers all NaN's equivalent, so paper over this
            // noise in the test by normalizing all -nan -> nan.
            if (messages[i] == "-nan\n") messages[i] = "nan\n";
            if (!strcmp(correct, "-nan\n")) strcpy(correct, "nan\n");
            if (messages[i] != correct) {
                printf("float %d: %s vs %s for %10.20e\n", i, messages[i].c_str(), correct, imf(i));
                return -1;
            }
        }

        messages.clear();

        g(x) = print(reinterpret(Float(64), (cast<uint64_t>(random_uint()) << 32) | random_uint()));
        g.set_custom_print(halide_print);
        Buffer<double> img = g.realize(N);

        assert(messages.size() == (size_t)N);

        for (int i = 0; i < N; i++) {
            snprintf(correct, sizeof(correct), "%e\n", img(i));
            // Some versions of the std library can emit some NaN patterns
            // as "-nan", due to sloppy conversion (or not) of the sign bit.
            // Halide considers all NaN's equivalent, so paper over this
            // noise in the test by normalizing all -nan -> nan.
            if (messages[i] == "-nan\n") messages[i] = "nan\n";
            if (!strcmp(correct, "-nan\n")) strcpy(correct, "nan\n");
            if (messages[i] != correct) {
                printf("double %d: %s vs %s for %10.20e\n", i, messages[i].c_str(), correct, img(i));
                return -1;
            }
        }


    }
    #endif

    messages.clear();

    {
        Func f;

        // Test a vectorized print.
        f(x) = print(x * 3);
        f.set_custom_print(halide_print);
        f.vectorize(x, 32);
        if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            f.hexagon();
        }
        Buffer<int> result = f.realize(128);

        if (!target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            assert((int)messages.size() == result.width());
            for (size_t i = 0; i < messages.size(); i++) {
                assert(messages[i] == std::to_string(i * 3) + "\n");
            }
        } else {
            // The Hexagon simulator prints directly to stderr, so we
            // can't read the messages.
        }
    }

    messages.clear();

    {
        Func f;

        // Test a vectorized print_when.
        f(x) = print_when(x % 2 == 0, x * 3);
        f.set_custom_print(halide_print);
        f.vectorize(x, 32);
        if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            f.hexagon();
        }
        Buffer<int> result = f.realize(128);

        if (!target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            assert((int)messages.size() == result.width() / 2);
            for (size_t i = 0; i < messages.size(); i++) {
                assert(messages[i] == std::to_string(i * 2 * 3) + "\n");
            }
        } else {
            // The Hexagon simulator prints directly to stderr, so we
            // can't read the messages.
        }
    }


    printf("Success!\n");
    return 0;
}
开发者ID:white-pony,项目名称:Halide,代码行数:101,代码来源:print.cpp

示例4: main

int main(int argc, char **argv) {
    Target target = get_jit_target_from_environment();

    if (1) {
        // Test a tuple reduction on the gpu
        Func f;
        Var x, y;

        f(x, y) = Tuple(x + y, x - y);

        // Updates to a reduction are atomic.
        f(x, y) = Tuple(f(x, y)[1]*2, f(x, y)[0]*2);
        // now equals ((x - y)*2, (x + y)*2)

        if (target.has_gpu_feature()) {
            f.gpu_tile(x, y, 16, 16);
            f.update().gpu_tile(x, y, 16, 16);
        } else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            f.hexagon(y).vectorize(x, 32);
            f.update().hexagon(y).vectorize(x, 32);
        }

        Realization result = f.realize(1024, 1024);

        Image<int> a = result[0], b = result[1];

        for (int y = 0; y < a.height(); y++) {
            for (int x = 0; x < a.width(); x++) {
                int correct_a = (x - y)*2;
                int correct_b = (x + y)*2;
                if (a(x, y) != correct_a || b(x, y) != correct_b) {
                    printf("result(%d, %d) = (%d, %d) instead of (%d, %d)\n",
                           x, y, a(x, y), b(x, y), correct_a, correct_b);
                    return -1;
                }
            }
        }
    }

    if (1) {
        // Now test one that alternates between cpu and gpu per update step
        Func f;
        Var x, y;

        f(x, y) = Tuple(x + y, x - y);

        for (size_t i = 0; i < 10; i++) {
            // Swap the tuple elements and increment both
            f(x, y) = Tuple(f(x, y)[1] + 1, f(x, y)[0] + 1);
        }

        // Schedule the pure step and the odd update steps on the gpu
        if (target.has_gpu_feature()) {
            f.gpu_tile(x, y, 16, 16);
        } else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            f.hexagon(y).vectorize(x, 32);
        }
        for (int i = 0; i < 10; i ++) {
	    if (i & 1) {
                if (target.has_gpu_feature()) {
                    f.update(i).gpu_tile(x, y, 16, 16);
                } else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
                    f.update(i).hexagon(y).vectorize(x, 32);
                }
	    } else {
		f.update(i);
	    }
        }

        Realization result = f.realize(1024, 1024);

        Image<int> a = result[0], b = result[1];

        for (int y = 0; y < a.height(); y++) {
            for (int x = 0; x < a.width(); x++) {
                int correct_a = (x + y) + 10;
                int correct_b = (x - y) + 10;
                if (a(x, y) != correct_a || b(x, y) != correct_b) {
                    printf("result(%d, %d) = (%d, %d) instead of (%d, %d)\n",
                           x, y, a(x, y), b(x, y), correct_a, correct_b);
                    return -1;
                }
            }
        }

    }

    if (1) {
        // Same as above, but switches which steps are gpu and cpu
        Func f;
        Var x, y;

        f(x, y) = Tuple(x + y, x - y);

        for (size_t i = 0; i < 10; i++) {
            // Swap the tuple elements and increment both
            f(x, y) = Tuple(f(x, y)[1] + 1, f(x, y)[0] + 1);
        }

        // Schedule the even update steps on the gpu
//.........这里部分代码省略.........
开发者ID:Mengke-Yuan,项目名称:Halide,代码行数:101,代码来源:tuple_reduction.cpp

示例5: main

int main(int argc, char **argv) {

    Buffer<uint8_t> input(128, 64);

    for (int y = 0; y < input.height(); y++) {
        for (int x = 0; x < input.width(); x++) {
            input(x, y) = y*input.width() + x;
        }
    }

    Var x, y, xi, yi;
    {
        Func f;
        f(x, y) = select(((input(x, y) > 10) && (input(x, y) < 20)) ||
                         ((input(x, y) > 40) && (!(input(x, y) > 50))),
                         u8(255), u8(0));

        Target target = get_jit_target_from_environment();
        if (target.has_gpu_feature()) {
            f.gpu_tile(x, y, xi, yi, 16, 16).vectorize(xi, 4);
        } else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            f.hexagon().vectorize(x, 128);
        } else {
            f.vectorize(x, 8);
        }

        Buffer<uint8_t> output = f.realize(input.width(), input.height(), target);

        for (int y = 0; y < input.height(); y++) {
            for (int x = 0; x < input.width(); x++) {
                bool cond = ((input(x, y) > 10) && (input(x, y) < 20)) ||
                    ((input(x, y) > 40) && (!(input(x, y) > 50)));
                uint8_t correct = cond ? 255 : 0;
                if (correct != output(x, y)) {
                    fprintf(stderr, "output(%d, %d) = %d instead of %d\n", x, y, output(x, y), correct);
                    return -1;
                }
            }
        }
    }

    // Test a condition that uses a let resulting from common
    // subexpression elimination.
    {
        Func f;
        Expr common_cond = input(x, y) > 10;
        f(x, y) = select((common_cond && (input(x, y) < 20)) ||
                         ((input(x, y) > 40) && (!common_cond)),
                         u8(255), u8(0));

        Target target = get_jit_target_from_environment();
        if (target.has_gpu_feature()) {
            f.gpu_tile(x, y, xi, yi, 16, 16).vectorize(xi, 4);
        } else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            f.hexagon().vectorize(x, 128);
        } else {
            f.vectorize(x, 8);
        }

        Buffer<uint8_t> output = f.realize(input.width(), input.height(), target);

        for (int y = 0; y < input.height(); y++) {
            for (int x = 0; x < input.width(); x++) {
                bool common_cond = input(x, y) > 10;
                bool cond = (common_cond && (input(x, y) < 20)) ||
                    ((input(x, y) > 40) && (!common_cond));
                uint8_t correct = cond ? 255 : 0;
                if (correct != output(x, y)) {
                    fprintf(stderr, "output(%d, %d) = %d instead of %d\n", x, y, output(x, y), correct);
                    return -1;
                }
            }
        }
    }

    // Test a condition which has vector and scalar inputs.
    {
        Func f("f");
        f(x, y) = select(x < 10 || x > 20 || y < 10 || y > 20, 0, input(x, y));

        Target target = get_jit_target_from_environment();

        if (target.has_gpu_feature()) {
            f.gpu_tile(x, y, xi, yi, 16, 16).vectorize(xi, 4);
        } else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            f.hexagon().vectorize(x, 128);
        } else {
            f.vectorize(x, 128);
        }

        Buffer<uint8_t> output = f.realize(input.width(), input.height(), target);

        for (int y = 0; y < input.height(); y++) {
            for (int x = 0; x < input.width(); x++) {
                bool cond = x < 10 || x > 20 || y < 10 || y > 20;
                uint8_t correct = cond ? 0 : input(x,y);
                if (correct != output(x, y)) {
                    fprintf(stderr, "output(%d, %d) = %d instead of %d\n", x, y, output(x, y), correct);
                    return -1;
                }
//.........这里部分代码省略.........
开发者ID:adityaatluri,项目名称:Halide,代码行数:101,代码来源:logical.cpp

示例6: main

int main(int arch, char **argv) {
    const int W = 256, H = 256;

    Buffer<uint8_t> in(W, H);
    // Set up the input.
    for (int y = 0; y < H; y++) {
        for (int x = 0; x < W; x++) {
            in(x, y) = rand() & 0xff;
        }
    }

    // Define a convolution kernel, and its sum.
    Buffer<int8_t> kernel(3, 3);
    kernel.set_min(-1, -1);
    for (int y = -1; y <= 1; y++) {
        for (int x = -1; x <= 1; x++) {
            kernel(x, y) = rand() % 8 - 4;
        }
    }

    Var x("x"), y("y"), xi("xi"), yi("yi");
    RDom r(-1, 3, -1, 3);

    // Boundary condition.
    Func input = BoundaryConditions::repeat_edge(in);
    input.compute_root();

    // Test a widening reduction, followed by a narrowing.
    {
        Func f;
        f(x, y) = u8_sat(sum(i16(input(x + r.x, y + r.y)) * kernel(r.x, r.y)) / 16);

        // Schedule.
        Target target = get_jit_target_from_environment();
        if (target.has_gpu_feature()) {
            f.gpu_tile(x, y, xi, yi, 16, 16);
        } else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            f.hexagon().vectorize(x, 128);
        } else {
            f.vectorize(x, target.natural_vector_size<uint8_t>());
        }

        // Run the pipeline and verify the results are correct.
        Buffer<uint8_t> out = f.realize(W, H, target);

        for (int y = 1; y < H-1; y++) {
            for (int x = 1; x < W-1; x++) {
                int16_t correct = 0;
                for (int ry = -1; ry <= 1; ry++) {
                    for (int rx = -1; rx <= 1; rx++) {
                        correct += static_cast<int16_t>(in(x + rx, y + ry)) * kernel(rx, ry);
                    }
                }
                correct = std::min(std::max(correct / 16, 0), 255);
                if (correct != out(x, y)) {
                    std::cout << "out(" << x << ", " << y << ") = " << (int)out(x, y) << " instead of " << correct << "\n";
                    return -1;
                }
            }
        }
    }

    // Test a tuple reduction with widening, followed by narrowing the result.
    {
        Func f;
        f(x, y) = { i16(0), i8(0) };
        f(x, y) = {
            f(x, y)[0] + i16(input(x + r.x, y + r.y)) * kernel(r.x, r.y),
            f(x, y)[1] + kernel(r.x, r.y),
        };

        Func g;
        g(x, y) = u8_sat((f(x, y)[0] + f(x, y)[1]) / 16);

        // Schedule.
        Target target = get_jit_target_from_environment();
        if (target.has_gpu_feature()) {
            g.gpu_tile(x, y, xi, yi, 16, 16);
        } else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
            g.hexagon().vectorize(x, 128);
        } else {
            g.vectorize(x, target.natural_vector_size<uint8_t>());
        }

        // Run the pipeline and verify the results are correct.
        Buffer<uint8_t> out = g.realize(W, H, target);

        for (int y = 1; y < H-1; y++) {
            for (int x = 1; x < W-1; x++) {
                int16_t correct = 0;
                for (int ry = -1; ry <= 1; ry++) {
                    for (int rx = -1; rx <= 1; rx++) {
                        correct += static_cast<int16_t>(in(x + rx, y + ry)) * kernel(rx, ry);
                        correct += kernel(rx, ry);
                    }
                }
                correct = std::min(std::max(correct / 16, 0), 255);
                if (correct != out(x, y)) {
                    std::cout << "out(" << x << ", " << y << ") = " << (int)out(x, y) << " instead of " << correct << "\n";
                    return -1;
//.........这里部分代码省略.........
开发者ID:adityaatluri,项目名称:Halide,代码行数:101,代码来源:widening_reduction.cpp


注:本文中的Func::hexagon方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。