本文整理汇总了 C++ 中 Func::hexagon 方法的典型用法代码示例。如果您正苦于以下问题：C++ Func::hexagon 方法的具体用法？C++ Func::hexagon 怎么用？C++ Func::hexagon 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Func 的用法示例。
在下文中一共展示了Func::hexagon方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: process
/**
 * Assemble the raw-processing pipeline and attach its schedule.
 *
 * The stages are chained in this order:
 *   hot_pixel_suppression -> deinterleave -> demosaic -> color_correct -> apply_curve
 * and the last stage is copied into `processed`, which is the Func returned.
 *
 * This function relies on names declared outside of it: `processed`,
 * `target`, and the Vars `x`, `y`, `c`, `yi`, `yo`.
 *
 * @param raw          Input Func handed to hot_pixel_suppression.
 * @param result_type  Forwarded unchanged to apply_curve.
 * @param matrix_3200  Forwarded unchanged to color_correct.
 * @param matrix_7000  Forwarded unchanged to color_correct.
 * @param color_temp   Forwarded unchanged to color_correct.
 * @param gamma        Forwarded unchanged to apply_curve.
 * @param contrast     Forwarded unchanged to apply_curve.
 * @param blackLevel   Forwarded unchanged to apply_curve.
 * @param whiteLevel   Forwarded unchanged to apply_curve.
 * @return The scheduled `processed` Func.
 */
Func process(Func raw, Type result_type,
             ImageParam matrix_3200, ImageParam matrix_7000, Param<float> color_temp,
             Param<float> gamma, Param<float> contrast, Param<int> blackLevel, Param<int> whiteLevel) {
    Var yii, xi;

    // ---- Algorithm: one Func per stage, each consuming the previous one.
    Func denoised = hot_pixel_suppression(raw);
    Func deinterleaved = deinterleave(denoised);
    Func demosaiced = demosaic(deinterleaved);
    Func corrected = color_correct(demosaiced, matrix_3200, matrix_7000, color_temp);
    Func curved = apply_curve(corrected, result_type, gamma, contrast, blackLevel, whiteLevel);

    processed(x, y, c) = curved(x, y, c);

    // ---- Schedule.
    Expr full_width = processed.output_buffer().width();
    Expr full_height = processed.output_buffer().height();

    // Number of output rows handled per parallel strip.
    const int rows_per_strip = 32;

    // Vector width: HVX_64 forces 32 lanes, HVX_128 forces 64 lanes, and
    // otherwise the target's natural width for 16-bit unsigned values is used.
    const int natural_lanes = target.natural_vector_size(UInt(16));
    const int lanes = target.has_feature(Target::HVX_64) ? 32
                    : target.has_feature(Target::HVX_128) ? 64
                    : natural_lanes;
    const int wide_lanes = 2 * lanes;

    // Computed per inner strip row (yi), stored per strip (yo), with the
    // storage folded along y.
    denoised.compute_at(processed, yi);
    denoised.store_at(processed, yo);
    denoised.fold_storage(y, 8);
    denoised.vectorize(x, lanes);

    deinterleaved.compute_at(processed, yi);
    deinterleaved.store_at(processed, yo);
    deinterleaved.fold_storage(y, 4);
    deinterleaved.vectorize(x, wide_lanes, TailStrategy::RoundUp);
    deinterleaved.reorder(c, x, y);
    deinterleaved.unroll(c);

    corrected.compute_at(processed, x);
    corrected.vectorize(x, lanes);
    corrected.reorder(c, x, y);
    corrected.unroll(c);

    // Output: split rows into strips (yo/yi), each strip row pair into yii,
    // and columns into vectors of `wide_lanes`; strips run in parallel.
    processed.compute_root();
    processed.split(y, yo, yi, rows_per_strip);
    processed.split(yi, yi, yii, 2);
    processed.split(x, x, xi, wide_lanes, TailStrategy::RoundUp);
    processed.reorder(xi, c, yii, x, yi, yo);
    processed.vectorize(xi, wide_lanes);
    processed.parallel(yo);

    // On either HVX variant, offload the output stage and align the
    // intermediate buffers along x to the vector width.
    if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
        processed.hexagon();
        denoised.align_storage(x, lanes);
        deinterleaved.align_storage(x, lanes);
        corrected.align_storage(x, lanes);
    }

    // We can generate slightly better code if we know the splits divide the extent.
    processed.bound(c, 0, 3);
    processed.bound(x, 0, (full_width / wide_lanes) * wide_lanes);
    processed.bound(y, 0, (full_height / rows_per_strip) * rows_per_strip);

    return processed;
}
示例2: schedule
/**
 * Apply a simple vectorized schedule to `f`, chosen by target features.
 *
 * When `t` has HVX_64 or HVX_128, `f` is marked with hexagon() and
 * vectorized along `x` by 32; otherwise it is vectorized along `x` by 16.
 * `x` is a Var declared outside this function.
 *
 * @param f  Func to schedule.
 * @param t  Target whose feature set selects the schedule.
 */
void schedule(Func f, const Target &t) {
    // TODO: Add GPU schedule where supported.
    const bool use_hvx = t.features_any_of({Target::HVX_64, Target::HVX_128});
    if (use_hvx) {
        f.hexagon();
    }
    f.vectorize(x, use_hvx ? 32 : 16);
}
示例3: main
//.........这里部分代码省略.........
f.set_custom_print(halide_print);
Buffer<float> imf = f.realize(N);
assert(messages.size() == (size_t)N);
char correct[1024];
for (int i = 0; i < N; i++) {
snprintf(correct, sizeof(correct), "%f\n", imf(i));
// Some versions of the std library can emit some NaN patterns
// as "-nan", due to sloppy conversion (or not) of the sign bit.
// Halide considers all NaN's equivalent, so paper over this
// noise in the test by normalizing all -nan -> nan.
if (messages[i] == "-nan\n") messages[i] = "nan\n";
if (!strcmp(correct, "-nan\n")) strcpy(correct, "nan\n");
if (messages[i] != correct) {
printf("float %d: %s vs %s for %10.20e\n", i, messages[i].c_str(), correct, imf(i));
return -1;
}
}
messages.clear();
g(x) = print(reinterpret(Float(64), (cast<uint64_t>(random_uint()) << 32) | random_uint()));
g.set_custom_print(halide_print);
Buffer<double> img = g.realize(N);
assert(messages.size() == (size_t)N);
for (int i = 0; i < N; i++) {
snprintf(correct, sizeof(correct), "%e\n", img(i));
// Some versions of the std library can emit some NaN patterns
// as "-nan", due to sloppy conversion (or not) of the sign bit.
// Halide considers all NaN's equivalent, so paper over this
// noise in the test by normalizing all -nan -> nan.
if (messages[i] == "-nan\n") messages[i] = "nan\n";
if (!strcmp(correct, "-nan\n")) strcpy(correct, "nan\n");
if (messages[i] != correct) {
printf("double %d: %s vs %s for %10.20e\n", i, messages[i].c_str(), correct, img(i));
return -1;
}
}
}
#endif
messages.clear();
{
Func f;
// Test a vectorized print.
f(x) = print(x * 3);
f.set_custom_print(halide_print);
f.vectorize(x, 32);
if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
f.hexagon();
}
Buffer<int> result = f.realize(128);
if (!target.features_any_of({Target::HVX_64, Target::HVX_128})) {
assert((int)messages.size() == result.width());
for (size_t i = 0; i < messages.size(); i++) {
assert(messages[i] == std::to_string(i * 3) + "\n");
}
} else {
// The Hexagon simulator prints directly to stderr, so we
// can't read the messages.
}
}
messages.clear();
{
Func f;
// Test a vectorized print_when.
f(x) = print_when(x % 2 == 0, x * 3);
f.set_custom_print(halide_print);
f.vectorize(x, 32);
if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
f.hexagon();
}
Buffer<int> result = f.realize(128);
if (!target.features_any_of({Target::HVX_64, Target::HVX_128})) {
assert((int)messages.size() == result.width() / 2);
for (size_t i = 0; i < messages.size(); i++) {
assert(messages[i] == std::to_string(i * 2 * 3) + "\n");
}
} else {
// The Hexagon simulator prints directly to stderr, so we
// can't read the messages.
}
}
printf("Success!\n");
return 0;
}
示例4: main
int main(int argc, char **argv) {
Target target = get_jit_target_from_environment();
if (1) {
// Test a tuple reduction on the gpu
Func f;
Var x, y;
f(x, y) = Tuple(x + y, x - y);
// Updates to a reduction are atomic.
f(x, y) = Tuple(f(x, y)[1]*2, f(x, y)[0]*2);
// now equals ((x - y)*2, (x + y)*2)
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, 16, 16);
f.update().gpu_tile(x, y, 16, 16);
} else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
f.hexagon(y).vectorize(x, 32);
f.update().hexagon(y).vectorize(x, 32);
}
Realization result = f.realize(1024, 1024);
Image<int> a = result[0], b = result[1];
for (int y = 0; y < a.height(); y++) {
for (int x = 0; x < a.width(); x++) {
int correct_a = (x - y)*2;
int correct_b = (x + y)*2;
if (a(x, y) != correct_a || b(x, y) != correct_b) {
printf("result(%d, %d) = (%d, %d) instead of (%d, %d)\n",
x, y, a(x, y), b(x, y), correct_a, correct_b);
return -1;
}
}
}
}
if (1) {
// Now test one that alternates between cpu and gpu per update step
Func f;
Var x, y;
f(x, y) = Tuple(x + y, x - y);
for (size_t i = 0; i < 10; i++) {
// Swap the tuple elements and increment both
f(x, y) = Tuple(f(x, y)[1] + 1, f(x, y)[0] + 1);
}
// Schedule the pure step and the odd update steps on the gpu
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, 16, 16);
} else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
f.hexagon(y).vectorize(x, 32);
}
for (int i = 0; i < 10; i ++) {
if (i & 1) {
if (target.has_gpu_feature()) {
f.update(i).gpu_tile(x, y, 16, 16);
} else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
f.update(i).hexagon(y).vectorize(x, 32);
}
} else {
f.update(i);
}
}
Realization result = f.realize(1024, 1024);
Image<int> a = result[0], b = result[1];
for (int y = 0; y < a.height(); y++) {
for (int x = 0; x < a.width(); x++) {
int correct_a = (x + y) + 10;
int correct_b = (x - y) + 10;
if (a(x, y) != correct_a || b(x, y) != correct_b) {
printf("result(%d, %d) = (%d, %d) instead of (%d, %d)\n",
x, y, a(x, y), b(x, y), correct_a, correct_b);
return -1;
}
}
}
}
if (1) {
// Same as above, but switches which steps are gpu and cpu
Func f;
Var x, y;
f(x, y) = Tuple(x + y, x - y);
for (size_t i = 0; i < 10; i++) {
// Swap the tuple elements and increment both
f(x, y) = Tuple(f(x, y)[1] + 1, f(x, y)[0] + 1);
}
// Schedule the even update steps on the gpu
//.........这里部分代码省略.........
示例5: main
int main(int argc, char **argv) {
Buffer<uint8_t> input(128, 64);
for (int y = 0; y < input.height(); y++) {
for (int x = 0; x < input.width(); x++) {
input(x, y) = y*input.width() + x;
}
}
Var x, y, xi, yi;
{
Func f;
f(x, y) = select(((input(x, y) > 10) && (input(x, y) < 20)) ||
((input(x, y) > 40) && (!(input(x, y) > 50))),
u8(255), u8(0));
Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 16, 16).vectorize(xi, 4);
} else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
f.hexagon().vectorize(x, 128);
} else {
f.vectorize(x, 8);
}
Buffer<uint8_t> output = f.realize(input.width(), input.height(), target);
for (int y = 0; y < input.height(); y++) {
for (int x = 0; x < input.width(); x++) {
bool cond = ((input(x, y) > 10) && (input(x, y) < 20)) ||
((input(x, y) > 40) && (!(input(x, y) > 50)));
uint8_t correct = cond ? 255 : 0;
if (correct != output(x, y)) {
fprintf(stderr, "output(%d, %d) = %d instead of %d\n", x, y, output(x, y), correct);
return -1;
}
}
}
}
// Test a condition that uses a let resulting from common
// subexpression elimination.
{
Func f;
Expr common_cond = input(x, y) > 10;
f(x, y) = select((common_cond && (input(x, y) < 20)) ||
((input(x, y) > 40) && (!common_cond)),
u8(255), u8(0));
Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 16, 16).vectorize(xi, 4);
} else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
f.hexagon().vectorize(x, 128);
} else {
f.vectorize(x, 8);
}
Buffer<uint8_t> output = f.realize(input.width(), input.height(), target);
for (int y = 0; y < input.height(); y++) {
for (int x = 0; x < input.width(); x++) {
bool common_cond = input(x, y) > 10;
bool cond = (common_cond && (input(x, y) < 20)) ||
((input(x, y) > 40) && (!common_cond));
uint8_t correct = cond ? 255 : 0;
if (correct != output(x, y)) {
fprintf(stderr, "output(%d, %d) = %d instead of %d\n", x, y, output(x, y), correct);
return -1;
}
}
}
}
// Test a condition which has vector and scalar inputs.
{
Func f("f");
f(x, y) = select(x < 10 || x > 20 || y < 10 || y > 20, 0, input(x, y));
Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 16, 16).vectorize(xi, 4);
} else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
f.hexagon().vectorize(x, 128);
} else {
f.vectorize(x, 128);
}
Buffer<uint8_t> output = f.realize(input.width(), input.height(), target);
for (int y = 0; y < input.height(); y++) {
for (int x = 0; x < input.width(); x++) {
bool cond = x < 10 || x > 20 || y < 10 || y > 20;
uint8_t correct = cond ? 0 : input(x,y);
if (correct != output(x, y)) {
fprintf(stderr, "output(%d, %d) = %d instead of %d\n", x, y, output(x, y), correct);
return -1;
}
//.........这里部分代码省略.........
示例6: main
int main(int arch, char **argv) {
const int W = 256, H = 256;
Buffer<uint8_t> in(W, H);
// Set up the input.
for (int y = 0; y < H; y++) {
for (int x = 0; x < W; x++) {
in(x, y) = rand() & 0xff;
}
}
// Define a convolution kernel, and its sum.
Buffer<int8_t> kernel(3, 3);
kernel.set_min(-1, -1);
for (int y = -1; y <= 1; y++) {
for (int x = -1; x <= 1; x++) {
kernel(x, y) = rand() % 8 - 4;
}
}
Var x("x"), y("y"), xi("xi"), yi("yi");
RDom r(-1, 3, -1, 3);
// Boundary condition.
Func input = BoundaryConditions::repeat_edge(in);
input.compute_root();
// Test a widening reduction, followed by a narrowing.
{
Func f;
f(x, y) = u8_sat(sum(i16(input(x + r.x, y + r.y)) * kernel(r.x, r.y)) / 16);
// Schedule.
Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 16, 16);
} else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
f.hexagon().vectorize(x, 128);
} else {
f.vectorize(x, target.natural_vector_size<uint8_t>());
}
// Run the pipeline and verify the results are correct.
Buffer<uint8_t> out = f.realize(W, H, target);
for (int y = 1; y < H-1; y++) {
for (int x = 1; x < W-1; x++) {
int16_t correct = 0;
for (int ry = -1; ry <= 1; ry++) {
for (int rx = -1; rx <= 1; rx++) {
correct += static_cast<int16_t>(in(x + rx, y + ry)) * kernel(rx, ry);
}
}
correct = std::min(std::max(correct / 16, 0), 255);
if (correct != out(x, y)) {
std::cout << "out(" << x << ", " << y << ") = " << (int)out(x, y) << " instead of " << correct << "\n";
return -1;
}
}
}
}
// Test a tuple reduction with widening, followed by narrowing the result.
{
Func f;
f(x, y) = { i16(0), i8(0) };
f(x, y) = {
f(x, y)[0] + i16(input(x + r.x, y + r.y)) * kernel(r.x, r.y),
f(x, y)[1] + kernel(r.x, r.y),
};
Func g;
g(x, y) = u8_sat((f(x, y)[0] + f(x, y)[1]) / 16);
// Schedule.
Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
g.gpu_tile(x, y, xi, yi, 16, 16);
} else if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
g.hexagon().vectorize(x, 128);
} else {
g.vectorize(x, target.natural_vector_size<uint8_t>());
}
// Run the pipeline and verify the results are correct.
Buffer<uint8_t> out = g.realize(W, H, target);
for (int y = 1; y < H-1; y++) {
for (int x = 1; x < W-1; x++) {
int16_t correct = 0;
for (int ry = -1; ry <= 1; ry++) {
for (int rx = -1; rx <= 1; rx++) {
correct += static_cast<int16_t>(in(x + rx, y + ry)) * kernel(rx, ry);
correct += kernel(rx, ry);
}
}
correct = std::min(std::max(correct / 16, 0), 255);
if (correct != out(x, y)) {
std::cout << "out(" << x << ", " << y << ") = " << (int)out(x, y) << " instead of " << correct << "\n";
return -1;
//.........这里部分代码省略.........