本文整理匯總了 C++ 中 AlignLo 函數的典型用法代碼示例。如果您正苦於以下問題：C++ AlignLo 函數的具體用法？C++ AlignLo 怎麼用？C++ AlignLo 使用的例子？那麼，這裡精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了 AlignLo 函數的 15 個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的 C++ 代碼示例。
示例1: ReduceGray3x3
// Produces a half-size gray image: every second source row is visited and, for each
// of them, the row above, the row itself and the row below are combined through the
// ReduceCol*/ReduceRow helpers (declared elsewhere) into one destination row.
//
// src/srcStride   - source pixels and distance in bytes between source rows.
// dst/dstStride   - destination pixels and distance in bytes between destination rows.
// Preconditions (asserted): srcWidth >= A, dstWidth == (srcWidth + 1)/2,
// dstHeight == (srcHeight + 1)/2; with `align` set, src and srcStride satisfy Aligned().
template<bool align, bool compensation> void ReduceGray3x3(
    const uint8_t* src, size_t srcWidth, size_t srcHeight, size_t srcStride,
    uint8_t* dst, size_t dstWidth, size_t dstHeight, size_t dstStride)
{
    assert(srcWidth >= A && (srcWidth + 1)/2 == dstWidth && (srcHeight + 1)/2 == dstHeight);
    if (align)
        assert(Aligned(src) && Aligned(srcStride));

    // 1 when the source width is odd, 0 otherwise.
    const size_t oddColumn = srcWidth - AlignLo(srcWidth, 2);
    // Part of the row that is covered by whole A-wide steps.
    const size_t vectorWidth = AlignLo(srcWidth, A);
    const bool hasTail = (vectorWidth != srcWidth);

    size_t row = 0;
    while (row < srcHeight)
    {
        // The first and the last source rows have no neighbour on one side,
        // so the centre row is reused there.
        const uint8_t * centre = src;
        const uint8_t * upper = row ? centre - srcStride : centre;
        const uint8_t * lower = (row != srcHeight - 1) ? centre + srcStride : centre;

        // Left edge of the row uses the dedicated "nose" column helper.
        vst1_u8(dst, ReduceRow<compensation>(ReduceColNose<align>(upper),
            ReduceColNose<align>(centre), ReduceColNose<align>(lower)));

        // Remaining whole steps: A source bytes in, HA destination bytes out.
        size_t dstCol = HA;
        for (size_t srcCol = A; srcCol < vectorWidth; srcCol += A)
        {
            vst1_u8(dst + dstCol, ReduceRow<compensation>(ReduceColBody<align>(upper + srcCol),
                ReduceColBody<align>(centre + srcCol), ReduceColBody<align>(lower + srcCol)));
            dstCol += HA;
        }

        if (hasTail)
        {
            // Redo the last A source bytes (ignoring a trailing odd column) with unaligned access.
            const size_t srcTail = srcWidth - A - oddColumn;
            const size_t dstTail = dstWidth - HA - oddColumn;
            vst1_u8(dst + dstTail, ReduceRow<compensation>(ReduceColBody<false>(upper + srcTail),
                ReduceColBody<false>(centre + srcTail), ReduceColBody<false>(lower + srcTail)));

            // A trailing odd column gets its last destination pixel from the scalar helper.
            if (oddColumn)
                dst[dstWidth - 1] = Base::GaussianBlur3x3<compensation>(upper + srcWidth, centre + srcWidth, lower + srcWidth, -2, -1, -1);
        }

        row += 2;
        dst += dstStride;
        src += 2*srcStride;
    }
}
示例2: AbsDifferenceSum
template <bool align> void AbsDifferenceSum(
const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride,
size_t width, size_t height, uint64_t * sum)
{
assert(width >= A);
if (align)
assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride));
size_t alignedWidth = AlignLo(width, QA);
size_t bodyWidth = AlignLo(width, A);
v128_u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth);
*sum = 0;
for (size_t row = 0; row < height; ++row)
{
size_t col = 0;
v128_u32 sums[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 };
for (; col < alignedWidth; col += QA)
{
AbsDifferenceSum<align>(a, b, col, sums[0]);
AbsDifferenceSum<align>(a, b, col + A, sums[1]);
AbsDifferenceSum<align>(a, b, col + 2 * A, sums[2]);
AbsDifferenceSum<align>(a, b, col + 3 * A, sums[3]);
}
sums[0] = vec_add(vec_add(sums[0], sums[1]), vec_add(sums[2], sums[3]));
for (; col < bodyWidth; col += A)
AbsDifferenceSum<align>(a, b, col, sums[0]);
if (width - bodyWidth)
AbsDifferenceSumMasked<false>(a, b, width - A, tailMask, sums[0]);
*sum += ExtractSum(sums[0]);
a += aStride;
b += bStride;
}
}
示例3: ConditionalCount8u
// Counts the bytes of a width x height image for which Compare8u<compareType>(pixel, value)
// yields a set mask, and stores the total in *count.
//
// src/stride - image bytes and distance in bytes between consecutive rows.
// value      - right-hand operand of the comparison.
// count      - receives the number of matching bytes.
// Preconditions (asserted): width >= A; with `align` set, src and stride satisfy Aligned().
//
// NOTE(review): `align` and `compareType` are used as template arguments below, so this
// definition expects a preceding `template <...>` header that is not visible in this
// excerpt - confirm against the full source.
void ConditionalCount8u(const uint8_t * src, size_t stride, size_t width, size_t height, uint8_t value, uint32_t * count)
{
    assert(width >= A);
    if (align)
        assert(Aligned(src) && Aligned(stride));

    // Columns covered by the 4x-unrolled loop (multiple of QA) and by whole vectors (multiple of A).
    size_t alignedWidth = AlignLo(width, QA);
    size_t bodyWidth = AlignLo(width, A);
    // The tail re-reads the last A bytes of the row, which overlap the whole-vector part by
    // A - (width - bodyWidth) bytes. The mask therefore has to be derived from bodyWidth:
    // using alignedWidth would underflow the shift whenever width - alignedWidth > A.
    v128_u8 tailMask = ShiftLeft(K8_01, A - width + bodyWidth);
    v128_u8 _value = SIMD_VEC_SET1_EPI8(value);
    v128_u32 counts[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 };
    for (size_t row = 0; row < height; ++row)
    {
        size_t col = 0;
        // Four independent accumulators per step of QA bytes.
        for (; col < alignedWidth; col += QA)
        {
            ConditionalCount8u<align, compareType>(src, col, _value, counts[0]);
            ConditionalCount8u<align, compareType>(src, col + A, _value, counts[1]);
            ConditionalCount8u<align, compareType>(src, col + 2 * A, _value, counts[2]);
            ConditionalCount8u<align, compareType>(src, col + 3 * A, _value, counts[3]);
        }
        // Leftover whole vectors.
        for (; col < bodyWidth; col += A)
            ConditionalCount8u<align, compareType>(src, col, _value, counts[0]);
        // Only a genuinely partial last vector needs the masked pass; testing against
        // alignedWidth would count the last full vector twice when width is a multiple of A
        // but not of QA.
        if (bodyWidth != width)
        {
            const v128_u8 mask = vec_and(Compare8u<compareType>(Load<false>(src + width - A), _value), tailMask);
            counts[0] = vec_msum(mask, K8_01, counts[0]);
        }
        src += stride;
    }
    counts[0] = vec_add(vec_add(counts[0], counts[1]), vec_add(counts[2], counts[3]));
    *count = ExtractSum(counts[0]);
}
示例4: SquaredDifferenceSum16f
// Computes the sum of squared differences between two arrays of 16-bit (half precision)
// floats and stores it, as a 32-bit float, in *sum.
//
// a, b - input arrays of `size` half-float values stored as uint16_t.
// size - number of elements; must be >= F (asserted).
// sum  - receives the result.
// With `align` set, a and b must satisfy Aligned() (asserted).
template <bool align> SIMD_INLINE void SquaredDifferenceSum16f(const uint16_t * a, const uint16_t * b, size_t size, float * sum)
{
    assert(size >= F);
    if (align)
        assert(Aligned(a) && Aligned(b));

    // Element counts covered by single vectors (F) and by the 2x-unrolled loop (DF).
    size_t partialAlignedSize = AlignLo(size, F);
    size_t fullAlignedSize = AlignLo(size, DF);
    size_t i = 0;
    float32x4_t sums[2] = { vdupq_n_f32(0), vdupq_n_f32(0) };
    if (fullAlignedSize)
    {
        // Two independent accumulators per step of DF elements.
        for (; i < fullAlignedSize; i += DF)
        {
            SquaredDifferenceSum16f<align>(a, b, i + F * 0, sums[0]);
            SquaredDifferenceSum16f<align>(a, b, i + F * 1, sums[1]);
        }
        sums[0] = vaddq_f32(sums[0], sums[1]);
    }
    // At most one leftover whole vector.
    for (; i < partialAlignedSize; i += F)
        SquaredDifferenceSum16f<align>(a, b, i, sums[0]);
    if (partialAlignedSize != size)
    {
        // The tail re-reads the last F elements; tailMask keeps only the lanes that were not
        // processed above. The address size - F is generally not vector aligned, so the
        // unaligned loader is used, and the second operand must come from `b`
        // (loading it from `a` made the tail contribution always zero).
        float32x4_t tailMask = RightNotZero(size - partialAlignedSize);
        float32x4_t _a = vcvt_f32_f16((float16x4_t)LoadHalf<false>(a + size - F));
        float32x4_t _b = vcvt_f32_f16((float16x4_t)LoadHalf<false>(b + size - F));
        float32x4_t _d = And(vsubq_f32(_a, _b), tailMask);
        sums[0] = vaddq_f32(sums[0], vmulq_f32(_d, _d));
    }
    *sum = ExtractSum32f(sums[0]);
}
示例5: BgrToGray
template <bool align> void BgrToGray(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * gray, size_t grayStride)
{
assert(width >= A);
if(align)
assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(gray) && Aligned(grayStride));
size_t alignedWidth = AlignLo(width, A);
for(size_t row = 0; row < height; ++row)
{
Loader<align> _bgr(bgr);
Storer<align> _gray(gray);
BgrToGray<align, true>(_bgr, _gray);
for(size_t col = A; col < alignedWidth; col += A)
BgrToGray<align, false>(_bgr, _gray);
Flush(_gray);
if(alignedWidth != width)
{
Loader<false> _bgr(bgr + 3*(width - A));
Storer<false> _gray(gray + width - A);
BgrToGray<false, true>(_bgr, _gray);
Flush(_gray);
}
bgr += bgrStride;
gray += grayStride;
}
}
示例6: EdgeBackgroundAdjustRangeMasked
template <bool align> void EdgeBackgroundAdjustRangeMasked(uint8_t * backgroundCount, size_t backgroundCountStride, size_t width, size_t height,
uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t threshold, const uint8_t * mask, size_t maskStride)
{
assert(width >= A);
if(align)
{
assert(Aligned(backgroundValue) && Aligned(backgroundValueStride));
assert(Aligned(backgroundCount) && Aligned(backgroundCountStride));
assert(Aligned(mask) && Aligned(maskStride));
}
const __m256i _threshold = _mm256_set1_epi8((char)threshold);
size_t alignedWidth = AlignLo(width, A);
__m256i tailMask = SetMask<uint8_t>(0, A - width + alignedWidth, 1);
for(size_t row = 0; row < height; ++row)
{
for(size_t col = 0; col < alignedWidth; col += A)
EdgeBackgroundAdjustRangeMasked<align>(backgroundCount, backgroundValue, mask, col, _threshold, K8_01);
if(alignedWidth != width)
EdgeBackgroundAdjustRangeMasked<false>(backgroundCount, backgroundValue, mask, width - A, _threshold, tailMask);
backgroundValue += backgroundValueStride;
backgroundCount += backgroundCountStride;
mask += maskStride;
}
}
示例7: AddFeatureDifference
template <bool align> void AddFeatureDifference(const uint8_t * value, size_t valueStride, size_t width, size_t height,
const uint8_t * lo, size_t loStride, const uint8_t * hi, size_t hiStride,
uint16_t weight, uint8_t * difference, size_t differenceStride)
{
assert(width >= A);
if(align)
{
assert(Aligned(value) && Aligned(valueStride));
assert(Aligned(lo) && Aligned(loStride));
assert(Aligned(hi) && Aligned(hiStride));
assert(Aligned(difference) && Aligned(differenceStride));
}
size_t alignedWidth = AlignLo(width, A);
__m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + alignedWidth);
__m128i _weight = _mm_set1_epi16((short)weight);
for(size_t row = 0; row < height; ++row)
{
for(size_t col = 0; col < alignedWidth; col += A)
AddFeatureDifference<align>(value, lo, hi, difference, col, _weight, K_INV_ZERO);
if(alignedWidth != width)
AddFeatureDifference<false>(value, lo, hi, difference, width - A, _weight, tailMask);
value += valueStride;
lo += loStride;
hi += hiStride;
difference += differenceStride;
}
}
示例8: AbsSecondDerivativeHistogram
// Fills `histogram` (HISTOGRAM_SIZE 32-bit bins, cleared first) with the distribution of
// the byte values that AbsSecondDerivative (declared elsewhere) writes for the part of
// the image that remains after removing `indent` pixels on every side.
//
// src/stride - image bytes and distance in bytes between consecutive rows.
// step       - forwarded to AbsSecondDerivative together with step*stride.
// indent     - border width excluded on the left/right/top/bottom.
// histogram  - output bins; indexed by the byte value produced for each pixel.
template<bool align> void AbsSecondDerivativeHistogram(const uint8_t *src, size_t width, size_t height, size_t stride,
size_t step, size_t indent, uint32_t * histogram)
{
memset(histogram, 0, sizeof(uint32_t)*HISTOGRAM_SIZE);
// Scratch storage for one row of results. Buffer is declared elsewhere; presumably it
// provides at least `stride` bytes at buffer.p - TODO confirm.
Buffer buffer(stride);
// Skip the border: move `indent` bytes into the scratch row, `indent` rows and `indent`
// columns into the source, and shrink the processed area accordingly.
buffer.p += indent;
src += indent*(stride + 1);
height -= 2*indent;
width -= 2*indent;
// [bodyStart, bodyEnd) is the column range, relative to buffer.p, that starts at the
// first A-aligned scratch address and spans a whole number of A-byte steps.
ptrdiff_t bodyStart = (uint8_t*)AlignHi(buffer.p, A) - buffer.p;
ptrdiff_t bodyEnd = bodyStart + AlignLo(width - bodyStart, A);
size_t rowStep = step*stride;
for(size_t row = 0; row < height; ++row)
{
// Columns before the first aligned scratch address: one unaligned step from column 0.
if(bodyStart)
AbsSecondDerivative<false>(src, step, rowStep, buffer.p);
for(ptrdiff_t col = bodyStart; col < bodyEnd; col += A)
AbsSecondDerivative<align>(src + col, step, rowStep, buffer.p + col);
// Columns after the body: one unaligned step ending exactly at the last column.
if(width != (size_t)bodyEnd)
AbsSecondDerivative<false>(src + width - A, step, rowStep, buffer.p + width - A);
// Tally the values produced for this row.
for(size_t i = 0; i < width; ++i)
++histogram[buffer.p[i]];
src += stride;
}
}
示例9: SquaredDifferenceSum
template <bool align> void SquaredDifferenceSum(
const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride,
size_t width, size_t height, uint64_t * sum)
{
assert(width < 0x10000);
if(align)
{
assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride));
}
size_t bodyWidth = AlignLo(width, A);
__m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + bodyWidth);
__m128i fullSum = _mm_setzero_si128();
for(size_t row = 0; row < height; ++row)
{
__m128i rowSum = _mm_setzero_si128();
for(size_t col = 0; col < bodyWidth; col += A)
{
const __m128i a_ = Load<align>((__m128i*)(a + col));
const __m128i b_ = Load<align>((__m128i*)(b + col));
rowSum = _mm_add_epi32(rowSum, SquaredDifference(a_, b_));
}
if(width - bodyWidth)
{
const __m128i a_ = _mm_and_si128(tailMask, Load<false>((__m128i*)(a + width - A)));
const __m128i b_ = _mm_and_si128(tailMask, Load<false>((__m128i*)(b + width - A)));
rowSum = _mm_add_epi32(rowSum, SquaredDifference(a_, b_));
}
fullSum = _mm_add_epi64(fullSum, HorizontalSum32(rowSum));
a += aStride;
b += bStride;
}
*sum = ExtractInt64Sum(fullSum);
}
示例10: FillBgr
template <bool align> void FillBgr(uint8_t * dst, size_t stride, size_t width, size_t height, uint8_t blue, uint8_t green, uint8_t red)
{
size_t size = width*3;
size_t step = A*3;
size_t alignedSize = AlignLo(width, A)*3;
uint32_t bgrb = uint32_t(blue) | (uint32_t(green) << 8) | (uint32_t(red) << 16) | (uint32_t(blue) << 24);
uint32_t grbg = uint32_t(green) | (uint32_t(red) << 8) | (uint32_t(blue) << 16) | (uint32_t(green) << 24);
uint32_t rbgr = uint32_t(red) | (uint32_t(blue) << 8) | (uint32_t(green) << 16) | (uint32_t(red) << 24);
__m128i bgrs[3];
bgrs[0] = _mm_setr_epi32(bgrb, grbg, rbgr, bgrb);
bgrs[1] = _mm_setr_epi32(grbg, rbgr, bgrb, grbg);
bgrs[2] = _mm_setr_epi32(rbgr, bgrb, grbg, rbgr);
for(size_t row = 0; row < height; ++row)
{
size_t offset = 0;
for(; offset < alignedSize; offset += step)
{
Store<align>((__m128i*)(dst + offset) + 0, bgrs[0]);
Store<align>((__m128i*)(dst + offset) + 1, bgrs[1]);
Store<align>((__m128i*)(dst + offset) + 2, bgrs[2]);
}
if(offset < size)
{
offset = size - step;
Store<false>((__m128i*)(dst + offset) + 0, bgrs[0]);
Store<false>((__m128i*)(dst + offset) + 1, bgrs[1]);
Store<false>((__m128i*)(dst + offset) + 2, bgrs[2]);
}
dst += stride;
}
}
示例11: ConditionalCount16i
// Counts the 16-bit signed elements of a width x height image for which
// Compare16i<compareType>(element, value) yields a set mask, and stores the total in *count.
//
// src/stride - image memory (reinterpreted as int16_t per row) and distance in bytes between rows.
// width      - row length in 16-bit elements.
// value      - right-hand operand of the comparison.
// count      - receives the number of matching elements.
// Preconditions (asserted): width >= HA; with `align` set, src and stride satisfy Aligned().
//
// NOTE(review): `align` and `compareType` are used as template arguments below, so this
// definition expects a preceding `template <...>` header that is not visible in this
// excerpt - confirm against the full source.
void ConditionalCount16i(const uint8_t * src, size_t stride, size_t width, size_t height, int16_t value, uint32_t * count)
{
    assert(width >= HA);
    if (align)
        assert(Aligned(src) && Aligned(stride));

    // Elements covered by the 4x-unrolled loop (multiple of DA) and by whole vectors (multiple of HA).
    size_t alignedWidth = AlignLo(width, DA);
    size_t bodyWidth = Simd::AlignLo(width, HA);
    // The tail re-reads the last HA elements of the row, which overlap the whole-vector part
    // by HA - (width - bodyWidth) elements. The mask therefore has to be derived from
    // bodyWidth: using alignedWidth would underflow the shift whenever width - alignedWidth > HA.
    // NOTE(review): the shift amount is kept in the same unit as the original expression -
    // confirm whether ShiftLeft on v128_u16 expects elements or bytes.
    v128_u16 tailMask = ShiftLeft(K16_0001, HA - width + bodyWidth);
    v128_s16 _value = SIMD_VEC_SET1_EPI16(value);
    v128_u32 counts[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 };
    for (size_t row = 0; row < height; ++row)
    {
        const int16_t * s = (const int16_t *)src;
        size_t col = 0;
        // Four independent accumulators per step of DA elements.
        for (; col < alignedWidth; col += DA)
        {
            ConditionalCount16i<align, compareType>(s, col, _value, counts[0]);
            ConditionalCount16i<align, compareType>(s, col + HA, _value, counts[1]);
            ConditionalCount16i<align, compareType>(s, col + 2 * HA, _value, counts[2]);
            ConditionalCount16i<align, compareType>(s, col + 3 * HA, _value, counts[3]);
        }
        // Leftover whole vectors.
        for (; col < bodyWidth; col += HA)
            ConditionalCount16i<align, compareType>(s, col, _value, counts[0]);
        // Only a genuinely partial last vector needs the masked pass; testing against
        // alignedWidth would count the last full vector twice when width is a multiple of HA
        // but not of DA.
        if (bodyWidth != width)
        {
            const v128_u16 mask = vec_and((v128_u16)Compare16i<compareType>(Load<false>(s + width - HA), _value), tailMask);
            counts[0] = vec_msum(mask, K16_0001, counts[0]);
        }
        src += stride;
    }
    counts[0] = vec_add(vec_add(counts[0], counts[1]), vec_add(counts[2], counts[3]));
    *count = ExtractSum(counts[0]);
}
示例12: StretchGray2x2
template <bool align> void StretchGray2x2(
const uint8_t *src, size_t srcWidth, size_t srcHeight, size_t srcStride,
uint8_t *dst, size_t dstWidth, size_t dstHeight, size_t dstStride)
{
assert(srcWidth*2 == dstWidth && srcHeight*2 == dstHeight && srcWidth >= A);
if(align)
{
assert(Aligned(src) && Aligned(srcStride));
assert(Aligned(dst) && Aligned(dstStride));
}
size_t alignedWidth = AlignLo(srcWidth, A);
for(size_t row = 0; row < srcHeight; ++row)
{
uint8_t * dstEven = dst;
uint8_t * dstOdd = dst + dstStride;
for(size_t srcCol = 0, dstCol = 0; srcCol < alignedWidth; srcCol += A, dstCol += DA)
{
__m256i value = LoadPermuted<align>((__m256i*)(src + srcCol));
StoreUnpacked<align>(value, dstEven + dstCol);
StoreUnpacked<align>(value, dstOdd + dstCol);
}
if(alignedWidth != srcWidth)
{
__m256i value = LoadPermuted<false>((__m256i*)(src + srcWidth - A));
StoreUnpacked<false>(value, dstEven + dstWidth - 2*A);
StoreUnpacked<false>(value, dstOdd + dstWidth - 2*A);
}
src += srcStride;
dst += 2*dstStride;
}
}
示例13: Yuv444pToHue
template <bool align> void Yuv444pToHue(const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
size_t width, size_t height, uint8_t * hue, size_t hueStride)
{
assert(width >= A);
if(align)
{
assert(Aligned(y) && Aligned(yStride) && Aligned(u) && Aligned(uStride));
assert(Aligned(v) && Aligned(vStride) && Aligned(hue) && Aligned(hueStride));
}
const __m128 KF_255_DIV_6 = _mm_set_ps1(Base::KF_255_DIV_6);
size_t bodyWidth = AlignLo(width, A);
size_t tail = width - bodyWidth;
for(size_t row = 0; row < height; row += 1)
{
for(size_t col = 0; col < bodyWidth; col += A)
{
Store<align>((__m128i*)(hue + col), YuvToHue8(Load<align>((__m128i*)(y + col)),
Load<align>((__m128i*)(u + col)), Load<align>((__m128i*)(v + col)), KF_255_DIV_6));
}
if(tail)
{
size_t offset = width - A;
Store<false>((__m128i*)(hue + offset), YuvToHue8(Load<false>((__m128i*)(y + offset)),
Load<false>((__m128i*)(u + offset)), Load<false>((__m128i*)(v + offset)), KF_255_DIV_6));
}
y += yStride;
u += uStride;
v += vStride;
hue += hueStride;
}
}
示例14: LbpEstimate
// Writes the output of the per-vector LbpEstimate helper (declared elsewhere) for the
// interior of the image and zeroes the one-pixel border of `dst`.
//
// src/srcStride - source pixels and distance in bytes between source rows.
// dst/dstStride - destination pixels and distance in bytes between destination rows.
// Preconditions (asserted): width >= 2; with `align` set, both pointers and both strides
// satisfy Aligned().
template <bool align> void LbpEstimate(
const uint8_t * src, size_t srcStride, size_t width, size_t height, uint8_t * dst, size_t dstStride)
{
assert(width >= 2);
if (align)
assert(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride));
// Interior columns start at 1; alignedWidth is the end of the part covered by whole A-wide steps.
size_t alignedWidth = AlignLo(width - 2, A) + 1;
// NOTE(review): Aligned() is used as a boolean predicate in the assert above, so this
// initialises the 64-bit lane mask from a true/false result - looks like a different
// tail-mask helper was intended; confirm against the library source.
__mmask64 tailMask = Aligned(width - alignedWidth);
// First destination row is border: all zeros.
memset(dst, 0, width);
src += srcStride;
dst += dstStride;
// Rows 1 .. height-2 (the loop counter starts at 2 and so runs height-2 times).
for (size_t row = 2; row < height; ++row)
{
// Left border pixel.
dst[0] = 0;
size_t col = 1;
for (; col < alignedWidth; col += A)
LbpEstimate<align, false>(src + col, srcStride, dst + col);
// Remaining columns are handled by one masked step.
// NOTE(review): this call passes the same <align, false> template arguments as the
// unmasked body call, at an address offset by `col` (which starts at 1) - confirm that
// these are the intended arguments for the masked tail.
if (col < width)
LbpEstimate<align, false>(src + col, srcStride, dst + col, tailMask);
// Right border pixel (written after the tail step, so it overrides whatever the step stored there).
dst[width - 1] = 0;
src += srcStride;
dst += dstStride;
}
// Last destination row is border: all zeros.
memset(dst, 0, width);
}
示例15: BgraToYuv422p
template <bool align> void BgraToYuv422p(const uint8_t * bgra, size_t width, size_t height, size_t bgraStride, uint8_t * y, size_t yStride,
uint8_t * u, size_t uStride, uint8_t * v, size_t vStride)
{
assert((width%2 == 0) && (width >= DA));
if(align)
{
assert(Aligned(y) && Aligned(yStride) && Aligned(u) && Aligned(uStride));
assert(Aligned(v) && Aligned(vStride) && Aligned(bgra) && Aligned(bgraStride));
}
size_t alignedWidth = AlignLo(width, DA);
const size_t A8 = A*8;
for(size_t row = 0; row < height; ++row)
{
for(size_t colUV = 0, colY = 0, colBgra = 0; colY < alignedWidth; colY += DA, colUV += A, colBgra += A8)
BgraToYuv422p<align>(bgra + colBgra, y + colY, u + colUV, v + colUV);
if(width != alignedWidth)
{
size_t offset = width - DA;
BgraToYuv422p<false>(bgra + offset*4, y + offset, u + offset/2, v + offset/2);
}
y += yStride;
u += uStride;
v += vStride;
bgra += bgraStride;
}
}