本文整理汇总了C++中BYTW函数的典型用法代码示例。如果您正苦于以下问题：C++ BYTW函数的具体用法？C++ BYTW怎么用？C++ BYTW使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了BYTW函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: t1bv_4
/*
 * t1bv_4: in-place vector kernel over the buffer `ii`.
 *
 * For every step of the range [mb, me) (advancing by VL), four vectors are
 * loaded from offsets WS(rs, 0..3), the last three are passed through BYTW
 * together with entries of W, and four add/sub combinations (one pair using
 * VBYI) are stored back to the same four offsets.
 *
 *   ri      - not referenced by this function
 *   ii      - base of the data that is read and overwritten
 *   W       - table consumed through BYTW; TWVL * 6 entries per step
 *   rs      - stride handed to WS() to locate the four inputs/outputs
 *   mb, me  - first step and one-past-last step
 *   ms      - stride between consecutive steps (also passed to LD/ST)
 *
 * NOTE(review): presumably an FFTW backward radix-4 twiddle codelet; the
 * macros (LD, ST, BYTW, VBYI, ...) are defined outside this excerpt.
 */
static void t1bv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     R *base = ii;
     INT step;

     /* Skip the W entries that belong to the steps before mb. */
     W = W + (mb * ((TWVL / VL) * 6));

     for (step = mb; step < me; step = step + VL, base = base + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) {
          V in0, in1, in2, in3;
          V tw1, tw2, tw3;
          V diff02, rot13, sum02, sum13;

          in0 = LD(&(base[0]), ms, &(base[0]));
          in3 = LD(&(base[WS(rs, 3)]), ms, &(base[WS(rs, 1)]));
          tw3 = BYTW(&(W[TWVL * 4]), in3);
          in2 = LD(&(base[WS(rs, 2)]), ms, &(base[0]));
          tw2 = BYTW(&(W[TWVL * 2]), in2);
          in1 = LD(&(base[WS(rs, 1)]), ms, &(base[WS(rs, 1)]));
          tw1 = BYTW(&(W[0]), in1);

          /* Outputs 3 and 1 share the same two intermediate terms. */
          diff02 = VSUB(in0, tw2);
          rot13 = VBYI(VSUB(tw1, tw3));
          ST(&(base[WS(rs, 3)]), VSUB(diff02, rot13), ms, &(base[WS(rs, 1)]));
          ST(&(base[WS(rs, 1)]), VADD(diff02, rot13), ms, &(base[WS(rs, 1)]));

          /* Outputs 2 and 0 share the two plain sums. */
          sum02 = VADD(in0, tw2);
          sum13 = VADD(tw1, tw3);
          ST(&(base[WS(rs, 2)]), VSUB(sum02, sum13), ms, &(base[0]));
          ST(&(base[0]), VADD(sum02, sum13), ms, &(base[0]));
     }
     VLEAVE();
}
示例2: t1buv_3
/*
 * t1buv_3: in-place vector kernel over the buffer `ii`.
 *
 * Each step of [mb, me) (advancing by VL) loads three vectors at offsets
 * WS(rs, 0..2), runs the second and third through BYTW with entries of W,
 * and writes three combinations back to the same offsets.
 *
 *   ri      - not referenced by this function
 *   ii      - base of the data that is read and overwritten
 *   W       - table consumed through BYTW; TWVL * 4 entries per step
 *   rs      - stride handed to WS()
 *   mb, me  - first step and one-past-last step
 *   ms      - stride between consecutive steps (also passed to LD/ST)
 *
 * NOTE(review): presumably an FFTW backward radix-3 twiddle codelet; the
 * vector macros are defined outside this excerpt.
 */
static void t1buv_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
     INT step;
     R *base = ii;

     /* Skip the W entries that belong to the steps before mb. */
     W = W + (mb * ((TWVL / VL) * 4));

     for (step = mb; step < me; step = step + VL, base = base + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(rs)) {
          V in0, in1, in2;
          V tw1, tw2;
          V scaled_diff, pair_sum, remainder;

          in0 = LD(&(base[0]), ms, &(base[0]));
          in1 = LD(&(base[WS(rs, 1)]), ms, &(base[WS(rs, 1)]));
          in2 = LD(&(base[WS(rs, 2)]), ms, &(base[0]));

          tw1 = BYTW(&(W[0]), in1);
          tw2 = BYTW(&(W[TWVL * 2]), in2);

          scaled_diff = VMUL(LDK(KP866025403), VSUB(tw1, tw2));
          pair_sum = VADD(tw1, tw2);
          remainder = VFNMS(LDK(KP500000000), pair_sum, in0);

          ST(&(base[0]), VADD(in0, pair_sum), ms, &(base[0]));
          ST(&(base[WS(rs, 2)]), VFNMSI(scaled_diff, remainder), ms, &(base[0]));
          ST(&(base[WS(rs, 1)]), VFMAI(scaled_diff, remainder), ms, &(base[WS(rs, 1)]));
     }
}
示例3: t1bv_6
/*
 * t1bv_6: in-place vector kernel over the buffer `ii`.
 *
 * Each step of [mb, me) (advancing by VL) loads six vectors at offsets
 * WS(rs, 0..5), runs inputs 1..5 through BYTW with entries of W, forms
 * three sum/difference pairs and stores six combinations back to the same
 * offsets.
 *
 *   ri      - not referenced by this function
 *   ii      - base of the data that is read and overwritten
 *   W       - table consumed through BYTW; TWVL * 10 entries per step
 *   rs      - stride handed to WS()
 *   mb, me  - first step and one-past-last step
 *   ms      - stride between consecutive steps (also passed to LD/ST)
 *
 * NOTE(review): presumably an FFTW backward radix-6 twiddle codelet; the
 * vector macros are defined outside this excerpt.
 */
static void t1bv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
     {
          INT step;
          R *base = ii;

          /* Skip the W entries that belong to the steps before mb. */
          W = W + (mb * ((TWVL / VL) * 10));

          for (step = mb; step < me; step = step + VL, base = base + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(6, rs)) {
               V in0, in1, in2, in3, in4, in5;
               V tw1, tw2, tw3, tw4, tw5;
               V s03, d03, s41, d41, s25, d25;
               V sum_s, scl_s, rem_s;
               V sum_d, scl_d, rem_d;

               /* All six loads come first, then all five BYTW calls. */
               in0 = LD(&(base[0]), ms, &(base[0]));
               in3 = LD(&(base[WS(rs, 3)]), ms, &(base[WS(rs, 1)]));
               in4 = LD(&(base[WS(rs, 4)]), ms, &(base[0]));
               in1 = LD(&(base[WS(rs, 1)]), ms, &(base[WS(rs, 1)]));
               in2 = LD(&(base[WS(rs, 2)]), ms, &(base[0]));
               in5 = LD(&(base[WS(rs, 5)]), ms, &(base[WS(rs, 1)]));

               tw3 = BYTW(&(W[TWVL * 4]), in3);
               tw4 = BYTW(&(W[TWVL * 6]), in4);
               tw1 = BYTW(&(W[0]), in1);
               tw2 = BYTW(&(W[TWVL * 2]), in2);
               tw5 = BYTW(&(W[TWVL * 8]), in5);

               /* Pairwise sums and differences: (0,3), (4,1), (2,5). */
               s03 = VADD(in0, tw3);
               d03 = VSUB(in0, tw3);
               s41 = VADD(tw4, tw1);
               d41 = VSUB(tw4, tw1);
               s25 = VADD(tw2, tw5);
               d25 = VSUB(tw2, tw5);

               sum_s = VADD(s25, s41);
               scl_s = VMUL(LDK(KP866025403), VSUB(s25, s41));
               sum_d = VADD(d25, d41);
               scl_d = VMUL(LDK(KP866025403), VSUB(d25, d41));

               ST(&(base[0]), VADD(s03, sum_s), ms, &(base[0]));
               rem_s = VFNMS(LDK(KP500000000), sum_s, s03);
               ST(&(base[WS(rs, 3)]), VADD(d03, sum_d), ms, &(base[WS(rs, 1)]));
               rem_d = VFNMS(LDK(KP500000000), sum_d, d03);

               ST(&(base[WS(rs, 4)]), VFMAI(scl_s, rem_s), ms, &(base[0]));
               ST(&(base[WS(rs, 2)]), VFNMSI(scl_s, rem_s), ms, &(base[0]));
               ST(&(base[WS(rs, 5)]), VFNMSI(scl_d, rem_d), ms, &(base[WS(rs, 1)]));
               ST(&(base[WS(rs, 1)]), VFMAI(scl_d, rem_d), ms, &(base[WS(rs, 1)]));
          }
     }
     VLEAVE();
}
示例4: t1bv_6
/*
 * t1bv_6 (count/dist variant): in-place vector kernel over the buffer `ii`.
 *
 * The loop counts `m` down by VL until it is no longer positive.  Each pass
 * loads six vectors at offsets WS(ios, 0..5), runs inputs 1..5 through BYTW
 * with entries of W, and stores six combinations back to the same offsets.
 *
 *   ri    - not referenced by this function
 *   ii    - base of the data that is read and overwritten
 *   W     - table consumed through BYTW; advanced by TWVL * 10 per pass
 *   ios   - stride handed to WS()
 *   m     - number of elements to process (consumed VL at a time)
 *   dist  - stride between consecutive passes (also passed to LD/ST)
 *
 * Returns W after it has been advanced past every pass.
 *
 * NOTE(review): presumably an FFTW backward radix-6 twiddle codelet from an
 * older release; the vector macros are defined outside this excerpt.
 */
static const R *t1bv_6(R *ri, R *ii, const R *W, stride ios, int m, int dist)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
     int left;
     R *base = ii;

     BEGIN_SIMD();
     for (left = m; left > 0; left = left - VL, base = base + (VL * dist), W = W + (TWVL * 10)) {
          V in0, in1, in2, in3, in4, in5;
          V tw1, tw2, tw3, tw4, tw5;
          V s03, d03, s41, d41, s25, d25;
          V rot_d, sum_d, rem_d;
          V rot_s, sum_s, rem_s;

          /* Pair (0,3). */
          in0 = LD(&(base[0]), dist, &(base[0]));
          in3 = LD(&(base[WS(ios, 3)]), dist, &(base[WS(ios, 1)]));
          tw3 = BYTW(&(W[TWVL * 4]), in3);
          d03 = VSUB(in0, tw3);
          s03 = VADD(in0, tw3);

          /* Pair (4,1). */
          in4 = LD(&(base[WS(ios, 4)]), dist, &(base[0]));
          tw4 = BYTW(&(W[TWVL * 6]), in4);
          in1 = LD(&(base[WS(ios, 1)]), dist, &(base[WS(ios, 1)]));
          tw1 = BYTW(&(W[0]), in1);
          d41 = VSUB(tw4, tw1);
          s41 = VADD(tw4, tw1);

          /* Pair (2,5). */
          in2 = LD(&(base[WS(ios, 2)]), dist, &(base[0]));
          tw2 = BYTW(&(W[TWVL * 2]), in2);
          in5 = LD(&(base[WS(ios, 5)]), dist, &(base[WS(ios, 1)]));
          tw5 = BYTW(&(W[TWVL * 8]), in5);
          d25 = VSUB(tw2, tw5);
          s25 = VADD(tw2, tw5);

          /* Outputs 1, 3, 5 are built from the pair differences. */
          rot_d = VBYI(VMUL(LDK(KP866025403), VSUB(d25, d41)));
          sum_d = VADD(d25, d41);
          rem_d = VFNMS(LDK(KP500000000), sum_d, d03);
          ST(&(base[WS(ios, 1)]), VADD(rot_d, rem_d), dist, &(base[WS(ios, 1)]));
          ST(&(base[WS(ios, 3)]), VADD(d03, sum_d), dist, &(base[WS(ios, 1)]));
          ST(&(base[WS(ios, 5)]), VSUB(rem_d, rot_d), dist, &(base[WS(ios, 1)]));

          /* Outputs 2, 0, 4 are built from the pair sums. */
          rot_s = VBYI(VMUL(LDK(KP866025403), VSUB(s25, s41)));
          sum_s = VADD(s25, s41);
          rem_s = VFNMS(LDK(KP500000000), sum_s, s03);
          ST(&(base[WS(ios, 2)]), VSUB(rem_s, rot_s), dist, &(base[0]));
          ST(&(base[0]), VADD(s03, sum_s), dist, &(base[0]));
          ST(&(base[WS(ios, 4)]), VADD(rot_s, rem_s), dist, &(base[0]));
     }
     END_SIMD();
     return W;
}
示例5: t1buv_5
/*
 * t1buv_5: in-place vector kernel over the buffer `ii`.
 *
 * Each step of [mb, me) (advancing by VL) loads five vectors at offsets
 * WS(rs, 0..4), runs inputs 1..4 through BYTW with entries of W, and
 * stores five combinations back to the same offsets.
 *
 *   ri      - not referenced by this function
 *   ii      - base of the data that is read and overwritten
 *   W       - table consumed through BYTW; TWVL * 8 entries per step
 *   rs      - stride handed to WS()
 *   mb, me  - first step and one-past-last step
 *   ms      - stride between consecutive steps (also passed to LD/ST)
 *
 * NOTE(review): presumably an FFTW backward radix-5 twiddle codelet; the
 * vector macros are defined outside this excerpt.
 */
static void t1buv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
     DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
     {
          INT step;
          R *base = ii;

          /* Skip the W entries that belong to the steps before mb. */
          W = W + (mb * ((TWVL / VL) * 8));

          for (step = mb; step < me; step = step + VL, base = base + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) {
               V in0, in1, in2, in3, in4;
               V tw1, tw2, tw3, tw4;
               V s14, d14, s23, d23;
               V sum_diff, sum_all, scl_a, scl_b;
               V rem, rem_a, rem_b;

               in0 = LD(&(base[0]), ms, &(base[0]));
               in1 = LD(&(base[WS(rs, 1)]), ms, &(base[WS(rs, 1)]));
               in3 = LD(&(base[WS(rs, 3)]), ms, &(base[WS(rs, 1)]));
               in4 = LD(&(base[WS(rs, 4)]), ms, &(base[0]));
               in2 = LD(&(base[WS(rs, 2)]), ms, &(base[0]));

               tw1 = BYTW(&(W[0]), in1);
               tw3 = BYTW(&(W[TWVL * 4]), in3);
               tw4 = BYTW(&(W[TWVL * 6]), in4);
               tw2 = BYTW(&(W[TWVL * 2]), in2);

               /* Pairwise sums and differences: (1,4) and (2,3). */
               s14 = VADD(tw1, tw4);
               d14 = VSUB(tw1, tw4);
               s23 = VADD(tw2, tw3);
               d23 = VSUB(tw2, tw3);

               sum_diff = VSUB(s14, s23);
               sum_all = VADD(s14, s23);
               scl_b = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), d14, d23));
               scl_a = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), d23, d14));
               rem = VFNMS(LDK(KP250000000), sum_all, in0);

               ST(&(base[0]), VADD(in0, sum_all), ms, &(base[0]));

               rem_b = VFNMS(LDK(KP559016994), sum_diff, rem);
               rem_a = VFMA(LDK(KP559016994), sum_diff, rem);

               ST(&(base[WS(rs, 2)]), VFNMSI(scl_b, rem_b), ms, &(base[0]));
               ST(&(base[WS(rs, 3)]), VFMAI(scl_b, rem_b), ms, &(base[WS(rs, 1)]));
               ST(&(base[WS(rs, 4)]), VFNMSI(scl_a, rem_a), ms, &(base[0]));
               ST(&(base[WS(rs, 1)]), VFMAI(scl_a, rem_a), ms, &(base[WS(rs, 1)]));
          }
     }
     VLEAVE();
}
示例6: t1buv_6
/*
 * t1buv_6: in-place vector kernel over the buffer `ii`.
 *
 * Each step of [mb, me) (advancing by VL) loads six vectors at offsets
 * WS(rs, 0..5), runs inputs 1..5 through BYTW with entries of W, and
 * stores six combinations back to the same offsets.
 *
 *   ri      - not referenced by this function
 *   ii      - base of the data that is read and overwritten
 *   W       - table consumed through BYTW; TWVL * 10 entries per step
 *   rs      - stride handed to WS()
 *   mb, me  - first step and one-past-last step
 *   ms      - stride between consecutive steps (also passed to LD/ST)
 *
 * NOTE(review): presumably an FFTW backward radix-6 twiddle codelet; the
 * vector macros are defined outside this excerpt.
 */
static void t1buv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
     INT step;
     R *base = ii;

     /* Skip the W entries that belong to the steps before mb. */
     W = W + (mb * ((TWVL / VL) * 10));

     for (step = mb; step < me; step = step + VL, base = base + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(rs)) {
          V in0, in1, in2, in3, in4, in5;
          V tw1, tw2, tw3, tw4, tw5;
          V s03, d03, s41, d41, s25, d25;
          V rot_d, sum_d, rem_d;
          V rot_s, sum_s, rem_s;

          /* Pair (0,3). */
          in0 = LD(&(base[0]), ms, &(base[0]));
          in3 = LD(&(base[WS(rs, 3)]), ms, &(base[WS(rs, 1)]));
          tw3 = BYTW(&(W[TWVL * 4]), in3);
          d03 = VSUB(in0, tw3);
          s03 = VADD(in0, tw3);

          /* Pair (4,1). */
          in4 = LD(&(base[WS(rs, 4)]), ms, &(base[0]));
          tw4 = BYTW(&(W[TWVL * 6]), in4);
          in1 = LD(&(base[WS(rs, 1)]), ms, &(base[WS(rs, 1)]));
          tw1 = BYTW(&(W[0]), in1);
          d41 = VSUB(tw4, tw1);
          s41 = VADD(tw4, tw1);

          /* Pair (2,5). */
          in2 = LD(&(base[WS(rs, 2)]), ms, &(base[0]));
          tw2 = BYTW(&(W[TWVL * 2]), in2);
          in5 = LD(&(base[WS(rs, 5)]), ms, &(base[WS(rs, 1)]));
          tw5 = BYTW(&(W[TWVL * 8]), in5);
          d25 = VSUB(tw2, tw5);
          s25 = VADD(tw2, tw5);

          /* Outputs 1, 3, 5 are built from the pair differences. */
          rot_d = VBYI(VMUL(LDK(KP866025403), VSUB(d25, d41)));
          sum_d = VADD(d25, d41);
          rem_d = VFNMS(LDK(KP500000000), sum_d, d03);
          ST(&(base[WS(rs, 1)]), VADD(rot_d, rem_d), ms, &(base[WS(rs, 1)]));
          ST(&(base[WS(rs, 3)]), VADD(d03, sum_d), ms, &(base[WS(rs, 1)]));
          ST(&(base[WS(rs, 5)]), VSUB(rem_d, rot_d), ms, &(base[WS(rs, 1)]));

          /* Outputs 2, 0, 4 are built from the pair sums. */
          rot_s = VBYI(VMUL(LDK(KP866025403), VSUB(s25, s41)));
          sum_s = VADD(s25, s41);
          rem_s = VFNMS(LDK(KP500000000), sum_s, s03);
          ST(&(base[WS(rs, 2)]), VSUB(rem_s, rot_s), ms, &(base[0]));
          ST(&(base[0]), VADD(s03, sum_s), ms, &(base[0]));
          ST(&(base[WS(rs, 4)]), VADD(rot_s, rem_s), ms, &(base[0]));
     }
}
示例7: t1bv_7
/*
 * t1bv_7: in-place vector kernel over the buffer `ii`.
 *
 * Each step of [mb, me) (advancing by VL) loads seven vectors at offsets
 * WS(rs, 0..6), runs inputs 1..6 through BYTW with entries of W, forms the
 * sum and difference of the pairs (2,5), (1,6) and (3,4), and stores seven
 * combinations back to the same offsets.
 *
 *   ri      - not referenced by this function
 *   ii      - base of the data that is read and overwritten
 *   W       - table consumed through BYTW; TWVL * 12 entries per step
 *   rs      - stride handed to WS()
 *   mb, me  - first step and one-past-last step
 *   ms      - stride between consecutive steps (also passed to LD/ST)
 *
 * NOTE(review): presumably an FFTW backward radix-7 twiddle codelet; the
 * vector macros are defined outside this excerpt.
 */
static void t1bv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP222520933, +0.222520933956314404288902564496794759466355569);
     DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
     DVK(KP623489801, +0.623489801858733530525004884004239810632274731);
     DVK(KP433883739, +0.433883739117558120475768332848358754609990728);
     DVK(KP781831482, +0.781831482468029808708444526674057750232334519);
     DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
     INT step;
     R *base = ii;

     /* Skip the W entries that belong to the steps before mb. */
     W = W + (mb * ((TWVL / VL) * 12));

     for (step = mb; step < me; step = step + VL, base = base + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(rs)) {
          V in0, in1, in2, in3, in4, in5, in6;
          V tw1, tw2, tw3, tw4, tw5, tw6;
          V s25, d25, s16, d16, s34, d34;
          V rot1, mix1, rot2, mix2, rot3, mix3;

          in0 = LD(&(base[0]), ms, &(base[0]));

          /* Pair (2,5). */
          in2 = LD(&(base[WS(rs, 2)]), ms, &(base[0]));
          tw2 = BYTW(&(W[TWVL * 2]), in2);
          in5 = LD(&(base[WS(rs, 5)]), ms, &(base[WS(rs, 1)]));
          tw5 = BYTW(&(W[TWVL * 8]), in5);
          d25 = VSUB(tw2, tw5);
          s25 = VADD(tw2, tw5);

          /* Pair (1,6). */
          in1 = LD(&(base[WS(rs, 1)]), ms, &(base[WS(rs, 1)]));
          tw1 = BYTW(&(W[0]), in1);
          in6 = LD(&(base[WS(rs, 6)]), ms, &(base[0]));
          tw6 = BYTW(&(W[TWVL * 10]), in6);
          d16 = VSUB(tw1, tw6);
          s16 = VADD(tw1, tw6);

          /* Pair (3,4). */
          in3 = LD(&(base[WS(rs, 3)]), ms, &(base[WS(rs, 1)]));
          tw3 = BYTW(&(W[TWVL * 4]), in3);
          in4 = LD(&(base[WS(rs, 4)]), ms, &(base[0]));
          tw4 = BYTW(&(W[TWVL * 6]), in4);
          d34 = VSUB(tw3, tw4);
          s34 = VADD(tw3, tw4);

          ST(&(base[0]), VADD(in0, VADD(s16, VADD(s25, s34))), ms, &(base[0]));

          /* Outputs 2 and 5. */
          rot2 = VBYI(VFNMS(LDK(KP781831482), d34, VFNMS(LDK(KP433883739), d25, VMUL(LDK(KP974927912), d16))));
          mix2 = VFMA(LDK(KP623489801), s34, VFNMS(LDK(KP900968867), s25, VFNMS(LDK(KP222520933), s16, in0)));
          ST(&(base[WS(rs, 2)]), VADD(rot2, mix2), ms, &(base[0]));
          ST(&(base[WS(rs, 5)]), VSUB(mix2, rot2), ms, &(base[WS(rs, 1)]));

          /* Outputs 3 and 4. */
          rot3 = VBYI(VFMA(LDK(KP433883739), d16, VFNMS(LDK(KP781831482), d25, VMUL(LDK(KP974927912), d34))));
          mix3 = VFMA(LDK(KP623489801), s25, VFNMS(LDK(KP222520933), s34, VFNMS(LDK(KP900968867), s16, in0)));
          ST(&(base[WS(rs, 3)]), VADD(rot3, mix3), ms, &(base[WS(rs, 1)]));
          ST(&(base[WS(rs, 4)]), VSUB(mix3, rot3), ms, &(base[0]));

          /* Outputs 1 and 6. */
          rot1 = VBYI(VFMA(LDK(KP781831482), d16, VFMA(LDK(KP974927912), d25, VMUL(LDK(KP433883739), d34))));
          mix1 = VFMA(LDK(KP623489801), s16, VFNMS(LDK(KP900968867), s34, VFNMS(LDK(KP222520933), s25, in0)));
          ST(&(base[WS(rs, 1)]), VADD(rot1, mix1), ms, &(base[WS(rs, 1)]));
          ST(&(base[WS(rs, 6)]), VSUB(mix1, rot1), ms, &(base[0]));
     }
}
示例8: q1bv_2
/*
 * q1bv_2: in-place vector kernel over a 2 x 2 arrangement in the buffer `ii`.
 *
 * Each step of [mb, me) (advancing by VL) loads the four vectors addressed
 * by WS(vs, 0..1) + WS(rs, 0..1).  For each vs row it forms the sum and the
 * BYTW-processed difference of the two rs entries.  The results are written
 * back with the roles of rs and vs swapped: the two sums land at rs offsets
 * 0 and 1 of vs row 0, the two BYTW results at rs offsets 0 and 1 of vs
 * row 1.
 *
 *   ri      - not referenced by this function
 *   ii      - base of the data that is read and overwritten
 *   W       - table consumed through BYTW; TWVL * 2 entries per step
 *   rs, vs  - the two strides handed to WS()
 *   mb, me  - first step and one-past-last step
 *   ms      - stride between consecutive steps (also passed to LD/ST)
 *
 * NOTE(review): presumably an FFTW backward radix-2 "q1" (transposing)
 * twiddle codelet; the vector macros are defined outside this excerpt.
 */
static void q1bv_2(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     R *base = ii;
     INT step;

     /* Skip the W entries that belong to the steps before mb. */
     W = W + (mb * ((TWVL / VL) * 2));

     for (step = mb; step < me; step = step + VL, base = base + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(4, vs)) {
          V a0, a1, b0, b1;
          V a_tw, b_tw;

          /* Row vs = 0. */
          a0 = LD(&(base[0]), ms, &(base[0]));
          a1 = LD(&(base[WS(rs, 1)]), ms, &(base[WS(rs, 1)]));
          a_tw = BYTW(&(W[0]), VSUB(a0, a1));

          /* Row vs = 1. */
          b0 = LD(&(base[WS(vs, 1)]), ms, &(base[WS(vs, 1)]));
          b1 = LD(&(base[WS(vs, 1) + WS(rs, 1)]), ms, &(base[WS(vs, 1) + WS(rs, 1)]));
          b_tw = BYTW(&(W[0]), VSUB(b0, b1));

          ST(&(base[WS(vs, 1)]), a_tw, ms, &(base[WS(vs, 1)]));
          ST(&(base[WS(vs, 1) + WS(rs, 1)]), b_tw, ms, &(base[WS(vs, 1) + WS(rs, 1)]));
          ST(&(base[0]), VADD(a0, a1), ms, &(base[0]));
          ST(&(base[WS(rs, 1)]), VADD(b0, b1), ms, &(base[WS(rs, 1)]));
     }
     VLEAVE();
}
示例9: t1bv_2
/*
 * t1bv_2: in-place vector kernel over the buffer `ii`.
 *
 * Each step of [mb, me) (advancing by VL) loads two vectors at offsets
 * WS(rs, 0..1), runs the second through BYTW with W[0], and stores their
 * difference at offset 1 and their sum at offset 0.
 *
 *   ri      - not referenced by this function
 *   ii      - base of the data that is read and overwritten
 *   W       - table consumed through BYTW; TWVL * 2 entries per step
 *   rs      - stride handed to WS()
 *   mb, me  - first step and one-past-last step
 *   ms      - stride between consecutive steps (also passed to LD/ST)
 *
 * NOTE(review): presumably an FFTW backward radix-2 twiddle codelet; the
 * vector macros are defined outside this excerpt.
 */
static void t1bv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     R *base = ii;
     INT step;

     /* Skip the W entries that belong to the steps before mb. */
     W = W + (mb * ((TWVL / VL) * 2));

     for (step = mb; step < me; step = step + VL, base = base + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(rs)) {
          V in0, in1, tw1;

          in0 = LD(&(base[0]), ms, &(base[0]));
          in1 = LD(&(base[WS(rs, 1)]), ms, &(base[WS(rs, 1)]));
          tw1 = BYTW(&(W[0]), in1);

          ST(&(base[WS(rs, 1)]), VSUB(in0, tw1), ms, &(base[WS(rs, 1)]));
          ST(&(base[0]), VADD(in0, tw1), ms, &(base[0]));
     }
}
示例10: t2bv_2
/*
 * t2bv_2: in-place vector kernel over the buffer `ii`.
 *
 * The loop counts `m` down by VL until it is no longer positive.  Each pass
 * loads two vectors at offsets WS(ios, 0..1), runs the second through BYTW
 * with W[0], and stores their difference at offset 1 and their sum at
 * offset 0.
 *
 *   ri    - not referenced by this function
 *   ii    - base of the data that is read and overwritten
 *   W     - table consumed through BYTW; advanced by TWVL * 2 per pass
 *   ios   - stride handed to WS()
 *   m     - number of elements to process (consumed VL at a time)
 *   dist  - stride between consecutive passes (also passed to LD/ST)
 *
 * Returns W after it has been advanced past every pass.
 *
 * NOTE(review): presumably an FFTW backward radix-2 twiddle codelet from an
 * older release; the vector macros are defined outside this excerpt.
 */
static const R *t2bv_2(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
{
     R *base = ii;
     INT left;

     for (left = m; left > 0; left = left - VL, base = base + (VL * dist), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(ios)) {
          V in0, in1, tw1;

          in0 = LD(&(base[0]), dist, &(base[0]));
          in1 = LD(&(base[WS(ios, 1)]), dist, &(base[WS(ios, 1)]));
          tw1 = BYTW(&(W[0]), in1);

          ST(&(base[WS(ios, 1)]), VSUB(in0, tw1), dist, &(base[WS(ios, 1)]));
          ST(&(base[0]), VADD(in0, tw1), dist, &(base[0]));
     }
     return W;
}
示例11: t1buv_8
/*
 * t1buv_8: in-place vector kernel over the buffer `ii`.
 *
 * Each step of [mb, me) (advancing by VL) loads eight vectors at offsets
 * WS(rs, 0..7), runs inputs 1..7 through BYTW with entries of W, forms the
 * sum and difference of the pairs (0,4), (2,6), (1,5) and (7,3), and stores
 * eight combinations back to the same offsets.
 *
 *   ri      - not referenced by this function
 *   ii      - base of the data that is read and overwritten
 *   W       - table consumed through BYTW; TWVL * 14 entries per step
 *   rs      - stride handed to WS()
 *   mb, me  - first step and one-past-last step
 *   ms      - stride between consecutive steps (also passed to LD/ST)
 *
 * NOTE(review): presumably an FFTW backward radix-8 twiddle codelet; the
 * vector macros are defined outside this excerpt.
 */
static void t1buv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
     INT step;
     R *base = ii;

     /* Skip the W entries that belong to the steps before mb. */
     W = W + (mb * ((TWVL / VL) * 14));

     for (step = mb; step < me; step = step + VL, base = base + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(rs)) {
          V in0, in1, in2, in3, in4, in5, in6, in7;
          V tw1, tw2, tw3, tw4, tw5, tw6, tw7;
          V s04, d04, s26, d26, s15, d15, s73, d73;
          V a_diff, b_rot, a_sum, b_sum;
          V q_diff, q_sum, rot_lo, rot_hi, lin_lo, lin_hi;

          /* Pair (0,4). */
          in0 = LD(&(base[0]), ms, &(base[0]));
          in4 = LD(&(base[WS(rs, 4)]), ms, &(base[0]));
          tw4 = BYTW(&(W[TWVL * 6]), in4);
          d04 = VSUB(in0, tw4);
          s04 = VADD(in0, tw4);

          /* Pair (2,6). */
          in2 = LD(&(base[WS(rs, 2)]), ms, &(base[0]));
          tw2 = BYTW(&(W[TWVL * 2]), in2);
          in6 = LD(&(base[WS(rs, 6)]), ms, &(base[0]));
          tw6 = BYTW(&(W[TWVL * 10]), in6);
          d26 = VSUB(tw2, tw6);
          s26 = VADD(tw2, tw6);

          /* Pair (1,5). */
          in1 = LD(&(base[WS(rs, 1)]), ms, &(base[WS(rs, 1)]));
          tw1 = BYTW(&(W[0]), in1);
          in5 = LD(&(base[WS(rs, 5)]), ms, &(base[WS(rs, 1)]));
          tw5 = BYTW(&(W[TWVL * 8]), in5);
          d15 = VSUB(tw1, tw5);
          s15 = VADD(tw1, tw5);

          /* Pair (7,3). */
          in7 = LD(&(base[WS(rs, 7)]), ms, &(base[WS(rs, 1)]));
          tw7 = BYTW(&(W[TWVL * 12]), in7);
          in3 = LD(&(base[WS(rs, 3)]), ms, &(base[WS(rs, 1)]));
          tw3 = BYTW(&(W[TWVL * 4]), in3);
          d73 = VSUB(tw7, tw3);
          s73 = VADD(tw7, tw3);

          /* Outputs 6, 2, 4, 0 are built from the four pair sums. */
          a_diff = VSUB(s04, s26);
          b_rot = VBYI(VSUB(s15, s73));
          ST(&(base[WS(rs, 6)]), VSUB(a_diff, b_rot), ms, &(base[0]));
          ST(&(base[WS(rs, 2)]), VADD(a_diff, b_rot), ms, &(base[0]));
          a_sum = VADD(s04, s26);
          b_sum = VADD(s15, s73);
          ST(&(base[WS(rs, 4)]), VSUB(a_sum, b_sum), ms, &(base[0]));
          ST(&(base[0]), VADD(a_sum, b_sum), ms, &(base[0]));

          /* Outputs 3, 7, 5, 1 are built from the four pair differences. */
          q_diff = VMUL(LDK(KP707106781), VSUB(d15, d73));
          rot_lo = VBYI(VSUB(q_diff, d26));
          rot_hi = VBYI(VADD(d26, q_diff));
          q_sum = VMUL(LDK(KP707106781), VADD(d15, d73));
          lin_lo = VSUB(d04, q_sum);
          lin_hi = VADD(d04, q_sum);
          ST(&(base[WS(rs, 3)]), VADD(rot_lo, lin_lo), ms, &(base[WS(rs, 1)]));
          ST(&(base[WS(rs, 7)]), VSUB(lin_hi, rot_hi), ms, &(base[WS(rs, 1)]));
          ST(&(base[WS(rs, 5)]), VSUB(lin_lo, rot_lo), ms, &(base[WS(rs, 1)]));
          ST(&(base[WS(rs, 1)]), VADD(rot_hi, lin_hi), ms, &(base[WS(rs, 1)]));
     }
}
示例12: t1bv_9
static const R *t1bv_9(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
{
DVK(KP939692620, +0.939692620785908384054109277324731469936208134);
DVK(KP907603734, +0.907603734547952313649323976213898122064543220);
DVK(KP666666666, +0.666666666666666666666666666666666666666666667);
DVK(KP852868531, +0.852868531952443209628250963940074071936020296);
DVK(KP879385241, +0.879385241571816768108218554649462939872416269);
DVK(KP984807753, +0.984807753012208059366743024589523013670643252);
DVK(KP826351822, +0.826351822333069651148283373230685203999624323);
DVK(KP347296355, +0.347296355333860697703433253538629592000751354);
DVK(KP898197570, +0.898197570222573798468955502359086394667167570);
DVK(KP673648177, +0.673648177666930348851716626769314796000375677);
DVK(KP420276625, +0.420276625461206169731530603237061658838781920);
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
DVK(KP586256827, +0.586256827714544512072145703099641959914944179);
DVK(KP968908795, +0.968908795874236621082202410917456709164223497);
DVK(KP726681596, +0.726681596905677465811651808188092531873167623);
DVK(KP439692620, +0.439692620785908384054109277324731469936208134);
DVK(KP203604859, +0.203604859554852403062088995281827210665664861);
DVK(KP152703644, +0.152703644666139302296566746461370407999248646);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
INT i;
R *x;
x = ii;
for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 16), MAKE_VOLATILE_STRIDE(ios)) {
V T1, T3, T5, T9, Tn, Tb, Td, Th, Tj, Tx, T6;
T1 = LD(&(x[0]), dist, &(x[0]));
{
V T2, T4, T8, Tm;
T2 = LD(&(x[WS(ios, 3)]), dist, &(x[WS(ios, 1)]));
T4 = LD(&(x[WS(ios, 6)]), dist, &(x[0]));
T8 = LD(&(x[WS(ios, 2)]), dist, &(x[0]));
Tm = LD(&(x[WS(ios, 1)]), dist, &(x[WS(ios, 1)]));
{
V Ta, Tc, Tg, Ti;
Ta = LD(&(x[WS(ios, 5)]), dist, &(x[WS(ios, 1)]));
Tc = LD(&(x[WS(ios, 8)]), dist, &(x[0]));
Tg = LD(&(x[WS(ios, 4)]), dist, &(x[0]));
Ti = LD(&(x[WS(ios, 7)]), dist, &(x[WS(ios, 1)]));
T3 = BYTW(&(W[TWVL * 4]), T2);
T5 = BYTW(&(W[TWVL * 10]), T4);
T9 = BYTW(&(W[TWVL * 2]), T8);
Tn = BYTW(&(W[0]), Tm);
Tb = BYTW(&(W[TWVL * 8]), Ta);
Td = BYTW(&(W[TWVL * 14]), Tc);
Th = BYTW(&(W[TWVL * 6]), Tg);
Tj = BYTW(&(W[TWVL * 12]), Ti);
}
}
Tx = VSUB(T3, T5);
T6 = VADD(T3, T5);
{
V Tl, Te, Tk, To, T7, TN;
Tl = VSUB(Td, Tb);
Te = VADD(Tb, Td);
Tk = VSUB(Th, Tj);
To = VADD(Th, Tj);
T7 = VFNMS(LDK(KP500000000), T6, T1);
TN = VADD(T1, T6);
{
V Tf, TP, Tp, TO;
Tf = VFNMS(LDK(KP500000000), Te, T9);
TP = VADD(T9, Te);
Tp = VFNMS(LDK(KP500000000), To, Tn);
TO = VADD(Tn, To);
{
V Tz, TC, Tu, TD, TA, Tq, TQ, TS;
Tz = VFNMS(LDK(KP152703644), Tl, Tf);
TC = VFMA(LDK(KP203604859), Tf, Tl);
Tu = VFNMS(LDK(KP439692620), Tk, Tf);
TD = VFNMS(LDK(KP726681596), Tk, Tp);
TA = VFMA(LDK(KP968908795), Tp, Tk);
Tq = VFNMS(LDK(KP586256827), Tp, Tl);
TQ = VADD(TO, TP);
TS = VMUL(LDK(KP866025403), VSUB(TO, TP));
{
V TI, TB, TH, TE, Tr, TR, Tw, Tv;
Tv = VFNMS(LDK(KP420276625), Tu, Tl);
TI = VFMA(LDK(KP673648177), TA, Tz);
TB = VFNMS(LDK(KP673648177), TA, Tz);
TH = VFNMS(LDK(KP898197570), TD, TC);
TE = VFMA(LDK(KP898197570), TD, TC);
Tr = VFNMS(LDK(KP347296355), Tq, Tk);
ST(&(x[0]), VADD(TQ, TN), dist, &(x[0]));
TR = VFNMS(LDK(KP500000000), TQ, TN);
Tw = VFNMS(LDK(KP826351822), Tv, Tp);
{
V TM, TL, TF, TJ, Ts, Ty, TG, TK, Tt;
TM = VMUL(LDK(KP984807753), VFMA(LDK(KP879385241), Tx, TI));
TL = VFMA(LDK(KP852868531), TE, T7);
TF = VFNMS(LDK(KP500000000), TE, TB);
TJ = VFMA(LDK(KP666666666), TI, TH);
Ts = VFNMS(LDK(KP907603734), Tr, Tf);
ST(&(x[WS(ios, 6)]), VFNMSI(TS, TR), dist, &(x[0]));
ST(&(x[WS(ios, 3)]), VFMAI(TS, TR), dist, &(x[WS(ios, 1)]));
Ty = VMUL(LDK(KP984807753), VFNMS(LDK(KP879385241), Tx, Tw));
ST(&(x[WS(ios, 8)]), VFNMSI(TM, TL), dist, &(x[0]));
ST(&(x[WS(ios, 1)]), VFMAI(TM, TL), dist, &(x[WS(ios, 1)]));
TG = VFMA(LDK(KP852868531), TF, T7);
TK = VMUL(LDK(KP866025403), VFNMS(LDK(KP852868531), TJ, Tx));
//.........这里部分代码省略.........
示例13: t1bv_15
static void t1bv_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
DVK(KP216506350, +0.216506350946109661690930792688234045867850657);
DVK(KP484122918, +0.484122918275927110647408174972799951354115213);
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
DVK(KP509036960, +0.509036960455127183450980863393907648510733164);
DVK(KP823639103, +0.823639103546331925877420039278190003029660514);
DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
{
INT m;
R *x;
x = ii;
for (m = mb, W = W + (mb * ((TWVL / VL) * 28)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 28), MAKE_VOLATILE_STRIDE(rs)) {
V Ts, TV, T1f, TZ, T10, Tb, Tm, Tt, T1j, T1k, T1l, TI, TM, TR, Tz;
V TD, TQ, T1g, T1h, T1i;
{
V TT, Tr, Tp, Tq, To, TU;
TT = LD(&(x[0]), ms, &(x[0]));
Tq = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Tr = BYTW(&(W[TWVL * 18]), Tq);
To = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Tp = BYTW(&(W[TWVL * 8]), To);
Ts = VSUB(Tp, Tr);
TU = VADD(Tp, Tr);
TV = VFNMS(LDK(KP500000000), TU, TT);
T1f = VADD(TT, TU);
}
{
V Tx, TG, TK, TB, T5, Ty, Tg, TH, Tl, TL, Ta, TC;
{
V Tw, TF, TJ, TA;
Tw = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Tx = BYTW(&(W[TWVL * 4]), Tw);
TF = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
TG = BYTW(&(W[TWVL * 10]), TF);
TJ = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
TK = BYTW(&(W[TWVL * 16]), TJ);
TA = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
TB = BYTW(&(W[TWVL * 22]), TA);
}
{
V T2, T4, T1, T3;
T1 = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
T2 = BYTW(&(W[TWVL * 14]), T1);
T3 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
T4 = BYTW(&(W[TWVL * 24]), T3);
T5 = VSUB(T2, T4);
Ty = VADD(T2, T4);
}
{
V Td, Tf, Tc, Te;
Tc = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Td = BYTW(&(W[TWVL * 20]), Tc);
Te = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Tf = BYTW(&(W[0]), Te);
Tg = VSUB(Td, Tf);
TH = VADD(Td, Tf);
}
{
V Ti, Tk, Th, Tj;
Th = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Ti = BYTW(&(W[TWVL * 26]), Th);
Tj = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Tk = BYTW(&(W[TWVL * 6]), Tj);
Tl = VSUB(Ti, Tk);
TL = VADD(Ti, Tk);
}
{
V T7, T9, T6, T8;
T6 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
T7 = BYTW(&(W[TWVL * 2]), T6);
T8 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
T9 = BYTW(&(W[TWVL * 12]), T8);
Ta = VSUB(T7, T9);
TC = VADD(T7, T9);
}
TZ = VSUB(T5, Ta);
T10 = VSUB(Tg, Tl);
Tb = VADD(T5, Ta);
Tm = VADD(Tg, Tl);
Tt = VADD(Tb, Tm);
T1j = VADD(TG, TH);
T1k = VADD(TK, TL);
T1l = VADD(T1j, T1k);
TI = VFNMS(LDK(KP500000000), TH, TG);
TM = VFNMS(LDK(KP500000000), TL, TK);
TR = VADD(TI, TM);
Tz = VFNMS(LDK(KP500000000), Ty, Tx);
TD = VFNMS(LDK(KP500000000), TC, TB);
TQ = VADD(Tz, TD);
T1g = VADD(Tx, Ty);
T1h = VADD(TB, TC);
T1i = VADD(T1g, T1h);
}
{
V T1o, T1m, T1n, T1s, T1t, T1q, T1r, T1u, T1p;
//.........这里部分代码省略.........
示例14: t1bv_16
static void t1bv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
DVK(KP382683432, +0.382683432365089771728459984030398866761344562);
DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
{
INT m;
R *x;
x = ii;
for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(rs)) {
V TJ, T1b, TD, T1c, T17, T18, Ty, TK, T10, T11, T12, Tb, TM, T13, T14;
V T15, Tm, TN, TG, TI, TH;
TG = LD(&(x[0]), ms, &(x[0]));
TH = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
TI = BYTW(&(W[TWVL * 14]), TH);
TJ = VSUB(TG, TI);
T1b = VADD(TG, TI);
{
V TA, TC, Tz, TB;
Tz = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
TA = BYTW(&(W[TWVL * 6]), Tz);
TB = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
TC = BYTW(&(W[TWVL * 22]), TB);
TD = VSUB(TA, TC);
T1c = VADD(TA, TC);
}
{
V Tp, Tw, Tr, Tu, Ts, Tx;
{
V To, Tv, Tq, Tt;
To = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Tp = BYTW(&(W[TWVL * 2]), To);
Tv = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Tw = BYTW(&(W[TWVL * 10]), Tv);
Tq = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Tr = BYTW(&(W[TWVL * 18]), Tq);
Tt = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Tu = BYTW(&(W[TWVL * 26]), Tt);
}
T17 = VADD(Tp, Tr);
T18 = VADD(Tu, Tw);
Ts = VSUB(Tp, Tr);
Tx = VSUB(Tu, Tw);
Ty = VMUL(LDK(KP707106781), VSUB(Ts, Tx));
TK = VMUL(LDK(KP707106781), VADD(Ts, Tx));
}
{
V T2, T9, T4, T7, T5, Ta;
{
V T1, T8, T3, T6;
T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
T2 = BYTW(&(W[0]), T1);
T8 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
T9 = BYTW(&(W[TWVL * 24]), T8);
T3 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
T4 = BYTW(&(W[TWVL * 16]), T3);
T6 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
T7 = BYTW(&(W[TWVL * 8]), T6);
}
T10 = VADD(T2, T4);
T11 = VADD(T7, T9);
T12 = VSUB(T10, T11);
T5 = VSUB(T2, T4);
Ta = VSUB(T7, T9);
Tb = VFNMS(LDK(KP382683432), Ta, VMUL(LDK(KP923879532), T5));
TM = VFMA(LDK(KP382683432), T5, VMUL(LDK(KP923879532), Ta));
}
{
V Td, Tk, Tf, Ti, Tg, Tl;
{
V Tc, Tj, Te, Th;
Tc = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
Td = BYTW(&(W[TWVL * 28]), Tc);
Tj = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Tk = BYTW(&(W[TWVL * 20]), Tj);
Te = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Tf = BYTW(&(W[TWVL * 12]), Te);
Th = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Ti = BYTW(&(W[TWVL * 4]), Th);
}
T13 = VADD(Td, Tf);
T14 = VADD(Ti, Tk);
T15 = VSUB(T13, T14);
Tg = VSUB(Td, Tf);
Tl = VSUB(Ti, Tk);
Tm = VFMA(LDK(KP923879532), Tg, VMUL(LDK(KP382683432), Tl));
TN = VFNMS(LDK(KP382683432), Tg, VMUL(LDK(KP923879532), Tl));
}
{
V T1a, T1g, T1f, T1h;
{
V T16, T19, T1d, T1e;
T16 = VMUL(LDK(KP707106781), VSUB(T12, T15));
T19 = VSUB(T17, T18);
T1a = VBYI(VSUB(T16, T19));
T1g = VBYI(VADD(T19, T16));
T1d = VSUB(T1b, T1c);
T1e = VMUL(LDK(KP707106781), VADD(T12, T15));
T1f = VSUB(T1d, T1e);
T1h = VADD(T1d, T1e);
//.........这里部分代码省略.........
示例15: t1bv_32
static void t1bv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
DVK(KP195090322, +0.195090322016128267848284868477022240927691618);
DVK(KP980785280, +0.980785280403230449126182236134239036973933731);
DVK(KP555570233, +0.555570233019602224742830813948532874374937191);
DVK(KP831469612, +0.831469612302545237078788377617905756738560812);
DVK(KP382683432, +0.382683432365089771728459984030398866761344562);
DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
INT m;
R *x;
x = ii;
for (m = mb, W = W + (mb * ((TWVL / VL) * 62)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(rs)) {
V T4, T1D, T2P, T3h, Tf, T1y, T2K, T3i, TC, T1w, T2G, T3e, Tr, T1v, T2D;
V T3d, T1k, T20, T2y, T3a, T1r, T21, T2v, T39, TV, T1X, T2r, T37, T12, T1Y;
V T2o, T36;
{
V T1, T1C, T3, T1A, T1B, T2, T1z, T2N, T2O;
T1 = LD(&(x[0]), ms, &(x[0]));
T1B = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
T1C = BYTW(&(W[TWVL * 46]), T1B);
T2 = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
T3 = BYTW(&(W[TWVL * 30]), T2);
T1z = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
T1A = BYTW(&(W[TWVL * 14]), T1z);
T4 = VSUB(T1, T3);
T1D = VSUB(T1A, T1C);
T2N = VADD(T1, T3);
T2O = VADD(T1A, T1C);
T2P = VSUB(T2N, T2O);
T3h = VADD(T2N, T2O);
}
{
V T6, Td, T8, Tb;
{
V T5, Tc, T7, Ta;
T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
T6 = BYTW(&(W[TWVL * 6]), T5);
Tc = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
Td = BYTW(&(W[TWVL * 22]), Tc);
T7 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
T8 = BYTW(&(W[TWVL * 38]), T7);
Ta = LD(&(x[WS(rs, 28)]), ms, &(x[0]));
Tb = BYTW(&(W[TWVL * 54]), Ta);
}
{
V T9, Te, T2I, T2J;
T9 = VSUB(T6, T8);
Te = VSUB(Tb, Td);
Tf = VMUL(LDK(KP707106781), VADD(T9, Te));
T1y = VMUL(LDK(KP707106781), VSUB(T9, Te));
T2I = VADD(T6, T8);
T2J = VADD(Tb, Td);
T2K = VSUB(T2I, T2J);
T3i = VADD(T2I, T2J);
}
}
{
V Tt, TA, Tv, Ty;
{
V Ts, Tz, Tu, Tx;
Ts = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Tt = BYTW(&(W[TWVL * 10]), Ts);
Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
TA = BYTW(&(W[TWVL * 26]), Tz);
Tu = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
Tv = BYTW(&(W[TWVL * 42]), Tu);
Tx = LD(&(x[WS(rs, 30)]), ms, &(x[0]));
Ty = BYTW(&(W[TWVL * 58]), Tx);
}
{
V Tw, TB, T2E, T2F;
Tw = VSUB(Tt, Tv);
TB = VSUB(Ty, TA);
TC = VFNMS(LDK(KP382683432), TB, VMUL(LDK(KP923879532), Tw));
T1w = VFMA(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw));
T2E = VADD(Ty, TA);
T2F = VADD(Tt, Tv);
T2G = VSUB(T2E, T2F);
T3e = VADD(T2E, T2F);
}
}
{
V Ti, Tp, Tk, Tn;
{
V Th, To, Tj, Tm;
Th = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Ti = BYTW(&(W[TWVL * 2]), Th);
To = LD(&(x[WS(rs, 26)]), ms, &(x[0]));
Tp = BYTW(&(W[TWVL * 50]), To);
Tj = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
Tk = BYTW(&(W[TWVL * 34]), Tj);
Tm = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Tn = BYTW(&(W[TWVL * 18]), Tm);
}
{
V Tl, Tq, T2B, T2C;
Tl = VSUB(Ti, Tk);
Tq = VSUB(Tn, Tp);
Tr = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq));
//.........这里部分代码省略.........