本文整理汇总了C++中LDW函数的典型用法代码示例。如果您正苦于以下问题:C++ LDW函数的具体用法?C++ LDW怎么用?C++ LDW使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了LDW函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: t3fv_4
/*
 * t3fv_4: in-place vector kernel over four strided elements of the array
 * starting at ri (ii is accepted but never used in this body).
 *
 * For every m in [mb, me), stepping by VL:
 *   - two entries are read from W with LDW and a third value is derived
 *     from them with VZMULJ;
 *   - elements 0..3 (spaced by rs) are loaded, elements 1..3 are each
 *     combined with one of those three values via VZMULJ;
 *   - the four results of the add/subtract stage are stored back to the
 *     same four slots.
 *
 * W is pre-advanced by mb * ((TWVL / VL) * 4) entries and then by TWVL * 4
 * per iteration; x advances by VL * ms per iteration.
 *
 * NOTE(review): naming suggests an FFTW genfft-generated radix-4 twiddle
 * codelet -- confirm against the generator before relying on that.
 */
static void t3fv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;
     R *x = ri;

     for (m = mb, W += (mb * ((TWVL / VL) * 4));
          m < me;
          m += VL, x += (VL * ms), W += (TWVL * 4), MAKE_VOLATILE_STRIDE(rs)) {
          V tw_a, tw_b, tw_ab;
          V in0, in1, in2, in3;
          V z1, z2, z3;
          V lo_diff, hi_rot, lo_sum, hi_sum;

          /* Two stored W entries; the third factor is computed from them. */
          tw_a = LDW(&(W[0]));
          tw_b = LDW(&(W[TWVL * 2]));
          tw_ab = VZMULJ(tw_a, tw_b);

          /* All four inputs are read before anything is written back. */
          in0 = LD(&(x[0]), ms, &(x[0]));
          in3 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
          z3 = VZMULJ(tw_b, in3);
          in2 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
          z2 = VZMULJ(tw_ab, in2);
          in1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
          z1 = VZMULJ(tw_a, in1);

          /* Outputs 1 and 3. */
          lo_diff = VSUB(in0, z2);
          hi_rot = VBYI(VSUB(z1, z3));
          ST(&(x[WS(rs, 1)]), VSUB(lo_diff, hi_rot), ms, &(x[WS(rs, 1)]));
          ST(&(x[WS(rs, 3)]), VADD(lo_diff, hi_rot), ms, &(x[WS(rs, 1)]));

          /* Outputs 2 and 0. */
          lo_sum = VADD(in0, z2);
          hi_sum = VADD(z1, z3);
          ST(&(x[WS(rs, 2)]), VSUB(lo_sum, hi_sum), ms, &(x[0]));
          ST(&(x[0]), VADD(lo_sum, hi_sum), ms, &(x[0]));
     }
}
示例2: hc2cfdftv_4
/*
 * hc2cfdftv_4: in-place vector kernel over two elements of Rp and two
 * elements of Rm (Ip and Im are advanced each iteration but never
 * dereferenced here).
 *
 * For every m in [mb, me), stepping by VL, the body loads Rp[0], Rp[1]
 * with stride ms and Rm[0], Rm[1] with stride -ms, reads three entries of
 * W, combines them, scales by the constant 0.5 and writes four results
 * back to the same four slots.  Rp/Ip move forward by VL * ms while Rm/Im
 * move backward by the same amount.
 *
 * W is pre-advanced by (mb - 1) * ((TWVL / VL) * 6) entries, then by
 * TWVL * 6 per iteration.
 *
 * NOTE(review): looks like an FFTW genfft-generated SIMD codelet -- confirm.
 */
static void hc2cfdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
          INT m;

          for (m = mb, W += ((mb - 1) * ((TWVL / VL) * 6));
               m < me;
               m += VL, Rp += (VL * ms), Ip += (VL * ms), Rm -= (VL * ms), Im -= (VL * ms), W += (TWVL * 6), MAKE_VOLATILE_STRIDE(16, rs)) {
               V p0, m0, p1, m1;
               V tw0, tw2, tw4;
               V a0, b0, a1, b1;
               V a_sum, a_diff, b_diff, b_sum;
               V out_m0, out_p1, out_p0, out_m1;

               /* Inputs and W entries, in the original load order. */
               p0 = LD(&(Rp[0]), ms, &(Rp[0]));
               m0 = LD(&(Rm[0]), -ms, &(Rm[0]));
               tw0 = LDW(&(W[0]));
               p1 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
               m1 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
               tw2 = LDW(&(W[TWVL * 2]));
               tw4 = LDW(&(W[TWVL * 4]));

               /* Pair each Rp element with the matching Rm element. */
               a0 = VFMACONJ(m0, p0);
               b0 = VZMULIJ(tw0, VFNMSCONJ(m0, p0));
               a1 = VZMULJ(tw2, VFMACONJ(m1, p1));
               b1 = VZMULIJ(tw4, VFNMSCONJ(m1, p1));

               a_sum = VADD(a0, a1);
               a_diff = VSUB(a0, a1);
               b_diff = VSUB(b1, b0);
               b_sum = VADD(b0, b1);

               /* Every output is scaled by 0.5; the Rm outputs also go through VCONJ. */
               out_m0 = VCONJ(VMUL(LDK(KP500000000), VFMAI(b_diff, a_diff)));
               out_p1 = VMUL(LDK(KP500000000), VFNMSI(b_diff, a_diff));
               out_p0 = VMUL(LDK(KP500000000), VSUB(a_sum, b_sum));
               out_m1 = VCONJ(VMUL(LDK(KP500000000), VADD(b_sum, a_sum)));

               ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));
               ST(&(Rp[WS(rs, 1)]), out_p1, ms, &(Rp[WS(rs, 1)]));
               ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
               ST(&(Rm[WS(rs, 1)]), out_m1, -ms, &(Rm[WS(rs, 1)]));
          }
     }
     VLEAVE();
}
示例3: t1sv_2
/*
 * t1sv_2: in-place vector kernel over two strided elements of each of the
 * ri and ii arrays.
 *
 * For every m in [mb, me), stepping by 2 * VL, element 1 of ri/ii is
 * combined with the two W entries W[0] and W[TWVL * 1] through
 * VFMA/VFNMS/VMUL, and the sum and difference with element 0 are stored
 * back to slots 0 and 1 of both arrays.
 *
 * W is pre-advanced by mb * 2 entries, then by (2 * VL) * 2 per iteration;
 * ri and ii advance by (2 * VL) * ms per iteration.
 *
 * NOTE(review): presumably ri/ii hold real and imaginary parts and W holds
 * twiddle factors (FFTW-style split-format codelet) -- confirm.
 */
static void t1sv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     for (m = mb, W += (mb * 2);
          m < me;
          m += (2 * VL), ri += ((2 * VL) * ms), ii += ((2 * VL) * ms), W += ((2 * VL) * 2), MAKE_VOLATILE_STRIDE(4, rs)) {
          V r0, i0, r1, i1;
          V w_a, w_b;
          V acc_r, acc_i;

          r0 = LD(&(ri[0]), ms, &(ri[0]));
          i0 = LD(&(ii[0]), ms, &(ii[0]));
          r1 = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));
          i1 = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));
          w_a = LDW(&(W[0]));
          w_b = LDW(&(W[TWVL * 1]));

          /* Element 1 combined with the W pair. */
          acc_r = VFMA(w_a, r1, VMUL(w_b, i1));
          acc_i = VFNMS(w_b, r1, VMUL(w_a, i1));

          /* Differences go to slot 1, sums to slot 0. */
          ST(&(ri[WS(rs, 1)]), VSUB(r0, acc_r), ms, &(ri[WS(rs, 1)]));
          ST(&(ii[WS(rs, 1)]), VSUB(i0, acc_i), ms, &(ii[WS(rs, 1)]));
          ST(&(ri[0]), VADD(r0, acc_r), ms, &(ri[0]));
          ST(&(ii[0]), VADD(acc_i, i0), ms, &(ii[0]));
     }
     VLEAVE();
}
示例4: hc2cfdftv_4
/*
 * hc2cfdftv_4: in-place vector kernel over two elements of Rp and two
 * elements of Rm (Ip and Im are advanced each iteration but never
 * dereferenced here).
 *
 * For every m in [mb, me), stepping by VL, the body loads Rp[0], Rp[1]
 * with stride ms and Rm[0], Rm[1] with stride -ms, applies VCONJ to the Rm
 * values, combines the pairs with three W entries, scales by 0.5 and
 * stores four results back into the same slots.  The first two stores are
 * issued before the last two outputs are computed; that order is kept.
 *
 * W is pre-advanced by (mb - 1) * ((TWVL / VL) * 6) entries, then by
 * TWVL * 6 per iteration.
 *
 * NOTE(review): looks like an FFTW genfft-generated SIMD codelet -- confirm.
 */
static void hc2cfdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     for (m = mb, W += ((mb - 1) * ((TWVL / VL) * 6));
          m < me;
          m += VL, Rp += (VL * ms), Ip += (VL * ms), Rm -= (VL * ms), Im -= (VL * ms), W += (TWVL * 6), MAKE_VOLATILE_STRIDE(rs)) {
          V p0, m0, m0c, p1, m1, m1c;
          V tw0, tw2, tw4;
          V s0, d0, s1, d1;
          V s_diff, d_rot, s_sum, d_sum;
          V out_p1, out_m0, out_m1, out_p0;

          /* Element 0 of both arrays. */
          p0 = LD(&(Rp[0]), ms, &(Rp[0]));
          m0 = LD(&(Rm[0]), -ms, &(Rm[0]));
          m0c = VCONJ(m0);
          s0 = VADD(p0, m0c);
          tw0 = LDW(&(W[0]));
          d0 = VZMULIJ(tw0, VSUB(m0c, p0));

          /* Element 1 of both arrays. */
          p1 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
          m1 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
          m1c = VCONJ(m1);
          tw2 = LDW(&(W[TWVL * 2]));
          s1 = VZMULJ(tw2, VADD(p1, m1c));
          tw4 = LDW(&(W[TWVL * 4]));
          d1 = VZMULIJ(tw4, VSUB(m1c, p1));

          /* First output pair: Rp[1] and Rm[0]. */
          s_diff = VSUB(s0, s1);
          d_rot = VBYI(VSUB(d0, d1));
          out_p1 = VMUL(LDK(KP500000000), VSUB(s_diff, d_rot));
          out_m0 = VCONJ(VMUL(LDK(KP500000000), VADD(s_diff, d_rot)));
          ST(&(Rp[WS(rs, 1)]), out_p1, ms, &(Rp[WS(rs, 1)]));
          ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));

          /* Second output pair: Rm[1] and Rp[0]. */
          s_sum = VADD(s0, s1);
          d_sum = VADD(d0, d1);
          out_m1 = VCONJ(VMUL(LDK(KP500000000), VSUB(s_sum, d_sum)));
          out_p0 = VMUL(LDK(KP500000000), VADD(s_sum, d_sum));
          ST(&(Rm[WS(rs, 1)]), out_m1, -ms, &(Rm[WS(rs, 1)]));
          ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
     }
}
示例5: hc2cbdftv_4
/*
 * hc2cbdftv_4: in-place vector kernel over two elements of Rp and two
 * elements of Rm (Ip and Im are advanced each iteration but never
 * dereferenced here).
 *
 * For every m in [mb, me), stepping by VL, the body loads Rp[0], Rp[1]
 * (stride ms) and Rm[0], Rm[1] (stride -ms), pairs Rp[0] with Rm[1] and
 * Rp[1] with Rm[0], multiplies the intermediate values by three W entries
 * and stores four results back into the same slots.
 *
 * W is pre-advanced by (mb - 1) * ((TWVL / VL) * 6) entries, then by
 * TWVL * 6 per iteration.
 *
 * NOTE(review): looks like an FFTW genfft-generated SIMD codelet -- confirm.
 */
static void hc2cbdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     for (m = mb, W += ((mb - 1) * ((TWVL / VL) * 6));
          m < me;
          m += VL, Rp += (VL * ms), Ip += (VL * ms), Rm -= (VL * ms), Im -= (VL * ms), W += (TWVL * 6), MAKE_VOLATILE_STRIDE(rs)) {
          V p0, m1, p1, m0;
          V tw0, tw4, tw2;
          V sum_a, dif_a, sum_b, dif_b;
          V z2, sum_ab, z4, z0;
          V out_m1, out_p1, out_p0, out_m0;

          /* Inputs and W entries, in the original load order. */
          p0 = LD(&(Rp[0]), ms, &(Rp[0]));
          m1 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
          p1 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
          m0 = LD(&(Rm[0]), -ms, &(Rm[0]));
          tw0 = LDW(&(W[0]));
          tw4 = LDW(&(W[TWVL * 4]));
          tw2 = LDW(&(W[TWVL * 2]));

          /* Cross pairing: (Rp[0], Rm[1]) and (Rp[1], Rm[0]). */
          sum_a = VFMACONJ(m1, p0);
          dif_a = VFNMSCONJ(m1, p0);
          sum_b = VFMACONJ(m0, p1);
          dif_b = VFNMSCONJ(m0, p1);

          z2 = VZMUL(tw2, VSUB(sum_a, sum_b));
          sum_ab = VADD(sum_a, sum_b);
          z4 = VZMULI(tw4, VFNMSI(dif_b, dif_a));
          z0 = VZMULI(tw0, VFMAI(dif_b, dif_a));

          out_m1 = VCONJ(VSUB(z2, z4));
          out_p1 = VADD(z4, z2);
          out_p0 = VADD(z0, sum_ab);
          out_m0 = VCONJ(VSUB(sum_ab, z0));

          ST(&(Rm[WS(rs, 1)]), out_m1, -ms, &(Rm[WS(rs, 1)]));
          ST(&(Rp[WS(rs, 1)]), out_p1, ms, &(Rp[WS(rs, 1)]));
          ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
          ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));
     }
     VLEAVE();
}
示例6: t1sv_2
/*
 * t1sv_2: in-place vector kernel over two strided elements of each of the
 * ri and ii arrays; returns the advanced W pointer.
 *
 * The loop counter starts at m and decreases by 2 * VL until it is no
 * longer positive.  Each pass loads element 0 and element 1 (spaced by
 * ios) of both arrays, combines element 1 with W[0] and W[TWVL * 1]
 * through VMUL/VFMA/VFNMS, and stores the sum and difference with element
 * 0 back to slots 0 and 1.  ri and ii advance by (2 * VL) * dist and W by
 * (2 * VL) * 2 per pass.
 *
 * NOTE(review): presumably ri/ii hold real and imaginary parts and W holds
 * twiddle factors (older FFTW-style codelet signature) -- confirm.
 */
static const R *t1sv_2(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
{
     INT i;

     for (i = m;
          i > 0;
          i -= (2 * VL), ri += ((2 * VL) * dist), ii += ((2 * VL) * dist), W += ((2 * VL) * 2), MAKE_VOLATILE_STRIDE(ios)) {
          V r0, i0, r1, i1;
          V w_a, w_b;
          V part_i, part_r, acc_i, acc_r;

          r0 = LD(&(ri[0]), dist, &(ri[0]));
          i0 = LD(&(ii[0]), dist, &(ii[0]));
          r1 = LD(&(ri[WS(ios, 1)]), dist, &(ri[WS(ios, 1)]));
          i1 = LD(&(ii[WS(ios, 1)]), dist, &(ii[WS(ios, 1)]));
          w_a = LDW(&(W[0]));
          w_b = LDW(&(W[TWVL * 1]));

          /* Element 1 combined with the W pair, built in two steps. */
          part_i = VMUL(w_a, i1);
          part_r = VMUL(w_a, r1);
          acc_i = VFNMS(w_b, r1, part_i);
          acc_r = VFMA(w_b, i1, part_r);

          ST(&(ii[0]), VADD(acc_i, i0), dist, &(ii[0]));
          ST(&(ii[WS(ios, 1)]), VSUB(i0, acc_i), dist, &(ii[WS(ios, 1)]));
          ST(&(ri[0]), VADD(r0, acc_r), dist, &(ri[0]));
          ST(&(ri[WS(ios, 1)]), VSUB(r0, acc_r), dist, &(ri[WS(ios, 1)]));
     }
     return W;
}
示例7: hc2cbdftv_4
/*
 * hc2cbdftv_4: in-place vector kernel over two elements of Rp and two
 * elements of Rm (Ip and Im are advanced each iteration but never
 * dereferenced here).
 *
 * For every m in [mb, me), stepping by VL, the body loads Rp[0], Rp[1]
 * (stride ms) and Rm[0], Rm[1] (stride -ms), applies VCONJ to the Rm
 * values, pairs Rp[0] with Rm[1] and Rp[1] with Rm[0], and stores four
 * results back.  Slots Rp[0]/Rm[0] are written before the W entries for
 * slots Rp[1]/Rm[1] are loaded; that order is kept.
 *
 * W is pre-advanced by (mb - 1) * ((TWVL / VL) * 6) entries, then by
 * TWVL * 6 per iteration.
 *
 * NOTE(review): looks like an FFTW genfft-generated SIMD codelet -- confirm.
 */
static void hc2cbdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     for (m = mb, W += ((mb - 1) * ((TWVL / VL) * 6));
          m < me;
          m += VL, Rp += (VL * ms), Ip += (VL * ms), Rm -= (VL * ms), Im -= (VL * ms), W += (TWVL * 6), MAKE_VOLATILE_STRIDE(rs)) {
          V p0, m1, m1c, p1, m0, m0c;
          V dif_a, sum_a, dif_b_rot, sum_b, sum_ab;
          V tw0, tw4, tw2;
          V z0, z4, z2;
          V out_p0, out_m0, out_p1, out_m1;

          /* Pair (Rp[0], Rm[1]). */
          p0 = LD(&(Rp[0]), ms, &(Rp[0]));
          m1 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
          m1c = VCONJ(m1);
          dif_a = VSUB(p0, m1c);
          sum_a = VADD(p0, m1c);

          /* Pair (Rp[1], Rm[0]). */
          p1 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
          m0 = LD(&(Rm[0]), -ms, &(Rm[0]));
          m0c = VCONJ(m0);
          dif_b_rot = VBYI(VSUB(p1, m0c));
          sum_b = VADD(p1, m0c);

          /* Outputs for slot 0 of Rp and Rm. */
          sum_ab = VADD(sum_a, sum_b);
          tw0 = LDW(&(W[0]));
          z0 = VZMULI(tw0, VADD(dif_a, dif_b_rot));
          out_p0 = VADD(z0, sum_ab);
          ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
          out_m0 = VCONJ(VSUB(sum_ab, z0));
          ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));

          /* Outputs for slot 1 of Rp and Rm. */
          tw4 = LDW(&(W[TWVL * 4]));
          z4 = VZMULI(tw4, VSUB(dif_a, dif_b_rot));
          tw2 = LDW(&(W[TWVL * 2]));
          z2 = VZMUL(tw2, VSUB(sum_a, sum_b));
          out_p1 = VADD(z4, z2);
          ST(&(Rp[WS(rs, 1)]), out_p1, ms, &(Rp[WS(rs, 1)]));
          out_m1 = VCONJ(VSUB(z2, z4));
          ST(&(Rm[WS(rs, 1)]), out_m1, -ms, &(Rm[WS(rs, 1)]));
     }
}
示例8: hc2cfdftv_6
/*
 * hc2cfdftv_6: in-place vector kernel over three elements of Rp and three
 * elements of Rm (Ip and Im are advanced each iteration but never
 * dereferenced in this body).
 *
 * For every m in [mb, me), stepping by VL, the body loads Rp[0..2] with
 * stride ms and Rm[0..2] with stride -ms, reads five entries of W
 * (offsets 0, 2, 4, 6, 8 times TWVL), and stores six results back into the
 * same six slots.  Rp/Ip move forward by VL * ms per iteration, Rm/Im move
 * backward by the same amount.
 *
 * W is pre-advanced by (mb - 1) * ((TWVL / VL) * 10) entries, then by
 * TWVL * 10 per iteration.
 *
 * NOTE(review): naming and structure look like an FFTW genfft-generated
 * SIMD codelet (size 6) -- confirm before relying on that.
 */
static void hc2cfdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
/* Constants used by the output stage (0.25, 0.866..., 0.5). */
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
{
INT m;
for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(24, rs)) {
V Ta, Tu, Tn, Tw, Ti, Tv, T1, T8, Tg, Tf, T7, T3, Te, T6, T2;
V T4, T9, T5, Tk, Tm, Tj, Tl, Tc, Th, Tb, Td, Tr, Tp, Tq, To;
V Tt, Ts, TA, Ty, Tz, Tx, TC, TB;
/* Load all six inputs; each Rm value is passed through VCONJ. */
T1 = LD(&(Rp[0]), ms, &(Rp[0]));
T8 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
Tg = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0]));
Te = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0]));
Tf = VCONJ(Te);
T6 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
T7 = VCONJ(T6);
T2 = LD(&(Rm[0]), -ms, &(Rm[0]));
T3 = VCONJ(T2);
/* Ta / Tu: sum and difference built from slot 0 and the W[TWVL * 4] product of slot 1. */
T4 = VADD(T1, T3);
T5 = LDW(&(W[TWVL * 4]));
T9 = VZMULIJ(T5, VSUB(T7, T8));
Ta = VADD(T4, T9);
Tu = VSUB(T4, T9);
/* Tn / Tw: combine the W[0] product of slot 0 with the W[TWVL * 6] product of slot 2. */
Tj = LDW(&(W[0]));
Tk = VZMULIJ(Tj, VSUB(T3, T1));
Tl = LDW(&(W[TWVL * 6]));
Tm = VZMULJ(Tl, VADD(Tf, Tg));
Tn = VADD(Tk, Tm);
Tw = VSUB(Tm, Tk);
/* Ti / Tv: combine the W[TWVL * 2] product of slot 1 with the W[TWVL * 8] product of slot 2. */
Tb = LDW(&(W[TWVL * 2]));
Tc = VZMULJ(Tb, VADD(T7, T8));
Td = LDW(&(W[TWVL * 8]));
Th = VZMULIJ(Td, VSUB(Tf, Tg));
Ti = VADD(Tc, Th);
Tv = VSUB(Tc, Th);
/* First output group, from Ta, Ti, Tn: writes Rp[0], Rm[1], Rp[2]. */
Tr = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(Tn, Ti))));
To = VADD(Ti, Tn);
Tp = VMUL(LDK(KP500000000), VADD(Ta, To));
Tq = VFNMS(LDK(KP250000000), To, VMUL(LDK(KP500000000), Ta));
ST(&(Rp[0]), Tp, ms, &(Rp[0]));
Tt = VCONJ(VADD(Tq, Tr));
ST(&(Rm[WS(rs, 1)]), Tt, -ms, &(Rm[WS(rs, 1)]));
Ts = VSUB(Tq, Tr);
ST(&(Rp[WS(rs, 2)]), Ts, ms, &(Rp[0]));
/* Second output group, from Tu, Tv, Tw: writes Rm[2], Rp[1], Rm[0]. */
TA = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(Tw, Tv))));
Tx = VADD(Tv, Tw);
Ty = VCONJ(VMUL(LDK(KP500000000), VADD(Tu, Tx)));
Tz = VFNMS(LDK(KP250000000), Tx, VMUL(LDK(KP500000000), Tu));
ST(&(Rm[WS(rs, 2)]), Ty, -ms, &(Rm[0]));
TC = VADD(Tz, TA);
ST(&(Rp[WS(rs, 1)]), TC, ms, &(Rp[WS(rs, 1)]));
TB = VCONJ(VSUB(Tz, TA));
ST(&(Rm[0]), TB, -ms, &(Rm[0]));
}
}
VLEAVE();
}
示例9: t2sv_4
/*
 * t2sv_4: in-place vector kernel over four strided elements of each of the
 * ri and ii arrays.
 *
 * For every m in [mb, me), stepping by 2 * VL, the body reads four entries
 * of W (offsets 0..3 times TWVL), derives one further pair from them,
 * loads elements 0..3 of ri and ii (spaced by rs), combines elements 1..3
 * with the W-derived pairs and stores eight results back into the same
 * slots.
 *
 * W is pre-advanced by mb * 4 entries, then by (2 * VL) * 4 per iteration;
 * ri and ii advance by (2 * VL) * ms per iteration.
 *
 * NOTE(review): presumably ri/ii hold real and imaginary parts and W holds
 * twiddle factors (FFTW-style split-format codelet) -- confirm.
 */
static void t2sv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
{
INT m;
for (m = mb, W = W + (mb * 4); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 4), MAKE_VOLATILE_STRIDE(8, rs)) {
V T2, T4, T3, T5, T6, T8;
/* (T2, T4) and (T3, T5) are read from W; (T6, T8) is computed from them. */
T2 = LDW(&(W[0]));
T4 = LDW(&(W[TWVL * 1]));
T3 = LDW(&(W[TWVL * 2]));
T5 = LDW(&(W[TWVL * 3]));
T6 = VFMA(T2, T3, VMUL(T4, T5));
T8 = VFNMS(T4, T3, VMUL(T2, T5));
{
V T1, Tp, Ta, To, Te, Tk, Th, Tl, T7, T9;
/* Element 0 is used as loaded; element 2 is combined with (T6, T8). */
T1 = LD(&(ri[0]), ms, &(ri[0]));
Tp = LD(&(ii[0]), ms, &(ii[0]));
T7 = LD(&(ri[WS(rs, 2)]), ms, &(ri[0]));
T9 = LD(&(ii[WS(rs, 2)]), ms, &(ii[0]));
Ta = VFMA(T6, T7, VMUL(T8, T9));
To = VFNMS(T8, T7, VMUL(T6, T9));
{
V Tc, Td, Tf, Tg;
/* Element 1 is combined with (T2, T4), element 3 with (T3, T5). */
Tc = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));
Td = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));
Te = VFMA(T2, Tc, VMUL(T4, Td));
Tk = VFNMS(T4, Tc, VMUL(T2, Td));
Tf = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)]));
Tg = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)]));
Th = VFMA(T3, Tf, VMUL(T5, Tg));
Tl = VFNMS(T5, Tf, VMUL(T3, Tg));
}
{
V Tb, Ti, Tn, Tq;
/* Outputs for slots 0 and 2 of both arrays; all loads are already done. */
Tb = VADD(T1, Ta);
Ti = VADD(Te, Th);
ST(&(ri[WS(rs, 2)]), VSUB(Tb, Ti), ms, &(ri[0]));
ST(&(ri[0]), VADD(Tb, Ti), ms, &(ri[0]));
Tn = VADD(Tk, Tl);
Tq = VADD(To, Tp);
ST(&(ii[0]), VADD(Tn, Tq), ms, &(ii[0]));
ST(&(ii[WS(rs, 2)]), VSUB(Tq, Tn), ms, &(ii[0]));
}
{
V Tj, Tm, Tr, Ts;
/* Outputs for slots 1 and 3 of both arrays. */
Tj = VSUB(T1, Ta);
Tm = VSUB(Tk, Tl);
ST(&(ri[WS(rs, 3)]), VSUB(Tj, Tm), ms, &(ri[WS(rs, 1)]));
ST(&(ri[WS(rs, 1)]), VADD(Tj, Tm), ms, &(ri[WS(rs, 1)]));
Tr = VSUB(Tp, To);
Ts = VSUB(Te, Th);
ST(&(ii[WS(rs, 1)]), VSUB(Tr, Ts), ms, &(ii[WS(rs, 1)]));
ST(&(ii[WS(rs, 3)]), VADD(Ts, Tr), ms, &(ii[WS(rs, 1)]));
}
}
}
}
VLEAVE();
}
示例10: t3fv_5
/*
 * t3fv_5: in-place vector kernel over five strided elements of the array
 * starting at ri (ii is accepted but never used in this body).
 *
 * For every m in [mb, me), stepping by VL, the body reads two entries of W
 * and derives two more values from them (VZMUL and VZMULJ), loads elements
 * 0..4 (spaced by rs), combines elements 1..4 with those four values via
 * VZMULJ, and stores five results back into the same slots.
 *
 * W is pre-advanced by mb * ((TWVL / VL) * 4) entries, then by TWVL * 4
 * per iteration; x advances by VL * ms per iteration.
 *
 * NOTE(review): naming and the constants suggest an FFTW genfft-generated
 * radix-5 twiddle codelet -- confirm before relying on that.
 */
static void t3fv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
/* Constants used by the output stage. */
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
{
INT m;
R *x;
/* x is the working pointer; ri itself is not modified. */
x = ri;
for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(5, rs)) {
V T1, T4, T5, T9;
/* T1 and T4 come from W; T5 and T9 are derived from that pair. */
T1 = LDW(&(W[0]));
T4 = LDW(&(W[TWVL * 2]));
T5 = VZMUL(T1, T4);
T9 = VZMULJ(T1, T4);
{
V Tg, Tk, Tl, T8, Te, Th;
Tg = LD(&(x[0]), ms, &(x[0]));
{
V T3, Td, T7, Tb;
{
V T2, Tc, T6, Ta;
/* Elements 1, 3, 4, 2 are paired with T1, T4, T5, T9 respectively. */
T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
T3 = VZMULJ(T1, T2);
Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Td = VZMULJ(T4, Tc);
T6 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
T7 = VZMULJ(T5, T6);
Ta = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Tb = VZMULJ(T9, Ta);
}
/* Sums and differences of the (1, 4) and (2, 3) pairs. */
Tk = VSUB(T3, T7);
Tl = VSUB(Tb, Td);
T8 = VADD(T3, T7);
Te = VADD(Tb, Td);
Th = VADD(T8, Te);
}
/* Slot 0 is written first; every input has already been loaded. */
ST(&(x[0]), VADD(Tg, Th), ms, &(x[0]));
{
V Tm, Tn, Tj, To, Tf, Ti;
/* Remaining four outputs (slots 1, 3, 4, 2). */
Tm = VBYI(VFMA(LDK(KP951056516), Tk, VMUL(LDK(KP587785252), Tl)));
Tn = VBYI(VFNMS(LDK(KP587785252), Tk, VMUL(LDK(KP951056516), Tl)));
Tf = VMUL(LDK(KP559016994), VSUB(T8, Te));
Ti = VFNMS(LDK(KP250000000), Th, Tg);
Tj = VADD(Tf, Ti);
To = VSUB(Ti, Tf);
ST(&(x[WS(rs, 1)]), VSUB(Tj, Tm), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 3)]), VSUB(To, Tn), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 4)]), VADD(Tm, Tj), ms, &(x[0]));
ST(&(x[WS(rs, 2)]), VADD(Tn, To), ms, &(x[0]));
}
}
}
}
VLEAVE();
}
示例11: hc2cbdftv_6
/*
 * hc2cbdftv_6: in-place vector kernel over three elements of Rp and three
 * elements of Rm (Ip and Im are advanced each iteration but never
 * dereferenced in this body).
 *
 * For every m in [mb, me), stepping by VL, the body loads Rp[0..2] with
 * stride ms and Rm[0..2] with stride -ms, reads five entries of W
 * (offsets 0, 2, 4, 6, 8 times TWVL), and stores six results back into the
 * same six slots.  Rp/Ip move forward by VL * ms per iteration, Rm/Im move
 * backward by the same amount.
 *
 * W is pre-advanced by (mb - 1) * ((TWVL / VL) * 10) entries, then by
 * TWVL * 10 per iteration.
 *
 * NOTE(review): naming and structure look like an FFTW genfft-generated
 * SIMD codelet (size 6) -- confirm before relying on that.
 */
static void hc2cbdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
INT m;
for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(rs)) {
V T5, Th, Te, Ts, Tk, Tm, T2, T4, T3, T6, Tc, T8, Tb, T7, Ta;
V T9, Td, Ti, Tj, TA, Tf, Tn, Tv, Tt, Tz, T1, Tl, Tg, Tu, Tr;
V Tq, Ty, To, Tp, TC, TB, Tx, Tw;
/* Pair Rp[0] with the VCONJ of Rm[2]: difference T5, sum Th. */
T2 = LD(&(Rp[0]), ms, &(Rp[0]));
T3 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0]));
T4 = VCONJ(T3);
T5 = VSUB(T2, T4);
Th = VADD(T2, T4);
/* Remaining four inputs; each Rm value is passed through VCONJ. */
T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0]));
Tc = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
T7 = LD(&(Rm[0]), -ms, &(Rm[0]));
T8 = VCONJ(T7);
Ta = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
Tb = VCONJ(Ta);
/* Differences (T9, Td) and sums (Ti, Tj) of the other two pairs. */
T9 = VSUB(T6, T8);
Td = VSUB(Tb, Tc);
Te = VADD(T9, Td);
Ts = VBYI(VMUL(LDK(KP866025403), VSUB(T9, Td)));
Ti = VADD(T6, T8);
Tj = VADD(Tb, Tc);
Tk = VADD(Ti, Tj);
Tm = VBYI(VMUL(LDK(KP866025403), VSUB(Ti, Tj)));
/* TA is the only term not multiplied by a W entry. */
TA = VADD(Th, Tk);
/* Five W-weighted terms: Tf, Tn, Tv, Tt, Tz. */
T1 = LDW(&(W[TWVL * 4]));
Tf = VZMULI(T1, VADD(T5, Te));
Tl = VFNMS(LDK(KP500000000), Tk, Th);
Tg = LDW(&(W[TWVL * 2]));
Tn = VZMUL(Tg, VSUB(Tl, Tm));
Tu = LDW(&(W[TWVL * 6]));
Tv = VZMUL(Tu, VADD(Tm, Tl));
Tr = VFNMS(LDK(KP500000000), Te, T5);
Tq = LDW(&(W[TWVL * 8]));
Tt = VZMULI(Tq, VSUB(Tr, Ts));
Ty = LDW(&(W[0]));
Tz = VZMULI(Ty, VADD(Ts, Tr));
/* Stores: each Rp/Rm slot pair gets a sum and the VCONJ of a difference. */
To = VADD(Tf, Tn);
ST(&(Rp[WS(rs, 1)]), To, ms, &(Rp[WS(rs, 1)]));
Tp = VCONJ(VSUB(Tn, Tf));
ST(&(Rm[WS(rs, 1)]), Tp, -ms, &(Rm[WS(rs, 1)]));
TC = VCONJ(VSUB(TA, Tz));
ST(&(Rm[0]), TC, -ms, &(Rm[0]));
TB = VADD(Tz, TA);
ST(&(Rp[0]), TB, ms, &(Rp[0]));
Tx = VCONJ(VSUB(Tv, Tt));
ST(&(Rm[WS(rs, 2)]), Tx, -ms, &(Rm[0]));
Tw = VADD(Tt, Tv);
ST(&(Rp[WS(rs, 2)]), Tw, ms, &(Rp[0]));
}
}
示例12: t1sv_4
/*
 * t1sv_4: in-place vector kernel over four strided elements of each of the
 * ri and ii arrays; returns the advanced W pointer.
 *
 * The loop counter starts at m and decreases by 2 * VL until it is no
 * longer positive.  Each pass loads elements 0..3 of ri and ii (spaced by
 * ios), reads six entries of W (offsets 0..5 times TWVL), combines
 * elements 1..3 with consecutive W pairs and stores eight results back
 * into the same slots.  ri and ii advance by (2 * VL) * dist and W by
 * (2 * VL) * 6 per pass.
 *
 * NOTE(review): presumably ri/ii hold real and imaginary parts and W holds
 * twiddle factors (older FFTW-style codelet signature) -- confirm.
 */
static const R *t1sv_4(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
{
INT i;
for (i = m; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * dist), ii = ii + ((2 * VL) * dist), W = W + ((2 * VL) * 6), MAKE_VOLATILE_STRIDE(ios)) {
V T1, Tv, T3, T6, T5, Ta, Td, Tc, Tg, Tj, Tt, T4, Tf, Ti, Tn;
V Tb, T2, T9;
/* Element 0 of both arrays is used as loaded. */
T1 = LD(&(ri[0]), dist, &(ri[0]));
Tv = LD(&(ii[0]), dist, &(ii[0]));
/* Element 2 with W entries at TWVL * 2 and TWVL * 3. */
T3 = LD(&(ri[WS(ios, 2)]), dist, &(ri[0]));
T6 = LD(&(ii[WS(ios, 2)]), dist, &(ii[0]));
T2 = LDW(&(W[TWVL * 2]));
T5 = LDW(&(W[TWVL * 3]));
/* Element 1 with W entries at 0 and TWVL * 1. */
Ta = LD(&(ri[WS(ios, 1)]), dist, &(ri[WS(ios, 1)]));
Td = LD(&(ii[WS(ios, 1)]), dist, &(ii[WS(ios, 1)]));
T9 = LDW(&(W[0]));
Tc = LDW(&(W[TWVL * 1]));
/* Element 3 with W entries at TWVL * 4 and TWVL * 5 (loaded just below). */
Tg = LD(&(ri[WS(ios, 3)]), dist, &(ri[WS(ios, 1)]));
Tj = LD(&(ii[WS(ios, 3)]), dist, &(ii[WS(ios, 1)]));
/* First half of each product; completed by VFMA/VFNMS in the inner block. */
Tt = VMUL(T2, T6);
T4 = VMUL(T2, T3);
Tf = LDW(&(W[TWVL * 4]));
Ti = LDW(&(W[TWVL * 5]));
Tn = VMUL(T9, Td);
Tb = VMUL(T9, Ta);
{
V Tu, T7, Tp, Th, To, Te;
/* Finish the products for elements 2 and 1; start the one for element 3. */
Tu = VFNMS(T5, T3, Tt);
T7 = VFMA(T5, T6, T4);
Tp = VMUL(Tf, Tj);
Th = VMUL(Tf, Tg);
To = VFNMS(Tc, Ta, Tn);
Te = VFMA(Tc, Td, Tb);
{
V Tw, Tx, T8, Tm, Tq, Tk;
/* Combine element 0 with the element-2 product; finish the element-3 product. */
Tw = VADD(Tu, Tv);
Tx = VSUB(Tv, Tu);
T8 = VADD(T1, T7);
Tm = VSUB(T1, T7);
Tq = VFNMS(Ti, Tg, Tp);
Tk = VFMA(Ti, Tj, Th);
{
V Ts, Tr, Tl, Ty;
/* Combine the element-1 and element-3 products, then write all eight outputs. */
Ts = VADD(To, Tq);
Tr = VSUB(To, Tq);
Tl = VADD(Te, Tk);
Ty = VSUB(Te, Tk);
ST(&(ri[WS(ios, 1)]), VADD(Tm, Tr), dist, &(ri[WS(ios, 1)]));
ST(&(ri[WS(ios, 3)]), VSUB(Tm, Tr), dist, &(ri[WS(ios, 1)]));
ST(&(ii[WS(ios, 2)]), VSUB(Tw, Ts), dist, &(ii[0]));
ST(&(ii[0]), VADD(Ts, Tw), dist, &(ii[0]));
ST(&(ii[WS(ios, 3)]), VADD(Ty, Tx), dist, &(ii[WS(ios, 1)]));
ST(&(ii[WS(ios, 1)]), VSUB(Tx, Ty), dist, &(ii[WS(ios, 1)]));
ST(&(ri[0]), VADD(T8, Tl), dist, &(ri[0]));
ST(&(ri[WS(ios, 2)]), VSUB(T8, Tl), dist, &(ri[0]));
}
}
}
}
return W;
}
示例13: t1sv_2
/*
 * t1sv_2: in-place vector kernel over two strided elements of each of the
 * ri and ii arrays.
 *
 * For every m in [mb, me), stepping by 2 * VL, element 1 of ri/ii is
 * combined with W[0] and W[TWVL * 1] through VMUL/VFMA/VFNMS, and the sum
 * and difference with element 0 are stored back to slots 0 and 1 of both
 * arrays.
 *
 * W is pre-advanced by mb * 2 entries, then by (2 * VL) * 2 per iteration;
 * ri and ii advance by (2 * VL) * ms per iteration.
 *
 * NOTE(review): presumably ri/ii hold real and imaginary parts and W holds
 * twiddle factors (FFTW-style split-format codelet) -- confirm.
 */
static void t1sv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     for (m = mb, W += (mb * 2);
          m < me;
          m += (2 * VL), ri += ((2 * VL) * ms), ii += ((2 * VL) * ms), W += ((2 * VL) * 2), MAKE_VOLATILE_STRIDE(rs)) {
          V r0, i0, r1, i1;
          V w_a, w_b;
          V part_i, part_r, acc_i, acc_r;

          r0 = LD(&(ri[0]), ms, &(ri[0]));
          i0 = LD(&(ii[0]), ms, &(ii[0]));
          r1 = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));
          i1 = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));
          w_a = LDW(&(W[0]));
          w_b = LDW(&(W[TWVL * 1]));

          /* Element 1 combined with the W pair, built in two steps. */
          part_i = VMUL(w_a, i1);
          part_r = VMUL(w_a, r1);
          acc_i = VFNMS(w_b, r1, part_i);
          acc_r = VFMA(w_b, i1, part_r);

          ST(&(ii[0]), VADD(acc_i, i0), ms, &(ii[0]));
          ST(&(ii[WS(rs, 1)]), VSUB(i0, acc_i), ms, &(ii[WS(rs, 1)]));
          ST(&(ri[0]), VADD(r0, acc_r), ms, &(ri[0]));
          ST(&(ri[WS(rs, 1)]), VSUB(r0, acc_r), ms, &(ri[WS(rs, 1)]));
     }
}
示例14: hc2cfdftv_2
/*
 * hc2cfdftv_2: in-place vector kernel over one element of Rp and one
 * element of Rm (Ip and Im are advanced each iteration but never
 * dereferenced here).
 *
 * For every m in [mb, me), stepping by VL, the body loads Rp[0] (stride
 * ms) and Rm[0] (stride -ms), combines them with the W entry at W[0],
 * scales by 0.5 and stores one result to each of Rm[0] and Rp[0].
 *
 * W is pre-advanced by (mb - 1) * ((TWVL / VL) * 2) entries, then by
 * TWVL * 2 per iteration; Rp/Ip move forward and Rm/Im backward by
 * VL * ms.
 *
 * NOTE(review): looks like an FFTW genfft-generated SIMD codelet -- confirm.
 */
static void hc2cfdftv_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     for (m = mb, W += ((mb - 1) * ((TWVL / VL) * 2));
          m < me;
          m += VL, Rp += (VL * ms), Ip += (VL * ms), Rm -= (VL * ms), Im -= (VL * ms), W += (TWVL * 2), MAKE_VOLATILE_STRIDE(rs)) {
          V p0, m0, tw0;
          V even, odd;
          V out_m0, out_p0;

          p0 = LD(&(Rp[0]), ms, &(Rp[0]));
          m0 = LD(&(Rm[0]), -ms, &(Rm[0]));
          tw0 = LDW(&(W[0]));

          even = VFMACONJ(m0, p0);
          odd = VZMULIJ(tw0, VFNMSCONJ(m0, p0));

          /* Both outputs are scaled by 0.5; the Rm output also goes through VCONJ. */
          out_m0 = VCONJ(VMUL(LDK(KP500000000), VADD(even, odd)));
          out_p0 = VMUL(LDK(KP500000000), VSUB(even, odd));

          ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));
          ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
     }
}
示例15: hc2cbdftv_2
/*
 * hc2cbdftv_2: in-place vector kernel over one element of Rp and one
 * element of Rm (Ip and Im are advanced each iteration but never
 * dereferenced here).
 *
 * For every m in [mb, me), stepping by VL, the body loads Rp[0] (stride
 * ms) and Rm[0] (stride -ms), combines them with the W entry at W[0] and
 * stores one result to each of Rm[0] and Rp[0].
 *
 * W is pre-advanced by (mb - 1) * ((TWVL / VL) * 2) entries, then by
 * TWVL * 2 per iteration; Rp/Ip move forward and Rm/Im backward by
 * VL * ms.
 *
 * NOTE(review): looks like an FFTW genfft-generated SIMD codelet -- confirm.
 */
static void hc2cbdftv_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     for (m = mb, W += ((mb - 1) * ((TWVL / VL) * 2));
          m < me;
          m += VL, Rp += (VL * ms), Ip += (VL * ms), Rm -= (VL * ms), Im -= (VL * ms), W += (TWVL * 2), MAKE_VOLATILE_STRIDE(8, rs)) {
          V p0, m0, tw0;
          V even, odd;
          V out_m0, out_p0;

          p0 = LD(&(Rp[0]), ms, &(Rp[0]));
          m0 = LD(&(Rm[0]), -ms, &(Rm[0]));
          tw0 = LDW(&(W[0]));

          even = VFMACONJ(m0, p0);
          odd = VZMULI(tw0, VFNMSCONJ(m0, p0));

          /* The Rm output is the VCONJ of the difference; the Rp output is the sum. */
          out_m0 = VCONJ(VSUB(even, odd));
          out_p0 = VADD(odd, even);

          ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));
          ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
     }
     VLEAVE();
}