本文整理汇总了Golang中github.com/klauspost/intrinsics/x86.M128i类型的典型用法代码示例。如果您正苦于以下问题：Golang M128i类型的具体用法？Golang M128i怎么用？Golang M128i使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。
在下文中一共展示了使用M128i类型的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: MaskzConflictEpi32
// MaskzConflictEpi32 compares every 32-bit element of 'a' against all
// elements of 'a' that sit closer to the least significant bit. For each
// element the comparison results are packed into a zero-extended bit vector
// stored in the matching element of 'dst'. Zeromask 'k' selects the active
// elements: an element whose mask bit is clear is written as zero.
//
//	FOR j := 0 to 3
//		i := j*32
//		IF k[j]
//			FOR l := 0 to j-1
//				m := l*32
//				dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
//			ENDFOR
//			dst[i+31:i+j] := 0
//		ELSE
//			dst[i+31:i] := 0
//		FI
//	ENDFOR
//	dst[MAX:128] := 0
//
// Instruction: 'VPCONFLICTD'. Intrinsic: '_mm_maskz_conflict_epi32'.
// Requires AVX512CD.
func MaskzConflictEpi32(k x86.Mmask8, a x86.M128i) (dst x86.M128i) {
	// Unwrap the typed arguments into the raw forms the helper is called with.
	mask, vec := uint8(k), [16]byte(a)
	dst = x86.M128i(maskzConflictEpi32(mask, vec))
	return dst
}
示例2: ShuffleEpi8
// ShuffleEpi8 rearranges the packed 8-bit integers of 'a'. Each 8-bit element
// of 'b' is the shuffle control for the element at the same position of
// 'dst': when its top bit is set the destination byte is zeroed, otherwise
// its low four bits select which byte of 'a' is copied.
//
//	FOR j := 0 to 15
//		i := j*8
//		IF b[i+7] == 1
//			dst[i+7:i] := 0
//		ELSE
//			index[3:0] := b[i+3:i]
//			dst[i+7:i] := a[index*8+7:index*8]
//		FI
//	ENDFOR
//
// Instruction: 'PSHUFB'. Intrinsic: '_mm_shuffle_epi8'.
// Requires SSSE3.
func ShuffleEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i) {
	data, control := [16]byte(a), [16]byte(b)
	dst = x86.M128i(shuffleEpi8(data, control))
	return dst
}
示例3: Sha1rnds4Epu32
// Sha1rnds4Epu32 runs four rounds of the SHA1 operation. The initial SHA1
// state (A,B,C,D) is read from 'a'; 'b' supplies a pre-computed sum of the
// next four round message values (unsigned 32-bit integers) together with
// state variable E. The updated state (A,B,C,D) is stored in 'dst'. 'func'
// (the 'fnc' argument) selects the logic function and round constant.
//
//	IF (func[1:0] = 0) THEN
//		f() := f0(), K := K0;
//	ELSE IF (func[1:0] = 1) THEN
//		f() := f1(), K := K1;
//	ELSE IF (func[1:0] = 2) THEN
//		f() := f2(), K := K2;
//	ELSE IF (func[1:0] = 3) THEN
//		f() := f3(), K := K3;
//	FI;
//
//	A := a[127:96];
//	B := a[95:64];
//	C := a[63:32];
//	D := a[31:0];
//
//	W[0] := b[127:96];
//	W[1] := b[95:64];
//	W[2] := b[63:32];
//	W[3] := b[31:0];
//
//	A[1] := f(B, C, D) + (A <<< 5) + W[0] + K;
//	B[1] := A;
//	C[1] := B <<< 30;
//	D[1] := C;
//	E[1] := D;
//
//	FOR i = 1 to 3
//		A[i+1] := f(B[i], C[i], D[i]) + (A[i] <<< 5) + W[i] + E[i] + K;
//		B[i+1] := A[i];
//		C[i+1] := B[i] <<< 30;
//		D[i+1] := C[i];
//		E[i+1] := D[i];
//	ENDFOR;
//
//	dst[127:96] := A[4];
//	dst[95:64] := B[4];
//	dst[63:32] := C[4];
//	dst[31:0] := D[4];
//
// Instruction: 'SHA1RNDS4'. Intrinsic: '_mm_sha1rnds4_epu32'.
// Requires SHA.
func Sha1rnds4Epu32(a x86.M128i, b x86.M128i, fnc int) (dst x86.M128i) {
	state, msg := [16]byte(a), [16]byte(b)
	dst = x86.M128i(sha1rnds4Epu32(state, msg, fnc))
	return dst
}
示例4: Sha256rnds2Epu32
// Sha256rnds2Epu32 runs two rounds of the SHA256 operation. 'a' carries the
// initial state words (C,D,G,H) and 'b' the initial state words (A,B,E,F);
// 'k' holds the pre-computed sum of the next two round message values
// (unsigned 32-bit integers) and their round constants. The updated state
// words (A,B,E,F) are stored in 'dst'.
//
//	A[0] := b[127:96];
//	B[0] := b[95:64];
//	C[0] := a[127:96];
//	D[0] := a[95:64];
//	E[0] := b[63:32];
//	F[0] := b[31:0];
//	G[0] := a[63:32];
//	H[0] := a[31:0];
//
//	W_K0 := k[31:0];
//	W_K1 := k[63:32];
//
//	FOR i = 0 to 1
//		A_(i+1) := Ch(E[i], F[i], G[i]) + sum1(E[i]) + WKi + H[i] + Maj(A[i], B[i], C[i]) + sum0(A[i]);
//		B_(i+1) := A[i];
//		C_(i+1) := B[i];
//		D_(i+1) := C[i];
//		E_(i+1) := Ch(E[i], F[i], G[i]) + sum1(E[i]) + WKi + H[i] + D[i];
//		F_(i+1) := E[i];
//		G_(i+1) := F[i];
//		H_(i+1) := G[i];
//	ENDFOR;
//
//	dst[127:96] := A[2];
//	dst[95:64] := B[2];
//	dst[63:32] := E[2];
//	dst[31:0] := F[2];
//
// In the loop above 'WKi' presumably stands for W_K0 when i = 0 and W_K1 when
// i = 1.
//
// Instruction: 'SHA256RNDS2'. Intrinsic: '_mm_sha256rnds2_epu32'.
// Requires SHA.
func Sha256rnds2Epu32(a x86.M128i, b x86.M128i, k x86.M128i) (dst x86.M128i) {
	cdgh, abef, wk := [16]byte(a), [16]byte(b), [16]byte(k)
	dst = x86.M128i(sha256rnds2Epu32(cdgh, abef, wk))
	return dst
}
示例5: Clmulepi64Si128
// Clmulepi64Si128 computes the carry-less product of two 64-bit integers and
// stores it in 'dst'. Bit 0 of 'imm8' picks the low or high half of 'a' as
// the first operand, and bit 4 of 'imm8' picks the low or high half of 'b' as
// the second operand.
//
//	IF (imm8[0] = 0)
//		TEMP1 := a[63:0];
//	ELSE
//		TEMP1 := a[127:64];
//	FI
//	IF (imm8[4] = 0)
//		TEMP2 := b[63:0];
//	ELSE
//		TEMP2 := b[127:64];
//	FI
//
//	FOR i := 0 to 63
//		TEMP[i] := (TEMP1[0] and TEMP2[i]);
//		FOR j := 1 to i
//			TEMP [i] := TEMP [i] XOR (TEMP1[j] AND TEMP2[i-j])
//		ENDFOR
//		dst[i] := TEMP[i];
//	ENDFOR
//	FOR i := 64 to 127
//		TEMP [i] := 0;
//		FOR j := (i - 63) to 63
//			TEMP [i] := TEMP [i] XOR (TEMP1[j] AND TEMP2[i-j])
//		ENDFOR
//		dst[i] := TEMP[i];
//	ENDFOR
//	dst[127] := 0
//
// Instruction: 'PCLMULQDQ'. Intrinsic: '_mm_clmulepi64_si128'.
// Requires PCLMULQDQ.
//
// FIXME: Requires compiler support (has immediate)
func Clmulepi64Si128(a x86.M128i, b x86.M128i, imm8 byte) (dst x86.M128i) {
	lhs, rhs := [16]byte(a), [16]byte(b)
	dst = x86.M128i(clmulepi64Si128(lhs, rhs, imm8))
	return dst
}
示例6: AesenclastSi128
// AesenclastSi128 performs the final round of an AES encryption flow. The
// state is taken from 'a', the round key from 'RoundKey', and the result is
// stored in 'dst'.
//
//	state := a
//	a[127:0] := ShiftRows(a[127:0])
//	a[127:0] := SubBytes(a[127:0])
//	dst[127:0] := a[127:0] XOR RoundKey[127:0]
//
// Instruction: 'AESENCLAST'. Intrinsic: '_mm_aesenclast_si128'.
// Requires AES.
func AesenclastSi128(a x86.M128i, RoundKey x86.M128i) (dst x86.M128i) {
	state, key := [16]byte(a), [16]byte(RoundKey)
	dst = x86.M128i(aesenclastSi128(state, key))
	return dst
}
示例7: MaskzLzcntEpi64
// MaskzLzcntEpi64 counts the leading zero bits of every packed 64-bit integer
// in 'a' and stores the counts in 'dst'. Zeromask 'k' selects the active
// elements: an element whose mask bit is clear is written as zero.
//
//	FOR j := 0 to 1
//		i := j*64
//		IF k[j]
//			tmp := 63
//			dst[i+63:i] := 0
//			DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
//				tmp := tmp - 1
//				dst[i+63:i] := dst[i+63:i] + 1
//			OD
//		ELSE
//			dst[i+63:i] := 0
//		FI
//	ENDFOR
//	dst[MAX:128] := 0
//
// Instruction: 'VPLZCNTQ'. Intrinsic: '_mm_maskz_lzcnt_epi64'.
// Requires AVX512CD.
func MaskzLzcntEpi64(k x86.Mmask8, a x86.M128i) (dst x86.M128i) {
	mask, vec := uint8(k), [16]byte(a)
	dst = x86.M128i(maskzLzcntEpi64(mask, vec))
	return dst
}
示例8: CvtpsPh
// CvtpsPh converts the packed single-precision (32-bit) floating-point
// elements of 'a' into packed half-precision (16-bit) floating-point elements
// and stores them in 'dst'.
//
// The 'rounding' parameter selects the rounding behaviour and can be one of:
//
//	(_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
//	(_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)     // round down, and suppress exceptions
//	(_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)     // round up, and suppress exceptions
//	(_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)        // truncate, and suppress exceptions
//	_MM_FROUND_CUR_DIRECTION                       // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
//
// Operation:
//
//	FOR j := 0 to 3
//		i := 16*j
//		l := 32*j
//		dst[i+15:i] := Convert_FP32_To_FP16FP(a[l+31:l])
//	ENDFOR
//	dst[MAX:128] := 0
//
// Instruction: 'VCVTPS2PH'. Intrinsic: '_mm_cvtps_ph'.
// Requires FP16C.
func CvtpsPh(a x86.M128, rounding int) (dst x86.M128i) {
	singles := [4]float32(a)
	dst = x86.M128i(cvtpsPh(singles, rounding))
	return dst
}
示例9: LzcntEpi64
// LzcntEpi64 counts the leading zero bits of every packed 64-bit integer in
// 'a' and stores the counts in 'dst'.
//
//	FOR j := 0 to 1
//		i := j*64
//		tmp := 63
//		dst[i+63:i] := 0
//		DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
//			tmp := tmp - 1
//			dst[i+63:i] := dst[i+63:i] + 1
//		OD
//	ENDFOR
//	dst[MAX:128] := 0
//
// Instruction: 'VPLZCNTQ'. Intrinsic: '_mm_lzcnt_epi64'.
// Requires AVX512CD.
func LzcntEpi64(a x86.M128i) (dst x86.M128i) {
	vec := [16]byte(a)
	dst = x86.M128i(lzcntEpi64(vec))
	return dst
}
示例10: MaskLzcntEpi64
// MaskLzcntEpi64 counts the leading zero bits of every packed 64-bit integer
// in 'a' and stores the counts in 'dst'. Writemask 'k' selects the active
// elements: an element whose mask bit is clear is copied from 'src' instead.
//
//	FOR j := 0 to 1
//		i := j*64
//		IF k[j]
//			tmp := 63
//			dst[i+63:i] := 0
//			DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
//				tmp := tmp - 1
//				dst[i+63:i] := dst[i+63:i] + 1
//			OD
//		ELSE
//			dst[i+63:i] := src[i+63:i]
//		FI
//	ENDFOR
//	dst[MAX:128] := 0
//
// Instruction: 'VPLZCNTQ'. Intrinsic: '_mm_mask_lzcnt_epi64'.
// Requires AVX512CD.
func MaskLzcntEpi64(src x86.M128i, k x86.Mmask8, a x86.M128i) (dst x86.M128i) {
	fallback, mask, vec := [16]byte(src), uint8(k), [16]byte(a)
	dst = x86.M128i(maskLzcntEpi64(fallback, mask, vec))
	return dst
}
示例11: BroadcastmwEpi32
// BroadcastmwEpi32 copies the low 16 bits of input mask 'k', zero-extended,
// into every 32-bit element of 'dst'.
//
//	FOR j := 0 to 3
//		i := j*32
//		dst[i+31:i] := ZeroExtend(k[15:0])
//	ENDFOR
//	dst[MAX:128] := 0
//
// Instruction: 'VPBROADCASTMW2D'. Intrinsic: '_mm_broadcastmw_epi32'.
// Requires AVX512CD.
func BroadcastmwEpi32(k x86.Mmask16) (dst x86.M128i) {
	mask := uint16(k)
	dst = x86.M128i(broadcastmwEpi32(mask))
	return dst
}
示例12: ConflictEpi64
// ConflictEpi64 compares every 64-bit element of 'a' against all elements of
// 'a' that sit closer to the least significant bit. For each element the
// comparison results are packed into a zero-extended bit vector stored in the
// matching element of 'dst'.
//
//	FOR j := 0 to 1
//		i := j*64
//		FOR k := 0 to j-1
//			m := k*64
//			dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
//		ENDFOR
//		dst[i+63:i+j] := 0
//	ENDFOR
//	dst[MAX:128] := 0
//
// Instruction: 'VPCONFLICTQ'. Intrinsic: '_mm_conflict_epi64'.
// Requires AVX512CD.
func ConflictEpi64(a x86.M128i) (dst x86.M128i) {
	vec := [16]byte(a)
	dst = x86.M128i(conflictEpi64(vec))
	return dst
}
示例13: BroadcastmbEpi64
// BroadcastmbEpi64 copies the low 8 bits of input mask 'k', zero-extended,
// into every 64-bit element of 'dst'.
//
//	FOR j := 0 to 1
//		i := j*64
//		dst[i+63:i] := ZeroExtend(k[7:0])
//	ENDFOR
//	dst[MAX:128] := 0
//
// Instruction: 'VPBROADCASTMB2Q'. Intrinsic: '_mm_broadcastmb_epi64'.
// Requires AVX512CD.
func BroadcastmbEpi64(k x86.Mmask8) (dst x86.M128i) {
	mask := uint8(k)
	dst = x86.M128i(broadcastmbEpi64(mask))
	return dst
}
示例14: SignEpi8
// SignEpi8 applies the sign of each signed 8-bit integer in 'b' to the packed
// 8-bit integer at the same position in 'a' and stores the results in 'dst':
// the element of 'a' is negated when the element of 'b' is negative, zeroed
// when the element of 'b' is zero, and copied unchanged otherwise.
//
//	FOR j := 0 to 15
//		i := j*8
//		IF b[i+7:i] < 0
//			dst[i+7:i] := NEG(a[i+7:i])
//		ELSE IF b[i+7:i] = 0
//			dst[i+7:i] := 0
//		ELSE
//			dst[i+7:i] := a[i+7:i]
//		FI
//	ENDFOR
//
// Instruction: 'PSIGNB'. Intrinsic: '_mm_sign_epi8'.
// Requires SSSE3.
func SignEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i) {
	values, signs := [16]byte(a), [16]byte(b)
	dst = x86.M128i(signEpi8(values, signs))
	return dst
}
示例15: AlignrEpi8
// AlignrEpi8 joins the 16-byte blocks 'a' (high half) and 'b' (low half) into
// a 32-byte temporary, shifts that temporary right by 'count' bytes, and
// stores its low 16 bytes in 'dst'.
//
//	tmp[255:0] := ((a[127:0] << 128) OR b[127:0]) >> (count[7:0]*8)
//	dst[127:0] := tmp[127:0]
//
// Instruction: 'PALIGNR'. Intrinsic: '_mm_alignr_epi8'.
// Requires SSSE3.
func AlignrEpi8(a x86.M128i, b x86.M128i, count int) (dst x86.M128i) {
	hi, lo := [16]byte(a), [16]byte(b)
	dst = x86.M128i(alignrEpi8(hi, lo, count))
	return dst
}