From c39d7fdef5615f51662e0d3f83654558bee22d66 Mon Sep 17 00:00:00 2001
From: Jeffrey Walton <noloader@gmail.com>
Date: Sat, 19 Jan 2019 15:20:47 -0500
Subject: [PATCH] Add VecMergeHi and VecMergeLo PowerPC wrappers

---
 gf2n_simd.cpp | 44 +++++++++++++++++++++++---------------------
 ppc_simd.h    | 26 ++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/gf2n_simd.cpp b/gf2n_simd.cpp
index b0296b5f..1630615c 100644
--- a/gf2n_simd.cpp
+++ b/gf2n_simd.cpp
@@ -317,6 +317,8 @@ using CryptoPP::VecXor;
 using CryptoPP::VecAnd;
 
 using CryptoPP::VecPermute;
+using CryptoPP::VecMergeHi;
+using CryptoPP::VecMergeLo;
 using CryptoPP::VecShiftLeft;
 using CryptoPP::VecShiftRight;
 using CryptoPP::VecRotateLeftOctet;
@@ -335,11 +337,11 @@ inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b)
 {
     const uint64x2_p z={0};
 #if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
-    return VMULL2LE(__vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
+    return VMULL2LE(__vpmsumd (VecMergeHi(z, a), VecMergeHi(z, b)));
 #elif defined(__clang__)
-    return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
+    return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecMergeHi(z, a), VecMergeHi(z, b)));
 #else
-    return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
+    return VMULL2LE(__builtin_crypto_vpmsumd (VecMergeHi(z, a), VecMergeHi(z, b)));
 #endif
 }
 
@@ -348,11 +350,11 @@ inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b)
 {
     const uint64x2_p z={0};
 #if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
-    return VMULL2LE(__vpmsumd (vec_mergel(z, a), b));
+    return VMULL2LE(__vpmsumd (VecMergeLo(z, a), b));
 #elif defined(__clang__)
-    return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergel(z, a), b));
+    return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecMergeLo(z, a), b));
 #else
-    return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergel(z, a), b));
+    return VMULL2LE(__builtin_crypto_vpmsumd (VecMergeLo(z, a), b));
 #endif
 }
 
@@ -365,16 +367,16 @@ F2N_Multiply_128x128_POWER8(uint64x2_p& c1, uint64x2_p& c0, const uint64x2_p& a,
 
     c0 = VMULL_00LE(a, b);
     c1 = VMULL_11LE(a, b);
-    t1 = vec_mergel(a, a);
+    t1 = VecMergeLo(a, a);
     t1 = VecXor(a, t1);
-    t2 = vec_mergel(b, b);
+    t2 = VecMergeLo(b, b);
     t2 = VecXor(b, t2);
     t1 = VMULL_00LE(t1, t2);
     t1 = VecXor(c0, t1);
     t1 = VecXor(c1, t1);
     t2 = t1;
-    t1 = vec_mergeh(z0, t1);
-    t2 = vec_mergel(t2, z0);
+    t1 = VecMergeHi(z0, t1);
+    t2 = VecMergeLo(t2, z0);
     c0 = VecXor(c0, t1);
     c1 = VecXor(c1, t2);
 }
@@ -388,7 +390,7 @@ inline uint64x2_p ShiftLeft128_POWER8(uint64x2_p x)
 
     x = VecShiftLeft<N>(x);
     u = VecShiftRight<64-N>(u);
-    v = vec_mergeh(z, u);
+    v = VecMergeHi(z, u);
     x = VecOr(x, v);
     return x;
 }
@@ -405,41 +407,41 @@ GF2NT_233_Reduce_POWER8(uint64x2_p& c3, uint64x2_p& c2, uint64x2_p& c1, uint64x2
     const uint64x2_p z0={0};
 
     b1 = c1; a1 = c1;
-    a0 = vec_mergeh(c1, z0);
+    a0 = VecMergeHi(c1, z0);
     a1 = VecShiftLeft<23>(a1);
     a1 = VecShiftRight<23>(a1);
     c1 = VecOr(a1, a0);
     b2 = VecShiftRight<64-23>(c2);
     c3 = ShiftLeft128_POWER8<23>(c3);
-    a0 = vec_mergel(b2, z0);
+    a0 = VecMergeLo(b2, z0);
     c3 = VecOr(c3, a0);
     b1 = VecShiftRight<64-23>(b1);
     c2 = ShiftLeft128_POWER8<23>(c2);
-    a0 = vec_mergel(b1, z0);
+    a0 = VecMergeLo(b1, z0);
     c2 = VecOr(c2, a0);
     b3 = c3;
     b2 = VecShiftRight<64-10>(c2);
     b3 = ShiftLeft128_POWER8<10>(b3);
-    a0 = vec_mergel(b2, z0);
+    a0 = VecMergeLo(b2, z0);
     b3 = VecOr(b3, a0);
-    a0 = vec_mergel(c3, z0);
+    a0 = VecMergeLo(c3, z0);
     b3 = VecXor(b3, a0);
     b1 = VecShiftRight<64-23>(b3);
     b3 = ShiftLeft128_POWER8<23>(b3);
-    b3 = vec_mergel(b3, z0);
+    b3 = VecMergeLo(b3, z0);
     b3 = VecOr(b3, b1);
     c2 = VecXor(c2, b3);
     b3 = c3;
     b2 = VecShiftRight<64-10>(c2);
     b3 = ShiftLeft128_POWER8<10>(b3);
-    b2 = vec_mergel(b2, z0);
+    b2 = VecMergeLo(b2, z0);
     b3 = VecOr(b3, b2);
     b2 = c2;
     b2 = ShiftLeft128_POWER8<10>(b2);
-    a0 = vec_mergeh(z0, b2);
+    a0 = VecMergeHi(z0, b2);
     c2 = VecXor(c2, a0);
-    a0 = vec_mergeh(z0, b3);
-    a1 = vec_mergel(b2, z0);
+    a0 = VecMergeHi(z0, b3);
+    a1 = VecMergeLo(b2, z0);
     a0 = VecOr(a0, a1);
     c3 = VecXor(c3, a0);
     c0 = VecXor(c0, c2);
diff --git a/ppc_simd.h b/ppc_simd.h
index 51e5b726..d9b2e91b 100644
--- a/ppc_simd.h
+++ b/ppc_simd.h
@@ -1187,6 +1187,32 @@ inline uint32x4_p VecShiftLeft(const uint32x4_p vec)
     return vec_sl(vec, m);
 }
 
+/// \brief Merge two vectors
+/// \param v1 the first vector
+/// \param v2 the second vector
+/// \returns vector
+/// \par Wraps
+/// vec_mergeh
+/// \since Crypto++ 8.1
+template <class T>
+inline T VecMergeHi(const T v1, const T v2)
+{
+    return vec_mergeh(v1, v2);
+}
+
+/// \brief Merge two vectors
+/// \param v1 the first vector
+/// \param v2 the second vector
+/// \returns vector
+/// \par Wraps
+/// vec_mergel
+/// \since Crypto++ 8.1
+template <class T>
+inline T VecMergeLo(const T v1, const T v2)
+{
+    return vec_mergel(v1, v2);
+}
+
 #if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
 
 /// \brief Rotate a packed vector left
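
Usage note (an illustration, not part of the patch): VecMergeHi and VecMergeLo are
thin wrappers over the AltiVec vec_mergeh and vec_mergel builtins. On 64-bit lanes,
merge-high interleaves the first doubleword of each operand and merge-low the second,
which is why gf2n_simd.cpp merges against a zero vector to isolate a single doubleword
before feeding __vpmsumd. The sketch below is a minimal sketch under assumptions: a
POWER8 toolchain with GCC-style vector subscripting, a build from the Crypto++ source
directory so "ppc_simd.h" resolves, and a hypothetical file name test_merge.cxx.

    // test_merge.cxx: demonstrates VecMergeHi/VecMergeLo doubleword interleaving.
    // Assumed build command: g++ -mcpu=power8 test_merge.cxx -o test_merge
    #include "ppc_simd.h"
    #include <cstdio>

    int main()
    {
        using CryptoPP::uint64x2_p;
        using CryptoPP::VecMergeHi;
        using CryptoPP::VecMergeLo;

        const uint64x2_p a = {1, 2};
        const uint64x2_p b = {3, 4};
        const uint64x2_p z = {0, 0};

        // Merge-high pairs element 0 of each operand: {a[0], b[0]} == {1, 3}.
        const uint64x2_p hi = VecMergeHi(a, b);
        // Merge-low pairs element 1 of each operand: {a[1], b[1]} == {2, 4}.
        const uint64x2_p lo = VecMergeLo(a, b);
        // The idiom from VMULL_00LE: merging with a zero vector isolates one
        // doubleword, e.g. VecMergeHi(z, a) == {0, a[0]}.
        const uint64x2_p iso = VecMergeHi(z, a);

        printf("hi:  %llu %llu\n", (unsigned long long)hi[0],  (unsigned long long)hi[1]);
        printf("lo:  %llu %llu\n", (unsigned long long)lo[0],  (unsigned long long)lo[1]);
        printf("iso: %llu %llu\n", (unsigned long long)iso[0], (unsigned long long)iso[1]);
        return 0;
    }

The wrappers take the place of raw vec_mergeh/vec_mergel calls so the SIMD code reads
uniformly through the VecXxx layer in ppc_simd.h, matching the existing VecXor, VecOr,
and VecShiftLeft wrappers used throughout gf2n_simd.cpp.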