From f510b3498c13e7ba6e413fef26b6843e0c5f89c2 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 20 Jan 2019 03:52:05 -0500 Subject: [PATCH] Update documentation --- ppc_simd.h | 121 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 79 insertions(+), 42 deletions(-) diff --git a/ppc_simd.h b/ppc_simd.h index 1bc358aa..37548448 100644 --- a/ppc_simd.h +++ b/ppc_simd.h @@ -864,46 +864,9 @@ inline void VecStoreBE(const T data, int off, word32 dest[4]) //@} -/// \name OTHER OPERATIONS +/// \name LOGICAL OPERATIONS //@{ -/// \brief Permutes a vector -/// \tparam T1 vector type -/// \tparam T2 vector type -/// \param vec the vector -/// \param mask vector mask -/// \returns vector -/// \details VecPermute() returns a new vector from vec based on -/// mask. mask is an uint8x16_p type vector. The return -/// vector is the same type as vec. -/// \par Wraps -/// vec_perm -/// \since Crypto++ 6.0 -template <class T1, class T2> -inline T1 VecPermute(const T1 vec, const T2 mask) -{ - return (T1)vec_perm(vec, vec, (uint8x16_p)mask); -} - -/// \brief Permutes two vectors -/// \tparam T1 vector type -/// \tparam T2 vector type -/// \param vec1 the first vector -/// \param vec2 the second vector -/// \param mask vector mask -/// \returns vector -/// \details VecPermute() returns a new vector from vec1 and vec2 -/// based on mask. mask is an uint8x16_p type vector. The return -/// vector is the same type as vec1. 
-/// \par Wraps -/// vec_perm -/// \since Crypto++ 6.0 -template <class T1, class T2> -inline T1 VecPermute(const T1 vec1, const T1 vec2, const T2 mask) -{ - return (T1)vec_perm(vec1, (T1)vec2, (uint8x16_p)mask); -} - /// \brief AND two vectors /// \tparam T1 vector type /// \tparam T2 vector type @@ -955,6 +918,11 @@ inline T1 VecXor(const T1 vec1, const T2 vec2) return (T1)vec_xor(vec1, (T1)vec2); } +//@} + +/// \name ARITHMETIC OPERATIONS +//@{ + /// \brief Add two vectors /// \tparam T1 vector type /// \tparam T2 vector type @@ -1021,6 +989,48 @@ inline uint32x4_p VecAdd64(const uint32x4_p& vec1, const uint32x4_p& vec2) #endif } +//@} + +/// \name OTHER OPERATIONS +//@{ + +/// \brief Permutes a vector +/// \tparam T1 vector type +/// \tparam T2 vector type +/// \param vec the vector +/// \param mask vector mask +/// \returns vector +/// \details VecPermute() returns a new vector from vec based on +/// mask. mask is an uint8x16_p type vector. The return +/// vector is the same type as vec. +/// \par Wraps +/// vec_perm +/// \since Crypto++ 6.0 +template <class T1, class T2> +inline T1 VecPermute(const T1 vec, const T2 mask) +{ + return (T1)vec_perm(vec, vec, (uint8x16_p)mask); +} + +/// \brief Permutes two vectors +/// \tparam T1 vector type +/// \tparam T2 vector type +/// \param vec1 the first vector +/// \param vec2 the second vector +/// \param mask vector mask +/// \returns vector +/// \details VecPermute() returns a new vector from vec1 and vec2 +/// based on mask. mask is an uint8x16_p type vector. The return +/// vector is the same type as vec1. 
+/// \par Wraps +/// vec_perm +/// \since Crypto++ 6.0 +template <class T1, class T2> +inline T1 VecPermute(const T1 vec1, const T1 vec2, const T2 mask) +{ + return (T1)vec_perm(vec1, (T1)vec2, (uint8x16_p)mask); +} + /// \brief Shift a vector left /// \tparam C shift byte count /// \tparam T vector type @@ -1441,7 +1451,7 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val) /// \param a the first term /// \param b the second term /// \returns vector product -/// \details VecPolyMultiply00LE perform polynomial multiplication and presents +/// \details VecPolyMultiply00LE performs polynomial multiplication and presents /// the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x00). /// The 0x00 indicates the low 64-bits of a and b /// are multiplied. @@ -1465,7 +1475,7 @@ inline uint64x2_p VecPolyMultiply00LE(const uint64x2_p& a, const uint64x2_p& b) /// \param a the first term /// \param b the second term /// \returns vector product -/// \details VecPolyMultiply01LE perform polynomial multiplication and presents +/// \details VecPolyMultiply01LE performs polynomial multiplication and presents /// the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x01). /// The 0x01 indicates the low 64-bits of a and high /// 64-bits of b are multiplied. @@ -1489,7 +1499,7 @@ inline uint64x2_p VecPolyMultiply01LE(const uint64x2_p& a, const uint64x2_p& b) /// \param a the first term /// \param b the second term /// \returns vector product -/// \details VecPolyMultiply10LE perform polynomial multiplication and presents +/// \details VecPolyMultiply10LE performs polynomial multiplication and presents /// the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x10). /// The 0x10 indicates the high 64-bits of a and low /// 64-bits of b are multiplied. 
@@ -1513,7 +1523,7 @@ inline uint64x2_p VecPolyMultiply10LE(const uint64x2_p& a, const uint64x2_p& b) /// \param a the first term /// \param b the second term /// \returns vector product -/// \details VecPolyMultiply11LE perform polynomial multiplication and presents +/// \details VecPolyMultiply11LE performs polynomial multiplication and presents /// the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x11). /// The 0x11 indicates the high 64-bits of a and b /// are multiplied. @@ -1533,6 +1543,33 @@ inline uint64x2_p VecPolyMultiply11LE(const uint64x2_p& a, const uint64x2_p& b) #endif } +/// \brief Polynomial multiplication +/// \tparam T the vector type +/// \param a the first term +/// \param b the second term +/// \returns vector product +/// \details VecPolyMultiply performs polynomial multiplication. POWER8 +/// polynomial multiplication multiplies the high and low terms, and then XORs +/// the high and low products. That is, the result is ah*bh XOR al*bl. +/// This behavior differs from Intel polynomial multiplication. +/// To obtain a single product without the XOR, set one of the high or +/// low terms to 0. For example, setting ah=0 results in 0*bh +/// XOR al*bl = al*bl. +/// \par Wraps +/// __vpmsumd, __builtin_altivec_crypto_vpmsumd and __builtin_crypto_vpmsumd. +/// \since Crypto++ 8.1 +template <class T> +inline T VecPolyMultiply(const T& a, const T& b) +{ +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) + return (T)__vpmsumd (a, b); +#elif defined(__clang__) + return (T)__builtin_altivec_crypto_vpmsumd (a, b); +#else + return (T)__builtin_crypto_vpmsumd (a, b); +#endif +} + //@} /// \name AES ENCRYPTION