diff --git a/gcm-simd.cpp b/gcm-simd.cpp index f55f18f2..39a858bc 100644 --- a/gcm-simd.cpp +++ b/gcm-simd.cpp @@ -178,10 +178,14 @@ using CryptoPP::VectorGetLow; using CryptoPP::VectorGetHigh; using CryptoPP::VectorRotateLeft; -// Carryless multiples appear to be endian-sensitive. Big-endian -// multiplies return a result {a,b}, while little-endian return -// a result {b,a}. Since the multiply routines are reflective and -// use LE the BE results need a fixup. +// Carryless multiples are endian-sensitive. Big-endian multiplies +// return a result {a,b}, while little-endian return a result {b,a}. +// Since the multiply routines are reflective and use LE the BE results +// need a fixup using AdjustBE. Additionally, parameters to VMULL_NN +// are presented in a reverse arrangement so we swap the use of +// VectorGetHigh and VectorGetLow. The presentation detail is why +// VMULL_NN is located in this source file rather than ppc-simd.h. + inline uint64x2_p AdjustBE(const uint64x2_p& val) { #if CRYPTOPP_BIG_ENDIAN @@ -208,10 +212,8 @@ inline uint64x2_p VMULL_01(const uint64x2_p& a, const uint64x2_p& b) // The 0 used in the vmull yields 0 for the high product, so the high // dword of 'a' is "don't care". #if defined(__xlc__) || defined(__xlC__) - // return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetHigh(b))); return AdjustBE(__vpmsumd (a, VectorGetHigh(b))); #else - // return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetHigh(b))); return AdjustBE(__builtin_crypto_vpmsumd (a, VectorGetHigh(b))); #endif } @@ -223,10 +225,8 @@ inline uint64x2_p VMULL_10(const uint64x2_p& a, const uint64x2_p& b) // The 0 used in the vmull yields 0 for the high product, so the high // dword of 'b' is "don't care". 
#if defined(__xlc__) || defined(__xlC__) - // return AdjustBE(__vpmsumd (VectorGetHigh(a), VectorGetLow(b))); return AdjustBE(__vpmsumd (VectorGetHigh(a), b)); #else - // return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetLow(b))); return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), b)); #endif } @@ -238,10 +238,8 @@ inline uint64x2_p VMULL_11(const uint64x2_p& a, const uint64x2_p& b) // The 0 used in the vmull yields 0 for the high product, so the high // dword of 'b' is "don't care". #if defined(__xlc__) || defined(__xlC__) - // return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetLow(b))); return AdjustBE(__vpmsumd (VectorGetLow(a), b)); #else - // return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetLow(b))); return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), b)); #endif }