diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp index e1bda7b4..06c65ea0 100644 --- a/rijndael-simd.cpp +++ b/rijndael-simd.cpp @@ -771,22 +771,7 @@ void Rijndael_UncheckedSetKeyRev_AESNI(word32 *key, unsigned int rounds) typedef __vector unsigned char uint8x16_p8; typedef __vector unsigned long long uint64x2_p8; -/* Reverses a 16-byte array as needed */ -void ByteReverseArrayLE(byte dest[16], const byte src[16]) -{ -#if defined(CRYPTOPP_XLC_VERSION) && defined(IS_LITTLE_ENDIAN) - vec_st(vec_reve(vec_ld(0, src)), 0, dest); -#elif defined(IS_LITTLE_ENDIAN) - const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; - const uint8x16_p8 zero = {0}; - vec_vsx_st(vec_perm(vec_vsx_ld(0, src), zero, mask), 0, dest); -#else - if (src != dest) - std::memcpy(dest, src, 16); -#endif -} - -void ByteReverseArrayLE(byte src[16]) +void ByteReverseArray(byte src[16]) { #if defined(CRYPTOPP_XLC_VERSION) && defined(IS_LITTLE_ENDIAN) vec_st(vec_reve(vec_ld(0, src)), 0, src); @@ -797,78 +782,92 @@ void ByteReverseArrayLE(byte src[16]) #endif } -uint8x16_p8 Load8x16(const uint8_t src[16]) +static inline uint8x16_p8 Reverse8x16(const uint8x16_p8& src) +{ + const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; + const uint8x16_p8 zero = {0}; + return vec_perm(src, zero, mask); +} + +static inline uint64x2_p8 Reverse64x2(const uint64x2_p8& src) +{ + const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; + const uint8x16_p8 zero = {0}; + return (uint64x2_p8)vec_perm((uint8x16_p8)src, zero, mask); +} + +static inline uint8x16_p8 Load8x16(const uint8_t src[16]) { #if defined(CRYPTOPP_XLC_VERSION) - /* http://stackoverflow.com/q/46124383/608639 */ return vec_xl_be(0, (uint8_t*)src); #else - return (uint8x16_p8)vec_vsx_ld(0, src); +# if defined(IS_LITTLE_ENDIAN) + return Reverse8x16(vec_vsx_ld(0, src)); +# else + return vec_vsx_ld(0, src); +# endif #endif } -uint8x16_p8 Load8x16(int off, const uint8_t src[16]) +static inline uint8x16_p8 Load8x16(int 
off, const uint8_t src[16]) { #if defined(CRYPTOPP_XLC_VERSION) - /* http://stackoverflow.com/q/46124383/608639 */ return vec_xl_be(off, (uint8_t*)src); #else - return (uint8x16_p8)vec_vsx_ld(off, src); +# if defined(IS_LITTLE_ENDIAN) + return Reverse8x16(vec_vsx_ld(off, src)); +# else + return vec_vsx_ld(off, src); +# endif #endif } -void Store8x16(const uint8x16_p8 src, uint8_t dest[16]) +static inline void Store8x16(const uint8x16_p8 src, uint8_t dest[16]) { #if defined(CRYPTOPP_XLC_VERSION) - /* http://stackoverflow.com/q/46124383/608639 */ vec_xst_be(src, 0, (uint8_t*)dest); #else +# if defined(IS_LITTLE_ENDIAN) + vec_vsx_st(Reverse8x16(src), 0, dest); +# else vec_vsx_st(src, 0, dest); +# endif #endif } -uint64x2_p8 Load64x2(const uint8_t src[16]) +static inline uint64x2_p8 Load64x2(const uint8_t src[16]) { #if defined(CRYPTOPP_XLC_VERSION) - /* http://stackoverflow.com/q/46124383/608639 */ return (uint64x2_p8)vec_xl_be(0, (uint8_t*)src); #else # if defined(IS_LITTLE_ENDIAN) - const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; - const uint8x16_p8 zero = {0}; - return (uint64x2_p8)vec_perm(vec_vsx_ld(0, src), zero, mask); + return Reverse64x2((uint64x2_p8)vec_vsx_ld(0, src)); # else return (uint64x2_p8)vec_vsx_ld(0, src); # endif #endif } -uint64x2_p8 Load64x2(int off, const uint8_t src[16]) +static inline uint64x2_p8 Load64x2(int off, const uint8_t src[16]) { #if defined(CRYPTOPP_XLC_VERSION) - /* http://stackoverflow.com/q/46124383/608639 */ return (uint64x2_p8)vec_xl_be(off, (uint8_t*)src); #else # if defined(IS_LITTLE_ENDIAN) - const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; - const uint8x16_p8 zero = {0}; - return (uint64x2_p8)vec_perm(vec_vsx_ld(off, src), zero, mask); + return Reverse64x2((uint64x2_p8)vec_vsx_ld(off, src)); # else return (uint64x2_p8)vec_vsx_ld(off, src); # endif #endif } -void Store64x2(const uint64x2_p8 src, uint8_t dest[16]) +static inline void Store64x2(const uint64x2_p8 src, uint8_t dest[16]) { 
#if defined(CRYPTOPP_XLC_VERSION) - /* http://stackoverflow.com/q/46124383/608639 */ vec_xst_be((uint8x16_p8)src, 0, (uint8_t*)dest); #else # if defined(IS_LITTLE_ENDIAN) - const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; - const uint8x16_p8 zero = {0}; - vec_vsx_st(vec_perm((uint8x16_p8)src, zero, mask), 0, dest); + vec_vsx_st((uint8x16_p8)Reverse64x2(src), 0, dest); # else vec_vsx_st((uint8x16_p8)src, 0, dest); # endif diff --git a/rijndael.cpp b/rijndael.cpp index 43c7fa80..94085ac1 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -251,7 +251,7 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si #endif #if (CRYPTOPP_POWER8_AES_AVAILABLE) -extern void ByteReverseArrayLE(byte src[16]); +extern void ByteReverseArray(byte src[16]); extern size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); @@ -329,7 +329,7 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c // reversed on little-endian systems to ensure it loads properly. byte * ptr = reinterpret_cast<byte*>(rk); for (unsigned int i=0; i<=m_rounds; i++) - ByteReverseArrayLE(ptr+i*16); + ByteReverseArray(ptr+i*16); #endif // IS_LITTLE_ENDIAN return;