From c6b096ddd44f14a7db19a847af7cdaf65ee89643 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Thu, 21 Sep 2017 01:08:44 -0400 Subject: [PATCH] Move Rijndael_UncheckedSetKey_POWER8 prior to GetUserKey call Arg... GetUserKey was performing a 32-bit word reverse. It was part of the problem on little-endian machines. --- rijndael-simd.cpp | 21 ++++++++++++++++++--- rijndael.cpp | 13 +++++++------ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp index 9f8102df..e558a26d 100644 --- a/rijndael-simd.cpp +++ b/rijndael-simd.cpp @@ -1070,7 +1070,13 @@ static const uint32_t s_rcon[3][4] = { /* Permute mask */ CRYPTOPP_ALIGN_DATA(16) static const uint32_t s_mask[4] = { +#if defined(IS_LITTLE_ENDIAN) + // 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c + // 0x01020300, 0x01020300, 0x01020300, 0x01020300 + 0x02010003, 0x02010003, 0x02010003, 0x02010003 +#else 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c +#endif }; static inline uint8x16_p8 @@ -1097,15 +1103,23 @@ Rijndael_Subkey_POWER8(uint8x16_p8 r1, const uint8x16_p8 r4, const uint8x16_p8 r return r1; } -void Rijndael_UncheckedSetKey_POWER8(word32* rk, size_t keyLen, const word32* rc, +void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32* rk, const word32* rc, const byte* Se, unsigned int rounds) { #if defined(IS_BIG_ENDIAN) - // Testing shows this is about 125 to 275 cycles faster. + // Testing shows this is about 150 to 350 cycles faster. 
if (keyLen == 16) { +#if defined(IS_BIG_ENDIAN) uint8_t* skptr = (uint8_t*)rk; - uint8x16_p8 r1 = (uint8x16_p8)VectorLoad((uint8_t*)skptr); + std::memcpy(rk, userKey, keyLen); +#else + uint8_t* skptr = (uint8_t*)rk; + std::memcpy(rk, userKey, keyLen); + ReverseByteArrayLE(skptr); +#endif + + uint8x16_p8 r1 = (uint8x16_p8)VectorLoadKey(skptr); uint8x16_p8 r4 = (uint8x16_p8)VectorLoadKey(s_rcon[0]); uint8x16_p8 r5 = (uint8x16_p8)VectorLoadKey(s_mask); @@ -1140,6 +1154,7 @@ void Rijndael_UncheckedSetKey_POWER8(word32* rk, size_t keyLen, const word32* rc else #endif { + GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen); word32 *rk_saved = rk, temp; // keySize: m_key allocates 4*(rounds+1) word32's. diff --git a/rijndael.cpp b/rijndael.cpp index 56ea9e77..06c96d28 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -253,7 +253,7 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si #if (CRYPTOPP_POWER8_AES_AVAILABLE) extern void ReverseByteArrayLE(byte src[16]); -extern void Rijndael_UncheckedSetKey_POWER8(word32* rk, size_t keyLen, +extern void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32* rk, const word32* rc, const byte* Se, unsigned int rounds); extern size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds, @@ -284,18 +284,19 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c } #endif - GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen); - const word32 *rc = rcon; - word32 temp; - #if CRYPTOPP_POWER8_AES_AVAILABLE if (HasAES()) { - Rijndael_UncheckedSetKey_POWER8(rk, keyLen, rc, Se, m_rounds); + // We still need rcon and Se to fall back to C/C++ for AES-192 and AES-256 + Rijndael_UncheckedSetKey_POWER8(userKey, keyLen, rk, rcon, Se, m_rounds); return; } #endif + GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen); + const word32 *rc = rcon; + word32 temp; + while (true) { temp = rk[keyLen/4-1];