From 701ec3aa1f45a7543a327560f0c2bbfbc3029098 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 6 Aug 2017 21:25:36 -0400 Subject: [PATCH] Fix ARMv8 AES Encryption ARMv8 AES decryption is not working at the moment. This check-in will allow us to test the current changes more widespread. We expected AES decryption failures only --- config.h | 3 -- rijndael-simd.cpp | 105 +++++++++++++++++++++++++++++++++++++++++++--- rijndael.cpp | 44 +++++++++++++------ 3 files changed, 132 insertions(+), 20 deletions(-) diff --git a/config.h b/config.h index c22be086..6d1a0ed5 100644 --- a/config.h +++ b/config.h @@ -578,9 +578,6 @@ NAMESPACE_END # endif #endif -// TODO... -#undef CRYPTOPP_ARM_AES_AVAILABLE - #endif // ARM32, ARM64 // ***************** Miscellaneous ******************** diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp index 5ab14c52..3e546eb8 100644 --- a/rijndael-simd.cpp +++ b/rijndael-simd.cpp @@ -10,6 +10,10 @@ #include "config.h" #include "misc.h" +// TODO: Remove after debugging +#include +#include + // Clang and GCC hoops... #if !(defined(__ARM_FEATURE_CRYPTO) || defined(_MSC_VER)) # undef CRYPTOPP_ARM_AES_AVAILABLE @@ -122,6 +126,97 @@ bool CPU_TryAES_ARMV8() #endif // ARM32 or ARM64 #if (CRYPTOPP_ARM_AES_AVAILABLE) + +void PrintMessage(const byte *inBlock) +{ + printf("M: "); + for (unsigned int j=0; j<16; ++j) + printf("%02X", inBlock[j]); + printf("\n"); +} + +void PrintCipher(const byte *outBlock) +{ + printf("C: "); + for (unsigned int j=0; j<16; ++j) + printf("%02X", outBlock[j]); + printf("\n"); +} + +void PrintSubKeys(const word32 *keys, unsigned int rounds) +{ + const byte* k = (const byte*)keys; + for (unsigned int i=0; i(subKeys); + + unsigned int i; + for (i=0; i(subKeys); + + // AES single round decryption + data = vaesdq_u8(data, vld1q_u8(keys)); + + unsigned int i; + for (i=0; i inline size_t Rijndael_AdvancedProcessBlocks_AESNI(F1 func1, F4 func4, - MAYBE_CONST __m128i *subkeys, unsigned int rounds, const byte *inBlocks, + MAYBE_CONST __m128i *subkeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { size_t blockSize = 16; @@ -310,20 +405,20 @@ inline size_t Rijndael_AdvancedProcessBlocks_AESNI(F1 func1, F4 func4, return length; } -size_t Rijndael_AdvancedProcessBlocks_Enc_AESNI(MAYBE_CONST word32 *subkeys, unsigned int rounds, +size_t Rijndael_AdvancedProcessBlocks_Enc_AESNI(MAYBE_CONST word32 *subkeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { MAYBE_CONST __m128i* keys = reinterpret_cast(subkeys); return Rijndael_AdvancedProcessBlocks_AESNI(AESNI_Enc_Block, AESNI_Enc_4_Blocks, - keys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); + keys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } -size_t Rijndael_AdvancedProcessBlocks_Dec_AESNI(MAYBE_CONST word32 *subkeys, unsigned int rounds, +size_t Rijndael_AdvancedProcessBlocks_Dec_AESNI(MAYBE_CONST word32 *subkeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { MAYBE_CONST __m128i* keys = reinterpret_cast(subkeys); return Rijndael_AdvancedProcessBlocks_AESNI(AESNI_Dec_Block, AESNI_Dec_4_Blocks, - keys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); + keys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32 *rk) diff --git a/rijndael.cpp b/rijndael.cpp index 4d803001..d28c280f 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -74,12 +74,6 @@ being unloaded from L1 cache, until that round is finished. #include "misc.h" #include "cpu.h" -// TODO: remove... -#if (CRYPTOPP_ARM_AES_AVAILABLE) -# include "arm_neon.h" -# include "arm_acle.h" -#endif - NAMESPACE_BEGIN(CryptoPP) // Hack for http://github.com/weidai11/cryptopp/issues/42 and http://github.com/weidai11/cryptopp/issues/132 @@ -224,12 +218,19 @@ void Rijndael::Base::FillDecTable() extern void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32* rk); extern void Rijndael_UncheckedSetKeyRev_SSE4_AESNI(word32 *key, unsigned int rounds); -extern size_t Rijndael_AdvancedProcessBlocks_Enc_AESNI(const word32 *subkeys, unsigned int rounds, +extern size_t Rijndael_AdvancedProcessBlocks_Enc_AESNI(const word32 *subkeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); -extern size_t Rijndael_AdvancedProcessBlocks_Dec_AESNI(const word32 *subkeys, unsigned int rounds, +extern size_t Rijndael_AdvancedProcessBlocks_Dec_AESNI(const word32 *subkeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); #endif +#if (CRYPTOPP_ARM_AES_AVAILABLE) +extern void Rijndael_Enc_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorBlock, byte *outBlock, + const word32 *subKeys, unsigned int rounds); +extern void Rijndael_Dec_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorBlock, byte *outBlock, + const word32 *subKeys, unsigned int rounds); +#endif + void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, const NameValuePairs &) { AssertValidKeyLength(keyLen); @@ -327,22 +328,34 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c if (HasAESNI()) ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16); #endif +#if CRYPTOPP_ARM_AES_AVAILABLE + if (HasAES()) + ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16); +#endif } void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const { #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) || CRYPTOPP_AESNI_AVAILABLE -#if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) +# if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) if (HasSSE2()) -#else +# else if (HasAESNI()) -#endif +# endif { (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0); return; } #endif +#if (CRYPTOPP_ARM_AES_AVAILABLE) + if (HasAES()) + { + Rijndael_Enc_ProcessAndXorBlock_ARMV8(inBlock, xorBlock, outBlock, m_key.begin(), m_rounds); + return; + } +#endif + typedef BlockGetAndPut Block; word32 s0, s1, s2, s3, t0, t1, t2, t3; @@ -421,6 +434,14 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock } #endif +#if (CRYPTOPP_ARM_AES_AVAILABLE) && 0 + if (HasAES()) + { + Rijndael_Dec_ProcessAndXorBlock_ARMV8(inBlock, xorBlock, outBlock, m_key.begin(), m_rounds); + return; + } +#endif + typedef BlockGetAndPut Block; word32 s0, s1, s2, s3, t0, t1, t2, t3; @@ -1049,7 +1070,6 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo #if CRYPTOPP_AESNI_AVAILABLE if (HasAESNI()) return Rijndael_AdvancedProcessBlocks_Enc_AESNI(m_key.begin(), m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); - #endif #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)