From a33b95325ffee5a1d92e66d14c1ad5207e1375de Mon Sep 17 00:00:00 2001 From: John Byrd Date: Thu, 22 Sep 2016 17:43:57 -0700 Subject: [PATCH] When calculating the AES block cipher, allocate 4K of memory on the stack instead of 256+ bytes. Search within that 4K space to put the 256-byte aligned Locals struct in a place which does not have 4K cache conflicts with the Te temporary buffer. This permits us to call _malloca() or alloca() once per call of this function. This commit also makes sure that the Microsoft-only _freea() occurs at the correct location instead of at a pointer to the middle of the stack, when the memory allocated by _malloca() or alloca() is not 256-byte aligned. --- rijndael.cpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/rijndael.cpp b/rijndael.cpp index b4ca2d84..cca13db0 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -1035,6 +1035,9 @@ void Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k); #if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86 +/* Determine whether the range between begin and end overlaps + * with the same 4k block offsets as the Te table. + */ static inline bool AliasedWithTable(const byte *begin, const byte *end) { size_t s0 = size_t(begin)%4096, s1 = size_t(end)%4096; @@ -1250,19 +1253,25 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo }; const byte* zeros = (byte *)(Te+256); - byte *space = NULL; + byte *space = NULL, *originalSpace = NULL; - do { + const size_t aliasPageSize = 4096; + const size_t aliasBlockSize = 256; + const size_t sizeToAllocate = aliasPageSize + aliasBlockSize + sizeof(Locals); #if (CRYPTOPP_MSC_VERSION >= 1400) - // http://msdn.microsoft.com/en-us/library/5471dc8s.aspx - space = (byte *)_malloca(255+sizeof(Locals)); - space += (256-(size_t)space%256)%256; + originalSpace = (byte *)_malloca(sizeToAllocate); #else - space = (byte *)alloca(255+sizeof(Locals)); - space += (256-(size_t)space%256)%256; + originalSpace = (byte *)alloca(sizeToAllocate); #endif + /* round up to nearest 256 byte boundary */ + space = originalSpace + + (aliasBlockSize - (size_t)originalSpace % aliasBlockSize) + % aliasBlockSize; + while (AliasedWithTable(space, space + sizeof(Locals))) + { + space += 256; + CRYPTOPP_ASSERT(space < (originalSpace + aliasPageSize)); } - while (AliasedWithTable(space, space+sizeof(Locals))); size_t increment = BLOCKSIZE; if (flags & BT_ReverseDirection) @@ -1293,7 +1302,7 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key); #if (CRYPTOPP_MSC_VERSION >= 1400) - _freea(space); + _freea(originalSpace); #endif return length % BLOCKSIZE;