When calculating the AES block cipher, allocate 4K of memory on the stack instead of 256+ bytes. Search within that 4K space for a 256-byte-aligned position for the Locals struct that does not have 4K cache conflicts with the Te temporary buffer. This permits us to call _malloca() or alloca() only once per call of this function. This commit also makes sure that the Microsoft-only _freea() is passed the pointer originally returned by _malloca(), rather than an adjusted pointer into the middle of the allocation, when the memory returned by _malloca() or alloca() is not 256-byte aligned.

pull/301/head
John Byrd 2016-09-22 17:43:57 -07:00
parent ddac25ead8
commit a33b95325f
1 changed file with 18 additions and 9 deletions

View File

@ -1035,6 +1035,9 @@ void Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k);
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
/* Determine whether the range between begin and end overlaps
* with the same 4k block offsets as the Te table.
*/
static inline bool AliasedWithTable(const byte *begin, const byte *end)
{
size_t s0 = size_t(begin)%4096, s1 = size_t(end)%4096;
@ -1250,19 +1253,25 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
};
const byte* zeros = (byte *)(Te+256);
byte *space = NULL;
byte *space = NULL, *originalSpace = NULL;
do {
const size_t aliasPageSize = 4096;
const size_t aliasBlockSize = 256;
const size_t sizeToAllocate = aliasPageSize + aliasBlockSize + sizeof(Locals);
#if (CRYPTOPP_MSC_VERSION >= 1400)
// http://msdn.microsoft.com/en-us/library/5471dc8s.aspx
space = (byte *)_malloca(255+sizeof(Locals));
space += (256-(size_t)space%256)%256;
originalSpace = (byte *)_malloca(sizeToAllocate);
#else
space = (byte *)alloca(255+sizeof(Locals));
space += (256-(size_t)space%256)%256;
originalSpace = (byte *)alloca(sizeToAllocate);
#endif
/* round up to nearest 256 byte boundary */
space = originalSpace +
(aliasBlockSize - (size_t)originalSpace % aliasBlockSize)
% aliasBlockSize;
while (AliasedWithTable(space, space + sizeof(Locals)))
{
space += 256;
CRYPTOPP_ASSERT(space < (originalSpace + aliasPageSize));
}
while (AliasedWithTable(space, space+sizeof(Locals)));
size_t increment = BLOCKSIZE;
if (flags & BT_ReverseDirection)
@ -1293,7 +1302,7 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key);
#if (CRYPTOPP_MSC_VERSION >= 1400)
_freea(space);
_freea(originalSpace);
#endif
return length % BLOCKSIZE;