When calculating the AES block cipher, allocate 4K of memory on the stack instead of 256+ bytes. Search within that 4K space to place the 256-byte-aligned Locals struct at an address that does not have 4K cache conflicts with the Te temporary buffer. This permits us to call _malloca() or alloca() exactly once per call of this function, instead of calling it repeatedly in a loop until a non-conflicting address happens to be returned. This commit also makes sure that the Microsoft-only _freea() is passed the original pointer returned by _malloca(), rather than a pointer into the middle of the allocation, when the memory allocated by _malloca() or alloca() is not 256-byte aligned.
parent
ddac25ead8
commit
a33b95325f
27
rijndael.cpp
27
rijndael.cpp
|
|
@ -1035,6 +1035,9 @@ void Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k);
|
||||||
|
|
||||||
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
|
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
|
||||||
|
|
||||||
|
/* Determine whether the range between begin and end overlaps
|
||||||
|
* with the same 4k block offsets as the Te table.
|
||||||
|
*/
|
||||||
static inline bool AliasedWithTable(const byte *begin, const byte *end)
|
static inline bool AliasedWithTable(const byte *begin, const byte *end)
|
||||||
{
|
{
|
||||||
size_t s0 = size_t(begin)%4096, s1 = size_t(end)%4096;
|
size_t s0 = size_t(begin)%4096, s1 = size_t(end)%4096;
|
||||||
|
|
@ -1250,19 +1253,25 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
|
||||||
};
|
};
|
||||||
|
|
||||||
const byte* zeros = (byte *)(Te+256);
|
const byte* zeros = (byte *)(Te+256);
|
||||||
byte *space = NULL;
|
byte *space = NULL, *originalSpace = NULL;
|
||||||
|
|
||||||
do {
|
const size_t aliasPageSize = 4096;
|
||||||
|
const size_t aliasBlockSize = 256;
|
||||||
|
const size_t sizeToAllocate = aliasPageSize + aliasBlockSize + sizeof(Locals);
|
||||||
#if (CRYPTOPP_MSC_VERSION >= 1400)
|
#if (CRYPTOPP_MSC_VERSION >= 1400)
|
||||||
// http://msdn.microsoft.com/en-us/library/5471dc8s.aspx
|
originalSpace = (byte *)_malloca(sizeToAllocate);
|
||||||
space = (byte *)_malloca(255+sizeof(Locals));
|
|
||||||
space += (256-(size_t)space%256)%256;
|
|
||||||
#else
|
#else
|
||||||
space = (byte *)alloca(255+sizeof(Locals));
|
originalSpace = (byte *)alloca(sizeToAllocate);
|
||||||
space += (256-(size_t)space%256)%256;
|
|
||||||
#endif
|
#endif
|
||||||
|
/* round up to nearest 256 byte boundary */
|
||||||
|
space = originalSpace +
|
||||||
|
(aliasBlockSize - (size_t)originalSpace % aliasBlockSize)
|
||||||
|
% aliasBlockSize;
|
||||||
|
while (AliasedWithTable(space, space + sizeof(Locals)))
|
||||||
|
{
|
||||||
|
space += 256;
|
||||||
|
CRYPTOPP_ASSERT(space < (originalSpace + aliasPageSize));
|
||||||
}
|
}
|
||||||
while (AliasedWithTable(space, space+sizeof(Locals)));
|
|
||||||
|
|
||||||
size_t increment = BLOCKSIZE;
|
size_t increment = BLOCKSIZE;
|
||||||
if (flags & BT_ReverseDirection)
|
if (flags & BT_ReverseDirection)
|
||||||
|
|
@ -1293,7 +1302,7 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
|
||||||
Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key);
|
Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key);
|
||||||
|
|
||||||
#if (CRYPTOPP_MSC_VERSION >= 1400)
|
#if (CRYPTOPP_MSC_VERSION >= 1400)
|
||||||
_freea(space);
|
_freea(originalSpace);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return length % BLOCKSIZE;
|
return length % BLOCKSIZE;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue