Move Rijndael_UncheckedSetKey_POWER8 prior to GetUserKey call
Arg... GetUserKey was performing a 32-bit word reverse. It was part of the problem on little-endian machines.
pull/484/merge
parent
9fd5d023f9
commit
c6b096ddd4
|
|
@ -1070,7 +1070,13 @@ static const uint32_t s_rcon[3][4] = {
|
||||||
/* Permute mask */
|
/* Permute mask */
|
||||||
CRYPTOPP_ALIGN_DATA(16)
|
CRYPTOPP_ALIGN_DATA(16)
|
||||||
static const uint32_t s_mask[4] = {
|
static const uint32_t s_mask[4] = {
|
||||||
|
#if defined(IS_LITTLE_ENDIAN)
|
||||||
|
// 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c
|
||||||
|
// 0x01020300, 0x01020300, 0x01020300, 0x01020300
|
||||||
|
0x02010003, 0x02010003, 0x02010003, 0x02010003
|
||||||
|
#else
|
||||||
0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c
|
0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline uint8x16_p8
|
static inline uint8x16_p8
|
||||||
|
|
@ -1097,15 +1103,23 @@ Rijndael_Subkey_POWER8(uint8x16_p8 r1, const uint8x16_p8 r4, const uint8x16_p8 r
|
||||||
return r1;
|
return r1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rijndael_UncheckedSetKey_POWER8(word32* rk, size_t keyLen, const word32* rc,
|
void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32* rk, const word32* rc,
|
||||||
const byte* Se, unsigned int rounds)
|
const byte* Se, unsigned int rounds)
|
||||||
{
|
{
|
||||||
#if defined(IS_BIG_ENDIAN)
|
#if defined(IS_BIG_ENDIAN)
|
||||||
// Testing shows this is about 125 to 275 cycles faster.
|
// Testing shows this is about 150 to 350 cycles faster.
|
||||||
if (keyLen == 16)
|
if (keyLen == 16)
|
||||||
{
|
{
|
||||||
|
#if defined(IS_BIG_ENDIAN)
|
||||||
uint8_t* skptr = (uint8_t*)rk;
|
uint8_t* skptr = (uint8_t*)rk;
|
||||||
uint8x16_p8 r1 = (uint8x16_p8)VectorLoad((uint8_t*)skptr);
|
std::memcpy(rk, userKey, keyLen);
|
||||||
|
#else
|
||||||
|
uint8_t* skptr = (uint8_t*)rk;
|
||||||
|
std::memcpy(rk, userKey, keyLen);
|
||||||
|
ReverseByteArrayLE(skptr);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
uint8x16_p8 r1 = (uint8x16_p8)VectorLoadKey(skptr);
|
||||||
uint8x16_p8 r4 = (uint8x16_p8)VectorLoadKey(s_rcon[0]);
|
uint8x16_p8 r4 = (uint8x16_p8)VectorLoadKey(s_rcon[0]);
|
||||||
uint8x16_p8 r5 = (uint8x16_p8)VectorLoadKey(s_mask);
|
uint8x16_p8 r5 = (uint8x16_p8)VectorLoadKey(s_mask);
|
||||||
|
|
||||||
|
|
@ -1140,6 +1154,7 @@ void Rijndael_UncheckedSetKey_POWER8(word32* rk, size_t keyLen, const word32* rc
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen);
|
||||||
word32 *rk_saved = rk, temp;
|
word32 *rk_saved = rk, temp;
|
||||||
|
|
||||||
// keySize: m_key allocates 4*(rounds+1) word32's.
|
// keySize: m_key allocates 4*(rounds+1) word32's.
|
||||||
|
|
|
||||||
13
rijndael.cpp
13
rijndael.cpp
|
|
@ -253,7 +253,7 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si
|
||||||
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
|
||||||
extern void ReverseByteArrayLE(byte src[16]);
|
extern void ReverseByteArrayLE(byte src[16]);
|
||||||
|
|
||||||
extern void Rijndael_UncheckedSetKey_POWER8(word32* rk, size_t keyLen,
|
extern void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32* rk,
|
||||||
const word32* rc, const byte* Se, unsigned int rounds);
|
const word32* rc, const byte* Se, unsigned int rounds);
|
||||||
|
|
||||||
extern size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds,
|
extern size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds,
|
||||||
|
|
@ -284,18 +284,19 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen);
|
|
||||||
const word32 *rc = rcon;
|
|
||||||
word32 temp;
|
|
||||||
|
|
||||||
#if CRYPTOPP_POWER8_AES_AVAILABLE
|
#if CRYPTOPP_POWER8_AES_AVAILABLE
|
||||||
if (HasAES())
|
if (HasAES())
|
||||||
{
|
{
|
||||||
Rijndael_UncheckedSetKey_POWER8(rk, keyLen, rc, Se, m_rounds);
|
// We still need rcon and Se to fallback to C/C++ for AES-192 and AES-256
|
||||||
|
Rijndael_UncheckedSetKey_POWER8(userKey, keyLen, rk, rcon, Se, m_rounds);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen);
|
||||||
|
const word32 *rc = rcon;
|
||||||
|
word32 temp;
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
temp = rk[keyLen/4-1];
|
temp = rk[keyLen/4-1];
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue