Clean up signed integer overflow on ppc64 (GH #588)
The code below was flagged by the undefined behavior sanitizer under GCC 8. The offender was the doubling at "r4 = vec_add(r4, r4)". r4 holds rcon and is an unsigned type. The doubling depends on integer wrap, but GCC is generating code that is being flagged for signed overflow. GCC 7 and below are OK.
for (unsigned int i=0; i<8; ++i)
{
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
r4 = vec_add(r4, r4);
skptr = IncrementPointerAndStore(r1, skptr);
}
// Final two rounds using table lookup
...
pull/589/head
parent
48033dac0a
commit
5b09d46665
|
|
@ -709,96 +709,61 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32*
|
|||
const word32* rc, const byte* Se)
|
||||
{
|
||||
const size_t rounds = keyLen / 4 + 6;
|
||||
if (keyLen == 16)
|
||||
{
|
||||
std::memcpy(rk, userKey, keyLen);
|
||||
uint8_t* skptr = (uint8_t*)rk;
|
||||
GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen);
|
||||
word32 *rk_saved = rk, temp;
|
||||
|
||||
uint8x16_p r1 = (uint8x16_p)VectorLoadKey(skptr);
|
||||
uint8x16_p r4 = (uint8x16_p)VectorLoadKey(s_rcon[0]);
|
||||
uint8x16_p r5 = (uint8x16_p)VectorLoadKey(s_mask);
|
||||
// keySize: m_key allocates 4*(rounds+1) word32's.
|
||||
const size_t keySize = 4*(rounds+1);
|
||||
const word32* end = rk + keySize;
|
||||
|
||||
while (true)
|
||||
{
|
||||
temp = rk[keyLen/4-1];
|
||||
word32 x = (word32(Se[GETBYTE(temp, 2)]) << 24) ^ (word32(Se[GETBYTE(temp, 1)]) << 16) ^
|
||||
(word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
|
||||
rk[keyLen/4] = rk[0] ^ x ^ *(rc++);
|
||||
rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
|
||||
rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
|
||||
rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];
|
||||
|
||||
if (rk + keyLen/4 + 4 == end)
|
||||
break;
|
||||
|
||||
if (keyLen == 24)
|
||||
{
|
||||
rk[10] = rk[ 4] ^ rk[ 9];
|
||||
rk[11] = rk[ 5] ^ rk[10];
|
||||
}
|
||||
else if (keyLen == 32)
|
||||
{
|
||||
temp = rk[11];
|
||||
rk[12] = rk[ 4] ^ (word32(Se[GETBYTE(temp, 3)]) << 24) ^ (word32(Se[GETBYTE(temp, 2)]) << 16) ^ (word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
|
||||
rk[13] = rk[ 5] ^ rk[12];
|
||||
rk[14] = rk[ 6] ^ rk[13];
|
||||
rk[15] = rk[ 7] ^ rk[14];
|
||||
}
|
||||
rk += keyLen/4;
|
||||
}
|
||||
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
// Only the user key requires byte reversing.
|
||||
// The subkeys are stored in proper endianess.
|
||||
ReverseByteArrayLE(skptr);
|
||||
rk = rk_saved;
|
||||
const uint8x16_p mask = ((uint8x16_p){12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3});
|
||||
const uint8x16_p zero = {0};
|
||||
|
||||
unsigned int i=0;
|
||||
for (i=0; i<rounds; i+=2, rk+=8)
|
||||
{
|
||||
uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
|
||||
uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
|
||||
d1 = vec_perm(d1, zero, mask);
|
||||
d2 = vec_perm(d2, zero, mask);
|
||||
vec_vsx_st(d1, 0, (uint8_t*)rk);
|
||||
vec_vsx_st(d2, 16, (uint8_t*)rk);
|
||||
}
|
||||
|
||||
for ( ; i<rounds+1; i++, rk+=4)
|
||||
vec_vsx_st(vec_perm(vec_vsx_ld(0, (uint8_t*)rk), zero, mask), 0, (uint8_t*)rk);
|
||||
#endif
|
||||
|
||||
for (unsigned int i=0; i<rounds-2; ++i)
|
||||
{
|
||||
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
||||
r4 = vec_add(r4, r4);
|
||||
skptr = IncrementPointerAndStore(r1, skptr);
|
||||
}
|
||||
|
||||
/* Round 9 using rcon=0x1b */
|
||||
r4 = (uint8x16_p)VectorLoadKey(s_rcon[1]);
|
||||
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
||||
skptr = IncrementPointerAndStore(r1, skptr);
|
||||
|
||||
/* Round 10 using rcon=0x36 */
|
||||
r4 = (uint8x16_p)VectorLoadKey(s_rcon[2]);
|
||||
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
||||
skptr = IncrementPointerAndStore(r1, skptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen);
|
||||
word32 *rk_saved = rk, temp;
|
||||
|
||||
// keySize: m_key allocates 4*(rounds+1) word32's.
|
||||
const size_t keySize = 4*(rounds+1);
|
||||
const word32* end = rk + keySize;
|
||||
|
||||
while (true)
|
||||
{
|
||||
temp = rk[keyLen/4-1];
|
||||
word32 x = (word32(Se[GETBYTE(temp, 2)]) << 24) ^ (word32(Se[GETBYTE(temp, 1)]) << 16) ^
|
||||
(word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
|
||||
rk[keyLen/4] = rk[0] ^ x ^ *(rc++);
|
||||
rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
|
||||
rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
|
||||
rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];
|
||||
|
||||
if (rk + keyLen/4 + 4 == end)
|
||||
break;
|
||||
|
||||
if (keyLen == 24)
|
||||
{
|
||||
rk[10] = rk[ 4] ^ rk[ 9];
|
||||
rk[11] = rk[ 5] ^ rk[10];
|
||||
}
|
||||
else if (keyLen == 32)
|
||||
{
|
||||
temp = rk[11];
|
||||
rk[12] = rk[ 4] ^ (word32(Se[GETBYTE(temp, 3)]) << 24) ^ (word32(Se[GETBYTE(temp, 2)]) << 16) ^ (word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
|
||||
rk[13] = rk[ 5] ^ rk[12];
|
||||
rk[14] = rk[ 6] ^ rk[13];
|
||||
rk[15] = rk[ 7] ^ rk[14];
|
||||
}
|
||||
rk += keyLen/4;
|
||||
}
|
||||
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
rk = rk_saved;
|
||||
const uint8x16_p mask = ((uint8x16_p){12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3});
|
||||
const uint8x16_p zero = {0};
|
||||
|
||||
unsigned int i=0;
|
||||
for (i=0; i<rounds; i+=2, rk+=8)
|
||||
{
|
||||
uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
|
||||
uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
|
||||
d1 = vec_perm(d1, zero, mask);
|
||||
d2 = vec_perm(d2, zero, mask);
|
||||
vec_vsx_st(d1, 0, (uint8_t*)rk);
|
||||
vec_vsx_st(d2, 16, (uint8_t*)rk);
|
||||
}
|
||||
|
||||
for ( ; i<rounds+1; i++, rk+=4)
|
||||
vec_vsx_st(vec_perm(vec_vsx_ld(0, (uint8_t*)rk), zero, mask), 0, (uint8_t*)rk);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds,
|
||||
|
|
|
|||
Loading…
Reference in New Issue