Clean up signed integer overflow on ppc64 (GH #588)
The code below was flagged by the undefined behavior sanitizer under GCC 8. The offender was the doubling at "r4 = vec_add(r4, r4)". r4 is rcon and an unsigned type. It depends on integer wrap, but GCC is generating code that is being flagged for signed overflow. GCC 7 and below are OK.
for (unsigned int i=0; i<8; ++i)
{
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
r4 = vec_add(r4, r4);
skptr = IncrementPointerAndStore(r1, skptr);
}
// Final two rounds using table lookup
...
pull/589/head
parent
48033dac0a
commit
5b09d46665
|
|
@ -709,96 +709,61 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32*
|
||||||
const word32* rc, const byte* Se)
|
const word32* rc, const byte* Se)
|
||||||
{
|
{
|
||||||
const size_t rounds = keyLen / 4 + 6;
|
const size_t rounds = keyLen / 4 + 6;
|
||||||
if (keyLen == 16)
|
GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen);
|
||||||
{
|
word32 *rk_saved = rk, temp;
|
||||||
std::memcpy(rk, userKey, keyLen);
|
|
||||||
uint8_t* skptr = (uint8_t*)rk;
|
|
||||||
|
|
||||||
uint8x16_p r1 = (uint8x16_p)VectorLoadKey(skptr);
|
// keySize: m_key allocates 4*(rounds+1) word32's.
|
||||||
uint8x16_p r4 = (uint8x16_p)VectorLoadKey(s_rcon[0]);
|
const size_t keySize = 4*(rounds+1);
|
||||||
uint8x16_p r5 = (uint8x16_p)VectorLoadKey(s_mask);
|
const word32* end = rk + keySize;
|
||||||
|
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
temp = rk[keyLen/4-1];
|
||||||
|
word32 x = (word32(Se[GETBYTE(temp, 2)]) << 24) ^ (word32(Se[GETBYTE(temp, 1)]) << 16) ^
|
||||||
|
(word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
|
||||||
|
rk[keyLen/4] = rk[0] ^ x ^ *(rc++);
|
||||||
|
rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
|
||||||
|
rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
|
||||||
|
rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];
|
||||||
|
|
||||||
|
if (rk + keyLen/4 + 4 == end)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (keyLen == 24)
|
||||||
|
{
|
||||||
|
rk[10] = rk[ 4] ^ rk[ 9];
|
||||||
|
rk[11] = rk[ 5] ^ rk[10];
|
||||||
|
}
|
||||||
|
else if (keyLen == 32)
|
||||||
|
{
|
||||||
|
temp = rk[11];
|
||||||
|
rk[12] = rk[ 4] ^ (word32(Se[GETBYTE(temp, 3)]) << 24) ^ (word32(Se[GETBYTE(temp, 2)]) << 16) ^ (word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
|
||||||
|
rk[13] = rk[ 5] ^ rk[12];
|
||||||
|
rk[14] = rk[ 6] ^ rk[13];
|
||||||
|
rk[15] = rk[ 7] ^ rk[14];
|
||||||
|
}
|
||||||
|
rk += keyLen/4;
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
// Only the user key requires byte reversing.
|
rk = rk_saved;
|
||||||
// The subkeys are stored in proper endianess.
|
const uint8x16_p mask = ((uint8x16_p){12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3});
|
||||||
ReverseByteArrayLE(skptr);
|
const uint8x16_p zero = {0};
|
||||||
|
|
||||||
|
unsigned int i=0;
|
||||||
|
for (i=0; i<rounds; i+=2, rk+=8)
|
||||||
|
{
|
||||||
|
uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
|
||||||
|
uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
|
||||||
|
d1 = vec_perm(d1, zero, mask);
|
||||||
|
d2 = vec_perm(d2, zero, mask);
|
||||||
|
vec_vsx_st(d1, 0, (uint8_t*)rk);
|
||||||
|
vec_vsx_st(d2, 16, (uint8_t*)rk);
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( ; i<rounds+1; i++, rk+=4)
|
||||||
|
vec_vsx_st(vec_perm(vec_vsx_ld(0, (uint8_t*)rk), zero, mask), 0, (uint8_t*)rk);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (unsigned int i=0; i<rounds-2; ++i)
|
|
||||||
{
|
|
||||||
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
|
||||||
r4 = vec_add(r4, r4);
|
|
||||||
skptr = IncrementPointerAndStore(r1, skptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Round 9 using rcon=0x1b */
|
|
||||||
r4 = (uint8x16_p)VectorLoadKey(s_rcon[1]);
|
|
||||||
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
|
||||||
skptr = IncrementPointerAndStore(r1, skptr);
|
|
||||||
|
|
||||||
/* Round 10 using rcon=0x36 */
|
|
||||||
r4 = (uint8x16_p)VectorLoadKey(s_rcon[2]);
|
|
||||||
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
|
||||||
skptr = IncrementPointerAndStore(r1, skptr);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen);
|
|
||||||
word32 *rk_saved = rk, temp;
|
|
||||||
|
|
||||||
// keySize: m_key allocates 4*(rounds+1) word32's.
|
|
||||||
const size_t keySize = 4*(rounds+1);
|
|
||||||
const word32* end = rk + keySize;
|
|
||||||
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
temp = rk[keyLen/4-1];
|
|
||||||
word32 x = (word32(Se[GETBYTE(temp, 2)]) << 24) ^ (word32(Se[GETBYTE(temp, 1)]) << 16) ^
|
|
||||||
(word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
|
|
||||||
rk[keyLen/4] = rk[0] ^ x ^ *(rc++);
|
|
||||||
rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
|
|
||||||
rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
|
|
||||||
rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];
|
|
||||||
|
|
||||||
if (rk + keyLen/4 + 4 == end)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (keyLen == 24)
|
|
||||||
{
|
|
||||||
rk[10] = rk[ 4] ^ rk[ 9];
|
|
||||||
rk[11] = rk[ 5] ^ rk[10];
|
|
||||||
}
|
|
||||||
else if (keyLen == 32)
|
|
||||||
{
|
|
||||||
temp = rk[11];
|
|
||||||
rk[12] = rk[ 4] ^ (word32(Se[GETBYTE(temp, 3)]) << 24) ^ (word32(Se[GETBYTE(temp, 2)]) << 16) ^ (word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
|
|
||||||
rk[13] = rk[ 5] ^ rk[12];
|
|
||||||
rk[14] = rk[ 6] ^ rk[13];
|
|
||||||
rk[15] = rk[ 7] ^ rk[14];
|
|
||||||
}
|
|
||||||
rk += keyLen/4;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
|
||||||
rk = rk_saved;
|
|
||||||
const uint8x16_p mask = ((uint8x16_p){12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3});
|
|
||||||
const uint8x16_p zero = {0};
|
|
||||||
|
|
||||||
unsigned int i=0;
|
|
||||||
for (i=0; i<rounds; i+=2, rk+=8)
|
|
||||||
{
|
|
||||||
uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
|
|
||||||
uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
|
|
||||||
d1 = vec_perm(d1, zero, mask);
|
|
||||||
d2 = vec_perm(d2, zero, mask);
|
|
||||||
vec_vsx_st(d1, 0, (uint8_t*)rk);
|
|
||||||
vec_vsx_st(d2, 16, (uint8_t*)rk);
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( ; i<rounds+1; i++, rk+=4)
|
|
||||||
vec_vsx_st(vec_perm(vec_vsx_ld(0, (uint8_t*)rk), zero, mask), 0, (uint8_t*)rk);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds,
|
size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue