From de7f4a0894f3458b2f1189682099c1e924165277 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 13 Aug 2018 01:44:23 -0400 Subject: [PATCH] Fix carry bug in AdvancedProcessBlocks128_6x1_ALTIVEC --- adv-simd.h | 18 ++++++++++++++++-- ppc-simd.h | 4 ++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/adv-simd.h b/adv-simd.h index d9a37538..9c8b7b87 100644 --- a/adv-simd.h +++ b/adv-simd.h @@ -1830,7 +1830,7 @@ inline size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, { while (length >= 6*blockSize) { - uint32x4_p block0, block1, block2, block3, block4, block5, temp; + uint32x4_p block0, block1, block2, block3, block4, block5; if (flags & BT_InBlockIsCounter) { @@ -1840,7 +1840,21 @@ inline size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, block3 = VectorAdd(block2, s_one); block4 = VectorAdd(block3, s_one); block5 = VectorAdd(block4, s_one); - temp = VectorAdd(block5, s_one); + + // Hack due to big-endian loads used by POWER8 (and maybe ARM-BE). + // CTR_ModePolicy::OperateKeystream is wired such that after + // returning from this function if the last counter byte is 0 then + // CTR_ModePolicy increments the next to last byte. The problem is, + // with a big-endian load, inBlocks[15] is located at index 15. The + // vector addition using a 32-bit element generates a carry into + // inBlocks[14] and then CTR_ModePolicy increments inBlocks[14] too. + // + // To find this bug we needed a test case with a ctr of 0xNN...FA. + // The last octet is 0xFA and adding 6 creates the wrap to trigger + // the issue. If the last octet was 0xFC then 4 would trigger it. + // We dumb-lucked into the test with SPECK-128. The test case of + // interest is the one with IV 348ECA9766C09F04 826520DE47A212FA. 
+ uint8x16_p temp = VectorAdd((uint8x16_p)block5, (uint8x16_p)s_one); VectorStoreBE(temp, const_cast<byte*>(inBlocks)); } else diff --git a/ppc-simd.h b/ppc-simd.h index 686b53ca..99d2e5f4 100644 --- a/ppc-simd.h +++ b/ppc-simd.h @@ -421,9 +421,9 @@ inline uint32x4_p VectorLoadBE(int off, const byte src[16]) return (uint32x4_p)vec_xl_be(off, (byte*)src); #else # if defined(CRYPTOPP_BIG_ENDIAN) - return (uint32x4_p)vec_vsx_ld(off, src); + return (uint32x4_p)vec_vsx_ld(off, (byte*)src); # else - return (uint32x4_p)Reverse(vec_vsx_ld(off, src)); + return (uint32x4_p)Reverse(vec_vsx_ld(off, (byte*)src)); # endif #endif }