diff --git a/adv-simd.h b/adv-simd.h
index c9632b0a..7bd98e6a 100644
--- a/adv-simd.h
+++ b/adv-simd.h
@@ -64,18 +64,6 @@ CRYPTOPP_CONSTANT(BT_InBlockIsCounter = BlockTransformation::BT_InBlockIsCounter
 CRYPTOPP_CONSTANT(BT_ReverseDirection = BlockTransformation::BT_ReverseDirection)
 CRYPTOPP_CONSTANT(BT_DontIncrementInOutPointers = BlockTransformation::BT_DontIncrementInOutPointers)
 
-// Coverity finding on xorBlocks. While not obvious, xorBlocks is
-// always non-NULL when BT_XorInput is set. All callers follow the
-// convention. Also see https://stackoverflow.com/q/33719379/608639.
-inline word32 XorBlocksToFlags(const byte* xorBlocks, word32 flags)
-{
-#if defined(__COVERITY__)
-    return xorBlocks ? (flags) : (flags &= ~BT_XorInput);
-#else
-    return CRYPTOPP_UNUSED(xorBlocks), flags;
-#endif
-}
-
 ANONYMOUS_NAMESPACE_END
 
 // *************************** ARM NEON ************************** //
@@ -121,9 +109,12 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
     const ptrdiff_t neonBlockSize = 16;
 
     ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : neonBlockSize;
-    ptrdiff_t xorIncrement = xorBlocks ? neonBlockSize : 0;
+    ptrdiff_t xorIncrement = (xorBlocks != NULLPTR) ? neonBlockSize : 0;
     ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : neonBlockSize;
-    flags = XorBlocksToFlags(xorBlocks, flags); // Coverity hack
+
+    // Clang and Coverity are generating findings using xorBlocks as a flag.
+    const bool xorInput = (xorBlocks != NULLPTR) && (flags & BT_XorInput);
+    const bool xorOutput = (xorBlocks != NULLPTR) && !(flags & BT_XorInput);
 
     if (flags & BT_ReverseDirection)
     {
@@ -176,7 +167,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = veorq_u32(block0, vreinterpretq_u32_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -194,7 +185,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
 
             func6(block0, block1, block2, block3, block4, block5, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = veorq_u32(block0, vreinterpretq_u32_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -253,7 +244,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = veorq_u32(block0, vreinterpretq_u32_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -263,7 +254,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
 
             func2(block0, block1, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = veorq_u32(block0, vreinterpretq_u32_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -306,7 +297,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
         const uint8x8_t v = vld1_u8(inBlocks);
         block = vreinterpretq_u32_u8(vcombine_u8(v,v));
 
-        if (flags & BT_XorInput)
+        if (xorInput)
         {
            const uint8x8_t x = vld1_u8(xorBlocks);
            block = veorq_u32(block, vreinterpretq_u32_u8(vcombine_u8(x,x)));
@@ -317,7 +308,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
 
        func2(block, zero, subKeys, static_cast<unsigned int>(rounds));
 
-        if (xorBlocks && !(flags & BT_XorInput))
+        if (xorOutput)
        {
            const uint8x8_t x = vld1_u8(xorBlocks);
            block = veorq_u32(block, vreinterpretq_u32_u8(vcombine_u8(x,x)));
@@ -350,9 +341,12 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
     // const ptrdiff_t neonBlockSize = 16;
 
     ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
-    ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;
+    ptrdiff_t xorIncrement = (xorBlocks != NULLPTR) ? blockSize : 0;
     ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : blockSize;
-    flags = XorBlocksToFlags(xorBlocks, flags); // Coverity hack
+
+    // Clang and Coverity are generating findings using xorBlocks as a flag.
+    const bool xorInput = (xorBlocks != NULLPTR) && (flags & BT_XorInput);
+    const bool xorOutput = (xorBlocks != NULLPTR) && !(flags & BT_XorInput);
 
     if (flags & BT_ReverseDirection)
     {
@@ -398,7 +392,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = veorq_u64(block0, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -416,7 +410,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
 
             func6(block0, block1, block2, block3, block4, block5, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = veorq_u64(block0, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -454,7 +448,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
        uint64x2_t block;
        block = vreinterpretq_u64_u8(vld1q_u8(inBlocks));
 
-        if (flags & BT_XorInput)
+        if (xorInput)
            block = veorq_u64(block, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
 
        if (flags & BT_InBlockIsCounter)
@@ -462,7 +456,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
 
        func1(block, subKeys, static_cast<unsigned int>(rounds));
 
-        if (xorBlocks && !(flags & BT_XorInput))
+        if (xorOutput)
            block = veorq_u64(block, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
 
        vst1q_u8(outBlocks, vreinterpretq_u8_u64(block));
@@ -490,9 +484,12 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
     // const ptrdiff_t neonBlockSize = 16;
 
     ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
-    ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;
+    ptrdiff_t xorIncrement = (xorBlocks != NULLPTR) ? blockSize : 0;
     ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : blockSize;
-    flags = XorBlocksToFlags(xorBlocks, flags); // Coverity hack
+
+    // Clang and Coverity are generating findings using xorBlocks as a flag.
+    const bool xorInput = (xorBlocks != NULLPTR) && (flags & BT_XorInput);
+    const bool xorOutput = (xorBlocks != NULLPTR) && !(flags & BT_XorInput);
 
     if (flags & BT_ReverseDirection)
     {
@@ -538,7 +535,7 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = veorq_u64(block0, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -556,7 +553,7 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
 
             func6(block0, block1, block2, block3, block4, block5, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = veorq_u64(block0, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -608,7 +605,7 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = veorq_u64(block0, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -618,7 +615,7 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
 
             func2(block0, block1, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = veorq_u64(block0, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -640,7 +637,7 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
        uint64x2_t block, zero = {0,0};
        block = vreinterpretq_u64_u8(vld1q_u8(inBlocks));
 
-        if (flags & BT_XorInput)
+        if (xorInput)
            block = veorq_u64(block, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
 
        if (flags & BT_InBlockIsCounter)
@@ -648,7 +645,7 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
 
        func2(block, zero, subKeys, static_cast<unsigned int>(rounds));
 
-        if (xorBlocks && !(flags & BT_XorInput))
+        if (xorOutput)
            block = veorq_u64(block, vreinterpretq_u64_u8(vld1q_u8(xorBlocks)));
 
        vst1q_u8(outBlocks, vreinterpretq_u8_u64(block));
@@ -726,9 +723,12 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
     const ptrdiff_t xmmBlockSize = 16;
 
     ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : xmmBlockSize;
-    ptrdiff_t xorIncrement = xorBlocks ? xmmBlockSize : 0;
+    ptrdiff_t xorIncrement = (xorBlocks != NULLPTR) ? xmmBlockSize : 0;
     ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : xmmBlockSize;
-    flags = XorBlocksToFlags(xorBlocks, flags); // Coverity hack
+
+    // Clang and Coverity are generating findings using xorBlocks as a flag.
+    const bool xorInput = (xorBlocks != NULLPTR) && (flags & BT_XorInput);
+    const bool xorOutput = (xorBlocks != NULLPTR) && !(flags & BT_XorInput);
 
     if (flags & BT_ReverseDirection)
     {
@@ -781,7 +781,7 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -799,7 +799,7 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
 
             func6(block0, block1, block2, block3, block4, block5, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -858,7 +858,7 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -868,7 +868,7 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
 
             func2(block0, block1, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -911,7 +911,7 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
            // UBsan false positive; mem_addr can be unaligned.
            _mm_load_sd(CONST_DOUBLE_CAST(inBlocks)));
 
-        if (flags & BT_XorInput)
+        if (xorInput)
        {
            block = _mm_xor_si128(block, _mm_castpd_si128(
                // UBsan false positive; mem_addr can be unaligned.
@@ -923,7 +923,7 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
 
        func2(block, zero, subKeys, static_cast<unsigned int>(rounds));
 
-        if (xorBlocks && !(flags & BT_XorInput))
+        if (xorOutput)
        {
            block = _mm_xor_si128(block, _mm_castpd_si128(
                // UBsan false positive; mem_addr can be unaligned.
@@ -957,9 +957,12 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
     // const ptrdiff_t xmmBlockSize = 16;
 
     ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
-    ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;
+    ptrdiff_t xorIncrement = (xorBlocks != NULLPTR) ? blockSize : 0;
     ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : blockSize;
-    flags = XorBlocksToFlags(xorBlocks, flags); // Coverity hack
+
+    // Clang and Coverity are generating findings using xorBlocks as a flag.
+    const bool xorInput = (xorBlocks != NULLPTR) && (flags & BT_XorInput);
+    const bool xorOutput = (xorBlocks != NULLPTR) && !(flags & BT_XorInput);
 
     if (flags & BT_ReverseDirection)
     {
@@ -1003,7 +1006,7 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -1021,7 +1024,7 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
 
             func6(block0, block1, block2, block3, block4, block5, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -1071,7 +1074,7 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -1081,7 +1084,7 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
 
             func2(block0, block1, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -1103,7 +1106,7 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
        __m128i block, zero = _mm_setzero_si128();
        block = _mm_loadu_si128(CONST_M128_CAST(inBlocks));
 
-        if (flags & BT_XorInput)
+        if (xorInput)
            block = _mm_xor_si128(block, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
 
        if (flags & BT_InBlockIsCounter)
@@ -1111,7 +1114,7 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
 
        func2(block, zero, subKeys, static_cast<unsigned int>(rounds));
 
-        if (xorBlocks && !(flags & BT_XorInput))
+        if (xorOutput)
            block = _mm_xor_si128(block, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
 
        _mm_storeu_si128(M128_CAST(outBlocks), block);
@@ -1139,9 +1142,12 @@ inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
     // const ptrdiff_t xmmBlockSize = 16;
 
     ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
-    ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;
+    ptrdiff_t xorIncrement = (xorBlocks != NULLPTR) ? blockSize : 0;
     ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : blockSize;
-    flags = XorBlocksToFlags(xorBlocks, flags); // Coverity hack
+
+    // Clang and Coverity are generating findings using xorBlocks as a flag.
+    const bool xorInput = (xorBlocks != NULLPTR) && (flags & BT_XorInput);
+    const bool xorOutput = (xorBlocks != NULLPTR) && !(flags & BT_XorInput);
 
     if (flags & BT_ReverseDirection)
     {
@@ -1179,7 +1185,7 @@ inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -1193,7 +1199,7 @@ inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
 
             func4(block0, block1, block2, block3, subKeys, static_cast<unsigned int>(rounds));
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
                 xorBlocks += xorIncrement;
@@ -1222,7 +1228,7 @@ inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
    {
        __m128i block = _mm_loadu_si128(CONST_M128_CAST(inBlocks));
 
-        if (flags & BT_XorInput)
+        if (xorInput)
            block = _mm_xor_si128(block, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
 
        if (flags & BT_InBlockIsCounter)
@@ -1230,7 +1236,7 @@ inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
 
        func1(block, subKeys, static_cast<unsigned int>(rounds));
 
-        if (xorBlocks && !(flags & BT_XorInput))
+        if (xorOutput)
            block = _mm_xor_si128(block, _mm_loadu_si128(CONST_M128_CAST(xorBlocks)));
 
        _mm_storeu_si128(M128_CAST(outBlocks), block);
@@ -1279,9 +1285,12 @@ size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *su
     // const ptrdiff_t vexBlockSize = 16;
 
     ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
-    ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;
+    ptrdiff_t xorIncrement = (xorBlocks != NULLPTR) ? blockSize : 0;
     ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : blockSize;
-    flags = XorBlocksToFlags(xorBlocks, flags); // Coverity hack
+
+    // Clang and Coverity are generating findings using xorBlocks as a flag.
+    const bool xorInput = (xorBlocks != NULLPTR) && (flags & BT_XorInput);
+    const bool xorOutput = (xorBlocks != NULLPTR) && !(flags & BT_XorInput);
 
     if (flags & BT_ReverseDirection)
     {
@@ -1326,7 +1335,7 @@ size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *su
                 inBlocks += inIncrement;
             }
 
-            if (flags & BT_XorInput)
+            if (xorInput)
             {
                 block0 = VectorXor(block0, VectorLoad(xorBlocks));
                 xorBlocks += xorIncrement;
@@ -1344,7 +1353,7 @@ size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *su
 
             func6(block0, block1, block2, block3, block4, block5, subKeys, rounds);
 
-            if (xorBlocks && !(flags & BT_XorInput))
+            if (xorOutput)
             {
                 block0 = VectorXor(block0, VectorLoad(xorBlocks));
                 xorBlocks += xorIncrement;
@@ -1381,7 +1390,7 @@ size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *su
    {
        uint32x4_p block = VectorLoad(inBlocks);
 
-        if (flags & BT_XorInput)
+        if (xorInput)
            block = VectorXor(block, VectorLoad(xorBlocks));
 
        if (flags & BT_InBlockIsCounter)
@@ -1389,7 +1398,7 @@ size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *su
 
        func1(block, subKeys, rounds);
 
-        if (xorBlocks && !(flags & BT_XorInput))
+        if (xorOutput)
            block = VectorXor(block, VectorLoad(xorBlocks));
 
        VectorStore(block, outBlocks);
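Note on the pattern: every hunk above makes the same two changes. The explicit
(xorBlocks != NULLPTR) test replaces the old XorBlocksToFlags() Coverity
workaround, and the per-block tests of xorBlocks and BT_XorInput are hoisted
into two booleans, xorInput and xorOutput, computed once per call. The sketch
below is a minimal plain-C++ illustration of that shape, not the library code:
the function name ProcessBlocksSketch, the simplified flag definition, and the
byte-wise XOR loops standing in for the SIMD func1/func2/func4/func6 bodies are
all assumptions made for the example.

    #include <cstddef>
    #include <cstdint>

    // Illustrative value only; the real flag lives in BlockTransformation.
    enum : uint32_t { BT_XorInput = 1 << 2 };

    // Sketch: XOR with xorBlocks either before or after the (omitted) cipher
    // transform, decided once up front rather than re-testing xorBlocks per block.
    void ProcessBlocksSketch(const uint8_t* inBlocks, const uint8_t* xorBlocks,
                             uint8_t* outBlocks, size_t length, uint32_t flags)
    {
        const size_t blockSize = 16;
        const size_t xorIncrement = (xorBlocks != nullptr) ? blockSize : 0;

        // Hoisted null test: the analyzer sees one explicit decision instead
        // of xorBlocks doing double duty as a pointer and a flag.
        const bool xorInput  = (xorBlocks != nullptr) && (flags & BT_XorInput);
        const bool xorOutput = (xorBlocks != nullptr) && !(flags & BT_XorInput);

        while (length >= blockSize)
        {
            uint8_t block[16];
            for (size_t i = 0; i < blockSize; ++i)
                block[i] = inBlocks[i];

            if (xorInput)   // XOR before the transform
                for (size_t i = 0; i < blockSize; ++i)
                    block[i] ^= xorBlocks[i];

            // ... the block cipher transform of 'block' would run here ...

            if (xorOutput)  // XOR after the transform
                for (size_t i = 0; i < blockSize; ++i)
                    block[i] ^= xorBlocks[i];

            for (size_t i = 0; i < blockSize; ++i)
                outBlocks[i] = block[i];

            inBlocks += blockSize;
            outBlocks += blockSize;
            xorBlocks += xorIncrement;
            length -= blockSize;
        }
    }

Because both booleans are false and xorIncrement is zero whenever xorBlocks is
NULL, the loop never reads through a null pointer, which is the caller
convention the removed XorBlocksToFlags() comment relied on.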