From 147ecba5df345b796d82841ae61b6fa4b4f5591c Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 4 Dec 2017 14:52:36 -0500 Subject: [PATCH] Add temp working variable for SPECK64_AdvancedProcessBlocks_SSE41 Avoid potential undefined behavior by using aligned words --- speck-simd.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/speck-simd.cpp b/speck-simd.cpp index 75907f47..7275a33a 100644 --- a/speck-simd.cpp +++ b/speck-simd.cpp @@ -1312,6 +1312,7 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F1 func1, F4 func4, size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : blockSize; size_t xorIncrement = xorBlocks ? blockSize : 0; size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : blockSize; + word32 temp[2]; if (flags & BlockTransformation::BT_ReverseDirection) { @@ -1379,15 +1380,15 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F1 func1, F4 func4, while (length >= blockSize) { - const word32* inPtr = reinterpret_cast<const word32*>(inBlocks); - __m128i block = _mm_insert_epi32(_mm_setzero_si128(), inPtr[0], 0); - block = _mm_insert_epi32(block, inPtr[1], 1); + std::memcpy(&temp, inBlocks, sizeof(temp)); + __m128i block = _mm_insert_epi32(_mm_setzero_si128(), temp[0], 0); + block = _mm_insert_epi32(block, temp[1], 1); if (flags & BlockTransformation::BT_XorInput) { - const word32* xorPtr = reinterpret_cast<const word32*>(xorBlocks); - __m128i x = _mm_insert_epi32(_mm_setzero_si128(), xorPtr[0], 0); - block = _mm_xor_si128(block, _mm_insert_epi32(x, xorPtr[1], 1)); + std::memcpy(&temp, xorBlocks, sizeof(temp)); + __m128i x = _mm_insert_epi32(_mm_setzero_si128(), temp[0], 0); + block = _mm_xor_si128(block, _mm_insert_epi32(x, temp[1], 1)); } if (flags & BlockTransformation::BT_InBlockIsCounter) @@ -1397,15 +1398,14 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F1 func1, F4 func4, if (xorBlocks && !(flags & 
BlockTransformation::BT_XorInput)) { - const word32* xorPtr = reinterpret_cast<const word32*>(xorBlocks); - __m128i x = _mm_insert_epi32(_mm_setzero_si128(), xorPtr[0], 0); - block = _mm_xor_si128(block, _mm_insert_epi32(x, xorPtr[1], 1)); + std::memcpy(&temp, xorBlocks, sizeof(temp)); + __m128i x = _mm_insert_epi32(_mm_setzero_si128(), temp[0], 0); + block = _mm_xor_si128(block, _mm_insert_epi32(x, temp[1], 1)); } - word32 t[2]; - t[0] = _mm_extract_epi32(block, 0); - t[1] = _mm_extract_epi32(block, 1); - std::memcpy(outBlocks, t, sizeof(t)); + temp[0] = _mm_extract_epi32(block, 0); + temp[1] = _mm_extract_epi32(block, 1); + std::memcpy(outBlocks, temp, sizeof(temp)); inBlocks += inIncrement; outBlocks += outIncrement;