Add temp working variable for SPECK64_AdvancedProcessBlocks_SSE41

Avoid potential undefined behavior from unaligned word32 reads by copying each block into an aligned temporary with std::memcpy
pull/548/head
Jeffrey Walton 2017-12-04 14:52:36 -05:00
parent 076937eb81
commit 147ecba5df
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
1 changed file with 13 additions and 13 deletions

View File

@@ -1312,6 +1312,7 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F1 func1, F4 func4,
size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : blockSize;
size_t xorIncrement = xorBlocks ? blockSize : 0;
size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : blockSize;
word32 temp[2];
if (flags & BlockTransformation::BT_ReverseDirection)
{
@@ -1379,15 +1380,15 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F1 func1, F4 func4,
while (length >= blockSize)
{
const word32* inPtr = reinterpret_cast<const word32*>(inBlocks);
__m128i block = _mm_insert_epi32(_mm_setzero_si128(), inPtr[0], 0);
block = _mm_insert_epi32(block, inPtr[1], 1);
std::memcpy(&temp, inBlocks, sizeof(temp));
__m128i block = _mm_insert_epi32(_mm_setzero_si128(), temp[0], 0);
block = _mm_insert_epi32(block, temp[1], 1);
if (flags & BlockTransformation::BT_XorInput)
{
const word32* xorPtr = reinterpret_cast<const word32*>(xorBlocks);
__m128i x = _mm_insert_epi32(_mm_setzero_si128(), xorPtr[0], 0);
block = _mm_xor_si128(block, _mm_insert_epi32(x, xorPtr[1], 1));
std::memcpy(&temp, xorBlocks, sizeof(temp));
__m128i x = _mm_insert_epi32(_mm_setzero_si128(), temp[0], 0);
block = _mm_xor_si128(block, _mm_insert_epi32(x, temp[1], 1));
}
if (flags & BlockTransformation::BT_InBlockIsCounter)
@@ -1397,15 +1398,14 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F1 func1, F4 func4,
if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
{
const word32* xorPtr = reinterpret_cast<const word32*>(xorBlocks);
__m128i x = _mm_insert_epi32(_mm_setzero_si128(), xorPtr[0], 0);
block = _mm_xor_si128(block, _mm_insert_epi32(x, xorPtr[1], 1));
std::memcpy(&temp, xorBlocks, sizeof(temp));
__m128i x = _mm_insert_epi32(_mm_setzero_si128(), temp[0], 0);
block = _mm_xor_si128(block, _mm_insert_epi32(x, temp[1], 1));
}
word32 t[2];
t[0] = _mm_extract_epi32(block, 0);
t[1] = _mm_extract_epi32(block, 1);
std::memcpy(outBlocks, t, sizeof(t));
temp[0] = _mm_extract_epi32(block, 0);
temp[1] = _mm_extract_epi32(block, 1);
std::memcpy(outBlocks, temp, sizeof(temp));
inBlocks += inIncrement;
outBlocks += outIncrement;