Add temp working variable for SPECK64_AdvancedProcessBlocks_SSE41
Avoid potential undefined behavior by using aligned words
pull/548/head
parent
076937eb81
commit
147ecba5df
|
|
@ -1312,6 +1312,7 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F1 func1, F4 func4,
|
||||||
size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : blockSize;
|
size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : blockSize;
|
||||||
size_t xorIncrement = xorBlocks ? blockSize : 0;
|
size_t xorIncrement = xorBlocks ? blockSize : 0;
|
||||||
size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : blockSize;
|
size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : blockSize;
|
||||||
|
word32 temp[2];
|
||||||
|
|
||||||
if (flags & BlockTransformation::BT_ReverseDirection)
|
if (flags & BlockTransformation::BT_ReverseDirection)
|
||||||
{
|
{
|
||||||
|
|
@ -1379,15 +1380,15 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F1 func1, F4 func4,
|
||||||
|
|
||||||
while (length >= blockSize)
|
while (length >= blockSize)
|
||||||
{
|
{
|
||||||
const word32* inPtr = reinterpret_cast<const word32*>(inBlocks);
|
std::memcpy(&temp, inBlocks, sizeof(temp));
|
||||||
__m128i block = _mm_insert_epi32(_mm_setzero_si128(), inPtr[0], 0);
|
__m128i block = _mm_insert_epi32(_mm_setzero_si128(), temp[0], 0);
|
||||||
block = _mm_insert_epi32(block, inPtr[1], 1);
|
block = _mm_insert_epi32(block, temp[1], 1);
|
||||||
|
|
||||||
if (flags & BlockTransformation::BT_XorInput)
|
if (flags & BlockTransformation::BT_XorInput)
|
||||||
{
|
{
|
||||||
const word32* xorPtr = reinterpret_cast<const word32*>(xorBlocks);
|
std::memcpy(&temp, xorBlocks, sizeof(temp));
|
||||||
__m128i x = _mm_insert_epi32(_mm_setzero_si128(), xorPtr[0], 0);
|
__m128i x = _mm_insert_epi32(_mm_setzero_si128(), temp[0], 0);
|
||||||
block = _mm_xor_si128(block, _mm_insert_epi32(x, xorPtr[1], 1));
|
block = _mm_xor_si128(block, _mm_insert_epi32(x, temp[1], 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & BlockTransformation::BT_InBlockIsCounter)
|
if (flags & BlockTransformation::BT_InBlockIsCounter)
|
||||||
|
|
@ -1397,15 +1398,14 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F1 func1, F4 func4,
|
||||||
|
|
||||||
if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
|
if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
|
||||||
{
|
{
|
||||||
const word32* xorPtr = reinterpret_cast<const word32*>(xorBlocks);
|
std::memcpy(&temp, xorBlocks, sizeof(temp));
|
||||||
__m128i x = _mm_insert_epi32(_mm_setzero_si128(), xorPtr[0], 0);
|
__m128i x = _mm_insert_epi32(_mm_setzero_si128(), temp[0], 0);
|
||||||
block = _mm_xor_si128(block, _mm_insert_epi32(x, xorPtr[1], 1));
|
block = _mm_xor_si128(block, _mm_insert_epi32(x, temp[1], 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
word32 t[2];
|
temp[0] = _mm_extract_epi32(block, 0);
|
||||||
t[0] = _mm_extract_epi32(block, 0);
|
temp[1] = _mm_extract_epi32(block, 1);
|
||||||
t[1] = _mm_extract_epi32(block, 1);
|
std::memcpy(outBlocks, temp, sizeof(temp));
|
||||||
std::memcpy(outBlocks, t, sizeof(t));
|
|
||||||
|
|
||||||
inBlocks += inIncrement;
|
inBlocks += inIncrement;
|
||||||
outBlocks += outIncrement;
|
outBlocks += outIncrement;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue