Remove unneeded temp[] array

pull/548/head
Jeffrey Walton 2017-12-05 20:35:57 -05:00
parent 490701acca
commit e9654192f2
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 24 additions and 28 deletions

View File

@@ -1629,7 +1629,6 @@ inline size_t SIMON64_AdvancedProcessBlocks_SSE41(F2 func2, F4 func4,
size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : xmmBlockSize; size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : xmmBlockSize;
size_t xorIncrement = xorBlocks ? xmmBlockSize : 0; size_t xorIncrement = xorBlocks ? xmmBlockSize : 0;
size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : xmmBlockSize; size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : xmmBlockSize;
CRYPTOPP_ALIGN_DATA(16) word32 temp[4];
if (flags & BlockTransformation::BT_ReverseDirection) if (flags & BlockTransformation::BT_ReverseDirection)
{ {
@@ -1728,15 +1727,14 @@ inline size_t SIMON64_AdvancedProcessBlocks_SSE41(F2 func2, F4 func4,
while (length >= blockSize) while (length >= blockSize)
{ {
// temp[] is an aligned array
std::memcpy(temp, inBlocks, 8);
__m128i block, zero = _mm_setzero_si128(); __m128i block, zero = _mm_setzero_si128();
block = _mm_load_si128(CONST_M128_CAST(temp)); block = _mm_xor_si128(block, _mm_castpd_si128(
_mm_loaddup_pd(reinterpret_cast<const double*>(inBlocks))));
if (flags & BlockTransformation::BT_XorInput) if (flags & BlockTransformation::BT_XorInput)
{ {
std::memcpy(temp, xorBlocks, 8); block = _mm_xor_si128(block, _mm_castpd_si128(
block = _mm_xor_si128(block, _mm_load_si128(CONST_M128_CAST(temp))); _mm_loaddup_pd(reinterpret_cast<const double*>(xorBlocks))));
} }
if (flags & BlockTransformation::BT_InBlockIsCounter) if (flags & BlockTransformation::BT_InBlockIsCounter)
@@ -1746,12 +1744,12 @@ inline size_t SIMON64_AdvancedProcessBlocks_SSE41(F2 func2, F4 func4,
if (xorBlocks && !(flags & BlockTransformation::BT_XorInput)) if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
{ {
std::memcpy(temp, xorBlocks, 8); block = _mm_xor_si128(block, _mm_castpd_si128(
block = _mm_xor_si128(block, _mm_load_si128(CONST_M128_CAST(temp))); _mm_loaddup_pd(reinterpret_cast<const double*>(xorBlocks))));
} }
_mm_store_si128(M128_CAST(temp), block); const word64 temp = _mm_cvtsi128_si64x(block);
std::memcpy(outBlocks, temp, 8); std::memcpy(outBlocks, &temp, 8);
inBlocks += inIncrement; inBlocks += inIncrement;
outBlocks += outIncrement; outBlocks += outIncrement;

View File

@@ -1517,7 +1517,6 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F2 func2, F4 func4,
size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : xmmBlockSize; size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : xmmBlockSize;
size_t xorIncrement = xorBlocks ? xmmBlockSize : 0; size_t xorIncrement = xorBlocks ? xmmBlockSize : 0;
size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : xmmBlockSize; size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : xmmBlockSize;
CRYPTOPP_ALIGN_DATA(16) word32 temp[4];
if (flags & BlockTransformation::BT_ReverseDirection) if (flags & BlockTransformation::BT_ReverseDirection)
{ {
@@ -1616,15 +1615,14 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F2 func2, F4 func4,
while (length >= blockSize) while (length >= blockSize)
{ {
// temp[] is an aligned array
std::memcpy(temp, inBlocks, 8);
__m128i block, zero = _mm_setzero_si128(); __m128i block, zero = _mm_setzero_si128();
block = _mm_load_si128(CONST_M128_CAST(temp)); block = _mm_xor_si128(block, _mm_castpd_si128(
_mm_loaddup_pd(reinterpret_cast<const double*>(inBlocks))));
if (flags & BlockTransformation::BT_XorInput) if (flags & BlockTransformation::BT_XorInput)
{ {
std::memcpy(temp, xorBlocks, 8); block = _mm_xor_si128(block, _mm_castpd_si128(
block = _mm_xor_si128(block, _mm_load_si128(CONST_M128_CAST(temp))); _mm_loaddup_pd(reinterpret_cast<const double*>(xorBlocks))));
} }
if (flags & BlockTransformation::BT_InBlockIsCounter) if (flags & BlockTransformation::BT_InBlockIsCounter)
@@ -1634,12 +1632,12 @@ inline size_t SPECK64_AdvancedProcessBlocks_SSE41(F2 func2, F4 func4,
if (xorBlocks && !(flags & BlockTransformation::BT_XorInput)) if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
{ {
std::memcpy(temp, xorBlocks, 8); block = _mm_xor_si128(block, _mm_castpd_si128(
block = _mm_xor_si128(block, _mm_load_si128(CONST_M128_CAST(temp))); _mm_loaddup_pd(reinterpret_cast<const double*>(xorBlocks))));
} }
_mm_store_si128(M128_CAST(temp), block); const word64 temp = _mm_cvtsi128_si64x(block);
std::memcpy(outBlocks, temp, 8); std::memcpy(outBlocks, &temp, 8);
inBlocks += inIncrement; inBlocks += inIncrement;
outBlocks += outIncrement; outBlocks += outIncrement;