From b0b749f392ca42ce032d41314f2ca5794bbbbd87 Mon Sep 17 00:00:00 2001
From: Jeffrey Walton
Date: Wed, 16 Aug 2017 10:40:00 -0400
Subject: [PATCH] Fold shuffles after loads for SHACAL2

---
 config.h    |  5 -----
 shacal2.cpp | 21 ++++++++++-----------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/config.h b/config.h
index 3176832c..ba779641 100644
--- a/config.h
+++ b/config.h
@@ -530,11 +530,6 @@ NAMESPACE_END
 #endif
 
 // How to declare class constants
-//#if (_MSC_VER == 1300) || defined(__INTEL_COMPILER) || defined(__BORLANDC__)
-//# define CRYPTOPP_CONSTANT(x) enum {x};
-//#else
-//# define CRYPTOPP_CONSTANT(x) static const int x;
-//#endif
 #if defined(CRYPTOPP_DOXYGEN_PROCESSING)
 # define CRYPTOPP_CONSTANT(x) static const int x;
 #else
diff --git a/shacal2.cpp b/shacal2.cpp
index 4cbd2cb0..299c0e01 100644
--- a/shacal2.cpp
+++ b/shacal2.cpp
@@ -36,17 +36,15 @@ void SHACAL2_Enc_ProcessAndXorBlock_SHANI(const word32* subKeys, const byte *inB
     CRYPTOPP_ASSERT(inBlock);
     CRYPTOPP_ASSERT(outBlock);
 
-    __m128i MASK = _mm_set_epi64x(0x0C0D0E0F08090A0B, 0x0405060700010203);
-    __m128i B0 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 0));
-    __m128i B1 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 16));
+    // MASK1 produces the CDAB arrangement
+    const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
+    __m128i B0 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 0)), MASK1);
 
-    B0 = _mm_shuffle_epi8(B0, MASK);
-    B1 = _mm_shuffle_epi8(B1, MASK);
+    // MASK2 produces the EFGH arrangement
+    const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
+    __m128i B1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 16)), MASK2);
 
-    B0 = _mm_shuffle_epi32(B0, 0xB1); // CDAB
-    B1 = _mm_shuffle_epi32(B1, 0x1B); // EFGH
-
-    __m128i TMP = _mm_alignr_epi8(B0, B1, 8); // ABEF
+    __m128i TMP = _mm_alignr_epi8(B0, B1, 8); // ABEF
     B1 = _mm_blend_epi16(B1, B0, 0xF0); // CDGH
     B0 = TMP;
 
@@ -63,8 +61,9 @@ void SHACAL2_Enc_ProcessAndXorBlock_SHANI(const word32* subKeys, const byte *inB
     B0 = _mm_blend_epi16(TMP, B1, 0xF0); // DCBA
     B1 = _mm_alignr_epi8(B1, TMP, 8); // ABEF
 
-    B0 = _mm_shuffle_epi8(B0, MASK);
-    B1 = _mm_shuffle_epi8(B1, MASK);
+    const __m128i MASK3 = _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3);
+    B0 = _mm_shuffle_epi8(B0, MASK3);
+    B1 = _mm_shuffle_epi8(B1, MASK3);
 
     if (xorBlock)
     {