From ecbf7910155a3b88691cde33b9ba93eaa40cd487 Mon Sep 17 00:00:00 2001
From: Jeffrey Walton
Date: Wed, 24 Oct 2018 19:02:20 -0400
Subject: [PATCH] Remove unneeded SIMON128 functions in simon64-simd.cpp

These look like artifacts from when we split simon-simd.cpp into
simon64-simd.cpp and simon128-simd.cpp.
---
 simon64-simd.cpp | 237 +----------------------------------------------
 1 file changed, 1 insertion(+), 236 deletions(-)

diff --git a/simon64-simd.cpp b/simon64-simd.cpp
index 42444180..62bf25f0 100644
--- a/simon64-simd.cpp
+++ b/simon64-simd.cpp
@@ -292,23 +292,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
 
 // ***************************** IA-32 ***************************** //
 
-#if (CRYPTOPP_SSSE3_AVAILABLE)
-
-// Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670
-#ifndef M128_CAST
-# define M128_CAST(x) ((__m128i *)(void *)(x))
-#endif
-#ifndef CONST_M128_CAST
-# define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
-#endif
-
-// GCC double casts, https://www.spinics.net/lists/gcchelp/msg47735.html
-#ifndef DOUBLE_CAST
-# define DOUBLE_CAST(x) ((double *)(void *)(x))
-#endif
-#ifndef CONST_DOUBLE_CAST
-# define CONST_DOUBLE_CAST(x) ((const double *)(const void *)(x))
-#endif
+#if defined(CRYPTOPP_SSE41_AVAILABLE)
 
 inline void Swap128(__m128i& a,__m128i& b)
 {
@@ -321,225 +305,6 @@ inline void Swap128(__m128i& a,__m128i& b)
 #endif
 }
 
-template <unsigned int R>
-inline __m128i RotateLeft64(const __m128i& val)
-{
-#if defined(CRYPTOPP_AVX512_ROTATE)
-    return _mm_rol_epi64(val, R);
-#elif defined(__XOP__)
-    return _mm_roti_epi64(val, R);
-#else
-    return _mm_or_si128(
-        _mm_slli_epi64(val, R), _mm_srli_epi64(val, 64-R));
-#endif
-}
-
-template <unsigned int R>
-inline __m128i RotateRight64(const __m128i& val)
-{
-#if defined(CRYPTOPP_AVX512_ROTATE)
-    return _mm_ror_epi64(val, R);
-#elif defined(__XOP__)
-    return _mm_roti_epi64(val, 64-R);
-#else
-    return _mm_or_si128(
-        _mm_slli_epi64(val, 64-R), _mm_srli_epi64(val, R));
-#endif
-}
-
-// Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
-template <>
-inline __m128i RotateLeft64<8>(const __m128i& val)
-{
-#if defined(__XOP__)
-    return _mm_roti_epi64(val, 8);
-#else
-    const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
-    return _mm_shuffle_epi8(val, mask);
-#endif
-}
-
-// Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
-template <>
-inline __m128i RotateRight64<8>(const __m128i& val)
-{
-#if defined(__XOP__)
-    return _mm_roti_epi64(val, 64-8);
-#else
-    const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
-    return _mm_shuffle_epi8(val, mask);
-#endif
-}
-
-inline __m128i SIMON128_f(const __m128i& v)
-{
-    return _mm_xor_si128(RotateLeft64<2>(v),
-        _mm_and_si128(RotateLeft64<1>(v), RotateLeft64<8>(v)));
-}
-
-inline void SIMON128_Enc_Block(__m128i &block0, __m128i &block1,
-    const word64 *subkeys, unsigned int rounds)
-{
-    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
-    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
-    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
-
-    for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
-    {
-        const __m128i rk1 = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i)));
-        y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk1);
-
-        const __m128i rk2 = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i+1)));
-        x1 = _mm_xor_si128(_mm_xor_si128(x1, SIMON128_f(y1)), rk2);
-    }
-
-    if (rounds & 1)
-    {
-        const __m128i rk = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+rounds-1)));
-
-        y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk);
-        Swap128(x1, y1);
-    }
-
-    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
-    block0 = _mm_unpacklo_epi64(y1, x1);
-    block1 = _mm_unpackhi_epi64(y1, x1);
-}
-
-inline void SIMON128_Enc_6_Blocks(__m128i &block0, __m128i &block1,
-    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
-    const word64 *subkeys, unsigned int rounds)
-{
-    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
-    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
-    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
-    __m128i x2 = _mm_unpackhi_epi64(block2, block3);
-    __m128i y2 = _mm_unpacklo_epi64(block2, block3);
-    __m128i x3 = _mm_unpackhi_epi64(block4, block5);
-    __m128i y3 = _mm_unpacklo_epi64(block4, block5);
-
-    for (int i = 0; i < static_cast<int>(rounds & ~1) - 1; i += 2)
-    {
-        const __m128i rk1 = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + i)));
-        y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk1);
-        y2 = _mm_xor_si128(_mm_xor_si128(y2, SIMON128_f(x2)), rk1);
-        y3 = _mm_xor_si128(_mm_xor_si128(y3, SIMON128_f(x3)), rk1);
-
-        const __m128i rk2 = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + i + 1)));
-        x1 = _mm_xor_si128(_mm_xor_si128(x1, SIMON128_f(y1)), rk2);
-        x2 = _mm_xor_si128(_mm_xor_si128(x2, SIMON128_f(y2)), rk2);
-        x3 = _mm_xor_si128(_mm_xor_si128(x3, SIMON128_f(y3)), rk2);
-    }
-
-    if (rounds & 1)
-    {
-        const __m128i rk = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + rounds - 1)));
-        y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk);
-        y2 = _mm_xor_si128(_mm_xor_si128(y2, SIMON128_f(x2)), rk);
-        y3 = _mm_xor_si128(_mm_xor_si128(y3, SIMON128_f(x3)), rk);
-        Swap128(x1, y1); Swap128(x2, y2); Swap128(x3, y3);
-    }
-
-    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
-    block0 = _mm_unpacklo_epi64(y1, x1);
-    block1 = _mm_unpackhi_epi64(y1, x1);
-    block2 = _mm_unpacklo_epi64(y2, x2);
-    block3 = _mm_unpackhi_epi64(y2, x2);
-    block4 = _mm_unpacklo_epi64(y3, x3);
-    block5 = _mm_unpackhi_epi64(y3, x3);
-}
-
-inline void SIMON128_Dec_Block(__m128i &block0, __m128i &block1,
-    const word64 *subkeys, unsigned int rounds)
-{
-    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
-    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
-    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
-
-    if (rounds & 1)
-    {
-        const __m128i rk = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + rounds - 1)));
-
-        Swap128(x1, y1);
-        y1 = _mm_xor_si128(_mm_xor_si128(y1, rk), SIMON128_f(x1));
-        rounds--;
-    }
-
-    for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
-    {
-        const __m128i rk1 = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i+1)));
-        x1 = _mm_xor_si128(_mm_xor_si128(x1, SIMON128_f(y1)), rk1);
-
-        const __m128i rk2 = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i)));
-        y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk2);
-    }
-
-    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
-    block0 = _mm_unpacklo_epi64(y1, x1);
-    block1 = _mm_unpackhi_epi64(y1, x1);
-}
-
-inline void SIMON128_Dec_6_Blocks(__m128i &block0, __m128i &block1,
-    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
-    const word64 *subkeys, unsigned int rounds)
-{
-    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
-    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
-    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
-    __m128i x2 = _mm_unpackhi_epi64(block2, block3);
-    __m128i y2 = _mm_unpacklo_epi64(block2, block3);
-    __m128i x3 = _mm_unpackhi_epi64(block4, block5);
-    __m128i y3 = _mm_unpacklo_epi64(block4, block5);
-
-    if (rounds & 1)
-    {
-        const __m128i rk = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + rounds - 1)));
-
-        Swap128(x1, y1); Swap128(x2, y2); Swap128(x3, y3);
-        y1 = _mm_xor_si128(_mm_xor_si128(y1, rk), SIMON128_f(x1));
-        y2 = _mm_xor_si128(_mm_xor_si128(y2, rk), SIMON128_f(x2));
-        y3 = _mm_xor_si128(_mm_xor_si128(y3, rk), SIMON128_f(x3));
-        rounds--;
-    }
-
-    for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
-    {
-        const __m128i rk1 = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + i + 1)));
-        x1 = _mm_xor_si128(_mm_xor_si128(x1, SIMON128_f(y1)), rk1);
-        x2 = _mm_xor_si128(_mm_xor_si128(x2, SIMON128_f(y2)), rk1);
-        x3 = _mm_xor_si128(_mm_xor_si128(x3, SIMON128_f(y3)), rk1);
-
-        const __m128i rk2 = _mm_castpd_si128(
-            _mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + i)));
-        y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk2);
-        y2 = _mm_xor_si128(_mm_xor_si128(y2, SIMON128_f(x2)), rk2);
-        y3 = _mm_xor_si128(_mm_xor_si128(y3, SIMON128_f(x3)), rk2);
-    }
-
-    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
-    block0 = _mm_unpacklo_epi64(y1, x1);
-    block1 = _mm_unpackhi_epi64(y1, x1);
-    block2 = _mm_unpacklo_epi64(y2, x2);
-    block3 = _mm_unpackhi_epi64(y2, x2);
-    block4 = _mm_unpacklo_epi64(y3, x3);
-    block5 = _mm_unpackhi_epi64(y3, x3);
-}
-
-#endif // CRYPTOPP_SSSE3_AVAILABLE
-
-#if defined(CRYPTOPP_SSE41_AVAILABLE)
-
 template <unsigned int R>
 inline __m128i RotateLeft32(const __m128i& val)
 {
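
Note for reviewers, not part of the patch: the removed RotateLeft64<8> and
RotateRight64<8> specializations rely on the byte-shuffle rotate credited in
the deleted comments to Louis Wingers and Bryan Weeks. The standalone sketch
below checks that the shuffle mask really performs a 64-bit rotate-left by 8
in each lane; the file name, main() harness, and printed labels are
illustrative only. Build with SSSE3 enabled, e.g. g++ -mssse3.

    // rotl8-check.cpp (hypothetical file name): verify that the SSSE3
    // byte shuffle matches the plain two-shifts-and-an-or rotate.
    #include <emmintrin.h>   // SSE2: shifts, OR, set1
    #include <tmmintrin.h>   // SSSE3: _mm_shuffle_epi8
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        const uint64_t v = 0x0123456789abcdefULL;
        const __m128i x = _mm_set1_epi64x(static_cast<int64_t>(v));

        // Reference rotate-left-8: two shifts and an OR.
        const __m128i ref = _mm_or_si128(
            _mm_slli_epi64(x, 8), _mm_srli_epi64(x, 56));

        // Byte-shuffle rotate, using the mask from the removed
        // RotateLeft64<8>: each result byte i takes source byte (i+7)%8
        // within its 64-bit lane, which is rotl(v, 8) on little-endian.
        const __m128i mask = _mm_set_epi8(
            14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
        const __m128i rot = _mm_shuffle_epi8(x, mask);

        uint64_t a[2], b[2];
        std::memcpy(a, &ref, sizeof(a));
        std::memcpy(b, &rot, sizeof(b));
        std::printf("%s\n", (a[0] == b[0] && a[1] == b[1]) ?
            "shuffle matches shift-or" : "MISMATCH");
        return 0;
    }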