From 80ae9f4f0a66596a164fee67e3fdd09628d3871f Mon Sep 17 00:00:00 2001
From: Jeffrey Walton
Date: Fri, 22 Jun 2018 17:44:16 -0400
Subject: [PATCH] Add AVX512 rotates to RotateLeft and RotateRight templates

---
 cham-simd.cpp  | 13 +++++++++++++
 lea-simd.cpp   | 12 ++++++++++++
 simon-simd.cpp | 30 ++++++++----------------------
 speck-simd.cpp | 31 ++++++++-----------------------
 4 files changed, 41 insertions(+), 45 deletions(-)

diff --git a/cham-simd.cpp b/cham-simd.cpp
index a4eb706e..a503aa4a 100644
--- a/cham-simd.cpp
+++ b/cham-simd.cpp
@@ -22,6 +22,11 @@
 # include
 #endif
 
+#if defined(__AVX512F__) && defined(__AVX512VL__)
+# define CRYPTOPP_AVX512_ROTATE 1
+# include <immintrin.h>
+#endif
+
 ANONYMOUS_NAMESPACE_BEGIN
 
 using CryptoPP::word16;
@@ -775,15 +780,23 @@ NAMESPACE_BEGIN(W32) // CHAM128, 32-bit word size
 template <unsigned int R>
 inline __m128i RotateLeft32(const __m128i& val)
 {
+#if defined(CRYPTOPP_AVX512_ROTATE)
+    return _mm_rol_epi32(val, R);
+#else
     return _mm_or_si128(
         _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
+#endif
 }
 
 template <unsigned int R>
 inline __m128i RotateRight32(const __m128i& val)
 {
+#if defined(CRYPTOPP_AVX512_ROTATE)
+    return _mm_ror_epi32(val, R);
+#else
     return _mm_or_si128(
         _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
+#endif
 }
 
 // Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
diff --git a/lea-simd.cpp b/lea-simd.cpp
index 0076926d..f91702c9 100644
--- a/lea-simd.cpp
+++ b/lea-simd.cpp
@@ -22,6 +22,18 @@
 # include
 #endif
 
+#if defined(__AVX512F__) && defined(__AVX512VL__)
+# define CRYPTOPP_AVX512_ROTATE 1
+# include <immintrin.h>
+#endif
+
+// Can't use CRYPTOPP_ARM_XXX_AVAILABLE because too many
+// compilers don't follow ACLE conventions for the include.
+#if defined(CRYPTOPP_ARM_ACLE_AVAILABLE)
+# include <stdint.h>
+# include <arm_acle.h>
+#endif
+
 ANONYMOUS_NAMESPACE_BEGIN
 
 using CryptoPP::word32;
diff --git a/simon-simd.cpp b/simon-simd.cpp
index 91f10779..b947cdd3 100644
--- a/simon-simd.cpp
+++ b/simon-simd.cpp
@@ -43,14 +43,6 @@
 # include
 #endif
 
-// https://www.spinics.net/lists/gcchelp/msg47735.html and
-// https://www.spinics.net/lists/gcchelp/msg47749.html
-#if (CRYPTOPP_GCC_VERSION >= 40900)
-# define GCC_NO_UBSAN __attribute__ ((no_sanitize_undefined))
-#else
-# define GCC_NO_UBSAN
-#endif
-
 ANONYMOUS_NAMESPACE_BEGIN
 
 using CryptoPP::byte;
@@ -571,31 +563,26 @@ inline void Swap128(__m128i& a,__m128i& b)
 #endif
 }
 
+template <unsigned int R>
+inline __m128i RotateLeft64(const __m128i& val)
+{
 #if defined(CRYPTOPP_AVX512_ROTATE)
-template <unsigned int R>
-inline __m128i RotateLeft64(const __m128i& val)
-{
     return _mm_rol_epi64(val, R);
-}
-
-template <unsigned int R>
-inline __m128i RotateRight64(const __m128i& val)
-{
-    return _mm_ror_epi64(val, R);
-}
 #else
-template <unsigned int R>
-inline __m128i RotateLeft64(const __m128i& val)
-{
     return _mm_or_si128(
         _mm_slli_epi64(val, R), _mm_srli_epi64(val, 64-R));
+#endif
 }
 
 template <unsigned int R>
 inline __m128i RotateRight64(const __m128i& val)
 {
+#if defined(CRYPTOPP_AVX512_ROTATE)
+    return _mm_ror_epi64(val, R);
+#else
     return _mm_or_si128(
         _mm_slli_epi64(val, 64-R), _mm_srli_epi64(val, R));
+#endif
 }
 
 // Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
@@ -613,7 +600,6 @@ inline __m128i RotateRight64<8>(const __m128i& val)
     const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
     return _mm_shuffle_epi8(val, mask);
 }
-#endif // CRYPTOPP_AVX512_ROTATE
 
 inline __m128i SIMON128_f(const __m128i& v)
 {
diff --git a/speck-simd.cpp b/speck-simd.cpp
index 3666f240..f9be52e4 100644
--- a/speck-simd.cpp
+++ b/speck-simd.cpp
@@ -43,14 +43,6 @@
 # include
 #endif
 
-// https://www.spinics.net/lists/gcchelp/msg47735.html and
-// https://www.spinics.net/lists/gcchelp/msg47749.html
-#if (CRYPTOPP_GCC_VERSION >= 40900)
-# define GCC_NO_UBSAN __attribute__ ((no_sanitize_undefined))
-#else
-# define GCC_NO_UBSAN
-#endif
-
 ANONYMOUS_NAMESPACE_BEGIN
 
 using CryptoPP::byte;
@@ -507,31 +499,26 @@ inline void SPECK128_Dec_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
 # define CONST_DOUBLE_CAST(x) ((const double *)(const void *)(x))
 #endif
 
+template <unsigned int R>
+inline __m128i RotateLeft64(const __m128i& val)
+{
 #if defined(CRYPTOPP_AVX512_ROTATE)
-template <unsigned int R>
-inline __m128i RotateLeft64(const __m128i& val)
-{
     return _mm_rol_epi64(val, R);
-}
-
-template <unsigned int R>
-inline __m128i RotateRight64(const __m128i& val)
-{
-    return _mm_ror_epi64(val, R);
-}
 #else
-template <unsigned int R>
-inline __m128i RotateLeft64(const __m128i& val)
-{
     return _mm_or_si128(
         _mm_slli_epi64(val, R), _mm_srli_epi64(val, 64-R));
+#endif
 }
 
 template <unsigned int R>
 inline __m128i RotateRight64(const __m128i& val)
 {
+#if defined(CRYPTOPP_AVX512_ROTATE)
+    return _mm_ror_epi64(val, R);
+#else
     return _mm_or_si128(
         _mm_slli_epi64(val, 64-R), _mm_srli_epi64(val, R));
+#endif
 }
 
 // Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
@@ -550,8 +537,6 @@ inline __m128i RotateRight64<8>(const __m128i& val)
     return _mm_shuffle_epi8(val, mask);
 }
 
-#endif // CRYPTOPP_AVX512_ROTATE
-
 inline void GCC_NO_UBSAN SPECK128_Enc_Block(__m128i &block0, __m128i &block1,
     const word64 *subkeys, unsigned int rounds)
 {
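
The dispatch the patch applies can be exercised in isolation. The sketch
below is not repository code -- the main() driver, test values, and expected
output are illustrative assumptions -- but the guard macros and intrinsics
are the ones the diff uses: built with -mavx512f -mavx512vl the rotate
compiles to a single VPROLQ via _mm_rol_epi64, while a plain SSE2 build
falls back to the two-shift-and-OR emulation. Because R is a template
parameter, it is a compile-time constant, which is what the AVX-512 rotate
intrinsics require for their immediate argument.

    // Minimal standalone sketch of the patch's compile-time dispatch.
    // Build: g++ -O2 -msse2 rotate.cpp            (shift-or fallback)
    //        g++ -O2 -mavx512f -mavx512vl rotate.cpp   (VPROLQ path)
    #include <immintrin.h>   // SSE2 and AVX-512 intrinsics
    #include <cstdint>
    #include <cstdio>

    #if defined(__AVX512F__) && defined(__AVX512VL__)
    # define CRYPTOPP_AVX512_ROTATE 1
    #endif

    template <unsigned int R>
    inline __m128i RotateLeft64(const __m128i& val)
    {
        // The shift-or fallback is only well defined for 0 < R < 64.
        static_assert(R > 0 && R < 64, "rotate amount out of range");
    #if defined(CRYPTOPP_AVX512_ROTATE)
        // AVX-512VL: one instruction rotates both 64-bit lanes.
        return _mm_rol_epi64(val, R);
    #else
        // SSE2 baseline: two shifts and an OR emulate the rotate.
        return _mm_or_si128(
            _mm_slli_epi64(val, R), _mm_srli_epi64(val, 64-R));
    #endif
    }

    int main()
    {
        // _mm_set_epi64x(e1, e0): e1 is the high lane, e0 the low lane.
        const __m128i x = _mm_set_epi64x(0x0123456789abcdefULL,
                                         0xfedcba9876543210ULL);
        const __m128i r = RotateLeft64<8>(x);

        uint64_t lanes[2];
        _mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), r);

        // Both build modes print: 23456789abcdef01 dcba9876543210fe
        std::printf("%016llx %016llx\n",
            (unsigned long long)lanes[1], (unsigned long long)lanes[0]);
        return 0;
    }

Folding the #if inside a single pair of templates, instead of keeping two
complete sets of function definitions, is also what lets the patch drop the
trailing "#endif // CRYPTOPP_AVX512_ROTATE" in simon-simd.cpp and
speck-simd.cpp: the _mm_shuffle_epi8 specializations for 8-bit rotates (a
rotate by a multiple of 8 is just a fixed byte permutation) now remain in
effect for both the AVX-512 and the SSE builds.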