diff --git a/simon-simd.cpp b/simon-simd.cpp
index 1bf8ccab..42f8db0a 100644
--- a/simon-simd.cpp
+++ b/simon-simd.cpp
@@ -30,6 +30,11 @@
 # include <smmintrin.h>
 #endif
 
+#if defined(__AVX512F__) && defined(__AVX512VL__)
+# define CRYPTOPP_AVX512_ROTATE 1
+# include <immintrin.h>
+#endif
+
 // Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670
 #define M128_CAST(x) ((__m128i *)(void *)(x))
 #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
@@ -436,6 +441,19 @@ inline void Swap128(__m128i& a,__m128i& b)
 #endif
 }
 
+#if defined(CRYPTOPP_AVX512_ROTATE)
+template <unsigned int R>
+inline __m128i RotateLeft64(const __m128i& val)
+{
+    return _mm_rol_epi64(val, R);
+}
+
+template <unsigned int R>
+inline __m128i RotateRight64(const __m128i& val)
+{
+    return _mm_ror_epi64(val, R);
+}
+#else
 template <unsigned int R>
 inline __m128i RotateLeft64(const __m128i& val)
 {
@@ -465,6 +483,7 @@ inline __m128i RotateRight64<8>(const __m128i& val)
     const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
     return _mm_shuffle_epi8(val, mask);
 }
+#endif // CRYPTOPP_AVX512_ROTATE
 
 inline __m128i SIMON128_f(const __m128i& v)
 {
diff --git a/speck-simd.cpp b/speck-simd.cpp
index f883515c..d7dee151 100644
--- a/speck-simd.cpp
+++ b/speck-simd.cpp
@@ -30,6 +30,11 @@
 # include <smmintrin.h>
 #endif
 
+#if defined(__AVX512F__) && defined(__AVX512VL__)
+# define CRYPTOPP_AVX512_ROTATE 1
+# include <immintrin.h>
+#endif
+
 // Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670
 #define M128_CAST(x) ((__m128i *)(void *)(x))
 #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
@@ -417,22 +422,31 @@ size_t SPECK128_AdvancedProcessBlocks_NEON(F1 func1, F6 func6,
 CRYPTOPP_ALIGN_DATA(16)
 const word32 s_one[] = {0, 0, 0, 1<<24};
 
+#if defined(CRYPTOPP_AVX512_ROTATE)
 template <unsigned int R>
 inline __m128i RotateLeft64(const __m128i& val)
 {
-    CRYPTOPP_ASSERT(R < 64);
-    const __m128i a = _mm_slli_epi64(val, R);
-    const __m128i b = _mm_srli_epi64(val, 64-R);
-    return _mm_or_si128(a, b);
+    return _mm_rol_epi64(val, R);
 }
 
 template <unsigned int R>
 inline __m128i RotateRight64(const __m128i& val)
 {
-    CRYPTOPP_ASSERT(R < 64);
-    const __m128i a = _mm_slli_epi64(val, 64-R);
-    const __m128i b = _mm_srli_epi64(val, R);
-    return _mm_or_si128(a, b);
+    return _mm_ror_epi64(val, R);
+}
+#else
+template <unsigned int R>
+inline __m128i RotateLeft64(const __m128i& val)
+{
+    return _mm_or_si128(
+        _mm_slli_epi64(val, R), _mm_srli_epi64(val, 64-R));
+}
+
+template <unsigned int R>
+inline __m128i RotateRight64(const __m128i& val)
+{
+    return _mm_or_si128(
+        _mm_slli_epi64(val, 64-R), _mm_srli_epi64(val, R));
 }
 
 // Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
@@ -450,6 +464,7 @@ inline __m128i RotateRight64<8>(const __m128i& val)
     const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
     return _mm_shuffle_epi8(val, mask);
 }
+#endif // CRYPTOPP_AVX512_ROTATE
 
 inline void SPECK128_Enc_Block(__m128i &block0, const word64 *subkeys, unsigned int rounds)
 {
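
For anyone who wants to confirm that the two code paths compute the same result, below is a minimal standalone sketch, not part of the patch. It compares the portable shift-or rotate (the #else path) against the single-instruction _mm_rol_epi64 (the AVX512VL path) on one test vector. The function names RotateLeft64_Portable and RotateLeft64_AVX512 are illustrative, and it assumes a compiler invoked with AVX512 enabled, e.g. g++ -O2 -mavx512f -mavx512vl.

// Hypothetical sanity check -- not part of the patch above.
// Build with AVX512 enabled, e.g.: g++ -O2 -mavx512f -mavx512vl check.cpp
#include <immintrin.h>
#include <cstdio>

// Portable shift-or rotate, mirroring the fallback (#else) path.
template <unsigned int R>
inline __m128i RotateLeft64_Portable(const __m128i& val)
{
    return _mm_or_si128(
        _mm_slli_epi64(val, R), _mm_srli_epi64(val, 64-R));
}

// Single-instruction AVX512VL rotate, mirroring the new path.
template <unsigned int R>
inline __m128i RotateLeft64_AVX512(const __m128i& val)
{
    return _mm_rol_epi64(val, R);
}

int main()
{
    const __m128i x = _mm_set_epi64x(0x0123456789ABCDEF, 0x0F1E2D3C4B5A6978);
    const __m128i a = RotateLeft64_Portable<8>(x);
    const __m128i b = RotateLeft64_AVX512<8>(x);

    // _mm_cmpeq_epi64 is SSE4.1; any AVX512 target implies it.
    // All 16 byte lanes equal means the movemask is 0xFFFF.
    const int mask = _mm_movemask_epi8(_mm_cmpeq_epi64(a, b));
    std::printf("rotate paths %s\n", mask == 0xFFFF ? "agree" : "DISAGREE");
    return mask == 0xFFFF ? 0 : 1;
}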