Add SSE3 <pmmintrin.h> for Simon and Speck

Add additional comments for WORKAROUND_GCC_OPTERON_ISSUE
pull/548/head
Jeffrey Walton 2017-12-08 13:54:00 -05:00
parent 148202369b
commit e457ca26f7
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 13 additions and 10 deletions

View File

@ -22,6 +22,7 @@
#endif #endif
#if (CRYPTOPP_SSSE3_AVAILABLE) #if (CRYPTOPP_SSSE3_AVAILABLE)
# include <pmmintrin.h>
# include <tmmintrin.h> # include <tmmintrin.h>
#endif #endif

View File

@ -22,6 +22,7 @@
#endif #endif
#if (CRYPTOPP_SSSE3_AVAILABLE) #if (CRYPTOPP_SSSE3_AVAILABLE)
# include <pmmintrin.h>
# include <tmmintrin.h> # include <tmmintrin.h>
#endif #endif
@ -34,19 +35,20 @@
# include <immintrin.h> # include <immintrin.h>
#endif #endif
// Weird GCC 7.0 issue on GCC118 which is Aarch64. The 2x blocks produce
// a bad result. The same code works fine with Speck (it was copied/pasted).
// It may affect more versions, but we can only test GCC 7.2, 4.8 and 4.9.
#if defined(__aarch32__) || defined(__aarch64__)
# if defined(__GNUC__) && (__GNUC__ >= 7)
# define WORKAROUND_GCC_7_ISSUE 1
# endif
#endif
// Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670 // Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670
#define M128_CAST(x) ((__m128i *)(void *)(x)) #define M128_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x)) #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
// GCC118 (AMD Opteron Aarch64) and GCC 7 issue. The 6x and 2x blocks produce
// a bad result. The same code works fine with Speck (it was copied/pasted).
// The same code is also fine on other Aarch64 test devices and A-32 NEON.
// It may affect more versions, but we can only test GCC 7.2, 4.8 and 4.9.
#if defined(__aarch32__) || defined(__aarch64__)
# if defined(__GNUC__) && (__GNUC__ >= 7)
# define WORKAROUND_GCC_OPTERON_ISSUE 1
# endif
#endif
ANONYMOUS_NAMESPACE_BEGIN ANONYMOUS_NAMESPACE_BEGIN
using CryptoPP::byte; using CryptoPP::byte;
@ -347,7 +349,7 @@ inline size_t SPECK64_AdvancedProcessBlocks_NEON(F2 func2, F6 func6,
outIncrement = 0-outIncrement; outIncrement = 0-outIncrement;
} }
#if defined(WORKAROUND_GCC_7_ISSUE) #if defined(WORKAROUND_GCC_OPTERON_ISSUE)
flags &= ~BlockTransformation::BT_AllowParallel; flags &= ~BlockTransformation::BT_AllowParallel;
#endif #endif