From e846beac352cc69208c1960b218b95c906407b19 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 16 May 2016 18:47:31 -0400 Subject: [PATCH 1/3] Add defines for ARMv8 CRC32 and Crypto extensions --- config.h | 25 ++++++++++++++++++------- config.recommend | 25 ++++++++++++++++++------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/config.h b/config.h index 9a9a1baa..dae9f77d 100644 --- a/config.h +++ b/config.h @@ -534,24 +534,35 @@ NAMESPACE_END #define CRYPTOPP_BOOL_ARM64 0 #endif +// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under the toolchains. #if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) # if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 1700)) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM) || (__ARM_ARCH >= 8) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (__ARM_ARCH >= 7) || defined(_M_ARM) # define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 # endif # endif #endif -#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) -# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000)) -# if defined(__ARM_FEATURE_CRYPTO) -# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1 +// Requires ARMv8 and ACLE 2.0. +// Microsoft plans to support ARM-64, but its not clear how to detect it. +// TODO: Add MSC_VER and ARM-64 platform define when available +#if !defined(CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) +# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 2000)) +# if defined(__ARM_FEATURE_CRC32) || (__ARM_ARCH >= 8) || defined(_M_ARM64) +# define CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE 1 # endif # endif #endif -#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0 +// Requires ARMv8 and ACLE 2.0. +// Microsoft plans to support ARM-64, but its not clear how to detect it. +// TODO: Add MSC_VER and ARM-64 platform define when available +#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) +# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 2000)) +# if defined(__ARM_FEATURE_CRYPTO) || (__ARM_ARCH >= 8) || defined(_M_ARM64) +# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1 +# endif +# endif #endif #if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) diff --git a/config.recommend b/config.recommend index 983f8a41..a824ce1c 100644 --- a/config.recommend +++ b/config.recommend @@ -534,24 +534,35 @@ NAMESPACE_END #define CRYPTOPP_BOOL_ARM64 0 #endif +// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under the toolchains. #if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) # if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 1700)) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM) || (__ARM_ARCH >= 8) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (__ARM_ARCH >= 7) || defined(_M_ARM) # define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 # endif # endif #endif -#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) -# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000)) -# if defined(__ARM_FEATURE_CRYPTO) -# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1 +// Requires ARMv8 and ACLE 2.0. +// Microsoft plans to support ARM-64, but its not clear how to detect it. +// TODO: Add MSC_VER and ARM-64 platform define when available +#if !defined(CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) +# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 2000)) +# if defined(__ARM_FEATURE_CRC32) || (__ARM_ARCH >= 8) || defined(_M_ARM64) +# define CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE 1 # endif # endif #endif -#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0 +// Requires ARMv8 and ACLE 2.0. +// Microsoft plans to support ARM-64, but its not clear how to detect it. +// TODO: Add MSC_VER and ARM-64 platform define when available +#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) +# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 2000)) +# if defined(__ARM_FEATURE_CRYPTO) || (__ARM_ARCH >= 8) || defined(_M_ARM64) +# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1 +# endif +# endif #endif #if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) From 392c55d57355d3ccc53a8aa224f5c00c3e5faee0 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 16 May 2016 18:47:43 -0400 Subject: [PATCH 2/3] Add runtime detection code for ARM NEON, CRC32 and Crypto extensions --- cpu.cpp | 231 ++++++++++++++++++++++++++++++++++++++++++++++---------- cpu.h | 29 ++++--- 2 files changed, 211 insertions(+), 49 deletions(-) diff --git a/cpu.cpp b/cpu.cpp index bdbd06e4..1ee18a8e 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -24,10 +24,17 @@ NAMESPACE_BEGIN(CryptoPP) +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + // MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. #define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0)) #define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0)) +extern "C" { + typedef void (*SigHandler)(int); +}; +#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + #ifdef CRYPTOPP_CPUID_AVAILABLE #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 @@ -42,7 +49,6 @@ bool CpuId(word32 input, word32 output[4]) #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY extern "C" { -typedef void (*SigHandler)(int); static jmp_buf s_jmpNoCPUID; static void SigIllHandlerCPUID(int) @@ -258,9 +264,189 @@ void DetectX86Features() bool g_ArmDetectionDone = false; bool g_hasNEON = false, g_hasCRC32 = false, g_hasCrypto = false; -// This is avaiable in a status register, but we need privileged code to perform the read word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; +// The ARM equivalent of CPUID is reading a MSR. For example, fetch crypto capabilities with: +// #if defined(__arm64__) || defined(__aarch64__) +// word64 caps = 0; // Read ID_AA64ISAR0_EL1 +// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); +// #elif defined(__arm__) || defined(__aarch32__) +// word32 caps = 0; // Read ID_ISAR5_EL1 +// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); +// #endif +// The code requires Exception Level 1 (EL1) and above, but user space runs at EL0. +// Attempting to run the code results in a SIGILL and termination. + +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY +extern "C" { + + static jmp_buf s_jmpNoNEON; + static void SigIllHandlerNEON(int) + { + longjmp(s_jmpNoNEON, 1); + } + + static jmp_buf s_jmpNoCRC32; + static void SigIllHandlerCRC32(int) + { + longjmp(s_jmpNoCRC32, 1); + } + + static jmp_buf s_jmpNoCrypto; + static void SigIllHandlerCrypto(int) + { + longjmp(s_jmpNoCrypto, 1); + } +}; +#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + +static bool TryNEON() +{ +#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + __try + { + static const uint32_t v1[4] = {1,1,1,1}; + uint32x4_t x1 = vld1q_u32(v1); + static const uint64_t v2[2] = {1,1}; + uint64x2_t x2 = vld1q_u64(v2); + + uint32x4_t x3 = vdupq_n_u32(0); + x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); + x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); + uint64x2_t x4 = vdupq_n_u64(0); + x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); + x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return true; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 + // http://stackoverflow.com/q/7721854 + volatile bool result = true; + + SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON); + if (oldHandler == SIG_ERR) + result = false; + + if (setjmp(s_jmpNoNEON)) + result = false; + else + { + static const uint32_t v1[4] = {1,1,1,1}; + uint32x4_t x1 = vld1q_u32(v1); + static const uint64_t v2[2] = {1,1}; + uint64x2_t x2 = vld1q_u64(v2); + + uint32x4_t x3 = vdupq_n_u32(0); + x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); + x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); + uint64x2_t x4 = vdupq_n_u64(0); + x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); + x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); + } + + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +} + +static bool TryCRC32() +{ +#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + __try + { + word32 w=0, x=0; word16 y=0; byte z=0; + w = __crc32cw(w,x); + w = __crc32ch(w,y); + w = __crc32cb(w,z); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return true; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 + // http://stackoverflow.com/q/7721854 + volatile bool result = true; + + SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32); + if (oldHandler == SIG_ERR) + result = false; + + if (setjmp(s_jmpNoCRC32)) + result = false; + else + { + word32 w=0, x=0; word16 y=0; byte z=0; + w = __crc32cw(w,x); + w = __crc32ch(w,y); + w = __crc32cb(w,z); + } + + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE +} + +static bool TryCrypto() +{ +#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + __try + { + // AES encrypt and decrypt + static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); + uint8x16_t r1 = vaeseq_u8(data, key); + uint8x16_t r2 = vaesdq_u8(data, key); + + // + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return true; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 + // http://stackoverflow.com/q/7721854 + volatile bool result = true; + + SigHandler oldHandler = signal(SIGILL, SigIllHandlerCrypto); + if (oldHandler == SIG_ERR) + result = false; + + if (setjmp(s_jmpNoCrypto)) + result = false; + else + { + static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); + uint8x16_t r1 = vaeseq_u8(data, key); + uint8x16_t r2 = vaesdq_u8(data, key); + } + + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE +} + #if HAVE_GCC_CONSTRUCTOR1 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures() #elif HAVE_GCC_CONSTRUCTOR0 @@ -269,43 +455,10 @@ void __attribute__ ((constructor)) DetectArmFeatures() void DetectArmFeatures() #endif { -#if defined(__linux__) && defined(__aarch64__) // ARM-64 - const unsigned long hwcaps = getauxval(AT_HWCAP); - g_hasNEON = !!(hwcaps & HWCAP_ASIMD); -# if defined(__ARM_FEATURE_CRC32) - g_hasCRC32 = !!(hwcaps & HWCAP_CRC32); -# else - g_hasCRC32 = false; -# endif -#elif defined(__linux__) // ARM-32 - const unsigned long hwcaps = getauxval(AT_HWCAP); - g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); -# if defined(__ARM_FEATURE_CRC32) - g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32); -# else - g_hasCRC32 = false; -# endif -#elif defined(__APPLE__) - g_hasNEON = true; -# if defined(__ARM_FEATURE_CRC32) - g_hasCRC32 = true; -# else - g_hasCRC32 = false; -# endif -# if defined(__ARM_FEATURE_CRYPTO) - g_hasCrypto = true; -# else - g_hasCrypto = false; -# endif -#elif defined(_WIN32) - g_hasNEON = true; - g_hasCRC32 = false; - g_hasCrypto = false; -#else - g_hasNEON = false; - g_hasCRC32 = false; - g_hasCrypto = false; -#endif + g_hasNEON = TryNEON(); + g_hasCRC32 = TryCRC32(); + g_hasCrypto = TryCrypto(); + *((volatile bool*)&g_ArmDetectionDone) = true; } diff --git a/cpu.h b/cpu.h index 98f4e6d5..7d0b4d7e 100644 --- a/cpu.h +++ b/cpu.h @@ -12,16 +12,19 @@ #include "config.h" #if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) -# if defined(__linux__) -# include -# include -# include +# if defined(_MSC_VER) || defined(__BORLANDC__) +# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY +# else +# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY # endif # if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE # include # endif -# if (defined(__ARM_FEATURE_CRC32) || defined(__ARM_FEATURE_CRYPTO) || (__ARM_ACLE >= 200)) && !defined(__APPLE__) -# include +# if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) +# include +# if (defined(__ARM_ACLE) || defined(__GNUC__)) && !defined(__APPLE__) +# include +# endif # endif #endif // ARM-32 or ARM-64 @@ -242,6 +245,7 @@ void CRYPTOPP_API DetectArmFeatures(); //! \brief Determine if an ARM processor has Advanced SIMD available //! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise. +//! \details Advanced SIMD instructions are available under Aarch64 (ARM-64) and Aarch32 (ARM-32). //! \details Runtime support requires compile time support. When compiling with GCC, you may //! need to compile with -mfpu=neon (32-bit) or -march=armv8-a //! (64-bit). Also see ARM's __ARM_NEON preprocessor macro. @@ -254,9 +258,12 @@ inline bool HasNEON() //! \brief Determine if an ARM processor has CRC32 available //! \returns true if the hardware is capable of CRC32 at runtime, false otherwise. +//! \details CRC32 instructions provide access to the processor's CRC32 and CRC32-C intructions. +//! They are provided by ARM C Language Extensions 2.0 (ACLE 2.0) and available under Aarch64 +//! (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an AArch32 execution environment). //! \details Runtime support requires compile time support. When compiling with GCC, you may -//! need to compile with -march=armv8-a+crc. Also see ARM's __ARM_FEATURE_CRC32 -//! preprocessor macro. +//! need to compile with -march=armv8-a+crc; while Apple requires +//! -arch arm64. Also see ARM's __ARM_FEATURE_CRC32 preprocessor macro. inline bool HasCRC32() { if (!g_ArmDetectionDone) @@ -266,10 +273,12 @@ inline bool HasCRC32() //! \brief Determine if an ARM processor has Crypto available //! \returns true if the hardware is capable of Crypto at runtime, false otherwise. +//! \details Crypto instructions provide access to the processor's AES, SHA-1, SHA-224 and SHA-256 intructions. +//! They are provided by ARM C Language Extensions 2.0 (ACLE 2.0) and available under Aarch64 +//! (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an AArch32 execution environment). //! \details Runtime support requires compile time support. When compiling with GCC, you may //! need to compile with -march=armv8-a+crypto; while Apple requires -//! -arch armv7s or -arch arm64. Also see ARM's __ARM_FEATURE_CRYPTO -//! preprocessor macro. +//! -arch arm64. Also see ARM's __ARM_FEATURE_CRYPTO preprocessor macro. inline bool HasCrypto() { if (!g_ArmDetectionDone) From 72308f3f076796f4c077bfbf346ce72389858178 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 16 May 2016 18:48:22 -0400 Subject: [PATCH 3/3] Use CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE to detect extension --- crc.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/crc.cpp b/crc.cpp index bdb4e2f1..738d1238 100644 --- a/crc.cpp +++ b/crc.cpp @@ -131,6 +131,22 @@ CRC32::CRC32() void CRC32::Update(const byte *s, size_t n) { +#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) + if (HasCRC32()) + { + for(; !IsAligned(s) && n > 0; s++, n--) + m_crc = __crc32b(m_crc, *s); + + for(; n > 4; s+=4, n-=4) + m_crc = __crc32w(m_crc, *(const word32 *)(void*)s); + + for(; n > 0; s++, n--) + m_crc = __crc32b(m_crc, *s); + + return; + } +#endif + word32 crc = m_crc; for(; !IsAligned(s) && n > 0; n--) @@ -295,6 +311,20 @@ void CRC32C::Update(const byte *s, size_t n) for(; n > 0; s++, n--) m_crc = _mm_crc32_u8(m_crc, *s); + return; + } +#elif (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) + if (HasCRC32()) + { + for(; !IsAligned(s) && n > 0; s++, n--) + m_crc = __crc32cb(m_crc, *s); + + for(; n > 4; s+=4, n-=4) + m_crc = __crc32cw(m_crc, *(const word32 *)(void*)s); + + for(; n > 0; s++, n--) + m_crc = __crc32cb(m_crc, *s); + return; } #endif