From e8603143dc23841e460d1acb1e7a50d64949803e Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Tue, 21 May 2019 02:21:15 -0400 Subject: [PATCH] Whitespace check-in We also simplified the CPU_ProbeNEON logic a bit to a vmov.u32 and vshl.u32. --- neon_simd.cpp | 199 +++++++++++++++++++++++--------------------------- 1 file changed, 92 insertions(+), 107 deletions(-) diff --git a/neon_simd.cpp b/neon_simd.cpp index aa13118f..910844c3 100644 --- a/neon_simd.cpp +++ b/neon_simd.cpp @@ -6,6 +6,7 @@ // ARMv8a NEON instructions. A separate source file is needed // because additional CXXFLAGS are required to enable the // appropriate instructions sets in some build configurations. +// For Linux and Unix additional flags are not required. #include "pch.h" #include "config.h" @@ -39,55 +40,55 @@ NAMESPACE_BEGIN(CryptoPP) extern "C" { typedef void (*SigHandler)(int); - static jmp_buf s_jmpSIGILL; - static void SigIllHandler(int) - { - longjmp(s_jmpSIGILL, 1); - } + static jmp_buf s_jmpSIGILL; + static void SigIllHandler(int) + { + longjmp(s_jmpSIGILL, 1); + } } #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY bool CPU_ProbeARMv7() { #if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) - return false; + return false; #elif CRYPTOPP_BOOL_ARM32 # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - volatile bool result = true; - __try - { - // Modern MS hardware is ARMv7 - result = true; - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return result; -# elif defined(__GNUC__) || defined(__clang__) - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; + volatile bool result = true; + __try + { + // Modern MS hardware is ARMv7 + result = true; + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; +# else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); - if (oldHandler == SIG_ERR) - return false; + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; - volatile sigset_t oldMask; - if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) - return false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; - if (setjmp(s_jmpSIGILL)) - result = false; - else - { + if (setjmp(s_jmpSIGILL)) + result = false; + else + { #if 0 - // ARMv7 added movt and movw - int a; - asm volatile("movw %0,%1 \n" - "movt %0,%1 \n" - : "=r"(a) : "i"(0x1234)); + // ARMv7 added movt and movw + int a; + asm volatile("movw %0,%1 \n" + "movt %0,%1 \n" + : "=r"(a) : "i"(0x1234)); 00000010 <_Z5test2v>: // ARM 10: e3010234 movw r0, #4660 ; 0x1234 @@ -100,96 +101,80 @@ bool CPU_ProbeARMv7() 24: e12fff1e bx lr #endif - int a; - asm volatile ( - ".arm \n\t" - ".inst 0xe3010234 \n\t" // movw r0, 0x1234 - ".inst 0xe3410234 \n\t" // movt r0, 0x1234 - "mov %0, r0 \n\t" // mov [a], r0 - : "=r" (a) : : "r0"); + volatile int a; + asm volatile ( + ".arm \n\t" + ".inst 0xe3010234 \n\t" // movw r0, 0x1234 + ".inst 0xe3410234 \n\t" // movt r0, 0x1234 + "mov %0, r0 \n\t" // mov [a], r0 + : "=r" (a) : : "r0"); - result = (a == 0x12341234); - } + result = (a == 0x12341234); + } - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); - signal(SIGILL, oldHandler); - return result; + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; # endif #else - return false; + return false; #endif // CRYPTOPP_BOOL_ARM32 } bool CPU_ProbeNEON() { #if defined(__aarch32__) || defined(__aarch64__) - return true; + return true; #elif defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) - return false; -#elif (CRYPTOPP_ARM_NEON_AVAILABLE) + return false; +#elif CRYPTOPP_BOOL_ARM32 # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - volatile bool result = true; - __try - { - uint32_t v1[4] = {1,1,1,1}; - uint32x4_t x1 = vld1q_u32(v1); - uint64_t v2[2] = {1,1}; - uint64x2_t x2 = vld1q_u64(v2); - - uint32x4_t x3 = vdupq_n_u32(2); - x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); - x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); - uint64x2_t x4 = vdupq_n_u64(2); - x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); - x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); - - result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1)); - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return result; + volatile bool result = true; + __try + { + volatile uint32x4_t x = vdupq_n_u32(1); + volatile uint32x4_t y = vshlq_n_u32(x, 4); + return (y[0] & y[1] & y[2] & y[3]) == 16; + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; # else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); - if (oldHandler == SIG_ERR) - return false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; - volatile sigset_t oldMask; - if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) - return false; + if (setjmp(s_jmpSIGILL)) + result = false; + else + { + // This is risky... When we hand encode the instructions + // for vmov.u32 and vshl.u32 we get a SIGILL. Apparently + // we need more than just the instructions. Using + // intrinsics introduces the risk because the whole + // file gets built with ISA options, and the higher ISA + // may escape the try block with the SIGILL guard. + uint32x4_t x = vdupq_n_u32(1); + uint32x4_t y = vshlq_n_u32(x, 4); + return (y[0] & y[1] & y[2] & y[3]) == 16; + } - if (setjmp(s_jmpSIGILL)) - result = false; - else - { - uint32_t v1[4] = {1,1,1,1}; - uint32x4_t x1 = vld1q_u32(v1); - uint64_t v2[2] = {1,1}; - uint64x2_t x2 = vld1q_u64(v2); - - uint32x4_t x3 = {0,0,0,0}; - x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); - x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); - uint64x2_t x4 = {0,0}; - x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); - x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); - - // Hack... GCC optimizes away the code and returns true - result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1)); - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); - signal(SIGILL, oldHandler); - return result; + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; # endif #else - return false; + return false; #endif // CRYPTOPP_ARM_NEON_AVAILABLE }