diff --git a/cpu.cpp b/cpu.cpp index d8f7d3c5..7fb737ce 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -48,19 +48,19 @@ bool CpuId(word32 input, word32 output[4]) #else #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -extern "C" { - -static jmp_buf s_jmpNoCPUID; -static void SigIllHandlerCPUID(int) +extern "C" { - longjmp(s_jmpNoCPUID, 1); -} + static jmp_buf s_jmpNoCPUID; + static void SigIllHandlerCPUID(int) + { + longjmp(s_jmpNoCPUID, 1); + } -static jmp_buf s_jmpNoSSE2; -static void SigIllHandlerSSE2(int) -{ - longjmp(s_jmpNoSSE2, 1); -} + static jmp_buf s_jmpNoSSE2; + static void SigIllHandlerSSE2(int) + { + longjmp(s_jmpNoSSE2, 1); + } } #endif @@ -97,10 +97,14 @@ bool CpuId(word32 input, word32 output[4]) // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); if (oldHandler == SIG_ERR) result = false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + result = false; + if (setjmp(s_jmpNoCPUID)) result = false; else @@ -119,6 +123,7 @@ bool CpuId(word32 input, word32 output[4]) ); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; #endif @@ -151,7 +156,7 @@ static bool TrySSE2() // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); if (oldHandler == SIG_ERR) return false; @@ -167,6 +172,7 @@ static bool TrySSE2() #endif } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; #endif @@ -298,29 +304,30 @@ void DetectX86Features() *((volatile bool*)&g_x86DetectionDone) = true; } -// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu -// http://stackoverflow.com/questions/26701262/how-to-check-the-existence-of-neon-on-arm #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) +// The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0. +// Attempting to run the code results in a SIGILL and termination. +// +// #if defined(__arm64__) || defined(__aarch64__) +// word64 caps = 0; // Read ID_AA64ISAR0_EL1 +// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); +// #elif defined(__arm__) || defined(__aarch32__) +// word32 caps = 0; // Read ID_ISAR5_EL1 +// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); +// #endif +// +// The following does not work well either. Its appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64 +// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu +// bool g_ArmDetectionDone = false; bool g_hasNEON = false, g_hasCRC32 = false, g_hasAES = false, g_hasSHA1 = false, g_hasSHA2 = false; word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; -// The ARM equivalent of CPUID is reading a MSR. For example, fetch crypto capabilities with: -// #if defined(__arm64__) || defined(__aarch64__) -// word64 caps = 0; // Read ID_AA64ISAR0_EL1 -// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); -// #elif defined(__arm__) || defined(__aarch32__) -// word32 caps = 0; // Read ID_ISAR5_EL1 -// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); -// #endif -// The code requires Exception Level 1 (EL1) and above, but user space runs at EL0. -// Attempting to run the code results in a SIGILL and termination. - #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -extern "C" { - +extern "C" +{ static jmp_buf s_jmpNoNEON; static void SigIllHandlerNEON(int) { @@ -359,15 +366,15 @@ static bool TryNEON() # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) __try { - static const uint32_t v1[4] = {1,1,1,1}; + uint32_t v1[4] = {1,1,1,1}; uint32x4_t x1 = vld1q_u32(v1); - static const uint64_t v2[2] = {1,1}; + uint64_t v2[2] = {1,1}; uint64x2_t x2 = vld1q_u64(v2); - uint32x4_t x3 = vdupq_n_u32(0); + uint32x4_t x3 = {0,0,0,0}; x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); - uint64x2_t x4 = vdupq_n_u64(0); + uint64x2_t x4 = {0,0}; x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); } @@ -381,27 +388,35 @@ static bool TryNEON() // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON); if (oldHandler == SIG_ERR) result = false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + result = false; + if (setjmp(s_jmpNoNEON)) result = false; else { - static const uint32_t v1[4] = {1,1,1,1}; + uint32_t v1[4] = {1,1,1,1}; uint32x4_t x1 = vld1q_u32(v1); - static const uint64_t v2[2] = {1,1}; + uint64_t v2[2] = {1,1}; uint64x2_t x2 = vld1q_u64(v2); - uint32x4_t x3 = vdupq_n_u32(0); + uint32x4_t x3 = {0,0,0,0}; x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); - uint64x2_t x4 = vdupq_n_u64(0); + uint64x2_t x4 = {0,0}; x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1)); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif @@ -431,10 +446,14 @@ static bool TryCRC32() // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32); if (oldHandler == SIG_ERR) result = false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + result = false; + if (setjmp(s_jmpNoCRC32)) result = false; else @@ -443,8 +462,12 @@ static bool TryCRC32() w = __crc32cw(w,x); w = __crc32ch(w,y); w = __crc32cb(w,z); + + // Hack... GCC optimizes away the code and returns true + result = !!w; } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif @@ -460,7 +483,7 @@ static bool TryAES() __try { // AES encrypt and decrypt - static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); + uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); uint8x16_t r1 = vaeseq_u8(data, key); uint8x16_t r2 = vaesdq_u8(data, key); CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2); @@ -475,20 +498,27 @@ static bool TryAES() // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES); if (oldHandler == SIG_ERR) result = false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + result = false; + if (setjmp(s_jmpNoAES)) result = false; else { - static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); + uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); uint8x16_t r1 = vaeseq_u8(data, key); uint8x16_t r2 = vaesdq_u8(data, key); - CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7)); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif @@ -503,8 +533,8 @@ static bool TrySHA1() # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) __try { - static const uint32x4_t data = vdupq_n_u32(0); - static const uint32_t hash = 0x0; + uint32x4_t data = {0,0,0,0}; + uint32_t hash = 0x0; uint32x4_t r1 = vsha1cq_u32 (data, hash, data); uint32x4_t r2 = vsha1mq_u32 (data, hash, data); @@ -521,23 +551,30 @@ static bool TrySHA1() // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1); if (oldHandler == SIG_ERR) result = false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + result = false; + if (setjmp(s_jmpNoSHA1)) result = false; else { - static const uint32x4_t data = vdupq_n_u32(0); - static const uint32_t hash = 0x0; + uint32x4_t data = {0,0,0,0}; + uint32_t hash = 0x0; - uint32x4_t r1 = vsha1cq_u32 (data, hash, data); + uint32x4_t r1 = vsha1cq_u32 (data, hash, data); uint32x4_t r2 = vsha1mq_u32 (data, hash, data); uint32x4_t r3 = vsha1pq_u32 (data, hash, data); - CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2)); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif @@ -552,8 +589,8 @@ static bool TrySHA2() # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) __try { - static const uint32x4_t data = vdupq_n_u32(0); - static const uint32x4_t hash = vdupq_n_u32(0); + uint32x4_t data = {0,0,0,0}; + uint32x4_t hash = {0,0,0,0}; uint32x4_t r1 = vsha256hq_u32 (hash, hash, data); uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data); @@ -571,24 +608,31 @@ static bool TrySHA2() // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2); if (oldHandler == SIG_ERR) result = false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + result = false; + if (setjmp(s_jmpNoSHA2)) result = false; else { - static const uint32x4_t data = vdupq_n_u32(0); - static const uint32x4_t hash = vdupq_n_u32(0); + uint32x4_t data = {0,0,0,0}; + uint32x4_t hash = {0,0,0,0}; uint32x4_t r1 = vsha256hq_u32 (hash, hash, data); uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data); uint32x4_t r3 = vsha256su0q_u32 (data, data); uint32x4_t r4 = vsha256su1q_u32 (data, data, data); - CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3)); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif