// cpu.cpp - originally written and placed in the public domain by Wei Dai #include "pch.h" #include "config.h" #ifndef EXCEPTION_EXECUTE_HANDLER # define EXCEPTION_EXECUTE_HANDLER 1 #endif #ifndef CRYPTOPP_IMPORTS #include "cpu.h" #include "misc.h" #include "stdcpp.h" #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY #include #include #endif NAMESPACE_BEGIN(CryptoPP) #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY extern "C" { typedef void (*SigHandler)(int); }; #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY #ifdef CRYPTOPP_CPUID_AVAILABLE #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 bool CpuId(word32 input, word32 output[4]) { __cpuid((int *)output, input); return true; } #else #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY extern "C" { static jmp_buf s_jmpNoCPUID; static void SigIllHandlerCPUID(int) { longjmp(s_jmpNoCPUID, 1); } static jmp_buf s_jmpNoSSE2; static void SigIllHandlerSSE2(int) { longjmp(s_jmpNoSSE2, 1); } } #endif bool CpuId(word32 input, word32 output[4]) { #if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) __try { __asm { mov eax, input mov ecx, 0 cpuid mov edi, output mov [edi], eax mov [edi+4], ebx mov [edi+8], ecx mov [edi+12], edx } } // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION __except (EXCEPTION_EXECUTE_HANDLER) { return false; } // function 0 returns the highest basic function understood in EAX if(input == 0) return !!output[0]; return true; #else // longjmp and clobber warnings. Volatile is required. // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); if (oldHandler == SIG_ERR) return false; # ifndef __MINGW32__ volatile sigset_t oldMask; if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) return false; # endif if (setjmp(s_jmpNoCPUID)) result = false; else { asm volatile ( // save ebx in case -fPIC is being used // TODO: this might need an early clobber on EDI. # if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx" # else "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx" # endif : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3]) : "a" (input), "c" (0) : "cc" ); } # ifndef __MINGW32__ sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); # endif signal(SIGILL, oldHandler); return result; #endif } #endif static bool TrySSE2() { #if CRYPTOPP_BOOL_X64 return true; #elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) __try { #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE AS2(por xmm0, xmm0) // executing SSE2 instruction #elif CRYPTOPP_SSE2_AVAILABLE __m128i x = _mm_setzero_si128(); return _mm_cvtsi128_si32(x) == 0; #endif } // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION __except (EXCEPTION_EXECUTE_HANDLER) { return false; } return true; #else // longjmp and clobber warnings. Volatile is required. // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); if (oldHandler == SIG_ERR) return false; # ifndef __MINGW32__ volatile sigset_t oldMask; if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) return false; # endif if (setjmp(s_jmpNoSSE2)) result = false; else { #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE __asm __volatile ("por %xmm0, %xmm0"); #elif CRYPTOPP_SSE2_AVAILABLE __m128i x = _mm_setzero_si128(); result = _mm_cvtsi128_si32(x) == 0; #endif } # ifndef __MINGW32__ sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); # endif signal(SIGILL, oldHandler); return result; #endif } bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false; bool CRYPTOPP_SECTION_INIT CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false; bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false, CRYPTOPP_SECTION_INIT g_hasSSE42 = false; bool CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false; bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false; bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false; bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false; word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; static inline bool IsIntel(const word32 output[4]) { // This is the "GenuineIntel" string return (output[1] /*EBX*/ == 0x756e6547) && (output[2] /*ECX*/ == 0x6c65746e) && (output[3] /*EDX*/ == 0x49656e69); } static inline bool IsAMD(const word32 output[4]) { // This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!" return (output[1] /*EBX*/ == 0x68747541) && (output[2] /*ECX*/ == 0x444D4163) && (output[3] /*EDX*/ == 0x69746E65); } static inline bool IsVIA(const word32 output[4]) { // This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA " return (output[1] /*EBX*/ == 0x746e6543) && (output[2] /*ECX*/ == 0x736c7561) && (output[3] /*EDX*/ == 0x48727561); } void DetectX86Features() { // Coverity finding CID 171239... word32 cpuid1[4]={0}, cpuid2[4]={0}, cpuid3[4]={0}; if (!CpuId(0, cpuid1)) return; if (!CpuId(1, cpuid2)) return; if ((cpuid2[3] & (1 << 26)) != 0) g_hasSSE2 = TrySSE2(); g_hasSSSE3 = g_hasSSE2 && (cpuid2[2] & (1<<9)); g_hasSSE41 = g_hasSSE2 && (cpuid2[2] & (1<<19)); g_hasSSE42 = g_hasSSE2 && (cpuid2[2] & (1<<20)); g_hasAESNI = g_hasSSE2 && (cpuid2[2] & (1<<25)); g_hasCLMUL = g_hasSSE2 && (cpuid2[2] & (1<<1)); if (IsIntel(cpuid1)) { static const unsigned int RDRAND_FLAG = (1 << 30); static const unsigned int RDSEED_FLAG = (1 << 18); static const unsigned int SHA_FLAG = (1 << 29); g_isP4 = ((cpuid2[0] >> 8) & 0xf) == 0xf; g_cacheLineSize = 8 * GETBYTE(cpuid2[1], 1); g_hasRDRAND = !!(cpuid2[2] /*ECX*/ & RDRAND_FLAG); if (cpuid1[0] /*EAX*/ >= 7) { if (CpuId(7, cpuid3)) { g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG); g_hasSHA = !!(cpuid3[1] /*EBX*/ & SHA_FLAG); } } } else if (IsAMD(cpuid1)) { static const unsigned int RDRAND_FLAG = (1 << 30); static const unsigned int RDSEED_FLAG = (1 << 18); static const unsigned int SHA_FLAG = (1 << 29); CpuId(0x01, cpuid1); g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG); if (cpuid1[0] /*EAX*/ >= 7) { if (CpuId(7, cpuid3)) { g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG); g_hasSHA = !!(cpuid3[1] /*EBX*/ & SHA_FLAG); } } CpuId(0x80000005, cpuid1); g_cacheLineSize = GETBYTE(cpuid1[2], 0); } else if (IsVIA(cpuid1)) { static const unsigned int RNG_FLAGS = (0x3 << 2); static const unsigned int ACE_FLAGS = (0x3 << 6); static const unsigned int ACE2_FLAGS = (0x3 << 8); static const unsigned int PHE_FLAGS = (0x3 << 10); static const unsigned int PMM_FLAGS = (0x3 << 12); CpuId(0xC0000000, cpuid1); if (cpuid1[0] >= 0xC0000001) { // Extended features available CpuId(0xC0000001, cpuid1); g_hasPadlockRNG = !!(cpuid1[3] /*EDX*/ & RNG_FLAGS); g_hasPadlockACE = !!(cpuid1[3] /*EDX*/ & ACE_FLAGS); g_hasPadlockACE2 = !!(cpuid1[3] /*EDX*/ & ACE2_FLAGS); g_hasPadlockPHE = !!(cpuid1[3] /*EDX*/ & PHE_FLAGS); g_hasPadlockPMM = !!(cpuid1[3] /*EDX*/ & PMM_FLAGS); } } if (!g_cacheLineSize) g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; g_x86DetectionDone = true; } #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) // The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) // and above, but user space runs at EL0. Attempting to run the code results in a SIGILL and termination. // // #if defined(__arm64__) || defined(__aarch64__) // word64 caps = 0; // Read ID_AA64ISAR0_EL1 // __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); // #elif defined(__arm__) || defined(__aarch32__) // word32 caps = 0; // Read ID_ISAR5_EL1 // __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); // #endif // // The following does not work well either. Its appears to be missing constants, and it does not detect // Aarch32 execution environments on Aarch64 // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu // bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false; bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false; bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false; word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; extern bool CPU_TryNEON_ARM(); extern bool CPU_TryAES_ARMV8(); extern bool CPU_TrySHA1_ARMV8(); extern bool CPU_TrySHA2_ARMV8(); extern bool CPU_TryCRC32_ARMV8(); extern bool CPU_TryPMULL_ARMV8(); void DetectArmFeatures() { g_hasNEON = CPU_TryNEON_ARM(); g_hasPMULL = CPU_TryPMULL_ARMV8(); g_hasCRC32 = CPU_TryCRC32_ARMV8(); g_hasAES = CPU_TryAES_ARMV8(); g_hasSHA1 = CPU_TrySHA1_ARMV8(); g_hasSHA2 = CPU_TrySHA2_ARMV8(); g_ArmDetectionDone = true; } #endif NAMESPACE_END // ***************** C++ Static Initialization ******************** ANONYMOUS_NAMESPACE_BEGIN struct InitializeCpu { InitializeCpu() { #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 CryptoPP::DetectX86Features(); #elif CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64 CryptoPP::DetectArmFeatures(); #endif } }; #if HAVE_GCC_INIT_PRIORITY const InitializeCpu s_init __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 20))) = InitializeCpu(); #elif HAVE_MSC_INIT_PRIORITY #pragma warning(disable: 4075) #pragma init_seg(".CRT$XCU-020") const InitializeCpu s_init; #pragma warning(default: 4075) #else const InitializeCpu& s_init = CryptoPP::Singleton().Ref(); #endif ANONYMOUS_NAMESPACE_END #endif // CRYPTOPP_IMPORTS