diff --git a/cpu.cpp b/cpu.cpp index ef0ddf69..13e15b36 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -21,6 +21,14 @@ # include #endif +//#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86 +//# if defined(_MSC_VER) +//# include +//# else +//# include +//# endif +//#endif + // Capability queries, requires Glibc 2.16, http://lwn.net/Articles/519085/ // CRYPTOPP_GLIBC_VERSION not used because config.h is missing #if (((__GLIBC__ * 100) + __GLIBC_MINOR__) >= 216) @@ -187,6 +195,8 @@ bool CRYPTOPP_SECTION_INIT g_hasSSE2 = false; bool CRYPTOPP_SECTION_INIT g_hasSSSE3 = false; bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false; bool CRYPTOPP_SECTION_INIT g_hasSSE42 = false; +bool CRYPTOPP_SECTION_INIT g_hasAVX = false; +bool CRYPTOPP_SECTION_INIT g_hasAVX2 = false; bool CRYPTOPP_SECTION_INIT g_hasAESNI = false; bool CRYPTOPP_SECTION_INIT g_hasCLMUL = false; bool CRYPTOPP_SECTION_INIT g_hasADX = false; @@ -245,12 +255,38 @@ void DetectX86Features() g_hasAESNI = g_hasSSE2 && ((cpuid1[2] & (1<<25)) != 0); g_hasCLMUL = g_hasSSE2 && ((cpuid1[2] & (1<< 1)) != 0); + // AVX is similar to SSE, but check both bits 27 (SSE) and 28 (AVX). + // https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + CRYPTOPP_CONSTANT(YMM_FLAG = (3 << 1)) + CRYPTOPP_CONSTANT(AVX_FLAG = (3 << 27)) + if ((cpuid1[2] & AVX_FLAG) == AVX_FLAG) + { +#if defined(__GNUC__) + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 and + // http://www.agner.org/optimize/vectorclass/read.php?i=65 + word32 a=0, d=0; + __asm __volatile + ( + // GCC 4.1/Binutils 2.17 cannot consume xgetbv + // "xgetbv" : "=a"(a), "=d"(d) : "c"(0) : + ".byte 0x0f, 0x01, 0xd0" "\n\t" + : "=a"(a), "=d"(d) : "c"(0) : + ); + word64 xcr0 = a | static_cast(d) << 32; + g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG; +#else + word64 xcr0 = _xgetbv(0); + g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG; +#endif + } + if (IsIntel(cpuid0)) { CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30)) CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18)) CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19)) CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29)) + CRYPTOPP_CONSTANT( AVX2_FLAG = (1 << 5)) g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf; g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1); @@ -263,6 +299,7 @@ void DetectX86Features() g_hasRDSEED = (cpuid2[1] /*EBX*/ & RDSEED_FLAG) != 0; g_hasADX = (cpuid2[1] /*EBX*/ & ADX_FLAG) != 0; g_hasSHA = (cpuid2[1] /*EBX*/ & SHA_FLAG) != 0; + g_hasAVX2 = (cpuid2[1] /*EBX*/ & AVX2_FLAG) != 0; } } } @@ -272,6 +309,7 @@ void DetectX86Features() CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18)) CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19)) CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29)) + CRYPTOPP_CONSTANT( AVX2_FLAG = (1 << 5)) CpuId(0x80000005, 0, cpuid2); g_cacheLineSize = GETBYTE(cpuid2[2], 0); @@ -284,6 +322,7 @@ void DetectX86Features() g_hasRDSEED = (cpuid2[1] /*EBX*/ & RDSEED_FLAG) != 0; g_hasADX = (cpuid2[1] /*EBX*/ & ADX_FLAG) != 0; g_hasSHA = (cpuid2[1] /*EBX*/ & SHA_FLAG) != 0; + g_hasAVX2 = (cpuid2[1] /*EBX*/ & AVX2_FLAG) != 0; } } } @@ -300,11 +339,11 @@ void DetectX86Features() { // Extended features available CpuId(0xC0000001, 0, cpuid2); - g_hasPadlockRNG = (cpuid2[3] /*EDX*/ & RNG_FLAGS) != 0; - g_hasPadlockACE = (cpuid2[3] /*EDX*/ & ACE_FLAGS) != 0; - g_hasPadlockACE2 = (cpuid2[3] /*EDX*/ & ACE2_FLAGS) != 0; - g_hasPadlockPHE = (cpuid2[3] /*EDX*/ & PHE_FLAGS) != 0; - g_hasPadlockPMM = (cpuid2[3] /*EDX*/ & PMM_FLAGS) != 0; + g_hasPadlockRNG = (cpuid2[3] /*EDX*/ & RNG_FLAGS) == RNG_FLAGS; + g_hasPadlockACE = (cpuid2[3] /*EDX*/ & ACE_FLAGS) == ACE_FLAGS; + g_hasPadlockACE2 = (cpuid2[3] /*EDX*/ & ACE2_FLAGS) == ACE2_FLAGS; + g_hasPadlockPHE = (cpuid2[3] /*EDX*/ & PHE_FLAGS) == PHE_FLAGS; + g_hasPadlockPMM = (cpuid2[3] /*EDX*/ & PMM_FLAGS) == PMM_FLAGS; } } diff --git a/cpu.h b/cpu.h index 68ac804c..0d555b4f 100644 --- a/cpu.h +++ b/cpu.h @@ -85,6 +85,8 @@ extern CRYPTOPP_DLL bool g_hasSSE2; extern CRYPTOPP_DLL bool g_hasSSSE3; extern CRYPTOPP_DLL bool g_hasSSE41; extern CRYPTOPP_DLL bool g_hasSSE42; +extern CRYPTOPP_DLL bool g_hasAVX; +extern CRYPTOPP_DLL bool g_hasAVX2; extern CRYPTOPP_DLL bool g_hasAESNI; extern CRYPTOPP_DLL bool g_hasCLMUL; extern CRYPTOPP_DLL bool g_hasSHA; @@ -158,6 +160,7 @@ inline bool HasSSE42() /// \brief Determines AES-NI availability /// \returns true if AES-NI is determined to be available, false otherwise /// \details HasAESNI() is a runtime check performed using CPUID +/// \since Crypto++ 5.6.1 /// \note This function is only available on Intel IA-32 platforms inline bool HasAESNI() { @@ -169,6 +172,7 @@ inline bool HasAESNI() /// \brief Determines Carryless Multiply availability /// \returns true if pclmulqdq is determined to be available, false otherwise /// \details HasCLMUL() is a runtime check performed using CPUID +/// \since Crypto++ 5.6.1 /// \note This function is only available on Intel IA-32 platforms inline bool HasCLMUL() { @@ -180,6 +184,7 @@ inline bool HasCLMUL() /// \brief Determines SHA availability /// \returns true if SHA is determined to be available, false otherwise /// \details HasSHA() is a runtime check performed using CPUID +/// \since Crypto++ 6.0 /// \note This function is only available on Intel IA-32 platforms inline bool HasSHA() { @@ -191,6 +196,7 @@ inline bool HasSHA() /// \brief Determines ADX availability /// \returns true if ADX is determined to be available, false otherwise /// \details HasADX() is a runtime check performed using CPUID +/// \since Crypto++ 7.0 /// \note This function is only available on Intel IA-32 platforms inline bool HasADX() { @@ -199,6 +205,30 @@ inline bool HasADX() return g_hasADX; } +/// \brief Determines AVX availability +/// \returns true if AVX is determined to be available, false otherwise +/// \details HasAVX() is a runtime check performed using CPUID +/// \since Crypto++ 7.1 +/// \note This function is only available on Intel IA-32 platforms +inline bool HasAVX() +{ + if (!g_x86DetectionDone) + DetectX86Features(); + return g_hasAVX; +} + +/// \brief Determines AVX2 availability +/// \returns true if AVX2 is determined to be available, false otherwise +/// \details HasAVX2() is a runtime check performed using CPUID +/// \since Crypto++ 7.1 +/// \note This function is only available on Intel IA-32 platforms +inline bool HasAVX2() +{ + if (!g_x86DetectionDone) + DetectX86Features(); + return g_hasAVX2; +} + /// \brief Determines if the CPU is an Intel P4 /// \returns true if the CPU is a P4, false otherwise /// \details IsP4() is a runtime check performed using CPUID diff --git a/validat1.cpp b/validat1.cpp index 02e5ef95..c8d58255 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -369,11 +369,20 @@ bool TestSettings() bool hasSSSE3 = HasSSSE3(); bool hasSSE41 = HasSSE41(); bool hasSSE42 = HasSSE42(); + bool hasAVX = HasAVX(); + bool hasAVX2 = HasAVX2(); + bool hasAESNI = HasAESNI(); + bool hasCLMUL = HasCLMUL(); + bool hasRDRAND = HasRDRAND(); + bool hasRDSEED = HasRDSEED(); + bool hasSHA = HasSHA(); bool isP4 = IsP4(); - std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41 << ", hasSSE4.2 == " << hasSSE42; - std::cout << ", hasAESNI == " << HasAESNI() << ", hasCLMUL == " << HasCLMUL() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED(); - std::cout << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4 << "\n"; + std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41; + std::cout << ", hasSSE4.2 == " << hasSSE42 << ", hasAVX == " << hasAVX << ", hasAVX2 == " << hasAVX2; + std::cout << ", hasAESNI == " << hasAESNI << ", hasCLMUL == " << hasCLMUL << ", hasRDRAND == " << HasRDRAND; + std::cout << ", hasRDSEED == " << HasRDSEED << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4; + std::cout << "\n"; #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) bool hasNEON = HasNEON();