diff --git a/cpu.cpp b/cpu.cpp index 285b17cd..67d4efba 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -1,3 +1,4 @@ + // cpu.cpp - originally written and placed in the public domain by Wei Dai #include "pch.h" @@ -56,6 +57,11 @@ unsigned long int getauxval(unsigned long int) { return 0; } # include #endif +// Visual Studio 2008 and below is missing _xgetbv. See x64dll.asm for the body. +#if defined(_MSC_VER) && defined(_M_X64) +extern "C" unsigned long long __fastcall ExtendedControlRegister(unsigned int); +#endif + ANONYMOUS_NAMESPACE_BEGIN #if defined(__APPLE__) @@ -309,19 +315,44 @@ void DetectX86Features() CRYPTOPP_CONSTANT(AVX_FLAG = (3 << 27)) if ((cpuid1[2] & AVX_FLAG) == AVX_FLAG) { +// GCC 4.1/Binutils 2.17 cannot consume xgetbv #if defined(__GNUC__) || defined(__SUNPRO_CC) || defined(__BORLANDC__) // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 and // http://www.agner.org/optimize/vectorclass/read.php?i=65 word32 a=0, d=0; __asm __volatile ( - // GCC 4.1/Binutils 2.17 cannot consume xgetbv // "xgetbv" : "=a"(a), "=d"(d) : "c"(0) : ".byte 0x0f, 0x01, 0xd0" "\n\t" : "=a"(a), "=d"(d) : "c"(0) : ); word64 xcr0 = a | static_cast(d) << 32; g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG; + +// Visual Studio 2008 and below lack xgetbv +#elif defined(_MSC_VER) && defined(_M_IX86) + word32 a=0, d=0; + __asm { + push eax + push edx + push ecx + mov ecx, 0 + _emit 0x0f + _emit 0x01 + _emit 0xd0 + mov a, eax + mov d, edx + pop ecx + pop edx + pop eax + } + word64 xcr0 = a | static_cast(d) << 32; + g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG; + +// Visual Studio 2008 and below lack xgetbv +#elif defined(_MSC_VER) && defined(_M_X64) + word64 xcr0 = ExtendedControlRegister(0); + g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG; #else word64 xcr0 = _xgetbv(0); g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG; diff --git a/x64dll.asm b/x64dll.asm index 6f90239c..7857f5f1 100644 --- a/x64dll.asm +++ b/x64dll.asm @@ -1964,5 +1964,19 @@ pop rsi ret SHA256_HashMultipleBlocks_SSE2 ENDP + ALIGN 8 
+ExtendedControlRegister PROC
+;; unsigned long long ExtendedControlRegister(unsigned int): the first parameter arrives in RCX, and xgetbv expects the extended control register (XCR) index in ECX, so no register shuffling is needed
+;; http://www.agner.org/optimize/vectorclass/read.php?i=65
+DB 0fh		;; xgetbv emitted as raw bytes (0F 01 D0) for assemblers that lack the mnemonic
+DB 01h
+DB 0d0h
+;; xcr = (EDX << 32) | EAX, returned in RAX
+mov eax, eax	;; zero-extend EAX into RAX; "and rax, imm32" sign-extends its immediate and cannot mask the high half
+shl rdx, 32	;; move the high 32 bits of the result into the upper half of RDX
+or rax, rdx	;; combine both halves into the 64-bit return value
+ret
+ExtendedControlRegister ENDP
+
 _TEXT ENDS
 END