Detect XSAVE/XRESTORE OS feature support (GH #521)

This avoids the probe for SSE2 in most circumstances. The SSE2 test is mostly benign nowadays, since SSE2 and OS support for it are nearly ubiquitous. However, the CRYPTOPP_NO_CPU_FEATURE_PROBES define, which was added for Apple OSes, was interacting badly on x86 machines. Also see GH #511.
pull/531/head
Jeffrey Walton 2017-10-12 20:14:21 -04:00
parent c7a7385d52
commit f1a80e6a58
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
1 changed files with 18 additions and 17 deletions

35
cpu.cpp
View File

@ -21,7 +21,7 @@
# include <unistd.h> # include <unistd.h>
#endif #endif
// Capability queries, requires Glibc 2.16, https://lwn.net/Articles/519085/ // Capability queries, requires Glibc 2.16, http://lwn.net/Articles/519085/
// CRYPTOPP_GLIBC_VERSION not used because config.h is missing <feature.h> // CRYPTOPP_GLIBC_VERSION not used because config.h is missing <feature.h>
#if (((__GLIBC__ * 100) + __GLIBC_MINOR__) >= 216) #if (((__GLIBC__ * 100) + __GLIBC_MINOR__) >= 216)
# define CRYPTOPP_GETAUXV_AVAILABLE 1 # define CRYPTOPP_GETAUXV_AVAILABLE 1
@ -102,9 +102,7 @@ extern "C"
// cpu.cpp (131): E2211 Inline assembly not allowed in inline and template functions // cpu.cpp (131): E2211 Inline assembly not allowed in inline and template functions
bool CpuId(word32 func, word32 subfunc, word32 output[4]) bool CpuId(word32 func, word32 subfunc, word32 output[4])
{ {
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) #if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) || defined(__BORLANDC__)
return false;
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) || defined(__BORLANDC__)
__try __try
{ {
// Borland/Embarcadero and Issue 500 // Borland/Embarcadero and Issue 500
@ -184,19 +182,19 @@ bool CpuId(word32 func, word32 subfunc, word32 output[4])
static bool CPU_ProbeSSE2() static bool CPU_ProbeSSE2()
{ {
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) #if CRYPTOPP_BOOL_X64
return false;
#elif CRYPTOPP_BOOL_X64
return true; return true;
#elif defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
return false;
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) #elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
__try __try
{ {
#if CRYPTOPP_SSE2_ASM_AVAILABLE # if CRYPTOPP_SSE2_ASM_AVAILABLE
AS2(por xmm0, xmm0) // executing SSE2 instruction AS2(por xmm0, xmm0) // executing SSE2 instruction
#elif CRYPTOPP_SSE2_INTRIN_AVAILABLE # elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
__m128i x = _mm_setzero_si128(); __m128i x = _mm_setzero_si128();
return _mm_cvtsi128_si32(x) == 0; return _mm_cvtsi128_si32(x) == 0;
#endif # endif
} }
// GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
@ -223,12 +221,12 @@ static bool CPU_ProbeSSE2()
result = false; result = false;
else else
{ {
#if CRYPTOPP_SSE2_ASM_AVAILABLE # if CRYPTOPP_SSE2_ASM_AVAILABLE
__asm __volatile ("por %xmm0, %xmm0"); __asm __volatile ("por %xmm0, %xmm0");
#elif CRYPTOPP_SSE2_INTRIN_AVAILABLE # elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
__m128i x = _mm_setzero_si128(); __m128i x = _mm_setzero_si128();
result = _mm_cvtsi128_si32(x) == 0; result = _mm_cvtsi128_si32(x) == 0;
#endif # endif
} }
# ifndef __MINGW32__ # ifndef __MINGW32__
@ -282,8 +280,11 @@ void DetectX86Features()
if (!CpuId(1, 0, cpuid1)) if (!CpuId(1, 0, cpuid1))
return; return;
// cpuid1[2] & (1 << 27) is OSXSAVE (the OS has enabled XSAVE/XRSTOR) and signals OS support for SSE; use it to avoid probes.
// See http://github.com/weidai11/cryptopp/issues/511 and http://stackoverflow.com/a/22521619/608639
if ((cpuid1[3] & (1 << 26)) != 0) if ((cpuid1[3] & (1 << 26)) != 0)
g_hasSSE2 = CPU_ProbeSSE2(); g_hasSSE2 = (cpuid1[2] & (1 << 27)) || CPU_ProbeSSE2();
g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9)); g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
g_hasSSE41 = g_hasSSE2 && (cpuid1[2] & (1<<19)); g_hasSSE41 = g_hasSSE2 && (cpuid1[2] & (1<<19));
g_hasSSE42 = g_hasSSE2 && (cpuid1[2] & (1<<20)); g_hasSSE42 = g_hasSSE2 && (cpuid1[2] & (1<<20));
@ -510,7 +511,7 @@ inline bool CPU_QueryAES()
if (getauxval(AT_HWCAP2) & HWCAP2_AES) if (getauxval(AT_HWCAP2) & HWCAP2_AES)
return true; return true;
#elif defined(__APPLE__) && defined(__aarch64__) #elif defined(__APPLE__) && defined(__aarch64__)
// https://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios // http://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios
struct utsname systemInfo; struct utsname systemInfo;
systemInfo.machine[0] = '\0'; systemInfo.machine[0] = '\0';
uname(&systemInfo); uname(&systemInfo);
@ -545,7 +546,7 @@ inline bool CPU_QuerySHA1()
if (getauxval(AT_HWCAP2) & HWCAP2_SHA1) if (getauxval(AT_HWCAP2) & HWCAP2_SHA1)
return true; return true;
#elif defined(__APPLE__) && defined(__aarch64__) #elif defined(__APPLE__) && defined(__aarch64__)
// https://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios // http://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios
struct utsname systemInfo; struct utsname systemInfo;
systemInfo.machine[0] = '\0'; systemInfo.machine[0] = '\0';
uname(&systemInfo); uname(&systemInfo);
@ -580,7 +581,7 @@ inline bool CPU_QuerySHA2()
if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) if (getauxval(AT_HWCAP2) & HWCAP2_SHA2)
return true; return true;
#elif defined(__APPLE__) && defined(__aarch64__) #elif defined(__APPLE__) && defined(__aarch64__)
// https://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios // http://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios
struct utsname systemInfo; struct utsname systemInfo;
systemInfo.machine[0] = '\0'; systemInfo.machine[0] = '\0';
uname(&systemInfo); uname(&systemInfo);