Detect XSAVE/XRESTORE OS feature support (GH #521)
This avoids the probe for SSE2 in most circumstances. The SSE2 test is mostly benign nowadays since SSE2 and OS support are nearly ubiquitous. But the define CRYPTOPP_NO_CPU_FEATURE_PROBES, added for Apple OSes, was interacting badly on x86 machines. Also see GH #511.
pull/531/head
parent
c7a7385d52
commit
f1a80e6a58
35
cpu.cpp
35
cpu.cpp
|
|
@ -21,7 +21,7 @@
|
||||||
# include <unistd.h>
|
# include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Capability queries, requires Glibc 2.16, https://lwn.net/Articles/519085/
|
// Capability queries, requires Glibc 2.16, http://lwn.net/Articles/519085/
|
||||||
// CRYPTOPP_GLIBC_VERSION not used because config.h is missing <feature.h>
|
// CRYPTOPP_GLIBC_VERSION not used because config.h is missing <feature.h>
|
||||||
#if (((__GLIBC__ * 100) + __GLIBC_MINOR__) >= 216)
|
#if (((__GLIBC__ * 100) + __GLIBC_MINOR__) >= 216)
|
||||||
# define CRYPTOPP_GETAUXV_AVAILABLE 1
|
# define CRYPTOPP_GETAUXV_AVAILABLE 1
|
||||||
|
|
@ -102,9 +102,7 @@ extern "C"
|
||||||
// cpu.cpp (131): E2211 Inline assembly not allowed in inline and template functions
|
// cpu.cpp (131): E2211 Inline assembly not allowed in inline and template functions
|
||||||
bool CpuId(word32 func, word32 subfunc, word32 output[4])
|
bool CpuId(word32 func, word32 subfunc, word32 output[4])
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
#if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) || defined(__BORLANDC__)
|
||||||
return false;
|
|
||||||
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) || defined(__BORLANDC__)
|
|
||||||
__try
|
__try
|
||||||
{
|
{
|
||||||
// Borland/Embarcadero and Issue 500
|
// Borland/Embarcadero and Issue 500
|
||||||
|
|
@ -184,19 +182,19 @@ bool CpuId(word32 func, word32 subfunc, word32 output[4])
|
||||||
|
|
||||||
static bool CPU_ProbeSSE2()
|
static bool CPU_ProbeSSE2()
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
#if CRYPTOPP_BOOL_X64
|
||||||
return false;
|
|
||||||
#elif CRYPTOPP_BOOL_X64
|
|
||||||
return true;
|
return true;
|
||||||
|
#elif defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
||||||
|
return false;
|
||||||
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||||
__try
|
__try
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_SSE2_ASM_AVAILABLE
|
# if CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
AS2(por xmm0, xmm0) // executing SSE2 instruction
|
AS2(por xmm0, xmm0) // executing SSE2 instruction
|
||||||
#elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
# elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||||
__m128i x = _mm_setzero_si128();
|
__m128i x = _mm_setzero_si128();
|
||||||
return _mm_cvtsi128_si32(x) == 0;
|
return _mm_cvtsi128_si32(x) == 0;
|
||||||
#endif
|
# endif
|
||||||
}
|
}
|
||||||
// GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
|
// GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
|
||||||
__except (EXCEPTION_EXECUTE_HANDLER)
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
||||||
|
|
@ -223,12 +221,12 @@ static bool CPU_ProbeSSE2()
|
||||||
result = false;
|
result = false;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_SSE2_ASM_AVAILABLE
|
# if CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
__asm __volatile ("por %xmm0, %xmm0");
|
__asm __volatile ("por %xmm0, %xmm0");
|
||||||
#elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
# elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||||
__m128i x = _mm_setzero_si128();
|
__m128i x = _mm_setzero_si128();
|
||||||
result = _mm_cvtsi128_si32(x) == 0;
|
result = _mm_cvtsi128_si32(x) == 0;
|
||||||
#endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
# ifndef __MINGW32__
|
# ifndef __MINGW32__
|
||||||
|
|
@ -282,8 +280,11 @@ void DetectX86Features()
|
||||||
if (!CpuId(1, 0, cpuid1))
|
if (!CpuId(1, 0, cpuid1))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
// cpuid1[2] & (1 << 27) is XSAVE/XRESTORE and signals OS support for SSE; use it to avoid probes.
|
||||||
|
// See http://github.com/weidai11/cryptopp/issues/511 and http://stackoverflow.com/a/22521619/608639
|
||||||
if ((cpuid1[3] & (1 << 26)) != 0)
|
if ((cpuid1[3] & (1 << 26)) != 0)
|
||||||
g_hasSSE2 = CPU_ProbeSSE2();
|
g_hasSSE2 = (cpuid1[2] & (1 << 27)) || CPU_ProbeSSE2();
|
||||||
|
|
||||||
g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
|
g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
|
||||||
g_hasSSE41 = g_hasSSE2 && (cpuid1[2] & (1<<19));
|
g_hasSSE41 = g_hasSSE2 && (cpuid1[2] & (1<<19));
|
||||||
g_hasSSE42 = g_hasSSE2 && (cpuid1[2] & (1<<20));
|
g_hasSSE42 = g_hasSSE2 && (cpuid1[2] & (1<<20));
|
||||||
|
|
@ -510,7 +511,7 @@ inline bool CPU_QueryAES()
|
||||||
if (getauxval(AT_HWCAP2) & HWCAP2_AES)
|
if (getauxval(AT_HWCAP2) & HWCAP2_AES)
|
||||||
return true;
|
return true;
|
||||||
#elif defined(__APPLE__) && defined(__aarch64__)
|
#elif defined(__APPLE__) && defined(__aarch64__)
|
||||||
// https://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios
|
// http://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios
|
||||||
struct utsname systemInfo;
|
struct utsname systemInfo;
|
||||||
systemInfo.machine[0] = '\0';
|
systemInfo.machine[0] = '\0';
|
||||||
uname(&systemInfo);
|
uname(&systemInfo);
|
||||||
|
|
@ -545,7 +546,7 @@ inline bool CPU_QuerySHA1()
|
||||||
if (getauxval(AT_HWCAP2) & HWCAP2_SHA1)
|
if (getauxval(AT_HWCAP2) & HWCAP2_SHA1)
|
||||||
return true;
|
return true;
|
||||||
#elif defined(__APPLE__) && defined(__aarch64__)
|
#elif defined(__APPLE__) && defined(__aarch64__)
|
||||||
// https://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios
|
// http://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios
|
||||||
struct utsname systemInfo;
|
struct utsname systemInfo;
|
||||||
systemInfo.machine[0] = '\0';
|
systemInfo.machine[0] = '\0';
|
||||||
uname(&systemInfo);
|
uname(&systemInfo);
|
||||||
|
|
@ -580,7 +581,7 @@ inline bool CPU_QuerySHA2()
|
||||||
if (getauxval(AT_HWCAP2) & HWCAP2_SHA2)
|
if (getauxval(AT_HWCAP2) & HWCAP2_SHA2)
|
||||||
return true;
|
return true;
|
||||||
#elif defined(__APPLE__) && defined(__aarch64__)
|
#elif defined(__APPLE__) && defined(__aarch64__)
|
||||||
// https://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios
|
// http://stackoverflow.com/questions/45637888/how-to-determine-armv8-features-at-runtime-on-ios
|
||||||
struct utsname systemInfo;
|
struct utsname systemInfo;
|
||||||
systemInfo.machine[0] = '\0';
|
systemInfo.machine[0] = '\0';
|
||||||
uname(&systemInfo);
|
uname(&systemInfo);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue