Add SSE4.1 and SSE4.2 feature detection

pull/157/head
Jeffrey Walton 2016-04-15 16:23:04 -04:00
parent 22f493dda9
commit a11c9e7574
5 changed files with 28 additions and 2 deletions

View File

@ -416,6 +416,14 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 0 #define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 0
#endif #endif
// SSE4.1/SSE4.2 intrinsics support. CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE is defined to 1
// unless CRYPTOPP_DISABLE_SSE4 is defined, provided that _MSC_VER >= 1500 or the compiler
// predefines __SSE4_1__ or __SSE4_2__; otherwise it is defined to 0.
// Intrinsics available in GCC 4.3 (http://gcc.gnu.org/gcc-4.3/changes.html) and
// MSVC 2008 (http://msdn.microsoft.com/en-us/library/bb892950%28v=vs.90%29.aspx)
#if !defined(CRYPTOPP_DISABLE_SSE4) && ((_MSC_VER >= 1500) || defined(__SSE4_1__) || defined(__SSE4_2__))
#define CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE 0
#endif
#if !defined(CRYPTOPP_DISABLE_SSSE3) && !defined(CRYPTOPP_DISABLE_AESNI) && CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && (CRYPTOPP_GCC_VERSION >= 40400 || _MSC_FULL_VER >= 150030729 || __INTEL_COMPILER >= 1110 || defined(__AES__)) #if !defined(CRYPTOPP_DISABLE_SSSE3) && !defined(CRYPTOPP_DISABLE_AESNI) && CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && (CRYPTOPP_GCC_VERSION >= 40400 || _MSC_FULL_VER >= 150030729 || __INTEL_COMPILER >= 1110 || defined(__AES__))
#define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 1 #define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 1
#else #else

View File

@ -416,6 +416,14 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 0 #define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 0
#endif #endif
// SSE4.1/SSE4.2 intrinsics support. CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE is defined to 1
// unless CRYPTOPP_DISABLE_SSE4 is defined, provided that _MSC_VER >= 1500 or the compiler
// predefines __SSE4_1__ or __SSE4_2__; otherwise it is defined to 0.
// Intrinsics available in GCC 4.3 (http://gcc.gnu.org/gcc-4.3/changes.html) and
// MSVC 2008 (http://msdn.microsoft.com/en-us/library/bb892950%28v=vs.90%29.aspx)
#if !defined(CRYPTOPP_DISABLE_SSE4) && ((_MSC_VER >= 1500) || defined(__SSE4_1__) || defined(__SSE4_2__))
#define CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE 0
#endif
#if !defined(CRYPTOPP_DISABLE_SSSE3) && !defined(CRYPTOPP_DISABLE_AESNI) && CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && (CRYPTOPP_GCC_VERSION >= 40400 || _MSC_FULL_VER >= 150030729 || __INTEL_COMPILER >= 1110 || defined(__AES__)) #if !defined(CRYPTOPP_DISABLE_SSSE3) && !defined(CRYPTOPP_DISABLE_AESNI) && CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && (CRYPTOPP_GCC_VERSION >= 40400 || _MSC_FULL_VER >= 150030729 || __INTEL_COMPILER >= 1110 || defined(__AES__))
#define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 1 #define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 1
#else #else

View File

@ -165,7 +165,7 @@ static bool TrySSE2()
} }
bool g_x86DetectionDone = false; bool g_x86DetectionDone = false;
bool g_hasMMX = false, g_hasISSE = false, g_hasSSE2 = false, g_hasSSSE3 = false, g_hasAESNI = false, g_hasCLMUL = false, g_isP4 = false, g_hasRDRAND = false, g_hasRDSEED = false; bool g_hasMMX = false, g_hasISSE = false, g_hasSSE2 = false, g_hasSSSE3 = false, g_hasSSE4 = false, g_hasAESNI = false, g_hasCLMUL = false, g_isP4 = false, g_hasRDRAND = false, g_hasRDSEED = false;
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. // MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
@ -206,6 +206,7 @@ void DetectX86Features()
if ((cpuid1[3] & (1 << 26)) != 0) if ((cpuid1[3] & (1 << 26)) != 0)
g_hasSSE2 = TrySSE2(); g_hasSSE2 = TrySSE2();
g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9)); g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) || (cpuid1[2] & (1<<20)));
g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25)); g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25));
g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1)); g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1));

8
cpu.h
View File

@ -118,6 +118,7 @@ extern CRYPTOPP_DLL bool g_hasMMX;
extern CRYPTOPP_DLL bool g_hasISSE; extern CRYPTOPP_DLL bool g_hasISSE;
extern CRYPTOPP_DLL bool g_hasSSE2; extern CRYPTOPP_DLL bool g_hasSSE2;
extern CRYPTOPP_DLL bool g_hasSSSE3; extern CRYPTOPP_DLL bool g_hasSSSE3;
extern CRYPTOPP_DLL bool g_hasSSE4;
extern CRYPTOPP_DLL bool g_hasAESNI; extern CRYPTOPP_DLL bool g_hasAESNI;
extern CRYPTOPP_DLL bool g_hasCLMUL; extern CRYPTOPP_DLL bool g_hasCLMUL;
extern CRYPTOPP_DLL bool g_isP4; extern CRYPTOPP_DLL bool g_isP4;
@ -168,6 +169,13 @@ inline bool HasSSSE3()
return g_hasSSSE3; return g_hasSSSE3;
} }
// Reports the g_hasSSE4 flag. If x86 feature detection has not been marked done yet,
// DetectX86Features() is invoked first, since that routine is what assigns g_hasSSE4.
inline bool HasSSE4()
{
	if (g_x86DetectionDone == false)
	{
		DetectX86Features();
	}
	return g_hasSSE4;
}
inline bool HasAESNI() inline bool HasAESNI()
{ {
if (!g_x86DetectionDone) if (!g_x86DetectionDone)

View File

@ -279,6 +279,7 @@ bool TestSettings()
bool hasISSE = HasISSE(); bool hasISSE = HasISSE();
bool hasSSE2 = HasSSE2(); bool hasSSE2 = HasSSE2();
bool hasSSSE3 = HasSSSE3(); bool hasSSSE3 = HasSSSE3();
bool hasSSE4 = HasSSE4();
bool isP4 = IsP4(); bool isP4 = IsP4();
int cacheLineSize = GetCacheLineSize(); int cacheLineSize = GetCacheLineSize();
@ -290,7 +291,7 @@ bool TestSettings()
else else
cout << "passed: "; cout << "passed: ";
// Print each detected CPU feature next to its own label (hasSSSE3 with hasSSSE3, hasSSE4 with hasSSE4).
cout << "hasMMX == " << hasMMX << ", hasISSE == " << hasISSE << ", hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasAESNI == " << HasAESNI() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED() << ", hasCLMUL == " << HasCLMUL() << ", isP4 == " << isP4 << ", cacheLineSize == " << cacheLineSize; cout << "hasMMX == " << hasMMX << ", hasISSE == " << hasISSE << ", hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4 == " << hasSSE4 << ", hasAESNI == " << HasAESNI() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED() << ", hasCLMUL == " << HasCLMUL() << ", isP4 == " << isP4 << ", cacheLineSize == " << cacheLineSize;
cout << ", AESNI_INTRINSICS == " << CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE << endl; cout << ", AESNI_INTRINSICS == " << CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE << endl;
#endif #endif