diff --git a/GNUmakefile b/GNUmakefile
index f1ca8ca7..846f6c40 100755
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -273,9 +273,13 @@ ifeq ($(SUN_COMPILER),1)
     ARIA_FLAG = -xarch=ssse3 -D__SSSE3__=1
     LDFLAGS += -xarch=ssse3
   endif
+  COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse4_1 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal")
+  ifeq ($(COUNT),0)
+    BLAKE2_FLAG = -xarch=sse4_1 -D__SSE4_1__=1
+    LDFLAGS += -xarch=sse4_1
+  endif
   COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse4_2 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal")
   ifeq ($(COUNT),0)
-    BLAKE2_FLAG = -xarch=sse4_2 -D__SSE4_2__=1
     CRC_FLAG = -xarch=sse4_2 -D__SSE4_2__=1
     LDFLAGS += -xarch=sse4_2
   endif
diff --git a/cpu.cpp b/cpu.cpp
index 1d023297..c3ad7c43 100644
--- a/cpu.cpp
+++ b/cpu.cpp
@@ -247,7 +247,8 @@ static bool CPU_ProbeSSE2()
 bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
 bool CRYPTOPP_SECTION_INIT CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
 bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false, CRYPTOPP_SECTION_INIT g_hasSSE42 = false;
-bool CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
+bool CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false;
+bool CRYPTOPP_SECTION_INIT g_hasADX = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
 bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
 bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
 bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
@@ -301,6 +302,7 @@ void DetectX86Features()
 	{
 		CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
 		CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
+		CRYPTOPP_CONSTANT(   ADX_FLAG = (1 << 19))
 		CRYPTOPP_CONSTANT(   SHA_FLAG = (1 << 29))
 
 		g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
@@ -312,6 +314,7 @@ void DetectX86Features()
 			if (CpuId(7, 0, cpuid2))
 			{
 				g_hasRDSEED = !!(cpuid2[1] /*EBX*/ & RDSEED_FLAG);
+				g_hasADX = !!(cpuid2[1] /*EBX*/ & ADX_FLAG);
 				g_hasSHA = !!(cpuid2[1] /*EBX*/ & SHA_FLAG);
 			}
 		}
@@ -320,6 +323,7 @@ void DetectX86Features()
 	{
 		CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
 		CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
+		CRYPTOPP_CONSTANT(   ADX_FLAG = (1 << 19))
 		CRYPTOPP_CONSTANT(   SHA_FLAG = (1 << 29))
 
 		CpuId(0x80000005, 0, cpuid2);
@@ -331,6 +335,7 @@ void DetectX86Features()
 			if (CpuId(7, 0, cpuid2))
 			{
 				g_hasRDSEED = !!(cpuid2[1] /*EBX*/ & RDSEED_FLAG);
+				g_hasADX = !!(cpuid2[1] /*EBX*/ & ADX_FLAG);
 				g_hasSHA = !!(cpuid2[1] /*EBX*/ & SHA_FLAG);
 			}
 		}
diff --git a/cpu.h b/cpu.h
index 5e2cca86..41892a7c 100644
--- a/cpu.h
+++ b/cpu.h
@@ -66,6 +66,7 @@ extern CRYPTOPP_DLL bool g_hasSSE42;
 extern CRYPTOPP_DLL bool g_hasAESNI;
 extern CRYPTOPP_DLL bool g_hasCLMUL;
 extern CRYPTOPP_DLL bool g_hasSHA;
+extern CRYPTOPP_DLL bool g_hasADX;
 extern CRYPTOPP_DLL bool g_isP4;
 extern CRYPTOPP_DLL bool g_hasRDRAND;
 extern CRYPTOPP_DLL bool g_hasRDSEED;
@@ -165,6 +166,17 @@ inline bool HasSHA()
 	return g_hasSHA;
 }
 
+//! \brief Determines ADX availability
+//! \returns true if ADX is determined to be available, false otherwise
+//! \details HasADX() is a runtime check performed using CPUID
+//! \note This function is only available on Intel IA-32 platforms
+inline bool HasADX()
+{
+	if (!g_x86DetectionDone)
+		DetectX86Features();
+	return g_hasADX;
+}
+
 //! \brief Determines if the CPU is an Intel P4
 //! \returns true if the CPU is a P4, false otherwise
 //! \details IsP4() is a runtime check performed using CPUID
diff --git a/gcm-simd.cpp b/gcm-simd.cpp
index 992c9c2a..de9e493e 100644
--- a/gcm-simd.cpp
+++ b/gcm-simd.cpp
@@ -30,6 +30,10 @@
 # undef CRYPTOPP_ARM_PMULL_AVAILABLE
 #endif
 
+#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
+# include <emmintrin.h>
+#endif
+
 #if (CRYPTOPP_CLMUL_AVAILABLE)
 # include <tmmintrin.h>
 # include <wmmintrin.h>
@@ -428,6 +432,19 @@ void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer)
 }
 #endif
 
+#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
+# if defined (__SUNPRO_CC)
+// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
+// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
+void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c)
+{
+    _mm_store_si128(M128_CAST(a), _mm_xor_si128(
+        _mm_load_si128(CONST_M128_CAST(b)),
+        _mm_load_si128(CONST_M128_CAST(c))));
+}
+# endif
+#endif
+
 #if CRYPTOPP_CLMUL_AVAILABLE
 
 ANONYMOUS_NAMESPACE_BEGIN
diff --git a/gcm.cpp b/gcm.cpp
index 72d23890..c029ae1b 100644
--- a/gcm.cpp
+++ b/gcm.cpp
@@ -27,15 +27,15 @@
 # undef CRYPTOPP_CLMUL_AVAILABLE
 #endif
 
+#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
+# include <emmintrin.h>
+#endif
+
 #include "gcm.h"
 #include "cpu.h"
 
 NAMESPACE_BEGIN(CryptoPP)
 
-#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
-# include "emmintrin.h"
-#endif
-
 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
 // Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
 //   'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
@@ -77,20 +77,24 @@ static inline void Xor16(byte *a, const byte *b, const byte *c)
 }
 
 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
+// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
+// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
+# if defined (__SUNPRO_CC)
+extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
+# else
 static inline void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c)
 {
-// SunCC 5.14 crash. Also see http://github.com/weidai11/cryptopp/issues/226
-// and http://github.com/weidai11/cryptopp/issues/284
-# if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(__SUNPRO_CC)
+#  if CRYPTOPP_SSE2_ASM_AVAILABLE
 	asm ("movdqa %1, %%xmm0; pxor %2, %%xmm0; movdqa %%xmm0, %0;"
-		 : "=m" (a[0]) : "m"(b[0]), "m"(c[0]));
-# else // CRYPTOPP_SSE2_INTRIN_AVAILABLE
+		 : "=m" (a[0]) : "rm"(b[0]), "rm"(c[0]));
+#  else // CRYPTOPP_SSE2_INTRIN_AVAILABLE
 	_mm_store_si128(M128_CAST(a), _mm_xor_si128(
 		_mm_load_si128(CONST_M128_CAST(b)),
 		_mm_load_si128(CONST_M128_CAST(c))));
-# endif
+#  endif
 }
-#endif
+# endif // SunCC
+#endif // SSE2
 
 #if CRYPTOPP_CLMUL_AVAILABLE
 extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);