diff --git a/GNUmakefile b/GNUmakefile index 89950acf..a14b3daf 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -238,9 +238,12 @@ ifeq ($(findstring -DCRYPTOPP_DISABLE_SSSE3,$(CXXFLAGS)),) SSSE3_FLAG = -mssse3 endif ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),) + HAVE_SSE4 = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -msse4.1 -dM -E - 2>/dev/null | $(GREP) -i -c __SSE4_1__) + ifeq ($(HAVE_SSE4),1) + BLAKE2_FLAG = -msse4.1 + endif HAVE_SSE4 = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -msse4.2 -dM -E - 2>/dev/null | $(GREP) -i -c __SSE4_2__) ifeq ($(HAVE_SSE4),1) - BLAKE2_FLAG = -msse4.2 CRC_FLAG = -msse4.2 endif ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),) diff --git a/blake2-simd.cpp b/blake2-simd.cpp index a3f05c76..6ad53f97 100644 --- a/blake2-simd.cpp +++ b/blake2-simd.cpp @@ -13,16 +13,17 @@ // Uncomment for benchmarking C++ against SSE2 or NEON. // Do so in both blake2.cpp and blake2-simd.cpp. -// #undef CRYPTOPP_SSE42_AVAILABLE +// #undef CRYPTOPP_SSE41_AVAILABLE // #undef CRYPTOPP_ARM_NEON_AVAILABLE #if !(defined(__ARM_NEON) || defined(_MSC_VER)) # undef CRYPTOPP_ARM_NEON_AVAILABLE #endif -#if (CRYPTOPP_SSE42_AVAILABLE) +#if (CRYPTOPP_SSE41_AVAILABLE) # include -# include +# include +# include #endif #if (CRYPTOPP_ARM_NEON_AVAILABLE) @@ -75,7 +76,7 @@ const word64 BLAKE2B_IV[8] = { ANONYMOUS_NAMESPACE_END -#if CRYPTOPP_SSE42_AVAILABLE +#if CRYPTOPP_SSE41_AVAILABLE void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State& state) { __m128i row1, row2, row3, row4; @@ -1605,7 +1606,7 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State& state _mm_storeu_si128(M128_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(&state.h[4])), row2l)); _mm_storeu_si128(M128_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(&state.h[6])), row2h)); } -#endif // CRYPTOPP_SSE42_AVAILABLE +#endif // CRYPTOPP_SSE41_AVAILABLE // Disable NEON for Cortex-A53 and A57. Also see http://github.com/weidai11/cryptopp/issues/367 #if CRYPTOPP_BOOL_ARM32 && CRYPTOPP_ARM_NEON_AVAILABLE diff --git a/blake2.cpp b/blake2.cpp index c4be4621..580d941e 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -14,19 +14,13 @@ NAMESPACE_BEGIN(CryptoPP) // Uncomment for benchmarking C++ against SSE2 or NEON. // Do so in both blake2.cpp and blake2-simd.cpp. -// #undef CRYPTOPP_SSE42_AVAILABLE +// #undef CRYPTOPP_SSE41_AVAILABLE // #undef CRYPTOPP_ARM_NEON_AVAILABLE -// Apple Clang 6.0/Clang 3.5 does not have SSSE3 intrinsics -// http://llvm.org/bugs/show_bug.cgi?id=20213 -#if (defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION <= 60000)) || (defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION <= 30500)) -# undef CRYPTOPP_SSE42_AVAILABLE -#endif - void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State& state); void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State& state); -#if CRYPTOPP_SSE42_AVAILABLE +#if CRYPTOPP_SSE41_AVAILABLE extern void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State& state); extern void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State& state); #endif @@ -95,8 +89,8 @@ typedef void (*pfnCompress64)(const byte*, BLAKE2_State&); pfnCompress64 InitializeCompress64Fn() { -#if CRYPTOPP_SSE42_AVAILABLE - if (HasSSE42()) +#if CRYPTOPP_SSE41_AVAILABLE + if (HasSSE41()) return &BLAKE2_Compress64_SSE4; else #endif @@ -110,8 +104,8 @@ pfnCompress64 InitializeCompress64Fn() pfnCompress32 InitializeCompress32Fn() { -#if CRYPTOPP_SSE42_AVAILABLE - if (HasSSE42()) +#if CRYPTOPP_SSE41_AVAILABLE + if (HasSSE41()) return &BLAKE2_Compress32_SSE4; else #endif