Use SSE4.1 instead of SSE4.2 for BLAKE2

BLAKE2 requires SSE4.1, not SSE4.2. This change should have been made when we split SSE4 into .1 and .2, but we needed more OS X and LLVM testing.
pull/548/head
Jeffrey Walton 2017-11-15 20:08:06 -05:00
parent a3784a3ac5
commit e8bed05b7d
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
3 changed files with 16 additions and 18 deletions

View File

@ -238,9 +238,12 @@ ifeq ($(findstring -DCRYPTOPP_DISABLE_SSSE3,$(CXXFLAGS)),)
SSSE3_FLAG = -mssse3 SSSE3_FLAG = -mssse3
endif endif
ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),) ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),)
HAVE_SSE4 = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -msse4.1 -dM -E - 2>/dev/null | $(GREP) -i -c __SSE4_1__)
ifeq ($(HAVE_SSE4),1)
BLAKE2_FLAG = -msse4.1
endif
HAVE_SSE4 = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -msse4.2 -dM -E - 2>/dev/null | $(GREP) -i -c __SSE4_2__) HAVE_SSE4 = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -msse4.2 -dM -E - 2>/dev/null | $(GREP) -i -c __SSE4_2__)
ifeq ($(HAVE_SSE4),1) ifeq ($(HAVE_SSE4),1)
BLAKE2_FLAG = -msse4.2
CRC_FLAG = -msse4.2 CRC_FLAG = -msse4.2
endif endif
ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),) ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),)

View File

@ -13,16 +13,17 @@
// Uncomment for benchmarking C++ against SSE2 or NEON. // Uncomment for benchmarking C++ against SSE2 or NEON.
// Do so in both blake2.cpp and blake2-simd.cpp. // Do so in both blake2.cpp and blake2-simd.cpp.
// #undef CRYPTOPP_SSE42_AVAILABLE // #undef CRYPTOPP_SSE41_AVAILABLE
// #undef CRYPTOPP_ARM_NEON_AVAILABLE // #undef CRYPTOPP_ARM_NEON_AVAILABLE
#if !(defined(__ARM_NEON) || defined(_MSC_VER)) #if !(defined(__ARM_NEON) || defined(_MSC_VER))
# undef CRYPTOPP_ARM_NEON_AVAILABLE # undef CRYPTOPP_ARM_NEON_AVAILABLE
#endif #endif
#if (CRYPTOPP_SSE42_AVAILABLE) #if (CRYPTOPP_SSE41_AVAILABLE)
# include <emmintrin.h> # include <emmintrin.h>
# include <nmmintrin.h> # include <tmmintrin.h>
# include <smmintrin.h>
#endif #endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE) #if (CRYPTOPP_ARM_NEON_AVAILABLE)
@ -75,7 +76,7 @@ const word64 BLAKE2B_IV[8] = {
ANONYMOUS_NAMESPACE_END ANONYMOUS_NAMESPACE_END
#if CRYPTOPP_SSE42_AVAILABLE #if CRYPTOPP_SSE41_AVAILABLE
void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state) void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state)
{ {
__m128i row1, row2, row3, row4; __m128i row1, row2, row3, row4;
@ -1605,7 +1606,7 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state
_mm_storeu_si128(M128_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(&state.h[4])), row2l)); _mm_storeu_si128(M128_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(&state.h[4])), row2l));
_mm_storeu_si128(M128_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(&state.h[6])), row2h)); _mm_storeu_si128(M128_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(&state.h[6])), row2h));
} }
#endif // CRYPTOPP_SSE42_AVAILABLE #endif // CRYPTOPP_SSE41_AVAILABLE
// Disable NEON for Cortex-A53 and A57. Also see http://github.com/weidai11/cryptopp/issues/367 // Disable NEON for Cortex-A53 and A57. Also see http://github.com/weidai11/cryptopp/issues/367
#if CRYPTOPP_BOOL_ARM32 && CRYPTOPP_ARM_NEON_AVAILABLE #if CRYPTOPP_BOOL_ARM32 && CRYPTOPP_ARM_NEON_AVAILABLE

View File

@ -14,19 +14,13 @@ NAMESPACE_BEGIN(CryptoPP)
// Uncomment for benchmarking C++ against SSE2 or NEON. // Uncomment for benchmarking C++ against SSE2 or NEON.
// Do so in both blake2.cpp and blake2-simd.cpp. // Do so in both blake2.cpp and blake2-simd.cpp.
// #undef CRYPTOPP_SSE42_AVAILABLE // #undef CRYPTOPP_SSE41_AVAILABLE
// #undef CRYPTOPP_ARM_NEON_AVAILABLE // #undef CRYPTOPP_ARM_NEON_AVAILABLE
// Apple Clang 6.0/Clang 3.5 does not have SSSE3 intrinsics
// http://llvm.org/bugs/show_bug.cgi?id=20213
#if (defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION <= 60000)) || (defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION <= 30500))
# undef CRYPTOPP_SSE42_AVAILABLE
#endif
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state); void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state);
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state); void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state);
#if CRYPTOPP_SSE42_AVAILABLE #if CRYPTOPP_SSE41_AVAILABLE
extern void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state); extern void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state);
extern void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state); extern void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state);
#endif #endif
@ -95,8 +89,8 @@ typedef void (*pfnCompress64)(const byte*, BLAKE2_State<word64, true>&);
pfnCompress64 InitializeCompress64Fn() pfnCompress64 InitializeCompress64Fn()
{ {
#if CRYPTOPP_SSE42_AVAILABLE #if CRYPTOPP_SSE41_AVAILABLE
if (HasSSE42()) if (HasSSE41())
return &BLAKE2_Compress64_SSE4; return &BLAKE2_Compress64_SSE4;
else else
#endif #endif
@ -110,8 +104,8 @@ pfnCompress64 InitializeCompress64Fn()
pfnCompress32 InitializeCompress32Fn() pfnCompress32 InitializeCompress32Fn()
{ {
#if CRYPTOPP_SSE42_AVAILABLE #if CRYPTOPP_SSE41_AVAILABLE
if (HasSSE42()) if (HasSSE41())
return &BLAKE2_Compress32_SSE4; return &BLAKE2_Compress32_SSE4;
else else
#endif #endif