Use SSE4.1 instead of SSE4.2 for BLAKE2
BLAKE2 requires SSE4.1, not SSE4.2. This change should have been made when we split SSE4 into .1 and .2, but we needed more OS X and LLVM testing.
pull/548/head
parent
a3784a3ac5
commit
e8bed05b7d
|
|
@ -238,9 +238,12 @@ ifeq ($(findstring -DCRYPTOPP_DISABLE_SSSE3,$(CXXFLAGS)),)
|
|||
SSSE3_FLAG = -mssse3
|
||||
endif
|
||||
ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),)
|
||||
HAVE_SSE4 = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -msse4.1 -dM -E - 2>/dev/null | $(GREP) -i -c __SSE4_1__)
|
||||
ifeq ($(HAVE_SSE4),1)
|
||||
BLAKE2_FLAG = -msse4.1
|
||||
endif
|
||||
HAVE_SSE4 = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -msse4.2 -dM -E - 2>/dev/null | $(GREP) -i -c __SSE4_2__)
|
||||
ifeq ($(HAVE_SSE4),1)
|
||||
BLAKE2_FLAG = -msse4.2
|
||||
CRC_FLAG = -msse4.2
|
||||
endif
|
||||
ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),)
|
||||
|
|
|
|||
|
|
@ -13,16 +13,17 @@
|
|||
|
||||
// Uncomment for benchmarking C++ against SSE2 or NEON.
|
||||
// Do so in both blake2.cpp and blake2-simd.cpp.
|
||||
// #undef CRYPTOPP_SSE42_AVAILABLE
|
||||
// #undef CRYPTOPP_SSE41_AVAILABLE
|
||||
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
|
||||
#if !(defined(__ARM_NEON) || defined(_MSC_VER))
|
||||
# undef CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_SSE42_AVAILABLE)
|
||||
#if (CRYPTOPP_SSE41_AVAILABLE)
|
||||
# include <emmintrin.h>
|
||||
# include <nmmintrin.h>
|
||||
# include <tmmintrin.h>
|
||||
# include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||
|
|
@ -75,7 +76,7 @@ const word64 BLAKE2B_IV[8] = {
|
|||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
#if CRYPTOPP_SSE42_AVAILABLE
|
||||
#if CRYPTOPP_SSE41_AVAILABLE
|
||||
void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state)
|
||||
{
|
||||
__m128i row1, row2, row3, row4;
|
||||
|
|
@ -1605,7 +1606,7 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state
|
|||
_mm_storeu_si128(M128_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(&state.h[4])), row2l));
|
||||
_mm_storeu_si128(M128_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(&state.h[6])), row2h));
|
||||
}
|
||||
#endif // CRYPTOPP_SSE42_AVAILABLE
|
||||
#endif // CRYPTOPP_SSE41_AVAILABLE
|
||||
|
||||
// Disable NEON for Cortex-A53 and A57. Also see http://github.com/weidai11/cryptopp/issues/367
|
||||
#if CRYPTOPP_BOOL_ARM32 && CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
|
|
|
|||
18
blake2.cpp
18
blake2.cpp
|
|
@ -14,19 +14,13 @@ NAMESPACE_BEGIN(CryptoPP)
|
|||
|
||||
// Uncomment for benchmarking C++ against SSE2 or NEON.
|
||||
// Do so in both blake2.cpp and blake2-simd.cpp.
|
||||
// #undef CRYPTOPP_SSE42_AVAILABLE
|
||||
// #undef CRYPTOPP_SSE41_AVAILABLE
|
||||
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
|
||||
// Apple Clang 6.0/Clang 3.5 does not have SSSE3 intrinsics
|
||||
// http://llvm.org/bugs/show_bug.cgi?id=20213
|
||||
#if (defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION <= 60000)) || (defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION <= 30500))
|
||||
# undef CRYPTOPP_SSE42_AVAILABLE
|
||||
#endif
|
||||
|
||||
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state);
|
||||
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state);
|
||||
|
||||
#if CRYPTOPP_SSE42_AVAILABLE
|
||||
#if CRYPTOPP_SSE41_AVAILABLE
|
||||
extern void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state);
|
||||
extern void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state);
|
||||
#endif
|
||||
|
|
@ -95,8 +89,8 @@ typedef void (*pfnCompress64)(const byte*, BLAKE2_State<word64, true>&);
|
|||
|
||||
pfnCompress64 InitializeCompress64Fn()
|
||||
{
|
||||
#if CRYPTOPP_SSE42_AVAILABLE
|
||||
if (HasSSE42())
|
||||
#if CRYPTOPP_SSE41_AVAILABLE
|
||||
if (HasSSE41())
|
||||
return &BLAKE2_Compress64_SSE4;
|
||||
else
|
||||
#endif
|
||||
|
|
@ -110,8 +104,8 @@ pfnCompress64 InitializeCompress64Fn()
|
|||
|
||||
pfnCompress32 InitializeCompress32Fn()
|
||||
{
|
||||
#if CRYPTOPP_SSE42_AVAILABLE
|
||||
if (HasSSE42())
|
||||
#if CRYPTOPP_SSE41_AVAILABLE
|
||||
if (HasSSE41())
|
||||
return &BLAKE2_Compress32_SSE4;
|
||||
else
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue