From c7b329db9033d79718d8c0fa68d806ddb1ad71ad Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 22 Jul 2016 23:17:46 -0400 Subject: [PATCH 1/7] Add fpv5 fpu option --- cryptest.sh | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cryptest.sh b/cryptest.sh index 5689d691..b7837bb9 100755 --- a/cryptest.sh +++ b/cryptest.sh @@ -522,6 +522,11 @@ if [[ ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0") ]]; then if [[ ("$HAVE_ARM_VFPV4" -gt "0") ]]; then HAVE_ARM_VFPV4=1; fi fi + if [[ (-z "$HAVE_ARM_VFPV5") ]]; then + HAVE_ARM_VFPV5=$(echo "$ARM_FEATURES" | "$GREP" -i -c 'fpv5') + if [[ ("$HAVE_ARM_VFPV5" -gt "0") ]]; then HAVE_ARM_VFPV5=1; fi + fi + if [[ (-z "$HAVE_ARM_VFPD32") ]]; then HAVE_ARM_VFPD32=$(echo "$ARM_FEATURES" | "$GREP" -i -c 'vfpd32') if [[ ("$HAVE_ARM_VFPD32" -gt "0") ]]; then HAVE_ARM_VFPD32=1; fi @@ -859,10 +864,18 @@ if [[ ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0") ]]; then # Also see http://lists.linaro.org/pipermail/linaro-toolchain/2016-July/005821.html if [[ ("$HAVE_ARM_NEON" -ne "0" && "$HAVE_ARM_VFPV4" -ne "0") ]]; then PLATFORM_CXXFLAGS+=("-mfpu=neon-vfpv4 ") - elif [[ ("$HAVE_ARM_VFPV3" -ne "0" || "$HAVE_ARM_VFPV4" -ne "0") && "$HAVE_ARM_VFPD32" -ne "0" ]]; then - PLATFORM_CXXFLAGS+=("-mfpu=neon ") elif [[ ("$HAVE_ARM_NEON" -ne "0") ]]; then PLATFORM_CXXFLAGS+=("-mfpu=neon ") + elif [[ ("$HAVE_ARM_VFPV3" -ne "0" || "$HAVE_ARM_VFPV4" -ne "0") && "$HAVE_ARM_VFPD32" -ne "0" ]]; then + PLATFORM_CXXFLAGS+=("-mfpu=neon ") + elif [[ ("$HAVE_ARM_VFPV5" -ne "0" && "$HAVE_ARM_VFPD32" -ne "0") ]]; then + PLATFORM_CXXFLAGS+=("-mfpu=fpv5 ") + elif [[ ("$HAVE_ARM_VFPV4" -ne "0" && "$HAVE_ARM_VFPD32" -ne "0") ]]; then + PLATFORM_CXXFLAGS+=("-mfpu=vfpv4 ") + elif [[ ("$HAVE_ARM_VFPV3" -ne "0" && "$HAVE_ARM_VFPD32" -ne "0") ]]; then + PLATFORM_CXXFLAGS+=("-mfpu=vfpv3 ") + elif [[ ("$HAVE_ARM_VFPV5" -ne "0") ]]; then + PLATFORM_CXXFLAGS+=("-mfpu=fpv5-d16 ") elif [[ ("$HAVE_ARM_VFPV4" -ne "0") ]]; then 
PLATFORM_CXXFLAGS+=("-mfpu=vfpv4-d16 ") elif [[ ("$HAVE_ARM_VFPV3" -ne "0") ]]; then From eba3f83327bb80a7d2e20251a64b8aa676c23af8 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sat, 23 Jul 2016 08:10:38 -0400 Subject: [PATCH 2/7] Use CRYPTOPP_DISABLE_INTEL_ASM to disable Panama's ASM --- panama.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/panama.h b/panama.h index e7f20c95..18e60338 100644 --- a/panama.h +++ b/panama.h @@ -11,7 +11,7 @@ #include "secblock.h" // Clang 3.3 integrated assembler crash on Linux. Clang 3.4 due to compiler error with .intel_syntax -#if CRYPTOPP_BOOL_X32 || (defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION < 30500)) +#if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM) # define CRYPTOPP_DISABLE_PANAMA_ASM #endif From 7378a1b86d8e68279ee62fe31addbdb81603e4d2 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sat, 23 Jul 2016 19:37:17 -0400 Subject: [PATCH 3/7] Cleared analysis warning on use of boolean in arithmetic expression --- chacha.cpp | 4 ++-- chacha.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chacha.cpp b/chacha.cpp index b6ebd9c5..833a6acb 100644 --- a/chacha.cpp +++ b/chacha.cpp @@ -1,6 +1,6 @@ // chacha.cpp - written and placed in the public domain by Jeffrey Walton. // Copyright assigned to the Crypto++ project. -// Based on Wei Dai's Salsa20 and Bernstein's reference ChaCha +// Based on Wei Dai's Salsa20 and Bernstein's reference ChaCha // family implementation at http://cr.yp.to/chacha.html. #include "pch.h" @@ -141,7 +141,7 @@ void ChaCha_Policy<R>::OperateKeystream(KeystreamOperation operation, byte *outp #endif ++m_state[12]; - m_state[13] += !!(m_state[12] == 0); + m_state[13] += static_cast<word32>(m_state[12] == 0); } } diff --git a/chacha.h b/chacha.h index 302c477d..28b8c7af 100644 --- a/chacha.h +++ b/chacha.h @@ -1,6 +1,6 @@ // chacha.h - written and placed in the public domain by Jeffrey Walton. // Copyright assigned to the Crypto++ project. 
-// Based on Wei Dai's Salsa20 and Bernstein's reference ChaCha +// Based on Wei Dai's Salsa20 and Bernstein's reference ChaCha // family implementation at http://cr.yp.to/chacha.html. //! \file chacha.h From f532b02a96edb154ba7edcc3684f6175cea009bf Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sat, 23 Jul 2016 21:39:18 -0400 Subject: [PATCH 4/7] Add replacement for _mm_set_epi64x under Sun Studio 12.3 and below --- blake2.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/blake2.cpp b/blake2.cpp index 1fa6ef44..e44d99ef 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -34,10 +34,20 @@ NAMESPACE_BEGIN(CryptoPP) # undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE #endif -// SunCC needs 12.4 for _mm_set_epi64x, _mm_blend_epi16, _mm_shuffle_epi16, etc +// Sun Studio 12.3 and earlier lack SSE2's _mm_set_epi64x. +// Also see http://stackoverflow.com/a/38547909/608639 #if defined(__SUNPRO_CC) && (__SUNPRO_CC < 0x5130) -# undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE -# undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE +inline __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b) +{ + union INT_128_64 { + __m128i v128; + uint64_t v64[2]; + }; + + INT_128_64 v; + v.v64[0] = a; v.v64[1] = b; + return v.v128; +} #endif // C/C++ implementation From d8638a597fe364bca762fe55639899adcafc7e2b Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sat, 23 Jul 2016 22:09:26 -0400 Subject: [PATCH 5/7] Fixed arguments to _mm_set_epi64x --- blake2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blake2.cpp b/blake2.cpp index e44d99ef..2e73bdd1 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -45,7 +45,7 @@ inline __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b) }; INT_128_64 v; - v.v64[0] = a; v.v64[1] = b; + v.v64[0] = b; v.v64[1] = a; return v.v128; } #endif From ef4f185d9beed6709e6bf843b98f579c5fb38e8c Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sat, 23 Jul 2016 23:49:26 -0400 Subject: [PATCH 6/7] Avoid accessing 
union value through inactive member --- blake2.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/blake2.cpp b/blake2.cpp index 2e73bdd1..cb274c99 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -13,7 +13,8 @@ NAMESPACE_BEGIN(CryptoPP) -// Uncomment for benchmarking C++ against NEON +// Uncomment for benchmarking C++ against SSE2 or NEON +// #undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE // #undef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE // Visual Studio needs both VS2005 (1400) and _M_64 for SSE2 and _mm_set_epi64x() @@ -46,7 +47,7 @@ inline __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b) INT_128_64 v; v.v64[0] = b; v.v64[1] = a; - return v.v128; + return *(reinterpret_cast<__m128i*>(v.v64)); } #endif From 32e6276baffaa558314e91db0c4b4603caaae042 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 24 Jul 2016 00:34:55 -0400 Subject: [PATCH 7/7] Prefer union access over pointer cast Also see http://stackoverflow.com/a/38547909/608639 --- blake2.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/blake2.cpp b/blake2.cpp index cb274c99..e8f3ca68 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -14,7 +14,7 @@ NAMESPACE_BEGIN(CryptoPP) // Uncomment for benchmarking C++ against SSE2 or NEON -// #undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE +#undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE // #undef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE // Visual Studio needs both VS2005 (1400) and _M_64 for SSE2 and _mm_set_epi64x() @@ -40,14 +40,14 @@ NAMESPACE_BEGIN(CryptoPP) #if defined(__SUNPRO_CC) && (__SUNPRO_CC < 0x5130) inline __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b) { - union INT_128_64 { + union INT_128_64x2 { __m128i v128; uint64_t v64[2]; }; - INT_128_64 v; - v.v64[0] = b; v.v64[1] = a; - return *(reinterpret_cast<__m128i*>(v.v64)); + INT_128_64x2 val; + val.v64[0] = b; val.v64[1] = a; + return val.v128; } #endif