From 9d8a89256db1b2a2625aa86434e89169a80025b1 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 31 Jul 2017 07:13:34 -0400 Subject: [PATCH] Cleanup ARMv7 and ARMv8 --- config.h | 6 +++--- crc-simd.cpp | 3 +-- crc.cpp | 1 + gcm-simd.cpp | 35 ++++++++++++++++++++++++----------- gcm.cpp | 42 ++++++++++++++++++++---------------------- sha-simd.cpp | 6 ++---- 6 files changed, 51 insertions(+), 42 deletions(-) diff --git a/config.h b/config.h index 65196a37..9b69e904 100644 --- a/config.h +++ b/config.h @@ -551,7 +551,7 @@ NAMESPACE_END // TODO: Add MSC_VER and ARM-64 platform define when available #if !defined(CRYPTOPP_ARM_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) # if defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_MSC_VERSION >= 2000) || \ - (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) + defined(__aarch32__) || defined(__aarch64__) # define CRYPTOPP_ARM_CRC32_AVAILABLE 1 # endif #endif @@ -562,7 +562,7 @@ NAMESPACE_END // TODO: Add MSC_VER and ARM-64 platform define when available #if !defined(CRYPTOPP_ARM_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__) # if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VERSION >= 2000) || \ - (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) + defined(__aarch32__) || defined(__aarch64__) # define CRYPTOPP_ARM_PMULL_AVAILABLE 1 # endif #endif @@ -573,7 +573,7 @@ NAMESPACE_END // TODO: Add MSC_VER and ARM-64 platform define when available #if !defined(CRYPTOPP_ARM_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) # if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VERSION >= 2000) || \ - (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) + defined(__aarch32__) || defined(__aarch64__) # define CRYPTOPP_ARM_AES_AVAILABLE 1 # define CRYPTOPP_ARM_PMULL_AVAILABLE 1 # define CRYPTOPP_ARM_SHA_AVAILABLE 1 diff --git a/crc-simd.cpp b/crc-simd.cpp index 181327a0..f1f61288 100644 --- a/crc-simd.cpp +++ b/crc-simd.cpp @@ -11,7 +11,7 @@ #include "misc.h" // Clang and GCC hoops... -#if !(defined(__ARM_FEATURE_CRC32) || defined(__aarch32__) || defined(__aarch64__) || defined(_MSC_VER)) +#if !(defined(__ARM_FEATURE_CRC32) || defined(_MSC_VER)) # undef CRYPTOPP_ARM_CRC32_AVAILABLE #endif @@ -20,7 +20,6 @@ #endif #if (CRYPTOPP_ARM_CRC32_AVAILABLE) -# include "arm_neon.h" # include "arm_acle.h" #endif diff --git a/crc.cpp b/crc.cpp index 6046bf83..da0d1c3a 100644 --- a/crc.cpp +++ b/crc.cpp @@ -1,6 +1,7 @@ // crc.cpp - originally written and placed in the public domain by Wei Dai #include "pch.h" +#include "config.h" #include "crc.h" #include "misc.h" #include "cpu.h" diff --git a/gcm-simd.cpp b/gcm-simd.cpp index ccf713ac..c8b60b26 100644 --- a/gcm-simd.cpp +++ b/gcm-simd.cpp @@ -11,7 +11,7 @@ #include "misc.h" // Clang and GCC hoops... -#if !(defined(__ARM_FEATURE_CRYPTO) || defined(__aarch32__) || defined(__aarch64__) || defined(_MSC_VER)) +#if !(defined(__ARM_FEATURE_CRYPTO) || defined(_MSC_VER)) # undef CRYPTOPP_ARM_PMULL_AVAILABLE #endif @@ -20,8 +20,11 @@ # include "wmmintrin.h" #endif -#if (CRYPTOPP_ARM_PMULL_AVAILABLE) +#if (CRYPTOPP_ARM_NEON_AVAILABLE) # include "arm_neon.h" +#endif + +#if (CRYPTOPP_ARM_PMULL_AVAILABLE) # include "arm_acle.h" #endif @@ -307,6 +310,15 @@ void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned vst1_u64((uint64_t *)(mulTable+i+8), vget_low_u64(h1)); } +void GCM_ReverseHashBufferIfNeeded_NEON(byte *hashBuffer) +{ + if (GetNativeByteOrder() != BIG_ENDIAN_ORDER) + { + const uint8x16_t x = vrev64q_u8(vld1q_u8(hashBuffer)); + vst1q_u8(hashBuffer, vextq_u8(x, x, 8)); + } +} + size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer) { const uint64x2_t* table = reinterpret_cast(mtable); @@ -499,12 +511,6 @@ void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned _mm_storel_epi64((__m128i *)(void *)(mulTable+i+8), h1); } -void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer) -{ - __m128i &x = *(__m128i *)(void *)hashBuffer; - x = _mm_shuffle_epi8(x, s_clmulConstants[1]); -} - size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer) { const __m128i *table = (const __m128i *)(const void *)mtable; @@ -521,9 +527,9 @@ size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mt while (true) { - __m128i h0 = _mm_load_si128(table+i); - __m128i h1 = _mm_load_si128(table+i+1); - __m128i h2 = _mm_xor_si128(h0, h1); + const __m128i h0 = _mm_load_si128(table+i); + const __m128i h1 = _mm_load_si128(table+i+1); + const __m128i h2 = _mm_xor_si128(h0, h1); if (++i == s) { @@ -569,7 +575,14 @@ size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mt _mm_store_si128((__m128i *)(void *)hbuffer, x); return len; } +#endif +#if CRYPTOPP_SSSE3_AVAILABLE +void GCM_ReverseHashBufferIfNeeded_SSSE3(byte *hashBuffer) +{ + __m128i &x = *(__m128i *)(void *)hashBuffer; + x = _mm_shuffle_epi8(x, s_clmulConstants[1]); +} #endif NAMESPACE_END \ No newline at end of file diff --git a/gcm.cpp b/gcm.cpp index 9d44411b..25e312ae 100644 --- a/gcm.cpp +++ b/gcm.cpp @@ -9,10 +9,6 @@ #include "pch.h" #include "config.h" -#if CRYPTOPP_MSC_VERSION -# pragma warning(disable: 4189) -#endif - #ifndef CRYPTOPP_IMPORTS #ifndef CRYPTOPP_GENERATE_X64_MASM @@ -23,9 +19,9 @@ // SunCC 5.13 and below crash with AES-NI/CLMUL and C++{03|11}. Disable one or the other. // Also see http://github.com/weidai11/cryptopp/issues/226 -#if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x513) -# undef CRYPTOPP_CLMUL_AVAILABLE -#endif +// #if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x513) +// # undef CRYPTOPP_CLMUL_AVAILABLE +// #endif #include "gcm.h" #include "cpu.h" @@ -71,7 +67,7 @@ inline static void Xor16(byte *a, const byte *b, const byte *c) } #if CRYPTOPP_SSE2_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE -inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c) +inline static void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c) { // SunCC 5.14 crash (bewildering since asserts are not in effect in release builds) // Also see http://github.com/weidai11/cryptopp/issues/226 and http://github.com/weidai11/cryptopp/issues/284 @@ -88,11 +84,13 @@ inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c) } #endif +#if CRYPTOPP_SSSE3_AVAILABLE +extern void GCM_ReverseHashBufferIfNeeded_SSSE3(byte *hashBuffer); +#endif + #if CRYPTOPP_CLMUL_AVAILABLE extern __m128i GCM_Multiply_CLMUL(const __m128i &x, const __m128i &h, const __m128i &r); -extern __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128i &r); extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize); -extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer); extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer); CRYPTOPP_ALIGN_DATA(16) @@ -105,10 +103,14 @@ const __m128i *s_clmulConstants = (const __m128i *)(const void *)s_clmulConstant const unsigned int s_cltableSizeInBlocks = 8; #endif // CRYPTOPP_CLMUL_AVAILABLE +#if CRYPTOPP_ARM_NEON_AVAILABLE +extern void GCM_ReverseHashBufferIfNeeded_NEON(byte *hashBuffer); +#endif + #if CRYPTOPP_ARM_PMULL_AVAILABLE -extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer); extern uint64x2_t GCM_Multiply_PMULL(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r); extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize); +extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer); CRYPTOPP_ALIGN_DATA(16) const word64 s_clmulConstants64[] = { @@ -203,7 +205,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const if (HasSSE2()) for (j=2; j<=0x80; j*=2) for (k=1; k