From a8462328f48acf3bf95172ea7e4be5c8b6d8e3c8 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 31 Jul 2017 03:07:32 -0400 Subject: [PATCH] Fix Aarch64 build. Cleanup Windows build --- GNUmakefile | 18 +++--- blake2-simd.cpp | 6 +- config.h | 6 +- cpu.cpp | 2 +- crc-simd.cpp | 146 +++++++++++++++++++++++++----------------------- gcm-simd.cpp | 14 +++-- neon.cpp | 14 +++-- sha-simd.cpp | 18 ++++-- sha.cpp | 14 ++--- 9 files changed, 129 insertions(+), 109 deletions(-) mode change 100755 => 100644 GNUmakefile diff --git a/GNUmakefile b/GNUmakefile old mode 100755 new mode 100644 index 8e6fa818..2e2d456e --- a/GNUmakefile +++ b/GNUmakefile @@ -199,15 +199,15 @@ endif # -DCRYPTOPP_DISABLE_SSSE3 endif # -DCRYPTOPP_DISABLE_ASM endif # CXXFLAGS -SSSE3_FLAG = $(shell echo "" | $(CXX) $(CXXFLAGS) -mssse3 -dM -E - | grep -i -c -q __SSSE3__ && echo "-mssse3") +SSSE3_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -mssse3 -dM -E - | grep -i -c -q __SSSE3__ && echo "-mssse3") ARIA_FLAG = $(SSSE3_FLAG) ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),) -SSE42_FLAG = $(shell echo "" | $(CXX) $(CXXFLAGS) -msse4.2 -dM -E - | grep -i -c -q __SSE4_2__ && echo "-msse4.2") +SSE42_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -dM -E - | grep -i -c -q __SSE4_2__ && echo "-msse4.2") ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),) -GCM_FLAG = $(shell echo "" | $(CXX) $(CXXFLAGS) -mssse3 -mpclmul -dM -E - | grep -i -c -q __PCLMUL__ && echo "-mssse3 -mpclmul") -AES_FLAG = $(shell echo "" | $(CXX) $(CXXFLAGS) -maes -dM -E - | grep -i -c -q __AES__ && echo "-maes") +GCM_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -mssse3 -mpclmul -dM -E - | grep -i -c -q __PCLMUL__ && echo "-mssse3 -mpclmul") +AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -maes -dM -E - | grep -i -c -q __AES__ && echo "-maes") ifeq ($(findstring -DCRYPTOPP_DISABLE_SHA,$(CXXFLAGS)),) -SHA_FLAG = $(shell echo "" | $(CXX) $(CXXFLAGS) -msse4.2 -msha -dM -E - | grep -i -c -q __SHA__ && echo "-msse4.2 -msha") +SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -msha -dM -E - | grep -i -c -q __SHA__ && echo "-msse4.2 -msha") BLAKE2_FLAG = $(SSE42_FLAG) CRC_FLAG = $(SSE42_FLAG) endif @@ -304,16 +304,16 @@ endif endif ifeq ($(IS_NEON),1) - NEON_FLAG = $(shell echo "" | $(CXX) $(CXXFLAGS) -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon") + NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon") GCM_FLAG = $(NEON_FLAG) ARIA_FLAG = $(NEON_FLAG) BLAKE2_FLAG = $(NEON_FLAG) endif ifeq ($(IS_ARMV8),1) - ARMV8A_NEON_FLAG = $(shell echo "" | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a") - ARMV8A_CRC_FLAG = $(shell echo "" | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc") - ARMV8A_CRYPTO_FLAG = $(shell echo "" | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") + ARMV8A_NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a") + ARMV8A_CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc") + ARMV8A_CRYPTO_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") CRC_FLAG = $(ARMV8A_CRC_FLAG) AES_FLAG = $(ARMV8A_CRYPTO_FLAG) GCM_FLAG = $(ARMV8A_CRYPTO_FLAG) diff --git a/blake2-simd.cpp b/blake2-simd.cpp index 1d025356..924cf7b3 100644 --- a/blake2-simd.cpp +++ b/blake2-simd.cpp @@ -15,7 +15,7 @@ # include "nmmintrin.h" #endif -#if (CRYPTOPP_ARM_NEON_AVAILABLE) && defined(__GNUC__) +#if (CRYPTOPP_ARM_NEON_AVAILABLE) # include "arm_neon.h" #endif @@ -24,6 +24,10 @@ # include #endif +#ifndef EXCEPTION_EXECUTE_HANDLER +# define EXCEPTION_EXECUTE_HANDLER 1 +#endif + NAMESPACE_BEGIN(CryptoPP) ANONYMOUS_NAMESPACE_BEGIN diff --git a/config.h b/config.h index d456af48..b729fa5b 100644 --- a/config.h +++ b/config.h @@ -568,13 +568,13 @@ NAMESPACE_END // LLVM Clang requires 3.5. Apple Clang is unknown at the moment. // Microsoft plans to support ARM-64, but its not clear how to detect it. // TODO: Add MSC_VER and ARM-64 platform define when available -#if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) +#if !defined(CRYPTOPP_ARM_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) # if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VERSION >= 2000) || \ (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) # define CRYPTOPP_ARM_AES_AVAILABLE 1 # define CRYPTOPP_ARM_PMULL_AVAILABLE 1 -# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1 -# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1 +# define CRYPTOPP_ARM_SHA_AVAILABLE 1 +# define CRYPTOPP_ARM_CRYPTO_AVAILABLE 1 # endif #endif diff --git a/cpu.cpp b/cpu.cpp index e07a9428..08d9f856 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -402,7 +402,7 @@ static bool TryAES() # endif #else return false; -#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE +#endif // CRYPTOPP_ARM_CRYPTO_AVAILABLE } void DetectArmFeatures() diff --git a/crc-simd.cpp b/crc-simd.cpp index 78d5de7f..158f921f 100644 --- a/crc-simd.cpp +++ b/crc-simd.cpp @@ -14,11 +14,11 @@ # include "nmmintrin.h" #endif -#if (CRYPTOPP_ARM_CRC32_AVAILABLE) +#if (CRYPTOPP_ARM_NEON_AVAILABLE) # include "arm_neon.h" -#if defined(__GNUC__) +# if (CRYPTOPP_ARM_CRC32_AVAILABLE) # include "arm_acle.h" -#endif +# endif #endif #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY @@ -26,17 +26,21 @@ # include #endif +#ifndef EXCEPTION_EXECUTE_HANDLER +# define EXCEPTION_EXECUTE_HANDLER 1 +#endif + NAMESPACE_BEGIN(CryptoPP) #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY extern "C" { typedef void (*SigHandler)(int); - static jmp_buf s_jmpSIGILL; - static void SigIllHandler(int) - { - longjmp(s_jmpSIGILL, 1); - } + static jmp_buf s_jmpSIGILL; + static void SigIllHandler(int) + { + longjmp(s_jmpSIGILL, 1); + } }; #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY @@ -44,99 +48,99 @@ bool CPU_TryCRC32_ARMV8() { #if (CRYPTOPP_ARM_CRC32_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - volatile bool result = true; - __try - { - word32 w=0, x=1; word16 y=2; byte z=3; - w = __crc32w(w,x); - w = __crc32h(w,y); - w = __crc32b(w,z); - w = __crc32cw(w,x); - w = __crc32ch(w,y); - w = __crc32cb(w,z); + volatile bool result = true; + __try + { + word32 w=0, x=1; word16 y=2; byte z=3; + w = __crc32w(w,x); + w = __crc32h(w,y); + w = __crc32b(w,z); + w = __crc32cw(w,x); + w = __crc32ch(w,y); + w = __crc32cb(w,z); - result = !!w; - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - return result; + result = !!w; + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; #else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile bool result = true; + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); - if (oldHandler == SIG_ERR) - return false; + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; - volatile sigset_t oldMask; - if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) - return false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; - if (setjmp(s_jmpSIGILL)) - result = false; - else - { - word32 w=0, x=1; word16 y=2; byte z=3; - w = __crc32w(w,x); - w = __crc32h(w,y); - w = __crc32b(w,z); - w = __crc32cw(w,x); - w = __crc32ch(w,y); - w = __crc32cb(w,z); + if (setjmp(s_jmpSIGILL)) + result = false; + else + { + word32 w=0, x=1; word16 y=2; byte z=3; + w = __crc32w(w,x); + w = __crc32h(w,y); + w = __crc32b(w,z); + w = __crc32cw(w,x); + w = __crc32ch(w,y); + w = __crc32cb(w,z); - // Hack... GCC optimizes away the code and returns true - result = !!w; - } + // Hack... GCC optimizes away the code and returns true + result = !!w; + } - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); - signal(SIGILL, oldHandler); - return result; + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; # endif #else - return false; + return false; +#endif // CRYPTOPP_ARM_CRC32_AVAILABLE } -#endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE #if (CRYPTOPP_ARM_CRC32_AVAILABLE) void CRC32_Update_ARMV8(const byte *s, size_t n, word32& c) { - for(; !IsAligned(s) && n > 0; s++, n--) - c = __crc32b(c, *s); + for(; !IsAligned(s) && n > 0; s++, n--) + c = __crc32b(c, *s); - for(; n > 4; s+=4, n-=4) - c = __crc32w(c, *(const word32 *)(void*)s); + for(; n > 4; s+=4, n-=4) + c = __crc32w(c, *(const word32 *)(void*)s); - for(; n > 0; s++, n--) - c = __crc32b(c, *s); + for(; n > 0; s++, n--) + c = __crc32b(c, *s); } void CRC32C_Update_ARMV8(const byte *s, size_t n, word32& c) { - for(; !IsAligned(s) && n > 0; s++, n--) - c = __crc32cb(c, *s); + for(; !IsAligned(s) && n > 0; s++, n--) + c = __crc32cb(c, *s); - for(; n > 4; s+=4, n-=4) - c = __crc32cw(c, *(const word32 *)(void*)s); + for(; n > 4; s+=4, n-=4) + c = __crc32cw(c, *(const word32 *)(void*)s); - for(; n > 0; s++, n--) - c = __crc32cb(c, *s); + for(; n > 0; s++, n--) + c = __crc32cb(c, *s); } #endif #if (CRYPTOPP_SSE42_AVAILABLE) void CRC32C_Update_SSE42(const byte *s, size_t n, word32& c) { - for(; !IsAligned(s) && n > 0; s++, n--) - c = _mm_crc32_u8(c, *s); + for(; !IsAligned(s) && n > 0; s++, n--) + c = _mm_crc32_u8(c, *s); - for(; n > 4; s+=4, n-=4) - c = _mm_crc32_u32(c, *(const word32 *)(void*)s); + for(; n > 4; s+=4, n-=4) + c = _mm_crc32_u32(c, *(const word32 *)(void*)s); - for(; n > 0; s++, n--) - c = _mm_crc32_u8(c, *s); + for(; n > 0; s++, n--) + c = _mm_crc32_u8(c, *s); } #endif diff --git a/gcm-simd.cpp b/gcm-simd.cpp index 6566af68..f5fa74b1 100644 --- a/gcm-simd.cpp +++ b/gcm-simd.cpp @@ -17,9 +17,9 @@ #if (CRYPTOPP_ARM_NEON_AVAILABLE) # include "arm_neon.h" -#if (CRYPTOPP_ARM_PMULL_AVAILABLE) +# if (CRYPTOPP_ARM_PMULL_AVAILABLE) # include "arm_acle.h" -#endif +# endif #endif #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY @@ -27,6 +27,10 @@ # include #endif +#ifndef EXCEPTION_EXECUTE_HANDLER +# define EXCEPTION_EXECUTE_HANDLER 1 +#endif + ANONYMOUS_NAMESPACE_BEGIN // GCC 4.8 and 4.9 are missing PMULL gear @@ -216,12 +220,12 @@ bool CPU_TryPMULL_ARMV8() sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); signal(SIGILL, oldHandler); - return result; + return result; # endif #else - return false; + return false; +#endif // CRYPTOPP_ARM_SHA_AVAILABLE } -#endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE #if CRYPTOPP_ARM_NEON_AVAILABLE void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c) diff --git a/neon.cpp b/neon.cpp index 3e6adedf..0b55e464 100644 --- a/neon.cpp +++ b/neon.cpp @@ -9,10 +9,6 @@ #include "pch.h" #include "config.h" -#ifndef EXCEPTION_EXECUTE_HANDLER -# define EXCEPTION_EXECUTE_HANDLER 1 -#endif - #if (CRYPTOPP_ARM_NEON_AVAILABLE) # include "arm_neon.h" #endif @@ -22,6 +18,10 @@ # include #endif +#ifndef EXCEPTION_EXECUTE_HANDLER +# define EXCEPTION_EXECUTE_HANDLER 1 +#endif + NAMESPACE_BEGIN(CryptoPP) #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY @@ -36,9 +36,9 @@ extern "C" { }; #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -#if (CRYPTOPP_ARM_NEON_AVAILABLE) bool CPU_TryNEON_ARM() { +#if (CRYPTOPP_ARM_NEON_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) volatile bool result = true; __try @@ -99,7 +99,9 @@ bool CPU_TryNEON_ARM() signal(SIGILL, oldHandler); return result; # endif -} +#else + return false; #endif // CRYPTOPP_ARM_NEON_AVAILABLE +} NAMESPACE_END diff --git a/sha-simd.cpp b/sha-simd.cpp index 01d60006..7a70b4f4 100644 --- a/sha-simd.cpp +++ b/sha-simd.cpp @@ -18,9 +18,11 @@ # include "immintrin.h" #endif -#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE) && defined(__GNUC__) +#if (CRYPTOPP_ARM_NEON_AVAILABLE) # include "arm_neon.h" +# if (CRYPTOPP_ARM_SHA_AVAILABLE) # include "arm_acle.h" +# endif #endif #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY @@ -28,6 +30,10 @@ # include #endif +#ifndef EXCEPTION_EXECUTE_HANDLER +# define EXCEPTION_EXECUTE_HANDLER 1 +#endif + NAMESPACE_BEGIN(CryptoPP) #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY @@ -46,7 +52,7 @@ extern const word32 SHA256_K[64]; bool CPU_TrySHA1_ARMV8() { -#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE) +#if (CRYPTOPP_ARM_SHA_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) volatile bool result = true; __try @@ -100,12 +106,12 @@ bool CPU_TrySHA1_ARMV8() # endif #else return false; +#endif // CRYPTOPP_ARM_SHA_AVAILABLE } -#endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE bool CPU_TrySHA2_ARMV8() { -#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE) +#if (CRYPTOPP_ARM_SHA_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) volatile bool result = true; __try @@ -157,8 +163,8 @@ bool CPU_TrySHA2_ARMV8() # endif #else return false; +#endif // CRYPTOPP_ARM_SHA_AVAILABLE } -#endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE /////////////////////////////////// // start of Walton/Gulley's code // @@ -554,7 +560,7 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data // start of Walton/Schneiders/O'Rourke/Hovsmith's code // ///////////////////////////////////////////////////////// -#if CRYPTOPP_ARMV8A_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA_AVAILABLE void SHA1_Transform_ARMV8A(word32 *state, const word32 *data) { uint32x4_t C0, C1, C2, C3; diff --git a/sha.cpp b/sha.cpp index 6f483bff..39c2b5b7 100644 --- a/sha.cpp +++ b/sha.cpp @@ -100,7 +100,7 @@ static void SHA1_Transform_CXX(word32 *state, const word32 *data) #if CRYPTOPP_SHANI_AVAILABLE extern void SHA1_Transform_SHANI(word32 *state, const word32 *data); extern void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data, size_t length); -#elif CRYPTOPP_ARMV8A_SHA_AVAILABLE +#elif CRYPTOPP_ARM_SHA_AVAILABLE extern void SHA1_Transform_ARMV8A(word32 *state, const word32 *data); extern void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8A(word32 *state, const word32 *data, size_t length); #endif @@ -112,7 +112,7 @@ static pfnSHATransform InitializeSHA1Transform() return &SHA1_Transform_SHANI; else #endif -#if CRYPTOPP_ARMV8A_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA_AVAILABLE if (HasSHA1()) return &SHA1_Transform_ARMV8A; else @@ -173,7 +173,7 @@ void SHA256::InitState(HashWordType *state) memcpy(state, s, sizeof(s)); } -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_ARMV8A_SHA_AVAILABLE +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_ARM_SHA_AVAILABLE CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = { #else extern const word32 SHA256_K[64] = { @@ -538,7 +538,7 @@ static pfnSHAHashBlocks InitializeSHA256HashBlocks() return &SHA256_HashBlocks_SHANI; else #endif -#if CRYPTOPP_ARMV8A_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA_AVAILABLE if (HasSHA2()) return &SHA256_HashBlocks_ARMV8A; else @@ -707,12 +707,12 @@ static void SHA256_Transform_SHANI(word32 *state, const word32 *data) } #endif // CRYPTOPP_SHANI_AVAILABLE -#if CRYPTOPP_ARMV8A_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA_AVAILABLE static void SHA256_Transform_ARMV8A(word32 *state, const word32 *data) { return SHA256_HashBlocks_ARMV8A(state, data, SHA256::BLOCKSIZE); } -#endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE +#endif // CRYPTOPP_ARM_SHA_AVAILABLE /////////////////////////////////// // start of Walton/Gulley's code // @@ -730,7 +730,7 @@ static pfnSHATransform InitializeSHA256Transform() return &SHA256_Transform_SSE2; else #endif -#if CRYPTOPP_ARMV8A_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA_AVAILABLE if (HasSHA2()) return &SHA256_Transform_ARMV8A; else