From 4e3a1ea962d8f8cc58b97d2dd59554479a2b2db9 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 15 Jul 2018 08:35:14 -0400 Subject: [PATCH] Add ARMv8.4 cpu feature detection support (GH #685) (#687) This PR adds ARMv8.4 cpu feature detection support. Previously we only needed ARMv8.1 and things were much easier. For example, ARMv8.1 `__ARM_FEATURE_CRYPTO` meant PMULL, AES, SHA-1 and SHA-256 were available. ARMv8.4 `__ARM_FEATURE_CRYPTO` means PMULL, AES, SHA-1, SHA-256, SHA-512, SHA-3, SM3 and SM4 are available. We still use the same pattern as before. We make something available based on compiler version and/or preprocessor macros. But this time around we had to tighten things up a bit to ensure ARMv8.4 did not cross-pollinate down into ARMv8.1. ARMv8.4 is largely untested at the moment. There is no hardware in the field and CI lacks QEMU with the relevant patches/support. We will probably have to revisit some of this stuff in the future. Since this update applies to ARM gadgets we took the time to expand Android and iOS testing on Travis. Travis now tests more platforms, and includes Autotools and CMake builds, too. --- .travis.yml | 44 ++++++- GNUmakefile | 6 +- GNUmakefile-cross | 22 +++- Readme.txt | 1 + TestScripts/cryptest-android.sh | 8 +- TestScripts/cryptest-ios.sh | 2 +- TestScripts/setenv-android-gcc.sh | 4 +- TestScripts/setenv-android-old.sh | 4 +- adv-simd.h | 5 + config.h | 187 ++++++++++++++++++++---------- cpu.cpp | 6 +- crc-simd.cpp | 5 +- gcm-simd.cpp | 15 +-- neon-simd.cpp | 2 +- rijndael-simd.cpp | 5 +- sha-simd.cpp | 19 ++- sha.cpp | 25 ++-- shacal2-simd.cpp | 12 -- sm4-simd.cpp | 6 + validat1.cpp | 20 +++- 20 files changed, 260 insertions(+), 138 deletions(-) diff --git a/.travis.yml b/.travis.yml index cc5fc8af..d7c84d2f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,8 +51,16 @@ matrix: include: - os: linux env: - - BUILD_MODE=android - - PLATFORM=armeabi + - BUILD_MODE=autotools + - os: linux + env: + - BUILD_MODE=cmake + - os: osx + env: + - BUILD_MODE=autotools + - os: osx + env: + - BUILD_MODE=cmake - os: linux env: - BUILD_MODE=android @@ -85,6 +93,10 @@ matrix: env: - BUILD_MODE=ios - PLATFORM=iPhoneOS + - os: osx + env: + - BUILD_MODE=ios + - PLATFORM=Arm64 - os: osx env: - BUILD_MODE=ios @@ -103,10 +115,24 @@ matrix: env: - BUILD_MODE=ios - PLATFORM=WatchOS + - os: linux + env: + - BUILD_MODE=android + - PLATFORM=armeabi + - os: linux + env: + - BUILD_MODE=android + - PLATFORM=mipsel + - os: linux + env: + - BUILD_MODE=android + - PLATFORM=mipsel64 before_install: - | if [[ "$BUILD_MODE" == "android" ]]; then + # https://github.com/travis-ci/travis-ci/issues/9037 + sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 0C49F3730359A145 TestScripts/install-android.sh fi @@ -114,10 +140,18 @@ script: - | if [[ "$BUILD_MODE" == "ios" ]]; then cp ./TestScripts/setenv-ios.sh . - TestScripts/cryptest-ios.sh + cp ./TestScripts/cryptest-ios.sh . + ./cryptest-ios.sh elif [[ "$BUILD_MODE" == "android" ]]; then - cp ./TestScripts/setenv-android.sh . - TestScripts/cryptest-android.sh + cp ./TestScripts/setenv-android-gcc.sh . + cp ./TestScripts/cryptest-android.sh . + ./cryptest-android.sh + elif [[ "$BUILD_MODE" == "autotools" ]]; then + cp ./TestScripts/cryptest-autotools.sh . + ./cryptest-autotools.sh + elif [[ "$BUILD_MODE" == "cmake" ]]; then + cp ./TestScripts/cryptest-cmake.sh . + ./cryptest-cmake.sh elif [[ "$BUILD_MODE" == "debug" ]]; then CXXFLAGS="-DDEBUG -g2 -O1" make -j "$BUILD_JOBS" ./cryptest.exe v diff --git a/GNUmakefile b/GNUmakefile index bd6e90c3..c27a1e3e 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -385,12 +385,14 @@ ifeq ($(IS_NEON),1) HAVE_NEON = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon -dM -E - 2>/dev/null | $(GREP) -i -c -E '\<__ARM_NEON\>') ifeq ($(HAVE_NEON),1) NEON_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - AES_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon ARIA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon + AES_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon + CRC_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon + GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon BLAKE2_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon CHAM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon LEA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon + SHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon SIMECK_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon SIMON_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon SPECK_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon diff --git a/GNUmakefile-cross b/GNUmakefile-cross index 7611d24d..14671fb2 100755 --- a/GNUmakefile-cross +++ b/GNUmakefile-cross @@ -32,10 +32,11 @@ endif IS_i686 := $(shell echo "$HOSTX" | $(EGREP) -v 64 | $(EGREP) -i -c 'i.86') IS_x86_64 := $(shell echo "$HOSTX" | $(EGREP) -i -c 'x86_64|amd64') -IS_ARM := $(shell echo "$HOSTX" | $(EGREP) -i -c 'arm') +IS_ARM32 := $(shell echo "$(HOSTX)" | $(EGREP) -i -c -E 'arm|armhf|arm7l|eabihf') IS_ARMv8 := $(shell echo "$HOSTX" | $(EGREP) -i -c 'aarch32|aarch64') -CLANG_COMPILER := $(shell $(CXX) --version 2>&1 | $(EGREP) -i -c "clang") +GCC_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(EGREP) -v -E '(llvm|clang)' | $(EGREP) -i -c -E '(gcc|g\+\+)') +CLANG_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(EGREP) -i -c -E '(llvm|clang)') IS_IOS ?= 0 IS_ANDROID ?= 0 @@ -321,16 +322,25 @@ endif # List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems. SRCS := cryptlib.cpp cpu.cpp integer.cpp $(filter-out cryptlib.cpp cpu.cpp integer.cpp pch.cpp simple.cpp winpipes.cpp cryptlib_bds.cpp,$(sort $(wildcard *.cpp))) - # For Makefile.am; resource.h is Windows INCL := $(filter-out resource.h,$(sort $(wildcard *.h))) +# Cryptogams AES for ARMv4 and above. We couple to ARMv7. +# Disable Thumb via -marm due to unaligned byte buffers. +ifeq ($(IS_ARM32),1) +CRYPTOGAMS_AES_ARCH = -march=armv7-a -marm +SRCS += aes-armv4.S +endif + # List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems. OBJS := $(SRCS:.cpp=.o) +OBJS := $(OBJS:.S=.o) # List test.cpp first to tame C++ static initialization problems. -TESTSRCS := adhoc.cpp test.cpp bench1.cpp bench2.cpp validat0.cpp validat1.cpp validat2.cpp validat3.cpp validat4.cpp datatest.cpp regtest1.cpp regtest2.cpp regtest3.cpp fipsalgt.cpp dlltest.cpp +TESTSRCS := adhoc.cpp test.cpp bench1.cpp bench2.cpp validat0.cpp validat1.cpp validat2.cpp validat3.cpp validat4.cpp datatest.cpp regtest1.cpp regtest2.cpp regtest3.cpp dlltest.cpp fipsalgt.cpp TESTINCL := bench.h factory.h validate.h + +# Test objects TESTOBJS := $(TESTSRCS:.cpp=.o) LIBOBJS := $(filter-out $(TESTOBJS),$(OBJS)) @@ -494,6 +504,10 @@ endif # Dependencies cpu-features.o: cpu-features.h cpu-features.c $(CXX) $(strip $(CXXFLAGS) -fpermissive -c) cpu-features.c +# Cryptogams ARM asm implementation. CRYPTOGAMS_AES_ARCH includes -marm. +aes-armv4.o : aes-armv4.S + $(CXX) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_ARCH) -c) $< + # SSE4.2 or NEON available aria-simd.o : aria-simd.cpp $(CXX) $(strip $(CXXFLAGS) $(ARIA_FLAG) -c) $< diff --git a/Readme.txt b/Readme.txt index 8967f568..b746cef5 100644 --- a/Readme.txt +++ b/Readme.txt @@ -407,3 +407,4 @@ June 2015 - Changing of the guard. Wei Dai turned the library over to the still provides guidance when we have questions. Originally written by Wei Dai, maintained by the Crypto++ Project + diff --git a/TestScripts/cryptest-android.sh b/TestScripts/cryptest-android.sh index 627b7994..b0b6f6fb 100755 --- a/TestScripts/cryptest-android.sh +++ b/TestScripts/cryptest-android.sh @@ -17,6 +17,7 @@ else PLATFORMS=(${PLATFORM}) fi RUNTIMES=(gnu-static gnu-shared stlport-static stlport-shared) #llvm-static llvm-shared + for platform in ${PLATFORMS[@]} do for runtime in ${RUNTIMES[@]} @@ -30,14 +31,15 @@ do # Test if we can set the environment for the platform ./setenv-android-gcc.sh "$platform" "$runtime" - if [ "$?" -eq "0" ]; then + if [ "$?" -eq "0" ]; + then echo echo "Building for $platform using $runtime..." echo # run in subshell to not keep any env vars ( - . ./setenv-android-gcc.sh "$platform" "$runtime" > /dev/null 2>&1 + source ./setenv-android-gcc.sh "$platform" "$runtime" > /dev/null 2>&1 make -f GNUmakefile-cross static dynamic cryptest.exe if [ "$?" -eq "0" ]; then echo "$platform:$runtime ==> SUCCESS" >> /tmp/build.log @@ -48,7 +50,7 @@ do ) else echo - echo "$platform with $runtime not supported by Android" + echo "There were problems testing $platform with $runtime" echo "$platform:$runtime ==> FAILURE" >> /tmp/build.log touch /tmp/build.failed fi diff --git a/TestScripts/cryptest-ios.sh b/TestScripts/cryptest-ios.sh index a7cf00dc..130f5dce 100755 --- a/TestScripts/cryptest-ios.sh +++ b/TestScripts/cryptest-ios.sh @@ -11,7 +11,7 @@ # ==================================================================== if [ -z "${PLATFORM-}" ]; then - PLATFORMS=(iPhoneOS iPhoneSimulator WatchOS WatchSimulator AppleTVOS AppleTVSimulator) + PLATFORMS=(iPhoneOS iPhoneSimulator Arm64 WatchOS WatchSimulator AppleTVOS AppleTVSimulator) else PLATFORMS=(${PLATFORM}) fi diff --git a/TestScripts/setenv-android-gcc.sh b/TestScripts/setenv-android-gcc.sh index d49f2000..69fb2f72 100755 --- a/TestScripts/setenv-android-gcc.sh +++ b/TestScripts/setenv-android-gcc.sh @@ -167,14 +167,14 @@ case "$THE_ARCH" in TOOLCHAIN_NAME="i686-linux-android" AOSP_ABI="x86" AOSP_ARCH="arch-x86" - AOSP_FLAGS="-mtune=intel -mssse3 -mfpmath=sse -DCRYPTOPP_DISABLE_SSE4 -funwind-tables -fexceptions -frtti" + AOSP_FLAGS="-mtune=intel -mssse3 -mfpmath=sse -funwind-tables -fexceptions -frtti" ;; x86_64|x64) TOOLCHAIN_ARCH="x86_64" TOOLCHAIN_NAME="x86_64-linux-android" AOSP_ABI="x86_64" AOSP_ARCH="arch-x86_64" - AOSP_FLAGS="-march=x86-64 -msse4.2 -mpopcnt -mtune=intel -DCRYPTOPP_DISABLE_CLMUL -DCRYPTOPP_DISABLE_AESNI -DCRYPTOPP_DISABLE_SHANI -funwind-tables -fexceptions -frtti" + AOSP_FLAGS="-march=x86-64 -msse4.2 -mpopcnt -mtune=intel -funwind-tables -fexceptions -frtti" ;; *) echo "ERROR: Unknown architecture $1" diff --git a/TestScripts/setenv-android-old.sh b/TestScripts/setenv-android-old.sh index b8a6f1fb..d93b568d 100755 --- a/TestScripts/setenv-android-old.sh +++ b/TestScripts/setenv-android-old.sh @@ -157,14 +157,14 @@ case "$THE_ARCH" in TOOLCHAIN_NAME="i686-linux-android" AOSP_ABI="x86" AOSP_ARCH="arch-x86" - AOSP_FLAGS="-mtune=intel -mssse3 -mfpmath=sse -DCRYPTOPP_DISABLE_SSE4 -DCRYPTOPP_DISABLE_CLMUL -DCRYPTOPP_DISABLE_AES -DCRYPTOPP_DISABLE_SHA -funwind-tables -fexceptions -frtti" + AOSP_FLAGS="-mtune=intel -mssse3 -mfpmath=sse -funwind-tables -fexceptions -frtti" ;; x86_64|x64) TOOLCHAIN_ARCH="x86_64" TOOLCHAIN_NAME="x86_64-linux-android" AOSP_ABI="x86_64" AOSP_ARCH="arch-x86_64" - AOSP_FLAGS="-march=x86-64 -msse4.2 -mpopcnt -mtune=intel -DCRYPTOPP_DISABLE_CLMUL -DCRYPTOPP_DISABLE_AES -DCRYPTOPP_DISABLE_SHA -funwind-tables -fexceptions -frtti" + AOSP_FLAGS="-march=x86-64 -msse4.2 -mpopcnt -mtune=intel -funwind-tables -fexceptions -frtti" ;; *) echo "ERROR: Unknown architecture $1" diff --git a/adv-simd.h b/adv-simd.h index 934db323..931c0c0c 100644 --- a/adv-simd.h +++ b/adv-simd.h @@ -46,6 +46,11 @@ # include #endif +#if defined(CRYPTOPP_ARM_ACLE_AVAILABLE) +# include +# include +#endif + #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) # include # include diff --git a/config.h b/config.h index dd104e30..e4bcf946 100644 --- a/config.h +++ b/config.h @@ -569,96 +569,165 @@ NAMESPACE_END #define CRYPTOPP_SHANI_AVAILABLE 1 #endif +// Fixup Android and SSE, Crypto. It may be enabled based on compiler version. +#if (defined(__ANDROID__) || defined(ANDROID)) +# if (CRYPTOPP_BOOL_X86) +# undef CRYPTOPP_SSE41_AVAILABLE +# undef CRYPTOPP_SSE42_AVAILABLE +# undef CRYPTOPP_CLMUL_AVAILABLE +# undef CRYPTOPP_AESNI_AVAILABLE +# undef CRYPTOPP_SHANI_AVAILABLE +# endif +# if (CRYPTOPP_BOOL_X64) +# undef CRYPTOPP_CLMUL_AVAILABLE +# undef CRYPTOPP_AESNI_AVAILABLE +# undef CRYPTOPP_SHANI_AVAILABLE +# endif +#endif + #endif // X86, X32, X64 // ***************** ARM CPU features ******************** #if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) -// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains. -// Android still uses ARMv5 and ARMv6 so we have to be conservative when enabling NEON. +// Requires ARMv7 and ACLE 1.0. -march=armv7-a or above must be present +// Requires GCC 4.3, Clang 2.8 or Visual Studio 2012 +// Do not use APPLE_CLANG_VERSION; use __ARM_FEATURE_XXX instead. #if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) -# if defined(__ARM_NEON) || defined(__ARM_NEON_FP) || defined(__ARM_FEATURE_NEON) || \ - (__ARM_ARCH >= 7) || (CRYPTOPP_MSC_VERSION >= 1700) -# define CRYPTOPP_ARM_NEON_AVAILABLE 1 -# endif +# if defined(__arm__) || defined(__ARM_NEON) || defined(__ARM_FEATURE_NEON) || defined(_M_ARM) +# if (CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20800) || \ + (CRYPTOPP_MSC_VERSION >= 1700) +# define CRYPTOPP_ARM_NEON_AVAILABLE 1 +# endif // Compilers +# endif // Platforms #endif -// ARMv8 and ASIMD, which is NEON. It is part of ARMv8 core. -// TODO: Add MSC_VER and ARM-64 platform define when available +// ARMv8 and ASIMD. -march=armv8-a or above must be present +// Requires GCC 4.8, Clang 3.3 or Visual Studio 2017 +// Do not use APPLE_CLANG_VERSION; use __ARM_FEATURE_XXX instead. #if !defined(CRYPTOPP_ARM_ASIMD_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) -# if defined(__aarch32__) || defined(__aarch64__) || (CRYPTOPP_MSC_VERSION >= 1910) -# define CRYPTOPP_ARM_ASIMD_AVAILABLE 1 -# endif +# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) +# if defined(__ARM_NEON) || defined(__ARM_FEATURE_NEON) || defined(__ARM_FEATURE_ASIMD) || \ + (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_CLANG_VERSION >= 30300) || \ + (CRYPTOPP_MSC_VERSION >= 1910) +# define CRYPTOPP_ARM_NEON_AVAILABLE 1 +# define CRYPTOPP_ARM_ASIMD_AVAILABLE 1 +# endif // Compilers +# endif // Platforms #endif -// Requires ARMv8 and ACLE 2.0. GCC requires 4.8 and above. -// LLVM Clang requires 3.5. Apple Clang is unknown at the moment. -// Microsoft plans to support ARM-64, but its not clear how to detect it. -// TODO: Add Android ARMv8 support for CRC32 -// TODO: Add MSC_VER and ARM-64 platform define when available -#if !defined(CRYPTOPP_ARM_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__) && !defined(__ANDROID__) -# if (defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_MSC_VERSION >= 1910) || \ - defined(__aarch32__) || defined(__aarch64__)) -# define CRYPTOPP_ARM_CRC32_AVAILABLE 1 -# endif +// ARMv8 and ASIMD. -march=armv8-a+crc or above must be present +// Requires GCC 4.8, Clang 3.3 or Visual Studio 2017 +// Do not use APPLE_CLANG_VERSION; use __ARM_FEATURE_XXX instead. +#if !defined(CRYPTOPP_ARM_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) +# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) +# if defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_GCC_VERSION >= 40800) || \ + (CRYPTOPP_CLANG_VERSION >= 30300) || (CRYPTOPP_MSC_VERSION >= 1910) +# define CRYPTOPP_ARM_CRC32_AVAILABLE 1 +# endif // Compilers +# endif // Platforms #endif -// Requires ARMv8 and ACLE 2.0. GCC requires 4.8 and above. -// LLVM Clang requires 3.5. Apple Clang is unknown at the moment. -// Microsoft plans to support ARM-64, but its not clear how to detect it. -// TODO: Add Android ARMv8 support for PMULL -// TODO: Add MSC_VER and ARM-64 platform define when available -#if !defined(CRYPTOPP_ARM_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__) && !defined(__ANDROID__) -# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VERSION >= 1910) || \ - defined(__aarch32__) || defined(__aarch64__) -# define CRYPTOPP_ARM_PMULL_AVAILABLE 1 -# endif +// ARMv8 and ASIMD. -march=armv8-a+crypto or above must be present +// Requires GCC 4.8, Clang 3.3 or Visual Studio 2017 +// Do not use APPLE_CLANG_VERSION; use __ARM_FEATURE_XXX instead. +#if !defined(CRYPTOPP_ARM_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) +# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || \ + (CRYPTOPP_CLANG_VERSION >= 30300) || (CRYPTOPP_MSC_VERSION >= 1910) +# define CRYPTOPP_ARM_PMULL_AVAILABLE 1 +# endif // Compilers +# endif // Platforms #endif -// Requires ARMv8 and ACLE 2.0. GCC requires 4.8 and above. -// LLVM Clang requires 3.5. Apple Clang is unknown at the moment. -// Microsoft plans to support ARM-64, but its not clear how to detect it. -// TODO: Add Android ARMv8 support for AES and SHA -// TODO: Add MSC_VER and ARM-64 platform define when available -#if !defined(CRYPTOPP_ARM_AES_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__ANDROID__) -# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VERSION >= 1910) || \ - defined(__aarch32__) || defined(__aarch64__) -# define CRYPTOPP_ARM_AES_AVAILABLE 1 -# endif +// ARMv8 and AES. -march=armv8-a+crypto or above must be present +// Requires GCC 4.8, Clang 3.3 or Visual Studio 2017 +// Do not use APPLE_CLANG_VERSION; use __ARM_FEATURE_XXX instead. +#if !defined(CRYPTOPP_ARM_AES_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) +# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || \ + (CRYPTOPP_CLANG_VERSION >= 30300) || (CRYPTOPP_MSC_VERSION >= 1910) +# define CRYPTOPP_ARM_AES_AVAILABLE 1 +# endif // Compilers +# endif // Platforms #endif -// Requires ARMv8 and ACLE 2.0. GCC requires 4.8 and above. -// LLVM Clang requires 3.5. Apple Clang is unknown at the moment. -// Microsoft plans to support ARM-64, but its not clear how to detect it. -// TODO: Add Android ARMv8 support for AES and SHA -// TODO: Add MSC_VER and ARM-64 platform define when available -#if !defined(CRYPTOPP_ARM_SHA_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__ANDROID__) -# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VERSION >= 1910) || \ - defined(__aarch32__) || defined(__aarch64__) -# define CRYPTOPP_ARM_SHA_AVAILABLE 1 -# endif +// ARMv8 and SHA-1, SHA-256. -march=armv8-a+crypto or above must be present +// Requires GCC 4.8, Clang 3.3 or Visual Studio 2017 +// Do not use APPLE_CLANG_VERSION; use __ARM_FEATURE_XXX instead. +#if !defined(CRYPTOPP_ARM_SHA_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) +# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || \ + (CRYPTOPP_CLANG_VERSION >= 30300) || (CRYPTOPP_MSC_VERSION >= 1910) +# define CRYPTOPP_ARM_SHA1_AVAILABLE 1 +# define CRYPTOPP_ARM_SHA2_AVAILABLE 1 +# endif // Compilers +# endif // Platforms +#endif + +// ARMv8 and SHA-512, SHA-3. -march=armv8.4-a+crypto or above must be present +// Requires GCC 8.0, Clang 6.0 or Visual Studio 2021??? +// Do not use APPLE_CLANG_VERSION; use __ARM_FEATURE_XXX instead. +#if !defined(CRYPTOPP_ARM_SHA_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) +# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) +# if defined(__ARM_FEATURE_SHA3) || (CRYPTOPP_GCC_VERSION >= 80000) || \ + (CRYPTOPP_MSC_VERSION >= 2100) +# define CRYPTOPP_ARM_SHA512_AVAILABLE 1 +# define CRYPTOPP_ARM_SHA3_AVAILABLE 1 +# endif // Compilers +# endif // Platforms +#endif + +// ARMv8 and SM3, SM4. -march=armv8.4-a+crypto or above must be present +// Requires GCC 8.0, Clang 6.0 or Visual Studio 2021??? +// Do not use APPLE_CLANG_VERSION; use __ARM_FEATURE_XXX instead. +#if !defined(CRYPTOPP_ARM_SM3_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) +# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) +# if defined(__ARM_FEATURE_SM3) || (CRYPTOPP_GCC_VERSION >= 80000) || \ + (CRYPTOPP_MSC_VERSION >= 2100) +# define CRYPTOPP_ARM_SM3_AVAILABLE 1 +# define CRYPTOPP_ARM_SM4_AVAILABLE 1 +# endif // Compilers +# endif // Platforms #endif // Limit the include. #if defined(__aarch32__) || defined(__aarch64__) || (__ARM_ARCH >= 8) || defined(__ARM_ACLE) -# define CRYPTOPP_ARM_ACLE_AVAILABLE 1 +# if !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) +# define CRYPTOPP_ARM_ACLE_AVAILABLE 1 +# endif #endif -// Man, this is borked. Apple Clang defines __ARM_ACLE but then fails -// to compile with "fatal error: 'arm_acle.h' file not found" -#if defined(__ANDROID__) || defined(ANDROID) || defined(__APPLE__) -# undef CRYPTOPP_ARM_ACLE_AVAILABLE +// Fixup Apple Clang and PMULL. Apple defines __ARM_FEATURE_CRYPTO for Xcode 6 +// but does not provide PMULL. TODO: determine when PMULL is available. +#if defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION < 70000) +# undef CRYPTOPP_ARM_PMULL_AVAILABLE +#endif + +// Fixup Android and CRC32. It may be enabled based on compiler version. +#if (defined(__ANDROID__) || defined(ANDROID)) && !defined(__ARM_FEATURE_CRC32) +# undef CRYPTOPP_ARM_CRC32_AVAILABLE +#endif + +// Fixup Android and Crypto. It may be enabled based on compiler version. +#if (defined(__ANDROID__) || defined(ANDROID)) && !defined(__ARM_FEATURE_CRYPTO) +# undef CRYPTOPP_ARM_PMULL_AVAILABLE +# undef CRYPTOPP_ARM_AES_AVAILABLE +# undef CRYPTOPP_ARM_SHA1_AVAILABLE +# undef CRYPTOPP_ARM_SHA2_AVAILABLE #endif // Cryptogams offers an ARM asm AES implementation. Crypto++ does -// not provide an ARM implementation. The Cryptogams implementation +// not provide an asm implementation. The Cryptogams implementation // is about 2x faster than C/C++. Define this to use the Cryptogams // AES implementation on GNU Linux systems. When defined, Crypto++ // will use aes-armv4.S. LLVM miscompiles aes-armv4.S so disable // under Clang. See https://bugs.llvm.org/show_bug.cgi?id=38133. -#if !defined(CRYPTOPP_DISABLE_ASM) && defined(__arm__) && defined(__GNUC__) && !defined(__clang__) -# define CRYPTOGAMS_ARM_AES 1 +#if !defined(CRYPTOPP_DISABLE_ASM) && defined(__arm__) +# if defined(__GNUC__) && !defined(__clang__) +# define CRYPTOGAMS_ARM_AES 1 +# endif #endif #endif // ARM32, ARM64 diff --git a/cpu.cpp b/cpu.cpp index 9521e1fe..44148908 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -452,8 +452,7 @@ extern bool CPU_ProbeSM3(); extern bool CPU_ProbeSM4(); extern bool CPU_ProbePMULL(); -#if CRYPTOPP_GETAUXV_AVAILABLE - +// https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h #ifndef HWCAP_ARMv7 # define HWCAP_ARMv7 (1 << 29) #endif @@ -493,7 +492,6 @@ extern bool CPU_ProbePMULL(); #ifndef HWCAP2_SHA2 # define HWCAP2_SHA2 (1 << 3) #endif -// https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h #ifndef HWCAP_SHA3 # define HWCAP_SHA3 (1 << 17) #endif @@ -507,8 +505,6 @@ extern bool CPU_ProbePMULL(); # define HWCAP_SHA512 (1 << 21) #endif -#endif // CRYPTOPP_GETAUXV_AVAILABLE - inline bool CPU_QueryARMv7() { #if defined(__aarch32__) || defined(__aarch64__) diff --git a/crc-simd.cpp b/crc-simd.cpp index 0d277450..e41cf2c2 100644 --- a/crc-simd.cpp +++ b/crc-simd.cpp @@ -14,13 +14,10 @@ # include #endif -// Use ARMv8 rather than NEON due to compiler inconsistencies -#if (CRYPTOPP_ARM_CRC32_AVAILABLE) +#if (CRYPTOPP_ARM_NEON_AVAILABLE) # include #endif -// Can't use CRYPTOPP_ARM_XXX_AVAILABLE because too many -// compilers don't follow ACLE conventions for the include. #if defined(CRYPTOPP_ARM_ACLE_AVAILABLE) # include # include diff --git a/gcm-simd.cpp b/gcm-simd.cpp index 7d099c51..900ea8f6 100644 --- a/gcm-simd.cpp +++ b/gcm-simd.cpp @@ -25,11 +25,6 @@ # undef CRYPTOPP_CLMUL_AVAILABLE #endif -// Clang and GCC hoops... -#if !(defined(__ARM_FEATURE_CRYPTO) || defined(_MSC_VER)) -# undef CRYPTOPP_ARM_PMULL_AVAILABLE -#endif - #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) # include #endif @@ -43,8 +38,6 @@ # include #endif -// Can't use CRYPTOPP_ARM_XXX_AVAILABLE because too many -// compilers don't follow ACLE conventions for the include. #if defined(CRYPTOPP_ARM_ACLE_AVAILABLE) # include # include @@ -227,8 +220,8 @@ bool CPU_ProbePMULL() const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2)); // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233. - const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum} - const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum} + const uint64x2_t t1 = (uint64x2_t)(r1); // {bignum,bignum} + const uint64x2_t t2 = (uint64x2_t)(r2); // {bignum,bignum} result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 && vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00); @@ -269,8 +262,8 @@ bool CPU_ProbePMULL() const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2)); // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233. - const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum} - const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum} + const uint64x2_t t1 = (uint64x2_t)(r1); // {bignum,bignum} + const uint64x2_t t2 = (uint64x2_t)(r2); // {bignum,bignum} result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 && vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00); diff --git a/neon-simd.cpp b/neon-simd.cpp index 2dd1fa27..d95ee1b9 100644 --- a/neon-simd.cpp +++ b/neon-simd.cpp @@ -64,7 +64,7 @@ bool CPU_ProbeARMv7() return false; } return result; -# elif defined(__arm__) +# elif defined(__arm__) && (__ARM_ARCH >= 7) // longjmp and clobber warnings. Volatile is required. // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp index 893f2905..6abfe3e5 100644 --- a/rijndael-simd.cpp +++ b/rijndael-simd.cpp @@ -37,13 +37,10 @@ # include #endif -// Use ARMv8 rather than NEON due to compiler inconsistencies -#if (CRYPTOPP_ARM_AES_AVAILABLE) +#if (CRYPTOPP_ARM_NEON_AVAILABLE) # include #endif -// Can't use CRYPTOPP_ARM_XXX_AVAILABLE because too many -// compilers don't follow ACLE conventions for the include. #if defined(CRYPTOPP_ARM_ACLE_AVAILABLE) # include # include diff --git a/sha-simd.cpp b/sha-simd.cpp index 5b097d3f..b2639840 100644 --- a/sha-simd.cpp +++ b/sha-simd.cpp @@ -16,13 +16,10 @@ # include #endif -// Use ARMv8 rather than NEON due to compiler inconsistencies -#if (CRYPTOPP_ARM_SHA_AVAILABLE) +#if (CRYPTOPP_ARM_NEON_AVAILABLE) # include #endif -// Can't use CRYPTOPP_ARM_XXX_AVAILABLE because too many -// compilers don't follow ACLE conventions for the include. #if defined(CRYPTOPP_ARM_ACLE_AVAILABLE) # include # include @@ -69,7 +66,7 @@ bool CPU_ProbeSHA1() { #if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) return false; -#elif (CRYPTOPP_ARM_SHA_AVAILABLE) +#elif (CRYPTOPP_ARM_SHA1_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) volatile bool result = true; __try @@ -124,14 +121,14 @@ bool CPU_ProbeSHA1() # endif #else return false; -#endif // CRYPTOPP_ARM_SHA_AVAILABLE +#endif // CRYPTOPP_ARM_SHA1_AVAILABLE } bool CPU_ProbeSHA2() { #if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) return false; -#elif (CRYPTOPP_ARM_SHA_AVAILABLE) +#elif (CRYPTOPP_ARM_SHA2_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) volatile bool result = true; __try @@ -184,7 +181,7 @@ bool CPU_ProbeSHA2() # endif #else return false; -#endif // CRYPTOPP_ARM_SHA_AVAILABLE +#endif // CRYPTOPP_ARM_SHA2_AVAILABLE } #endif // ARM32 or ARM64 @@ -612,7 +609,7 @@ void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t l // start of Walton, Schneiders, O'Rourke and Hovsmith code // ///////////////////////////////////////////////////////////// -#if CRYPTOPP_ARM_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA1_AVAILABLE void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order) { CRYPTOPP_ASSERT(state); @@ -799,7 +796,9 @@ void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t len vst1q_u32(&state[0], ABCD); state[4] = E0; } +#endif // CRYPTOPP_ARM_SHA1_AVAILABLE +#if CRYPTOPP_ARM_SHA2_AVAILABLE void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order) { CRYPTOPP_ASSERT(state); @@ -967,7 +966,7 @@ void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t l vst1q_u32(&state[0], STATE0); vst1q_u32(&state[4], STATE1); } -#endif // CRYPTOPP_ARM_SHA_AVAILABLE +#endif // CRYPTOPP_ARM_SHA2_AVAILABLE /////////////////////////////////////////////////////////// // end of Walton, Schneiders, O'Rourke and Hovsmith code // diff --git a/sha.cpp b/sha.cpp index 889f909c..ab4fb7f2 100644 --- a/sha.cpp +++ b/sha.cpp @@ -6,7 +6,7 @@ // code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke. // All code is in the public domain. -// In August 2017 Walton reworked the internals to align all the implementations. +// In August 2017 JW reworked the internals to align all the implementations. // Formerly all hashes were software based, IterHashBase handled endian conversions, // and IterHashBase dispatched a single to block SHA{N}::Transform. SHA{N}::Transform // then performed the single block hashing. It was repeated for multiple blocks. @@ -62,11 +62,18 @@ extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, siz extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order); #endif -#if CRYPTOPP_ARM_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA1_AVAILABLE extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order); +#endif + +#if CRYPTOPP_ARM_SHA2_AVAILABLE extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order); #endif +#if CRYPTOPP_ARM_SHA512_AVAILABLE +extern void SHA512_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order); +#endif + #if CRYPTOPP_POWER8_SHA_AVAILABLE extern void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t length, ByteOrder order); extern void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t length, ByteOrder order); @@ -161,7 +168,7 @@ std::string SHA1::AlgorithmProvider() const if (HasSSE2()) return "SSE2"; #endif -#if CRYPTOPP_ARM_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA1_AVAILABLE if (HasSHA1()) return "ARMv8"; #endif @@ -189,7 +196,7 @@ void SHA1::Transform(word32 *state, const word32 *data) return; } #endif -#if CRYPTOPP_ARM_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA1_AVAILABLE if (HasSHA1()) { SHA1_HashMultipleBlocks_ARMV8(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER); @@ -212,7 +219,7 @@ size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length) return length & (SHA1::BLOCKSIZE - 1); } #endif -#if CRYPTOPP_ARM_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA1_AVAILABLE if (HasSHA1()) { SHA1_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER); @@ -347,7 +354,7 @@ std::string SHA256_AlgorithmProvider() if (HasSSE2()) return "SSE2"; #endif -#if CRYPTOPP_ARM_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA2_AVAILABLE if (HasSHA2()) return "ARMv8"; #endif @@ -728,7 +735,7 @@ void SHA256::Transform(word32 *state, const word32 *data) return; } #endif -#if CRYPTOPP_ARM_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA2_AVAILABLE if (HasSHA2()) { SHA256_HashMultipleBlocks_ARMV8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER); @@ -766,7 +773,7 @@ size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length) return res; } #endif -#if CRYPTOPP_ARM_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA2_AVAILABLE if (HasSHA2()) { SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER); @@ -822,7 +829,7 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length) return res; } #endif -#if CRYPTOPP_ARM_SHA_AVAILABLE +#if CRYPTOPP_ARM_SHA2_AVAILABLE if (HasSHA2()) { SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER); diff --git a/shacal2-simd.cpp b/shacal2-simd.cpp index e7dbfd8a..dd64089c 100644 --- a/shacal2-simd.cpp +++ b/shacal2-simd.cpp @@ -22,18 +22,6 @@ # include #endif -// Use ARMv8 rather than NEON due to compiler inconsistencies -#if (CRYPTOPP_ARM_SHA_AVAILABLE) -# include -#endif - -// Can't use CRYPTOPP_ARM_XXX_AVAILABLE because too many -// compilers don't follow ACLE conventions for the include. -#if defined(CRYPTOPP_ARM_ACLE_AVAILABLE) -# include -# include -#endif - // Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670 #define M128_CAST(x) ((__m128i *)(void *)(x)) #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x)) diff --git a/sm4-simd.cpp b/sm4-simd.cpp index 6185c9ba..72e97c13 100644 --- a/sm4-simd.cpp +++ b/sm4-simd.cpp @@ -21,6 +21,12 @@ // Do so in both simon.cpp and simon-simd.cpp. // #undef CRYPTOPP_AESNI_AVAILABLE +#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) +# include +# include +# include +#endif + #if (CRYPTOPP_AESNI_AVAILABLE) # include # include diff --git a/validat1.cpp b/validat1.cpp index 77dba93f..1eec0a6f 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -381,19 +381,31 @@ bool TestSettings() std::cout << "\n"; #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) + +# if defined(__arm__) bool hasARMv7 = HasARMv7(); bool hasNEON = HasNEON(); + + std::cout << "passed: "; + std::cout << "hasARMv7 == " << hasARMv7 << ", hasNEON == " << hasNEON << "\n"; +# else // __arch32__ and __aarch64__ bool hasCRC32 = HasCRC32(); bool hasPMULL = HasPMULL(); bool hasAES = HasAES(); bool hasSHA1 = HasSHA1(); bool hasSHA2 = HasSHA2(); + bool hasSHA512 = HasSHA512(); + bool hasSHA3 = HasSHA3(); + bool hasSM3 = HasSM3(); + bool hasSM4 = HasSM4(); std::cout << "passed: "; - std::cout << "hasARMv7 == " << hasARMv7 << ", hasNEON == " << hasNEON; - std::cout << ", hasCRC32 == " << hasCRC32 << ", hasPMULL == " << hasPMULL; - std::cout << ", hasAES == " << hasAES << ", hasSHA1 == " << hasSHA1; - std::cout << ", hasSHA2 == " << hasSHA2 << "\n"; + std::cout << ", hasCRC32 == " << hasCRC32 << ", hasAES == " << hasAES; + std::cout << ", hasPMULL == " << hasPMULL << ", hasSHA1 == " << hasSHA1; + std::cout << ", hasSHA2 == " << hasSHA2 << ", hasSHA512 == " << hasSHA512; + std::cout << ", hasSHA3 == " << hasSHA3 << ", hasSM3 == " << hasSM3; + std::cout << ", hasSM4 == " << hasSM4 << "\n"; +# endif #elif (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) const bool hasAltivec = HasAltivec();