From 6169b5d4d6fe92389988fdb7154271cf5aecc84a Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 30 Jul 2017 19:14:47 -0400 Subject: [PATCH] Cleaned up ARM related defines, like CRYPTOPP_ARM_NEON_AVAILABLE We only need to base it on the compiler in config.h. config.h activates the code path guarded by HasNEON(). The source file that actially provides the NEON implementation will be compiled with -fpu=neon or -march=armv8-a. Since we are providing the specialized implementation in a sequestered source file (and not a header file), we can probably avoid the defines like CRYPTOPP_ARM_NEON_AVAILABLE altogether. --- GNUmakefile | 45 ++++---- TestScripts/cryptest.sh | 220 ++++++++++++++++++------------------- config.h | 22 ++-- cpu.cpp | 59 ++-------- cryptest.sh | 4 +- gcm-simd.cpp | 235 +++++++++++++++++++++++++++++++++++++++- gcm.cpp | 199 ++-------------------------------- 7 files changed, 391 insertions(+), 393 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index eed0c647..8bb5526f 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -202,6 +202,7 @@ endif # CXXFLAGS ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),) SSE42_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -dM -E - | grep -i -c -q __SSE4_2__ && echo "-msse4.2") ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),) +GCM_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -mpclmul -dM -E - | grep -i -c -q __PCLMUL__ && echo "-mpclmul") AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -maes -dM -E - | grep -i -c -q __AES__ && echo "-maes") ifeq ($(findstring -DCRYPTOPP_DISABLE_SHA,$(CXXFLAGS)),) SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -msha -dM -E - | grep -i -c -q __SHA__ && echo "-msse4.2 -msha") @@ -308,14 +309,16 @@ ifeq ($(IS_NEON),1) endif ifeq ($(IS_ARMV8),1) - ARMV8A_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a") - CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc") - AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") - SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") - GCM_FLAG = $(ARMV8A_FLAG) - ARIA_FLAG = $(ARMV8A_FLAG) - BLAKE2_FLAG = $(ARMV8A_FLAG) - NEON_FLAG = $(ARMV8A_FLAG) + ARMV8A_NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a") + ARMV8A_CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc") + ARMV8A_CRYPTO_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") + CRC_FLAG = $(ARMV8A_CRC_FLAG) + AES_FLAG = $(ARMV8A_CRYPTO_FLAG) + GCM_FLAG = $(ARMV8A_CRYPTO_FLAG) + SHA_FLAG = $(ARMV8A_CRYPTO_FLAG) + ARIA_FLAG = $(ARMV8A_NEON_FLAG) + BLAKE2_FLAG = $(ARMV8A_NEON_FLAG) + NEON_FLAG = $(ARMV8A_NEON_FLAG) endif endif # IS_X86 @@ -851,7 +854,7 @@ endif # Dependencies # Run rdrand-nasm.sh to create the object files ifeq ($(USE_NASM),1) rdrand.o: rdrand.h rdrand.cpp rdrand.s - $(CXX) $(strip $(CXXFLAGS)) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp + $(CXX) $(strip $(CXXFLAGS) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp) rdrand-%.o: ./rdrand-nasm.sh endif @@ -884,49 +887,49 @@ sha-simd.o : sha-simd.cpp # Also see https://stackoverflow.com/q/12983137/608639. ifeq ($(findstring true,$(CI)),true) threefish.o : threefish.cpp - $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $< + $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $< endif # Don't build Rijndael with UBsan. Too much noise due to unaligned data accesses. ifneq ($(findstring -fsanitize=undefined,$(CXXFLAGS)),) rijndael.o : rijndael.cpp - $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $< + $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $< endif # Don't build VMAC and friends with Asan. Too many false positives. ifneq ($(findstring -fsanitize=address,$(CXXFLAGS)),) vmac.o : vmac.cpp - $(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS))) -c $< + $(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS)) -c) $< endif # Only use CRYPTOPP_DATA_DIR if its not set in CXXFLAGS ifeq ($(findstring -DCRYPTOPP_DATA_DIR, $(strip $(CXXFLAGS))),) ifneq ($(strip $(CRYPTOPP_DATA_DIR)),) validat%.o : validat%.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $< bench%.o : bench%.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $< datatest.o : datatest.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $< test.o : test.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $< endif endif %.dllonly.o : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DLL_ONLY -c $< -o $@ + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DLL_ONLY -c) $< -o $@ %.import.o : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_IMPORTS -c $< -o $@ + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_IMPORTS -c) $< -o $@ %.export.o : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_EXPORTS -c $< -o $@ + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_EXPORTS -c) $< -o $@ %.bc : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -c $< + $(CXX) $(strip $(CXXFLAGS) -c) $< %.o : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -c $< + $(CXX) $(strip $(CXXFLAGS) -c) $< .PHONY: so_warning so_warning: diff --git a/TestScripts/cryptest.sh b/TestScripts/cryptest.sh index 21cc2af9..55c07170 100755 --- a/TestScripts/cryptest.sh +++ b/TestScripts/cryptest.sh @@ -250,15 +250,15 @@ if [[ ("$SUNCC_510_OR_ABOVE" -ne "0") ]]; then HAVE_OFAST=0 fi -if [[ (-z "$TMP") ]]; then +if [[ (-z "$TMPDIR") ]]; then if [[ (-d "/tmp") ]]; then - TMP=/tmp + TMPDIR=/tmp elif [[ (-d "/temp") ]]; then - TMP=/temp + TMPDIR=/temp elif [[ (-d "$HOME/tmp") ]]; then - TMP="$HOME/tmp" + TMPDIR="$HOME/tmp" else - echo "Please set TMP to a valid directory" + echo "Please set TMPDIR to a valid directory" [[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1 fi fi @@ -267,74 +267,74 @@ fi rm -f adhoc.cpp > /dev/null 2>&1 cp adhoc.cpp.proto adhoc.cpp -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX17") ]]; then HAVE_CXX17=0 - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX17=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU17") ]]; then HAVE_GNU17=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU17=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX14") ]]; then HAVE_CXX14=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX14=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU14") ]]; then HAVE_GNU14=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU14=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX11") ]]; then HAVE_CXX11=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX11=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU11") ]]; then HAVE_GNU11=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU11=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX03") ]]; then HAVE_CXX03=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX03=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU03") ]]; then HAVE_GNU03=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU03=1 fi @@ -342,13 +342,13 @@ fi # Use a fallback strategy so OPT_O0 can be used with DEBUG_CXXFLAGS OPT_O0= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O0=-O0 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O0=-xO0 fi @@ -356,13 +356,13 @@ fi # Use a fallback strategy so OPT_O1 can be used with VALGRIND_CXXFLAGS OPT_O1= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O1=-O1 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O1=-xO1 fi @@ -370,13 +370,13 @@ fi # Use a fallback strategy so OPT_O2 can be used with RELEASE_CXXFLAGS OPT_O2= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O2=-O2 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O2=-xO2 fi @@ -385,14 +385,14 @@ fi if [[ (-z "$HAVE_O3") ]]; then HAVE_O3=0 OPT_O3= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O3=1 OPT_O3=-O3 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O3=1 OPT_O3=-xO3 @@ -404,14 +404,14 @@ fi if [[ ( (-z "$HAVE_O5") && ("$CLANG_COMPILER" -eq "0") ) ]]; then HAVE_O5=0 OPT_O5= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O5=1 OPT_O5=-O5 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O5=1 OPT_O5=-xO5 @@ -423,8 +423,8 @@ fi if [[ (-z "$HAVE_OS") ]]; then HAVE_OS=0 OPT_OS= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_OS=1 OPT_OS=-Os @@ -435,8 +435,8 @@ fi if [[ (-z "$HAVE_OFAST") ]]; then HAVE_OFAST=0 OPT_OFAST= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_OFAST=1 OPT_OFAST=-Ofast @@ -445,13 +445,13 @@ fi # Use a fallback strategy so OPT_G2 can be used with RELEASE_CXXFLAGS OPT_G2= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G2=-g2 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G2=-g fi @@ -459,13 +459,13 @@ fi # Use a fallback strategy so OPT_G3 can be used with DEBUG_CXXFLAGS OPT_G3= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G3=-g3 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G3=-g fi @@ -473,10 +473,10 @@ fi # Cygwin and noisy compiles OPT_PIC= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_PIC") ]]; then HAVE_PIC=0 - PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMP/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)') + PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMPDIR/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)') if [[ "$PIC_PROBLEMS" -eq "0" ]]; then HAVE_PIC=1 OPT_PIC=-fPIC @@ -484,12 +484,12 @@ if [[ (-z "$HAVE_PIC") ]]; then fi # GCC 4.8; Clang 3.4 -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_UBSAN") ]]; then HAVE_UBSAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_UBSAN=1 fi @@ -497,12 +497,12 @@ if [[ (-z "$HAVE_UBSAN") ]]; then fi # GCC 4.8; Clang 3.4 -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_ASAN") ]]; then HAVE_ASAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_ASAN=1 fi @@ -510,41 +510,41 @@ if [[ (-z "$HAVE_ASAN") ]]; then fi # GCC 6.0; maybe Clang -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_BSAN") ]]; then HAVE_BSAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_BSAN=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_OMP") ]]; then HAVE_OMP=0 if [[ "$GCC_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-fopenmp -O3) fi elif [[ "$INTEL_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-openmp -O3) fi elif [[ "$CLANG_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-fopenmp=libomp -O3) fi elif [[ "$SUN_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-xopenmp=parallel -xO3) @@ -552,33 +552,33 @@ if [[ (-z "$HAVE_OMP") ]]; then fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_INTEL_MULTIARCH") ]]; then HAVE_INTEL_MULTIARCH=0 if [[ ("$IS_DARWIN" -ne "0") && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_INTEL_MULTIARCH=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_PPC_MULTIARCH") ]]; then HAVE_PPC_MULTIARCH=0 if [[ ("$IS_DARWIN" -ne "0") && ("$IS_PPC" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_PPC_MULTIARCH=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_X32") ]]; then HAVE_X32=0 if [[ "$IS_X32" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_X32=1 fi @@ -588,8 +588,8 @@ fi # Hit or miss, mostly hit if [[ (-z "$HAVE_NATIVE_ARCH") ]]; then HAVE_NATIVE_ARCH=0 - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_NATIVE_ARCH=1 fi @@ -603,7 +603,7 @@ if [[ (-z "$HAVE_LDGOLD") ]]; then if [[ (! -z "$LD_GOLD") && (! -z "$ELF_FILE") ]]; then LD_GOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf") if [[ ("$LD_GOLD" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_LDGOLD=1 fi @@ -688,10 +688,10 @@ fi # Used to disassemble object modules so we can verify some aspects of code generation if [[ (-z "$HAVE_DISASS") ]]; then - echo "int main(int argc, char* argv[]) {return 0;}" > "$TMP/test.cc" - "$CXX" "$TMP/test.cc" -o "$TMP/test.exe" > /dev/null 2>&1 + echo "int main(int argc, char* argv[]) {return 0;}" > "$TMPDIR/test.cc" + "$CXX" "$TMPDIR/test.cc" -o "$TMPDIR/test.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then - "$DISASS" "${DISASSARGS[@]}" "$TMP/test.exe" > /dev/null 2>&1 + "$DISASS" "${DISASSARGS[@]}" "$TMPDIR/test.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_DISASS=1 else @@ -1167,7 +1167,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo OBJFILE=sha.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null) @@ -1201,7 +1201,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test CRC-32C code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_CRC32=1 fi @@ -1239,7 +1239,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test AES-NI code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_AESNI=1 fi @@ -1251,7 +1251,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo OBJFILE=rijndael.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 FAILED=0 @@ -1301,7 +1301,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 carryless multiply code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_PCLMUL=1 fi @@ -1312,8 +1312,8 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 FAILED=0 @@ -1339,11 +1339,11 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test RDRAND and RDSEED code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_RDRAND=1 fi - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_RDSEED=1 fi @@ -1355,7 +1355,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo OBJFILE=rdrand.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 FAILED=0 @@ -1385,7 +1385,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 SHA code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_SHA=1 fi @@ -1397,7 +1397,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo OBJFILE=sha-simd.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 FAILED=0 @@ -1465,7 +1465,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] echo "Testing: ARM NEON code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=aria.o; rm -f "$OBJFILE" 2>/dev/null + OBJFILE=aria-simd.o; rm -f "$OBJFILE" 2>/dev/null CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 @@ -1515,7 +1515,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] ############################################ # ARM CRC32 code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ARM_CRC32=1 fi @@ -1565,7 +1565,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] ############################################ # ARM carryless multiply code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ARM_PMULL=1 fi @@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null + OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 @@ -1603,7 +1603,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] ############################################ # ARM SHA code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ARM_SHA=1 fi @@ -5098,7 +5098,7 @@ fi if [[ ("$CLANG_COMPILER" -eq "0") ]]; then CLANG_CXX=$(which clang++ 2>&1 | "$GREP" -v "no clang++" | head -1) - "$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5133,7 +5133,7 @@ fi if [[ ("$GCC_COMPILER" -eq "0") ]]; then GCC_CXX=$(which g++ 2>&1 | "$GREP" -v "no g++" | head -1) - "$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5171,7 +5171,7 @@ if [[ ("$INTEL_COMPILER" -eq "0") ]]; then if [[ (-z "$INTEL_CXX") ]]; then INTEL_CXX=$(find /opt/intel -name icpc 2>/dev/null | "$GREP" -iv composer | head -1) fi - "$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5207,7 +5207,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-4*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5240,7 +5240,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-5*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5273,7 +5273,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-6*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5306,7 +5306,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-7*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5339,7 +5339,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.7*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5371,7 +5371,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.8*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5403,7 +5403,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.9*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5435,7 +5435,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-4*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ diff --git a/config.h b/config.h index 9b02fece..d6af3c9d 100644 --- a/config.h +++ b/config.h @@ -517,7 +517,7 @@ NAMESPACE_END // Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains. #if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || \ +# if defined(__ARM_NEON__) || defined(__ARM_FEATURE_NEON) || (CRYPTOPP_MSC_VER >= 1900) || \ (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) # define CRYPTOPP_ARM_NEON_AVAILABLE 1 # endif @@ -528,9 +528,8 @@ NAMESPACE_END // Microsoft plans to support ARM-64, but its not clear how to detect it. // TODO: Add MSC_VER and ARM-64 platform define when available #if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) -# if defined(__ARM_FEATURE_CRC32) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \ - (defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \ - (defined(__AARCH32EL__) || defined(__AARCH64EL__)) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \ + (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) # define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1 # endif #endif @@ -540,9 +539,8 @@ NAMESPACE_END // it at the moment. Microsoft plans to support ARM-64, but its not clear how to detect it. // TODO: Add MSC_VER and ARM-64 platform define when available #if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__) -# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \ - (defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \ - (defined(__AARCH32EL__) || defined(__AARCH64EL__)) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \ + (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) # define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1 # endif #endif @@ -552,19 +550,17 @@ NAMESPACE_END // Microsoft plans to support ARM-64, but its not clear how to detect it. // TODO: Add MSC_VER and ARM-64 platform define when available #if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) -# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \ - (defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \ - (defined(__AARCH32EL__) || defined(__AARCH64EL__)) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \ + (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) # define CRYPTOPP_ARMV8A_AES_AVAILABLE 1 +# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1 # define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1 # define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1 # endif #endif -// ARM CRC testing +// TODO... #undef CRYPTOPP_ARMV8A_AES_AVAILABLE -#undef CRYPTOPP_ARMV8A_PMULL_AVAILABLE -#undef CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE #endif // ARM32, ARM64 diff --git a/cpu.cpp b/cpu.cpp index 91706867..b9e2ea9b 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -352,36 +352,9 @@ extern "C" }; #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -static bool TryNEON() -{ -#if (CRYPTOPP_ARM_NEON_AVAILABLE) - return CPU_TryNEON_ARM(); -#else - return false; -#endif -} - -static bool TryCRC32() -{ -#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE) - return CPU_TryCRC32_ARMV8(); -#else - return false; -#endif -} - -static bool TryPMULL() -{ -#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE) - return CPU_TryPMULL_ARMV8(); -#else - return false; -#endif -} - static bool TryAES() { -#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) +#if (CRYPTOPP_ARMV8A_AES_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) volatile bool result = true; __try @@ -432,32 +405,14 @@ static bool TryAES() #endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE } -static bool TrySHA1() -{ -#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE) - return CPU_TrySHA1_ARMV8(); -#else - return false; -#endif -} - -static bool TrySHA2() -{ -#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE) - return CPU_TrySHA2_ARMV8(); -#else - return false; -#endif -} - void DetectArmFeatures() { - g_hasNEON = TryNEON(); - g_hasPMULL = TryPMULL(); - g_hasCRC32 = TryCRC32(); - g_hasAES = TryAES(); - g_hasSHA1 = TrySHA1(); - g_hasSHA2 = TrySHA2(); + g_hasNEON = CPU_TryNEON_ARM(); + g_hasPMULL = CPU_TryPMULL_ARMV8(); + g_hasCRC32 = CPU_TryCRC32_ARMV8(); + g_hasAES = TryAES(); // TODO + g_hasSHA1 = CPU_TrySHA1_ARMV8(); + g_hasSHA2 = CPU_TrySHA2_ARMV8(); g_ArmDetectionDone = true; } diff --git a/cryptest.sh b/cryptest.sh index e2aa8714..55c07170 100755 --- a/cryptest.sh +++ b/cryptest.sh @@ -1312,7 +1312,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null + OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 @@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null + OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 diff --git a/gcm-simd.cpp b/gcm-simd.cpp index e8081001..3e57fc23 100644 --- a/gcm-simd.cpp +++ b/gcm-simd.cpp @@ -26,6 +26,121 @@ # include #endif +ANONYMOUS_NAMESPACE_BEGIN + +// GCC 4.8 and 4.9 are missing PMULL gear +#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE) +# if (CRYPTOPP_GCC_VERSION >= 40800) && (CRYPTOPP_GCC_VERSION < 50000) +inline poly128_t VMULL_P64(poly64_t a, poly64_t b) +{ + return __builtin_aarch64_crypto_pmulldi_ppp (a, b); +} + +inline poly128_t VMULL_HIGH_P64(poly64x2_t a, poly64x2_t b) +{ + return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); +} +# endif +#endif + +#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE +#if defined(__GNUC__) +// Schneiders, Hovsmith and O'Rourke used this trick. +// It results in much better code generation in production code +// by avoiding D-register spills when using vgetq_lane_u64. The +// problem does not surface under minimal test cases. +inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" + :"=w" (r) : "w" (a), "w" (b) ); + return r; +} + +inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" + :"=w" (r) : "w" (a), "w" (vget_high_u64(b)) ); + return r; +} + +inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" + :"=w" (r) : "w" (vget_high_u64(a)), "w" (b) ); + return r; +} + +inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t" + :"=w" (r) : "w" (a), "w" (b) ); + return r; +} + +inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c) +{ + uint64x2_t r; + __asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t" + :"=w" (r) : "w" (a), "w" (b), "I" (c) ); + return r; +} + +// https://github.com/weidai11/cryptopp/issues/366 +template +inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t" + :"=w" (r) : "w" (a), "w" (b), "I" (C) ); + return r; +} +#endif // GCC and compatibles + +#if defined(_MSC_VER) +inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b) +{ + return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0), + vgetq_lane_u64(vreinterpretq_u64_u8(b),0))); +} + +inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b) +{ + return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0), + vgetq_lane_u64(vreinterpretq_u64_u8(b),1))); +} + +inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b) +{ + return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1), + vgetq_lane_u64(vreinterpretq_u64_u8(b),0))); +} + +inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b) +{ + return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1), + vgetq_lane_u64(vreinterpretq_u64_u8(b),1))); +} + +inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c) +{ + return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c); +} + +// https://github.com/weidai11/cryptopp/issues/366 +template +inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) +{ + return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C); +} +#endif // Microsoft and compatibles +#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE + +ANONYMOUS_NAMESPACE_END + NAMESPACE_BEGIN(CryptoPP) #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY @@ -71,7 +186,7 @@ bool CPU_TryPMULL_ARMV8() // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); if (oldHandler == SIG_ERR) return false; @@ -79,7 +194,7 @@ bool CPU_TryPMULL_ARMV8() if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) return false; - if (setjmp(s_jmpNoPMULL)) + if (setjmp(s_jmpSIGILL)) result = false; else { @@ -87,8 +202,8 @@ bool CPU_TryPMULL_ARMV8() const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0}, b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0}; - const poly128_t r1 = vmull_p64(a1, b1); - const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2)); + const poly128_t r1 = VMULL_P64(a1, b1); + const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2)); // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233. const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum} @@ -115,4 +230,116 @@ void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c) } #endif +#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE + +ANONYMOUS_NAMESPACE_BEGIN + +CRYPTOPP_ALIGN_DATA(16) +const word64 s_clmulConstants64[] = { + W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients + W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607), // Unused for ARM; used for x86 _mm_shuffle_epi8 + W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f) // Unused for ARM; used for x86 _mm_shuffle_epi8 +}; + +const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64; +const unsigned int s_clmulTableSizeInBlocks = 8; + +ANONYMOUS_NAMESPACE_END + +uint64x2_t GCM_Reduce_ARMV8A(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r) +{ + c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0)); + c1 = veorq_u64(c1, PMULL_01(c0, r)); + c0 = VEXT_U8<8>(c0, vdupq_n_u64(0)); + c0 = vshlq_n_u64(veorq_u64(c0, c1), 1); + c0 = PMULL_00(c0, r); + c2 = veorq_u64(c2, c0); + c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0))); + c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63); + c2 = vshlq_n_u64(c2, 1); + + return veorq_u64(c2, c1); +} + +uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r) +{ + const uint64x2_t c0 = PMULL_00(x, h); + const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h)); + const uint64x2_t c2 = PMULL_11(x, h); + + return GCM_Reduce_ARMV8A(c0, c1, c2, r); +} + +size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer) +{ + const uint64x2_t* table = reinterpret_cast(mtable); + uint64x2_t x = vreinterpretq_u64_u8(vld1q_u8(hbuffer)); + const uint64x2_t r = s_clmulConstants[0]; + + const size_t BLOCKSIZE = 16; + while (len >= BLOCKSIZE) + { + size_t s = UnsignedMin(len/BLOCKSIZE, s_clmulTableSizeInBlocks), i=0; + uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*BLOCKSIZE))); + uint64x2_t c0 = vdupq_n_u64(0); + uint64x2_t c1 = vdupq_n_u64(0); + uint64x2_t c2 = vdupq_n_u64(0); + + while (true) + { + const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i)); + const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1)); + const uint64x2_t h2 = veorq_u64(h0, h1); + + if (++i == s) + { + const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data))); + d1 = veorq_u64(vextq_u64(t1, t1, 1), x); + c0 = veorq_u64(c0, PMULL_00(d1, h0)); + c2 = veorq_u64(c2, PMULL_10(d1, h1)); + d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), + vget_low_u32(vreinterpretq_u32_u64(d1)))); + c1 = veorq_u64(c1, PMULL_00(d1, h2)); + + break; + } + + d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8))); + c0 = veorq_u64(c0, PMULL_10(d2, h0)); + c2 = veorq_u64(c2, PMULL_10(d1, h1)); + d2 = veorq_u64(d2, d1); + c1 = veorq_u64(c1, PMULL_10(d2, h2)); + + if (++i == s) + { + const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data))); + d1 = veorq_u64(vextq_u64(t2, t2, 1), x); + c0 = veorq_u64(c0, PMULL_01(d1, h0)); + c2 = veorq_u64(c2, PMULL_11(d1, h1)); + d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), + vget_low_u32(vreinterpretq_u32_u64(d1)))); + c1 = veorq_u64(c1, PMULL_01(d1, h2)); + + break; + } + + const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8))); + d2 = vextq_u64(t3, t3, 1); + c0 = veorq_u64(c0, PMULL_01(d1, h0)); + c2 = veorq_u64(c2, PMULL_01(d2, h1)); + d1 = veorq_u64(d1, d2); + c1 = veorq_u64(c1, PMULL_01(d1, h2)); + } + data += s*16; + len -= s*16; + + c1 = veorq_u64(veorq_u64(c1, c0), c2); + x = GCM_Reduce_ARMV8A(c0, c1, c2, r); + } + + vst1q_u64(reinterpret_cast(hbuffer), x); + return len; +} +#endif + NAMESPACE_END \ No newline at end of file diff --git a/gcm.cpp b/gcm.cpp index 2c306dd0..da1ea1f4 100644 --- a/gcm.cpp +++ b/gcm.cpp @@ -53,102 +53,6 @@ NAMESPACE_BEGIN(CryptoPP) extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c); #endif -#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE -#if defined(__GNUC__) -// Schneiders, Hovsmith and O'Rourke used this trick. -// It results in much better code generation in production code -// by avoiding D-register spills when using vgetq_lane_u64. The -// problem does not surface under minimal test cases. -inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" - :"=w" (r) : "w" (a), "w" (b) ); - return r; -} - -inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" - :"=w" (r) : "w" (a), "w" (vget_high_u64(b)) ); - return r; -} - -inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" - :"=w" (r) : "w" (vget_high_u64(a)), "w" (b) ); - return r; -} - -inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t" - :"=w" (r) : "w" (a), "w" (b) ); - return r; -} - -inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c) -{ - uint64x2_t r; - __asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t" - :"=w" (r) : "w" (a), "w" (b), "I" (c) ); - return r; -} - -// https://github.com/weidai11/cryptopp/issues/366 -template -inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t" - :"=w" (r) : "w" (a), "w" (b), "I" (C) ); - return r; -} -#endif // GCC and compatibles - -#if defined(_MSC_VER) -inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b) -{ - return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0), - vgetq_lane_u64(vreinterpretq_u64_u8(b),0))); -} - -inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b) -{ - return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0), - vgetq_lane_u64(vreinterpretq_u64_u8(b),1))); -} - -inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b) -{ - return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1), - vgetq_lane_u64(vreinterpretq_u64_u8(b),0))); -} - -inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b) -{ - return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1), - vgetq_lane_u64(vreinterpretq_u64_u8(b),1))); -} - -inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c) -{ - return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c); -} - -// https://github.com/weidai11/cryptopp/issues/366 -template -inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) -{ - return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C); -} -#endif // Microsoft and compatibles -#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE - word16 GCM_Base::s_reductionTable[256]; volatile bool GCM_Base::s_reductionTableInitialized = false; @@ -278,6 +182,9 @@ inline __m128i CLMUL_GF_Mul(const __m128i &x, const __m128i &h, const __m128i &r #if CRYPTOPP_ARMV8A_PMULL_AVAILABLE +extern size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer); +extern uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r); + CRYPTOPP_ALIGN_DATA(16) static const word64 s_clmulConstants64[] = { W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients @@ -287,31 +194,6 @@ static const word64 s_clmulConstants64[] = { static const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64; static const unsigned int s_clmulTableSizeInBlocks = 8; - -inline uint64x2_t PMULL_Reduce(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r) -{ - // See comments fo CLMUL_Reduce - c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0)); - c1 = veorq_u64(c1, PMULL_01(c0, r)); - c0 = VEXT_U8<8>(c0, vdupq_n_u64(0)); - c0 = vshlq_n_u64(veorq_u64(c0, c1), 1); - c0 = PMULL_00(c0, r); - c2 = veorq_u64(c2, c0); - c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0))); - c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63); - c2 = vshlq_n_u64(c2, 1); - - return veorq_u64(c2, c1); -} - -inline uint64x2_t PMULL_GF_Mul(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r) -{ - const uint64x2_t c0 = PMULL_00(x, h); - const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h)); - const uint64x2_t c2 = PMULL_11(x, h); - - return PMULL_Reduce(c0, c1, c2, r); -} #endif void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs ¶ms) @@ -388,15 +270,15 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const uint64x2_t h = h0; for (i=0; i= 16) - { - size_t s = UnsignedMin(len/16, s_clmulTableSizeInBlocks), i=0; - uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*16))); - uint64x2_t c0 = vdupq_n_u64(0); - uint64x2_t c1 = vdupq_n_u64(0); - uint64x2_t c2 = vdupq_n_u64(0); - - while (true) - { - const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i)); - const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1)); - const uint64x2_t h2 = veorq_u64(h0, h1); - - if (++i == s) - { - const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data))); - d1 = veorq_u64(vextq_u64(t1, t1, 1), x); - c0 = veorq_u64(c0, PMULL_00(d1, h0)); - c2 = veorq_u64(c2, PMULL_10(d1, h1)); - d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), - vget_low_u32(vreinterpretq_u32_u64(d1)))); - c1 = veorq_u64(c1, PMULL_00(d1, h2)); - - break; - } - - d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8))); - c0 = veorq_u64(c0, PMULL_10(d2, h0)); - c2 = veorq_u64(c2, PMULL_10(d1, h1)); - d2 = veorq_u64(d2, d1); - c1 = veorq_u64(c1, PMULL_10(d2, h2)); - - if (++i == s) - { - const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data))); - d1 = veorq_u64(vextq_u64(t2, t2, 1), x); - c0 = veorq_u64(c0, PMULL_01(d1, h0)); - c2 = veorq_u64(c2, PMULL_11(d1, h1)); - d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), - vget_low_u32(vreinterpretq_u32_u64(d1)))); - c1 = veorq_u64(c1, PMULL_01(d1, h2)); - - break; - } - - const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8))); - d2 = vextq_u64(t3, t3, 1); - c0 = veorq_u64(c0, PMULL_01(d1, h0)); - c2 = veorq_u64(c2, PMULL_01(d2, h1)); - d1 = veorq_u64(d1, d2); - c1 = veorq_u64(c1, PMULL_01(d1, h2)); - } - data += s*16; - len -= s*16; - - c1 = veorq_u64(veorq_u64(c1, c0), c2); - x = PMULL_Reduce(c0, c1, c2, r); - } - - vst1q_u64((uint64_t *)HashBuffer(), x); - return len; -} + return GCM_AuthenticateBlocks_ARMV8(data, len, MulTable(), HashBuffer()); + } #endif typedef BlockGetAndPut Block;