diff --git a/GNUmakefile b/GNUmakefile index eed0c647..8bb5526f 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -202,6 +202,7 @@ endif # CXXFLAGS ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),) SSE42_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -dM -E - | grep -i -c -q __SSE4_2__ && echo "-msse4.2") ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),) +GCM_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -mpclmul -dM -E - | grep -i -c -q __PCLMUL__ && echo "-mpclmul") AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -maes -dM -E - | grep -i -c -q __AES__ && echo "-maes") ifeq ($(findstring -DCRYPTOPP_DISABLE_SHA,$(CXXFLAGS)),) SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -msha -dM -E - | grep -i -c -q __SHA__ && echo "-msse4.2 -msha") @@ -308,14 +309,16 @@ ifeq ($(IS_NEON),1) endif ifeq ($(IS_ARMV8),1) - ARMV8A_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a") - CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc") - AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") - SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") - GCM_FLAG = $(ARMV8A_FLAG) - ARIA_FLAG = $(ARMV8A_FLAG) - BLAKE2_FLAG = $(ARMV8A_FLAG) - NEON_FLAG = $(ARMV8A_FLAG) + ARMV8A_NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a") + ARMV8A_CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc") + ARMV8A_CRYPTO_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") + CRC_FLAG = $(ARMV8A_CRC_FLAG) + AES_FLAG = $(ARMV8A_CRYPTO_FLAG) + GCM_FLAG = $(ARMV8A_CRYPTO_FLAG) + SHA_FLAG = $(ARMV8A_CRYPTO_FLAG) + ARIA_FLAG = $(ARMV8A_NEON_FLAG) + BLAKE2_FLAG = $(ARMV8A_NEON_FLAG) + NEON_FLAG = $(ARMV8A_NEON_FLAG) endif endif # IS_X86 @@ -851,7 +854,7 @@ endif # Dependencies # Run rdrand-nasm.sh to create the object files ifeq ($(USE_NASM),1) rdrand.o: rdrand.h rdrand.cpp rdrand.s - $(CXX) $(strip $(CXXFLAGS)) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp + $(CXX) $(strip $(CXXFLAGS) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp) rdrand-%.o: ./rdrand-nasm.sh endif @@ -884,49 +887,49 @@ sha-simd.o : sha-simd.cpp # Also see https://stackoverflow.com/q/12983137/608639. ifeq ($(findstring true,$(CI)),true) threefish.o : threefish.cpp - $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $< + $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $< endif # Don't build Rijndael with UBsan. Too much noise due to unaligned data accesses. ifneq ($(findstring -fsanitize=undefined,$(CXXFLAGS)),) rijndael.o : rijndael.cpp - $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $< + $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $< endif # Don't build VMAC and friends with Asan. Too many false positives. ifneq ($(findstring -fsanitize=address,$(CXXFLAGS)),) vmac.o : vmac.cpp - $(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS))) -c $< + $(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS)) -c) $< endif # Only use CRYPTOPP_DATA_DIR if its not set in CXXFLAGS ifeq ($(findstring -DCRYPTOPP_DATA_DIR, $(strip $(CXXFLAGS))),) ifneq ($(strip $(CRYPTOPP_DATA_DIR)),) validat%.o : validat%.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $< bench%.o : bench%.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $< datatest.o : datatest.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $< test.o : test.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $< endif endif %.dllonly.o : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DLL_ONLY -c $< -o $@ + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DLL_ONLY -c) $< -o $@ %.import.o : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_IMPORTS -c $< -o $@ + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_IMPORTS -c) $< -o $@ %.export.o : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_EXPORTS -c $< -o $@ + $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_EXPORTS -c) $< -o $@ %.bc : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -c $< + $(CXX) $(strip $(CXXFLAGS) -c) $< %.o : %.cpp - $(CXX) $(strip $(CXXFLAGS)) -c $< + $(CXX) $(strip $(CXXFLAGS) -c) $< .PHONY: so_warning so_warning: diff --git a/TestScripts/cryptest.sh b/TestScripts/cryptest.sh index 21cc2af9..55c07170 100755 --- a/TestScripts/cryptest.sh +++ b/TestScripts/cryptest.sh @@ -250,15 +250,15 @@ if [[ ("$SUNCC_510_OR_ABOVE" -ne "0") ]]; then HAVE_OFAST=0 fi -if [[ (-z "$TMP") ]]; then +if [[ (-z "$TMPDIR") ]]; then if [[ (-d "/tmp") ]]; then - TMP=/tmp + TMPDIR=/tmp elif [[ (-d "/temp") ]]; then - TMP=/temp + TMPDIR=/temp elif [[ (-d "$HOME/tmp") ]]; then - TMP="$HOME/tmp" + TMPDIR="$HOME/tmp" else - echo "Please set TMP to a valid directory" + echo "Please set TMPDIR to a valid directory" [[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1 fi fi @@ -267,74 +267,74 @@ fi rm -f adhoc.cpp > /dev/null 2>&1 cp adhoc.cpp.proto adhoc.cpp -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX17") ]]; then HAVE_CXX17=0 - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX17=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU17") ]]; then HAVE_GNU17=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU17=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX14") ]]; then HAVE_CXX14=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX14=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU14") ]]; then HAVE_GNU14=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU14=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX11") ]]; then HAVE_CXX11=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX11=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU11") ]]; then HAVE_GNU11=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU11=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX03") ]]; then HAVE_CXX03=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX03=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU03") ]]; then HAVE_GNU03=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU03=1 fi @@ -342,13 +342,13 @@ fi # Use a fallback strategy so OPT_O0 can be used with DEBUG_CXXFLAGS OPT_O0= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O0=-O0 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O0=-xO0 fi @@ -356,13 +356,13 @@ fi # Use a fallback strategy so OPT_O1 can be used with VALGRIND_CXXFLAGS OPT_O1= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O1=-O1 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O1=-xO1 fi @@ -370,13 +370,13 @@ fi # Use a fallback strategy so OPT_O2 can be used with RELEASE_CXXFLAGS OPT_O2= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O2=-O2 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O2=-xO2 fi @@ -385,14 +385,14 @@ fi if [[ (-z "$HAVE_O3") ]]; then HAVE_O3=0 OPT_O3= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O3=1 OPT_O3=-O3 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O3=1 OPT_O3=-xO3 @@ -404,14 +404,14 @@ fi if [[ ( (-z "$HAVE_O5") && ("$CLANG_COMPILER" -eq "0") ) ]]; then HAVE_O5=0 OPT_O5= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O5=1 OPT_O5=-O5 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O5=1 OPT_O5=-xO5 @@ -423,8 +423,8 @@ fi if [[ (-z "$HAVE_OS") ]]; then HAVE_OS=0 OPT_OS= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_OS=1 OPT_OS=-Os @@ -435,8 +435,8 @@ fi if [[ (-z "$HAVE_OFAST") ]]; then HAVE_OFAST=0 OPT_OFAST= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_OFAST=1 OPT_OFAST=-Ofast @@ -445,13 +445,13 @@ fi # Use a fallback strategy so OPT_G2 can be used with RELEASE_CXXFLAGS OPT_G2= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G2=-g2 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G2=-g fi @@ -459,13 +459,13 @@ fi # Use a fallback strategy so OPT_G3 can be used with DEBUG_CXXFLAGS OPT_G3= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G3=-g3 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G3=-g fi @@ -473,10 +473,10 @@ fi # Cygwin and noisy compiles OPT_PIC= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_PIC") ]]; then HAVE_PIC=0 - PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMP/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)') + PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMPDIR/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)') if [[ "$PIC_PROBLEMS" -eq "0" ]]; then HAVE_PIC=1 OPT_PIC=-fPIC @@ -484,12 +484,12 @@ if [[ (-z "$HAVE_PIC") ]]; then fi # GCC 4.8; Clang 3.4 -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_UBSAN") ]]; then HAVE_UBSAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_UBSAN=1 fi @@ -497,12 +497,12 @@ if [[ (-z "$HAVE_UBSAN") ]]; then fi # GCC 4.8; Clang 3.4 -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_ASAN") ]]; then HAVE_ASAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_ASAN=1 fi @@ -510,41 +510,41 @@ if [[ (-z "$HAVE_ASAN") ]]; then fi # GCC 6.0; maybe Clang -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_BSAN") ]]; then HAVE_BSAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_BSAN=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_OMP") ]]; then HAVE_OMP=0 if [[ "$GCC_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-fopenmp -O3) fi elif [[ "$INTEL_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-openmp -O3) fi elif [[ "$CLANG_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-fopenmp=libomp -O3) fi elif [[ "$SUN_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-xopenmp=parallel -xO3) @@ -552,33 +552,33 @@ if [[ (-z "$HAVE_OMP") ]]; then fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_INTEL_MULTIARCH") ]]; then HAVE_INTEL_MULTIARCH=0 if [[ ("$IS_DARWIN" -ne "0") && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_INTEL_MULTIARCH=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_PPC_MULTIARCH") ]]; then HAVE_PPC_MULTIARCH=0 if [[ ("$IS_DARWIN" -ne "0") && ("$IS_PPC" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_PPC_MULTIARCH=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_X32") ]]; then HAVE_X32=0 if [[ "$IS_X32" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_X32=1 fi @@ -588,8 +588,8 @@ fi # Hit or miss, mostly hit if [[ (-z "$HAVE_NATIVE_ARCH") ]]; then HAVE_NATIVE_ARCH=0 - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_NATIVE_ARCH=1 fi @@ -603,7 +603,7 @@ if [[ (-z "$HAVE_LDGOLD") ]]; then if [[ (! -z "$LD_GOLD") && (! -z "$ELF_FILE") ]]; then LD_GOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf") if [[ ("$LD_GOLD" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_LDGOLD=1 fi @@ -688,10 +688,10 @@ fi # Used to disassemble object modules so we can verify some aspects of code generation if [[ (-z "$HAVE_DISASS") ]]; then - echo "int main(int argc, char* argv[]) {return 0;}" > "$TMP/test.cc" - "$CXX" "$TMP/test.cc" -o "$TMP/test.exe" > /dev/null 2>&1 + echo "int main(int argc, char* argv[]) {return 0;}" > "$TMPDIR/test.cc" + "$CXX" "$TMPDIR/test.cc" -o "$TMPDIR/test.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then - "$DISASS" "${DISASSARGS[@]}" "$TMP/test.exe" > /dev/null 2>&1 + "$DISASS" "${DISASSARGS[@]}" "$TMPDIR/test.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_DISASS=1 else @@ -1167,7 +1167,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo OBJFILE=sha.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null) @@ -1201,7 +1201,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test CRC-32C code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_CRC32=1 fi @@ -1239,7 +1239,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test AES-NI code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_AESNI=1 fi @@ -1251,7 +1251,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo OBJFILE=rijndael.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 FAILED=0 @@ -1301,7 +1301,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 carryless multiply code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_PCLMUL=1 fi @@ -1312,8 +1312,8 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 FAILED=0 @@ -1339,11 +1339,11 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test RDRAND and RDSEED code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_RDRAND=1 fi - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_RDSEED=1 fi @@ -1355,7 +1355,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo OBJFILE=rdrand.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 FAILED=0 @@ -1385,7 +1385,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 SHA code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_SHA=1 fi @@ -1397,7 +1397,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo OBJFILE=sha-simd.o; rm -f "$OBJFILE" 2>/dev/null - CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" + CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 FAILED=0 @@ -1465,7 +1465,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] echo "Testing: ARM NEON code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=aria.o; rm -f "$OBJFILE" 2>/dev/null + OBJFILE=aria-simd.o; rm -f "$OBJFILE" 2>/dev/null CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 @@ -1515,7 +1515,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] ############################################ # ARM CRC32 code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ARM_CRC32=1 fi @@ -1565,7 +1565,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] ############################################ # ARM carryless multiply code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ARM_PMULL=1 fi @@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null + OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 @@ -1603,7 +1603,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] ############################################ # ARM SHA code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ARM_SHA=1 fi @@ -5098,7 +5098,7 @@ fi if [[ ("$CLANG_COMPILER" -eq "0") ]]; then CLANG_CXX=$(which clang++ 2>&1 | "$GREP" -v "no clang++" | head -1) - "$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5133,7 +5133,7 @@ fi if [[ ("$GCC_COMPILER" -eq "0") ]]; then GCC_CXX=$(which g++ 2>&1 | "$GREP" -v "no g++" | head -1) - "$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5171,7 +5171,7 @@ if [[ ("$INTEL_COMPILER" -eq "0") ]]; then if [[ (-z "$INTEL_CXX") ]]; then INTEL_CXX=$(find /opt/intel -name icpc 2>/dev/null | "$GREP" -iv composer | head -1) fi - "$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5207,7 +5207,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-4*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5240,7 +5240,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-5*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5273,7 +5273,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-6*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5306,7 +5306,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-7*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5339,7 +5339,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.7*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5371,7 +5371,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.8*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5403,7 +5403,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.9*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5435,7 +5435,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-4*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ diff --git a/config.h b/config.h index 9b02fece..d6af3c9d 100644 --- a/config.h +++ b/config.h @@ -517,7 +517,7 @@ NAMESPACE_END // Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains. #if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || \ +# if defined(__ARM_NEON__) || defined(__ARM_FEATURE_NEON) || (CRYPTOPP_MSC_VER >= 1900) || \ (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) # define CRYPTOPP_ARM_NEON_AVAILABLE 1 # endif @@ -528,9 +528,8 @@ NAMESPACE_END // Microsoft plans to support ARM-64, but its not clear how to detect it. // TODO: Add MSC_VER and ARM-64 platform define when available #if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) -# if defined(__ARM_FEATURE_CRC32) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \ - (defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \ - (defined(__AARCH32EL__) || defined(__AARCH64EL__)) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \ + (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) # define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1 # endif #endif @@ -540,9 +539,8 @@ NAMESPACE_END // it at the moment. Microsoft plans to support ARM-64, but its not clear how to detect it. // TODO: Add MSC_VER and ARM-64 platform define when available #if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__) -# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \ - (defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \ - (defined(__AARCH32EL__) || defined(__AARCH64EL__)) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \ + (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) # define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1 # endif #endif @@ -552,19 +550,17 @@ NAMESPACE_END // Microsoft plans to support ARM-64, but its not clear how to detect it. // TODO: Add MSC_VER and ARM-64 platform define when available #if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) -# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \ - (defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \ - (defined(__AARCH32EL__) || defined(__AARCH64EL__)) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \ + (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) # define CRYPTOPP_ARMV8A_AES_AVAILABLE 1 +# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1 # define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1 # define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1 # endif #endif -// ARM CRC testing +// TODO... #undef CRYPTOPP_ARMV8A_AES_AVAILABLE -#undef CRYPTOPP_ARMV8A_PMULL_AVAILABLE -#undef CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE #endif // ARM32, ARM64 diff --git a/cpu.cpp b/cpu.cpp index 91706867..b9e2ea9b 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -352,36 +352,9 @@ extern "C" }; #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -static bool TryNEON() -{ -#if (CRYPTOPP_ARM_NEON_AVAILABLE) - return CPU_TryNEON_ARM(); -#else - return false; -#endif -} - -static bool TryCRC32() -{ -#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE) - return CPU_TryCRC32_ARMV8(); -#else - return false; -#endif -} - -static bool TryPMULL() -{ -#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE) - return CPU_TryPMULL_ARMV8(); -#else - return false; -#endif -} - static bool TryAES() { -#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) +#if (CRYPTOPP_ARMV8A_AES_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) volatile bool result = true; __try @@ -432,32 +405,14 @@ static bool TryAES() #endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE } -static bool TrySHA1() -{ -#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE) - return CPU_TrySHA1_ARMV8(); -#else - return false; -#endif -} - -static bool TrySHA2() -{ -#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE) - return CPU_TrySHA2_ARMV8(); -#else - return false; -#endif -} - void DetectArmFeatures() { - g_hasNEON = TryNEON(); - g_hasPMULL = TryPMULL(); - g_hasCRC32 = TryCRC32(); - g_hasAES = TryAES(); - g_hasSHA1 = TrySHA1(); - g_hasSHA2 = TrySHA2(); + g_hasNEON = CPU_TryNEON_ARM(); + g_hasPMULL = CPU_TryPMULL_ARMV8(); + g_hasCRC32 = CPU_TryCRC32_ARMV8(); + g_hasAES = TryAES(); // TODO + g_hasSHA1 = CPU_TrySHA1_ARMV8(); + g_hasSHA2 = CPU_TrySHA2_ARMV8(); g_ArmDetectionDone = true; } diff --git a/cryptest.sh b/cryptest.sh index e2aa8714..55c07170 100755 --- a/cryptest.sh +++ b/cryptest.sh @@ -1312,7 +1312,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null + OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 @@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ] echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS" echo - OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null + OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" COUNT=0 diff --git a/gcm-simd.cpp b/gcm-simd.cpp index e8081001..3e57fc23 100644 --- a/gcm-simd.cpp +++ b/gcm-simd.cpp @@ -26,6 +26,121 @@ # include #endif +ANONYMOUS_NAMESPACE_BEGIN + +// GCC 4.8 and 4.9 are missing PMULL gear +#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE) +# if (CRYPTOPP_GCC_VERSION >= 40800) && (CRYPTOPP_GCC_VERSION < 50000) +inline poly128_t VMULL_P64(poly64_t a, poly64_t b) +{ + return __builtin_aarch64_crypto_pmulldi_ppp (a, b); +} + +inline poly128_t VMULL_HIGH_P64(poly64x2_t a, poly64x2_t b) +{ + return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); +} +# endif +#endif + +#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE +#if defined(__GNUC__) +// Schneiders, Hovsmith and O'Rourke used this trick. +// It results in much better code generation in production code +// by avoiding D-register spills when using vgetq_lane_u64. The +// problem does not surface under minimal test cases. +inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" + :"=w" (r) : "w" (a), "w" (b) ); + return r; +} + +inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" + :"=w" (r) : "w" (a), "w" (vget_high_u64(b)) ); + return r; +} + +inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" + :"=w" (r) : "w" (vget_high_u64(a)), "w" (b) ); + return r; +} + +inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t" + :"=w" (r) : "w" (a), "w" (b) ); + return r; +} + +inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c) +{ + uint64x2_t r; + __asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t" + :"=w" (r) : "w" (a), "w" (b), "I" (c) ); + return r; +} + +// https://github.com/weidai11/cryptopp/issues/366 +template +inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) +{ + uint64x2_t r; + __asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t" + :"=w" (r) : "w" (a), "w" (b), "I" (C) ); + return r; +} +#endif // GCC and compatibles + +#if defined(_MSC_VER) +inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b) +{ + return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0), + vgetq_lane_u64(vreinterpretq_u64_u8(b),0))); +} + +inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b) +{ + return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0), + vgetq_lane_u64(vreinterpretq_u64_u8(b),1))); +} + +inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b) +{ + return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1), + vgetq_lane_u64(vreinterpretq_u64_u8(b),0))); +} + +inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b) +{ + return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1), + vgetq_lane_u64(vreinterpretq_u64_u8(b),1))); +} + +inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c) +{ + return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c); +} + +// https://github.com/weidai11/cryptopp/issues/366 +template +inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) +{ + return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C); +} +#endif // Microsoft and compatibles +#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE + +ANONYMOUS_NAMESPACE_END + NAMESPACE_BEGIN(CryptoPP) #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY @@ -71,7 +186,7 @@ bool CPU_TryPMULL_ARMV8() // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); if (oldHandler == SIG_ERR) return false; @@ -79,7 +194,7 @@ bool CPU_TryPMULL_ARMV8() if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) return false; - if (setjmp(s_jmpNoPMULL)) + if (setjmp(s_jmpSIGILL)) result = false; else { @@ -87,8 +202,8 @@ bool CPU_TryPMULL_ARMV8() const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0}, b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0}; - const poly128_t r1 = vmull_p64(a1, b1); - const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2)); + const poly128_t r1 = VMULL_P64(a1, b1); + const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2)); // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233. const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum} @@ -115,4 +230,116 @@ void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c) } #endif +#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE + +ANONYMOUS_NAMESPACE_BEGIN + +CRYPTOPP_ALIGN_DATA(16) +const word64 s_clmulConstants64[] = { + W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients + W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607), // Unused for ARM; used for x86 _mm_shuffle_epi8 + W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f) // Unused for ARM; used for x86 _mm_shuffle_epi8 +}; + +const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64; +const unsigned int s_clmulTableSizeInBlocks = 8; + +ANONYMOUS_NAMESPACE_END + +uint64x2_t GCM_Reduce_ARMV8A(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r) +{ + c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0)); + c1 = veorq_u64(c1, PMULL_01(c0, r)); + c0 = VEXT_U8<8>(c0, vdupq_n_u64(0)); + c0 = vshlq_n_u64(veorq_u64(c0, c1), 1); + c0 = PMULL_00(c0, r); + c2 = veorq_u64(c2, c0); + c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0))); + c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63); + c2 = vshlq_n_u64(c2, 1); + + return veorq_u64(c2, c1); +} + +uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r) +{ + const uint64x2_t c0 = PMULL_00(x, h); + const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h)); + const uint64x2_t c2 = PMULL_11(x, h); + + return GCM_Reduce_ARMV8A(c0, c1, c2, r); +} + +size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer) +{ + const uint64x2_t* table = reinterpret_cast(mtable); + uint64x2_t x = vreinterpretq_u64_u8(vld1q_u8(hbuffer)); + const uint64x2_t r = s_clmulConstants[0]; + + const size_t BLOCKSIZE = 16; + while (len >= BLOCKSIZE) + { + size_t s = UnsignedMin(len/BLOCKSIZE, s_clmulTableSizeInBlocks), i=0; + uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*BLOCKSIZE))); + uint64x2_t c0 = vdupq_n_u64(0); + uint64x2_t c1 = vdupq_n_u64(0); + uint64x2_t c2 = vdupq_n_u64(0); + + while (true) + { + const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i)); + const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1)); + const uint64x2_t h2 = veorq_u64(h0, h1); + + if (++i == s) + { + const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data))); + d1 = veorq_u64(vextq_u64(t1, t1, 1), x); + c0 = veorq_u64(c0, PMULL_00(d1, h0)); + c2 = veorq_u64(c2, PMULL_10(d1, h1)); + d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), + vget_low_u32(vreinterpretq_u32_u64(d1)))); + c1 = veorq_u64(c1, PMULL_00(d1, h2)); + + break; + } + + d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8))); + c0 = veorq_u64(c0, PMULL_10(d2, h0)); + c2 = veorq_u64(c2, PMULL_10(d1, h1)); + d2 = veorq_u64(d2, d1); + c1 = veorq_u64(c1, PMULL_10(d2, h2)); + + if (++i == s) + { + const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data))); + d1 = veorq_u64(vextq_u64(t2, t2, 1), x); + c0 = veorq_u64(c0, PMULL_01(d1, h0)); + c2 = veorq_u64(c2, PMULL_11(d1, h1)); + d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), + vget_low_u32(vreinterpretq_u32_u64(d1)))); + c1 = veorq_u64(c1, PMULL_01(d1, h2)); + + break; + } + + const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8))); + d2 = vextq_u64(t3, t3, 1); + c0 = veorq_u64(c0, PMULL_01(d1, h0)); + c2 = veorq_u64(c2, PMULL_01(d2, h1)); + d1 = veorq_u64(d1, d2); + c1 = veorq_u64(c1, PMULL_01(d1, h2)); + } + data += s*16; + len -= s*16; + + c1 = veorq_u64(veorq_u64(c1, c0), c2); + x = GCM_Reduce_ARMV8A(c0, c1, c2, r); + } + + vst1q_u64(reinterpret_cast(hbuffer), x); + return len; +} +#endif + NAMESPACE_END \ No newline at end of file diff --git a/gcm.cpp b/gcm.cpp index 2c306dd0..da1ea1f4 100644 --- a/gcm.cpp +++ b/gcm.cpp @@ -53,102 +53,6 @@ NAMESPACE_BEGIN(CryptoPP) extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c); #endif -#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE -#if defined(__GNUC__) -// Schneiders, Hovsmith and O'Rourke used this trick. -// It results in much better code generation in production code -// by avoiding D-register spills when using vgetq_lane_u64. The -// problem does not surface under minimal test cases. -inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" - :"=w" (r) : "w" (a), "w" (b) ); - return r; -} - -inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" - :"=w" (r) : "w" (a), "w" (vget_high_u64(b)) ); - return r; -} - -inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t" - :"=w" (r) : "w" (vget_high_u64(a)), "w" (b) ); - return r; -} - -inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t" - :"=w" (r) : "w" (a), "w" (b) ); - return r; -} - -inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c) -{ - uint64x2_t r; - __asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t" - :"=w" (r) : "w" (a), "w" (b), "I" (c) ); - return r; -} - -// https://github.com/weidai11/cryptopp/issues/366 -template -inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) -{ - uint64x2_t r; - __asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t" - :"=w" (r) : "w" (a), "w" (b), "I" (C) ); - return r; -} -#endif // GCC and compatibles - -#if defined(_MSC_VER) -inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b) -{ - return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0), - vgetq_lane_u64(vreinterpretq_u64_u8(b),0))); -} - -inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b) -{ - return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0), - vgetq_lane_u64(vreinterpretq_u64_u8(b),1))); -} - -inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b) -{ - return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1), - vgetq_lane_u64(vreinterpretq_u64_u8(b),0))); -} - -inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b) -{ - return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1), - vgetq_lane_u64(vreinterpretq_u64_u8(b),1))); -} - -inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c) -{ - return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c); -} - -// https://github.com/weidai11/cryptopp/issues/366 -template -inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) -{ - return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C); -} -#endif // Microsoft and compatibles -#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE - word16 GCM_Base::s_reductionTable[256]; volatile bool GCM_Base::s_reductionTableInitialized = false; @@ -278,6 +182,9 @@ inline __m128i CLMUL_GF_Mul(const __m128i &x, const __m128i &h, const __m128i &r #if CRYPTOPP_ARMV8A_PMULL_AVAILABLE +extern size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer); +extern uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r); + CRYPTOPP_ALIGN_DATA(16) static const word64 s_clmulConstants64[] = { W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients @@ -287,31 +194,6 @@ static const word64 s_clmulConstants64[] = { static const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64; static const unsigned int s_clmulTableSizeInBlocks = 8; - -inline uint64x2_t PMULL_Reduce(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r) -{ - // See comments fo CLMUL_Reduce - c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0)); - c1 = veorq_u64(c1, PMULL_01(c0, r)); - c0 = VEXT_U8<8>(c0, vdupq_n_u64(0)); - c0 = vshlq_n_u64(veorq_u64(c0, c1), 1); - c0 = PMULL_00(c0, r); - c2 = veorq_u64(c2, c0); - c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0))); - c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63); - c2 = vshlq_n_u64(c2, 1); - - return veorq_u64(c2, c1); -} - -inline uint64x2_t PMULL_GF_Mul(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r) -{ - const uint64x2_t c0 = PMULL_00(x, h); - const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h)); - const uint64x2_t c2 = PMULL_11(x, h); - - return PMULL_Reduce(c0, c1, c2, r); -} #endif void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs ¶ms) @@ -388,15 +270,15 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const uint64x2_t h = h0; for (i=0; i= 16) - { - size_t s = UnsignedMin(len/16, s_clmulTableSizeInBlocks), i=0; - uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*16))); - uint64x2_t c0 = vdupq_n_u64(0); - uint64x2_t c1 = vdupq_n_u64(0); - uint64x2_t c2 = vdupq_n_u64(0); - - while (true) - { - const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i)); - const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1)); - const uint64x2_t h2 = veorq_u64(h0, h1); - - if (++i == s) - { - const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data))); - d1 = veorq_u64(vextq_u64(t1, t1, 1), x); - c0 = veorq_u64(c0, PMULL_00(d1, h0)); - c2 = veorq_u64(c2, PMULL_10(d1, h1)); - d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), - vget_low_u32(vreinterpretq_u32_u64(d1)))); - c1 = veorq_u64(c1, PMULL_00(d1, h2)); - - break; - } - - d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8))); - c0 = veorq_u64(c0, PMULL_10(d2, h0)); - c2 = veorq_u64(c2, PMULL_10(d1, h1)); - d2 = veorq_u64(d2, d1); - c1 = veorq_u64(c1, PMULL_10(d2, h2)); - - if (++i == s) - { - const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data))); - d1 = veorq_u64(vextq_u64(t2, t2, 1), x); - c0 = veorq_u64(c0, PMULL_01(d1, h0)); - c2 = veorq_u64(c2, PMULL_11(d1, h1)); - d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), - vget_low_u32(vreinterpretq_u32_u64(d1)))); - c1 = veorq_u64(c1, PMULL_01(d1, h2)); - - break; - } - - const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8))); - d2 = vextq_u64(t3, t3, 1); - c0 = veorq_u64(c0, PMULL_01(d1, h0)); - c2 = veorq_u64(c2, PMULL_01(d2, h1)); - d1 = veorq_u64(d1, d2); - c1 = veorq_u64(c1, PMULL_01(d1, h2)); - } - data += s*16; - len -= s*16; - - c1 = veorq_u64(veorq_u64(c1, c0), c2); - x = PMULL_Reduce(c0, c1, c2, r); - } - - vst1q_u64((uint64_t *)HashBuffer(), x); - return len; -} + return GCM_AuthenticateBlocks_ARMV8(data, len, MulTable(), HashBuffer()); + } #endif typedef BlockGetAndPut Block;