Cleaned up ARM related defines, like CRYPTOPP_ARM_NEON_AVAILABLE

We only need to base it on the compiler in config.h. config.h activates the code path guarded by HasNEON(). The source file that actially provides the NEON implementation will be compiled with -fpu=neon or -march=armv8-a.
Since we are providing the specialized implementation in a sequestered source file (and not a header file), we can probably avoid the defines like CRYPTOPP_ARM_NEON_AVAILABLE altogether.
pull/461/head
Jeffrey Walton 2017-07-30 19:14:47 -04:00
parent b4f6882237
commit 6169b5d4d6
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
7 changed files with 391 additions and 393 deletions

View File

@ -202,6 +202,7 @@ endif # CXXFLAGS
ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),) ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),)
SSE42_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -dM -E - | grep -i -c -q __SSE4_2__ && echo "-msse4.2") SSE42_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -dM -E - | grep -i -c -q __SSE4_2__ && echo "-msse4.2")
ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),) ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),)
GCM_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -mpclmul -dM -E - | grep -i -c -q __PCLMUL__ && echo "-mpclmul")
AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -maes -dM -E - | grep -i -c -q __AES__ && echo "-maes") AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -maes -dM -E - | grep -i -c -q __AES__ && echo "-maes")
ifeq ($(findstring -DCRYPTOPP_DISABLE_SHA,$(CXXFLAGS)),) ifeq ($(findstring -DCRYPTOPP_DISABLE_SHA,$(CXXFLAGS)),)
SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -msha -dM -E - | grep -i -c -q __SHA__ && echo "-msse4.2 -msha") SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -msha -dM -E - | grep -i -c -q __SHA__ && echo "-msse4.2 -msha")
@ -308,14 +309,16 @@ ifeq ($(IS_NEON),1)
endif endif
ifeq ($(IS_ARMV8),1) ifeq ($(IS_ARMV8),1)
ARMV8A_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a") ARMV8A_NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a")
CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc") ARMV8A_CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc")
AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") ARMV8A_CRYPTO_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto")
SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto") CRC_FLAG = $(ARMV8A_CRC_FLAG)
GCM_FLAG = $(ARMV8A_FLAG) AES_FLAG = $(ARMV8A_CRYPTO_FLAG)
ARIA_FLAG = $(ARMV8A_FLAG) GCM_FLAG = $(ARMV8A_CRYPTO_FLAG)
BLAKE2_FLAG = $(ARMV8A_FLAG) SHA_FLAG = $(ARMV8A_CRYPTO_FLAG)
NEON_FLAG = $(ARMV8A_FLAG) ARIA_FLAG = $(ARMV8A_NEON_FLAG)
BLAKE2_FLAG = $(ARMV8A_NEON_FLAG)
NEON_FLAG = $(ARMV8A_NEON_FLAG)
endif endif
endif # IS_X86 endif # IS_X86
@ -851,7 +854,7 @@ endif # Dependencies
# Run rdrand-nasm.sh to create the object files # Run rdrand-nasm.sh to create the object files
ifeq ($(USE_NASM),1) ifeq ($(USE_NASM),1)
rdrand.o: rdrand.h rdrand.cpp rdrand.s rdrand.o: rdrand.h rdrand.cpp rdrand.s
$(CXX) $(strip $(CXXFLAGS)) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp $(CXX) $(strip $(CXXFLAGS) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp)
rdrand-%.o: rdrand-%.o:
./rdrand-nasm.sh ./rdrand-nasm.sh
endif endif
@ -884,49 +887,49 @@ sha-simd.o : sha-simd.cpp
# Also see https://stackoverflow.com/q/12983137/608639. # Also see https://stackoverflow.com/q/12983137/608639.
ifeq ($(findstring true,$(CI)),true) ifeq ($(findstring true,$(CI)),true)
threefish.o : threefish.cpp threefish.o : threefish.cpp
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $< $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $<
endif endif
# Don't build Rijndael with UBsan. Too much noise due to unaligned data accesses. # Don't build Rijndael with UBsan. Too much noise due to unaligned data accesses.
ifneq ($(findstring -fsanitize=undefined,$(CXXFLAGS)),) ifneq ($(findstring -fsanitize=undefined,$(CXXFLAGS)),)
rijndael.o : rijndael.cpp rijndael.o : rijndael.cpp
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $< $(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $<
endif endif
# Don't build VMAC and friends with Asan. Too many false positives. # Don't build VMAC and friends with Asan. Too many false positives.
ifneq ($(findstring -fsanitize=address,$(CXXFLAGS)),) ifneq ($(findstring -fsanitize=address,$(CXXFLAGS)),)
vmac.o : vmac.cpp vmac.o : vmac.cpp
$(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS))) -c $< $(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS)) -c) $<
endif endif
# Only use CRYPTOPP_DATA_DIR if its not set in CXXFLAGS # Only use CRYPTOPP_DATA_DIR if its not set in CXXFLAGS
ifeq ($(findstring -DCRYPTOPP_DATA_DIR, $(strip $(CXXFLAGS))),) ifeq ($(findstring -DCRYPTOPP_DATA_DIR, $(strip $(CXXFLAGS))),)
ifneq ($(strip $(CRYPTOPP_DATA_DIR)),) ifneq ($(strip $(CRYPTOPP_DATA_DIR)),)
validat%.o : validat%.cpp validat%.o : validat%.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
bench%.o : bench%.cpp bench%.o : bench%.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
datatest.o : datatest.cpp datatest.o : datatest.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
test.o : test.cpp test.o : test.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $< $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
endif endif
endif endif
%.dllonly.o : %.cpp %.dllonly.o : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DLL_ONLY -c $< -o $@ $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DLL_ONLY -c) $< -o $@
%.import.o : %.cpp %.import.o : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_IMPORTS -c $< -o $@ $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_IMPORTS -c) $< -o $@
%.export.o : %.cpp %.export.o : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_EXPORTS -c $< -o $@ $(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_EXPORTS -c) $< -o $@
%.bc : %.cpp %.bc : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -c $< $(CXX) $(strip $(CXXFLAGS) -c) $<
%.o : %.cpp %.o : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -c $< $(CXX) $(strip $(CXXFLAGS) -c) $<
.PHONY: so_warning .PHONY: so_warning
so_warning: so_warning:

View File

@ -250,15 +250,15 @@ if [[ ("$SUNCC_510_OR_ABOVE" -ne "0") ]]; then
HAVE_OFAST=0 HAVE_OFAST=0
fi fi
if [[ (-z "$TMP") ]]; then if [[ (-z "$TMPDIR") ]]; then
if [[ (-d "/tmp") ]]; then if [[ (-d "/tmp") ]]; then
TMP=/tmp TMPDIR=/tmp
elif [[ (-d "/temp") ]]; then elif [[ (-d "/temp") ]]; then
TMP=/temp TMPDIR=/temp
elif [[ (-d "$HOME/tmp") ]]; then elif [[ (-d "$HOME/tmp") ]]; then
TMP="$HOME/tmp" TMPDIR="$HOME/tmp"
else else
echo "Please set TMP to a valid directory" echo "Please set TMPDIR to a valid directory"
[[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1 [[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1
fi fi
fi fi
@ -267,74 +267,74 @@ fi
rm -f adhoc.cpp > /dev/null 2>&1 rm -f adhoc.cpp > /dev/null 2>&1
cp adhoc.cpp.proto adhoc.cpp cp adhoc.cpp.proto adhoc.cpp
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_CXX17") ]]; then if [[ (-z "$HAVE_CXX17") ]]; then
HAVE_CXX17=0 HAVE_CXX17=0
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_CXX17=1 HAVE_CXX17=1
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_GNU17") ]]; then if [[ (-z "$HAVE_GNU17") ]]; then
HAVE_GNU17=0 HAVE_GNU17=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_GNU17=1 HAVE_GNU17=1
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_CXX14") ]]; then if [[ (-z "$HAVE_CXX14") ]]; then
HAVE_CXX14=0 HAVE_CXX14=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_CXX14=1 HAVE_CXX14=1
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_GNU14") ]]; then if [[ (-z "$HAVE_GNU14") ]]; then
HAVE_GNU14=0 HAVE_GNU14=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_GNU14=1 HAVE_GNU14=1
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_CXX11") ]]; then if [[ (-z "$HAVE_CXX11") ]]; then
HAVE_CXX11=0 HAVE_CXX11=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_CXX11=1 HAVE_CXX11=1
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_GNU11") ]]; then if [[ (-z "$HAVE_GNU11") ]]; then
HAVE_GNU11=0 HAVE_GNU11=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_GNU11=1 HAVE_GNU11=1
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_CXX03") ]]; then if [[ (-z "$HAVE_CXX03") ]]; then
HAVE_CXX03=0 HAVE_CXX03=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_CXX03=1 HAVE_CXX03=1
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_GNU03") ]]; then if [[ (-z "$HAVE_GNU03") ]]; then
HAVE_GNU03=0 HAVE_GNU03=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_GNU03=1 HAVE_GNU03=1
fi fi
@ -342,13 +342,13 @@ fi
# Use a fallback strategy so OPT_O0 can be used with DEBUG_CXXFLAGS # Use a fallback strategy so OPT_O0 can be used with DEBUG_CXXFLAGS
OPT_O0= OPT_O0=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_O0=-O0 OPT_O0=-O0
else else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_O0=-xO0 OPT_O0=-xO0
fi fi
@ -356,13 +356,13 @@ fi
# Use a fallback strategy so OPT_O1 can be used with VALGRIND_CXXFLAGS # Use a fallback strategy so OPT_O1 can be used with VALGRIND_CXXFLAGS
OPT_O1= OPT_O1=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_O1=-O1 OPT_O1=-O1
else else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_O1=-xO1 OPT_O1=-xO1
fi fi
@ -370,13 +370,13 @@ fi
# Use a fallback strategy so OPT_O2 can be used with RELEASE_CXXFLAGS # Use a fallback strategy so OPT_O2 can be used with RELEASE_CXXFLAGS
OPT_O2= OPT_O2=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_O2=-O2 OPT_O2=-O2
else else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_O2=-xO2 OPT_O2=-xO2
fi fi
@ -385,14 +385,14 @@ fi
if [[ (-z "$HAVE_O3") ]]; then if [[ (-z "$HAVE_O3") ]]; then
HAVE_O3=0 HAVE_O3=0
OPT_O3= OPT_O3=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_O3=1 HAVE_O3=1
OPT_O3=-O3 OPT_O3=-O3
else else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_O3=1 HAVE_O3=1
OPT_O3=-xO3 OPT_O3=-xO3
@ -404,14 +404,14 @@ fi
if [[ ( (-z "$HAVE_O5") && ("$CLANG_COMPILER" -eq "0") ) ]]; then if [[ ( (-z "$HAVE_O5") && ("$CLANG_COMPILER" -eq "0") ) ]]; then
HAVE_O5=0 HAVE_O5=0
OPT_O5= OPT_O5=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_O5=1 HAVE_O5=1
OPT_O5=-O5 OPT_O5=-O5
else else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_O5=1 HAVE_O5=1
OPT_O5=-xO5 OPT_O5=-xO5
@ -423,8 +423,8 @@ fi
if [[ (-z "$HAVE_OS") ]]; then if [[ (-z "$HAVE_OS") ]]; then
HAVE_OS=0 HAVE_OS=0
OPT_OS= OPT_OS=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_OS=1 HAVE_OS=1
OPT_OS=-Os OPT_OS=-Os
@ -435,8 +435,8 @@ fi
if [[ (-z "$HAVE_OFAST") ]]; then if [[ (-z "$HAVE_OFAST") ]]; then
HAVE_OFAST=0 HAVE_OFAST=0
OPT_OFAST= OPT_OFAST=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_OFAST=1 HAVE_OFAST=1
OPT_OFAST=-Ofast OPT_OFAST=-Ofast
@ -445,13 +445,13 @@ fi
# Use a fallback strategy so OPT_G2 can be used with RELEASE_CXXFLAGS # Use a fallback strategy so OPT_G2 can be used with RELEASE_CXXFLAGS
OPT_G2= OPT_G2=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_G2=-g2 OPT_G2=-g2
else else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_G2=-g OPT_G2=-g
fi fi
@ -459,13 +459,13 @@ fi
# Use a fallback strategy so OPT_G3 can be used with DEBUG_CXXFLAGS # Use a fallback strategy so OPT_G3 can be used with DEBUG_CXXFLAGS
OPT_G3= OPT_G3=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_G3=-g3 OPT_G3=-g3
else else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
OPT_G3=-g OPT_G3=-g
fi fi
@ -473,10 +473,10 @@ fi
# Cygwin and noisy compiles # Cygwin and noisy compiles
OPT_PIC= OPT_PIC=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_PIC") ]]; then if [[ (-z "$HAVE_PIC") ]]; then
HAVE_PIC=0 HAVE_PIC=0
PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMP/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)') PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMPDIR/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)')
if [[ "$PIC_PROBLEMS" -eq "0" ]]; then if [[ "$PIC_PROBLEMS" -eq "0" ]]; then
HAVE_PIC=1 HAVE_PIC=1
OPT_PIC=-fPIC OPT_PIC=-fPIC
@ -484,12 +484,12 @@ if [[ (-z "$HAVE_PIC") ]]; then
fi fi
# GCC 4.8; Clang 3.4 # GCC 4.8; Clang 3.4
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_UBSAN") ]]; then if [[ (-z "$HAVE_UBSAN") ]]; then
HAVE_UBSAN=0 HAVE_UBSAN=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
"$TMP/adhoc.exe" > /dev/null 2>&1 "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_UBSAN=1 HAVE_UBSAN=1
fi fi
@ -497,12 +497,12 @@ if [[ (-z "$HAVE_UBSAN") ]]; then
fi fi
# GCC 4.8; Clang 3.4 # GCC 4.8; Clang 3.4
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_ASAN") ]]; then if [[ (-z "$HAVE_ASAN") ]]; then
HAVE_ASAN=0 HAVE_ASAN=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
"$TMP/adhoc.exe" > /dev/null 2>&1 "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_ASAN=1 HAVE_ASAN=1
fi fi
@ -510,41 +510,41 @@ if [[ (-z "$HAVE_ASAN") ]]; then
fi fi
# GCC 6.0; maybe Clang # GCC 6.0; maybe Clang
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_BSAN") ]]; then if [[ (-z "$HAVE_BSAN") ]]; then
HAVE_BSAN=0 HAVE_BSAN=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
"$TMP/adhoc.exe" > /dev/null 2>&1 "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_BSAN=1 HAVE_BSAN=1
fi fi
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_OMP") ]]; then if [[ (-z "$HAVE_OMP") ]]; then
HAVE_OMP=0 HAVE_OMP=0
if [[ "$GCC_COMPILER" -ne "0" ]]; then if [[ "$GCC_COMPILER" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_OMP=1 HAVE_OMP=1
OMP_FLAGS=(-fopenmp -O3) OMP_FLAGS=(-fopenmp -O3)
fi fi
elif [[ "$INTEL_COMPILER" -ne "0" ]]; then elif [[ "$INTEL_COMPILER" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_OMP=1 HAVE_OMP=1
OMP_FLAGS=(-openmp -O3) OMP_FLAGS=(-openmp -O3)
fi fi
elif [[ "$CLANG_COMPILER" -ne "0" ]]; then elif [[ "$CLANG_COMPILER" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_OMP=1 HAVE_OMP=1
OMP_FLAGS=(-fopenmp=libomp -O3) OMP_FLAGS=(-fopenmp=libomp -O3)
fi fi
elif [[ "$SUN_COMPILER" -ne "0" ]]; then elif [[ "$SUN_COMPILER" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_OMP=1 HAVE_OMP=1
OMP_FLAGS=(-xopenmp=parallel -xO3) OMP_FLAGS=(-xopenmp=parallel -xO3)
@ -552,33 +552,33 @@ if [[ (-z "$HAVE_OMP") ]]; then
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_INTEL_MULTIARCH") ]]; then if [[ (-z "$HAVE_INTEL_MULTIARCH") ]]; then
HAVE_INTEL_MULTIARCH=0 HAVE_INTEL_MULTIARCH=0
if [[ ("$IS_DARWIN" -ne "0") && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0") ]]; then if [[ ("$IS_DARWIN" -ne "0") && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0") ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_INTEL_MULTIARCH=1 HAVE_INTEL_MULTIARCH=1
fi fi
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_PPC_MULTIARCH") ]]; then if [[ (-z "$HAVE_PPC_MULTIARCH") ]]; then
HAVE_PPC_MULTIARCH=0 HAVE_PPC_MULTIARCH=0
if [[ ("$IS_DARWIN" -ne "0") && ("$IS_PPC" -ne "0") ]]; then if [[ ("$IS_DARWIN" -ne "0") && ("$IS_PPC" -ne "0") ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_PPC_MULTIARCH=1 HAVE_PPC_MULTIARCH=1
fi fi
fi fi
fi fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_X32") ]]; then if [[ (-z "$HAVE_X32") ]]; then
HAVE_X32=0 HAVE_X32=0
if [[ "$IS_X32" -ne "0" ]]; then if [[ "$IS_X32" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_X32=1 HAVE_X32=1
fi fi
@ -588,8 +588,8 @@ fi
# Hit or miss, mostly hit # Hit or miss, mostly hit
if [[ (-z "$HAVE_NATIVE_ARCH") ]]; then if [[ (-z "$HAVE_NATIVE_ARCH") ]]; then
HAVE_NATIVE_ARCH=0 HAVE_NATIVE_ARCH=0
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then if [[ ("$?" -eq "0") ]]; then
HAVE_NATIVE_ARCH=1 HAVE_NATIVE_ARCH=1
fi fi
@ -603,7 +603,7 @@ if [[ (-z "$HAVE_LDGOLD") ]]; then
if [[ (! -z "$LD_GOLD") && (! -z "$ELF_FILE") ]]; then if [[ (! -z "$LD_GOLD") && (! -z "$ELF_FILE") ]]; then
LD_GOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf") LD_GOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf")
if [[ ("$LD_GOLD" -ne "0") ]]; then if [[ ("$LD_GOLD" -ne "0") ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_LDGOLD=1 HAVE_LDGOLD=1
fi fi
@ -688,10 +688,10 @@ fi
# Used to disassemble object modules so we can verify some aspects of code generation # Used to disassemble object modules so we can verify some aspects of code generation
if [[ (-z "$HAVE_DISASS") ]]; then if [[ (-z "$HAVE_DISASS") ]]; then
echo "int main(int argc, char* argv[]) {return 0;}" > "$TMP/test.cc" echo "int main(int argc, char* argv[]) {return 0;}" > "$TMPDIR/test.cc"
"$CXX" "$TMP/test.cc" -o "$TMP/test.exe" > /dev/null 2>&1 "$CXX" "$TMPDIR/test.cc" -o "$TMPDIR/test.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
"$DISASS" "${DISASSARGS[@]}" "$TMP/test.exe" > /dev/null 2>&1 "$DISASS" "${DISASSARGS[@]}" "$TMPDIR/test.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
HAVE_DISASS=1 HAVE_DISASS=1
else else
@ -1167,7 +1167,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo echo
OBJFILE=sha.o; rm -f "$OBJFILE" 2>/dev/null OBJFILE=sha.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null) DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
@ -1201,7 +1201,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################ ############################################
# Test CRC-32C code generation # Test CRC-32C code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
X86_CRC32=1 X86_CRC32=1
fi fi
@ -1239,7 +1239,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################ ############################################
# Test AES-NI code generation # Test AES-NI code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
X86_AESNI=1 X86_AESNI=1
fi fi
@ -1251,7 +1251,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo echo
OBJFILE=rijndael.o; rm -f "$OBJFILE" 2>/dev/null OBJFILE=rijndael.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0 COUNT=0
FAILED=0 FAILED=0
@ -1301,7 +1301,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################ ############################################
# X86 carryless multiply code generation # X86 carryless multiply code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
X86_PCLMUL=1 X86_PCLMUL=1
fi fi
@ -1312,8 +1312,8 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS" echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo echo
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0 COUNT=0
FAILED=0 FAILED=0
@ -1339,11 +1339,11 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################ ############################################
# Test RDRAND and RDSEED code generation # Test RDRAND and RDSEED code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
X86_RDRAND=1 X86_RDRAND=1
fi fi
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
X86_RDSEED=1 X86_RDSEED=1
fi fi
@ -1355,7 +1355,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo echo
OBJFILE=rdrand.o; rm -f "$OBJFILE" 2>/dev/null OBJFILE=rdrand.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0 COUNT=0
FAILED=0 FAILED=0
@ -1385,7 +1385,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################ ############################################
# X86 SHA code generation # X86 SHA code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
X86_SHA=1 X86_SHA=1
fi fi
@ -1397,7 +1397,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo echo
OBJFILE=sha-simd.o; rm -f "$OBJFILE" 2>/dev/null OBJFILE=sha-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0 COUNT=0
FAILED=0 FAILED=0
@ -1465,7 +1465,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
echo "Testing: ARM NEON code generation" | tee -a "$TEST_RESULTS" echo "Testing: ARM NEON code generation" | tee -a "$TEST_RESULTS"
echo echo
OBJFILE=aria.o; rm -f "$OBJFILE" 2>/dev/null OBJFILE=aria-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0 COUNT=0
@ -1515,7 +1515,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
############################################ ############################################
# ARM CRC32 code generation # ARM CRC32 code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
ARM_CRC32=1 ARM_CRC32=1
fi fi
@ -1565,7 +1565,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
############################################ ############################################
# ARM carryless multiply code generation # ARM carryless multiply code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
ARM_PMULL=1 ARM_PMULL=1
fi fi
@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS" echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo echo
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0 COUNT=0
@ -1603,7 +1603,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
############################################ ############################################
# ARM SHA code generation # ARM SHA code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
ARM_SHA=1 ARM_SHA=1
fi fi
@ -5098,7 +5098,7 @@ fi
if [[ ("$CLANG_COMPILER" -eq "0") ]]; then if [[ ("$CLANG_COMPILER" -eq "0") ]]; then
CLANG_CXX=$(which clang++ 2>&1 | "$GREP" -v "no clang++" | head -1) CLANG_CXX=$(which clang++ 2>&1 | "$GREP" -v "no clang++" | head -1)
"$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5133,7 +5133,7 @@ fi
if [[ ("$GCC_COMPILER" -eq "0") ]]; then if [[ ("$GCC_COMPILER" -eq "0") ]]; then
GCC_CXX=$(which g++ 2>&1 | "$GREP" -v "no g++" | head -1) GCC_CXX=$(which g++ 2>&1 | "$GREP" -v "no g++" | head -1)
"$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5171,7 +5171,7 @@ if [[ ("$INTEL_COMPILER" -eq "0") ]]; then
if [[ (-z "$INTEL_CXX") ]]; then if [[ (-z "$INTEL_CXX") ]]; then
INTEL_CXX=$(find /opt/intel -name icpc 2>/dev/null | "$GREP" -iv composer | head -1) INTEL_CXX=$(find /opt/intel -name icpc 2>/dev/null | "$GREP" -iv composer | head -1)
fi fi
"$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5207,7 +5207,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-4*' 2>/dev/null | head -1) MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-4*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5240,7 +5240,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-5*' 2>/dev/null | head -1) MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-5*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5273,7 +5273,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-6*' 2>/dev/null | head -1) MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-6*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5306,7 +5306,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-7*' 2>/dev/null | head -1) MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-7*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5339,7 +5339,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.7*' 2>/dev/null | head -1) MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.7*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5371,7 +5371,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.8*' 2>/dev/null | head -1) MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.8*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5403,7 +5403,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.9*' 2>/dev/null | head -1) MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.9*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################
@ -5435,7 +5435,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-4*' 2>/dev/null | head -1) MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-4*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
############################################ ############################################

View File

@ -517,7 +517,7 @@ NAMESPACE_END
// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains. // Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
#if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) #if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || \ # if defined(__ARM_NEON__) || defined(__ARM_FEATURE_NEON) || (CRYPTOPP_MSC_VER >= 1900) || \
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500) (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_ARM_NEON_AVAILABLE 1 # define CRYPTOPP_ARM_NEON_AVAILABLE 1
# endif # endif
@ -528,9 +528,8 @@ NAMESPACE_END
// Microsoft plans to support ARM-64, but its not clear how to detect it. // Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available // TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) #if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRC32) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \ # if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \ (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1 # define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
# endif # endif
#endif #endif
@ -540,9 +539,8 @@ NAMESPACE_END
// it at the moment. Microsoft plans to support ARM-64, but its not clear how to detect it. // it at the moment. Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available // TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__) #if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \ # if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \ (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1 # define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
# endif # endif
#endif #endif
@ -552,19 +550,17 @@ NAMESPACE_END
// Microsoft plans to support ARM-64, but its not clear how to detect it. // Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available // TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) #if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \ # if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \ (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
# define CRYPTOPP_ARMV8A_AES_AVAILABLE 1 # define CRYPTOPP_ARMV8A_AES_AVAILABLE 1
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1 # define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1
# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1 # define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1
# endif # endif
#endif #endif
// ARM CRC testing // TODO...
#undef CRYPTOPP_ARMV8A_AES_AVAILABLE #undef CRYPTOPP_ARMV8A_AES_AVAILABLE
#undef CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#undef CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
#endif // ARM32, ARM64 #endif // ARM32, ARM64

59
cpu.cpp
View File

@ -352,36 +352,9 @@ extern "C"
}; };
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
static bool TryNEON()
{
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
return CPU_TryNEON_ARM();
#else
return false;
#endif
}
static bool TryCRC32()
{
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
return CPU_TryCRC32_ARMV8();
#else
return false;
#endif
}
static bool TryPMULL()
{
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
return CPU_TryPMULL_ARMV8();
#else
return false;
#endif
}
static bool TryAES() static bool TryAES()
{ {
#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) #if (CRYPTOPP_ARMV8A_AES_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true; volatile bool result = true;
__try __try
@ -432,32 +405,14 @@ static bool TryAES()
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE #endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
} }
static bool TrySHA1()
{
#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE)
return CPU_TrySHA1_ARMV8();
#else
return false;
#endif
}
static bool TrySHA2()
{
#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE)
return CPU_TrySHA2_ARMV8();
#else
return false;
#endif
}
void DetectArmFeatures() void DetectArmFeatures()
{ {
g_hasNEON = TryNEON(); g_hasNEON = CPU_TryNEON_ARM();
g_hasPMULL = TryPMULL(); g_hasPMULL = CPU_TryPMULL_ARMV8();
g_hasCRC32 = TryCRC32(); g_hasCRC32 = CPU_TryCRC32_ARMV8();
g_hasAES = TryAES(); g_hasAES = TryAES(); // TODO
g_hasSHA1 = TrySHA1(); g_hasSHA1 = CPU_TrySHA1_ARMV8();
g_hasSHA2 = TrySHA2(); g_hasSHA2 = CPU_TrySHA2_ARMV8();
g_ArmDetectionDone = true; g_ArmDetectionDone = true;
} }

View File

@ -1312,7 +1312,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS" echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo echo
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0 COUNT=0
@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS" echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo echo
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS" CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0 COUNT=0

View File

@ -26,6 +26,121 @@
# include <setjmp.h> # include <setjmp.h>
#endif #endif
ANONYMOUS_NAMESPACE_BEGIN
// GCC 4.8 and 4.9 are missing PMULL gear
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
# if (CRYPTOPP_GCC_VERSION >= 40800) && (CRYPTOPP_GCC_VERSION < 50000)
inline poly128_t VMULL_P64(poly64_t a, poly64_t b)
{
return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}
inline poly128_t VMULL_HIGH_P64(poly64x2_t a, poly64x2_t b)
{
return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}
# endif
#endif
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#if defined(__GNUC__)
// Schneiders, Hovsmith and O'Rourke used this trick.
// It results in much better code generation in production code
// by avoiding D-register spills when using vgetq_lane_u64. The
// problem does not surface under minimal test cases.
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (a), "w" (b) );
return r;
}
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (a), "w" (vget_high_u64(b)) );
return r;
}
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (vget_high_u64(a)), "w" (b) );
return r;
}
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t"
:"=w" (r) : "w" (a), "w" (b) );
return r;
}
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
uint64x2_t r;
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
:"=w" (r) : "w" (a), "w" (b), "I" (c) );
return r;
}
// https://github.com/weidai11/cryptopp/issues/366
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
:"=w" (r) : "w" (a), "w" (b), "I" (C) );
return r;
}
#endif // GCC and compatibles
#if defined(_MSC_VER)
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
}
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
}
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
}
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
}
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c);
}
// https://github.com/weidai11/cryptopp/issues/366
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
}
#endif // Microsoft and compatibles
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP) NAMESPACE_BEGIN(CryptoPP)
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
@ -71,7 +186,7 @@ bool CPU_TryPMULL_ARMV8()
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true; volatile bool result = true;
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL); volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
if (oldHandler == SIG_ERR) if (oldHandler == SIG_ERR)
return false; return false;
@ -79,7 +194,7 @@ bool CPU_TryPMULL_ARMV8()
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
return false; return false;
if (setjmp(s_jmpNoPMULL)) if (setjmp(s_jmpSIGILL))
result = false; result = false;
else else
{ {
@ -87,8 +202,8 @@ bool CPU_TryPMULL_ARMV8()
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0}, const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0}; b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
const poly128_t r1 = vmull_p64(a1, b1); const poly128_t r1 = VMULL_P64(a1, b1);
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2)); const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2));
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233. // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum} const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
@ -115,4 +230,116 @@ void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c)
} }
#endif #endif
#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE
ANONYMOUS_NAMESPACE_BEGIN
CRYPTOPP_ALIGN_DATA(16)
const word64 s_clmulConstants64[] = {
W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients
W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607), // Unused for ARM; used for x86 _mm_shuffle_epi8
W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f) // Unused for ARM; used for x86 _mm_shuffle_epi8
};
const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64;
const unsigned int s_clmulTableSizeInBlocks = 8;
ANONYMOUS_NAMESPACE_END
uint64x2_t GCM_Reduce_ARMV8A(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r)
{
c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0));
c1 = veorq_u64(c1, PMULL_01(c0, r));
c0 = VEXT_U8<8>(c0, vdupq_n_u64(0));
c0 = vshlq_n_u64(veorq_u64(c0, c1), 1);
c0 = PMULL_00(c0, r);
c2 = veorq_u64(c2, c0);
c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0)));
c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63);
c2 = vshlq_n_u64(c2, 1);
return veorq_u64(c2, c1);
}
uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r)
{
const uint64x2_t c0 = PMULL_00(x, h);
const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h));
const uint64x2_t c2 = PMULL_11(x, h);
return GCM_Reduce_ARMV8A(c0, c1, c2, r);
}
size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer)
{
const uint64x2_t* table = reinterpret_cast<const uint64x2_t*>(mtable);
uint64x2_t x = vreinterpretq_u64_u8(vld1q_u8(hbuffer));
const uint64x2_t r = s_clmulConstants[0];
const size_t BLOCKSIZE = 16;
while (len >= BLOCKSIZE)
{
size_t s = UnsignedMin(len/BLOCKSIZE, s_clmulTableSizeInBlocks), i=0;
uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*BLOCKSIZE)));
uint64x2_t c0 = vdupq_n_u64(0);
uint64x2_t c1 = vdupq_n_u64(0);
uint64x2_t c2 = vdupq_n_u64(0);
while (true)
{
const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i));
const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1));
const uint64x2_t h2 = veorq_u64(h0, h1);
if (++i == s)
{
const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
d1 = veorq_u64(vextq_u64(t1, t1, 1), x);
c0 = veorq_u64(c0, PMULL_00(d1, h0));
c2 = veorq_u64(c2, PMULL_10(d1, h1));
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
vget_low_u32(vreinterpretq_u32_u64(d1))));
c1 = veorq_u64(c1, PMULL_00(d1, h2));
break;
}
d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
c0 = veorq_u64(c0, PMULL_10(d2, h0));
c2 = veorq_u64(c2, PMULL_10(d1, h1));
d2 = veorq_u64(d2, d1);
c1 = veorq_u64(c1, PMULL_10(d2, h2));
if (++i == s)
{
const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
d1 = veorq_u64(vextq_u64(t2, t2, 1), x);
c0 = veorq_u64(c0, PMULL_01(d1, h0));
c2 = veorq_u64(c2, PMULL_11(d1, h1));
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
vget_low_u32(vreinterpretq_u32_u64(d1))));
c1 = veorq_u64(c1, PMULL_01(d1, h2));
break;
}
const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
d2 = vextq_u64(t3, t3, 1);
c0 = veorq_u64(c0, PMULL_01(d1, h0));
c2 = veorq_u64(c2, PMULL_01(d2, h1));
d1 = veorq_u64(d1, d2);
c1 = veorq_u64(c1, PMULL_01(d1, h2));
}
data += s*16;
len -= s*16;
c1 = veorq_u64(veorq_u64(c1, c0), c2);
x = GCM_Reduce_ARMV8A(c0, c1, c2, r);
}
vst1q_u64(reinterpret_cast<uint64_t *>(hbuffer), x);
return len;
}
#endif
NAMESPACE_END NAMESPACE_END

197
gcm.cpp
View File

@ -53,102 +53,6 @@ NAMESPACE_BEGIN(CryptoPP)
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c); extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
#endif #endif
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#if defined(__GNUC__)
// Schneiders, Hovsmith and O'Rourke used this trick.
// It results in much better code generation in production code
// by avoiding D-register spills when using vgetq_lane_u64. The
// problem does not surface under minimal test cases.
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (a), "w" (b) );
return r;
}
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (a), "w" (vget_high_u64(b)) );
return r;
}
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (vget_high_u64(a)), "w" (b) );
return r;
}
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t"
:"=w" (r) : "w" (a), "w" (b) );
return r;
}
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
uint64x2_t r;
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
:"=w" (r) : "w" (a), "w" (b), "I" (c) );
return r;
}
// https://github.com/weidai11/cryptopp/issues/366
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
:"=w" (r) : "w" (a), "w" (b), "I" (C) );
return r;
}
#endif // GCC and compatibles
#if defined(_MSC_VER)
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
}
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
}
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
}
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
}
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c);
}
// https://github.com/weidai11/cryptopp/issues/366
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
}
#endif // Microsoft and compatibles
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
word16 GCM_Base::s_reductionTable[256]; word16 GCM_Base::s_reductionTable[256];
volatile bool GCM_Base::s_reductionTableInitialized = false; volatile bool GCM_Base::s_reductionTableInitialized = false;
@ -278,6 +182,9 @@ inline __m128i CLMUL_GF_Mul(const __m128i &x, const __m128i &h, const __m128i &r
#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE #if CRYPTOPP_ARMV8A_PMULL_AVAILABLE
extern size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
extern uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r);
CRYPTOPP_ALIGN_DATA(16) CRYPTOPP_ALIGN_DATA(16)
static const word64 s_clmulConstants64[] = { static const word64 s_clmulConstants64[] = {
W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients
@ -287,31 +194,6 @@ static const word64 s_clmulConstants64[] = {
static const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64; static const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64;
static const unsigned int s_clmulTableSizeInBlocks = 8; static const unsigned int s_clmulTableSizeInBlocks = 8;
inline uint64x2_t PMULL_Reduce(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r)
{
// See comments fo CLMUL_Reduce
c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0));
c1 = veorq_u64(c1, PMULL_01(c0, r));
c0 = VEXT_U8<8>(c0, vdupq_n_u64(0));
c0 = vshlq_n_u64(veorq_u64(c0, c1), 1);
c0 = PMULL_00(c0, r);
c2 = veorq_u64(c2, c0);
c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0)));
c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63);
c2 = vshlq_n_u64(c2, 1);
return veorq_u64(c2, c1);
}
inline uint64x2_t PMULL_GF_Mul(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r)
{
const uint64x2_t c0 = PMULL_00(x, h);
const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h));
const uint64x2_t c2 = PMULL_11(x, h);
return PMULL_Reduce(c0, c1, c2, r);
}
#endif #endif
void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params) void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
@ -388,15 +270,15 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
uint64x2_t h = h0; uint64x2_t h = h0;
for (i=0; i<tableSize-32; i+=32) for (i=0; i<tableSize-32; i+=32)
{ {
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r); const uint64x2_t h1 = GCM_Multiply_ARMV8A(h, h0, r);
vst1_u64((uint64_t *)(table+i), vget_low_u64(h)); vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
vst1q_u64((uint64_t *)(table+i+16), h1); vst1q_u64((uint64_t *)(table+i+16), h1);
vst1q_u64((uint64_t *)(table+i+8), h); vst1q_u64((uint64_t *)(table+i+8), h);
vst1_u64((uint64_t *)(table+i+8), vget_low_u64(h1)); vst1_u64((uint64_t *)(table+i+8), vget_low_u64(h1));
h = PMULL_GF_Mul(h1, h0, r); h = GCM_Multiply_ARMV8A(h1, h0, r);
} }
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r); const uint64x2_t h1 = GCM_Multiply_ARMV8A(h, h0, r);
vst1_u64((uint64_t *)(table+i), vget_low_u64(h)); vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
vst1q_u64((uint64_t *)(table+i+16), h1); vst1q_u64((uint64_t *)(table+i+16), h1);
vst1q_u64((uint64_t *)(table+i+8), h); vst1q_u64((uint64_t *)(table+i+8), h);
@ -667,73 +549,8 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE #elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL()) if (HasPMULL())
{ {
const uint64x2_t *table = (const uint64x2_t *)MulTable(); return GCM_AuthenticateBlocks_ARMV8(data, len, MulTable(), HashBuffer());
uint64x2_t x = vreinterpretq_u64_u8(vld1q_u8(HashBuffer()));
const uint64x2_t r = s_clmulConstants[0];
while (len >= 16)
{
size_t s = UnsignedMin(len/16, s_clmulTableSizeInBlocks), i=0;
uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*16)));
uint64x2_t c0 = vdupq_n_u64(0);
uint64x2_t c1 = vdupq_n_u64(0);
uint64x2_t c2 = vdupq_n_u64(0);
while (true)
{
const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i));
const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1));
const uint64x2_t h2 = veorq_u64(h0, h1);
if (++i == s)
{
const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
d1 = veorq_u64(vextq_u64(t1, t1, 1), x);
c0 = veorq_u64(c0, PMULL_00(d1, h0));
c2 = veorq_u64(c2, PMULL_10(d1, h1));
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
vget_low_u32(vreinterpretq_u32_u64(d1))));
c1 = veorq_u64(c1, PMULL_00(d1, h2));
break;
} }
d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
c0 = veorq_u64(c0, PMULL_10(d2, h0));
c2 = veorq_u64(c2, PMULL_10(d1, h1));
d2 = veorq_u64(d2, d1);
c1 = veorq_u64(c1, PMULL_10(d2, h2));
if (++i == s)
{
const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
d1 = veorq_u64(vextq_u64(t2, t2, 1), x);
c0 = veorq_u64(c0, PMULL_01(d1, h0));
c2 = veorq_u64(c2, PMULL_11(d1, h1));
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
vget_low_u32(vreinterpretq_u32_u64(d1))));
c1 = veorq_u64(c1, PMULL_01(d1, h2));
break;
}
const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
d2 = vextq_u64(t3, t3, 1);
c0 = veorq_u64(c0, PMULL_01(d1, h0));
c2 = veorq_u64(c2, PMULL_01(d2, h1));
d1 = veorq_u64(d1, d2);
c1 = veorq_u64(c1, PMULL_01(d1, h2));
}
data += s*16;
len -= s*16;
c1 = veorq_u64(veorq_u64(c1, c0), c2);
x = PMULL_Reduce(c0, c1, c2, r);
}
vst1q_u64((uint64_t *)HashBuffer(), x);
return len;
}
#endif #endif
typedef BlockGetAndPut<word64, NativeByteOrder> Block; typedef BlockGetAndPut<word64, NativeByteOrder> Block;