Cleaned up ARM related defines, like CRYPTOPP_ARM_NEON_AVAILABLE

We only need to base it on the compiler in config.h. config.h activates the code path guarded by HasNEON(). The source file that actially provides the NEON implementation will be compiled with -fpu=neon or -march=armv8-a.
Since we are providing the specialized implementation in a sequestered source file (and not a header file), we can probably avoid the defines like CRYPTOPP_ARM_NEON_AVAILABLE altogether.
pull/461/head
Jeffrey Walton 2017-07-30 19:14:47 -04:00
parent b4f6882237
commit 6169b5d4d6
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
7 changed files with 391 additions and 393 deletions

View File

@ -202,6 +202,7 @@ endif # CXXFLAGS
ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),)
SSE42_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -dM -E - | grep -i -c -q __SSE4_2__ && echo "-msse4.2")
ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),)
GCM_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -mpclmul -dM -E - | grep -i -c -q __PCLMUL__ && echo "-mpclmul")
AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -maes -dM -E - | grep -i -c -q __AES__ && echo "-maes")
ifeq ($(findstring -DCRYPTOPP_DISABLE_SHA,$(CXXFLAGS)),)
SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -msha -dM -E - | grep -i -c -q __SHA__ && echo "-msse4.2 -msha")
@ -308,14 +309,16 @@ ifeq ($(IS_NEON),1)
endif
ifeq ($(IS_ARMV8),1)
ARMV8A_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a")
CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc")
AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto")
SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto")
GCM_FLAG = $(ARMV8A_FLAG)
ARIA_FLAG = $(ARMV8A_FLAG)
BLAKE2_FLAG = $(ARMV8A_FLAG)
NEON_FLAG = $(ARMV8A_FLAG)
ARMV8A_NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a")
ARMV8A_CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc")
ARMV8A_CRYPTO_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto")
CRC_FLAG = $(ARMV8A_CRC_FLAG)
AES_FLAG = $(ARMV8A_CRYPTO_FLAG)
GCM_FLAG = $(ARMV8A_CRYPTO_FLAG)
SHA_FLAG = $(ARMV8A_CRYPTO_FLAG)
ARIA_FLAG = $(ARMV8A_NEON_FLAG)
BLAKE2_FLAG = $(ARMV8A_NEON_FLAG)
NEON_FLAG = $(ARMV8A_NEON_FLAG)
endif
endif # IS_X86
@ -851,7 +854,7 @@ endif # Dependencies
# Run rdrand-nasm.sh to create the object files
ifeq ($(USE_NASM),1)
rdrand.o: rdrand.h rdrand.cpp rdrand.s
$(CXX) $(strip $(CXXFLAGS)) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp
$(CXX) $(strip $(CXXFLAGS) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp)
rdrand-%.o:
./rdrand-nasm.sh
endif
@ -884,49 +887,49 @@ sha-simd.o : sha-simd.cpp
# Also see https://stackoverflow.com/q/12983137/608639.
ifeq ($(findstring true,$(CI)),true)
threefish.o : threefish.cpp
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $<
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $<
endif
# Don't build Rijndael with UBsan. Too much noise due to unaligned data accesses.
ifneq ($(findstring -fsanitize=undefined,$(CXXFLAGS)),)
rijndael.o : rijndael.cpp
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $<
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $<
endif
# Don't build VMAC and friends with Asan. Too many false positives.
ifneq ($(findstring -fsanitize=address,$(CXXFLAGS)),)
vmac.o : vmac.cpp
$(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS))) -c $<
$(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS)) -c) $<
endif
# Only use CRYPTOPP_DATA_DIR if its not set in CXXFLAGS
ifeq ($(findstring -DCRYPTOPP_DATA_DIR, $(strip $(CXXFLAGS))),)
ifneq ($(strip $(CRYPTOPP_DATA_DIR)),)
validat%.o : validat%.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $<
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
bench%.o : bench%.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $<
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
datatest.o : datatest.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $<
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
test.o : test.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $<
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
endif
endif
%.dllonly.o : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DLL_ONLY -c $< -o $@
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DLL_ONLY -c) $< -o $@
%.import.o : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_IMPORTS -c $< -o $@
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_IMPORTS -c) $< -o $@
%.export.o : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_EXPORTS -c $< -o $@
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_EXPORTS -c) $< -o $@
%.bc : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -c $<
$(CXX) $(strip $(CXXFLAGS) -c) $<
%.o : %.cpp
$(CXX) $(strip $(CXXFLAGS)) -c $<
$(CXX) $(strip $(CXXFLAGS) -c) $<
.PHONY: so_warning
so_warning:

View File

@ -250,15 +250,15 @@ if [[ ("$SUNCC_510_OR_ABOVE" -ne "0") ]]; then
HAVE_OFAST=0
fi
if [[ (-z "$TMP") ]]; then
if [[ (-z "$TMPDIR") ]]; then
if [[ (-d "/tmp") ]]; then
TMP=/tmp
TMPDIR=/tmp
elif [[ (-d "/temp") ]]; then
TMP=/temp
TMPDIR=/temp
elif [[ (-d "$HOME/tmp") ]]; then
TMP="$HOME/tmp"
TMPDIR="$HOME/tmp"
else
echo "Please set TMP to a valid directory"
echo "Please set TMPDIR to a valid directory"
[[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1
fi
fi
@ -267,74 +267,74 @@ fi
rm -f adhoc.cpp > /dev/null 2>&1
cp adhoc.cpp.proto adhoc.cpp
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_CXX17") ]]; then
HAVE_CXX17=0
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_CXX17=1
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_GNU17") ]]; then
HAVE_GNU17=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_GNU17=1
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_CXX14") ]]; then
HAVE_CXX14=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_CXX14=1
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_GNU14") ]]; then
HAVE_GNU14=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_GNU14=1
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_CXX11") ]]; then
HAVE_CXX11=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_CXX11=1
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_GNU11") ]]; then
HAVE_GNU11=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_GNU11=1
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_CXX03") ]]; then
HAVE_CXX03=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_CXX03=1
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_GNU03") ]]; then
HAVE_GNU03=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_GNU03=1
fi
@ -342,13 +342,13 @@ fi
# Use a fallback strategy so OPT_O0 can be used with DEBUG_CXXFLAGS
OPT_O0=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_O0=-O0
else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_O0=-xO0
fi
@ -356,13 +356,13 @@ fi
# Use a fallback strategy so OPT_O1 can be used with VALGRIND_CXXFLAGS
OPT_O1=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_O1=-O1
else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_O1=-xO1
fi
@ -370,13 +370,13 @@ fi
# Use a fallback strategy so OPT_O2 can be used with RELEASE_CXXFLAGS
OPT_O2=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_O2=-O2
else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_O2=-xO2
fi
@ -385,14 +385,14 @@ fi
if [[ (-z "$HAVE_O3") ]]; then
HAVE_O3=0
OPT_O3=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_O3=1
OPT_O3=-O3
else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_O3=1
OPT_O3=-xO3
@ -404,14 +404,14 @@ fi
if [[ ( (-z "$HAVE_O5") && ("$CLANG_COMPILER" -eq "0") ) ]]; then
HAVE_O5=0
OPT_O5=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_O5=1
OPT_O5=-O5
else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_O5=1
OPT_O5=-xO5
@ -423,8 +423,8 @@ fi
if [[ (-z "$HAVE_OS") ]]; then
HAVE_OS=0
OPT_OS=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_OS=1
OPT_OS=-Os
@ -435,8 +435,8 @@ fi
if [[ (-z "$HAVE_OFAST") ]]; then
HAVE_OFAST=0
OPT_OFAST=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_OFAST=1
OPT_OFAST=-Ofast
@ -445,13 +445,13 @@ fi
# Use a fallback strategy so OPT_G2 can be used with RELEASE_CXXFLAGS
OPT_G2=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_G2=-g2
else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_G2=-g
fi
@ -459,13 +459,13 @@ fi
# Use a fallback strategy so OPT_G3 can be used with DEBUG_CXXFLAGS
OPT_G3=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_G3=-g3
else
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
OPT_G3=-g
fi
@ -473,10 +473,10 @@ fi
# Cygwin and noisy compiles
OPT_PIC=
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_PIC") ]]; then
HAVE_PIC=0
PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMP/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)')
PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMPDIR/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)')
if [[ "$PIC_PROBLEMS" -eq "0" ]]; then
HAVE_PIC=1
OPT_PIC=-fPIC
@ -484,12 +484,12 @@ if [[ (-z "$HAVE_PIC") ]]; then
fi
# GCC 4.8; Clang 3.4
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_UBSAN") ]]; then
HAVE_UBSAN=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
"$TMP/adhoc.exe" > /dev/null 2>&1
"$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_UBSAN=1
fi
@ -497,12 +497,12 @@ if [[ (-z "$HAVE_UBSAN") ]]; then
fi
# GCC 4.8; Clang 3.4
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_ASAN") ]]; then
HAVE_ASAN=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
"$TMP/adhoc.exe" > /dev/null 2>&1
"$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_ASAN=1
fi
@ -510,41 +510,41 @@ if [[ (-z "$HAVE_ASAN") ]]; then
fi
# GCC 6.0; maybe Clang
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_BSAN") ]]; then
HAVE_BSAN=0
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
"$TMP/adhoc.exe" > /dev/null 2>&1
"$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_BSAN=1
fi
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_OMP") ]]; then
HAVE_OMP=0
if [[ "$GCC_COMPILER" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_OMP=1
OMP_FLAGS=(-fopenmp -O3)
fi
elif [[ "$INTEL_COMPILER" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_OMP=1
OMP_FLAGS=(-openmp -O3)
fi
elif [[ "$CLANG_COMPILER" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_OMP=1
OMP_FLAGS=(-fopenmp=libomp -O3)
fi
elif [[ "$SUN_COMPILER" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_OMP=1
OMP_FLAGS=(-xopenmp=parallel -xO3)
@ -552,33 +552,33 @@ if [[ (-z "$HAVE_OMP") ]]; then
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_INTEL_MULTIARCH") ]]; then
HAVE_INTEL_MULTIARCH=0
if [[ ("$IS_DARWIN" -ne "0") && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0") ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_INTEL_MULTIARCH=1
fi
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_PPC_MULTIARCH") ]]; then
HAVE_PPC_MULTIARCH=0
if [[ ("$IS_DARWIN" -ne "0") && ("$IS_PPC" -ne "0") ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_PPC_MULTIARCH=1
fi
fi
fi
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ (-z "$HAVE_X32") ]]; then
HAVE_X32=0
if [[ "$IS_X32" -ne "0" ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_X32=1
fi
@ -588,8 +588,8 @@ fi
# Hit or miss, mostly hit
if [[ (-z "$HAVE_NATIVE_ARCH") ]]; then
HAVE_NATIVE_ARCH=0
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ ("$?" -eq "0") ]]; then
HAVE_NATIVE_ARCH=1
fi
@ -603,7 +603,7 @@ if [[ (-z "$HAVE_LDGOLD") ]]; then
if [[ (! -z "$LD_GOLD") && (! -z "$ELF_FILE") ]]; then
LD_GOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf")
if [[ ("$LD_GOLD" -ne "0") ]]; then
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_LDGOLD=1
fi
@ -688,10 +688,10 @@ fi
# Used to disassemble object modules so we can verify some aspects of code generation
if [[ (-z "$HAVE_DISASS") ]]; then
echo "int main(int argc, char* argv[]) {return 0;}" > "$TMP/test.cc"
"$CXX" "$TMP/test.cc" -o "$TMP/test.exe" > /dev/null 2>&1
echo "int main(int argc, char* argv[]) {return 0;}" > "$TMPDIR/test.cc"
"$CXX" "$TMPDIR/test.cc" -o "$TMPDIR/test.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
"$DISASS" "${DISASSARGS[@]}" "$TMP/test.exe" > /dev/null 2>&1
"$DISASS" "${DISASSARGS[@]}" "$TMPDIR/test.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
HAVE_DISASS=1
else
@ -1167,7 +1167,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo
OBJFILE=sha.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
@ -1201,7 +1201,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################
# Test CRC-32C code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
X86_CRC32=1
fi
@ -1239,7 +1239,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################
# Test AES-NI code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
X86_AESNI=1
fi
@ -1251,7 +1251,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo
OBJFILE=rijndael.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
@ -1301,7 +1301,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################
# X86 carryless multiply code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
X86_PCLMUL=1
fi
@ -1312,8 +1312,8 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
@ -1339,11 +1339,11 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################
# Test RDRAND and RDSEED code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
X86_RDRAND=1
fi
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
X86_RDSEED=1
fi
@ -1355,7 +1355,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo
OBJFILE=rdrand.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
@ -1385,7 +1385,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
############################################
# X86 SHA code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
X86_SHA=1
fi
@ -1397,7 +1397,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo
OBJFILE=sha-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
@ -1465,7 +1465,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
echo "Testing: ARM NEON code generation" | tee -a "$TEST_RESULTS"
echo
OBJFILE=aria.o; rm -f "$OBJFILE" 2>/dev/null
OBJFILE=aria-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
@ -1515,7 +1515,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
############################################
# ARM CRC32 code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
ARM_CRC32=1
fi
@ -1565,7 +1565,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
############################################
# ARM carryless multiply code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
ARM_PMULL=1
fi
@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null
OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
@ -1603,7 +1603,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
############################################
# ARM SHA code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
ARM_SHA=1
fi
@ -5098,7 +5098,7 @@ fi
if [[ ("$CLANG_COMPILER" -eq "0") ]]; then
CLANG_CXX=$(which clang++ 2>&1 | "$GREP" -v "no clang++" | head -1)
"$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5133,7 +5133,7 @@ fi
if [[ ("$GCC_COMPILER" -eq "0") ]]; then
GCC_CXX=$(which g++ 2>&1 | "$GREP" -v "no g++" | head -1)
"$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5171,7 +5171,7 @@ if [[ ("$INTEL_COMPILER" -eq "0") ]]; then
if [[ (-z "$INTEL_CXX") ]]; then
INTEL_CXX=$(find /opt/intel -name icpc 2>/dev/null | "$GREP" -iv composer | head -1)
fi
"$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5207,7 +5207,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-4*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5240,7 +5240,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-5*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5273,7 +5273,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-6*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5306,7 +5306,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-7*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5339,7 +5339,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.7*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5371,7 +5371,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.8*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5403,7 +5403,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.9*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################
@ -5435,7 +5435,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-4*' 2>/dev/null | head -1)
if [[ (! -z "$MACPORTS_CXX") ]]; then
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
############################################

View File

@ -517,7 +517,7 @@ NAMESPACE_END
// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
#if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || \
# if defined(__ARM_NEON__) || defined(__ARM_FEATURE_NEON) || (CRYPTOPP_MSC_VER >= 1900) || \
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_ARM_NEON_AVAILABLE 1
# endif
@ -528,9 +528,8 @@ NAMESPACE_END
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRC32) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
# endif
#endif
@ -540,9 +539,8 @@ NAMESPACE_END
// it at the moment. Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
# endif
#endif
@ -552,19 +550,17 @@ NAMESPACE_END
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_ARMV8A_AES_AVAILABLE 1
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1
# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1
# endif
#endif
// ARM CRC testing
// TODO...
#undef CRYPTOPP_ARMV8A_AES_AVAILABLE
#undef CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#undef CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
#endif // ARM32, ARM64

59
cpu.cpp
View File

@ -352,36 +352,9 @@ extern "C"
};
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
static bool TryNEON()
{
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
return CPU_TryNEON_ARM();
#else
return false;
#endif
}
static bool TryCRC32()
{
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
return CPU_TryCRC32_ARMV8();
#else
return false;
#endif
}
static bool TryPMULL()
{
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
return CPU_TryPMULL_ARMV8();
#else
return false;
#endif
}
static bool TryAES()
{
#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
#if (CRYPTOPP_ARMV8A_AES_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
@ -432,32 +405,14 @@ static bool TryAES()
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
}
static bool TrySHA1()
{
#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE)
return CPU_TrySHA1_ARMV8();
#else
return false;
#endif
}
static bool TrySHA2()
{
#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE)
return CPU_TrySHA2_ARMV8();
#else
return false;
#endif
}
void DetectArmFeatures()
{
g_hasNEON = TryNEON();
g_hasPMULL = TryPMULL();
g_hasCRC32 = TryCRC32();
g_hasAES = TryAES();
g_hasSHA1 = TrySHA1();
g_hasSHA2 = TrySHA2();
g_hasNEON = CPU_TryNEON_ARM();
g_hasPMULL = CPU_TryPMULL_ARMV8();
g_hasCRC32 = CPU_TryCRC32_ARMV8();
g_hasAES = TryAES(); // TODO
g_hasSHA1 = CPU_TrySHA1_ARMV8();
g_hasSHA2 = CPU_TrySHA2_ARMV8();
g_ArmDetectionDone = true;
}

View File

@ -1312,7 +1312,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null
OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null
OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0

View File

@ -26,6 +26,121 @@
# include <setjmp.h>
#endif
ANONYMOUS_NAMESPACE_BEGIN
// GCC 4.8 and 4.9 are missing PMULL gear
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
# if (CRYPTOPP_GCC_VERSION >= 40800) && (CRYPTOPP_GCC_VERSION < 50000)
inline poly128_t VMULL_P64(poly64_t a, poly64_t b)
{
return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}
inline poly128_t VMULL_HIGH_P64(poly64x2_t a, poly64x2_t b)
{
return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}
# endif
#endif
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#if defined(__GNUC__)
// Schneiders, Hovsmith and O'Rourke used this trick.
// It results in much better code generation in production code
// by avoiding D-register spills when using vgetq_lane_u64. The
// problem does not surface under minimal test cases.
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (a), "w" (b) );
return r;
}
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (a), "w" (vget_high_u64(b)) );
return r;
}
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (vget_high_u64(a)), "w" (b) );
return r;
}
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t"
:"=w" (r) : "w" (a), "w" (b) );
return r;
}
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
uint64x2_t r;
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
:"=w" (r) : "w" (a), "w" (b), "I" (c) );
return r;
}
// https://github.com/weidai11/cryptopp/issues/366
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
:"=w" (r) : "w" (a), "w" (b), "I" (C) );
return r;
}
#endif // GCC and compatibles
#if defined(_MSC_VER)
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
}
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
}
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
}
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
}
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c);
}
// https://github.com/weidai11/cryptopp/issues/366
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
}
#endif // Microsoft and compatibles
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
@ -71,7 +186,7 @@ bool CPU_TryPMULL_ARMV8()
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
if (oldHandler == SIG_ERR)
return false;
@ -79,7 +194,7 @@ bool CPU_TryPMULL_ARMV8()
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoPMULL))
if (setjmp(s_jmpSIGILL))
result = false;
else
{
@ -87,8 +202,8 @@ bool CPU_TryPMULL_ARMV8()
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
const poly128_t r1 = vmull_p64(a1, b1);
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
const poly128_t r1 = VMULL_P64(a1, b1);
const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2));
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
@ -115,4 +230,116 @@ void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c)
}
#endif
#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE
ANONYMOUS_NAMESPACE_BEGIN
CRYPTOPP_ALIGN_DATA(16)
const word64 s_clmulConstants64[] = {
W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients
W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607), // Unused for ARM; used for x86 _mm_shuffle_epi8
W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f) // Unused for ARM; used for x86 _mm_shuffle_epi8
};
const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64;
const unsigned int s_clmulTableSizeInBlocks = 8;
ANONYMOUS_NAMESPACE_END
uint64x2_t GCM_Reduce_ARMV8A(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r)
{
c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0));
c1 = veorq_u64(c1, PMULL_01(c0, r));
c0 = VEXT_U8<8>(c0, vdupq_n_u64(0));
c0 = vshlq_n_u64(veorq_u64(c0, c1), 1);
c0 = PMULL_00(c0, r);
c2 = veorq_u64(c2, c0);
c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0)));
c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63);
c2 = vshlq_n_u64(c2, 1);
return veorq_u64(c2, c1);
}
uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r)
{
const uint64x2_t c0 = PMULL_00(x, h);
const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h));
const uint64x2_t c2 = PMULL_11(x, h);
return GCM_Reduce_ARMV8A(c0, c1, c2, r);
}
size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer)
{
const uint64x2_t* table = reinterpret_cast<const uint64x2_t*>(mtable);
uint64x2_t x = vreinterpretq_u64_u8(vld1q_u8(hbuffer));
const uint64x2_t r = s_clmulConstants[0];
const size_t BLOCKSIZE = 16;
while (len >= BLOCKSIZE)
{
size_t s = UnsignedMin(len/BLOCKSIZE, s_clmulTableSizeInBlocks), i=0;
uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*BLOCKSIZE)));
uint64x2_t c0 = vdupq_n_u64(0);
uint64x2_t c1 = vdupq_n_u64(0);
uint64x2_t c2 = vdupq_n_u64(0);
while (true)
{
const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i));
const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1));
const uint64x2_t h2 = veorq_u64(h0, h1);
if (++i == s)
{
const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
d1 = veorq_u64(vextq_u64(t1, t1, 1), x);
c0 = veorq_u64(c0, PMULL_00(d1, h0));
c2 = veorq_u64(c2, PMULL_10(d1, h1));
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
vget_low_u32(vreinterpretq_u32_u64(d1))));
c1 = veorq_u64(c1, PMULL_00(d1, h2));
break;
}
d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
c0 = veorq_u64(c0, PMULL_10(d2, h0));
c2 = veorq_u64(c2, PMULL_10(d1, h1));
d2 = veorq_u64(d2, d1);
c1 = veorq_u64(c1, PMULL_10(d2, h2));
if (++i == s)
{
const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
d1 = veorq_u64(vextq_u64(t2, t2, 1), x);
c0 = veorq_u64(c0, PMULL_01(d1, h0));
c2 = veorq_u64(c2, PMULL_11(d1, h1));
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
vget_low_u32(vreinterpretq_u32_u64(d1))));
c1 = veorq_u64(c1, PMULL_01(d1, h2));
break;
}
const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
d2 = vextq_u64(t3, t3, 1);
c0 = veorq_u64(c0, PMULL_01(d1, h0));
c2 = veorq_u64(c2, PMULL_01(d2, h1));
d1 = veorq_u64(d1, d2);
c1 = veorq_u64(c1, PMULL_01(d1, h2));
}
data += s*16;
len -= s*16;
c1 = veorq_u64(veorq_u64(c1, c0), c2);
x = GCM_Reduce_ARMV8A(c0, c1, c2, r);
}
vst1q_u64(reinterpret_cast<uint64_t *>(hbuffer), x);
return len;
}
#endif
NAMESPACE_END

199
gcm.cpp
View File

@ -53,102 +53,6 @@ NAMESPACE_BEGIN(CryptoPP)
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
#endif
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#if defined(__GNUC__)
// Schneiders, Hovsmith and O'Rourke used this trick.
// It results in much better code generation in production code
// by avoiding D-register spills when using vgetq_lane_u64. The
// problem does not surface under minimal test cases.
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (a), "w" (b) );
return r;
}
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (a), "w" (vget_high_u64(b)) );
return r;
}
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
:"=w" (r) : "w" (vget_high_u64(a)), "w" (b) );
return r;
}
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t"
:"=w" (r) : "w" (a), "w" (b) );
return r;
}
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
uint64x2_t r;
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
:"=w" (r) : "w" (a), "w" (b), "I" (c) );
return r;
}
// https://github.com/weidai11/cryptopp/issues/366
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
uint64x2_t r;
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
:"=w" (r) : "w" (a), "w" (b), "I" (C) );
return r;
}
#endif // GCC and compatibles
#if defined(_MSC_VER)
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
}
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
}
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
}
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
}
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c);
}
// https://github.com/weidai11/cryptopp/issues/366
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
}
#endif // Microsoft and compatibles
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
word16 GCM_Base::s_reductionTable[256];
volatile bool GCM_Base::s_reductionTableInitialized = false;
@ -278,6 +182,9 @@ inline __m128i CLMUL_GF_Mul(const __m128i &x, const __m128i &h, const __m128i &r
#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE
extern size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
extern uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r);
CRYPTOPP_ALIGN_DATA(16)
static const word64 s_clmulConstants64[] = {
W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients
@ -287,31 +194,6 @@ static const word64 s_clmulConstants64[] = {
static const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64;
static const unsigned int s_clmulTableSizeInBlocks = 8;
inline uint64x2_t PMULL_Reduce(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r)
{
// See comments fo CLMUL_Reduce
c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0));
c1 = veorq_u64(c1, PMULL_01(c0, r));
c0 = VEXT_U8<8>(c0, vdupq_n_u64(0));
c0 = vshlq_n_u64(veorq_u64(c0, c1), 1);
c0 = PMULL_00(c0, r);
c2 = veorq_u64(c2, c0);
c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0)));
c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63);
c2 = vshlq_n_u64(c2, 1);
return veorq_u64(c2, c1);
}
inline uint64x2_t PMULL_GF_Mul(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r)
{
const uint64x2_t c0 = PMULL_00(x, h);
const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h));
const uint64x2_t c2 = PMULL_11(x, h);
return PMULL_Reduce(c0, c1, c2, r);
}
#endif
void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
@ -388,15 +270,15 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
uint64x2_t h = h0;
for (i=0; i<tableSize-32; i+=32)
{
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r);
const uint64x2_t h1 = GCM_Multiply_ARMV8A(h, h0, r);
vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
vst1q_u64((uint64_t *)(table+i+16), h1);
vst1q_u64((uint64_t *)(table+i+8), h);
vst1_u64((uint64_t *)(table+i+8), vget_low_u64(h1));
h = PMULL_GF_Mul(h1, h0, r);
h = GCM_Multiply_ARMV8A(h1, h0, r);
}
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r);
const uint64x2_t h1 = GCM_Multiply_ARMV8A(h, h0, r);
vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
vst1q_u64((uint64_t *)(table+i+16), h1);
vst1q_u64((uint64_t *)(table+i+8), h);
@ -667,73 +549,8 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL())
{
const uint64x2_t *table = (const uint64x2_t *)MulTable();
uint64x2_t x = vreinterpretq_u64_u8(vld1q_u8(HashBuffer()));
const uint64x2_t r = s_clmulConstants[0];
while (len >= 16)
{
size_t s = UnsignedMin(len/16, s_clmulTableSizeInBlocks), i=0;
uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*16)));
uint64x2_t c0 = vdupq_n_u64(0);
uint64x2_t c1 = vdupq_n_u64(0);
uint64x2_t c2 = vdupq_n_u64(0);
while (true)
{
const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i));
const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1));
const uint64x2_t h2 = veorq_u64(h0, h1);
if (++i == s)
{
const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
d1 = veorq_u64(vextq_u64(t1, t1, 1), x);
c0 = veorq_u64(c0, PMULL_00(d1, h0));
c2 = veorq_u64(c2, PMULL_10(d1, h1));
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
vget_low_u32(vreinterpretq_u32_u64(d1))));
c1 = veorq_u64(c1, PMULL_00(d1, h2));
break;
}
d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
c0 = veorq_u64(c0, PMULL_10(d2, h0));
c2 = veorq_u64(c2, PMULL_10(d1, h1));
d2 = veorq_u64(d2, d1);
c1 = veorq_u64(c1, PMULL_10(d2, h2));
if (++i == s)
{
const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
d1 = veorq_u64(vextq_u64(t2, t2, 1), x);
c0 = veorq_u64(c0, PMULL_01(d1, h0));
c2 = veorq_u64(c2, PMULL_11(d1, h1));
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
vget_low_u32(vreinterpretq_u32_u64(d1))));
c1 = veorq_u64(c1, PMULL_01(d1, h2));
break;
}
const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
d2 = vextq_u64(t3, t3, 1);
c0 = veorq_u64(c0, PMULL_01(d1, h0));
c2 = veorq_u64(c2, PMULL_01(d2, h1));
d1 = veorq_u64(d1, d2);
c1 = veorq_u64(c1, PMULL_01(d1, h2));
}
data += s*16;
len -= s*16;
c1 = veorq_u64(veorq_u64(c1, c0), c2);
x = PMULL_Reduce(c0, c1, c2, r);
}
vst1q_u64((uint64_t *)HashBuffer(), x);
return len;
}
return GCM_AuthenticateBlocks_ARMV8(data, len, MulTable(), HashBuffer());
}
#endif
typedef BlockGetAndPut<word64, NativeByteOrder> Block;