Cleaned up ARM related defines, like CRYPTOPP_ARM_NEON_AVAILABLE
We only need to base it on the compiler in config.h. config.h activates the code path guarded by HasNEON(). The source file that actially provides the NEON implementation will be compiled with -fpu=neon or -march=armv8-a. Since we are providing the specialized implementation in a sequestered source file (and not a header file), we can probably avoid the defines like CRYPTOPP_ARM_NEON_AVAILABLE altogether.pull/461/head
parent
b4f6882237
commit
6169b5d4d6
45
GNUmakefile
45
GNUmakefile
|
|
@ -202,6 +202,7 @@ endif # CXXFLAGS
|
|||
ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),)
|
||||
SSE42_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -dM -E - | grep -i -c -q __SSE4_2__ && echo "-msse4.2")
|
||||
ifeq ($(findstring -DCRYPTOPP_DISABLE_AESNI,$(CXXFLAGS)),)
|
||||
GCM_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -mpclmul -dM -E - | grep -i -c -q __PCLMUL__ && echo "-mpclmul")
|
||||
AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -maes -dM -E - | grep -i -c -q __AES__ && echo "-maes")
|
||||
ifeq ($(findstring -DCRYPTOPP_DISABLE_SHA,$(CXXFLAGS)),)
|
||||
SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -msse4.2 -msha -dM -E - | grep -i -c -q __SHA__ && echo "-msse4.2 -msha")
|
||||
|
|
@ -308,14 +309,16 @@ ifeq ($(IS_NEON),1)
|
|||
endif
|
||||
|
||||
ifeq ($(IS_ARMV8),1)
|
||||
ARMV8A_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a")
|
||||
CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc")
|
||||
AES_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto")
|
||||
SHA_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto")
|
||||
GCM_FLAG = $(ARMV8A_FLAG)
|
||||
ARIA_FLAG = $(ARMV8A_FLAG)
|
||||
BLAKE2_FLAG = $(ARMV8A_FLAG)
|
||||
NEON_FLAG = $(ARMV8A_FLAG)
|
||||
ARMV8A_NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv8-a")
|
||||
ARMV8A_CRC_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crc -dM -E - | grep -i -c -q __ARM_FEATURE_CRC32 && echo "-march=armv8-a+crc")
|
||||
ARMV8A_CRYPTO_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv8-a+crypto -dM -E - | grep -i -c -q __ARM_FEATURE_CRYPTO && echo "-march=armv8-a+crypto")
|
||||
CRC_FLAG = $(ARMV8A_CRC_FLAG)
|
||||
AES_FLAG = $(ARMV8A_CRYPTO_FLAG)
|
||||
GCM_FLAG = $(ARMV8A_CRYPTO_FLAG)
|
||||
SHA_FLAG = $(ARMV8A_CRYPTO_FLAG)
|
||||
ARIA_FLAG = $(ARMV8A_NEON_FLAG)
|
||||
BLAKE2_FLAG = $(ARMV8A_NEON_FLAG)
|
||||
NEON_FLAG = $(ARMV8A_NEON_FLAG)
|
||||
endif
|
||||
|
||||
endif # IS_X86
|
||||
|
|
@ -851,7 +854,7 @@ endif # Dependencies
|
|||
# Run rdrand-nasm.sh to create the object files
|
||||
ifeq ($(USE_NASM),1)
|
||||
rdrand.o: rdrand.h rdrand.cpp rdrand.s
|
||||
$(CXX) $(strip $(CXXFLAGS)) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) -DNASM_RDRAND_ASM_AVAILABLE=1 -DNASM_RDSEED_ASM_AVAILABLE=1 -c rdrand.cpp)
|
||||
rdrand-%.o:
|
||||
./rdrand-nasm.sh
|
||||
endif
|
||||
|
|
@ -884,49 +887,49 @@ sha-simd.o : sha-simd.cpp
|
|||
# Also see https://stackoverflow.com/q/12983137/608639.
|
||||
ifeq ($(findstring true,$(CI)),true)
|
||||
threefish.o : threefish.cpp
|
||||
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $<
|
||||
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $<
|
||||
endif
|
||||
|
||||
# Don't build Rijndael with UBsan. Too much noise due to unaligned data accesses.
|
||||
ifneq ($(findstring -fsanitize=undefined,$(CXXFLAGS)),)
|
||||
rijndael.o : rijndael.cpp
|
||||
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS))) -c $<
|
||||
$(CXX) $(strip $(subst -fsanitize=undefined,,$(CXXFLAGS)) -c) $<
|
||||
endif
|
||||
|
||||
# Don't build VMAC and friends with Asan. Too many false positives.
|
||||
ifneq ($(findstring -fsanitize=address,$(CXXFLAGS)),)
|
||||
vmac.o : vmac.cpp
|
||||
$(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS))) -c $<
|
||||
$(CXX) $(strip $(subst -fsanitize=address,,$(CXXFLAGS)) -c) $<
|
||||
endif
|
||||
|
||||
# Only use CRYPTOPP_DATA_DIR if its not set in CXXFLAGS
|
||||
ifeq ($(findstring -DCRYPTOPP_DATA_DIR, $(strip $(CXXFLAGS))),)
|
||||
ifneq ($(strip $(CRYPTOPP_DATA_DIR)),)
|
||||
validat%.o : validat%.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $<
|
||||
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
|
||||
bench%.o : bench%.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $<
|
||||
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
|
||||
datatest.o : datatest.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $<
|
||||
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
|
||||
test.o : test.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c $<
|
||||
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DATA_DIR=\"$(CRYPTOPP_DATA_DIR)\" -c) $<
|
||||
endif
|
||||
endif
|
||||
|
||||
%.dllonly.o : %.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_DLL_ONLY -c $< -o $@
|
||||
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_DLL_ONLY -c) $< -o $@
|
||||
|
||||
%.import.o : %.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_IMPORTS -c $< -o $@
|
||||
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_IMPORTS -c) $< -o $@
|
||||
|
||||
%.export.o : %.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS)) -DCRYPTOPP_EXPORTS -c $< -o $@
|
||||
$(CXX) $(strip $(CXXFLAGS) -DCRYPTOPP_EXPORTS -c) $< -o $@
|
||||
|
||||
%.bc : %.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS)) -c $<
|
||||
$(CXX) $(strip $(CXXFLAGS) -c) $<
|
||||
|
||||
%.o : %.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS)) -c $<
|
||||
$(CXX) $(strip $(CXXFLAGS) -c) $<
|
||||
|
||||
.PHONY: so_warning
|
||||
so_warning:
|
||||
|
|
|
|||
|
|
@ -250,15 +250,15 @@ if [[ ("$SUNCC_510_OR_ABOVE" -ne "0") ]]; then
|
|||
HAVE_OFAST=0
|
||||
fi
|
||||
|
||||
if [[ (-z "$TMP") ]]; then
|
||||
if [[ (-z "$TMPDIR") ]]; then
|
||||
if [[ (-d "/tmp") ]]; then
|
||||
TMP=/tmp
|
||||
TMPDIR=/tmp
|
||||
elif [[ (-d "/temp") ]]; then
|
||||
TMP=/temp
|
||||
TMPDIR=/temp
|
||||
elif [[ (-d "$HOME/tmp") ]]; then
|
||||
TMP="$HOME/tmp"
|
||||
TMPDIR="$HOME/tmp"
|
||||
else
|
||||
echo "Please set TMP to a valid directory"
|
||||
echo "Please set TMPDIR to a valid directory"
|
||||
[[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1
|
||||
fi
|
||||
fi
|
||||
|
|
@ -267,74 +267,74 @@ fi
|
|||
rm -f adhoc.cpp > /dev/null 2>&1
|
||||
cp adhoc.cpp.proto adhoc.cpp
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_CXX17") ]]; then
|
||||
HAVE_CXX17=0
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_CXX17=1
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_GNU17") ]]; then
|
||||
HAVE_GNU17=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_GNU17=1
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_CXX14") ]]; then
|
||||
HAVE_CXX14=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_CXX14=1
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_GNU14") ]]; then
|
||||
HAVE_GNU14=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_GNU14=1
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_CXX11") ]]; then
|
||||
HAVE_CXX11=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_CXX11=1
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_GNU11") ]]; then
|
||||
HAVE_GNU11=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_GNU11=1
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_CXX03") ]]; then
|
||||
HAVE_CXX03=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_CXX03=1
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_GNU03") ]]; then
|
||||
HAVE_GNU03=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_GNU03=1
|
||||
fi
|
||||
|
|
@ -342,13 +342,13 @@ fi
|
|||
|
||||
# Use a fallback strategy so OPT_O0 can be used with DEBUG_CXXFLAGS
|
||||
OPT_O0=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_O0=-O0
|
||||
else
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_O0=-xO0
|
||||
fi
|
||||
|
|
@ -356,13 +356,13 @@ fi
|
|||
|
||||
# Use a fallback strategy so OPT_O1 can be used with VALGRIND_CXXFLAGS
|
||||
OPT_O1=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_O1=-O1
|
||||
else
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_O1=-xO1
|
||||
fi
|
||||
|
|
@ -370,13 +370,13 @@ fi
|
|||
|
||||
# Use a fallback strategy so OPT_O2 can be used with RELEASE_CXXFLAGS
|
||||
OPT_O2=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_O2=-O2
|
||||
else
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_O2=-xO2
|
||||
fi
|
||||
|
|
@ -385,14 +385,14 @@ fi
|
|||
if [[ (-z "$HAVE_O3") ]]; then
|
||||
HAVE_O3=0
|
||||
OPT_O3=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_O3=1
|
||||
OPT_O3=-O3
|
||||
else
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_O3=1
|
||||
OPT_O3=-xO3
|
||||
|
|
@ -404,14 +404,14 @@ fi
|
|||
if [[ ( (-z "$HAVE_O5") && ("$CLANG_COMPILER" -eq "0") ) ]]; then
|
||||
HAVE_O5=0
|
||||
OPT_O5=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_O5=1
|
||||
OPT_O5=-O5
|
||||
else
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_O5=1
|
||||
OPT_O5=-xO5
|
||||
|
|
@ -423,8 +423,8 @@ fi
|
|||
if [[ (-z "$HAVE_OS") ]]; then
|
||||
HAVE_OS=0
|
||||
OPT_OS=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_OS=1
|
||||
OPT_OS=-Os
|
||||
|
|
@ -435,8 +435,8 @@ fi
|
|||
if [[ (-z "$HAVE_OFAST") ]]; then
|
||||
HAVE_OFAST=0
|
||||
OPT_OFAST=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_OFAST=1
|
||||
OPT_OFAST=-Ofast
|
||||
|
|
@ -445,13 +445,13 @@ fi
|
|||
|
||||
# Use a fallback strategy so OPT_G2 can be used with RELEASE_CXXFLAGS
|
||||
OPT_G2=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_G2=-g2
|
||||
else
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_G2=-g
|
||||
fi
|
||||
|
|
@ -459,13 +459,13 @@ fi
|
|||
|
||||
# Use a fallback strategy so OPT_G3 can be used with DEBUG_CXXFLAGS
|
||||
OPT_G3=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_G3=-g3
|
||||
else
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
OPT_G3=-g
|
||||
fi
|
||||
|
|
@ -473,10 +473,10 @@ fi
|
|||
|
||||
# Cygwin and noisy compiles
|
||||
OPT_PIC=
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_PIC") ]]; then
|
||||
HAVE_PIC=0
|
||||
PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMP/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)')
|
||||
PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMPDIR/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)')
|
||||
if [[ "$PIC_PROBLEMS" -eq "0" ]]; then
|
||||
HAVE_PIC=1
|
||||
OPT_PIC=-fPIC
|
||||
|
|
@ -484,12 +484,12 @@ if [[ (-z "$HAVE_PIC") ]]; then
|
|||
fi
|
||||
|
||||
# GCC 4.8; Clang 3.4
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_UBSAN") ]]; then
|
||||
HAVE_UBSAN=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
"$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_UBSAN=1
|
||||
fi
|
||||
|
|
@ -497,12 +497,12 @@ if [[ (-z "$HAVE_UBSAN") ]]; then
|
|||
fi
|
||||
|
||||
# GCC 4.8; Clang 3.4
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_ASAN") ]]; then
|
||||
HAVE_ASAN=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
"$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_ASAN=1
|
||||
fi
|
||||
|
|
@ -510,41 +510,41 @@ if [[ (-z "$HAVE_ASAN") ]]; then
|
|||
fi
|
||||
|
||||
# GCC 6.0; maybe Clang
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_BSAN") ]]; then
|
||||
HAVE_BSAN=0
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
"$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_BSAN=1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_OMP") ]]; then
|
||||
HAVE_OMP=0
|
||||
if [[ "$GCC_COMPILER" -ne "0" ]]; then
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_OMP=1
|
||||
OMP_FLAGS=(-fopenmp -O3)
|
||||
fi
|
||||
elif [[ "$INTEL_COMPILER" -ne "0" ]]; then
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_OMP=1
|
||||
OMP_FLAGS=(-openmp -O3)
|
||||
fi
|
||||
elif [[ "$CLANG_COMPILER" -ne "0" ]]; then
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_OMP=1
|
||||
OMP_FLAGS=(-fopenmp=libomp -O3)
|
||||
fi
|
||||
elif [[ "$SUN_COMPILER" -ne "0" ]]; then
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_OMP=1
|
||||
OMP_FLAGS=(-xopenmp=parallel -xO3)
|
||||
|
|
@ -552,33 +552,33 @@ if [[ (-z "$HAVE_OMP") ]]; then
|
|||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_INTEL_MULTIARCH") ]]; then
|
||||
HAVE_INTEL_MULTIARCH=0
|
||||
if [[ ("$IS_DARWIN" -ne "0") && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0") ]]; then
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_INTEL_MULTIARCH=1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_PPC_MULTIARCH") ]]; then
|
||||
HAVE_PPC_MULTIARCH=0
|
||||
if [[ ("$IS_DARWIN" -ne "0") && ("$IS_PPC" -ne "0") ]]; then
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_PPC_MULTIARCH=1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ (-z "$HAVE_X32") ]]; then
|
||||
HAVE_X32=0
|
||||
if [[ "$IS_X32" -ne "0" ]]; then
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_X32=1
|
||||
fi
|
||||
|
|
@ -588,8 +588,8 @@ fi
|
|||
# Hit or miss, mostly hit
|
||||
if [[ (-z "$HAVE_NATIVE_ARCH") ]]; then
|
||||
HAVE_NATIVE_ARCH=0
|
||||
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ ("$?" -eq "0") ]]; then
|
||||
HAVE_NATIVE_ARCH=1
|
||||
fi
|
||||
|
|
@ -603,7 +603,7 @@ if [[ (-z "$HAVE_LDGOLD") ]]; then
|
|||
if [[ (! -z "$LD_GOLD") && (! -z "$ELF_FILE") ]]; then
|
||||
LD_GOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf")
|
||||
if [[ ("$LD_GOLD" -ne "0") ]]; then
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_LDGOLD=1
|
||||
fi
|
||||
|
|
@ -688,10 +688,10 @@ fi
|
|||
|
||||
# Used to disassemble object modules so we can verify some aspects of code generation
|
||||
if [[ (-z "$HAVE_DISASS") ]]; then
|
||||
echo "int main(int argc, char* argv[]) {return 0;}" > "$TMP/test.cc"
|
||||
"$CXX" "$TMP/test.cc" -o "$TMP/test.exe" > /dev/null 2>&1
|
||||
echo "int main(int argc, char* argv[]) {return 0;}" > "$TMPDIR/test.cc"
|
||||
"$CXX" "$TMPDIR/test.cc" -o "$TMPDIR/test.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
"$DISASS" "${DISASSARGS[@]}" "$TMP/test.exe" > /dev/null 2>&1
|
||||
"$DISASS" "${DISASSARGS[@]}" "$TMPDIR/test.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
HAVE_DISASS=1
|
||||
else
|
||||
|
|
@ -1167,7 +1167,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
echo
|
||||
|
||||
OBJFILE=sha.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
|
||||
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
|
||||
|
||||
|
|
@ -1201,7 +1201,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
############################################
|
||||
# Test CRC-32C code generation
|
||||
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
X86_CRC32=1
|
||||
fi
|
||||
|
|
@ -1239,7 +1239,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
############################################
|
||||
# Test AES-NI code generation
|
||||
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
X86_AESNI=1
|
||||
fi
|
||||
|
|
@ -1251,7 +1251,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
echo
|
||||
|
||||
OBJFILE=rijndael.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
|
||||
COUNT=0
|
||||
FAILED=0
|
||||
|
|
@ -1301,7 +1301,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
############################################
|
||||
# X86 carryless multiply code generation
|
||||
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
X86_PCLMUL=1
|
||||
fi
|
||||
|
|
@ -1312,8 +1312,8 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS"
|
||||
echo
|
||||
|
||||
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
|
||||
COUNT=0
|
||||
FAILED=0
|
||||
|
|
@ -1339,11 +1339,11 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
############################################
|
||||
# Test RDRAND and RDSEED code generation
|
||||
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
X86_RDRAND=1
|
||||
fi
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
X86_RDSEED=1
|
||||
fi
|
||||
|
|
@ -1355,7 +1355,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
echo
|
||||
|
||||
OBJFILE=rdrand.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
|
||||
COUNT=0
|
||||
FAILED=0
|
||||
|
|
@ -1385,7 +1385,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
############################################
|
||||
# X86 SHA code generation
|
||||
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
X86_SHA=1
|
||||
fi
|
||||
|
|
@ -1397,7 +1397,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
echo
|
||||
|
||||
OBJFILE=sha-simd.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1 -msse -msse2" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
|
||||
COUNT=0
|
||||
FAILED=0
|
||||
|
|
@ -1465,7 +1465,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
|
|||
echo "Testing: ARM NEON code generation" | tee -a "$TEST_RESULTS"
|
||||
echo
|
||||
|
||||
OBJFILE=aria.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
OBJFILE=aria-simd.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
|
||||
COUNT=0
|
||||
|
|
@ -1515,7 +1515,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
|
|||
############################################
|
||||
# ARM CRC32 code generation
|
||||
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
ARM_CRC32=1
|
||||
fi
|
||||
|
|
@ -1565,7 +1565,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
|
|||
############################################
|
||||
# ARM carryless multiply code generation
|
||||
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
ARM_PMULL=1
|
||||
fi
|
||||
|
|
@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
|
|||
echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS"
|
||||
echo
|
||||
|
||||
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
|
||||
COUNT=0
|
||||
|
|
@ -1603,7 +1603,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
|
|||
############################################
|
||||
# ARM SHA code generation
|
||||
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CXX" -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
ARM_SHA=1
|
||||
fi
|
||||
|
|
@ -5098,7 +5098,7 @@ fi
|
|||
if [[ ("$CLANG_COMPILER" -eq "0") ]]; then
|
||||
|
||||
CLANG_CXX=$(which clang++ 2>&1 | "$GREP" -v "no clang++" | head -1)
|
||||
"$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5133,7 +5133,7 @@ fi
|
|||
if [[ ("$GCC_COMPILER" -eq "0") ]]; then
|
||||
|
||||
GCC_CXX=$(which g++ 2>&1 | "$GREP" -v "no g++" | head -1)
|
||||
"$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5171,7 +5171,7 @@ if [[ ("$INTEL_COMPILER" -eq "0") ]]; then
|
|||
if [[ (-z "$INTEL_CXX") ]]; then
|
||||
INTEL_CXX=$(find /opt/intel -name icpc 2>/dev/null | "$GREP" -iv composer | head -1)
|
||||
fi
|
||||
"$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5207,7 +5207,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
|
|||
|
||||
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-4*' 2>/dev/null | head -1)
|
||||
if [[ (! -z "$MACPORTS_CXX") ]]; then
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5240,7 +5240,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
|
|||
|
||||
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-5*' 2>/dev/null | head -1)
|
||||
if [[ (! -z "$MACPORTS_CXX") ]]; then
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5273,7 +5273,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
|
|||
|
||||
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-6*' 2>/dev/null | head -1)
|
||||
if [[ (! -z "$MACPORTS_CXX") ]]; then
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5306,7 +5306,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
|
|||
|
||||
MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-7*' 2>/dev/null | head -1)
|
||||
if [[ (! -z "$MACPORTS_CXX") ]]; then
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5339,7 +5339,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
|
|||
|
||||
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.7*' 2>/dev/null | head -1)
|
||||
if [[ (! -z "$MACPORTS_CXX") ]]; then
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5371,7 +5371,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
|
|||
|
||||
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.8*' 2>/dev/null | head -1)
|
||||
if [[ (! -z "$MACPORTS_CXX") ]]; then
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5403,7 +5403,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
|
|||
|
||||
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.9*' 2>/dev/null | head -1)
|
||||
if [[ (! -z "$MACPORTS_CXX") ]]; then
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
@ -5435,7 +5435,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then
|
|||
|
||||
MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-4*' 2>/dev/null | head -1)
|
||||
if [[ (! -z "$MACPORTS_CXX") ]]; then
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1
|
||||
"$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1
|
||||
if [[ "$?" -eq "0" ]]; then
|
||||
|
||||
############################################
|
||||
|
|
|
|||
22
config.h
22
config.h
|
|
@ -517,7 +517,7 @@ NAMESPACE_END
|
|||
|
||||
// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
|
||||
#if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || \
|
||||
# if defined(__ARM_NEON__) || defined(__ARM_FEATURE_NEON) || (CRYPTOPP_MSC_VER >= 1900) || \
|
||||
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# define CRYPTOPP_ARM_NEON_AVAILABLE 1
|
||||
# endif
|
||||
|
|
@ -528,9 +528,8 @@ NAMESPACE_END
|
|||
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_FEATURE_CRC32) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
|
||||
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
|
||||
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \
|
||||
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
|
||||
# endif
|
||||
#endif
|
||||
|
|
@ -540,9 +539,8 @@ NAMESPACE_END
|
|||
// it at the moment. Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||
#if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
|
||||
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
|
||||
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \
|
||||
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
|
||||
# endif
|
||||
#endif
|
||||
|
|
@ -552,19 +550,17 @@ NAMESPACE_END
|
|||
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||
#if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
|
||||
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
|
||||
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VER >= 2000) || \
|
||||
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# define CRYPTOPP_ARMV8A_AES_AVAILABLE 1
|
||||
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
|
||||
# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1
|
||||
# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// ARM CRC testing
|
||||
// TODO...
|
||||
#undef CRYPTOPP_ARMV8A_AES_AVAILABLE
|
||||
#undef CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
#undef CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
|
||||
|
||||
#endif // ARM32, ARM64
|
||||
|
||||
|
|
|
|||
59
cpu.cpp
59
cpu.cpp
|
|
@ -352,36 +352,9 @@ extern "C"
|
|||
};
|
||||
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||
|
||||
static bool TryNEON()
|
||||
{
|
||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||
return CPU_TryNEON_ARM();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool TryCRC32()
|
||||
{
|
||||
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
|
||||
return CPU_TryCRC32_ARMV8();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool TryPMULL()
|
||||
{
|
||||
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
||||
return CPU_TryPMULL_ARMV8();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool TryAES()
|
||||
{
|
||||
#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
|
||||
#if (CRYPTOPP_ARMV8A_AES_AVAILABLE)
|
||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
volatile bool result = true;
|
||||
__try
|
||||
|
|
@ -432,32 +405,14 @@ static bool TryAES()
|
|||
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
|
||||
}
|
||||
|
||||
static bool TrySHA1()
|
||||
{
|
||||
#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE)
|
||||
return CPU_TrySHA1_ARMV8();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool TrySHA2()
|
||||
{
|
||||
#if (CRYPTOPP_ARMV8A_SHA_AVAILABLE)
|
||||
return CPU_TrySHA2_ARMV8();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void DetectArmFeatures()
|
||||
{
|
||||
g_hasNEON = TryNEON();
|
||||
g_hasPMULL = TryPMULL();
|
||||
g_hasCRC32 = TryCRC32();
|
||||
g_hasAES = TryAES();
|
||||
g_hasSHA1 = TrySHA1();
|
||||
g_hasSHA2 = TrySHA2();
|
||||
g_hasNEON = CPU_TryNEON_ARM();
|
||||
g_hasPMULL = CPU_TryPMULL_ARMV8();
|
||||
g_hasCRC32 = CPU_TryCRC32_ARMV8();
|
||||
g_hasAES = TryAES(); // TODO
|
||||
g_hasSHA1 = CPU_TrySHA1_ARMV8();
|
||||
g_hasSHA2 = CPU_TrySHA2_ARMV8();
|
||||
|
||||
g_ArmDetectionDone = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1312,7 +1312,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
|
|||
echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS"
|
||||
echo
|
||||
|
||||
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
|
||||
COUNT=0
|
||||
|
|
@ -1576,7 +1576,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]
|
|||
echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS"
|
||||
echo
|
||||
|
||||
OBJFILE=gcm.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
OBJFILE=gcm-simd.o; rm -f "$OBJFILE" 2>/dev/null
|
||||
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
|
||||
|
||||
COUNT=0
|
||||
|
|
|
|||
235
gcm-simd.cpp
235
gcm-simd.cpp
|
|
@ -26,6 +26,121 @@
|
|||
# include <setjmp.h>
|
||||
#endif
|
||||
|
||||
ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
// GCC 4.8 and 4.9 are missing PMULL gear
|
||||
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
||||
# if (CRYPTOPP_GCC_VERSION >= 40800) && (CRYPTOPP_GCC_VERSION < 50000)
|
||||
inline poly128_t VMULL_P64(poly64_t a, poly64_t b)
|
||||
{
|
||||
return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
|
||||
}
|
||||
|
||||
inline poly128_t VMULL_HIGH_P64(poly64x2_t a, poly64x2_t b)
|
||||
{
|
||||
return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
#if defined(__GNUC__)
|
||||
// Schneiders, Hovsmith and O'Rourke used this trick.
|
||||
// It results in much better code generation in production code
|
||||
// by avoiding D-register spills when using vgetq_lane_u64. The
|
||||
// problem does not surface under minimal test cases.
|
||||
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (b) );
|
||||
return r;
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (vget_high_u64(b)) );
|
||||
return r;
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
|
||||
:"=w" (r) : "w" (vget_high_u64(a)), "w" (b) );
|
||||
return r;
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (b) );
|
||||
return r;
|
||||
}
|
||||
|
||||
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (b), "I" (c) );
|
||||
return r;
|
||||
}
|
||||
|
||||
// https://github.com/weidai11/cryptopp/issues/366
|
||||
template <unsigned int C>
|
||||
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (b), "I" (C) );
|
||||
return r;
|
||||
}
|
||||
#endif // GCC and compatibles
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
|
||||
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
|
||||
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
|
||||
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
|
||||
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
|
||||
}
|
||||
|
||||
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
|
||||
{
|
||||
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c);
|
||||
}
|
||||
|
||||
// https://github.com/weidai11/cryptopp/issues/366
|
||||
template <unsigned int C>
|
||||
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
|
||||
}
|
||||
#endif // Microsoft and compatibles
|
||||
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||
|
|
@ -71,7 +186,7 @@ bool CPU_TryPMULL_ARMV8()
|
|||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
|
||||
if (oldHandler == SIG_ERR)
|
||||
return false;
|
||||
|
||||
|
|
@ -79,7 +194,7 @@ bool CPU_TryPMULL_ARMV8()
|
|||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
||||
return false;
|
||||
|
||||
if (setjmp(s_jmpNoPMULL))
|
||||
if (setjmp(s_jmpSIGILL))
|
||||
result = false;
|
||||
else
|
||||
{
|
||||
|
|
@ -87,8 +202,8 @@ bool CPU_TryPMULL_ARMV8()
|
|||
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
||||
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
||||
|
||||
const poly128_t r1 = vmull_p64(a1, b1);
|
||||
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||
const poly128_t r1 = VMULL_P64(a1, b1);
|
||||
const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||
|
||||
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
|
||||
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
||||
|
|
@ -115,4 +230,116 @@ void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
|
||||
ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word64 s_clmulConstants64[] = {
|
||||
W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients
|
||||
W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607), // Unused for ARM; used for x86 _mm_shuffle_epi8
|
||||
W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f) // Unused for ARM; used for x86 _mm_shuffle_epi8
|
||||
};
|
||||
|
||||
const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64;
|
||||
const unsigned int s_clmulTableSizeInBlocks = 8;
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
uint64x2_t GCM_Reduce_ARMV8A(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r)
|
||||
{
|
||||
c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0));
|
||||
c1 = veorq_u64(c1, PMULL_01(c0, r));
|
||||
c0 = VEXT_U8<8>(c0, vdupq_n_u64(0));
|
||||
c0 = vshlq_n_u64(veorq_u64(c0, c1), 1);
|
||||
c0 = PMULL_00(c0, r);
|
||||
c2 = veorq_u64(c2, c0);
|
||||
c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0)));
|
||||
c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63);
|
||||
c2 = vshlq_n_u64(c2, 1);
|
||||
|
||||
return veorq_u64(c2, c1);
|
||||
}
|
||||
|
||||
uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r)
|
||||
{
|
||||
const uint64x2_t c0 = PMULL_00(x, h);
|
||||
const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h));
|
||||
const uint64x2_t c2 = PMULL_11(x, h);
|
||||
|
||||
return GCM_Reduce_ARMV8A(c0, c1, c2, r);
|
||||
}
|
||||
|
||||
size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer)
|
||||
{
|
||||
const uint64x2_t* table = reinterpret_cast<const uint64x2_t*>(mtable);
|
||||
uint64x2_t x = vreinterpretq_u64_u8(vld1q_u8(hbuffer));
|
||||
const uint64x2_t r = s_clmulConstants[0];
|
||||
|
||||
const size_t BLOCKSIZE = 16;
|
||||
while (len >= BLOCKSIZE)
|
||||
{
|
||||
size_t s = UnsignedMin(len/BLOCKSIZE, s_clmulTableSizeInBlocks), i=0;
|
||||
uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*BLOCKSIZE)));
|
||||
uint64x2_t c0 = vdupq_n_u64(0);
|
||||
uint64x2_t c1 = vdupq_n_u64(0);
|
||||
uint64x2_t c2 = vdupq_n_u64(0);
|
||||
|
||||
while (true)
|
||||
{
|
||||
const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i));
|
||||
const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1));
|
||||
const uint64x2_t h2 = veorq_u64(h0, h1);
|
||||
|
||||
if (++i == s)
|
||||
{
|
||||
const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
|
||||
d1 = veorq_u64(vextq_u64(t1, t1, 1), x);
|
||||
c0 = veorq_u64(c0, PMULL_00(d1, h0));
|
||||
c2 = veorq_u64(c2, PMULL_10(d1, h1));
|
||||
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
|
||||
vget_low_u32(vreinterpretq_u32_u64(d1))));
|
||||
c1 = veorq_u64(c1, PMULL_00(d1, h2));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
|
||||
c0 = veorq_u64(c0, PMULL_10(d2, h0));
|
||||
c2 = veorq_u64(c2, PMULL_10(d1, h1));
|
||||
d2 = veorq_u64(d2, d1);
|
||||
c1 = veorq_u64(c1, PMULL_10(d2, h2));
|
||||
|
||||
if (++i == s)
|
||||
{
|
||||
const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
|
||||
d1 = veorq_u64(vextq_u64(t2, t2, 1), x);
|
||||
c0 = veorq_u64(c0, PMULL_01(d1, h0));
|
||||
c2 = veorq_u64(c2, PMULL_11(d1, h1));
|
||||
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
|
||||
vget_low_u32(vreinterpretq_u32_u64(d1))));
|
||||
c1 = veorq_u64(c1, PMULL_01(d1, h2));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
|
||||
d2 = vextq_u64(t3, t3, 1);
|
||||
c0 = veorq_u64(c0, PMULL_01(d1, h0));
|
||||
c2 = veorq_u64(c2, PMULL_01(d2, h1));
|
||||
d1 = veorq_u64(d1, d2);
|
||||
c1 = veorq_u64(c1, PMULL_01(d1, h2));
|
||||
}
|
||||
data += s*16;
|
||||
len -= s*16;
|
||||
|
||||
c1 = veorq_u64(veorq_u64(c1, c0), c2);
|
||||
x = GCM_Reduce_ARMV8A(c0, c1, c2, r);
|
||||
}
|
||||
|
||||
vst1q_u64(reinterpret_cast<uint64_t *>(hbuffer), x);
|
||||
return len;
|
||||
}
|
||||
#endif
|
||||
|
||||
NAMESPACE_END
|
||||
199
gcm.cpp
199
gcm.cpp
|
|
@ -53,102 +53,6 @@ NAMESPACE_BEGIN(CryptoPP)
|
|||
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
#if defined(__GNUC__)
|
||||
// Schneiders, Hovsmith and O'Rourke used this trick.
|
||||
// It results in much better code generation in production code
|
||||
// by avoiding D-register spills when using vgetq_lane_u64. The
|
||||
// problem does not surface under minimal test cases.
|
||||
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (b) );
|
||||
return r;
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (vget_high_u64(b)) );
|
||||
return r;
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("pmull %0.1q, %1.1d, %2.1d \n\t"
|
||||
:"=w" (r) : "w" (vget_high_u64(a)), "w" (b) );
|
||||
return r;
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("pmull2 %0.1q, %1.2d, %2.2d \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (b) );
|
||||
return r;
|
||||
}
|
||||
|
||||
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (b), "I" (c) );
|
||||
return r;
|
||||
}
|
||||
|
||||
// https://github.com/weidai11/cryptopp/issues/366
|
||||
template <unsigned int C>
|
||||
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
|
||||
{
|
||||
uint64x2_t r;
|
||||
__asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
|
||||
:"=w" (r) : "w" (a), "w" (b), "I" (C) );
|
||||
return r;
|
||||
}
|
||||
#endif // GCC and compatibles
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
|
||||
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
|
||||
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
|
||||
vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)(vmull_p64(vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
|
||||
vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
|
||||
}
|
||||
|
||||
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
|
||||
{
|
||||
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c);
|
||||
}
|
||||
|
||||
// https://github.com/weidai11/cryptopp/issues/366
|
||||
template <unsigned int C>
|
||||
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
|
||||
{
|
||||
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
|
||||
}
|
||||
#endif // Microsoft and compatibles
|
||||
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
|
||||
word16 GCM_Base::s_reductionTable[256];
|
||||
volatile bool GCM_Base::s_reductionTableInitialized = false;
|
||||
|
||||
|
|
@ -278,6 +182,9 @@ inline __m128i CLMUL_GF_Mul(const __m128i &x, const __m128i &h, const __m128i &r
|
|||
|
||||
#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
|
||||
extern size_t GCM_AuthenticateBlocks_ARMV8(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
|
||||
extern uint64x2_t GCM_Multiply_ARMV8A(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r);
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
static const word64 s_clmulConstants64[] = {
|
||||
W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients
|
||||
|
|
@ -287,31 +194,6 @@ static const word64 s_clmulConstants64[] = {
|
|||
|
||||
static const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64;
|
||||
static const unsigned int s_clmulTableSizeInBlocks = 8;
|
||||
|
||||
inline uint64x2_t PMULL_Reduce(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r)
|
||||
{
|
||||
// See comments fo CLMUL_Reduce
|
||||
c1 = veorq_u64(c1, VEXT_U8<8>(vdupq_n_u64(0), c0));
|
||||
c1 = veorq_u64(c1, PMULL_01(c0, r));
|
||||
c0 = VEXT_U8<8>(c0, vdupq_n_u64(0));
|
||||
c0 = vshlq_n_u64(veorq_u64(c0, c1), 1);
|
||||
c0 = PMULL_00(c0, r);
|
||||
c2 = veorq_u64(c2, c0);
|
||||
c2 = veorq_u64(c2, VEXT_U8<8>(c1, vdupq_n_u64(0)));
|
||||
c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63);
|
||||
c2 = vshlq_n_u64(c2, 1);
|
||||
|
||||
return veorq_u64(c2, c1);
|
||||
}
|
||||
|
||||
inline uint64x2_t PMULL_GF_Mul(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r)
|
||||
{
|
||||
const uint64x2_t c0 = PMULL_00(x, h);
|
||||
const uint64x2_t c1 = veorq_u64(PMULL_10(x, h), PMULL_01(x, h));
|
||||
const uint64x2_t c2 = PMULL_11(x, h);
|
||||
|
||||
return PMULL_Reduce(c0, c1, c2, r);
|
||||
}
|
||||
#endif
|
||||
|
||||
void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs ¶ms)
|
||||
|
|
@ -388,15 +270,15 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
|||
uint64x2_t h = h0;
|
||||
for (i=0; i<tableSize-32; i+=32)
|
||||
{
|
||||
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r);
|
||||
const uint64x2_t h1 = GCM_Multiply_ARMV8A(h, h0, r);
|
||||
vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
|
||||
vst1q_u64((uint64_t *)(table+i+16), h1);
|
||||
vst1q_u64((uint64_t *)(table+i+8), h);
|
||||
vst1_u64((uint64_t *)(table+i+8), vget_low_u64(h1));
|
||||
h = PMULL_GF_Mul(h1, h0, r);
|
||||
h = GCM_Multiply_ARMV8A(h1, h0, r);
|
||||
}
|
||||
|
||||
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r);
|
||||
const uint64x2_t h1 = GCM_Multiply_ARMV8A(h, h0, r);
|
||||
vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
|
||||
vst1q_u64((uint64_t *)(table+i+16), h1);
|
||||
vst1q_u64((uint64_t *)(table+i+8), h);
|
||||
|
|
@ -667,73 +549,8 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
|
|||
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
if (HasPMULL())
|
||||
{
|
||||
const uint64x2_t *table = (const uint64x2_t *)MulTable();
|
||||
uint64x2_t x = vreinterpretq_u64_u8(vld1q_u8(HashBuffer()));
|
||||
const uint64x2_t r = s_clmulConstants[0];
|
||||
|
||||
while (len >= 16)
|
||||
{
|
||||
size_t s = UnsignedMin(len/16, s_clmulTableSizeInBlocks), i=0;
|
||||
uint64x2_t d1, d2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-1)*16)));
|
||||
uint64x2_t c0 = vdupq_n_u64(0);
|
||||
uint64x2_t c1 = vdupq_n_u64(0);
|
||||
uint64x2_t c2 = vdupq_n_u64(0);
|
||||
|
||||
while (true)
|
||||
{
|
||||
const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i));
|
||||
const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1));
|
||||
const uint64x2_t h2 = veorq_u64(h0, h1);
|
||||
|
||||
if (++i == s)
|
||||
{
|
||||
const uint64x2_t t1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
|
||||
d1 = veorq_u64(vextq_u64(t1, t1, 1), x);
|
||||
c0 = veorq_u64(c0, PMULL_00(d1, h0));
|
||||
c2 = veorq_u64(c2, PMULL_10(d1, h1));
|
||||
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
|
||||
vget_low_u32(vreinterpretq_u32_u64(d1))));
|
||||
c1 = veorq_u64(c1, PMULL_00(d1, h2));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
|
||||
c0 = veorq_u64(c0, PMULL_10(d2, h0));
|
||||
c2 = veorq_u64(c2, PMULL_10(d1, h1));
|
||||
d2 = veorq_u64(d2, d1);
|
||||
c1 = veorq_u64(c1, PMULL_10(d2, h2));
|
||||
|
||||
if (++i == s)
|
||||
{
|
||||
const uint64x2_t t2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data)));
|
||||
d1 = veorq_u64(vextq_u64(t2, t2, 1), x);
|
||||
c0 = veorq_u64(c0, PMULL_01(d1, h0));
|
||||
c2 = veorq_u64(c2, PMULL_11(d1, h1));
|
||||
d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
|
||||
vget_low_u32(vreinterpretq_u32_u64(d1))));
|
||||
c1 = veorq_u64(c1, PMULL_01(d1, h2));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
const uint64x2_t t3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
|
||||
d2 = vextq_u64(t3, t3, 1);
|
||||
c0 = veorq_u64(c0, PMULL_01(d1, h0));
|
||||
c2 = veorq_u64(c2, PMULL_01(d2, h1));
|
||||
d1 = veorq_u64(d1, d2);
|
||||
c1 = veorq_u64(c1, PMULL_01(d1, h2));
|
||||
}
|
||||
data += s*16;
|
||||
len -= s*16;
|
||||
|
||||
c1 = veorq_u64(veorq_u64(c1, c0), c2);
|
||||
x = PMULL_Reduce(c0, c1, c2, r);
|
||||
}
|
||||
|
||||
vst1q_u64((uint64_t *)HashBuffer(), x);
|
||||
return len;
|
||||
}
|
||||
return GCM_AuthenticateBlocks_ARMV8(data, len, MulTable(), HashBuffer());
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef BlockGetAndPut<word64, NativeByteOrder> Block;
|
||||
|
|
|
|||
Loading…
Reference in New Issue