Merge branch 'master' into hmqv

pull/263/head
Jeffrey Walton 2016-08-24 21:16:20 -04:00
commit 932801bd5f
2 changed files with 580 additions and 101 deletions

View File

@ -115,6 +115,9 @@ if [[ "$IS_SOLARIS" -ne "0" ]]; then
else
AWK=nawk;
fi
DISASS=dis
DISASSARGS=()
fi
# Fixup
@ -138,7 +141,9 @@ done
# We need to use the C++ compiler to determine feature availablility. Otherwise
# mis-detections occur on a number of platforms.
if [[ ((-z "$CXX") || ("$CXX" == "gcc")) ]]; then
if [[ "$IS_DARWIN" -ne "0" ]]; then
if [[ ("$CXX" == "gcc") ]]; then
CXX=g++
elif [[ "$IS_DARWIN" -ne "0" ]]; then
CXX=c++
elif [[ "$IS_SOLARIS" -ne "0" ]]; then
if [[ (-e "/opt/developerstudio12.5/bin/CC") ]]; then
@ -182,7 +187,7 @@ if [[ ("$SUN_COMPILER" -eq "0") ]]; then
fi
# Now that the compiler is fixed, determine the compiler version for fixups
CLANG_37_OR_ABOVE=$("$CXX" -v 2>&1 | "$EGREP" -i -c 'clang version (3\.[7-9]|[5-9])')
CLANG_37_OR_ABOVE=$("$CXX" -v 2>&1 | "$EGREP" -i -c 'clang version (3\.[7-9]|[4-9]\.[0-9])')
GCC_60_OR_ABOVE=$("$CXX" -v 2>&1 | "$EGREP" -i -c 'gcc version (6\.[0-9]|[7-9])')
GCC_51_OR_ABOVE=$("$CXX" -v 2>&1 | "$EGREP" -i -c 'gcc version (5\.[1-9]|[6-9])')
GCC_48_COMPILER=$("$CXX" -v 2>&1 | "$EGREP" -i -c 'gcc version 4\.8')
@ -521,7 +526,7 @@ HAVE_X86_AES=0
HAVE_X86_RDRAND=0
HAVE_X86_RDSEED=0
HAVE_X86_PCLMUL=0
if [[ (("$IS_X86" -ne "0") || ("$IS_X64" -ne "0")) && ("$SUN_COMPILER" -eq "0") ]]; then
if [[ ("$IS_X86" -ne "0" || "$IS_X64" -ne "0") && ("$SUN_COMPILER" -eq "0") ]]; then
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
@ -569,7 +574,11 @@ fi
# ARMv7 and ARMv8, including NEON, CRC32 and Crypto extensions
if [[ ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0") ]]; then
ARM_FEATURES=$(cat /proc/cpuinfo 2>&1 | "$AWK" '{IGNORECASE=1}{if ($1 == "Features") print}' | cut -f 2 -d ':')
if [[ ("$IS_DARWIN" -ne "0") ]]; then
ARM_FEATURES=$(sysctl machdep.cpu.features 2>&1 | cut -f 2 -d ':')
else
ARM_FEATURES=$(cat /proc/cpuinfo 2>&1 | "$AWK" '{IGNORECASE=1}{if ($1 == "Features") print}' | cut -f 2 -d ':')
fi
if [[ (-z "$HAVE_ARMV7A" && "$IS_ARM32" -ne "0") ]]; then
HAVE_ARMV7A=$(echo "$ARM_FEATURES" | "$GREP" -i -c 'neon')
@ -646,7 +655,7 @@ fi
# Used to disassemble object modules so we can verify some aspects of code generation
if [[ (-z "$HAVE_DISASS") ]]; then
echo "int main(int argc, char* argv[]) {return 0;}" > "$TMP/test.cc"
"$CXX" -x c "$TMP/test.cc" -o "$TMP/test.exe" > /dev/null 2>&1
"$CXX" "$TMP/test.cc" -o "$TMP/test.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
"$DISASS" "${DISASSARGS[@]}" "$TMP/test.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
@ -1014,99 +1023,387 @@ echo "Start time: $TEST_BEGIN" | tee -a "$TEST_RESULTS"
############################################
# Test AES-NI code generation
if [[ ("$HAVE_DISASS" -ne "0" && "$HAVE_X86_AES" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: AES-NI code generation" | tee -a "$TEST_RESULTS"
echo
if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; then
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
# This works for SunCC, but we need something like:
# /opt/solarisstudio12.4/bin/CC -DNDEBUG -g2 -O2 -xarch=aes -m64 -D__SSE2__ -D__SSE3__ \
# -D__SSE4_1__ -D__SSE4_2__ -D__AES__ -D__PCLMUL__ -c rijndael.cpp
OBJFILE=rijndael.o
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesenc)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesenc instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesenclast)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesenclast instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesdec)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesdec instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesdeclast)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesdeclast instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesimc)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesimc instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aeskeygenassist)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aeskeygenassist instruction" | tee -a "$TEST_RESULTS"
fi
if [[ ("$FAILED" -eq "0") ]];then
echo "Verified aesenc, aesenclast, aesdec, aesdeclast, aesimc, aeskeygenassist machine instruction generation" | tee -a "$TEST_RESULTS"
if [[ ("$IS_DARWIN" -ne "0") ]]; then
X86_AESNI=$(sysctl machdep.cpu.features 2>/dev/null | "$GREP" -i -c aes)
elif [[ ("$IS_SOLARIS" -ne "0") ]]; then
X86_AESNI=$(isainfo -v 2>/dev/null | "$GREP" -i -c aes)
else
if [[ ("$CLANG_COMPILER" -ne "0" && "$CLANG_37_OR_ABOVE" -eq "0") ]]; then
echo "This could be due to Clang and lack of expected support for SSSE3 in some versions of the compiler. If so, try Clang 3.7 or above"
X86_AESNI=$(cat /proc/cpuinfo 2>/dev/null | "$GREP" -i -c aes)
fi
if [[ ("$X86_AESNI" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: X86 AES-NI code generation" | tee -a "$TEST_RESULTS"
echo
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
OBJFILE=rijndael.o
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS ${PLATFORM_CXXFLAGS[@]}" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesenc)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesenc instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesenclast)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesenclast instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesdec)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesdec instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesdeclast)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesdeclast instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aesimc)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aesimc instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c aeskeygenassist)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate aeskeygenassist instruction" | tee -a "$TEST_RESULTS"
fi
if [[ ("$FAILED" -eq "0") ]];then
echo "Verified aesenc, aesenclast, aesdec, aesdeclast, aesimc, aeskeygenassist machine instructions" | tee -a "$TEST_RESULTS"
else
if [[ ("$CLANG_COMPILER" -ne "0" && "$CLANG_37_OR_ABOVE" -eq "0") ]]; then
echo "This could be due to Clang and lack of expected support for SSSE3 in some versions of the compiler. If so, try Clang 3.7 or above"
fi
fi
fi
fi
############################################
# ARM 64x64→128-bit multiply code generation
if [[ ("$HAVE_DISASS" -ne "0" && "$HAVE_ARM_CRYPTO" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: ARM 64x64→128-bit multiply code generation" | tee -a "$TEST_RESULTS"
echo
# X86 carryless multiply code generation
if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; then
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
# This works for SunCC, but we need something like:
# /opt/solarisstudio12.4/bin/CC -DNDEBUG -g2 -O2 -xarch=aes -m64 -D__SSE2__ -D__SSE3__ \
# -D__SSE4_1__ -D__SSE4_2__ -D__AES__ -D__PCLMUL__ -D__RDRND__ -D__RDSEED__ -c gcm.cpp
OBJFILE=gcm.o
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -v pmull2 | "$GREP" -i -c pmull)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate pmull instruction" | tee -a "$TEST_RESULTS"
if [[ ("$IS_DARWIN" -ne "0") ]]; then
X86_PCLMUL=$(sysctl machdep.cpu.features 2>/dev/null | "$GREP" -i -c pclmulq)
elif [[ ("$IS_SOLARIS" -ne "0") ]]; then
X86_PCLMUL=$(isainfo -v 2>/dev/null | "$GREP" -i -c pclmulq)
else
X86_PCLMUL=$(cat /proc/cpuinfo 2>/dev/null | "$GREP" -i -c pclmulq)
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c pmull2)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate pmull2 instruction" | tee -a "$TEST_RESULTS"
if [[ ("$X86_PCLMUL" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: X86 carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
OBJFILE=gcm.o
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS ${PLATFORM_CXXFLAGS[@]}" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
COUNT=$(echo "$DISASS_TEXT" | "$EGREP" -i -c '(pclmullqh|vpclmulqdq)')
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate pclmullqh instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$EGREP" -i -c '(pclmullql|vpclmulqdq)')
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate pclmullql instruction" | tee -a "$TEST_RESULTS"
fi
if [[ ("$FAILED" -eq "0") ]];then
echo "Verified pclmullqh and pclmullql machine instructions" | tee -a "$TEST_RESULTS"
else
if [[ ("$CLANG_COMPILER" -ne "0") ]]; then
echo "This is probably due to Clang and its integrated assembler. The integrated assembler cannot consume a lot of ASM used by the library"
fi
fi
fi
fi
############################################
# Test RDRAND and RDSEED code generation
if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; then
# This works for SunCC, but we need something like:
# /opt/solarisstudio12.4/bin/CC -DNDEBUG -g2 -O2 -xarch=avx_i -m64 -D__SSE2__ -D__SSE3__ \
# -D__SSE4_1__ -D__SSE4_2__ -D__AES__ -D__PCLMUL__ -D__RDRND__ -D__RDSEED__ -c rdrand.cpp
if [[ ("$IS_DARWIN" -ne "0") ]]; then
X86_RDRAND=$(sysctl machdep.cpu.features 2>/dev/null | "$GREP" -i -c rdrand)
X86_RDSEED=$(sysctl machdep.cpu.features 2>/dev/null | "$GREP" -i -c rdseed)
elif [[ ("$IS_SOLARIS" -ne "0") ]]; then
X86_RDRAND=$(isainfo -v 2>/dev/null | "$GREP" -i -c rdrand)
X86_RDSEED=$(isainfo -v 2>/dev/null | "$GREP" -i -c rdseed)
else
X86_RDRAND=$(cat /proc/cpuinfo 2>/dev/null | "$GREP" -i -c rdrand)
X86_RDSEED=$(cat /proc/cpuinfo 2>/dev/null | "$GREP" -i -c rdseed)
fi
if [[ ("$FAILED" -eq "0") ]];then
echo "Verified pmull and pmull2 machine instruction generation" | tee -a "$TEST_RESULTS"
if [[ ("$X86_RDRAND" -ne "0" || "$X86_RDSEED" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: X86 RDRAND and RDSEED code generation" | tee -a "$TEST_RESULTS"
echo
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
OBJFILE=rdrand.o
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS ${PLATFORM_CXXFLAGS[@]}" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
if [[ "$X86_RDRAND" -ne "0" ]]; then
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c rdrand)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate rdrand instruction" | tee -a "$TEST_RESULTS"
fi
fi
if [[ "$X86_RDSEED" -ne "0" ]]; then
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c rdseed)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate rdseed instruction" | tee -a "$TEST_RESULTS"
fi
fi
if [[ ("$FAILED" -eq "0") ]];then
echo "Verified rdrand and rdseed machine instructions" | tee -a "$TEST_RESULTS"
fi
fi
fi
############################################
# X86 CRC32 code generation
if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; then
# This works for SunCC, but we need something like:
# /opt/solarisstudio12.3/bin/CC -DNDEBUG -g2 -O2 -xarch=sse4_2 -m64 -D__SSE2__ -D__SSE3__ \
# -D__SSE4_1__ -D__SSE4_2__ -c crc.cpp
if [[ ("$IS_DARWIN" -ne "0") ]]; then
X86_CRC32=$(sysctl machdep.cpu.features 2>/dev/null | "$GREP" -i -c sse4.2)
elif [[ ("$IS_SOLARIS" -ne "0") ]]; then
X86_CRC32=$(isainfo -v 2>/dev/null | "$GREP" -i -c sse4_2)
else
X86_CRC32=$(cat /proc/cpuinfo 2>/dev/null | "$GREP" -i -c sse4_2)
fi
if [[ ("$X86_CRC32" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: X86 CRC32 code generation" | tee -a "$TEST_RESULTS"
echo
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
OBJFILE=crc.o
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS ${PLATFORM_CXXFLAGS[@]}" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c crc32l)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate crc32l instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c crc32b)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate crc32b instruction" | tee -a "$TEST_RESULTS"
fi
if [[ ("$FAILED" -eq "0") ]];then
echo "Verified crc32l and crc32b machine instructions" | tee -a "$TEST_RESULTS"
fi
fi
fi
############################################
# ARM NEON code generation
if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]]; then
if [[ ("$IS_DARWIN" -ne "0") ]]; then
ARM_NEON=$(sysctl machdep.cpu.features 2>/dev/null | "$EGREP" -i -c '(neon|asimd)')
else
ARM_NEON=$(cat /proc/cpuinfo 2>/dev/null | "$EGREP" -i -c '(neon|asimd)')
fi
if [[ ("$ARM_NEON" -ne "0" || "$HAVE_ARM_NEON" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: ARM NEON code generation" | tee -a "$TEST_RESULTS"
echo
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
OBJFILE=blake2.o
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS ${PLATFORM_CXXFLAGS[@]}" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
# BLAKE2_NEON_Compress32: 40 each vld1q_u8 and vld1q_u64
# BLAKE2_NEON_Compress64: 22 each vld1q_u8 and vld1q_u64
COUNT=$(echo "$DISASS_TEXT" | "$EGREP" -i -c 'ldr.*q')
if [[ ("$COUNT" -lt "62") ]]; then
FAILED=1
echo "ERROR: failed to generate expected vector load instructions" | tee -a "$TEST_RESULTS"
fi
# BLAKE2_NEON_Compress{32|64}: 6 each vst1q_u32 and vst1q_u64
COUNT=$(echo "$DISASS_TEXT" | "$EGREP" -i -c 'str.*q')
if [[ ("$COUNT" -lt "6") ]]; then
FAILED=1
echo "ERROR: failed to generate expected vector store instructions" | tee -a "$TEST_RESULTS"
fi
if [[ ("$FAILED" -eq "0") ]];then
echo "Verified vector load and store machine instructions" | tee -a "$TEST_RESULTS"
fi
fi
fi
############################################
# ARM carryless multiply code generation
if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]]; then
if [[ ("$IS_DARWIN" -ne "0") ]]; then
ARM_PMULL=$(sysctl machdep.cpu.features 2>/dev/null | "$GREP" -i -c pmull)
else
ARM_PMULL=$(cat /proc/cpuinfo 2>/dev/null | "$GREP" -i -c pmull)
fi
if [[ ("$ARM_PMULL" -ne "0" || "$HAVE_ARM_CRYPTO" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: ARM carryless multiply code generation" | tee -a "$TEST_RESULTS"
echo
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
OBJFILE=gcm.o
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS ${PLATFORM_CXXFLAGS[@]}" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -v pmull2 | "$GREP" -i -c pmull)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate pmull instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c pmull2)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate pmull2 instruction" | tee -a "$TEST_RESULTS"
fi
if [[ ("$FAILED" -eq "0") ]];then
echo "Verified pmull and pmull2 machine instructions" | tee -a "$TEST_RESULTS"
fi
fi
fi
############################################
# ARM CRC32 code generation
if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_ARM32" -ne "0" || "$IS_ARM64" -ne "0")) ]]; then
if [[ ("$IS_DARWIN" -ne "0") ]]; then
ARM_CRC32=$(sysctl machdep.cpu.features 2>/dev/null | "$GREP" -i -c crc)
else
ARM_CRC32=$(cat /proc/cpuinfo 2>/dev/null | "$GREP" -i -c crc32)
fi
if [[ ("$ARM_CRC32" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: ARM CRC32 code generation" | tee -a "$TEST_RESULTS"
echo
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
OBJFILE=crc.o
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS ${PLATFORM_CXXFLAGS[@]}" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c crc32cb)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate crc32cb instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c crc32cw)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate crc32cw instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c crc32b)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate crc32b instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo "$DISASS_TEXT" | "$GREP" -i -c crc32w)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate crc32w instruction" | tee -a "$TEST_RESULTS"
fi
if [[ ("$FAILED" -eq "0") ]];then
echo "Verified crc32cb, crc32cw, crc32b and crc32w machine instructions" | tee -a "$TEST_RESULTS"
fi
fi
fi

216
gcm.cpp
View File

@ -12,7 +12,7 @@
#ifndef CRYPTOPP_IMPORTS
#ifndef CRYPTOPP_GENERATE_X64_MASM
// Clang 3.3 integrated assembler crash on Linux. MacPorts GCC compile error. SunCC crash under Sun Studio 12.5 and below.
// Clang 3.3 integrated assembler crash on Linux. MacPorts GCC compile error. SunCC crash under SunCC 5.14 and below.
#if (defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION < 30400)) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER) || (defined(__SUNPRO_CC) && __SUNPRO_CC <= 0x5140)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
@ -20,11 +20,9 @@
# undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
# undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
# undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
# undef CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
# define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
# define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
# define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 0
# define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 0
#endif
#include "gcm.h"
@ -94,6 +92,16 @@ inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
}
#endif
#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
inline static void NEON_Xor16(byte *a, const byte *b, const byte *c)
{
assert(IsAlignedOn(a,GetAlignmentOf<uint64x2_t>()));
assert(IsAlignedOn(b,GetAlignmentOf<uint64x2_t>()));
assert(IsAlignedOn(c,GetAlignmentOf<uint64x2_t>()));
*(uint64x2_t*)a = veorq_u64(*(uint64x2_t*)b, *(uint64x2_t*)c);
}
#endif
inline static void Xor16(byte *a, const byte *b, const byte *c)
{
assert(IsAlignedOn(a,GetAlignmentOf<word64>()));
@ -109,6 +117,7 @@ static const word64 s_clmulConstants64[] = {
W64LIT(0xe100000000000000), W64LIT(0xc200000000000000),
W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607),
W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f)};
static const __m128i *s_clmulConstants = (const __m128i *)(const void *)s_clmulConstants64;
static const unsigned int s_clmulTableSizeInBlocks = 8;
@ -146,14 +155,56 @@ inline __m128i CLMUL_Reduce(__m128i c0, __m128i c1, __m128i c2, const __m128i &r
inline __m128i CLMUL_GF_Mul(const __m128i &x, const __m128i &h, const __m128i &r)
{
__m128i c0 = _mm_clmulepi64_si128(x,h,0);
__m128i c1 = _mm_xor_si128(_mm_clmulepi64_si128(x,h,1), _mm_clmulepi64_si128(x,h,0x10));
__m128i c2 = _mm_clmulepi64_si128(x,h,0x11);
const __m128i c0 = _mm_clmulepi64_si128(x,h,0);
const __m128i c1 = _mm_xor_si128(_mm_clmulepi64_si128(x,h,1), _mm_clmulepi64_si128(x,h,0x10));
const __m128i c2 = _mm_clmulepi64_si128(x,h,0x11);
return CLMUL_Reduce(c0, c1, c2, r);
}
#endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
CRYPTOPP_ALIGN_DATA(16)
static const word64 s_clmulConstants64[] = {
W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), // Used for ARM and x86; polynomial coefficients
W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607), // Unused for ARM; used for x86 _mm_shuffle_epi8
W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f) // Unused for ARM; used for x86 _mm_shuffle_epi8
};
static const uint64x2_t *s_clmulConstants = (const uint64x2_t *)s_clmulConstants64;
static const unsigned int s_clmulTableSizeInBlocks = 8;
inline uint64x2_t PMULL_Reduce(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r)
{
// See comments fo CLMUL_Reduce
c1 = veorq_u64(c1, (uint64x2_t)vextq_u8(vdupq_n_u8(0), (uint8x16_t)c0, 8));
c1 = veorq_u64(c1, (uint64x2_t)vmull_p64(vgetq_lane_u64(c0, 0), vgetq_lane_u64(r, 1)));
c0 = (uint64x2_t)vextq_u8((uint8x16_t)c0, vdupq_n_u8(0), 8);
c0 = veorq_u64(c0, c1);
c0 = vshlq_n_u64(c0, 1);
c0 = (uint64x2_t)vmull_p64(vgetq_lane_u64(c0, 0), vgetq_lane_u64(r, 0));
c2 = veorq_u64(c2, c0);
c2 = veorq_u64(c2, (uint64x2_t)vextq_u8((uint8x16_t)c1, vdupq_n_u8(0), 8));
c1 = vcombine_u64(vget_low_u64(c1), vget_low_u64(c2));
c1 = vshrq_n_u64(c1, 63);
c2 = vshlq_n_u64(c2, 1);
return veorq_u64(c2, c1);
}
inline uint64x2_t PMULL_GF_Mul(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r)
{
const uint64x2_t c0 = (uint64x2_t)vmull_p64(vgetq_lane_u64(x, 0), vgetq_lane_u64(h, 0));
const uint64x2_t c1 = veorq_u64((uint64x2_t)vmull_p64(vgetq_lane_u64(x, 1), vgetq_lane_u64(h,0)),
(uint64x2_t)vmull_p64(vgetq_lane_u64(x, 0), vgetq_lane_u64(h, 1)));
const uint64x2_t c2 = (uint64x2_t)vmull_high_p64((poly64x2_t)x, (poly64x2_t)h);
return PMULL_Reduce(c0, c1, c2, r);
}
#endif
void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
{
BlockCipher &blockCipher = AccessBlockCipher();
@ -172,6 +223,14 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
tableSize = s_clmulTableSizeInBlocks * REQUIRED_BLOCKSIZE;
}
else
#elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
if (HasPMULL())
{
// Avoid "parameter not used" error and suppress Coverity finding
(void)params.GetIntValue(Name::TableSize(), tableSize);
tableSize = s_clmulTableSizeInBlocks * REQUIRED_BLOCKSIZE;
}
else
#endif
{
if (params.GetIntValue(Name::TableSize(), tableSize))
@ -208,6 +267,32 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
h = CLMUL_GF_Mul(h1, h0, r);
}
return;
}
#elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
if (HasPMULL())
{
const uint64x2_t r = s_clmulConstants[0];
const uint64x2_t t = vld1q_u64((uint64_t *)hashKey);
const uint64x2_t h0 = (uint64x2_t)vrev64q_u8((uint8x16_t)vcombine_u64(vget_high_u64(t), vget_low_u64(t)));
uint64x2_t h = h0;
for (i=0; i<tableSize-32; i+=32)
{
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r);
vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
vst1q_u64((uint64_t *)(table+i+16), h1);
vst1q_u64((uint64_t *)(table+i+8), h);
vst1_u64((uint64_t *)(table+i+8), vget_low_u64(h1));
h = PMULL_GF_Mul(h1, h0, r);
}
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r);
vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
vst1q_u64((uint64_t *)(table+i+16), h1);
vst1q_u64((uint64_t *)(table+i+8), h);
vst1_u64((uint64_t *)(table+i+8), vget_low_u64(h1));
return;
}
#endif
@ -237,6 +322,12 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (k=1; k<j; k++)
SSE2_Xor16(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16);
else
#elif CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
if (HasNEON())
for (j=2; j<=0x80; j*=2)
for (k=1; k<j; k++)
NEON_Xor16(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16);
else
#endif
for (j=2; j<=0x80; j*=2)
for (k=1; k<j; k++)
@ -286,6 +377,15 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
SSE2_Xor16(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16);
}
else
#elif CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
if (HasNEON())
for (j=2; j<=8; j*=2)
for (k=1; k<j; k++)
{
NEON_Xor16(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16);
NEON_Xor16(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16);
}
else
#endif
for (j=2; j<=8; j*=2)
for (k=1; k<j; k++)
@ -305,6 +405,15 @@ inline void GCM_Base::ReverseHashBufferIfNeeded()
__m128i &x = *(__m128i *)(void *)HashBuffer();
x = _mm_shuffle_epi8(x, s_clmulConstants[1]);
}
#elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
if (HasPMULL())
{
if (GetNativeByteOrder() != BIG_ENDIAN_ORDER)
{
const uint8x16_t x = vrev64q_u8(vld1q_u8(HashBuffer()));
vst1q_u8(HashBuffer(), (uint8x16_t)vcombine_u64(vget_high_u64((uint64x2_t)x), vget_low_u64((uint64x2_t)x)));
}
}
#endif
}
@ -358,6 +467,8 @@ unsigned int GCM_Base::OptimalDataAlignment() const
return
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
HasSSE2() ? 16 :
#elif CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
HasNEON() ? 16 :
#endif
GetBlockCipher().OptimalDataAlignment();
}
@ -384,12 +495,12 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
{
const __m128i *table = (const __m128i *)(const void *)MulTable();
__m128i x = _mm_load_si128((__m128i *)(void *)HashBuffer());
const __m128i r = s_clmulConstants[0], bswapMask = s_clmulConstants[1], bswapMask2 = s_clmulConstants[2];
const __m128i r = s_clmulConstants[0], mask1 = s_clmulConstants[1], mask2 = s_clmulConstants[2];
while (len >= 16)
{
size_t s = UnsignedMin(len/16, s_clmulTableSizeInBlocks), i=0;
__m128i d, d2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-1)*16)), bswapMask2);;
__m128i d, d2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-1)*16)), mask2);;
__m128i c0 = _mm_setzero_si128();
__m128i c1 = _mm_setzero_si128();
__m128i c2 = _mm_setzero_si128();
@ -398,41 +509,41 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
{
__m128i h0 = _mm_load_si128(table+i);
__m128i h1 = _mm_load_si128(table+i+1);
__m128i h01 = _mm_xor_si128(h0, h1);
__m128i h2 = _mm_xor_si128(h0, h1);
if (++i == s)
{
d = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)data), bswapMask);
d = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)data), mask1);
d = _mm_xor_si128(d, x);
c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d, h0, 0));
c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d, h1, 1));
d = _mm_xor_si128(d, _mm_shuffle_epi32(d, _MM_SHUFFLE(1, 0, 3, 2)));
c1 = _mm_xor_si128(c1, _mm_clmulepi64_si128(d, h01, 0));
c1 = _mm_xor_si128(c1, _mm_clmulepi64_si128(d, h2, 0));
break;
}
d = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-i)*16-8)), bswapMask2);
d = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-i)*16-8)), mask2);
c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d2, h0, 1));
c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d, h1, 1));
d2 = _mm_xor_si128(d2, d);
c1 = _mm_xor_si128(c1, _mm_clmulepi64_si128(d2, h01, 1));
c1 = _mm_xor_si128(c1, _mm_clmulepi64_si128(d2, h2, 1));
if (++i == s)
{
d = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)data), bswapMask);
d = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)data), mask1);
d = _mm_xor_si128(d, x);
c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d, h0, 0x10));
c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d, h1, 0x11));
d = _mm_xor_si128(d, _mm_shuffle_epi32(d, _MM_SHUFFLE(1, 0, 3, 2)));
c1 = _mm_xor_si128(c1, _mm_clmulepi64_si128(d, h01, 0x10));
c1 = _mm_xor_si128(c1, _mm_clmulepi64_si128(d, h2, 0x10));
break;
}
d2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-i)*16-8)), bswapMask);
d2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-i)*16-8)), mask1);
c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d, h0, 0x10));
c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d2, h1, 0x10));
d = _mm_xor_si128(d, d2);
c1 = _mm_xor_si128(c1, _mm_clmulepi64_si128(d, h01, 0x10));
c1 = _mm_xor_si128(c1, _mm_clmulepi64_si128(d, h2, 0x10));
}
data += s*16;
len -= s*16;
@ -444,6 +555,75 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
_mm_store_si128((__m128i *)(void *)HashBuffer(), x);
return len;
}
#elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
if (HasPMULL())
{
const uint64x2_t *table = (const uint64x2_t *)MulTable();
uint64x2_t x = vld1q_u64((const uint64_t*)HashBuffer());
const uint64x2_t r = s_clmulConstants[0];
while (len >= 16)
{
size_t s = UnsignedMin(len/16, s_clmulTableSizeInBlocks), i=0;
uint64x2_t d, d2 = (uint64x2_t)vrev64q_u8((uint8x16_t)vld1q_u64((const uint64_t *)(data+(s-1)*16)));
uint64x2_t c0 = vdupq_n_u64(0);
uint64x2_t c1 = vdupq_n_u64(0);
uint64x2_t c2 = vdupq_n_u64(0);
while (true)
{
const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i));
const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1));
const uint64x2_t h2 = veorq_u64(h0, h1);
if (++i == s)
{
const uint64x2_t t1 = vld1q_u64((const uint64_t *)data);
d = veorq_u64((uint64x2_t)vrev64q_u8((uint8x16_t)vcombine_u64(vget_high_u64(t1), vget_low_u64(t1))), x);
c0 = veorq_u64(c0, (uint64x2_t)vmull_p64(vgetq_lane_u64(d, 0), vgetq_lane_u64(h0, 0)));
c2 = veorq_u64(c2, (uint64x2_t)vmull_p64(vgetq_lane_u64(d, 1), vgetq_lane_u64(h1, 0)));
d = veorq_u64(d, (uint64x2_t)vcombine_u32(vget_high_u32((uint32x4_t)d), vget_low_u32((uint32x4_t)d)));
c1 = veorq_u64(c1, (uint64x2_t)vmull_p64(vgetq_lane_u64(d, 0), vgetq_lane_u64(h2, 0)));
break;
}
d = (uint64x2_t)vrev64q_u8((uint8x16_t)vld1q_u64((const uint64_t *)(data+(s-i)*16-8)));
c0 = veorq_u64(c0, (uint64x2_t)vmull_p64(vgetq_lane_u64(d2, 1), vgetq_lane_u64(h0, 0)));
c2 = veorq_u64(c2, (uint64x2_t)vmull_p64(vgetq_lane_u64(d, 1), vgetq_lane_u64(h1, 0)));
d2 = veorq_u64(d2, d);
c1 = veorq_u64(c1, (uint64x2_t)vmull_p64(vgetq_lane_u64(d2, 1), vgetq_lane_u64(h2, 0)));
if (++i == s)
{
const uint64x2_t t2 = vld1q_u64((const uint64_t *)data);
d = veorq_u64((uint64x2_t)vrev64q_u8((uint8x16_t)vcombine_u64(vget_high_u64(t2), vget_low_u64(t2))), x);
c0 = veorq_u64(c0, (uint64x2_t)vmull_p64(vgetq_lane_u64(d, 0), vgetq_lane_u64(h0, 1)));
c2 = veorq_u64(c2, (uint64x2_t)vmull_high_p64((poly64x2_t)d, (poly64x2_t)h1));
d = veorq_u64(d, (uint64x2_t)vcombine_u32(vget_high_u32((uint32x4_t)d), vget_low_u32((uint32x4_t)d)));
c1 = veorq_u64(c1, (uint64x2_t)vmull_p64(vgetq_lane_u64(d, 0), vgetq_lane_u64(h2, 1)));
break;
}
const uint64x2_t t3 = vld1q_u64((uint64_t *)(data+(s-i)*16-8));
d2 = (uint64x2_t)vrev64q_u8((uint8x16_t)vcombine_u64(vget_high_u64(t3), vget_low_u64(t3)));
c0 = veorq_u64(c0, (uint64x2_t)vmull_p64(vgetq_lane_u64(d, 0), vgetq_lane_u64(h0, 1)));
c2 = veorq_u64(c2, (uint64x2_t)vmull_p64(vgetq_lane_u64(d2, 0), vgetq_lane_u64(h1, 1)));
d = veorq_u64(d, d2);
c1 = veorq_u64(c1, (uint64x2_t)vmull_p64(vgetq_lane_u64(d, 0), vgetq_lane_u64(h2, 1)));
}
data += s*16;
len -= s*16;
c1 = veorq_u64(veorq_u64(c1, c0), c2);
x = PMULL_Reduce(c0, c1, c2, r);
}
vst1q_u64((uint64_t *)HashBuffer(), x);
return len;
}
#endif
typedef BlockGetAndPut<word64, NativeByteOrder> Block;
@ -453,6 +633,8 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
switch (2*(m_buffer.size()>=64*1024)
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
+ HasSSE2()
//#elif CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
// + HasNEON()
#endif
)
{