Move CRC32 probe code from cpu.cpp to crc-simd.cpp

pull/461/head
Jeffrey Walton 2017-07-29 03:49:48 -04:00
parent 368f344667
commit 3e7496803e
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
8 changed files with 167 additions and 126 deletions

View File

@ -194,11 +194,16 @@ endif # -DCRYPTOPP_DISABLE_SSSE3
endif # -DCRYPTOPP_DISABLE_ASM endif # -DCRYPTOPP_DISABLE_ASM
endif # CXXFLAGS endif # CXXFLAGS
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -o $(TEMPDIR)/t.o -c crc-simd.cpp 2>/dev/null; echo $$?) HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -o $(TEMPDIR)/t.o -c crc-simd.cpp; echo $$?)
ifeq ($(HAS_CRC),0) ifeq ($(HAS_CRC),0)
CRC_FLAG := -msse4.2 CRC_FLAG := -msse4.2
endif endif
HAS_SHA := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -msha -o $(TEMPDIR)/t.o -c sha.cpp; echo $$?)
ifeq ($(HAS_SHA),0)
SHA_FLAG := -msse4.2 -msha
endif
# BEGIN_NATIVE_ARCH # BEGIN_NATIVE_ARCH
# Guard use of -march=native (or -m{32|64} on some platforms) # Guard use of -march=native (or -m{32|64} on some platforms)
# Don't add anything if -march=XXX or -mtune=XXX is specified # Don't add anything if -march=XXX or -mtune=XXX is specified
@ -289,10 +294,14 @@ endif
endif endif
ifeq ($(IS_ARMV8),1) ifeq ($(IS_ARMV8),1)
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c crc-simd.cpp 2>/dev/null; echo $$?) HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c crc-simd.cpp; echo $$?)
ifeq ($(HAS_CRC),0) ifeq ($(HAS_CRC),0)
CRC_FLAG := -march=armv8-a+crc CRC_FLAG := -march=armv8-a+crc
endif endif
HAS_SHA := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c sha.cpp; echo $$?)
ifeq ($(HAS_SHA),0)
SHA_FLAG := -march=armv8-a+crypto
endif
endif endif
endif # IS_X86 endif # IS_X86
@ -832,10 +841,14 @@ rdrand-%.o:
./rdrand-nasm.sh ./rdrand-nasm.sh
endif endif
# crc.cpp may have SSE4.2 or ARMv8a available # SSE4.2 or ARMv8a available
crc-simd.o : crc-simd.cpp crc-simd.o : crc-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(CRC_FLAG) -c) $< $(CXX) $(strip $(CXXFLAGS) $(CRC_FLAG) -c) $<
# SSE4.2/SHANI or ARMv8a available
sha.o : sha.cpp
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
# Don't build Threefish with UBsan on Travis CI. Timeouts cause the build to fail. # Don't build Threefish with UBsan on Travis CI. Timeouts cause the build to fail.
# Also see https://stackoverflow.com/q/12983137/608639. # Also see https://stackoverflow.com/q/12983137/608639.
ifeq ($(findstring true,$(CI)),true) ifeq ($(findstring true,$(CI)),true)

View File

@ -510,7 +510,7 @@ void Benchmark2(double t, double hertz)
if (HasCLMUL()) if (HasCLMUL())
BenchMarkByName2<AuthenticatedSymmetricCipher, MessageAuthenticationCode>("AES/GCM", 0, "GMAC(AES)"); BenchMarkByName2<AuthenticatedSymmetricCipher, MessageAuthenticationCode>("AES/GCM", 0, "GMAC(AES)");
else else
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE #elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL()) if (HasPMULL())
BenchMarkByName2<AuthenticatedSymmetricCipher, MessageAuthenticationCode>("AES/GCM", 0, "GMAC(AES)"); BenchMarkByName2<AuthenticatedSymmetricCipher, MessageAuthenticationCode>("AES/GCM", 0, "GMAC(AES)");
else else
@ -598,7 +598,7 @@ void Benchmark2(double t, double hertz)
if (HasCLMUL()) if (HasCLMUL())
BenchMarkByName2<AuthenticatedSymmetricCipher, AuthenticatedSymmetricCipher>("AES/GCM", 0, "AES/GCM"); BenchMarkByName2<AuthenticatedSymmetricCipher, AuthenticatedSymmetricCipher>("AES/GCM", 0, "AES/GCM");
else else
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE #elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL()) if (HasPMULL())
BenchMarkByName2<AuthenticatedSymmetricCipher, AuthenticatedSymmetricCipher>("AES/GCM", 0, "AES/GCM"); BenchMarkByName2<AuthenticatedSymmetricCipher, AuthenticatedSymmetricCipher>("AES/GCM", 0, "AES/GCM");
else else

View File

@ -431,6 +431,12 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_ARM64 0 #define CRYPTOPP_BOOL_ARM64 0
#endif #endif
#if defined(_MSC_VER) || defined(__BORLANDC__)
# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY 1
#else
# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY 1
#endif
// ***************** IA32 CPU features ******************** // ***************** IA32 CPU features ********************
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
@ -521,7 +527,7 @@ NAMESPACE_END
// Microsoft plans to support ARM-64, but its not clear how to detect it. // Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available // TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) #if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRC32) # if defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1 # define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
# endif # endif
#endif #endif
@ -530,11 +536,9 @@ NAMESPACE_END
// LLVM Clang requires 3.5. Apple Clang does not support it at the moment. // LLVM Clang requires 3.5. Apple Clang does not support it at the moment.
// Microsoft plans to support ARM-64, but its not clear how to detect it. // Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available // TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) #if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
# if defined(__ARM_FEATURE_CRYPTO) && !defined(__apple_build_version__) # if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# if defined(__arm64__) || defined(__aarch64__) # define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
# define CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE 1
# endif
# endif # endif
#endif #endif
@ -542,12 +546,20 @@ NAMESPACE_END
// LLVM Clang requires 3.5. Apple Clang is unknown at the moment. // LLVM Clang requires 3.5. Apple Clang is unknown at the moment.
// Microsoft plans to support ARM-64, but its not clear how to detect it. // Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available // TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) #if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRYPTO) # if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1 # define CRYPTOPP_ARMV8A_AES_AVAILABLE 1
# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1
# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1
# endif # endif
#endif #endif
// ARM CRC testing
#undef CRYPTOPP_ARMV8A_AES_AVAILABLE
#undef CRYPTOPP_ARMV8A_SHA_AVAILABLE
#undef CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#undef CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
#endif // ARM32, ARM64 #endif // ARM32, ARM64
// ***************** Miscellaneous ******************** // ***************** Miscellaneous ********************

71
cpu.cpp
View File

@ -348,12 +348,6 @@ extern "C"
longjmp(s_jmpNoPMULL, 1); longjmp(s_jmpNoPMULL, 1);
} }
static jmp_buf s_jmpNoCRC32;
static void SigIllHandlerCRC32(int)
{
longjmp(s_jmpNoCRC32, 1);
}
static jmp_buf s_jmpNoAES; static jmp_buf s_jmpNoAES;
static void SigIllHandlerAES(int) static void SigIllHandlerAES(int)
{ {
@ -444,7 +438,7 @@ static bool TryNEON()
static bool TryPMULL() static bool TryPMULL()
{ {
#if (CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE) #if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true; volatile bool result = true;
__try __try
@ -506,66 +500,23 @@ static bool TryPMULL()
# endif # endif
#else #else
return false; return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
} }
extern bool CPU_TryCRC32_ARMV8();
static bool TryCRC32() static bool TryCRC32()
{ {
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE) #if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) return CPU_TryCRC32_ARMV8();
volatile bool result = true;
__try
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
result = !!w;
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return result;
#else #else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
if (oldHandler == SIG_ERR)
return false; return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoCRC32))
result = false;
else
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
// Hack... GCC optimizes away the code and returns true
result = !!w;
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
signal(SIGILL, oldHandler);
return result;
#endif #endif
#else
return false;
#endif // CRYPTOPP_ARMV8A_CRC32_AVAILABLE
} }
static bool TryAES() static bool TryAES()
{ {
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) #if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true; volatile bool result = true;
__try __try
@ -613,12 +564,12 @@ static bool TryAES()
# endif # endif
#else #else
return false; return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
} }
static bool TrySHA1() static bool TrySHA1()
{ {
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) #if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true; volatile bool result = true;
__try __try
@ -673,12 +624,12 @@ static bool TrySHA1()
# endif # endif
#else #else
return false; return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
} }
static bool TrySHA2() static bool TrySHA2()
{ {
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) #if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true; volatile bool result = true;
__try __try
@ -731,7 +682,7 @@ static bool TrySHA2()
# endif # endif
#else #else
return false; return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
} }
void DetectArmFeatures() void DetectArmFeatures()

10
cpu.h
View File

@ -74,13 +74,6 @@
#endif // immintrin.h #endif // immintrin.h
#endif // X86/X64/X32 Headers #endif // X86/X64/X32 Headers
// Applies to both X86/X32/X64 and ARM32/ARM64. And we've got MIPS devices on the way.
#if defined(_MSC_VER) || defined(__BORLANDC__)
# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#else
# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
#endif
// Applies to both X86/X32/X64 and ARM32/ARM64 // Applies to both X86/X32/X64 and ARM32/ARM64
#if defined(CRYPTOPP_LLVM_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER) #if defined(CRYPTOPP_LLVM_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER)
#define NEW_LINE "\n" #define NEW_LINE "\n"
@ -457,7 +450,6 @@ inline int GetCacheLineSize()
#define ASC(x, y) x label##y*newline* #define ASC(x, y) x label##y*newline*
#define AS_HEX(y) 0##y##h #define AS_HEX(y) 0##y##h
#elif defined(_MSC_VER) || defined(__BORLANDC__) #elif defined(_MSC_VER) || defined(__BORLANDC__)
#define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#define AS1(x) __asm {x} #define AS1(x) __asm {x}
#define AS2(x, y) __asm {x, y} #define AS2(x, y) __asm {x, y}
#define AS3(x, y, z) __asm {x, y, z} #define AS3(x, y, z) __asm {x, y, z}
@ -468,8 +460,6 @@ inline int GetCacheLineSize()
#define CRYPTOPP_NAKED __declspec(naked) #define CRYPTOPP_NAKED __declspec(naked)
#define AS_HEX(y) 0x##y #define AS_HEX(y) 0x##y
#else #else
#define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
// define these in two steps to allow arguments to be expanded // define these in two steps to allow arguments to be expanded
#define GNU_AS1(x) #x ";" NEW_LINE #define GNU_AS1(x) #x ";" NEW_LINE
#define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE #define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE

View File

@ -19,8 +19,83 @@
# include "arm_acle.h" # include "arm_acle.h"
#endif #endif
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
# include <signal.h>
# include <setjmp.h>
#endif
NAMESPACE_BEGIN(CryptoPP) NAMESPACE_BEGIN(CryptoPP)
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
extern "C" {
typedef void (*SigHandler)(int);
static jmp_buf s_jmpNoCRC32;
static void SigIllHandlerCRC32(int)
{
longjmp(s_jmpNoCRC32, 1);
}
};
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
bool CPU_TryCRC32_ARMV8()
{
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32w(w,x);
w = __crc32h(w,y);
w = __crc32b(w,z);
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
result = !!w;
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return result;
#else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
if (oldHandler == SIG_ERR)
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoCRC32))
result = false;
else
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32w(w,x);
w = __crc32h(w,y);
w = __crc32b(w,z);
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
// Hack... GCC optimizes away the code and returns true
result = !!w;
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
signal(SIGILL, oldHandler);
return result;
# endif
}
#endif // CRYPTOPP_ARMV8A_CRC32_AVAILABLE
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE) #if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
void CRC32_Update_ARMV8(const byte *s, size_t n, word32& c) void CRC32_Update_ARMV8(const byte *s, size_t n, word32& c)
{ {

14
gcm.cpp
View File

@ -49,7 +49,7 @@ NAMESPACE_BEGIN(CryptoPP)
#endif #endif
#endif #endif
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE #if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#if defined(__GNUC__) #if defined(__GNUC__)
// Schneiders, Hovsmith and O'Rourke used this trick. // Schneiders, Hovsmith and O'Rourke used this trick.
// It results in much better code generation in production code // It results in much better code generation in production code
@ -143,7 +143,7 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C); return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
} }
#endif // Microsoft and compatibles #endif // Microsoft and compatibles
#endif // CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE #endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
word16 GCM_Base::s_reductionTable[256]; word16 GCM_Base::s_reductionTable[256];
volatile bool GCM_Base::s_reductionTableInitialized = false; volatile bool GCM_Base::s_reductionTableInitialized = false;
@ -282,7 +282,7 @@ inline __m128i CLMUL_GF_Mul(const __m128i &x, const __m128i &h, const __m128i &r
} }
#endif #endif
#if CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE #if CRYPTOPP_ARMV8A_PMULL_AVAILABLE
CRYPTOPP_ALIGN_DATA(16) CRYPTOPP_ALIGN_DATA(16)
static const word64 s_clmulConstants64[] = { static const word64 s_clmulConstants64[] = {
@ -338,7 +338,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
tableSize = s_clmulTableSizeInBlocks * REQUIRED_BLOCKSIZE; tableSize = s_clmulTableSizeInBlocks * REQUIRED_BLOCKSIZE;
} }
else else
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE #elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL()) if (HasPMULL())
{ {
// Avoid "parameter not used" error and suppress Coverity finding // Avoid "parameter not used" error and suppress Coverity finding
@ -384,7 +384,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
return; return;
} }
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE #elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL()) if (HasPMULL())
{ {
const uint64x2_t r = s_clmulConstants[0]; const uint64x2_t r = s_clmulConstants[0];
@ -520,7 +520,7 @@ inline void GCM_Base::ReverseHashBufferIfNeeded()
__m128i &x = *(__m128i *)(void *)HashBuffer(); __m128i &x = *(__m128i *)(void *)HashBuffer();
x = _mm_shuffle_epi8(x, s_clmulConstants[1]); x = _mm_shuffle_epi8(x, s_clmulConstants[1]);
} }
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE #elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL()) if (HasPMULL())
{ {
if (GetNativeByteOrder() != BIG_ENDIAN_ORDER) if (GetNativeByteOrder() != BIG_ENDIAN_ORDER)
@ -670,7 +670,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
_mm_store_si128((__m128i *)(void *)HashBuffer(), x); _mm_store_si128((__m128i *)(void *)HashBuffer(), x);
return len; return len;
} }
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE #elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL()) if (HasPMULL())
{ {
const uint64x2_t *table = (const uint64x2_t *)MulTable(); const uint64x2_t *table = (const uint64x2_t *)MulTable();

62
sha.cpp
View File

@ -103,7 +103,7 @@ static void SHA1_CXX_Transform(word32 *state, const word32 *data)
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley. // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data) static void SHA1_Transform_SHANI(word32 *state, const word32 *data)
{ {
__m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1; __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
__m128i MASK, MSG0, MSG1, MSG2, MSG3; __m128i MASK, MSG0, MSG1, MSG2, MSG3;
@ -293,8 +293,8 @@ static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
// start of Walton/Schneiders/O'Rourke/Skip Hovsmith's code // // start of Walton/Schneiders/O'Rourke/Skip Hovsmith's code //
////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #if CRYPTOPP_ARMV8A_SHA_AVAILABLE
static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data) static void SHA1_Transform_ARMV8A(word32 *state, const word32 *data)
{ {
uint32x4_t C0, C1, C2, C3; uint32x4_t C0, C1, C2, C3;
uint32x4_t ABCD, ABCD_SAVED; uint32x4_t ABCD, ABCD_SAVED;
@ -462,7 +462,7 @@ static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data)
vst1q_u32(&state[0], ABCD); vst1q_u32(&state[0], ABCD);
state[4] = E0; state[4] = E0;
} }
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// end of Walton/Schneiders/O'Rourke/Hovsmith's code // // end of Walton/Schneiders/O'Rourke/Hovsmith's code //
@ -472,12 +472,12 @@ pfnSHATransform InitializeSHA1Transform()
{ {
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
if (HasSHA()) if (HasSHA())
return &SHA1_SSE_SHA_Transform; return &SHA1_Transform_SHANI;
else else
#endif #endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #if CRYPTOPP_ARMV8A_SHA_AVAILABLE
if (HasSHA1()) if (HasSHA1())
return &SHA1_ARM_SHA_Transform; return &SHA1_Transform_ARMV8A;
else else
#endif #endif
return &SHA1_CXX_Transform; return &SHA1_CXX_Transform;
@ -536,7 +536,7 @@ void SHA256::InitState(HashWordType *state)
memcpy(state, s, sizeof(s)); memcpy(state, s, sizeof(s));
} }
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_ARMV8A_SHA_AVAILABLE
CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = { CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
#else #else
extern const word32 SHA256_K[64] = { extern const word32 SHA256_K[64] = {
@ -893,9 +893,9 @@ void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data,
#endif #endif
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length); static void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data, size_t length);
#elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #elif CRYPTOPP_ARMV8A_SHA_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length); static void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8A(word32 *state, const word32 *data, size_t length);
#endif #endif
#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM) #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)
@ -904,12 +904,12 @@ pfnSHAHashBlocks InitializeSHA256HashBlocks()
{ {
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
if (HasSHA()) if (HasSHA())
return &SHA256_SSE_SHA_HashBlocks; return &SHA256_HashBlocks_SHANI;
else else
#endif #endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #if CRYPTOPP_ARMV8A_SHA_AVAILABLE
if (HasSHA2()) if (HasSHA2())
return &SHA256_ARM_SHA_HashBlocks; return &SHA256_HashBlocks_ARMV8A;
else else
#endif #endif
@ -956,7 +956,7 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
#if defined(__OPTIMIZE_SIZE__) #if defined(__OPTIMIZE_SIZE__)
// Smaller but slower // Smaller but slower
void SHA256_CXX_Transform(word32 *state, const word32 *data) void SHA256_Transform_CXX(word32 *state, const word32 *data)
{ {
word32 W[32], T[20]; word32 W[32], T[20];
unsigned int i = 0, j = 0; unsigned int i = 0, j = 0;
@ -1028,7 +1028,7 @@ void SHA256_CXX_Transform(word32 *state, const word32 *data)
} }
#else #else
// Bigger but faster // Bigger but faster
void SHA256_CXX_Transform(word32 *state, const word32 *data) void SHA256_Transform_CXX(word32 *state, const word32 *data)
{ {
word32 W[16], T[8]; word32 W[16], T[8];
/* Copy context->state[] to working vars */ /* Copy context->state[] to working vars */
@ -1060,7 +1060,7 @@ void SHA256_CXX_Transform(word32 *state, const word32 *data)
#undef R #undef R
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
static void SHA256_SSE2_Transform(word32 *state, const word32 *data) static void SHA256_Transform_SSE2(word32 *state, const word32 *data)
{ {
// this byte reverse is a waste of time, but this function is only called by MDC // this byte reverse is a waste of time, but this function is only called by MDC
word32 W[16]; word32 W[16];
@ -1070,18 +1070,18 @@ static void SHA256_SSE2_Transform(word32 *state, const word32 *data)
#endif // CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE #endif // CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
static void SHA256_SSE_SHA_Transform(word32 *state, const word32 *data) static void SHA256_Transform_SHANI(word32 *state, const word32 *data)
{ {
return SHA256_SSE_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE); return SHA256_HashBlocks_SHANI(state, data, SHA256::BLOCKSIZE);
} }
#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE #endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #if CRYPTOPP_ARMV8A_SHA_AVAILABLE
static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data) static void SHA256_Transform_ARMV8A(word32 *state, const word32 *data)
{ {
return SHA256_ARM_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE); return SHA256_HashBlocks_ARMV8A(state, data, SHA256::BLOCKSIZE);
} }
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE
/////////////////////////////////// ///////////////////////////////////
// start of Walton/Gulley's code // // start of Walton/Gulley's code //
@ -1089,7 +1089,7 @@ static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data)
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley. // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length) static void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data, size_t length)
{ {
CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data); CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data);
CRYPTOPP_ASSERT(length % SHA256::BLOCKSIZE == 0); CRYPTOPP_ASSERT(length % SHA256::BLOCKSIZE == 0);
@ -1294,8 +1294,8 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor
// start of Walton/Schneiders/O'Rourke/Hovsmith's code // // start of Walton/Schneiders/O'Rourke/Hovsmith's code //
///////////////////////////////////////////////////////// /////////////////////////////////////////////////////////
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #if CRYPTOPP_ARMV8A_SHA_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length) static void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8A(word32 *state, const word32 *data, size_t length)
{ {
uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE; uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
uint32x4_t MSG0, MSG1, MSG2, MSG3; uint32x4_t MSG0, MSG1, MSG2, MSG3;
@ -1460,21 +1460,21 @@ pfnSHATransform InitializeSHA256Transform()
{ {
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
if (HasSHA()) if (HasSHA())
return &SHA256_SSE_SHA_Transform; return &SHA256_Transform_SHANI;
else else
#endif #endif
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
if (HasSSE2()) if (HasSSE2())
return &SHA256_SSE2_Transform; return &SHA256_Transform_SSE2;
else else
#endif #endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE #if CRYPTOPP_ARMV8A_SHA_AVAILABLE
if (HasSHA2()) if (HasSHA2())
return &SHA256_ARM_SHA_Transform; return &SHA256_Transform_ARMV8A;
else else
#endif #endif
return &SHA256_CXX_Transform; return &SHA256_Transform_CXX;
} }
void SHA256::Transform(word32 *state, const word32 *data) void SHA256::Transform(word32 *state, const word32 *data)