Move CRC32 probe code from cpu.cpp to crc-simd.cpp

pull/461/head
Jeffrey Walton 2017-07-29 03:49:48 -04:00
parent 368f344667
commit 3e7496803e
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
8 changed files with 167 additions and 126 deletions

View File

@ -194,11 +194,16 @@ endif # -DCRYPTOPP_DISABLE_SSSE3
endif # -DCRYPTOPP_DISABLE_ASM
endif # CXXFLAGS
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -o $(TEMPDIR)/t.o -c crc-simd.cpp 2>/dev/null; echo $$?)
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -o $(TEMPDIR)/t.o -c crc-simd.cpp; echo $$?)
ifeq ($(HAS_CRC),0)
CRC_FLAG := -msse4.2
endif
HAS_SHA := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -msha -o $(TEMPDIR)/t.o -c sha.cpp; echo $$?)
ifeq ($(HAS_SHA),0)
SHA_FLAG := -msse4.2 -msha
endif
# BEGIN_NATIVE_ARCH
# Guard use of -march=native (or -m{32|64} on some platforms)
# Don't add anything if -march=XXX or -mtune=XXX is specified
@ -289,10 +294,14 @@ endif
endif
ifeq ($(IS_ARMV8),1)
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c crc-simd.cpp 2>/dev/null; echo $$?)
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c crc-simd.cpp; echo $$?)
ifeq ($(HAS_CRC),0)
CRC_FLAG := -march=armv8-a+crc
endif
HAS_SHA := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c sha.cpp; echo $$?)
ifeq ($(HAS_SHA),0)
SHA_FLAG := -march=armv8-a+crypto
endif
endif
endif # IS_X86
@ -832,10 +841,14 @@ rdrand-%.o:
./rdrand-nasm.sh
endif
# crc.cpp may have SSE4.2 or ARMv8a available
# SSE4.2 or ARMv8a available
crc-simd.o : crc-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(CRC_FLAG) -c) $<
# SSE4.2/SHANI or ARMv8a available
sha.o : sha.cpp
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
# Don't build Threefish with UBsan on Travis CI. Timeouts cause the build to fail.
# Also see https://stackoverflow.com/q/12983137/608639.
ifeq ($(findstring true,$(CI)),true)

View File

@ -510,7 +510,7 @@ void Benchmark2(double t, double hertz)
if (HasCLMUL())
BenchMarkByName2<AuthenticatedSymmetricCipher, MessageAuthenticationCode>("AES/GCM", 0, "GMAC(AES)");
else
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL())
BenchMarkByName2<AuthenticatedSymmetricCipher, MessageAuthenticationCode>("AES/GCM", 0, "GMAC(AES)");
else
@ -598,7 +598,7 @@ void Benchmark2(double t, double hertz)
if (HasCLMUL())
BenchMarkByName2<AuthenticatedSymmetricCipher, AuthenticatedSymmetricCipher>("AES/GCM", 0, "AES/GCM");
else
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL())
BenchMarkByName2<AuthenticatedSymmetricCipher, AuthenticatedSymmetricCipher>("AES/GCM", 0, "AES/GCM");
else

View File

@ -431,6 +431,12 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_ARM64 0
#endif
#if defined(_MSC_VER) || defined(__BORLANDC__)
# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY 1
#else
# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY 1
#endif
// ***************** IA32 CPU features ********************
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
@ -521,7 +527,7 @@ NAMESPACE_END
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRC32)
# if defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
# endif
#endif
@ -530,11 +536,9 @@ NAMESPACE_END
// LLVM Clang requires 3.5. Apple Clang does not support it at the moment.
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRYPTO) && !defined(__apple_build_version__)
# if defined(__arm64__) || defined(__aarch64__)
# define CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE 1
# endif
#if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
# endif
#endif
@ -542,12 +546,20 @@ NAMESPACE_END
// LLVM Clang requires 3.5. Apple Clang is unknown at the moment.
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRYPTO)
# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1
#if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
# define CRYPTOPP_ARMV8A_AES_AVAILABLE 1
# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1
# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1
# endif
#endif
// ARM CRC testing
#undef CRYPTOPP_ARMV8A_AES_AVAILABLE
#undef CRYPTOPP_ARMV8A_SHA_AVAILABLE
#undef CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#undef CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
#endif // ARM32, ARM64
// ***************** Miscellaneous ********************

71
cpu.cpp
View File

@ -348,12 +348,6 @@ extern "C"
longjmp(s_jmpNoPMULL, 1);
}
static jmp_buf s_jmpNoCRC32;
static void SigIllHandlerCRC32(int)
{
longjmp(s_jmpNoCRC32, 1);
}
static jmp_buf s_jmpNoAES;
static void SigIllHandlerAES(int)
{
@ -444,7 +438,7 @@ static bool TryNEON()
static bool TryPMULL()
{
#if (CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE)
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
@ -506,66 +500,23 @@ static bool TryPMULL()
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
}
extern bool CPU_TryCRC32_ARMV8();
static bool TryCRC32()
{
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
result = !!w;
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return result;
return CPU_TryCRC32_ARMV8();
#else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
if (oldHandler == SIG_ERR)
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoCRC32))
result = false;
else
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
// Hack... GCC optimizes away the code and returns true
result = !!w;
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
signal(SIGILL, oldHandler);
return result;
#endif
#else
return false;
#endif // CRYPTOPP_ARMV8A_CRC32_AVAILABLE
}
static bool TryAES()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
@ -613,12 +564,12 @@ static bool TryAES()
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
}
static bool TrySHA1()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
@ -673,12 +624,12 @@ static bool TrySHA1()
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
}
static bool TrySHA2()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
@ -731,7 +682,7 @@ static bool TrySHA2()
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
}
void DetectArmFeatures()

10
cpu.h
View File

@ -74,13 +74,6 @@
#endif // immintrin.h
#endif // X86/X64/X32 Headers
// Applies to both X86/X32/X64 and ARM32/ARM64. And we've got MIPS devices on the way.
#if defined(_MSC_VER) || defined(__BORLANDC__)
# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#else
# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
#endif
// Applies to both X86/X32/X64 and ARM32/ARM64
#if defined(CRYPTOPP_LLVM_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER)
#define NEW_LINE "\n"
@ -457,7 +450,6 @@ inline int GetCacheLineSize()
#define ASC(x, y) x label##y*newline*
#define AS_HEX(y) 0##y##h
#elif defined(_MSC_VER) || defined(__BORLANDC__)
#define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#define AS1(x) __asm {x}
#define AS2(x, y) __asm {x, y}
#define AS3(x, y, z) __asm {x, y, z}
@ -468,8 +460,6 @@ inline int GetCacheLineSize()
#define CRYPTOPP_NAKED __declspec(naked)
#define AS_HEX(y) 0x##y
#else
#define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
// define these in two steps to allow arguments to be expanded
#define GNU_AS1(x) #x ";" NEW_LINE
#define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE

View File

@ -19,8 +19,83 @@
# include "arm_acle.h"
#endif
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
# include <signal.h>
# include <setjmp.h>
#endif
NAMESPACE_BEGIN(CryptoPP)
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
extern "C" {
typedef void (*SigHandler)(int);
static jmp_buf s_jmpNoCRC32;
static void SigIllHandlerCRC32(int)
{
longjmp(s_jmpNoCRC32, 1);
}
};
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
bool CPU_TryCRC32_ARMV8()
{
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32w(w,x);
w = __crc32h(w,y);
w = __crc32b(w,z);
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
result = !!w;
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return result;
#else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
if (oldHandler == SIG_ERR)
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoCRC32))
result = false;
else
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32w(w,x);
w = __crc32h(w,y);
w = __crc32b(w,z);
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
// Hack... GCC optimizes away the code and returns true
result = !!w;
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
signal(SIGILL, oldHandler);
return result;
# endif
}
#endif // CRYPTOPP_ARMV8A_CRC32_AVAILABLE
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
void CRC32_Update_ARMV8(const byte *s, size_t n, word32& c)
{

14
gcm.cpp
View File

@ -49,7 +49,7 @@ NAMESPACE_BEGIN(CryptoPP)
#endif
#endif
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
#if defined(__GNUC__)
// Schneiders, Hovsmith and O'Rourke used this trick.
// It results in much better code generation in production code
@ -143,7 +143,7 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
}
#endif // Microsoft and compatibles
#endif // CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
word16 GCM_Base::s_reductionTable[256];
volatile bool GCM_Base::s_reductionTableInitialized = false;
@ -282,7 +282,7 @@ inline __m128i CLMUL_GF_Mul(const __m128i &x, const __m128i &h, const __m128i &r
}
#endif
#if CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE
CRYPTOPP_ALIGN_DATA(16)
static const word64 s_clmulConstants64[] = {
@ -338,7 +338,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
tableSize = s_clmulTableSizeInBlocks * REQUIRED_BLOCKSIZE;
}
else
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL())
{
// Avoid "parameter not used" error and suppress Coverity finding
@ -384,7 +384,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
return;
}
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL())
{
const uint64x2_t r = s_clmulConstants[0];
@ -520,7 +520,7 @@ inline void GCM_Base::ReverseHashBufferIfNeeded()
__m128i &x = *(__m128i *)(void *)HashBuffer();
x = _mm_shuffle_epi8(x, s_clmulConstants[1]);
}
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL())
{
if (GetNativeByteOrder() != BIG_ENDIAN_ORDER)
@ -670,7 +670,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
_mm_store_si128((__m128i *)(void *)HashBuffer(), x);
return len;
}
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
if (HasPMULL())
{
const uint64x2_t *table = (const uint64x2_t *)MulTable();

62
sha.cpp
View File

@ -103,7 +103,7 @@ static void SHA1_CXX_Transform(word32 *state, const word32 *data)
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
static void SHA1_Transform_SHANI(word32 *state, const word32 *data)
{
__m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
__m128i MASK, MSG0, MSG1, MSG2, MSG3;
@ -293,8 +293,8 @@ static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
// start of Walton/Schneiders/O'Rourke/Skip Hovsmith's code //
//////////////////////////////////////////////////////////////
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data)
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
static void SHA1_Transform_ARMV8A(word32 *state, const word32 *data)
{
uint32x4_t C0, C1, C2, C3;
uint32x4_t ABCD, ABCD_SAVED;
@ -462,7 +462,7 @@ static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data)
vst1q_u32(&state[0], ABCD);
state[4] = E0;
}
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE
///////////////////////////////////////////////////////
// end of Walton/Schneiders/O'Rourke/Hovsmith's code //
@ -472,12 +472,12 @@ pfnSHATransform InitializeSHA1Transform()
{
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
if (HasSHA())
return &SHA1_SSE_SHA_Transform;
return &SHA1_Transform_SHANI;
else
#endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
if (HasSHA1())
return &SHA1_ARM_SHA_Transform;
return &SHA1_Transform_ARMV8A;
else
#endif
return &SHA1_CXX_Transform;
@ -536,7 +536,7 @@ void SHA256::InitState(HashWordType *state)
memcpy(state, s, sizeof(s));
}
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_ARMV8A_SHA_AVAILABLE
CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
#else
extern const word32 SHA256_K[64] = {
@ -893,9 +893,9 @@ void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data,
#endif
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
#elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
static void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data, size_t length);
#elif CRYPTOPP_ARMV8A_SHA_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8A(word32 *state, const word32 *data, size_t length);
#endif
#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)
@ -904,12 +904,12 @@ pfnSHAHashBlocks InitializeSHA256HashBlocks()
{
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
if (HasSHA())
return &SHA256_SSE_SHA_HashBlocks;
return &SHA256_HashBlocks_SHANI;
else
#endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
if (HasSHA2())
return &SHA256_ARM_SHA_HashBlocks;
return &SHA256_HashBlocks_ARMV8A;
else
#endif
@ -956,7 +956,7 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
#if defined(__OPTIMIZE_SIZE__)
// Smaller but slower
void SHA256_CXX_Transform(word32 *state, const word32 *data)
void SHA256_Transform_CXX(word32 *state, const word32 *data)
{
word32 W[32], T[20];
unsigned int i = 0, j = 0;
@ -1028,7 +1028,7 @@ void SHA256_CXX_Transform(word32 *state, const word32 *data)
}
#else
// Bigger but faster
void SHA256_CXX_Transform(word32 *state, const word32 *data)
void SHA256_Transform_CXX(word32 *state, const word32 *data)
{
word32 W[16], T[8];
/* Copy context->state[] to working vars */
@ -1060,7 +1060,7 @@ void SHA256_CXX_Transform(word32 *state, const word32 *data)
#undef R
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
static void SHA256_SSE2_Transform(word32 *state, const word32 *data)
static void SHA256_Transform_SSE2(word32 *state, const word32 *data)
{
// this byte reverse is a waste of time, but this function is only called by MDC
word32 W[16];
@ -1070,18 +1070,18 @@ static void SHA256_SSE2_Transform(word32 *state, const word32 *data)
#endif // CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
static void SHA256_SSE_SHA_Transform(word32 *state, const word32 *data)
static void SHA256_Transform_SHANI(word32 *state, const word32 *data)
{
return SHA256_SSE_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
return SHA256_HashBlocks_SHANI(state, data, SHA256::BLOCKSIZE);
}
#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data)
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
static void SHA256_Transform_ARMV8A(word32 *state, const word32 *data)
{
return SHA256_ARM_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
return SHA256_HashBlocks_ARMV8A(state, data, SHA256::BLOCKSIZE);
}
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE
///////////////////////////////////
// start of Walton/Gulley's code //
@ -1089,7 +1089,7 @@ static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data)
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
static void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data, size_t length)
{
CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data);
CRYPTOPP_ASSERT(length % SHA256::BLOCKSIZE == 0);
@ -1294,8 +1294,8 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor
// start of Walton/Schneiders/O'Rourke/Hovsmith's code //
/////////////////////////////////////////////////////////
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
static void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8A(word32 *state, const word32 *data, size_t length)
{
uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
uint32x4_t MSG0, MSG1, MSG2, MSG3;
@ -1460,21 +1460,21 @@ pfnSHATransform InitializeSHA256Transform()
{
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
if (HasSHA())
return &SHA256_SSE_SHA_Transform;
return &SHA256_Transform_SHANI;
else
#endif
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
if (HasSSE2())
return &SHA256_SSE2_Transform;
return &SHA256_Transform_SSE2;
else
#endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
if (HasSHA2())
return &SHA256_ARM_SHA_Transform;
return &SHA256_Transform_ARMV8A;
else
#endif
return &SHA256_CXX_Transform;
return &SHA256_Transform_CXX;
}
void SHA256::Transform(word32 *state, const word32 *data)