Move CRC32 probe code from cpu.cpp to crc-simd.cpp
parent
368f344667
commit
3e7496803e
19
GNUmakefile
19
GNUmakefile
|
|
@ -194,11 +194,16 @@ endif # -DCRYPTOPP_DISABLE_SSSE3
|
|||
endif # -DCRYPTOPP_DISABLE_ASM
|
||||
endif # CXXFLAGS
|
||||
|
||||
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -o $(TEMPDIR)/t.o -c crc-simd.cpp 2>/dev/null; echo $$?)
|
||||
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -o $(TEMPDIR)/t.o -c crc-simd.cpp; echo $$?)
|
||||
ifeq ($(HAS_CRC),0)
|
||||
CRC_FLAG := -msse4.2
|
||||
endif
|
||||
|
||||
HAS_SHA := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -msha -o $(TEMPDIR)/t.o -c sha.cpp; echo $$?)
|
||||
ifeq ($(HAS_SHA),0)
|
||||
SHA_FLAG := -msse4.2 -msha
|
||||
endif
|
||||
|
||||
# BEGIN_NATIVE_ARCH
|
||||
# Guard use of -march=native (or -m{32|64} on some platforms)
|
||||
# Don't add anything if -march=XXX or -mtune=XXX is specified
|
||||
|
|
@ -289,10 +294,14 @@ endif
|
|||
endif
|
||||
|
||||
ifeq ($(IS_ARMV8),1)
|
||||
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c crc-simd.cpp 2>/dev/null; echo $$?)
|
||||
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c crc-simd.cpp; echo $$?)
|
||||
ifeq ($(HAS_CRC),0)
|
||||
CRC_FLAG := -march=armv8-a+crc
|
||||
endif
|
||||
HAS_SHA := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c sha.cpp; echo $$?)
|
||||
ifeq ($(HAS_SHA),0)
|
||||
SHA_FLAG := -march=armv8-a+crypto
|
||||
endif
|
||||
endif
|
||||
|
||||
endif # IS_X86
|
||||
|
|
@ -832,10 +841,14 @@ rdrand-%.o:
|
|||
./rdrand-nasm.sh
|
||||
endif
|
||||
|
||||
# crc.cpp may have SSE4.2 or ARMv8a available
|
||||
# SSE4.2 or ARMv8a available
|
||||
crc-simd.o : crc-simd.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(CRC_FLAG) -c) $<
|
||||
|
||||
# SSE4.2/SHANI or ARMv8a available
|
||||
sha.o : sha.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
||||
|
||||
# Don't build Threefish with UBsan on Travis CI. Timeouts cause the build to fail.
|
||||
# Also see https://stackoverflow.com/q/12983137/608639.
|
||||
ifeq ($(findstring true,$(CI)),true)
|
||||
|
|
|
|||
|
|
@ -510,7 +510,7 @@ void Benchmark2(double t, double hertz)
|
|||
if (HasCLMUL())
|
||||
BenchMarkByName2<AuthenticatedSymmetricCipher, MessageAuthenticationCode>("AES/GCM", 0, "GMAC(AES)");
|
||||
else
|
||||
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
|
||||
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
if (HasPMULL())
|
||||
BenchMarkByName2<AuthenticatedSymmetricCipher, MessageAuthenticationCode>("AES/GCM", 0, "GMAC(AES)");
|
||||
else
|
||||
|
|
@ -598,7 +598,7 @@ void Benchmark2(double t, double hertz)
|
|||
if (HasCLMUL())
|
||||
BenchMarkByName2<AuthenticatedSymmetricCipher, AuthenticatedSymmetricCipher>("AES/GCM", 0, "AES/GCM");
|
||||
else
|
||||
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
|
||||
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
if (HasPMULL())
|
||||
BenchMarkByName2<AuthenticatedSymmetricCipher, AuthenticatedSymmetricCipher>("AES/GCM", 0, "AES/GCM");
|
||||
else
|
||||
|
|
|
|||
30
config.h
30
config.h
|
|
@ -431,6 +431,12 @@ NAMESPACE_END
|
|||
#define CRYPTOPP_BOOL_ARM64 0
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) || defined(__BORLANDC__)
|
||||
# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY 1
|
||||
#else
|
||||
# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY 1
|
||||
#endif
|
||||
|
||||
// ***************** IA32 CPU features ********************
|
||||
|
||||
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
|
||||
|
|
@ -521,7 +527,7 @@ NAMESPACE_END
|
|||
// Microsoft plans to support ARM-64, but it's not clear how to detect it.
|
||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_FEATURE_CRC32)
|
||||
# if defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
|
||||
# endif
|
||||
#endif
|
||||
|
|
@ -530,11 +536,9 @@ NAMESPACE_END
|
|||
// LLVM Clang requires 3.5. Apple Clang does not support it at the moment.
|
||||
// Microsoft plans to support ARM-64, but it's not clear how to detect it.
|
||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||
#if !defined(CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_FEATURE_CRYPTO) && !defined(__apple_build_version__)
|
||||
# if defined(__arm64__) || defined(__aarch64__)
|
||||
# define CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE 1
|
||||
# endif
|
||||
#if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
|
@ -542,12 +546,20 @@ NAMESPACE_END
|
|||
// LLVM Clang requires 3.5. Apple Clang is unknown at the moment.
|
||||
// Microsoft plans to support ARM-64, but it's not clear how to detect it.
|
||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||
#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_FEATURE_CRYPTO)
|
||||
# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1
|
||||
#if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# define CRYPTOPP_ARMV8A_AES_AVAILABLE 1
|
||||
# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1
|
||||
# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// ARM CRC testing
|
||||
#undef CRYPTOPP_ARMV8A_AES_AVAILABLE
|
||||
#undef CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
#undef CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
#undef CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
|
||||
|
||||
#endif // ARM32, ARM64
|
||||
|
||||
// ***************** Miscellaneous ********************
|
||||
|
|
|
|||
73
cpu.cpp
73
cpu.cpp
|
|
@ -348,12 +348,6 @@ extern "C"
|
|||
longjmp(s_jmpNoPMULL, 1);
|
||||
}
|
||||
|
||||
static jmp_buf s_jmpNoCRC32;
|
||||
static void SigIllHandlerCRC32(int)
|
||||
{
|
||||
longjmp(s_jmpNoCRC32, 1);
|
||||
}
|
||||
|
||||
static jmp_buf s_jmpNoAES;
|
||||
static void SigIllHandlerAES(int)
|
||||
{
|
||||
|
|
@ -444,7 +438,7 @@ static bool TryNEON()
|
|||
|
||||
static bool TryPMULL()
|
||||
{
|
||||
#if (CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE)
|
||||
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
volatile bool result = true;
|
||||
__try
|
||||
|
|
@ -506,66 +500,23 @@ static bool TryPMULL()
|
|||
# endif
|
||||
#else
|
||||
return false;
|
||||
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
}
|
||||
|
||||
extern bool CPU_TryCRC32_ARMV8();
|
||||
|
||||
static bool TryCRC32()
|
||||
{
|
||||
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
|
||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
volatile bool result = true;
|
||||
__try
|
||||
{
|
||||
word32 w=0, x=1; word16 y=2; byte z=3;
|
||||
w = __crc32cw(w,x);
|
||||
w = __crc32ch(w,y);
|
||||
w = __crc32cb(w,z);
|
||||
|
||||
result = !!w;
|
||||
}
|
||||
__except (EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return result;
|
||||
# else
|
||||
// longjmp and clobber warnings. Volatile is required.
|
||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
|
||||
if (oldHandler == SIG_ERR)
|
||||
return false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
||||
return false;
|
||||
|
||||
if (setjmp(s_jmpNoCRC32))
|
||||
result = false;
|
||||
else
|
||||
{
|
||||
word32 w=0, x=1; word16 y=2; byte z=3;
|
||||
w = __crc32cw(w,x);
|
||||
w = __crc32ch(w,y);
|
||||
w = __crc32cb(w,z);
|
||||
|
||||
// Hack... GCC optimizes away the code and returns true
|
||||
result = !!w;
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
# endif
|
||||
return CPU_TryCRC32_ARMV8();
|
||||
#else
|
||||
return false;
|
||||
#endif // CRYPTOPP_ARMV8A_CRC32_AVAILABLE
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool TryAES()
|
||||
{
|
||||
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
|
||||
#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
|
||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
volatile bool result = true;
|
||||
__try
|
||||
|
|
@ -613,12 +564,12 @@ static bool TryAES()
|
|||
# endif
|
||||
#else
|
||||
return false;
|
||||
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
|
||||
}
|
||||
|
||||
static bool TrySHA1()
|
||||
{
|
||||
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
|
||||
#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
|
||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
volatile bool result = true;
|
||||
__try
|
||||
|
|
@ -673,12 +624,12 @@ static bool TrySHA1()
|
|||
# endif
|
||||
#else
|
||||
return false;
|
||||
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
|
||||
}
|
||||
|
||||
static bool TrySHA2()
|
||||
{
|
||||
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
|
||||
#if (CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE)
|
||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
volatile bool result = true;
|
||||
__try
|
||||
|
|
@ -731,7 +682,7 @@ static bool TrySHA2()
|
|||
# endif
|
||||
#else
|
||||
return false;
|
||||
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
|
||||
}
|
||||
|
||||
void DetectArmFeatures()
|
||||
|
|
|
|||
10
cpu.h
10
cpu.h
|
|
@ -74,13 +74,6 @@
|
|||
#endif // immintrin.h
|
||||
#endif // X86/X64/X32 Headers
|
||||
|
||||
// Applies to both X86/X32/X64 and ARM32/ARM64. And we've got MIPS devices on the way.
|
||||
#if defined(_MSC_VER) || defined(__BORLANDC__)
|
||||
# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||
#else
|
||||
# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||
#endif
|
||||
|
||||
// Applies to both X86/X32/X64 and ARM32/ARM64
|
||||
#if defined(CRYPTOPP_LLVM_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER)
|
||||
#define NEW_LINE "\n"
|
||||
|
|
@ -457,7 +450,6 @@ inline int GetCacheLineSize()
|
|||
#define ASC(x, y) x label##y*newline*
|
||||
#define AS_HEX(y) 0##y##h
|
||||
#elif defined(_MSC_VER) || defined(__BORLANDC__)
|
||||
#define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||
#define AS1(x) __asm {x}
|
||||
#define AS2(x, y) __asm {x, y}
|
||||
#define AS3(x, y, z) __asm {x, y, z}
|
||||
|
|
@ -468,8 +460,6 @@ inline int GetCacheLineSize()
|
|||
#define CRYPTOPP_NAKED __declspec(naked)
|
||||
#define AS_HEX(y) 0x##y
|
||||
#else
|
||||
#define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||
|
||||
// define these in two steps to allow arguments to be expanded
|
||||
#define GNU_AS1(x) #x ";" NEW_LINE
|
||||
#define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE
|
||||
|
|
|
|||
75
crc-simd.cpp
75
crc-simd.cpp
|
|
@ -19,8 +19,83 @@
|
|||
# include "arm_acle.h"
|
||||
#endif
|
||||
|
||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||
# include <signal.h>
|
||||
# include <setjmp.h>
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||
extern "C" {
|
||||
typedef void (*SigHandler)(int);
|
||||
|
||||
static jmp_buf s_jmpNoCRC32;
|
||||
static void SigIllHandlerCRC32(int)
|
||||
{
|
||||
longjmp(s_jmpNoCRC32, 1);
|
||||
}
|
||||
};
|
||||
#endif // CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||
|
||||
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
|
||||
bool CPU_TryCRC32_ARMV8()
|
||||
{
|
||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
volatile bool result = true;
|
||||
__try
|
||||
{
|
||||
word32 w=0, x=1; word16 y=2; byte z=3;
|
||||
w = __crc32w(w,x);
|
||||
w = __crc32h(w,y);
|
||||
w = __crc32b(w,z);
|
||||
w = __crc32cw(w,x);
|
||||
w = __crc32ch(w,y);
|
||||
w = __crc32cb(w,z);
|
||||
|
||||
result = !!w;
|
||||
}
|
||||
__except (EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return result;
|
||||
#else
|
||||
// longjmp and clobber warnings. Volatile is required.
|
||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
|
||||
if (oldHandler == SIG_ERR)
|
||||
return false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
||||
return false;
|
||||
|
||||
if (setjmp(s_jmpNoCRC32))
|
||||
result = false;
|
||||
else
|
||||
{
|
||||
word32 w=0, x=1; word16 y=2; byte z=3;
|
||||
w = __crc32w(w,x);
|
||||
w = __crc32h(w,y);
|
||||
w = __crc32b(w,z);
|
||||
w = __crc32cw(w,x);
|
||||
w = __crc32ch(w,y);
|
||||
w = __crc32cb(w,z);
|
||||
|
||||
// Hack... GCC optimizes away the code and returns true
|
||||
result = !!w;
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
# endif
|
||||
}
|
||||
#endif // CRYPTOPP_ARMV8A_CRC32_AVAILABLE
|
||||
|
||||
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
|
||||
void CRC32_Update_ARMV8(const byte *s, size_t n, word32& c)
|
||||
{
|
||||
|
|
|
|||
14
gcm.cpp
14
gcm.cpp
|
|
@ -49,7 +49,7 @@ NAMESPACE_BEGIN(CryptoPP)
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
|
||||
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
#if defined(__GNUC__)
|
||||
// Schneiders, Hovsmith and O'Rourke used this trick.
|
||||
// It results in much better code generation in production code
|
||||
|
|
@ -143,7 +143,7 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
|
|||
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
|
||||
}
|
||||
#endif // Microsoft and compatibles
|
||||
#endif // CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
|
||||
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
|
||||
word16 GCM_Base::s_reductionTable[256];
|
||||
volatile bool GCM_Base::s_reductionTableInitialized = false;
|
||||
|
|
@ -282,7 +282,7 @@ inline __m128i CLMUL_GF_Mul(const __m128i &x, const __m128i &h, const __m128i &r
|
|||
}
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
|
||||
#if CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
static const word64 s_clmulConstants64[] = {
|
||||
|
|
@ -338,7 +338,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
|||
tableSize = s_clmulTableSizeInBlocks * REQUIRED_BLOCKSIZE;
|
||||
}
|
||||
else
|
||||
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
|
||||
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
if (HasPMULL())
|
||||
{
|
||||
// Avoid "parameter not used" error and suppress Coverity finding
|
||||
|
|
@ -384,7 +384,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
|||
|
||||
return;
|
||||
}
|
||||
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
|
||||
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
if (HasPMULL())
|
||||
{
|
||||
const uint64x2_t r = s_clmulConstants[0];
|
||||
|
|
@ -520,7 +520,7 @@ inline void GCM_Base::ReverseHashBufferIfNeeded()
|
|||
__m128i &x = *(__m128i *)(void *)HashBuffer();
|
||||
x = _mm_shuffle_epi8(x, s_clmulConstants[1]);
|
||||
}
|
||||
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
|
||||
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
if (HasPMULL())
|
||||
{
|
||||
if (GetNativeByteOrder() != BIG_ENDIAN_ORDER)
|
||||
|
|
@ -670,7 +670,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
|
|||
_mm_store_si128((__m128i *)(void *)HashBuffer(), x);
|
||||
return len;
|
||||
}
|
||||
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
|
||||
#elif CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
if (HasPMULL())
|
||||
{
|
||||
const uint64x2_t *table = (const uint64x2_t *)MulTable();
|
||||
|
|
|
|||
68
sha.cpp
68
sha.cpp
|
|
@ -103,7 +103,7 @@ static void SHA1_CXX_Transform(word32 *state, const word32 *data)
|
|||
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
|
||||
static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
|
||||
static void SHA1_Transform_SHANI(word32 *state, const word32 *data)
|
||||
{
|
||||
__m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
|
||||
__m128i MASK, MSG0, MSG1, MSG2, MSG3;
|
||||
|
|
@ -293,8 +293,8 @@ static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
|
|||
// start of Walton/Schneiders/O'Rourke/Skip Hovsmith's code //
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data)
|
||||
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
static void SHA1_Transform_ARMV8A(word32 *state, const word32 *data)
|
||||
{
|
||||
uint32x4_t C0, C1, C2, C3;
|
||||
uint32x4_t ABCD, ABCD_SAVED;
|
||||
|
|
@ -462,7 +462,7 @@ static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data)
|
|||
vst1q_u32(&state[0], ABCD);
|
||||
state[4] = E0;
|
||||
}
|
||||
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// end of Walton/Schneiders/O'Rourke/Hovsmith's code //
|
||||
|
|
@ -472,12 +472,12 @@ pfnSHATransform InitializeSHA1Transform()
|
|||
{
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
if (HasSHA())
|
||||
return &SHA1_SSE_SHA_Transform;
|
||||
return &SHA1_Transform_SHANI;
|
||||
else
|
||||
#endif
|
||||
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
if (HasSHA1())
|
||||
return &SHA1_ARM_SHA_Transform;
|
||||
return &SHA1_Transform_ARMV8A;
|
||||
else
|
||||
#endif
|
||||
return &SHA1_CXX_Transform;
|
||||
|
|
@ -536,7 +536,7 @@ void SHA256::InitState(HashWordType *state)
|
|||
memcpy(state, s, sizeof(s));
|
||||
}
|
||||
|
||||
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
|
||||
#else
|
||||
extern const word32 SHA256_K[64] = {
|
||||
|
|
@ -893,9 +893,9 @@ void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data,
|
|||
#endif
|
||||
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
|
||||
#elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
|
||||
static void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data, size_t length);
|
||||
#elif CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
static void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8A(word32 *state, const word32 *data, size_t length);
|
||||
#endif
|
||||
|
||||
#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)
|
||||
|
|
@ -904,12 +904,12 @@ pfnSHAHashBlocks InitializeSHA256HashBlocks()
|
|||
{
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
if (HasSHA())
|
||||
return &SHA256_SSE_SHA_HashBlocks;
|
||||
return &SHA256_HashBlocks_SHANI;
|
||||
else
|
||||
#endif
|
||||
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
if (HasSHA2())
|
||||
return &SHA256_ARM_SHA_HashBlocks;
|
||||
return &SHA256_HashBlocks_ARMV8A;
|
||||
else
|
||||
#endif
|
||||
|
||||
|
|
@ -956,7 +956,7 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
|
|||
|
||||
#if defined(__OPTIMIZE_SIZE__)
|
||||
// Smaller but slower
|
||||
void SHA256_CXX_Transform(word32 *state, const word32 *data)
|
||||
void SHA256_Transform_CXX(word32 *state, const word32 *data)
|
||||
{
|
||||
word32 W[32], T[20];
|
||||
unsigned int i = 0, j = 0;
|
||||
|
|
@ -1028,7 +1028,7 @@ void SHA256_CXX_Transform(word32 *state, const word32 *data)
|
|||
}
|
||||
#else
|
||||
// Bigger but faster
|
||||
void SHA256_CXX_Transform(word32 *state, const word32 *data)
|
||||
void SHA256_Transform_CXX(word32 *state, const word32 *data)
|
||||
{
|
||||
word32 W[16], T[8];
|
||||
/* Copy context->state[] to working vars */
|
||||
|
|
@ -1060,7 +1060,7 @@ void SHA256_CXX_Transform(word32 *state, const word32 *data)
|
|||
#undef R
|
||||
|
||||
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||
static void SHA256_SSE2_Transform(word32 *state, const word32 *data)
|
||||
static void SHA256_Transform_SSE2(word32 *state, const word32 *data)
|
||||
{
|
||||
// this byte reverse is a waste of time, but this function is only called by MDC
|
||||
word32 W[16];
|
||||
|
|
@ -1070,18 +1070,18 @@ static void SHA256_SSE2_Transform(word32 *state, const word32 *data)
|
|||
#endif // CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
static void SHA256_SSE_SHA_Transform(word32 *state, const word32 *data)
|
||||
static void SHA256_Transform_SHANI(word32 *state, const word32 *data)
|
||||
{
|
||||
return SHA256_SSE_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
|
||||
return SHA256_HashBlocks_SHANI(state, data, SHA256::BLOCKSIZE);
|
||||
}
|
||||
#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
|
||||
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data)
|
||||
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
static void SHA256_Transform_ARMV8A(word32 *state, const word32 *data)
|
||||
{
|
||||
return SHA256_ARM_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
|
||||
return SHA256_HashBlocks_ARMV8A(state, data, SHA256::BLOCKSIZE);
|
||||
}
|
||||
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#endif // CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
|
||||
///////////////////////////////////
|
||||
// start of Walton/Gulley's code //
|
||||
|
|
@ -1089,7 +1089,7 @@ static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data)
|
|||
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
|
||||
static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
|
||||
static void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data, size_t length)
|
||||
{
|
||||
CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data);
|
||||
CRYPTOPP_ASSERT(length % SHA256::BLOCKSIZE == 0);
|
||||
|
|
@ -1275,10 +1275,10 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor
|
|||
length -= SHA256::BLOCKSIZE;
|
||||
}
|
||||
|
||||
TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA
|
||||
STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); // DCHG
|
||||
TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA
|
||||
STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); // DCHG
|
||||
STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); // DCBA
|
||||
STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF
|
||||
STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF
|
||||
|
||||
// Save state
|
||||
_mm_storeu_si128((__m128i*) &state[0], STATE0);
|
||||
|
|
@ -1294,8 +1294,8 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor
|
|||
// start of Walton/Schneiders/O'Rourke/Hovsmith's code //
|
||||
/////////////////////////////////////////////////////////
|
||||
|
||||
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
|
||||
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
static void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8A(word32 *state, const word32 *data, size_t length)
|
||||
{
|
||||
uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
|
||||
uint32x4_t MSG0, MSG1, MSG2, MSG3;
|
||||
|
|
@ -1460,21 +1460,21 @@ pfnSHATransform InitializeSHA256Transform()
|
|||
{
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
if (HasSHA())
|
||||
return &SHA256_SSE_SHA_Transform;
|
||||
return &SHA256_Transform_SHANI;
|
||||
else
|
||||
#endif
|
||||
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||
if (HasSSE2())
|
||||
return &SHA256_SSE2_Transform;
|
||||
return &SHA256_Transform_SSE2;
|
||||
else
|
||||
#endif
|
||||
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
||||
#if CRYPTOPP_ARMV8A_SHA_AVAILABLE
|
||||
if (HasSHA2())
|
||||
return &SHA256_ARM_SHA_Transform;
|
||||
return &SHA256_Transform_ARMV8A;
|
||||
else
|
||||
#endif
|
||||
|
||||
return &SHA256_CXX_Transform;
|
||||
return &SHA256_Transform_CXX;
|
||||
}
|
||||
|
||||
void SHA256::Transform(word32 *state, const word32 *data)
|
||||
|
|
|
|||
Loading…
Reference in New Issue