diff --git a/cpu.cpp b/cpu.cpp index 7efbc0aa..bd2f5183 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -22,14 +22,6 @@ #include #endif -#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -# if defined(__linux__) -# include -# include -# endif -# include -#endif - NAMESPACE_BEGIN(CryptoPP) // MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. @@ -264,9 +256,9 @@ void DetectX86Features() #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) bool g_ArmDetectionDone = false; -bool g_hasNEON = false; +bool g_hasNEON = false, g_hasCRC32 = false; -// This is avaiable in a status register, but we need privileged code to execute the read +// This is available in a status register, but we need privileged code to perform the read word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; #if HAVE_GCC_CONSTRUCTOR1 @@ -277,17 +269,18 @@ void __attribute__ ((constructor)) DetectArmFeatures() void DetectArmFeatures() #endif { - g_hasNEON = false; -#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -# if defined(__linux__) && defined(__aarch64__) - const long hwcaps = getauxval(AT_HWCAP); +#if defined(__linux__) && defined(__aarch64__) + const unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ASIMD); -# elif defined(__linux__) - const long hwcaps = getauxval(AT_HWCAP); + g_hasCRC32 = !!(hwcaps & HWCAP_CRC32); +#elif defined(__linux__) + const unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); -# elif defined(_WIN32) && defined(_M_ARM) + // g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32); NOTE(review): 32-bit ARM Linux appears to report CRC32 through AT_HWCAP2 (HWCAP2_CRC32) rather than AT_HWCAP - confirm before enabling + g_hasCRC32 = false; +#elif defined(_WIN32) && defined(_M_ARM) g_hasNEON = true; -# endif + g_hasCRC32 = false; #endif *((volatile bool*)&g_ArmDetectionDone) = true; } diff --git a/cpu.h b/cpu.h index 895c35e2..90a499d0 100644 --- a/cpu.h +++ b/cpu.h @@ -1,13 +1,30 @@ // cpu.h - written and placed in the public domain by Wei Dai //! \file cpu.h -//! 
\brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly +//! \brief Functions for CPU features and intrinsics +//! \details At the moment, the functions are used heavily in X86/X32/X64 code paths +//! for SSE, SSE2 and SSE4. The functions are also used on occasion for AArch32 +//! and AArch64 code paths for NEON. #ifndef CRYPTOPP_CPU_H #define CRYPTOPP_CPU_H #include "config.h" +#if defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64) +# if defined(__linux__) +# include +# include +# include +# endif +# if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# include +# endif +# if (__ARM_ACLE >= 200) +# include +# endif +#endif // ARM-32 or ARM-64 + #ifdef CRYPTOPP_GENERATE_X64_MASM #define CRYPTOPP_X86_ASM_AVAILABLE @@ -217,12 +234,14 @@ inline int GetCacheLineSize() return g_cacheLineSize; } -#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) +#elif defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64) extern bool g_ArmDetectionDone; -extern bool g_hasNEON; +extern bool g_hasNEON, g_hasCRC32; void CRYPTOPP_API DetectArmFeatures(); +//! \brief Determine if an ARM processor has Advanced SIMD available +//! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise. inline bool HasNEON() { if (!g_ArmDetectionDone) @@ -230,6 +249,19 @@ inline bool HasNEON() return g_hasNEON; } +//! \brief Determine if an ARM processor has CRC32 available +//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise. +inline bool HasCRC32() +{ + if (!g_ArmDetectionDone) + DetectArmFeatures(); + return g_hasCRC32; +} + +//! \brief Provides the cache line size at runtime +//! \returns the value of CRYPTOPP_L1_CACHE_LINE_SIZE, used as the cache line size estimate. +//! \details GetCacheLineSize() provides an estimate using CRYPTOPP_L1_CACHE_LINE_SIZE. +//! The runtime instructions to query the processor are privileged. 
inline int GetCacheLineSize() { return CRYPTOPP_L1_CACHE_LINE_SIZE; @@ -246,6 +278,8 @@ inline int GetCacheLineSize() #endif +#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 + #ifdef CRYPTOPP_GENERATE_X64_MASM #define AS1(x) x*newline* #define AS2(x, y) x, y*newline* @@ -448,6 +482,8 @@ inline int GetCacheLineSize() ASL(labelPrefix##9)\ AS2( add outputPtr, increment*16) +#endif // X86/X32/X64 + NAMESPACE_END -#endif +#endif // CRYPTOPP_CPU_H diff --git a/crc.cpp b/crc.cpp index bdb4e2f1..b007dd4f 100644 --- a/crc.cpp +++ b/crc.cpp @@ -131,6 +131,22 @@ CRC32::CRC32() void CRC32::Update(const byte *s, size_t n) { +#if defined(__ARM_FEATURE_CRC32) + if (HasCRC32()) // runtime capability check on top of the compile-time __ARM_FEATURE_CRC32 guard + { + for(; !IsAligned(s) && n > 0; s++, n--) // consume bytes one at a time until s is aligned + m_crc = __crc32b(m_crc, *s); + + for(; n > 4; s+=4, n-=4) // one word32 per step; n > 4 leaves an exactly-4-byte tail to the byte loop below + m_crc = __crc32w(m_crc, *(const word32 *)(void*)s); + + for(; n > 0; s++, n--) // remaining tail bytes + m_crc = __crc32b(m_crc, *s); + + return; + } +#endif + word32 crc = m_crc; for(; !IsAligned(s) && n > 0; n--) @@ -295,6 +311,20 @@ void CRC32C::Update(const byte *s, size_t n) for(; n > 0; s++, n--) m_crc = _mm_crc32_u8(m_crc, *s); + return; + } +#elif defined(__ARM_FEATURE_CRC32) + if (HasCRC32()) // runtime capability check on top of the compile-time __ARM_FEATURE_CRC32 guard + { + for(; !IsAligned(s) && n > 0; s++, n--) // consume bytes one at a time until s is aligned + m_crc = __crc32cb(m_crc, *s); + + for(; n > 4; s+=4, n-=4) // one word32 per step; n > 4 leaves an exactly-4-byte tail to the byte loop below + m_crc = __crc32cw(m_crc, *(const word32 *)(void*)s); + + for(; n > 0; s++, n--) // remaining tail bytes + m_crc = __crc32cb(m_crc, *s); + return; } #endif diff --git a/validat1.cpp b/validat1.cpp index 2bb0b917..e25273c2 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -314,9 +314,10 @@ bool TestSettings() #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) bool hasNEON = HasNEON(); + bool hasCRC32 = HasCRC32(); cout << "passed: "; - cout << "hasNEON == " << hasNEON << endl; + cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << endl; #endif if (!pass) @@ -912,7 +913,7 @@ bool TestOS_RNG() return pass; } -#ifdef NO_OS_DEPENDENCE +#if defined(NO_OS_DEPENDENCE) || !defined(OS_RNG_AVAILABLE) bool 
TestAutoSeeded() { return true;