From f0e8a2b1372e92a13cb0fc6308bb7fe25fe99d4f Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Thu, 5 May 2016 22:18:53 -0400 Subject: [PATCH 01/10] Add ARM NEON detection code --- GNUmakefile | 11 ++++++++--- config.h | 26 +++++++++++++++++++++++++- config.recommend | 26 +++++++++++++++++++++++++- cpu.cpp | 44 ++++++++++++++++++++++++++++++++++++++++---- cpu.h | 23 ++++++++++++++++++++--- validat1.cpp | 6 ++++++ 6 files changed, 124 insertions(+), 12 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 43e14098..ab700c45 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -22,7 +22,8 @@ IS_X86 := $(shell uname -m | $(EGREP) -v "x86_64" | $(EGREP) -i -c "i.86|x86|i86 IS_X32 ?= 0 IS_X86_64 := $(shell uname -m | $(EGREP) -i -c "(_64|d64)") IS_PPC := $(shell uname -m | $(EGREP) -i -c "ppc|power") -IS_AARCH64 := $(shell uname -m | $(EGREP) -i -c "aarch64") +IS_ARM32 := $(shell uname -m | $(EGREP) -i -c "arm") +IS_ARM64 := $(shell uname -m | $(EGREP) -i -c "aarch64") IS_SUN := $(shell uname | $(EGREP) -i -c "SunOS") IS_LINUX := $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "Linux") @@ -325,9 +326,13 @@ endif # HAS_SOLIB_VERSION # is the second candidate for explicit initialization order. SRCS := cryptlib.cpp cpu.cpp $(filter-out cryptlib.cpp cpu.cpp pch.cpp simple.cpp winpipes.cpp cryptlib_bds.cpp,$(wildcard *.cpp)) -# No need for CPU or RDRAND on non-X86 systems. X32 is represented with X64. +# Need CPU for X86/X64/X32 and ARM +ifeq ($(IS_X86)$(IS_X86_64)$(IS_ARM32)$(IS_ARM64),0000) + SRCS := $(filter-out cpu.cpp, $(SRCS)) +endif +# Need RDRAND for X86/X64/X32 ifeq ($(IS_X86)$(IS_X86_64),00) - SRCS := $(filter-out cpu.cpp rdrand.cpp, $(SRCS)) + SRCS := $(filter-out rdrand.cpp, $(SRCS)) endif ifneq ($(IS_MINGW),0) diff --git a/config.h b/config.h index 2e71bcb8..153f5a9b 100644 --- a/config.h +++ b/config.h @@ -497,7 +497,7 @@ NAMESPACE_END #else #define CRYPTOPP_BOOL_X64 0 #endif - + // Undo the ASM and Intrinsic related defines due to X32. 
#if CRYPTOPP_BOOL_X32 # undef CRYPTOPP_BOOL_X64 @@ -505,6 +505,30 @@ NAMESPACE_END # undef CRYPTOPP_X64_MASM_AVAILABLE #endif +#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM) + #define CRYPTOPP_BOOL_ARM32 1 +#else + #define CRYPTOPP_BOOL_ARM32 0 +#endif + +#if defined(__aarch64__) + #define CRYPTOPP_BOOL_ARM64 1 +#else + #define CRYPTOPP_BOOL_ARM64 0 +#endif + +#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 +# endif +# endif +#endif + +#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0 +#endif + #if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) #if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || defined(__powerpc__) || (__ARM_FEATURE_UNALIGNED >= 1)) #define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS diff --git a/config.recommend b/config.recommend index ad845e88..9546120a 100644 --- a/config.recommend +++ b/config.recommend @@ -381,7 +381,31 @@ NAMESPACE_END #define CRYPTOPP_DISABLE_ASM #define CRYPTOPP_DISABLE_SSE2 #endif - + +#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM) + #define CRYPTOPP_BOOL_ARM32 1 +#else + #define CRYPTOPP_BOOL_ARM32 0 +#endif + +#if defined(__aarch64__) + #define CRYPTOPP_BOOL_ARM64 1 +#else + #define CRYPTOPP_BOOL_ARM64 0 +#endif + +#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 +# endif +# endif +#endif + +#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 
0 +#endif + // Apple's Clang prior to 5.0 cannot handle SSE2 (and Apple does not use LLVM Clang numbering...) #if defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION < 50000) # define CRYPTOPP_DISABLE_ASM diff --git a/cpu.cpp b/cpu.cpp index 2c5ee728..334b2ec9 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -22,8 +22,18 @@ #include #endif +#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +#include +#include +#include +#endif + NAMESPACE_BEGIN(CryptoPP) +// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. +#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0)) +#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0)) + #ifdef CRYPTOPP_CPUID_AVAILABLE #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 @@ -168,10 +178,6 @@ bool g_x86DetectionDone = false; bool g_hasMMX = false, g_hasISSE = false, g_hasSSE2 = false, g_hasSSSE3 = false, g_hasSSE4 = false, g_hasAESNI = false, g_hasCLMUL = false, g_isP4 = false, g_hasRDRAND = false, g_hasRDSEED = false; word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; -// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. 
-#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0)) -#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0)) - static inline bool IsIntel(const word32 output[4]) { // This is the "GenuineIntel" string @@ -251,6 +257,36 @@ void DetectX86Features() *((volatile bool*)&g_x86DetectionDone) = true; } +// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu +#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) + +bool g_ArmDetectionDone = false; +bool g_hasNEON = false; + +// This is avaiable in a status register, but we need privileged code to execute the read +word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + +#if HAVE_GCC_CONSTRUCTOR1 +void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures() +#elif HAVE_GCC_CONSTRUCTOR0 +void __attribute__ ((constructor)) DetectArmFeatures() +#else +void DetectArmFeatures() +#endif +{ + g_hasNEON = false; +#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# if defined(__linux__) && defined(__aarch64__) + const long hwcaps = getauxval(AT_HWCAP); + g_hasNEON = !!(hwcaps & HWCAP_ASIMD); +# elif defined(__linux__) + const long hwcaps = getauxval(AT_HWCAP); + g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); +# endif +#endif + *((volatile bool*)&g_ArmDetectionDone) = true; +} + #endif NAMESPACE_END diff --git a/cpu.h b/cpu.h index ff3e39d4..895c35e2 100644 --- a/cpu.h +++ b/cpu.h @@ -1,7 +1,6 @@ // cpu.h - written and placed in the public domain by Wei Dai -//! \file -//! \headerfile cpu.h +//! \file cpu.h //! 
\brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly #ifndef CRYPTOPP_CPU_H @@ -218,6 +217,24 @@ inline int GetCacheLineSize() return g_cacheLineSize; } +#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) + +extern bool g_ArmDetectionDone; +extern bool g_hasNEON; +void CRYPTOPP_API DetectArmFeatures(); + +inline bool HasNEON() +{ + if (!g_ArmDetectionDone) + DetectArmFeatures(); + return g_hasNEON; +} + +inline int GetCacheLineSize() +{ + return CRYPTOPP_L1_CACHE_LINE_SIZE; +} + #else inline int GetCacheLineSize() @@ -225,7 +242,7 @@ inline int GetCacheLineSize() return CRYPTOPP_L1_CACHE_LINE_SIZE; } -#endif +#endif // X86/X32/X64 and ARM #endif diff --git a/validat1.cpp b/validat1.cpp index 8e214607..2bb0b917 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -311,6 +311,12 @@ bool TestSettings() cout << "hasMMX == " << hasMMX << ", hasISSE == " << hasISSE << ", hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSE4 << ", hasSSE4 == " << hasSSSE3 << ", hasAESNI == " << HasAESNI() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED() << ", hasCLMUL == " << HasCLMUL() << ", isP4 == " << isP4 << ", cacheLineSize == " << cacheLineSize; cout << ", AESNI_INTRINSICS == " << CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE << endl; + +#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) + bool hasNEON = HasNEON(); + + cout << "passed: "; + cout << "hasNEON == " << hasNEON << endl; #endif if (!pass) From 15900246bb849088431c2dec2234ae5fffc068e0 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 6 May 2016 01:28:06 -0400 Subject: [PATCH 02/10] Add Windows ARM NEON dtection --- config.h | 4 ++-- config.recommend | 4 ++-- cpu.cpp | 11 ++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/config.h b/config.h index 153f5a9b..b1e426fd 100644 --- a/config.h +++ b/config.h @@ -518,8 +518,8 @@ NAMESPACE_END #endif #if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) -# if 
(defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_MSC_VERSION >= 1700)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM) # define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 # endif # endif diff --git a/config.recommend b/config.recommend index 9546120a..ae28a7bb 100644 --- a/config.recommend +++ b/config.recommend @@ -395,8 +395,8 @@ NAMESPACE_END #endif #if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) -# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_MSC_VERSION >= 1700)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM) # define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 # endif # endif diff --git a/cpu.cpp b/cpu.cpp index 334b2ec9..7efbc0aa 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -23,9 +23,11 @@ #endif #if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -#include -#include -#include +# if defined(__linux__) +# include +# include +# endif +# include #endif NAMESPACE_BEGIN(CryptoPP) @@ -258,6 +260,7 @@ void DetectX86Features() } // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu +// http://stackoverflow.com/questions/26701262/how-to-check-the-existence-of-neon-on-arm #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) bool g_ArmDetectionDone = false; @@ -282,6 +285,8 @@ void DetectArmFeatures() # elif defined(__linux__) const long hwcaps = getauxval(AT_HWCAP); 
g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); +# elif defined(_WIN32) && defined(_M_ARM) + g_hasNEON = true; # endif #endif *((volatile bool*)&g_ArmDetectionDone) = true; From 4574296caa63880f327882c6772391c1d8b10da9 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 6 May 2016 04:35:42 -0400 Subject: [PATCH 03/10] Updated etection code. Added ARMv8-a CRC32 implementations --- cpu.cpp | 29 +++++++++++------------------ cpu.h | 44 ++++++++++++++++++++++++++++++++++++++++---- crc.cpp | 30 ++++++++++++++++++++++++++++++ validat1.cpp | 5 +++-- 4 files changed, 84 insertions(+), 24 deletions(-) diff --git a/cpu.cpp b/cpu.cpp index 7efbc0aa..bd2f5183 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -22,14 +22,6 @@ #include #endif -#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -# if defined(__linux__) -# include -# include -# endif -# include -#endif - NAMESPACE_BEGIN(CryptoPP) // MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. @@ -264,9 +256,9 @@ void DetectX86Features() #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) bool g_ArmDetectionDone = false; -bool g_hasNEON = false; +bool g_hasNEON = false, g_hasCRC32 = false; -// This is avaiable in a status register, but we need privileged code to execute the read +// This is avaiable in a status register, but we need privileged code to perform the read word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; #if HAVE_GCC_CONSTRUCTOR1 @@ -277,17 +269,18 @@ void __attribute__ ((constructor)) DetectArmFeatures() void DetectArmFeatures() #endif { - g_hasNEON = false; -#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -# if defined(__linux__) && defined(__aarch64__) - const long hwcaps = getauxval(AT_HWCAP); +#if defined(__linux__) && defined(__aarch64__) + const unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ASIMD); -# elif defined(__linux__) - const long hwcaps = getauxval(AT_HWCAP); + g_hasCRC32 = !!(hwcaps & HWCAP_CRC32); +#elif defined(__linux__) + const 
unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); -# elif defined(_WIN32) && defined(_M_ARM) + // g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32); + g_hasCRC32 = false; +#elif defined(_WIN32) && defined(_M_ARM) g_hasNEON = true; -# endif + g_hasCRC32 = false; #endif *((volatile bool*)&g_ArmDetectionDone) = true; } diff --git a/cpu.h b/cpu.h index 895c35e2..90a499d0 100644 --- a/cpu.h +++ b/cpu.h @@ -1,13 +1,30 @@ // cpu.h - written and placed in the public domain by Wei Dai //! \file cpu.h -//! \brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly +//! \brief Functions for CPU features and intrinsics +//! \details At the moment, the functions are used heavily in X86/X32/X64 code paths +// for SSE, SSE2 and SSE4. The funtions are also used on occassion for AArch32 +//! and AArch64 code paths for NEON. #ifndef CRYPTOPP_CPU_H #define CRYPTOPP_CPU_H #include "config.h" +#if defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64) +# if defined(__linux__) +# include +# include +# include +# endif +# if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# include +# endif +# if (__ARM_ACLE >= 200) +# include +# endif +#endif // ARM-32 or ARM-64 + #ifdef CRYPTOPP_GENERATE_X64_MASM #define CRYPTOPP_X86_ASM_AVAILABLE @@ -217,12 +234,14 @@ inline int GetCacheLineSize() return g_cacheLineSize; } -#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) +#elif defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64) extern bool g_ArmDetectionDone; -extern bool g_hasNEON; +extern bool g_hasNEON, g_hasCRC32; void CRYPTOPP_API DetectArmFeatures(); +//! \brief Determine if an ARM processor has Advanced SIMD available +//! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise. inline bool HasNEON() { if (!g_ArmDetectionDone) @@ -230,6 +249,19 @@ inline bool HasNEON() return g_hasNEON; } +//! \brief Determine if an ARM processor has CRC32 available +//! 
\returns true if the hardware is capable of CRC32 at runtime, false otherwise. +inline bool HasCRC32() +{ + if (!g_ArmDetectionDone) + DetectArmFeatures(); + return g_hasCRC32; +} + +//! \brief Provides the cache line size at runtime +//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise. +//! \details GetCacheLineSize() provides is an estimate using CRYPTOPP_L1_CACHE_LINE_SIZE. +//! The runtime instructions to query the processor are privileged. inline int GetCacheLineSize() { return CRYPTOPP_L1_CACHE_LINE_SIZE; @@ -246,6 +278,8 @@ inline int GetCacheLineSize() #endif +#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 + #ifdef CRYPTOPP_GENERATE_X64_MASM #define AS1(x) x*newline* #define AS2(x, y) x, y*newline* @@ -448,6 +482,8 @@ inline int GetCacheLineSize() ASL(labelPrefix##9)\ AS2( add outputPtr, increment*16) +#endif // X86/X32/X64 + NAMESPACE_END -#endif +#endif // CRYPTOPP_CPU_H diff --git a/crc.cpp b/crc.cpp index bdb4e2f1..b007dd4f 100644 --- a/crc.cpp +++ b/crc.cpp @@ -131,6 +131,22 @@ CRC32::CRC32() void CRC32::Update(const byte *s, size_t n) { +#if defined(__ARM_FEATURE_CRC32) + if (HasCRC32()) + { + for(; !IsAligned(s) && n > 0; s++, n--) + m_crc = __crc32b(m_crc, *s); + + for(; n > 4; s+=4, n-=4) + m_crc = __crc32w(m_crc, *(const word32 *)(void*)s); + + for(; n > 0; s++, n--) + m_crc = __crc32b(m_crc, *s); + + return; + } +#endif + word32 crc = m_crc; for(; !IsAligned(s) && n > 0; n--) @@ -295,6 +311,20 @@ void CRC32C::Update(const byte *s, size_t n) for(; n > 0; s++, n--) m_crc = _mm_crc32_u8(m_crc, *s); + return; + } +#elif defined(__ARM_FEATURE_CRC32) + if (HasCRC32()) + { + for(; !IsAligned(s) && n > 0; s++, n--) + m_crc = __crc32cb(m_crc, *s); + + for(; n > 4; s+=4, n-=4) + m_crc = __crc32cw(m_crc, *(const word32 *)(void*)s); + + for(; n > 0; s++, n--) + m_crc = __crc32cb(m_crc, *s); + return; } #endif diff --git a/validat1.cpp b/validat1.cpp index 2bb0b917..e25273c2 100644 --- a/validat1.cpp +++ 
b/validat1.cpp @@ -314,9 +314,10 @@ bool TestSettings() #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) bool hasNEON = HasNEON(); + bool hasCRC32 = HasCRC32(); cout << "passed: "; - cout << "hasNEON == " << hasNEON << endl; + cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << endl; #endif if (!pass) @@ -912,7 +913,7 @@ bool TestOS_RNG() return pass; } -#ifdef NO_OS_DEPENDENCE +#if defined(NO_OS_DEPENDENCE) || !defined(OS_RNG_AVAILABLE) bool TestAutoSeeded() { return true; From 48e4bfd9154b13fa4d38dff902834d9cd7c51a41 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 6 May 2016 05:20:56 -0400 Subject: [PATCH 04/10] Improved compile time checking to make runtime check consitent with lack of availability --- cpu.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cpu.cpp b/cpu.cpp index bd2f5183..3247e5e7 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -269,16 +269,23 @@ void __attribute__ ((constructor)) DetectArmFeatures() void DetectArmFeatures() #endif { -#if defined(__linux__) && defined(__aarch64__) +#if defined(__linux__) && defined(__aarch64__) // ARM-64 const unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ASIMD); +# if defined(__ARM_FEATURE_CRC32) g_hasCRC32 = !!(hwcaps & HWCAP_CRC32); -#elif defined(__linux__) +# else + g_hasCRC32 = false; +# endif +#elif defined(__linux__) // ARM-32 const unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); - // g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32); +# if defined(__ARM_FEATURE_CRC32) + g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32); +# else g_hasCRC32 = false; -#elif defined(_WIN32) && defined(_M_ARM) +# endif +#elif defined(_WIN32) && defined(_M_ARM) // Microsoft ARM g_hasNEON = true; g_hasCRC32 = false; #endif From 27b2e1d811477369b76d6a3c6ee215d859c87461 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 6 May 2016 06:56:08 -0400 Subject: [PATCH 05/10] Improve include logic for under GCC 
--- cpu.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpu.h b/cpu.h index 90a499d0..56baf5ae 100644 --- a/cpu.h +++ b/cpu.h @@ -20,7 +20,7 @@ # if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE # include # endif -# if (__ARM_ACLE >= 200) +# if defined(__ARM_FEATURE_CRC32) || (__ARM_ACLE >= 200) # include # endif #endif // ARM-32 or ARM-64 @@ -242,6 +242,7 @@ void CRYPTOPP_API DetectArmFeatures(); //! \brief Determine if an ARM processor has Advanced SIMD available //! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise. +//! \details Runtime support requires compile time support. inline bool HasNEON() { if (!g_ArmDetectionDone) @@ -251,6 +252,8 @@ inline bool HasNEON() //! \brief Determine if an ARM processor has CRC32 available //! \returns true if the hardware is capable of CRC32 at runtime, false otherwise. +//! \details Runtime support requires compile time support. When compiling with GCC, you may +//! need to compile with -march=armv8-a+crc. 
inline bool HasCRC32() { if (!g_ArmDetectionDone) From a3fc031b33a9eb002e17b739e9554bd0e9f1f8f6 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Thu, 5 May 2016 22:18:53 -0400 Subject: [PATCH 06/10] Add ARM NEON detection code --- GNUmakefile | 11 ++++++++--- config.h | 26 +++++++++++++++++++++++++- config.recommend | 26 +++++++++++++++++++++++++- cpu.cpp | 44 ++++++++++++++++++++++++++++++++++++++++---- cpu.h | 23 ++++++++++++++++++++--- validat1.cpp | 6 ++++++ 6 files changed, 124 insertions(+), 12 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 43e14098..ab700c45 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -22,7 +22,8 @@ IS_X86 := $(shell uname -m | $(EGREP) -v "x86_64" | $(EGREP) -i -c "i.86|x86|i86 IS_X32 ?= 0 IS_X86_64 := $(shell uname -m | $(EGREP) -i -c "(_64|d64)") IS_PPC := $(shell uname -m | $(EGREP) -i -c "ppc|power") -IS_AARCH64 := $(shell uname -m | $(EGREP) -i -c "aarch64") +IS_ARM32 := $(shell uname -m | $(EGREP) -i -c "arm") +IS_ARM64 := $(shell uname -m | $(EGREP) -i -c "aarch64") IS_SUN := $(shell uname | $(EGREP) -i -c "SunOS") IS_LINUX := $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "Linux") @@ -325,9 +326,13 @@ endif # HAS_SOLIB_VERSION # is the second candidate for explicit initialization order. SRCS := cryptlib.cpp cpu.cpp $(filter-out cryptlib.cpp cpu.cpp pch.cpp simple.cpp winpipes.cpp cryptlib_bds.cpp,$(wildcard *.cpp)) -# No need for CPU or RDRAND on non-X86 systems. X32 is represented with X64. 
+# Need CPU for X86/X64/X32 and ARM +ifeq ($(IS_X86)$(IS_X86_64)$(IS_ARM32)$(IS_ARM64),0000) + SRCS := $(filter-out cpu.cpp, $(SRCS)) +endif +# Need RDRAND for X86/X64/X32 ifeq ($(IS_X86)$(IS_X86_64),00) - SRCS := $(filter-out cpu.cpp rdrand.cpp, $(SRCS)) + SRCS := $(filter-out rdrand.cpp, $(SRCS)) endif ifneq ($(IS_MINGW),0) diff --git a/config.h b/config.h index 2e71bcb8..153f5a9b 100644 --- a/config.h +++ b/config.h @@ -497,7 +497,7 @@ NAMESPACE_END #else #define CRYPTOPP_BOOL_X64 0 #endif - + // Undo the ASM and Intrinsic related defines due to X32. #if CRYPTOPP_BOOL_X32 # undef CRYPTOPP_BOOL_X64 @@ -505,6 +505,30 @@ NAMESPACE_END # undef CRYPTOPP_X64_MASM_AVAILABLE #endif +#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM) + #define CRYPTOPP_BOOL_ARM32 1 +#else + #define CRYPTOPP_BOOL_ARM32 0 +#endif + +#if defined(__aarch64__) + #define CRYPTOPP_BOOL_ARM64 1 +#else + #define CRYPTOPP_BOOL_ARM64 0 +#endif + +#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 +# endif +# endif +#endif + +#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0 +#endif + #if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) #if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || defined(__powerpc__) || (__ARM_FEATURE_UNALIGNED >= 1)) #define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS diff --git a/config.recommend b/config.recommend index ad845e88..9546120a 100644 --- a/config.recommend +++ b/config.recommend @@ -381,7 +381,31 @@ NAMESPACE_END #define CRYPTOPP_DISABLE_ASM #define CRYPTOPP_DISABLE_SSE2 #endif - + +#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM) + #define CRYPTOPP_BOOL_ARM32 1 +#else + #define 
CRYPTOPP_BOOL_ARM32 0 +#endif + +#if defined(__aarch64__) + #define CRYPTOPP_BOOL_ARM64 1 +#else + #define CRYPTOPP_BOOL_ARM64 0 +#endif + +#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 +# endif +# endif +#endif + +#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0 +#endif + // Apple's Clang prior to 5.0 cannot handle SSE2 (and Apple does not use LLVM Clang numbering...) #if defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION < 50000) # define CRYPTOPP_DISABLE_ASM diff --git a/cpu.cpp b/cpu.cpp index 2c5ee728..334b2ec9 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -22,8 +22,18 @@ #include #endif +#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +#include +#include +#include +#endif + NAMESPACE_BEGIN(CryptoPP) +// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. +#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0)) +#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0)) + #ifdef CRYPTOPP_CPUID_AVAILABLE #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 @@ -168,10 +178,6 @@ bool g_x86DetectionDone = false; bool g_hasMMX = false, g_hasISSE = false, g_hasSSE2 = false, g_hasSSSE3 = false, g_hasSSE4 = false, g_hasAESNI = false, g_hasCLMUL = false, g_isP4 = false, g_hasRDRAND = false, g_hasRDSEED = false; word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; -// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. 
-#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0)) -#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0)) - static inline bool IsIntel(const word32 output[4]) { // This is the "GenuineIntel" string @@ -251,6 +257,36 @@ void DetectX86Features() *((volatile bool*)&g_x86DetectionDone) = true; } +// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu +#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) + +bool g_ArmDetectionDone = false; +bool g_hasNEON = false; + +// This is avaiable in a status register, but we need privileged code to execute the read +word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + +#if HAVE_GCC_CONSTRUCTOR1 +void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures() +#elif HAVE_GCC_CONSTRUCTOR0 +void __attribute__ ((constructor)) DetectArmFeatures() +#else +void DetectArmFeatures() +#endif +{ + g_hasNEON = false; +#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# if defined(__linux__) && defined(__aarch64__) + const long hwcaps = getauxval(AT_HWCAP); + g_hasNEON = !!(hwcaps & HWCAP_ASIMD); +# elif defined(__linux__) + const long hwcaps = getauxval(AT_HWCAP); + g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); +# endif +#endif + *((volatile bool*)&g_ArmDetectionDone) = true; +} + #endif NAMESPACE_END diff --git a/cpu.h b/cpu.h index ff3e39d4..895c35e2 100644 --- a/cpu.h +++ b/cpu.h @@ -1,7 +1,6 @@ // cpu.h - written and placed in the public domain by Wei Dai -//! \file -//! \headerfile cpu.h +//! \file cpu.h //! 
\brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly #ifndef CRYPTOPP_CPU_H @@ -218,6 +217,24 @@ inline int GetCacheLineSize() return g_cacheLineSize; } +#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) + +extern bool g_ArmDetectionDone; +extern bool g_hasNEON; +void CRYPTOPP_API DetectArmFeatures(); + +inline bool HasNEON() +{ + if (!g_ArmDetectionDone) + DetectArmFeatures(); + return g_hasNEON; +} + +inline int GetCacheLineSize() +{ + return CRYPTOPP_L1_CACHE_LINE_SIZE; +} + #else inline int GetCacheLineSize() @@ -225,7 +242,7 @@ inline int GetCacheLineSize() return CRYPTOPP_L1_CACHE_LINE_SIZE; } -#endif +#endif // X86/X32/X64 and ARM #endif diff --git a/validat1.cpp b/validat1.cpp index 8e214607..2bb0b917 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -311,6 +311,12 @@ bool TestSettings() cout << "hasMMX == " << hasMMX << ", hasISSE == " << hasISSE << ", hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSE4 << ", hasSSE4 == " << hasSSSE3 << ", hasAESNI == " << HasAESNI() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED() << ", hasCLMUL == " << HasCLMUL() << ", isP4 == " << isP4 << ", cacheLineSize == " << cacheLineSize; cout << ", AESNI_INTRINSICS == " << CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE << endl; + +#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) + bool hasNEON = HasNEON(); + + cout << "passed: "; + cout << "hasNEON == " << hasNEON << endl; #endif if (!pass) From dfc3fa04fd5accd7173ae89a9d48062f211288e3 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 6 May 2016 01:28:06 -0400 Subject: [PATCH 07/10] Add Windows ARM NEON dtection --- config.h | 4 ++-- config.recommend | 4 ++-- cpu.cpp | 11 ++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/config.h b/config.h index 153f5a9b..b1e426fd 100644 --- a/config.h +++ b/config.h @@ -518,8 +518,8 @@ NAMESPACE_END #endif #if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) -# if 
(defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_MSC_VERSION >= 1700)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM) # define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 # endif # endif diff --git a/config.recommend b/config.recommend index 9546120a..ae28a7bb 100644 --- a/config.recommend +++ b/config.recommend @@ -395,8 +395,8 @@ NAMESPACE_END #endif #if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) -# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_MSC_VERSION >= 1700)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM) # define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 # endif # endif diff --git a/cpu.cpp b/cpu.cpp index 334b2ec9..7efbc0aa 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -23,9 +23,11 @@ #endif #if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -#include -#include -#include +# if defined(__linux__) +# include +# include +# endif +# include #endif NAMESPACE_BEGIN(CryptoPP) @@ -258,6 +260,7 @@ void DetectX86Features() } // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu +// http://stackoverflow.com/questions/26701262/how-to-check-the-existence-of-neon-on-arm #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) bool g_ArmDetectionDone = false; @@ -282,6 +285,8 @@ void DetectArmFeatures() # elif defined(__linux__) const long hwcaps = getauxval(AT_HWCAP); 
g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); +# elif defined(_WIN32) && defined(_M_ARM) + g_hasNEON = true; # endif #endif *((volatile bool*)&g_ArmDetectionDone) = true; From 58aabf73d7699bd10f884acbcbd06ebf86b0017f Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 6 May 2016 04:35:42 -0400 Subject: [PATCH 08/10] Updated etection code. Added ARMv8-a CRC32 implementations --- cpu.cpp | 29 +++++++++++------------------ cpu.h | 44 ++++++++++++++++++++++++++++++++++++++++---- crc.cpp | 30 ++++++++++++++++++++++++++++++ validat1.cpp | 5 +++-- 4 files changed, 84 insertions(+), 24 deletions(-) diff --git a/cpu.cpp b/cpu.cpp index 7efbc0aa..bd2f5183 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -22,14 +22,6 @@ #include #endif -#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -# if defined(__linux__) -# include -# include -# endif -# include -#endif - NAMESPACE_BEGIN(CryptoPP) // MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. @@ -264,9 +256,9 @@ void DetectX86Features() #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) bool g_ArmDetectionDone = false; -bool g_hasNEON = false; +bool g_hasNEON = false, g_hasCRC32 = false; -// This is avaiable in a status register, but we need privileged code to execute the read +// This is avaiable in a status register, but we need privileged code to perform the read word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; #if HAVE_GCC_CONSTRUCTOR1 @@ -277,17 +269,18 @@ void __attribute__ ((constructor)) DetectArmFeatures() void DetectArmFeatures() #endif { - g_hasNEON = false; -#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE -# if defined(__linux__) && defined(__aarch64__) - const long hwcaps = getauxval(AT_HWCAP); +#if defined(__linux__) && defined(__aarch64__) + const unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ASIMD); -# elif defined(__linux__) - const long hwcaps = getauxval(AT_HWCAP); + g_hasCRC32 = !!(hwcaps & HWCAP_CRC32); +#elif defined(__linux__) + const 
unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); -# elif defined(_WIN32) && defined(_M_ARM) + // g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32); + g_hasCRC32 = false; +#elif defined(_WIN32) && defined(_M_ARM) g_hasNEON = true; -# endif + g_hasCRC32 = false; #endif *((volatile bool*)&g_ArmDetectionDone) = true; } diff --git a/cpu.h b/cpu.h index 895c35e2..90a499d0 100644 --- a/cpu.h +++ b/cpu.h @@ -1,13 +1,30 @@ // cpu.h - written and placed in the public domain by Wei Dai //! \file cpu.h -//! \brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly +//! \brief Functions for CPU features and intrinsics +//! \details At the moment, the functions are used heavily in X86/X32/X64 code paths +//! for SSE, SSE2 and SSE4. The functions are also used on occasion for AArch32 +//! and AArch64 code paths for NEON. #ifndef CRYPTOPP_CPU_H #define CRYPTOPP_CPU_H #include "config.h" +#if defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64) +# if defined(__linux__) +# include <sys/auxv.h> +# include <asm/hwcap.h> +# include <stdint.h> +# endif +# if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# include <arm_neon.h> +# endif +# if (__ARM_ACLE >= 200) +# include <arm_acle.h> +# endif +#endif // ARM-32 or ARM-64 + #ifdef CRYPTOPP_GENERATE_X64_MASM #define CRYPTOPP_X86_ASM_AVAILABLE @@ -217,12 +234,14 @@ inline int GetCacheLineSize() return g_cacheLineSize; } -#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) +#elif defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64) extern bool g_ArmDetectionDone; -extern bool g_hasNEON; +extern bool g_hasNEON, g_hasCRC32; void CRYPTOPP_API DetectArmFeatures(); +//! \brief Determine if an ARM processor has Advanced SIMD available +//! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise. inline bool HasNEON() { if (!g_ArmDetectionDone) @@ -230,6 +249,19 @@ inline bool HasNEON() return g_hasNEON; } +//! \brief Determine if an ARM processor has CRC32 available +//!
\returns true if the hardware is capable of CRC32 at runtime, false otherwise. +inline bool HasCRC32() +{ + if (!g_ArmDetectionDone) + DetectArmFeatures(); + return g_hasCRC32; +} + +//! \brief Provides the cache line size at runtime +//! \returns the L1 cache line size. +//! \details GetCacheLineSize() provides an estimate using CRYPTOPP_L1_CACHE_LINE_SIZE. +//! The runtime instructions to query the processor are privileged. inline int GetCacheLineSize() { return CRYPTOPP_L1_CACHE_LINE_SIZE; @@ -246,6 +278,8 @@ inline int GetCacheLineSize() #endif +#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 + #ifdef CRYPTOPP_GENERATE_X64_MASM #define AS1(x) x*newline* #define AS2(x, y) x, y*newline* @@ -448,6 +482,8 @@ inline int GetCacheLineSize() ASL(labelPrefix##9)\ AS2( add outputPtr, increment*16) +#endif // X86/X32/X64 + NAMESPACE_END -#endif +#endif // CRYPTOPP_CPU_H diff --git a/crc.cpp b/crc.cpp index bdb4e2f1..b007dd4f 100644 --- a/crc.cpp +++ b/crc.cpp @@ -131,6 +131,22 @@ CRC32::CRC32() void CRC32::Update(const byte *s, size_t n) { +#if defined(__ARM_FEATURE_CRC32) + if (HasCRC32()) + { + for(; !IsAligned<word32>(s) && n > 0; s++, n--) + m_crc = __crc32b(m_crc, *s); + + for(; n > 4; s+=4, n-=4) + m_crc = __crc32w(m_crc, *(const word32 *)(void*)s); + + for(; n > 0; s++, n--) + m_crc = __crc32b(m_crc, *s); + + return; + } +#endif + word32 crc = m_crc; for(; !IsAligned<word32>(s) && n > 0; n--) @@ -295,6 +311,20 @@ void CRC32C::Update(const byte *s, size_t n) for(; n > 0; s++, n--) m_crc = _mm_crc32_u8(m_crc, *s); + return; + } +#elif defined(__ARM_FEATURE_CRC32) + if (HasCRC32()) + { + for(; !IsAligned<word32>(s) && n > 0; s++, n--) + m_crc = __crc32cb(m_crc, *s); + + for(; n > 4; s+=4, n-=4) + m_crc = __crc32cw(m_crc, *(const word32 *)(void*)s); + + for(; n > 0; s++, n--) + m_crc = __crc32cb(m_crc, *s); + return; } #endif diff --git a/validat1.cpp b/validat1.cpp index 2bb0b917..e25273c2 100644 --- a/validat1.cpp +++
b/validat1.cpp @@ -314,9 +314,10 @@ bool TestSettings() #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) bool hasNEON = HasNEON(); + bool hasCRC32 = HasCRC32(); cout << "passed: "; - cout << "hasNEON == " << hasNEON << endl; + cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << endl; #endif if (!pass) @@ -912,7 +913,7 @@ bool TestOS_RNG() return pass; } -#ifdef NO_OS_DEPENDENCE +#if defined(NO_OS_DEPENDENCE) || !defined(OS_RNG_AVAILABLE) bool TestAutoSeeded() { return true; From 13789c82bf171aa8f27d01e313422e22e3d2d7eb Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 6 May 2016 05:20:56 -0400 Subject: [PATCH 09/10] Improved compile time checking to make runtime check consistent with lack of availability --- cpu.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cpu.cpp b/cpu.cpp index bd2f5183..3247e5e7 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -269,16 +269,23 @@ void __attribute__ ((constructor)) DetectArmFeatures() void DetectArmFeatures() #endif { -#if defined(__linux__) && defined(__aarch64__) +#if defined(__linux__) && defined(__aarch64__) // ARM-64 const unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ASIMD); +# if defined(__ARM_FEATURE_CRC32) g_hasCRC32 = !!(hwcaps & HWCAP_CRC32); -#elif defined(__linux__) +# else + g_hasCRC32 = false; +# endif +#elif defined(__linux__) // ARM-32 const unsigned long hwcaps = getauxval(AT_HWCAP); g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); - // g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32); +# if defined(__ARM_FEATURE_CRC32) + g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32); +# else g_hasCRC32 = false; -#elif defined(_WIN32) && defined(_M_ARM) +# endif +#elif defined(_WIN32) && defined(_M_ARM) // Microsoft ARM g_hasNEON = true; g_hasCRC32 = false; #endif From 19ac7b0be8e58d6681d7c143875aa4f9d8777cda Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 6 May 2016 06:56:08 -0400 Subject: [PATCH 10/10] Improve include logic under GCC
--- cpu.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpu.h b/cpu.h index 90a499d0..56baf5ae 100644 --- a/cpu.h +++ b/cpu.h @@ -20,7 +20,7 @@ # if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE # include <arm_neon.h> # endif -# if (__ARM_ACLE >= 200) +# if defined(__ARM_FEATURE_CRC32) || (__ARM_ACLE >= 200) # include <arm_acle.h> # endif #endif // ARM-32 or ARM-64 @@ -242,6 +242,7 @@ void CRYPTOPP_API DetectArmFeatures(); //! \brief Determine if an ARM processor has Advanced SIMD available //! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise. +//! \details Runtime support requires compile time support. inline bool HasNEON() { if (!g_ArmDetectionDone) @@ -251,6 +252,8 @@ inline bool HasNEON() //! \brief Determine if an ARM processor has CRC32 available //! \returns true if the hardware is capable of CRC32 at runtime, false otherwise. +//! \details Runtime support requires compile time support. When compiling with GCC, you may +//! need to compile with -march=armv8-a+crc. inline bool HasCRC32() { if (!g_ArmDetectionDone)