diff --git a/GNUmakefile b/GNUmakefile index 43e14098..ab700c45 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -22,7 +22,8 @@ IS_X86 := $(shell uname -m | $(EGREP) -v "x86_64" | $(EGREP) -i -c "i.86|x86|i86 IS_X32 ?= 0 IS_X86_64 := $(shell uname -m | $(EGREP) -i -c "(_64|d64)") IS_PPC := $(shell uname -m | $(EGREP) -i -c "ppc|power") -IS_AARCH64 := $(shell uname -m | $(EGREP) -i -c "aarch64") +IS_ARM32 := $(shell uname -m | $(EGREP) -i -c "arm") +IS_ARM64 := $(shell uname -m | $(EGREP) -i -c "aarch64") IS_SUN := $(shell uname | $(EGREP) -i -c "SunOS") IS_LINUX := $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "Linux") @@ -325,9 +326,13 @@ endif # HAS_SOLIB_VERSION # is the second candidate for explicit initialization order. SRCS := cryptlib.cpp cpu.cpp $(filter-out cryptlib.cpp cpu.cpp pch.cpp simple.cpp winpipes.cpp cryptlib_bds.cpp,$(wildcard *.cpp)) -# No need for CPU or RDRAND on non-X86 systems. X32 is represented with X64. +# Need CPU for X86/X64/X32 and ARM +ifeq ($(IS_X86)$(IS_X86_64)$(IS_ARM32)$(IS_ARM64),0000) + SRCS := $(filter-out cpu.cpp, $(SRCS)) +endif +# Need RDRAND for X86/X64/X32 ifeq ($(IS_X86)$(IS_X86_64),00) - SRCS := $(filter-out cpu.cpp rdrand.cpp, $(SRCS)) + SRCS := $(filter-out rdrand.cpp, $(SRCS)) endif ifneq ($(IS_MINGW),0) diff --git a/config.h b/config.h index 2e71bcb8..153f5a9b 100644 --- a/config.h +++ b/config.h @@ -497,7 +497,7 @@ NAMESPACE_END #else #define CRYPTOPP_BOOL_X64 0 #endif - + // Undo the ASM and Intrinsic related defines due to X32. #if CRYPTOPP_BOOL_X32 # undef CRYPTOPP_BOOL_X64 @@ -505,6 +505,30 @@ NAMESPACE_END # undef CRYPTOPP_X64_MASM_AVAILABLE #endif +#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM) + #define CRYPTOPP_BOOL_ARM32 1 +#else + #define CRYPTOPP_BOOL_ARM32 0 +#endif + +#if defined(__aarch64__) + #define CRYPTOPP_BOOL_ARM64 1 +#else + #define CRYPTOPP_BOOL_ARM64 0 +#endif + +#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 +# endif +# endif +#endif + +#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0 +#endif + #if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) #if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || defined(__powerpc__) || (__ARM_FEATURE_UNALIGNED >= 1)) #define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS diff --git a/config.recommend b/config.recommend index ad845e88..9546120a 100644 --- a/config.recommend +++ b/config.recommend @@ -381,7 +381,31 @@ NAMESPACE_END #define CRYPTOPP_DISABLE_ASM #define CRYPTOPP_DISABLE_SSE2 #endif - + +#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM) + #define CRYPTOPP_BOOL_ARM32 1 +#else + #define CRYPTOPP_BOOL_ARM32 0 +#endif + +#if defined(__aarch64__) + #define CRYPTOPP_BOOL_ARM64 1 +#else + #define CRYPTOPP_BOOL_ARM64 0 +#endif + +#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) +# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800)) +# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1 +# endif +# endif +#endif + +#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0 +#endif + // Apple's Clang prior to 5.0 cannot handle SSE2 (and Apple does not use LLVM Clang numbering...) #if defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION < 50000) # define CRYPTOPP_DISABLE_ASM diff --git a/cpu.cpp b/cpu.cpp index 2c5ee728..334b2ec9 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -22,8 +22,18 @@ #include #endif +#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +#include +#include +#include +#endif + NAMESPACE_BEGIN(CryptoPP) +// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. +#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0)) +#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0)) + #ifdef CRYPTOPP_CPUID_AVAILABLE #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 @@ -168,10 +178,6 @@ bool g_x86DetectionDone = false; bool g_hasMMX = false, g_hasISSE = false, g_hasSSE2 = false, g_hasSSSE3 = false, g_hasSSE4 = false, g_hasAESNI = false, g_hasCLMUL = false, g_isP4 = false, g_hasRDRAND = false, g_hasRDSEED = false; word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; -// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. -#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0)) -#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0)) - static inline bool IsIntel(const word32 output[4]) { // This is the "GenuineIntel" string @@ -251,6 +257,36 @@ void DetectX86Features() *((volatile bool*)&g_x86DetectionDone) = true; } +// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu +#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) + +bool g_ArmDetectionDone = false; +bool g_hasNEON = false; + +// This is avaiable in a status register, but we need privileged code to execute the read +word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + +#if HAVE_GCC_CONSTRUCTOR1 +void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures() +#elif HAVE_GCC_CONSTRUCTOR0 +void __attribute__ ((constructor)) DetectArmFeatures() +#else +void DetectArmFeatures() +#endif +{ + g_hasNEON = false; +#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE +# if defined(__linux__) && defined(__aarch64__) + const long hwcaps = getauxval(AT_HWCAP); + g_hasNEON = !!(hwcaps & HWCAP_ASIMD); +# elif defined(__linux__) + const long hwcaps = getauxval(AT_HWCAP); + g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); +# endif +#endif + *((volatile bool*)&g_ArmDetectionDone) = true; +} + #endif NAMESPACE_END diff --git a/cpu.h b/cpu.h index ff3e39d4..895c35e2 100644 --- a/cpu.h +++ b/cpu.h @@ -1,7 +1,6 @@ // cpu.h - written and placed in the public domain by Wei Dai -//! \file -//! \headerfile cpu.h +//! \file cpu.h //! \brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly #ifndef CRYPTOPP_CPU_H @@ -218,6 +217,24 @@ inline int GetCacheLineSize() return g_cacheLineSize; } +#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) + +extern bool g_ArmDetectionDone; +extern bool g_hasNEON; +void CRYPTOPP_API DetectArmFeatures(); + +inline bool HasNEON() +{ + if (!g_ArmDetectionDone) + DetectArmFeatures(); + return g_hasNEON; +} + +inline int GetCacheLineSize() +{ + return CRYPTOPP_L1_CACHE_LINE_SIZE; +} + #else inline int GetCacheLineSize() @@ -225,7 +242,7 @@ inline int GetCacheLineSize() return CRYPTOPP_L1_CACHE_LINE_SIZE; } -#endif +#endif // X86/X32/X64 and ARM #endif diff --git a/validat1.cpp b/validat1.cpp index 8e214607..2bb0b917 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -311,6 +311,12 @@ bool TestSettings() cout << "hasMMX == " << hasMMX << ", hasISSE == " << hasISSE << ", hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSE4 << ", hasSSE4 == " << hasSSSE3 << ", hasAESNI == " << HasAESNI() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED() << ", hasCLMUL == " << HasCLMUL() << ", isP4 == " << isP4 << ", cacheLineSize == " << cacheLineSize; cout << ", AESNI_INTRINSICS == " << CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE << endl; + +#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) + bool hasNEON = HasNEON(); + + cout << "passed: "; + cout << "hasNEON == " << hasNEON << endl; #endif if (!pass)