Add ARM NEON detection code

pull/174/head
Jeffrey Walton 2016-05-05 22:18:53 -04:00
parent ed5f6e103b
commit d591c5ad8f
6 changed files with 124 additions and 12 deletions

View File

@ -22,7 +22,8 @@ IS_X86 := $(shell uname -m | $(EGREP) -v "x86_64" | $(EGREP) -i -c "i.86|x86|i86
IS_X32 ?= 0
IS_X86_64 := $(shell uname -m | $(EGREP) -i -c "(_64|d64)")
IS_PPC := $(shell uname -m | $(EGREP) -i -c "ppc|power")
IS_AARCH64 := $(shell uname -m | $(EGREP) -i -c "aarch64")
IS_ARM32 := $(shell uname -m | $(EGREP) -i -c "arm")
IS_ARM64 := $(shell uname -m | $(EGREP) -i -c "aarch64")
IS_SUN := $(shell uname | $(EGREP) -i -c "SunOS")
IS_LINUX := $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "Linux")
@ -325,9 +326,13 @@ endif # HAS_SOLIB_VERSION
# is the second candidate for explicit initialization order.
SRCS := cryptlib.cpp cpu.cpp $(filter-out cryptlib.cpp cpu.cpp pch.cpp simple.cpp winpipes.cpp cryptlib_bds.cpp,$(wildcard *.cpp))
# No need for CPU or RDRAND on non-X86 systems. X32 is represented with X64.
# Need CPU for X86/X64/X32 and ARM
ifeq ($(IS_X86)$(IS_X86_64)$(IS_ARM32)$(IS_ARM64),0000)
SRCS := $(filter-out cpu.cpp, $(SRCS))
endif
# Need RDRAND for X86/X64/X32
ifeq ($(IS_X86)$(IS_X86_64),00)
SRCS := $(filter-out cpu.cpp rdrand.cpp, $(SRCS))
SRCS := $(filter-out rdrand.cpp, $(SRCS))
endif
ifneq ($(IS_MINGW),0)

View File

@ -497,7 +497,7 @@ NAMESPACE_END
#else
#define CRYPTOPP_BOOL_X64 0
#endif
// Undo the ASM and Intrinsic related defines due to X32.
#if CRYPTOPP_BOOL_X32
# undef CRYPTOPP_BOOL_X64
@ -505,6 +505,30 @@ NAMESPACE_END
# undef CRYPTOPP_X64_MASM_AVAILABLE
#endif
#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM)
#define CRYPTOPP_BOOL_ARM32 1
#else
#define CRYPTOPP_BOOL_ARM32 0
#endif
#if defined(__aarch64__)
#define CRYPTOPP_BOOL_ARM64 1
#else
#define CRYPTOPP_BOOL_ARM64 0
#endif
#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800))
# if defined(__ARM_NEON__) || defined(__ARM_NEON)
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1
# endif
# endif
#endif
#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0
#endif
#if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS)
#if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || defined(__powerpc__) || (__ARM_FEATURE_UNALIGNED >= 1))
#define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS

View File

@ -381,7 +381,31 @@ NAMESPACE_END
#define CRYPTOPP_DISABLE_ASM
#define CRYPTOPP_DISABLE_SSE2
#endif
#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM)
#define CRYPTOPP_BOOL_ARM32 1
#else
#define CRYPTOPP_BOOL_ARM32 0
#endif
#if defined(__aarch64__)
#define CRYPTOPP_BOOL_ARM64 1
#else
#define CRYPTOPP_BOOL_ARM64 0
#endif
#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800))
# if defined(__ARM_NEON__) || defined(__ARM_NEON)
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1
# endif
# endif
#endif
#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0
#endif
// Apple's Clang prior to 5.0 cannot handle SSE2 (and Apple does not use LLVM Clang numbering...)
#if defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION < 50000)
# define CRYPTOPP_DISABLE_ASM

44
cpu.cpp
View File

@ -22,8 +22,18 @@
#include <emmintrin.h>
#endif
#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
#include <sys/auxv.h>
#include <asm/hwcap.h>
#include <arm_neon.h>
#endif
NAMESPACE_BEGIN(CryptoPP)
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0))
#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0))
#ifdef CRYPTOPP_CPUID_AVAILABLE
#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
@ -168,10 +178,6 @@ bool g_x86DetectionDone = false;
bool g_hasMMX = false, g_hasISSE = false, g_hasSSE2 = false, g_hasSSSE3 = false, g_hasSSE4 = false, g_hasAESNI = false, g_hasCLMUL = false, g_isP4 = false, g_hasRDRAND = false, g_hasRDSEED = false;
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0))
#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0))
static inline bool IsIntel(const word32 output[4])
{
// This is the "GenuineIntel" string
@ -251,6 +257,36 @@ void DetectX86Features()
*((volatile bool*)&g_x86DetectionDone) = true;
}
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
bool g_ArmDetectionDone = false;
bool g_hasNEON = false;
// This is avaiable in a status register, but we need privileged code to execute the read
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
#if HAVE_GCC_CONSTRUCTOR1
void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
#elif HAVE_GCC_CONSTRUCTOR0
void __attribute__ ((constructor)) DetectArmFeatures()
#else
void DetectArmFeatures()
#endif
{
g_hasNEON = false;
#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# if defined(__linux__) && defined(__aarch64__)
const long hwcaps = getauxval(AT_HWCAP);
g_hasNEON = !!(hwcaps & HWCAP_ASIMD);
# elif defined(__linux__)
const long hwcaps = getauxval(AT_HWCAP);
g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON);
# endif
#endif
*((volatile bool*)&g_ArmDetectionDone) = true;
}
#endif
NAMESPACE_END

23
cpu.h
View File

@ -1,7 +1,6 @@
// cpu.h - written and placed in the public domain by Wei Dai
//! \file
//! \headerfile cpu.h
//! \file cpu.h
//! \brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly
#ifndef CRYPTOPP_CPU_H
@ -218,6 +217,24 @@ inline int GetCacheLineSize()
return g_cacheLineSize;
}
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
extern bool g_ArmDetectionDone;
extern bool g_hasNEON;
void CRYPTOPP_API DetectArmFeatures();
inline bool HasNEON()
{
if (!g_ArmDetectionDone)
DetectArmFeatures();
return g_hasNEON;
}
inline int GetCacheLineSize()
{
return CRYPTOPP_L1_CACHE_LINE_SIZE;
}
#else
inline int GetCacheLineSize()
@ -225,7 +242,7 @@ inline int GetCacheLineSize()
return CRYPTOPP_L1_CACHE_LINE_SIZE;
}
#endif
#endif // X86/X32/X64 and ARM
#endif

View File

@ -311,6 +311,12 @@ bool TestSettings()
cout << "hasMMX == " << hasMMX << ", hasISSE == " << hasISSE << ", hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSE4 << ", hasSSE4 == " << hasSSSE3 << ", hasAESNI == " << HasAESNI() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED() << ", hasCLMUL == " << HasCLMUL() << ", isP4 == " << isP4 << ", cacheLineSize == " << cacheLineSize;
cout << ", AESNI_INTRINSICS == " << CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE << endl;
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
bool hasNEON = HasNEON();
cout << "passed: ";
cout << "hasNEON == " << hasNEON << endl;
#endif
if (!pass)