Add ARM NEON detection code
parent
ed5f6e103b
commit
d591c5ad8f
11
GNUmakefile
11
GNUmakefile
|
|
@ -22,7 +22,8 @@ IS_X86 := $(shell uname -m | $(EGREP) -v "x86_64" | $(EGREP) -i -c "i.86|x86|i86
|
|||
IS_X32 ?= 0
|
||||
IS_X86_64 := $(shell uname -m | $(EGREP) -i -c "(_64|d64)")
|
||||
IS_PPC := $(shell uname -m | $(EGREP) -i -c "ppc|power")
|
||||
IS_AARCH64 := $(shell uname -m | $(EGREP) -i -c "aarch64")
|
||||
IS_ARM32 := $(shell uname -m | $(EGREP) -i -c "arm")
|
||||
IS_ARM64 := $(shell uname -m | $(EGREP) -i -c "aarch64")
|
||||
|
||||
IS_SUN := $(shell uname | $(EGREP) -i -c "SunOS")
|
||||
IS_LINUX := $(shell $(CXX) -dumpmachine 2>&1 | $(EGREP) -i -c "Linux")
|
||||
|
|
@ -325,9 +326,13 @@ endif # HAS_SOLIB_VERSION
|
|||
# is the second candidate for explicit initialization order.
|
||||
SRCS := cryptlib.cpp cpu.cpp $(filter-out cryptlib.cpp cpu.cpp pch.cpp simple.cpp winpipes.cpp cryptlib_bds.cpp,$(wildcard *.cpp))
|
||||
|
||||
# No need for CPU or RDRAND on non-X86 systems. X32 is represented with X64.
|
||||
# Need CPU for X86/X64/X32 and ARM
|
||||
ifeq ($(IS_X86)$(IS_X86_64)$(IS_ARM32)$(IS_ARM64),0000)
|
||||
SRCS := $(filter-out cpu.cpp, $(SRCS))
|
||||
endif
|
||||
# Need RDRAND for X86/X64/X32
|
||||
ifeq ($(IS_X86)$(IS_X86_64),00)
|
||||
SRCS := $(filter-out cpu.cpp rdrand.cpp, $(SRCS))
|
||||
SRCS := $(filter-out rdrand.cpp, $(SRCS))
|
||||
endif
|
||||
|
||||
ifneq ($(IS_MINGW),0)
|
||||
|
|
|
|||
26
config.h
26
config.h
|
|
@ -497,7 +497,7 @@ NAMESPACE_END
|
|||
#else
|
||||
#define CRYPTOPP_BOOL_X64 0
|
||||
#endif
|
||||
|
||||
|
||||
// Undo the ASM and Intrinsic related defines due to X32.
|
||||
#if CRYPTOPP_BOOL_X32
|
||||
# undef CRYPTOPP_BOOL_X64
|
||||
|
|
@ -505,6 +505,30 @@ NAMESPACE_END
|
|||
# undef CRYPTOPP_X64_MASM_AVAILABLE
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM)
|
||||
#define CRYPTOPP_BOOL_ARM32 1
|
||||
#else
|
||||
#define CRYPTOPP_BOOL_ARM32 0
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#define CRYPTOPP_BOOL_ARM64 1
|
||||
#else
|
||||
#define CRYPTOPP_BOOL_ARM64 0
|
||||
#endif
|
||||
|
||||
#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
|
||||
# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800))
|
||||
# if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
||||
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
|
||||
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0
|
||||
#endif
|
||||
|
||||
#if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS)
|
||||
#if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || defined(__powerpc__) || (__ARM_FEATURE_UNALIGNED >= 1))
|
||||
#define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
|
||||
|
|
|
|||
|
|
@ -381,7 +381,31 @@ NAMESPACE_END
|
|||
#define CRYPTOPP_DISABLE_ASM
|
||||
#define CRYPTOPP_DISABLE_SSE2
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM)
|
||||
#define CRYPTOPP_BOOL_ARM32 1
|
||||
#else
|
||||
#define CRYPTOPP_BOOL_ARM32 0
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#define CRYPTOPP_BOOL_ARM64 1
|
||||
#else
|
||||
#define CRYPTOPP_BOOL_ARM64 0
|
||||
#endif
|
||||
|
||||
#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
|
||||
# if (defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800))
|
||||
# if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
||||
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
|
||||
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0
|
||||
#endif
|
||||
|
||||
// Apple's Clang prior to 5.0 cannot handle SSE2 (and Apple does not use LLVM Clang numbering...)
|
||||
#if defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION < 50000)
|
||||
# define CRYPTOPP_DISABLE_ASM
|
||||
|
|
|
|||
44
cpu.cpp
44
cpu.cpp
|
|
@ -22,8 +22,18 @@
|
|||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
|
||||
#include <sys/auxv.h>
|
||||
#include <asm/hwcap.h>
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
|
||||
#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0))
|
||||
#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0))
|
||||
|
||||
#ifdef CRYPTOPP_CPUID_AVAILABLE
|
||||
|
||||
#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
|
||||
|
|
@ -168,10 +178,6 @@ bool g_x86DetectionDone = false;
|
|||
bool g_hasMMX = false, g_hasISSE = false, g_hasSSE2 = false, g_hasSSSE3 = false, g_hasSSE4 = false, g_hasAESNI = false, g_hasCLMUL = false, g_isP4 = false, g_hasRDRAND = false, g_hasRDSEED = false;
|
||||
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
||||
|
||||
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
|
||||
#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0))
|
||||
#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0))
|
||||
|
||||
static inline bool IsIntel(const word32 output[4])
|
||||
{
|
||||
// This is the "GenuineIntel" string
|
||||
|
|
@ -251,6 +257,36 @@ void DetectX86Features()
|
|||
*((volatile bool*)&g_x86DetectionDone) = true;
|
||||
}
|
||||
|
||||
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
|
||||
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
|
||||
|
||||
bool g_ArmDetectionDone = false;
|
||||
bool g_hasNEON = false;
|
||||
|
||||
// This is avaiable in a status register, but we need privileged code to execute the read
|
||||
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
||||
|
||||
#if HAVE_GCC_CONSTRUCTOR1
|
||||
void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
|
||||
#elif HAVE_GCC_CONSTRUCTOR0
|
||||
void __attribute__ ((constructor)) DetectArmFeatures()
|
||||
#else
|
||||
void DetectArmFeatures()
|
||||
#endif
|
||||
{
|
||||
g_hasNEON = false;
|
||||
#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
|
||||
# if defined(__linux__) && defined(__aarch64__)
|
||||
const long hwcaps = getauxval(AT_HWCAP);
|
||||
g_hasNEON = !!(hwcaps & HWCAP_ASIMD);
|
||||
# elif defined(__linux__)
|
||||
const long hwcaps = getauxval(AT_HWCAP);
|
||||
g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON);
|
||||
# endif
|
||||
#endif
|
||||
*((volatile bool*)&g_ArmDetectionDone) = true;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
NAMESPACE_END
|
||||
|
|
|
|||
23
cpu.h
23
cpu.h
|
|
@ -1,7 +1,6 @@
|
|||
// cpu.h - written and placed in the public domain by Wei Dai
|
||||
|
||||
//! \file
|
||||
//! \headerfile cpu.h
|
||||
//! \file cpu.h
|
||||
//! \brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly
|
||||
|
||||
#ifndef CRYPTOPP_CPU_H
|
||||
|
|
@ -218,6 +217,24 @@ inline int GetCacheLineSize()
|
|||
return g_cacheLineSize;
|
||||
}
|
||||
|
||||
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
|
||||
|
||||
extern bool g_ArmDetectionDone;
|
||||
extern bool g_hasNEON;
|
||||
void CRYPTOPP_API DetectArmFeatures();
|
||||
|
||||
inline bool HasNEON()
|
||||
{
|
||||
if (!g_ArmDetectionDone)
|
||||
DetectArmFeatures();
|
||||
return g_hasNEON;
|
||||
}
|
||||
|
||||
inline int GetCacheLineSize()
|
||||
{
|
||||
return CRYPTOPP_L1_CACHE_LINE_SIZE;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
inline int GetCacheLineSize()
|
||||
|
|
@ -225,7 +242,7 @@ inline int GetCacheLineSize()
|
|||
return CRYPTOPP_L1_CACHE_LINE_SIZE;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // X86/X32/X64 and ARM
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -311,6 +311,12 @@ bool TestSettings()
|
|||
|
||||
cout << "hasMMX == " << hasMMX << ", hasISSE == " << hasISSE << ", hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSE4 << ", hasSSE4 == " << hasSSSE3 << ", hasAESNI == " << HasAESNI() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED() << ", hasCLMUL == " << HasCLMUL() << ", isP4 == " << isP4 << ", cacheLineSize == " << cacheLineSize;
|
||||
cout << ", AESNI_INTRINSICS == " << CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE << endl;
|
||||
|
||||
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
|
||||
bool hasNEON = HasNEON();
|
||||
|
||||
cout << "passed: ";
|
||||
cout << "hasNEON == " << hasNEON << endl;
|
||||
#endif
|
||||
|
||||
if (!pass)
|
||||
|
|
|
|||
Loading…
Reference in New Issue