Add runtime detection code for ARM NEON, CRC32 and Crypto extensions

pull/174/head
Jeffrey Walton 2016-05-16 18:47:43 -04:00
parent e846beac35
commit 392c55d573
2 changed files with 211 additions and 49 deletions

231
cpu.cpp
View File

@ -24,10 +24,17 @@
NAMESPACE_BEGIN(CryptoPP) NAMESPACE_BEGIN(CryptoPP)
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. // MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0)) #define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0))
#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0)) #define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0))
extern "C" {
typedef void (*SigHandler)(int);
};
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#ifdef CRYPTOPP_CPUID_AVAILABLE #ifdef CRYPTOPP_CPUID_AVAILABLE
#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
@ -42,7 +49,6 @@ bool CpuId(word32 input, word32 output[4])
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C" { extern "C" {
typedef void (*SigHandler)(int);
static jmp_buf s_jmpNoCPUID; static jmp_buf s_jmpNoCPUID;
static void SigIllHandlerCPUID(int) static void SigIllHandlerCPUID(int)
@ -258,9 +264,189 @@ void DetectX86Features()
bool g_ArmDetectionDone = false; bool g_ArmDetectionDone = false;
bool g_hasNEON = false, g_hasCRC32 = false, g_hasCrypto = false; bool g_hasNEON = false, g_hasCRC32 = false, g_hasCrypto = false;
// This is avaiable in a status register, but we need privileged code to perform the read
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
// The ARM equivalent of CPUID is reading a MSR. For example, fetch crypto capabilities with:
// #if defined(__arm64__) || defined(__aarch64__)
// word64 caps = 0; // Read ID_AA64ISAR0_EL1
// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
// #elif defined(__arm__) || defined(__aarch32__)
// word32 caps = 0; // Read ID_ISAR5_EL1
// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
// #endif
// The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
// Attempting to run the code results in a SIGILL and termination.
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C" {
static jmp_buf s_jmpNoNEON;
static void SigIllHandlerNEON(int)
{
longjmp(s_jmpNoNEON, 1);
}
static jmp_buf s_jmpNoCRC32;
static void SigIllHandlerCRC32(int)
{
longjmp(s_jmpNoCRC32, 1);
}
static jmp_buf s_jmpNoCrypto;
static void SigIllHandlerCrypto(int)
{
longjmp(s_jmpNoCrypto, 1);
}
};
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
__try
{
static const uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1);
static const uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24
// http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoNEON))
result = false;
else
{
static const uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1);
static const uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
}
signal(SIGILL, oldHandler);
return result;
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
}
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
__try
{
word32 w=0, x=0; word16 y=0; byte z=0;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24
// http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoCRC32))
result = false;
else
{
word32 w=0, x=0; word16 y=0; byte z=0;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
}
signal(SIGILL, oldHandler);
return result;
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
}
static bool TryCrypto()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
__try
{
// AES encrypt and decrypt
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key);
//
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24
// http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCrypto);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoCrypto))
result = false;
else
{
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key);
}
signal(SIGILL, oldHandler);
return result;
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
#if HAVE_GCC_CONSTRUCTOR1 #if HAVE_GCC_CONSTRUCTOR1
void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures() void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
#elif HAVE_GCC_CONSTRUCTOR0 #elif HAVE_GCC_CONSTRUCTOR0
@ -269,43 +455,10 @@ void __attribute__ ((constructor)) DetectArmFeatures()
void DetectArmFeatures() void DetectArmFeatures()
#endif #endif
{ {
#if defined(__linux__) && defined(__aarch64__) // ARM-64 g_hasNEON = TryNEON();
const unsigned long hwcaps = getauxval(AT_HWCAP); g_hasCRC32 = TryCRC32();
g_hasNEON = !!(hwcaps & HWCAP_ASIMD); g_hasCrypto = TryCrypto();
# if defined(__ARM_FEATURE_CRC32)
g_hasCRC32 = !!(hwcaps & HWCAP_CRC32);
# else
g_hasCRC32 = false;
# endif
#elif defined(__linux__) // ARM-32
const unsigned long hwcaps = getauxval(AT_HWCAP);
g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON);
# if defined(__ARM_FEATURE_CRC32)
g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32);
# else
g_hasCRC32 = false;
# endif
#elif defined(__APPLE__)
g_hasNEON = true;
# if defined(__ARM_FEATURE_CRC32)
g_hasCRC32 = true;
# else
g_hasCRC32 = false;
# endif
# if defined(__ARM_FEATURE_CRYPTO)
g_hasCrypto = true;
# else
g_hasCrypto = false;
# endif
#elif defined(_WIN32)
g_hasNEON = true;
g_hasCRC32 = false;
g_hasCrypto = false;
#else
g_hasNEON = false;
g_hasCRC32 = false;
g_hasCrypto = false;
#endif
*((volatile bool*)&g_ArmDetectionDone) = true; *((volatile bool*)&g_ArmDetectionDone) = true;
} }

29
cpu.h
View File

@ -12,16 +12,19 @@
#include "config.h" #include "config.h"
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) #if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
# if defined(__linux__) # if defined(_MSC_VER) || defined(__BORLANDC__)
# include <sys/auxv.h> # define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
# include <asm/hwcap.h> # else
# include <stdint.h> # define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
# endif # endif
# if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE # if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# include <arm_neon.h> # include <arm_neon.h>
# endif # endif
# if (defined(__ARM_FEATURE_CRC32) || defined(__ARM_FEATURE_CRYPTO) || (__ARM_ACLE >= 200)) && !defined(__APPLE__) # if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# include <arm_acle.h> # include <stdint.h>
# if (defined(__ARM_ACLE) || defined(__GNUC__)) && !defined(__APPLE__)
# include <arm_acle.h>
# endif
# endif # endif
#endif // ARM-32 or ARM-64 #endif // ARM-32 or ARM-64
@ -242,6 +245,7 @@ void CRYPTOPP_API DetectArmFeatures();
//! \brief Determine if an ARM processor has Advanced SIMD available //! \brief Determine if an ARM processor has Advanced SIMD available
//! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise. //! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise.
//! \details Advanced SIMD instructions are available under Aarch64 (ARM-64) and Aarch32 (ARM-32).
//! \details Runtime support requires compile time support. When compiling with GCC, you may //! \details Runtime support requires compile time support. When compiling with GCC, you may
//! need to compile with <tt>-mfpu=neon</tt> (32-bit) or <tt>-march=armv8-a</tt> //! need to compile with <tt>-mfpu=neon</tt> (32-bit) or <tt>-march=armv8-a</tt>
//! (64-bit). Also see ARM's <tt>__ARM_NEON</tt> preprocessor macro. //! (64-bit). Also see ARM's <tt>__ARM_NEON</tt> preprocessor macro.
@ -254,9 +258,12 @@ inline bool HasNEON()
//! \brief Determine if an ARM processor has CRC32 available //! \brief Determine if an ARM processor has CRC32 available
//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise. //! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
//! \details CRC32 instructions provide access to the processor's CRC32 and CRC32-C intructions.
//! They are provided by ARM C Language Extensions 2.0 (ACLE 2.0) and available under Aarch64
//! (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an AArch32 execution environment).
//! \details Runtime support requires compile time support. When compiling with GCC, you may //! \details Runtime support requires compile time support. When compiling with GCC, you may
//! need to compile with <tt>-march=armv8-a+crc</tt>. Also see ARM's <tt>__ARM_FEATURE_CRC32</tt> //! need to compile with <tt>-march=armv8-a+crc</tt>; while Apple requires
//! preprocessor macro. //! <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRC32</tt> preprocessor macro.
inline bool HasCRC32() inline bool HasCRC32()
{ {
if (!g_ArmDetectionDone) if (!g_ArmDetectionDone)
@ -266,10 +273,12 @@ inline bool HasCRC32()
//! \brief Determine if an ARM processor has Crypto available //! \brief Determine if an ARM processor has Crypto available
//! \returns true if the hardware is capable of Crypto at runtime, false otherwise. //! \returns true if the hardware is capable of Crypto at runtime, false otherwise.
//! \details Crypto instructions provide access to the processor's AES, SHA-1, SHA-224 and SHA-256 intructions.
//! They are provided by ARM C Language Extensions 2.0 (ACLE 2.0) and available under Aarch64
//! (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an AArch32 execution environment).
//! \details Runtime support requires compile time support. When compiling with GCC, you may //! \details Runtime support requires compile time support. When compiling with GCC, you may
//! need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires //! need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
//! <tt>-arch armv7s</tt> or <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> //! <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
//! preprocessor macro.
inline bool HasCrypto() inline bool HasCrypto()
{ {
if (!g_ArmDetectionDone) if (!g_ArmDetectionDone)