Merge branch 'master' into windows-store

pull/174/head
Jeffrey Walton 2016-05-16 18:48:49 -04:00
commit b42cc4676f
5 changed files with 277 additions and 63 deletions

View File

@ -534,24 +534,35 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_ARM64 0
#endif
// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under the toolchains.
#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 1700))
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM) || (__ARM_ARCH >= 8)
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (__ARM_ARCH >= 7) || defined(_M_ARM)
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1
# endif
# endif
#endif
#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000))
# if defined(__ARM_FEATURE_CRYPTO)
# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1
// Requires ARMv8 and ACLE 2.0.
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 2000))
# if defined(__ARM_FEATURE_CRC32) || (__ARM_ARCH >= 8) || defined(_M_ARM64)
# define CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE 1
# endif
# endif
#endif
#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0
// Requires ARMv8 and ACLE 2.0.
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 2000))
# if defined(__ARM_FEATURE_CRYPTO) || (__ARM_ARCH >= 8) || defined(_M_ARM64)
# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1
# endif
# endif
#endif
#if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS)

View File

@ -534,24 +534,35 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_ARM64 0
#endif
// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under the toolchains.
#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 1700))
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM) || (__ARM_ARCH >= 8)
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (__ARM_ARCH >= 7) || defined(_M_ARM)
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 1
# endif
# endif
#endif
#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000))
# if defined(__ARM_FEATURE_CRYPTO)
# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1
// Requires ARMv8 and ACLE 2.0.
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 2000))
# if defined(__ARM_FEATURE_CRC32) || (__ARM_ARCH >= 8) || defined(_M_ARM64)
# define CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE 1
# endif
# endif
#endif
#ifndef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# define CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE 0
// Requires ARMv8 and ACLE 2.0.
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && ((CRYPTOPP_GCC_VERSION >= 40400) || (CRYPTOPP_CLANG_VERSION >= 20800) || (CRYPTOPP_APPLE_CLANG_VERSION >= 60000) || (CRYPTOPP_MSC_VERSION >= 2000))
# if defined(__ARM_FEATURE_CRYPTO) || (__ARM_ARCH >= 8) || defined(_M_ARM64)
# define CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE 1
# endif
# endif
#endif
#if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS)

231
cpu.cpp
View File

@ -24,10 +24,17 @@
NAMESPACE_BEGIN(CryptoPP)
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0))
#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0))
extern "C" {
typedef void (*SigHandler)(int);
};
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#ifdef CRYPTOPP_CPUID_AVAILABLE
#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
@ -42,7 +49,6 @@ bool CpuId(word32 input, word32 output[4])
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C" {
typedef void (*SigHandler)(int);
static jmp_buf s_jmpNoCPUID;
static void SigIllHandlerCPUID(int)
@ -258,9 +264,189 @@ void DetectX86Features()
bool g_ArmDetectionDone = false;
bool g_hasNEON = false, g_hasCRC32 = false, g_hasCrypto = false;
// This is avaiable in a status register, but we need privileged code to perform the read
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
// The ARM equivalent of CPUID is reading a MSR. For example, fetch crypto capabilities with:
// #if defined(__arm64__) || defined(__aarch64__)
// word64 caps = 0; // Read ID_AA64ISAR0_EL1
// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
// #elif defined(__arm__) || defined(__aarch32__)
// word32 caps = 0; // Read ID_ISAR5_EL1
// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
// #endif
// The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
// Attempting to run the code results in a SIGILL and termination.
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C" {
static jmp_buf s_jmpNoNEON;
static void SigIllHandlerNEON(int)
{
longjmp(s_jmpNoNEON, 1);
}
static jmp_buf s_jmpNoCRC32;
static void SigIllHandlerCRC32(int)
{
longjmp(s_jmpNoCRC32, 1);
}
static jmp_buf s_jmpNoCrypto;
static void SigIllHandlerCrypto(int)
{
longjmp(s_jmpNoCrypto, 1);
}
};
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
__try
{
static const uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1);
static const uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24
// http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoNEON))
result = false;
else
{
static const uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1);
static const uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
}
signal(SIGILL, oldHandler);
return result;
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
}
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
__try
{
word32 w=0, x=0; word16 y=0; byte z=0;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24
// http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoCRC32))
result = false;
else
{
word32 w=0, x=0; word16 y=0; byte z=0;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
}
signal(SIGILL, oldHandler);
return result;
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
}
static bool TryCrypto()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
__try
{
// AES encrypt and decrypt
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key);
//
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24
// http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCrypto);
if (oldHandler == SIG_ERR)
result = false;
if (setjmp(s_jmpNoCrypto))
result = false;
else
{
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key);
}
signal(SIGILL, oldHandler);
return result;
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
#if HAVE_GCC_CONSTRUCTOR1
void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
#elif HAVE_GCC_CONSTRUCTOR0
@ -269,43 +455,10 @@ void __attribute__ ((constructor)) DetectArmFeatures()
void DetectArmFeatures()
#endif
{
#if defined(__linux__) && defined(__aarch64__) // ARM-64
const unsigned long hwcaps = getauxval(AT_HWCAP);
g_hasNEON = !!(hwcaps & HWCAP_ASIMD);
# if defined(__ARM_FEATURE_CRC32)
g_hasCRC32 = !!(hwcaps & HWCAP_CRC32);
# else
g_hasCRC32 = false;
# endif
#elif defined(__linux__) // ARM-32
const unsigned long hwcaps = getauxval(AT_HWCAP);
g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON);
# if defined(__ARM_FEATURE_CRC32)
g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32);
# else
g_hasCRC32 = false;
# endif
#elif defined(__APPLE__)
g_hasNEON = true;
# if defined(__ARM_FEATURE_CRC32)
g_hasCRC32 = true;
# else
g_hasCRC32 = false;
# endif
# if defined(__ARM_FEATURE_CRYPTO)
g_hasCrypto = true;
# else
g_hasCrypto = false;
# endif
#elif defined(_WIN32)
g_hasNEON = true;
g_hasCRC32 = false;
g_hasCrypto = false;
#else
g_hasNEON = false;
g_hasCRC32 = false;
g_hasCrypto = false;
#endif
g_hasNEON = TryNEON();
g_hasCRC32 = TryCRC32();
g_hasCrypto = TryCrypto();
*((volatile bool*)&g_ArmDetectionDone) = true;
}

27
cpu.h
View File

@ -12,17 +12,20 @@
#include "config.h"
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
# if defined(__linux__)
# include <sys/auxv.h>
# include <asm/hwcap.h>
# include <stdint.h>
# if defined(_MSC_VER) || defined(__BORLANDC__)
# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
# else
# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
# endif
# if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# include <arm_neon.h>
# endif
# if (defined(__ARM_FEATURE_CRC32) || defined(__ARM_FEATURE_CRYPTO) || (__ARM_ACLE >= 200)) && !defined(__APPLE__)
# if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# include <stdint.h>
# if (defined(__ARM_ACLE) || defined(__GNUC__)) && !defined(__APPLE__)
# include <arm_acle.h>
# endif
# endif
#endif // ARM-32 or ARM-64
#ifdef CRYPTOPP_GENERATE_X64_MASM
@ -242,6 +245,7 @@ void CRYPTOPP_API DetectArmFeatures();
//! \brief Determine if an ARM processor has Advanced SIMD available
//! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise.
//! \details Advanced SIMD instructions are available under Aarch64 (ARM-64) and Aarch32 (ARM-32).
//! \details Runtime support requires compile time support. When compiling with GCC, you may
//! need to compile with <tt>-mfpu=neon</tt> (32-bit) or <tt>-march=armv8-a</tt>
//! (64-bit). Also see ARM's <tt>__ARM_NEON</tt> preprocessor macro.
@ -254,9 +258,12 @@ inline bool HasNEON()
//! \brief Determine if an ARM processor has CRC32 available
//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
//! \details CRC32 instructions provide access to the processor's CRC32 and CRC32-C intructions.
//! They are provided by ARM C Language Extensions 2.0 (ACLE 2.0) and available under Aarch64
//! (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an AArch32 execution environment).
//! \details Runtime support requires compile time support. When compiling with GCC, you may
//! need to compile with <tt>-march=armv8-a+crc</tt>. Also see ARM's <tt>__ARM_FEATURE_CRC32</tt>
//! preprocessor macro.
//! need to compile with <tt>-march=armv8-a+crc</tt>; while Apple requires
//! <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRC32</tt> preprocessor macro.
inline bool HasCRC32()
{
if (!g_ArmDetectionDone)
@ -266,10 +273,12 @@ inline bool HasCRC32()
//! \brief Determine if an ARM processor has Crypto available
//! \returns true if the hardware is capable of Crypto at runtime, false otherwise.
//! \details Crypto instructions provide access to the processor's AES, SHA-1, SHA-224 and SHA-256 intructions.
//! They are provided by ARM C Language Extensions 2.0 (ACLE 2.0) and available under Aarch64
//! (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an AArch32 execution environment).
//! \details Runtime support requires compile time support. When compiling with GCC, you may
//! need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
//! <tt>-arch armv7s</tt> or <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt>
//! preprocessor macro.
//! <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
inline bool HasCrypto()
{
if (!g_ArmDetectionDone)

30
crc.cpp
View File

@ -131,6 +131,22 @@ CRC32::CRC32()
void CRC32::Update(const byte *s, size_t n)
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
if (HasCRC32())
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
m_crc = __crc32b(m_crc, *s);
for(; n > 4; s+=4, n-=4)
m_crc = __crc32w(m_crc, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
m_crc = __crc32b(m_crc, *s);
return;
}
#endif
word32 crc = m_crc;
for(; !IsAligned<word32>(s) && n > 0; n--)
@ -295,6 +311,20 @@ void CRC32C::Update(const byte *s, size_t n)
for(; n > 0; s++, n--)
m_crc = _mm_crc32_u8(m_crc, *s);
return;
}
#elif (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
if (HasCRC32())
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
m_crc = __crc32cb(m_crc, *s);
for(; n > 4; s+=4, n-=4)
m_crc = __crc32cw(m_crc, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
m_crc = __crc32cb(m_crc, *s);
return;
}
#endif