Updated etection code. Added ARMv8-a CRC32 implementations

pull/174/head
Jeffrey Walton 2016-05-06 04:35:42 -04:00
parent ed10758af2
commit fe4019b942
4 changed files with 84 additions and 24 deletions

23
cpu.cpp
View File

@ -22,14 +22,6 @@
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# if defined(__linux__)
# include <sys/auxv.h>
# include <asm/hwcap.h>
# endif
# include <arm_neon.h>
#endif
NAMESPACE_BEGIN(CryptoPP) NAMESPACE_BEGIN(CryptoPP)
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it. // MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
@ -264,9 +256,9 @@ void DetectX86Features()
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
bool g_ArmDetectionDone = false; bool g_ArmDetectionDone = false;
bool g_hasNEON = false; bool g_hasNEON = false, g_hasCRC32 = false;
// This is avaiable in a status register, but we need privileged code to execute the read // This is avaiable in a status register, but we need privileged code to perform the read
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
#if HAVE_GCC_CONSTRUCTOR1 #if HAVE_GCC_CONSTRUCTOR1
@ -277,17 +269,18 @@ void __attribute__ ((constructor)) DetectArmFeatures()
void DetectArmFeatures() void DetectArmFeatures()
#endif #endif
{ {
g_hasNEON = false;
#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
#if defined(__linux__) && defined(__aarch64__) #if defined(__linux__) && defined(__aarch64__)
const long hwcaps = getauxval(AT_HWCAP); const unsigned long hwcaps = getauxval(AT_HWCAP);
g_hasNEON = !!(hwcaps & HWCAP_ASIMD); g_hasNEON = !!(hwcaps & HWCAP_ASIMD);
g_hasCRC32 = !!(hwcaps & HWCAP_CRC32);
#elif defined(__linux__) #elif defined(__linux__)
const long hwcaps = getauxval(AT_HWCAP); const unsigned long hwcaps = getauxval(AT_HWCAP);
g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON); g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON);
// g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32);
g_hasCRC32 = false;
#elif defined(_WIN32) && defined(_M_ARM) #elif defined(_WIN32) && defined(_M_ARM)
g_hasNEON = true; g_hasNEON = true;
# endif g_hasCRC32 = false;
#endif #endif
*((volatile bool*)&g_ArmDetectionDone) = true; *((volatile bool*)&g_ArmDetectionDone) = true;
} }

42
cpu.h
View File

@ -1,13 +1,30 @@
// cpu.h - written and placed in the public domain by Wei Dai // cpu.h - written and placed in the public domain by Wei Dai
//! \file cpu.h //! \file cpu.h
//! \brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly //! \brief Functions for CPU features and intrinsics
//! \details At the moment, the functions are used heavily in X86/X32/X64 code paths
// for SSE, SSE2 and SSE4. The funtions are also used on occassion for AArch32
//! and AArch64 code paths for NEON.
#ifndef CRYPTOPP_CPU_H #ifndef CRYPTOPP_CPU_H
#define CRYPTOPP_CPU_H #define CRYPTOPP_CPU_H
#include "config.h" #include "config.h"
#if defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)
# if defined(__linux__)
# include <sys/auxv.h>
# include <asm/hwcap.h>
# include <stdint.h>
# endif
# if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# include <arm_neon.h>
# endif
# if (__ARM_ACLE >= 200)
# include <arm_acle.h>
# endif
#endif // ARM-32 or ARM-64
#ifdef CRYPTOPP_GENERATE_X64_MASM #ifdef CRYPTOPP_GENERATE_X64_MASM
#define CRYPTOPP_X86_ASM_AVAILABLE #define CRYPTOPP_X86_ASM_AVAILABLE
@ -220,9 +237,11 @@ inline int GetCacheLineSize()
#elif defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64) #elif defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)
extern bool g_ArmDetectionDone; extern bool g_ArmDetectionDone;
extern bool g_hasNEON; extern bool g_hasNEON, g_hasCRC32;
void CRYPTOPP_API DetectArmFeatures(); void CRYPTOPP_API DetectArmFeatures();
//! \brief Determine if an ARM processor has Advanced SIMD available
//! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise.
inline bool HasNEON() inline bool HasNEON()
{ {
if (!g_ArmDetectionDone) if (!g_ArmDetectionDone)
@ -230,6 +249,19 @@ inline bool HasNEON()
return g_hasNEON; return g_hasNEON;
} }
//! \brief Determine if an ARM processor has CRC32 available
//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
inline bool HasCRC32()
{
if (!g_ArmDetectionDone)
DetectArmFeatures();
return g_hasCRC32;
}
//! \brief Provides the cache line size at runtime
//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
//! \details GetCacheLineSize() provides is an estimate using CRYPTOPP_L1_CACHE_LINE_SIZE.
//! The runtime instructions to query the processor are privileged.
inline int GetCacheLineSize() inline int GetCacheLineSize()
{ {
return CRYPTOPP_L1_CACHE_LINE_SIZE; return CRYPTOPP_L1_CACHE_LINE_SIZE;
@ -246,6 +278,8 @@ inline int GetCacheLineSize()
#endif #endif
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
#ifdef CRYPTOPP_GENERATE_X64_MASM #ifdef CRYPTOPP_GENERATE_X64_MASM
#define AS1(x) x*newline* #define AS1(x) x*newline*
#define AS2(x, y) x, y*newline* #define AS2(x, y) x, y*newline*
@ -448,6 +482,8 @@ inline int GetCacheLineSize()
ASL(labelPrefix##9)\ ASL(labelPrefix##9)\
AS2( add outputPtr, increment*16) AS2( add outputPtr, increment*16)
#endif // X86/X32/X64
NAMESPACE_END NAMESPACE_END
#endif #endif // CRYPTOPP_CPU_H

30
crc.cpp
View File

@ -131,6 +131,22 @@ CRC32::CRC32()
void CRC32::Update(const byte *s, size_t n) void CRC32::Update(const byte *s, size_t n)
{ {
#if defined(__ARM_FEATURE_CRC32)
if (HasCRC32())
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
m_crc = __crc32b(m_crc, *s);
for(; n > 4; s+=4, n-=4)
m_crc = __crc32w(m_crc, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
m_crc = __crc32b(m_crc, *s);
return;
}
#endif
word32 crc = m_crc; word32 crc = m_crc;
for(; !IsAligned<word32>(s) && n > 0; n--) for(; !IsAligned<word32>(s) && n > 0; n--)
@ -295,6 +311,20 @@ void CRC32C::Update(const byte *s, size_t n)
for(; n > 0; s++, n--) for(; n > 0; s++, n--)
m_crc = _mm_crc32_u8(m_crc, *s); m_crc = _mm_crc32_u8(m_crc, *s);
return;
}
#elif defined(__ARM_FEATURE_CRC32)
if (HasCRC32())
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
m_crc = __crc32cb(m_crc, *s);
for(; n > 4; s+=4, n-=4)
m_crc = __crc32cw(m_crc, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
m_crc = __crc32cb(m_crc, *s);
return; return;
} }
#endif #endif

View File

@ -314,9 +314,10 @@ bool TestSettings()
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64) #elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
bool hasNEON = HasNEON(); bool hasNEON = HasNEON();
bool hasCRC32 = HasCRC32();
cout << "passed: "; cout << "passed: ";
cout << "hasNEON == " << hasNEON << endl; cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << endl;
#endif #endif
if (!pass) if (!pass)
@ -912,7 +913,7 @@ bool TestOS_RNG()
return pass; return pass;
} }
#ifdef NO_OS_DEPENDENCE #if defined(NO_OS_DEPENDENCE) || !defined(OS_RNG_AVAILABLE)
bool TestAutoSeeded() bool TestAutoSeeded()
{ {
return true; return true;