Merge arm-neon into master

pull/175/head
Jeffrey Walton 2016-06-01 03:10:18 -04:00
parent 432db09b72
commit 1ee66a8fc2
5 changed files with 250 additions and 133 deletions

View File

@ -496,7 +496,6 @@ void BLAKE2_CXX_Compress64(const byte* input, BLAKE2_State<word64, true>& state)
} while(0) } while(0)
word64 m[16], v[16]; word64 m[16], v[16];
unsigned int i;
GetBlock<word64, LittleEndian, true> get1(input); GetBlock<word64, LittleEndian, true> get1(input);
get1(m[0])(m[1])(m[2])(m[3])(m[4])(m[5])(m[6])(m[7])(m[8])(m[9])(m[10])(m[11])(m[12])(m[13])(m[14])(m[15]); get1(m[0])(m[1])(m[2])(m[3])(m[4])(m[5])(m[6])(m[7])(m[8])(m[9])(m[10])(m[11])(m[12])(m[13])(m[14])(m[15]);
@ -3441,6 +3440,7 @@ static const int LANE_L64 = 0;
static void BLAKE2_NEON_Compress32(const byte* input, BLAKE2_State<word32, false>& state) static void BLAKE2_NEON_Compress32(const byte* input, BLAKE2_State<word32, false>& state)
{ {
//assert(IsAlignedOn(input,GetAlignmentOf<uint8_t*>()));
assert(IsAlignedOn(&state.h[0],GetAlignmentOf<uint32x4_t>())); assert(IsAlignedOn(&state.h[0],GetAlignmentOf<uint32x4_t>()));
assert(IsAlignedOn(&state.h[4],GetAlignmentOf<uint32x4_t>())); assert(IsAlignedOn(&state.h[4],GetAlignmentOf<uint32x4_t>()));
assert(IsAlignedOn(&state.t[0],GetAlignmentOf<uint32x4_t>())); assert(IsAlignedOn(&state.t[0],GetAlignmentOf<uint32x4_t>()));
@ -3946,7 +3946,7 @@ static void BLAKE2_NEON_Compress32(const byte* input, BLAKE2_State<word32, false
static void BLAKE2_NEON_Compress64(const byte* input, BLAKE2_State<word64, true>& state) static void BLAKE2_NEON_Compress64(const byte* input, BLAKE2_State<word64, true>& state)
{ {
assert(IsAlignedOn(input,GetAlignmentOf<uint8x16_t>())); //assert(IsAlignedOn(input,GetAlignmentOf<uint8_t*>()));
assert(IsAlignedOn(&state.h[0],GetAlignmentOf<uint64x2_t>())); assert(IsAlignedOn(&state.h[0],GetAlignmentOf<uint64x2_t>()));
assert(IsAlignedOn(&state.h[4],GetAlignmentOf<uint64x2_t>())); assert(IsAlignedOn(&state.h[4],GetAlignmentOf<uint64x2_t>()));
assert(IsAlignedOn(&state.t[0],GetAlignmentOf<uint64x2_t>())); assert(IsAlignedOn(&state.t[0],GetAlignmentOf<uint64x2_t>()));

View File

@ -759,20 +759,7 @@ NAMESPACE_END
# define CRYPTOPP_CXX11_ATOMICS 1 # define CRYPTOPP_CXX11_ATOMICS 1
#endif // atomics #endif // atomics
// atomics: MS at VS2012 (17.00); GCC at 4.4; Clang at 3.1/3.2; and Intel 13.0. // alignof/alignas: MS at VS2015 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0.
#if (CRYPTOPP_MSC_VERSION >= 1700)
# define CRYPTOPP_CXX11_ATOMICS 1
#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)
# define CRYPTOPP_CXX11_ATOMICS 1
#elif defined(__clang__)
# if __has_feature(cxx_atomic)
# define CRYPTOPP_CXX11_ATOMICS 1
# endif
#elif (CRYPTOPP_GCC_VERSION >= 40400)
# define CRYPTOPP_CXX11_ATOMICS 1
#endif // atomics
// alignof/alignas: MS at VS2013 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0.
#if (CRYPTOPP_MSC_VERSION >= 1900) #if (CRYPTOPP_MSC_VERSION >= 1900)
# define CRYPTOPP_CXX11_ALIGNAS 1 # define CRYPTOPP_CXX11_ALIGNAS 1
# define CRYPTOPP_CXX11_ALIGNOF 1 # define CRYPTOPP_CXX11_ALIGNOF 1

View File

@ -757,20 +757,7 @@ NAMESPACE_END
# define CRYPTOPP_CXX11_ATOMICS 1 # define CRYPTOPP_CXX11_ATOMICS 1
#endif // atomics #endif // atomics
// atomics: MS at VS2012 (17.00); GCC at 4.4; Clang at 3.1/3.2; and Intel 13.0. // alignof/alignas: MS at VS2015 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0.
#if (CRYPTOPP_MSC_VERSION >= 1700)
# define CRYPTOPP_CXX11_ATOMICS 1
#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)
# define CRYPTOPP_CXX11_ATOMICS 1
#elif defined(__clang__)
# if __has_feature(cxx_atomic)
# define CRYPTOPP_CXX11_ATOMICS 1
# endif
#elif (CRYPTOPP_GCC_VERSION >= 40400)
# define CRYPTOPP_CXX11_ATOMICS 1
#endif // atomics
// alignof/alignas: MS at VS2013 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0.
#if (CRYPTOPP_MSC_VERSION >= 1900) #if (CRYPTOPP_MSC_VERSION >= 1900)
# define CRYPTOPP_CXX11_ALIGNAS 1 # define CRYPTOPP_CXX11_ALIGNAS 1
# define CRYPTOPP_CXX11_ALIGNOF 1 # define CRYPTOPP_CXX11_ALIGNOF 1

206
cpu.cpp
View File

@ -48,8 +48,8 @@ bool CpuId(word32 input, word32 output[4])
#else #else
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C" { extern "C"
{
static jmp_buf s_jmpNoCPUID; static jmp_buf s_jmpNoCPUID;
static void SigIllHandlerCPUID(int) static void SigIllHandlerCPUID(int)
{ {
@ -97,9 +97,13 @@ bool CpuId(word32 input, word32 output[4])
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true; volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
if (oldHandler == SIG_ERR) if (oldHandler == SIG_ERR)
result = false; return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoCPUID)) if (setjmp(s_jmpNoCPUID))
result = false; result = false;
@ -119,6 +123,7 @@ bool CpuId(word32 input, word32 output[4])
); );
} }
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler); signal(SIGILL, oldHandler);
return result; return result;
#endif #endif
@ -151,12 +156,16 @@ static bool TrySSE2()
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true; volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
if (oldHandler == SIG_ERR) if (oldHandler == SIG_ERR)
return false; return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoSSE2)) if (setjmp(s_jmpNoSSE2))
result = true; result = false;
else else
{ {
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
@ -167,6 +176,7 @@ static bool TrySSE2()
#endif #endif
} }
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler); signal(SIGILL, oldHandler);
return result; return result;
#endif #endif
@ -298,16 +308,11 @@ void DetectX86Features()
*((volatile bool*)&g_x86DetectionDone) = true; *((volatile bool*)&g_x86DetectionDone) = true;
} }
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
// http://stackoverflow.com/questions/26701262/how-to-check-the-existence-of-neon-on-arm
#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
bool g_ArmDetectionDone = false; // The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
bool g_hasNEON = false, g_hasCRC32 = false, g_hasAES = false, g_hasSHA1 = false, g_hasSHA2 = false; // Attempting to run the code results in a SIGILL and termination.
//
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
// The ARM equivalent of CPUID is reading a MSR. For example, fetch crypto capabilities with:
// #if defined(__arm64__) || defined(__aarch64__) // #if defined(__arm64__) || defined(__aarch64__)
// word64 caps = 0; // Read ID_AA64ISAR0_EL1 // word64 caps = 0; // Read ID_AA64ISAR0_EL1
// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); // __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
@ -315,12 +320,18 @@ word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
// word32 caps = 0; // Read ID_ISAR5_EL1 // word32 caps = 0; // Read ID_ISAR5_EL1
// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); // __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
// #endif // #endif
// The code requires Exception Level 1 (EL1) and above, but user space runs at EL0. //
// Attempting to run the code results in a SIGILL and termination. // The following does not work well either. Its appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
//
bool g_ArmDetectionDone = false;
bool g_hasNEON = false, g_hasCRC32 = false, g_hasAES = false, g_hasSHA1 = false, g_hasSHA2 = false;
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C" { extern "C"
{
static jmp_buf s_jmpNoNEON; static jmp_buf s_jmpNoNEON;
static void SigIllHandlerNEON(int) static void SigIllHandlerNEON(int)
{ {
@ -357,51 +368,62 @@ static bool TryNEON()
{ {
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
static const uint32_t v1[4] = {1,1,1,1}; uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1); uint32x4_t x1 = vld1q_u32(v1);
static const uint64_t v2[2] = {1,1}; uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2); uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(0); uint32x4_t x3 = vdupq_n_u32(2);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(0); uint64x2_t x4 = vdupq_n_u64(2);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true; volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON); volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
if (oldHandler == SIG_ERR) if (oldHandler == SIG_ERR)
result = false; return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoNEON)) if (setjmp(s_jmpNoNEON))
result = false; result = false;
else else
{ {
static const uint32_t v1[4] = {1,1,1,1}; uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1); uint32x4_t x1 = vld1q_u32(v1);
static const uint64_t v2[2] = {1,1}; uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2); uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(0); uint32x4_t x3 = {0,0,0,0};
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(0); uint64x2_t x4 = {0,0};
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
} }
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler); signal(SIGILL, oldHandler);
return result; return result;
# endif # endif
@ -414,37 +436,48 @@ static bool TryCRC32()
{ {
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
word32 w=0, x=0; word16 y=0; byte z=0; word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x); w = __crc32cw(w,x);
w = __crc32ch(w,y); w = __crc32ch(w,y);
w = __crc32cb(w,z); w = __crc32cb(w,z);
result = !!w;
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true; volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32); volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
if (oldHandler == SIG_ERR) if (oldHandler == SIG_ERR)
result = false; return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoCRC32)) if (setjmp(s_jmpNoCRC32))
result = false; result = false;
else else
{ {
word32 w=0, x=0; word16 y=0; byte z=0; word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x); w = __crc32cw(w,x);
w = __crc32ch(w,y); w = __crc32ch(w,y);
w = __crc32cb(w,z); w = __crc32cb(w,z);
// Hack... GCC optimizes away the code and returns true
result = !!w;
} }
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler); signal(SIGILL, oldHandler);
return result; return result;
# endif # endif
@ -457,38 +490,47 @@ static bool TryAES()
{ {
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
// AES encrypt and decrypt // AES encrypt and decrypt
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key); uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key); uint8x16_t r2 = vaesdq_u8(data, key);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2);
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true; volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES); volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
if (oldHandler == SIG_ERR) if (oldHandler == SIG_ERR)
result = false; return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoAES)) if (setjmp(s_jmpNoAES))
result = false; result = false;
else else
{ {
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key); uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key); uint8x16_t r2 = vaesdq_u8(data, key);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
} }
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler); signal(SIGILL, oldHandler);
return result; return result;
# endif # endif
@ -501,43 +543,54 @@ static bool TrySHA1()
{ {
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
static const uint32x4_t data = vdupq_n_u32(0); uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
static const uint32_t hash = 0x0;
uint32x4_t r1 = vsha1cq_u32 (data, hash, data); uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
uint32x4_t r2 = vsha1mq_u32 (data, hash, data); uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
uint32x4_t r3 = vsha1pq_u32 (data, hash, data); uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3); uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
uint32x4_t r5 = vsha1su1q_u32 (data1, data2);
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true; volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1); volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
if (oldHandler == SIG_ERR) if (oldHandler == SIG_ERR)
result = false; return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoSHA1)) if (setjmp(s_jmpNoSHA1))
result = false; result = false;
else else
{ {
static const uint32x4_t data = vdupq_n_u32(0); uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
static const uint32_t hash = 0x0;
uint32x4_t r1 = vsha1cq_u32 (data, hash, data); uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
uint32x4_t r2 = vsha1mq_u32 (data, hash, data); uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
uint32x4_t r3 = vsha1pq_u32 (data, hash, data); uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3); uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
uint32x4_t r5 = vsha1su1q_u32 (data1, data2);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
} }
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler); signal(SIGILL, oldHandler);
return result; return result;
# endif # endif
@ -550,45 +603,52 @@ static bool TrySHA2()
{ {
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
static const uint32x4_t data = vdupq_n_u32(0); uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
static const uint32x4_t hash = vdupq_n_u32(0);
uint32x4_t r1 = vsha256hq_u32 (hash, hash, data); uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data); uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
uint32x4_t r3 = vsha256su0q_u32 (data, data); uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
uint32x4_t r4 = vsha256su1q_u32 (data, data, data); uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4);
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true; volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2); volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
if (oldHandler == SIG_ERR) if (oldHandler == SIG_ERR)
result = false; return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoSHA2)) if (setjmp(s_jmpNoSHA2))
result = false; result = false;
else else
{ {
static const uint32x4_t data = vdupq_n_u32(0); uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
static const uint32x4_t hash = vdupq_n_u32(0);
uint32x4_t r1 = vsha256hq_u32 (hash, hash, data); uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data); uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
uint32x4_t r3 = vsha256su0q_u32 (data, data); uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
uint32x4_t r4 = vsha256su1q_u32 (data, data, data); uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
} }
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler); signal(SIGILL, oldHandler);
return result; return result;
# endif # endif

View File

@ -46,7 +46,8 @@ IS_NETBSD=$(uname -s | grep -i -c netbsd)
IS_X86=$(uname -m | egrep -i -c "(i386|i586|i686|amd64|x86_64)") IS_X86=$(uname -m | egrep -i -c "(i386|i586|i686|amd64|x86_64)")
IS_X64=$(uname -m | egrep -i -c "(amd64|x86_64)") IS_X64=$(uname -m | egrep -i -c "(amd64|x86_64)")
IS_PPC=$(uname -m | egrep -i -c "(Power|PPC)") IS_PPC=$(uname -m | egrep -i -c "(Power|PPC)")
IS_ARM=$(uname -m | egrep -i -c "arm") IS_ARM32=$(uname -m | egrep -i -c "arm|aarch32")
IS_ARM64=$(uname -m | egrep -i -c "arm64|aarch64")
# We need to use the C++ compiler to determine if c++11 is available. Otherwise # We need to use the C++ compiler to determine if c++11 is available. Otherwise
# a mis-detection occurs on Mac OS X 10.9 and above. Below, we use the same # a mis-detection occurs on Mac OS X 10.9 and above. Below, we use the same
@ -137,7 +138,7 @@ fi
# Set to 0 if you don't have PPC multiarch # Set to 0 if you don't have PPC multiarch
HAVE_PPC_MULTIARCH=0 HAVE_PPC_MULTIARCH=0
if [ "$IS_DARWIN" -ne "0" ] && [ "$IS_PPC" -ne "0" ]; then if [ "$IS_DARWIN" -ne "0" ] && [ "$IS_PPC" -ne "0" ]; then
$CXX -x -DCRYPTOPP_ADHOC_MAIN c++ -arch ppc -arch ppc64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 $CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then if [ "$?" -eq "0" ]; then
HAVE_PPC_MULTIARCH=1 HAVE_PPC_MULTIARCH=1
fi fi
@ -145,12 +146,26 @@ fi
HAVE_X32=0 HAVE_X32=0
if [ "$IS_X64" -ne "0" ]; then if [ "$IS_X64" -ne "0" ]; then
$CXX -x -DCRYPTOPP_ADHOC_MAIN c++ -mx32 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 $CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then if [ "$?" -eq "0" ]; then
HAVE_X32=1 HAVE_X32=1
fi fi
fi fi
# Set to 0 if you don't have ARMv8
HAVE_ARM_CRC=0
HAVE_ARM_CRYPTO=0
if [ "$IS_ARM32" -ne "0" ] || [ "$IS_ARM64" -ne "0" ]; then
$CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
HAVE_ARM_CRC=1
fi
$CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
HAVE_ARM_CRYPTO=1
fi
fi
# Set to 0 if you don't have Valgrind. Valgrind tests take a long time... # Set to 0 if you don't have Valgrind. Valgrind tests take a long time...
HAVE_VALGRIND=$(which valgrind 2>&1 | grep -v "no valgrind" | grep -i -c valgrind) HAVE_VALGRIND=$(which valgrind 2>&1 | grep -v "no valgrind" | grep -i -c valgrind)
@ -165,16 +180,16 @@ echo "HAVE_UBSAN: $HAVE_UBSAN" | tee -a "$TEST_RESULTS"
if [ "$HAVE_VALGRIND" -ne "0" ]; then if [ "$HAVE_VALGRIND" -ne "0" ]; then
echo "HAVE_VALGRIND: $HAVE_VALGRIND" | tee -a "$TEST_RESULTS" echo "HAVE_VALGRIND: $HAVE_VALGRIND" | tee -a "$TEST_RESULTS"
fi fi
if [ "$IS_DARWIN" -ne "0" ]; then
echo "IS_DARWIN: $IS_DARWIN" | tee -a "$TEST_RESULTS"
unset MallocScribble MallocPreScribble MallocGuardEdges
fi
if [ "$HAVE_INTEL_MULTIARCH" -ne "0" ]; then if [ "$HAVE_INTEL_MULTIARCH" -ne "0" ]; then
echo "HAVE_INTEL_MULTIARCH: $HAVE_INTEL_MULTIARCH" | tee -a "$TEST_RESULTS" echo "HAVE_INTEL_MULTIARCH: $HAVE_INTEL_MULTIARCH" | tee -a "$TEST_RESULTS"
fi fi
if [ "$HAVE_PPC_MULTIARCH" -ne "0" ]; then if [ "$HAVE_PPC_MULTIARCH" -ne "0" ]; then
echo "HAVE_PPC_MULTIARCH: $HAVE_PPC_MULTIARCH" | tee -a "$TEST_RESULTS" echo "HAVE_PPC_MULTIARCH: $HAVE_PPC_MULTIARCH" | tee -a "$TEST_RESULTS"
fi fi
if [ "$IS_DARWIN" -ne "0" ]; then
echo "IS_DARWIN: $IS_DARWIN" | tee -a "$TEST_RESULTS"
unset MallocScribble MallocPreScribble MallocGuardEdges
fi
if [ "$IS_LINUX" -ne "0" ]; then if [ "$IS_LINUX" -ne "0" ]; then
echo "IS_LINUX: $IS_LINUX" | tee -a "$TEST_RESULTS" echo "IS_LINUX: $IS_LINUX" | tee -a "$TEST_RESULTS"
fi fi
@ -184,6 +199,16 @@ fi
if [ "$IS_MINGW" -ne "0" ]; then if [ "$IS_MINGW" -ne "0" ]; then
echo "IS_MINGW: $IS_MINGW" | tee -a "$TEST_RESULTS" echo "IS_MINGW: $IS_MINGW" | tee -a "$TEST_RESULTS"
fi fi
if [ "$IS_ARM64" -ne "0" ]; then
echo "IS_ARM64: $IS_ARM64" | tee -a "$TEST_RESULTS"
elif [ "$IS_ARM32" -ne "0" ]; then
echo "IS_ARM32: $IS_ARM32" | tee -a "$TEST_RESULTS"
fi
if [ "$IS_X64" -ne "0" ]; then
echo "IS_X64: $IS_X64" | tee -a "$TEST_RESULTS"
elif [ "$IS_X86" -ne "0" ]; then
echo "IS_X86: $IS_X86" | tee -a "$TEST_RESULTS"
fi
############################################ ############################################
@ -1615,6 +1640,64 @@ if [ "$IS_DARWIN" -ne "0" ]; then
fi fi
fi fi
############################################
# ARM CRC32
if [ "$HAVE_ARM_CRC" -ne "0" ]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: ARM CRC32" | tee -a "$TEST_RESULTS"
echo
unset CXXFLAGS
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
export CXXFLAGS="-DNDEBUG -g2 -O2 -march=armv8-a+crc $ADD_CXXFLAGS"
"$MAKE" "${MAKEARGS[@]}" static cryptest.exe 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS"
else
./cryptest.exe v 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute validation suite" | tee -a "$TEST_RESULTS"
fi
./cryptest.exe tv all 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute test vectors" | tee -a "$TEST_RESULTS"
fi
fi
fi
############################################
# ARM Crypto
if [ "$HAVE_ARM_CRYPTO" -ne "0" ]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: ARM Crypto" | tee -a "$TEST_RESULTS"
echo
unset CXXFLAGS
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
export CXXFLAGS="-DNDEBUG -g2 -O2 -march=armv8-a+crypto $ADD_CXXFLAGS"
"$MAKE" "${MAKEARGS[@]}" static cryptest.exe 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS"
else
./cryptest.exe v 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute validation suite" | tee -a "$TEST_RESULTS"
fi
./cryptest.exe tv all 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute test vectors" | tee -a "$TEST_RESULTS"
fi
fi
fi
############################################ ############################################
# Benchmarks, c++03 # Benchmarks, c++03
if [ "$HAVE_CXX03" -ne "0" ]; then if [ "$HAVE_CXX03" -ne "0" ]; then
@ -1631,7 +1714,7 @@ if [ "$HAVE_CXX03" -ne "0" ]; then
if [ "${PIPESTATUS[0]}" -ne "0" ]; then if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS" echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS"
else else
./cryptest.exe b 1 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS" ./cryptest.exe b 3 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute benchmarks" | tee -a "$BENCHMARK_RESULTS" echo "ERROR: failed to execute benchmarks" | tee -a "$BENCHMARK_RESULTS"
fi fi
@ -1656,7 +1739,7 @@ if [ "$HAVE_CXX11" -ne "0" ]; then
if [ "${PIPESTATUS[0]}" -ne "0" ]; then if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS" echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS"
else else
./cryptest.exe b 1 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS" ./cryptest.exe b 3 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute benchmarks" | tee -a "$BENCHMARK_RESULTS" echo "ERROR: failed to execute benchmarks" | tee -a "$BENCHMARK_RESULTS"
fi fi