From 1ee66a8fc207b8df8f13a04a159f3c55394216af Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Wed, 1 Jun 2016 03:10:18 -0400 Subject: [PATCH] Merge arm-neon into master --- blake2.cpp | 4 +- config.h | 15 +--- config.recommend | 15 +--- cpu.cpp | 230 +++++++++++++++++++++++++++++------------------ cryptest.sh | 119 ++++++++++++++++++++---- 5 files changed, 250 insertions(+), 133 deletions(-) diff --git a/blake2.cpp b/blake2.cpp index a18de170..96b5e73b 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -496,7 +496,6 @@ void BLAKE2_CXX_Compress64(const byte* input, BLAKE2_State& state) } while(0) word64 m[16], v[16]; - unsigned int i; GetBlock get1(input); get1(m[0])(m[1])(m[2])(m[3])(m[4])(m[5])(m[6])(m[7])(m[8])(m[9])(m[10])(m[11])(m[12])(m[13])(m[14])(m[15]); @@ -3441,6 +3440,7 @@ static const int LANE_L64 = 0; static void BLAKE2_NEON_Compress32(const byte* input, BLAKE2_State& state) { + //assert(IsAlignedOn(input,GetAlignmentOf())); assert(IsAlignedOn(&state.h[0],GetAlignmentOf())); assert(IsAlignedOn(&state.h[4],GetAlignmentOf())); assert(IsAlignedOn(&state.t[0],GetAlignmentOf())); @@ -3946,7 +3946,7 @@ static void BLAKE2_NEON_Compress32(const byte* input, BLAKE2_State& state) { - assert(IsAlignedOn(input,GetAlignmentOf())); + //assert(IsAlignedOn(input,GetAlignmentOf())); assert(IsAlignedOn(&state.h[0],GetAlignmentOf())); assert(IsAlignedOn(&state.h[4],GetAlignmentOf())); assert(IsAlignedOn(&state.t[0],GetAlignmentOf())); diff --git a/config.h b/config.h index 3716c890..2c281e38 100644 --- a/config.h +++ b/config.h @@ -759,20 +759,7 @@ NAMESPACE_END # define CRYPTOPP_CXX11_ATOMICS 1 #endif // atomics -// atomics: MS at VS2012 (17.00); GCC at 4.4; Clang at 3.1/3.2; and Intel 13.0. -#if (CRYPTOPP_MSC_VERSION >= 1700) -# define CRYPTOPP_CXX11_ATOMICS 1 -#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300) -# define CRYPTOPP_CXX11_ATOMICS 1 -#elif defined(__clang__) -# if __has_feature(cxx_atomic) -# define CRYPTOPP_CXX11_ATOMICS 1 -# endif -#elif (CRYPTOPP_GCC_VERSION >= 40400) -# define CRYPTOPP_CXX11_ATOMICS 1 -#endif // atomics - -// alignof/alignas: MS at VS2013 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0. +// alignof/alignas: MS at VS2015 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0. #if (CRYPTOPP_MSC_VERSION >= 1900) # define CRYPTOPP_CXX11_ALIGNAS 1 # define CRYPTOPP_CXX11_ALIGNOF 1 diff --git a/config.recommend b/config.recommend index 44ff61df..d5f898ba 100644 --- a/config.recommend +++ b/config.recommend @@ -757,20 +757,7 @@ NAMESPACE_END # define CRYPTOPP_CXX11_ATOMICS 1 #endif // atomics -// atomics: MS at VS2012 (17.00); GCC at 4.4; Clang at 3.1/3.2; and Intel 13.0. -#if (CRYPTOPP_MSC_VERSION >= 1700) -# define CRYPTOPP_CXX11_ATOMICS 1 -#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300) -# define CRYPTOPP_CXX11_ATOMICS 1 -#elif defined(__clang__) -# if __has_feature(cxx_atomic) -# define CRYPTOPP_CXX11_ATOMICS 1 -# endif -#elif (CRYPTOPP_GCC_VERSION >= 40400) -# define CRYPTOPP_CXX11_ATOMICS 1 -#endif // atomics - -// alignof/alignas: MS at VS2013 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0. +// alignof/alignas: MS at VS2015 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0. #if (CRYPTOPP_MSC_VERSION >= 1900) # define CRYPTOPP_CXX11_ALIGNAS 1 # define CRYPTOPP_CXX11_ALIGNOF 1 diff --git a/cpu.cpp b/cpu.cpp index d8f7d3c5..7c39cccd 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -48,19 +48,19 @@ bool CpuId(word32 input, word32 output[4]) #else #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -extern "C" { - -static jmp_buf s_jmpNoCPUID; -static void SigIllHandlerCPUID(int) +extern "C" { - longjmp(s_jmpNoCPUID, 1); -} + static jmp_buf s_jmpNoCPUID; + static void SigIllHandlerCPUID(int) + { + longjmp(s_jmpNoCPUID, 1); + } -static jmp_buf s_jmpNoSSE2; -static void SigIllHandlerSSE2(int) -{ - longjmp(s_jmpNoSSE2, 1); -} + static jmp_buf s_jmpNoSSE2; + static void SigIllHandlerSSE2(int) + { + longjmp(s_jmpNoSSE2, 1); + } } #endif @@ -97,9 +97,13 @@ bool CpuId(word32 input, word32 output[4]) // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); if (oldHandler == SIG_ERR) - result = false; + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; if (setjmp(s_jmpNoCPUID)) result = false; @@ -119,6 +123,7 @@ bool CpuId(word32 input, word32 output[4]) ); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; #endif @@ -151,12 +156,16 @@ static bool TrySSE2() // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); if (oldHandler == SIG_ERR) return false; + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; + if (setjmp(s_jmpNoSSE2)) - result = true; + result = false; else { #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE @@ -167,6 +176,7 @@ static bool TrySSE2() #endif } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; #endif @@ -298,29 +308,30 @@ void DetectX86Features() *((volatile bool*)&g_x86DetectionDone) = true; } -// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu -// http://stackoverflow.com/questions/26701262/how-to-check-the-existence-of-neon-on-arm #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) +// The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0. +// Attempting to run the code results in a SIGILL and termination. +// +// #if defined(__arm64__) || defined(__aarch64__) +// word64 caps = 0; // Read ID_AA64ISAR0_EL1 +// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); +// #elif defined(__arm__) || defined(__aarch32__) +// word32 caps = 0; // Read ID_ISAR5_EL1 +// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); +// #endif +// +// The following does not work well either. Its appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64 +// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu +// bool g_ArmDetectionDone = false; bool g_hasNEON = false, g_hasCRC32 = false, g_hasAES = false, g_hasSHA1 = false, g_hasSHA2 = false; word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; -// The ARM equivalent of CPUID is reading a MSR. For example, fetch crypto capabilities with: -// #if defined(__arm64__) || defined(__aarch64__) -// word64 caps = 0; // Read ID_AA64ISAR0_EL1 -// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); -// #elif defined(__arm__) || defined(__aarch32__) -// word32 caps = 0; // Read ID_ISAR5_EL1 -// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); -// #endif -// The code requires Exception Level 1 (EL1) and above, but user space runs at EL0. -// Attempting to run the code results in a SIGILL and termination. - #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -extern "C" { - +extern "C" +{ static jmp_buf s_jmpNoNEON; static void SigIllHandlerNEON(int) { @@ -357,51 +368,62 @@ static bool TryNEON() { #if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; __try { - static const uint32_t v1[4] = {1,1,1,1}; + uint32_t v1[4] = {1,1,1,1}; uint32x4_t x1 = vld1q_u32(v1); - static const uint64_t v2[2] = {1,1}; + uint64_t v2[2] = {1,1}; uint64x2_t x2 = vld1q_u64(v2); - uint32x4_t x3 = vdupq_n_u32(0); + uint32x4_t x3 = vdupq_n_u32(2); x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); - uint64x2_t x4 = vdupq_n_u64(0); + uint64x2_t x4 = vdupq_n_u64(2); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); + + result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1)); } __except (EXCEPTION_EXECUTE_HANDLER) { return false; } - return true; + return result; # else // longjmp and clobber warnings. Volatile is required. // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON); if (oldHandler == SIG_ERR) - result = false; + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; if (setjmp(s_jmpNoNEON)) result = false; else { - static const uint32_t v1[4] = {1,1,1,1}; + uint32_t v1[4] = {1,1,1,1}; uint32x4_t x1 = vld1q_u32(v1); - static const uint64_t v2[2] = {1,1}; + uint64_t v2[2] = {1,1}; uint64x2_t x2 = vld1q_u64(v2); - uint32x4_t x3 = vdupq_n_u32(0); + uint32x4_t x3 = {0,0,0,0}; x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0); x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3); - uint64x2_t x4 = vdupq_n_u64(0); + uint64x2_t x4 = {0,0}; x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1)); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif @@ -414,37 +436,48 @@ static bool TryCRC32() { #if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; __try { - word32 w=0, x=0; word16 y=0; byte z=0; + word32 w=0, x=1; word16 y=2; byte z=3; w = __crc32cw(w,x); w = __crc32ch(w,y); w = __crc32cb(w,z); + + result = !!w; } __except (EXCEPTION_EXECUTE_HANDLER) { return false; } - return true; + return result; # else // longjmp and clobber warnings. Volatile is required. // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32); if (oldHandler == SIG_ERR) - result = false; + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; if (setjmp(s_jmpNoCRC32)) result = false; else { - word32 w=0, x=0; word16 y=0; byte z=0; + word32 w=0, x=1; word16 y=2; byte z=3; w = __crc32cw(w,x); w = __crc32ch(w,y); w = __crc32cb(w,z); + + // Hack... GCC optimizes away the code and returns true + result = !!w; } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif @@ -457,38 +490,47 @@ static bool TryAES() { #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; __try { // AES encrypt and decrypt - static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); + uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); uint8x16_t r1 = vaeseq_u8(data, key); uint8x16_t r2 = vaesdq_u8(data, key); - CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2); + + result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7)); } __except (EXCEPTION_EXECUTE_HANDLER) { return false; } - return true; + return result; # else // longjmp and clobber warnings. Volatile is required. // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES); if (oldHandler == SIG_ERR) - result = false; + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; if (setjmp(s_jmpNoAES)) result = false; else { - static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); + uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); uint8x16_t r1 = vaeseq_u8(data, key); uint8x16_t r2 = vaesdq_u8(data, key); - CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7)); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif @@ -501,43 +543,54 @@ static bool TrySHA1() { #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; __try { - static const uint32x4_t data = vdupq_n_u32(0); - static const uint32_t hash = 0x0; + uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; - uint32x4_t r1 = vsha1cq_u32 (data, hash, data); - uint32x4_t r2 = vsha1mq_u32 (data, hash, data); - uint32x4_t r3 = vsha1pq_u32 (data, hash, data); - CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3); + uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2); + uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2); + uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2); + uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3); + uint32x4_t r5 = vsha1su1q_u32 (data1, data2); + + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0)); } __except (EXCEPTION_EXECUTE_HANDLER) { return false; } - return true; + return result; # else // longjmp and clobber warnings. Volatile is required. // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1); if (oldHandler == SIG_ERR) - result = false; + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; if (setjmp(s_jmpNoSHA1)) result = false; else { - static const uint32x4_t data = vdupq_n_u32(0); - static const uint32_t hash = 0x0; + uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; - uint32x4_t r1 = vsha1cq_u32 (data, hash, data); - uint32x4_t r2 = vsha1mq_u32 (data, hash, data); - uint32x4_t r3 = vsha1pq_u32 (data, hash, data); - CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3); + uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2); + uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2); + uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2); + uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3); + uint32x4_t r5 = vsha1su1q_u32 (data1, data2); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0)); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif @@ -550,45 +603,52 @@ static bool TrySHA2() { #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; __try { - static const uint32x4_t data = vdupq_n_u32(0); - static const uint32x4_t hash = vdupq_n_u32(0); + uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; - uint32x4_t r1 = vsha256hq_u32 (hash, hash, data); - uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data); - uint32x4_t r3 = vsha256su0q_u32 (data, data); - uint32x4_t r4 = vsha256su1q_u32 (data, data, data); - CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4); + uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3); + uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3); + uint32x4_t r3 = vsha256su0q_u32 (data1, data2); + uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3); + + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3)); } __except (EXCEPTION_EXECUTE_HANDLER) { return false; } - return true; + return result; # else // longjmp and clobber warnings. Volatile is required. // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 volatile bool result = true; - SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2); + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2); if (oldHandler == SIG_ERR) - result = false; + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) + return false; if (setjmp(s_jmpNoSHA2)) result = false; else { - static const uint32x4_t data = vdupq_n_u32(0); - static const uint32x4_t hash = vdupq_n_u32(0); + uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12}; - uint32x4_t r1 = vsha256hq_u32 (hash, hash, data); - uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data); - uint32x4_t r3 = vsha256su0q_u32 (data, data); - uint32x4_t r4 = vsha256su1q_u32 (data, data, data); - CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4); + uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3); + uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3); + uint32x4_t r3 = vsha256su0q_u32 (data1, data2); + uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3); + + // Hack... GCC optimizes away the code and returns true + result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3)); } + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); signal(SIGILL, oldHandler); return result; # endif diff --git a/cryptest.sh b/cryptest.sh index 5c38d03f..e34a6db8 100755 --- a/cryptest.sh +++ b/cryptest.sh @@ -46,7 +46,8 @@ IS_NETBSD=$(uname -s | grep -i -c netbsd) IS_X86=$(uname -m | egrep -i -c "(i386|i586|i686|amd64|x86_64)") IS_X64=$(uname -m | egrep -i -c "(amd64|x86_64)") IS_PPC=$(uname -m | egrep -i -c "(Power|PPC)") -IS_ARM=$(uname -m | egrep -i -c "arm") +IS_ARM32=$(uname -m | egrep -i -c "arm|aarch32") +IS_ARM64=$(uname -m | egrep -i -c "arm64|aarch64") # We need to use the C++ compiler to determine if c++11 is available. Otherwise # a mis-detection occurs on Mac OS X 10.9 and above. Below, we use the same @@ -128,27 +129,41 @@ fi # Set to 0 if you don't have Intel multiarch HAVE_INTEL_MULTIARCH=0 if [ "$IS_DARWIN" -ne "0" ] && [ "$IS_X86" -ne "0" ]; then -$CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 -if [ "$?" -eq "0" ]; then - HAVE_INTEL_MULTIARCH=1 -fi + $CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 + if [ "$?" -eq "0" ]; then + HAVE_INTEL_MULTIARCH=1 + fi fi # Set to 0 if you don't have PPC multiarch HAVE_PPC_MULTIARCH=0 if [ "$IS_DARWIN" -ne "0" ] && [ "$IS_PPC" -ne "0" ]; then -$CXX -x -DCRYPTOPP_ADHOC_MAIN c++ -arch ppc -arch ppc64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 -if [ "$?" -eq "0" ]; then - HAVE_PPC_MULTIARCH=1 -fi + $CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 + if [ "$?" -eq "0" ]; then + HAVE_PPC_MULTIARCH=1 + fi fi HAVE_X32=0 if [ "$IS_X64" -ne "0" ]; then -$CXX -x -DCRYPTOPP_ADHOC_MAIN c++ -mx32 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 -if [ "$?" -eq "0" ]; then - HAVE_X32=1 + $CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 + if [ "$?" -eq "0" ]; then + HAVE_X32=1 + fi fi + +# Set to 0 if you don't have ARMv8 +HAVE_ARM_CRC=0 +HAVE_ARM_CRYPTO=0 +if [ "$IS_ARM32" -ne "0" ] || [ "$IS_ARM64" -ne "0" ]; then + $CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 + if [ "$?" -eq "0" ]; then + HAVE_ARM_CRC=1 + fi + $CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1 + if [ "$?" -eq "0" ]; then + HAVE_ARM_CRYPTO=1 + fi fi # Set to 0 if you don't have Valgrind. Valgrind tests take a long time... @@ -165,16 +180,16 @@ echo "HAVE_UBSAN: $HAVE_UBSAN" | tee -a "$TEST_RESULTS" if [ "$HAVE_VALGRIND" -ne "0" ]; then echo "HAVE_VALGRIND: $HAVE_VALGRIND" | tee -a "$TEST_RESULTS" fi -if [ "$IS_DARWIN" -ne "0" ]; then - echo "IS_DARWIN: $IS_DARWIN" | tee -a "$TEST_RESULTS" - unset MallocScribble MallocPreScribble MallocGuardEdges -fi if [ "$HAVE_INTEL_MULTIARCH" -ne "0" ]; then echo "HAVE_INTEL_MULTIARCH: $HAVE_INTEL_MULTIARCH" | tee -a "$TEST_RESULTS" fi if [ "$HAVE_PPC_MULTIARCH" -ne "0" ]; then echo "HAVE_PPC_MULTIARCH: $HAVE_PPC_MULTIARCH" | tee -a "$TEST_RESULTS" fi +if [ "$IS_DARWIN" -ne "0" ]; then + echo "IS_DARWIN: $IS_DARWIN" | tee -a "$TEST_RESULTS" + unset MallocScribble MallocPreScribble MallocGuardEdges +fi if [ "$IS_LINUX" -ne "0" ]; then echo "IS_LINUX: $IS_LINUX" | tee -a "$TEST_RESULTS" fi @@ -184,6 +199,16 @@ fi if [ "$IS_MINGW" -ne "0" ]; then echo "IS_MINGW: $IS_MINGW" | tee -a "$TEST_RESULTS" fi +if [ "$IS_ARM64" -ne "0" ]; then + echo "IS_ARM64: $IS_ARM64" | tee -a "$TEST_RESULTS" +elif [ "$IS_ARM32" -ne "0" ]; then + echo "IS_ARM32: $IS_ARM32" | tee -a "$TEST_RESULTS" +fi +if [ "$IS_X64" -ne "0" ]; then + echo "IS_X64: $IS_X64" | tee -a "$TEST_RESULTS" +elif [ "$IS_X86" -ne "0" ]; then + echo "IS_X86: $IS_X86" | tee -a "$TEST_RESULTS" +fi ############################################ @@ -1615,6 +1640,64 @@ if [ "$IS_DARWIN" -ne "0" ]; then fi fi +############################################ +# ARM CRC32 +if [ "$HAVE_ARM_CRC" -ne "0" ]; then + echo + echo "************************************" | tee -a "$TEST_RESULTS" + echo "Testing: ARM CRC32" | tee -a "$TEST_RESULTS" + echo + + unset CXXFLAGS + "$MAKE" clean > /dev/null 2>&1 + rm -f adhoc.cpp > /dev/null 2>&1 + + export CXXFLAGS="-DNDEBUG -g2 -O2 -march=armv8-a+crc $ADD_CXXFLAGS" + "$MAKE" "${MAKEARGS[@]}" static cryptest.exe 2>&1 | tee -a "$TEST_RESULTS" + + if [ "${PIPESTATUS[0]}" -ne "0" ]; then + echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS" + else + ./cryptest.exe v 2>&1 | tee -a "$TEST_RESULTS" + if [ "${PIPESTATUS[0]}" -ne "0" ]; then + echo "ERROR: failed to execute validation suite" | tee -a "$TEST_RESULTS" + fi + ./cryptest.exe tv all 2>&1 | tee -a "$TEST_RESULTS" + if [ "${PIPESTATUS[0]}" -ne "0" ]; then + echo "ERROR: failed to execute test vectors" | tee -a "$TEST_RESULTS" + fi + fi +fi + +############################################ +# ARM Crypto +if [ "$HAVE_ARM_CRYPTO" -ne "0" ]; then + echo + echo "************************************" | tee -a "$TEST_RESULTS" + echo "Testing: ARM Crypto" | tee -a "$TEST_RESULTS" + echo + + unset CXXFLAGS + "$MAKE" clean > /dev/null 2>&1 + rm -f adhoc.cpp > /dev/null 2>&1 + + export CXXFLAGS="-DNDEBUG -g2 -O2 -march=armv8-a+crypto $ADD_CXXFLAGS" + "$MAKE" "${MAKEARGS[@]}" static cryptest.exe 2>&1 | tee -a "$TEST_RESULTS" + + if [ "${PIPESTATUS[0]}" -ne "0" ]; then + echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS" + else + ./cryptest.exe v 2>&1 | tee -a "$TEST_RESULTS" + if [ "${PIPESTATUS[0]}" -ne "0" ]; then + echo "ERROR: failed to execute validation suite" | tee -a "$TEST_RESULTS" + fi + ./cryptest.exe tv all 2>&1 | tee -a "$TEST_RESULTS" + if [ "${PIPESTATUS[0]}" -ne "0" ]; then + echo "ERROR: failed to execute test vectors" | tee -a "$TEST_RESULTS" + fi + fi +fi + ############################################ # Benchmarks, c++03 if [ "$HAVE_CXX03" -ne "0" ]; then @@ -1631,7 +1714,7 @@ if [ "$HAVE_CXX03" -ne "0" ]; then if [ "${PIPESTATUS[0]}" -ne "0" ]; then echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS" else - ./cryptest.exe b 1 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS" + ./cryptest.exe b 3 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS" if [ "${PIPESTATUS[0]}" -ne "0" ]; then echo "ERROR: failed to execute benchmarks" | tee -a "$BENCHMARK_RESULTS" fi @@ -1656,7 +1739,7 @@ if [ "$HAVE_CXX11" -ne "0" ]; then if [ "${PIPESTATUS[0]}" -ne "0" ]; then echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS" else - ./cryptest.exe b 1 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS" + ./cryptest.exe b 3 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS" if [ "${PIPESTATUS[0]}" -ne "0" ]; then echo "ERROR: failed to execute benchmarks" | tee -a "$BENCHMARK_RESULTS" fi