Merge arm-neon into master

pull/175/head
Jeffrey Walton 2016-06-01 03:10:18 -04:00
parent 432db09b72
commit 1ee66a8fc2
5 changed files with 250 additions and 133 deletions

View File

@ -496,7 +496,6 @@ void BLAKE2_CXX_Compress64(const byte* input, BLAKE2_State<word64, true>& state)
} while(0)
word64 m[16], v[16];
unsigned int i;
GetBlock<word64, LittleEndian, true> get1(input);
get1(m[0])(m[1])(m[2])(m[3])(m[4])(m[5])(m[6])(m[7])(m[8])(m[9])(m[10])(m[11])(m[12])(m[13])(m[14])(m[15]);
@ -3441,6 +3440,7 @@ static const int LANE_L64 = 0;
static void BLAKE2_NEON_Compress32(const byte* input, BLAKE2_State<word32, false>& state)
{
//assert(IsAlignedOn(input,GetAlignmentOf<uint8_t*>()));
assert(IsAlignedOn(&state.h[0],GetAlignmentOf<uint32x4_t>()));
assert(IsAlignedOn(&state.h[4],GetAlignmentOf<uint32x4_t>()));
assert(IsAlignedOn(&state.t[0],GetAlignmentOf<uint32x4_t>()));
@ -3946,7 +3946,7 @@ static void BLAKE2_NEON_Compress32(const byte* input, BLAKE2_State<word32, false
static void BLAKE2_NEON_Compress64(const byte* input, BLAKE2_State<word64, true>& state)
{
assert(IsAlignedOn(input,GetAlignmentOf<uint8x16_t>()));
//assert(IsAlignedOn(input,GetAlignmentOf<uint8_t*>()));
assert(IsAlignedOn(&state.h[0],GetAlignmentOf<uint64x2_t>()));
assert(IsAlignedOn(&state.h[4],GetAlignmentOf<uint64x2_t>()));
assert(IsAlignedOn(&state.t[0],GetAlignmentOf<uint64x2_t>()));

View File

@ -759,20 +759,7 @@ NAMESPACE_END
# define CRYPTOPP_CXX11_ATOMICS 1
#endif // atomics
// atomics: MS at VS2012 (17.00); GCC at 4.4; Clang at 3.1/3.2; and Intel 13.0.
#if (CRYPTOPP_MSC_VERSION >= 1700)
# define CRYPTOPP_CXX11_ATOMICS 1
#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)
# define CRYPTOPP_CXX11_ATOMICS 1
#elif defined(__clang__)
# if __has_feature(cxx_atomic)
# define CRYPTOPP_CXX11_ATOMICS 1
# endif
#elif (CRYPTOPP_GCC_VERSION >= 40400)
# define CRYPTOPP_CXX11_ATOMICS 1
#endif // atomics
// alignof/alignas: MS at VS2013 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0.
// alignof/alignas: MS at VS2015 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0.
#if (CRYPTOPP_MSC_VERSION >= 1900)
# define CRYPTOPP_CXX11_ALIGNAS 1
# define CRYPTOPP_CXX11_ALIGNOF 1

View File

@ -757,20 +757,7 @@ NAMESPACE_END
# define CRYPTOPP_CXX11_ATOMICS 1
#endif // atomics
// atomics: MS at VS2012 (17.00); GCC at 4.4; Clang at 3.1/3.2; and Intel 13.0.
#if (CRYPTOPP_MSC_VERSION >= 1700)
# define CRYPTOPP_CXX11_ATOMICS 1
#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)
# define CRYPTOPP_CXX11_ATOMICS 1
#elif defined(__clang__)
# if __has_feature(cxx_atomic)
# define CRYPTOPP_CXX11_ATOMICS 1
# endif
#elif (CRYPTOPP_GCC_VERSION >= 40400)
# define CRYPTOPP_CXX11_ATOMICS 1
#endif // atomics
// alignof/alignas: MS at VS2013 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0.
// alignof/alignas: MS at VS2015 (19.00); GCC at 4.8; Clang at 3.3; and Intel 15.0.
#if (CRYPTOPP_MSC_VERSION >= 1900)
# define CRYPTOPP_CXX11_ALIGNAS 1
# define CRYPTOPP_CXX11_ALIGNOF 1

220
cpu.cpp
View File

@ -48,19 +48,19 @@ bool CpuId(word32 input, word32 output[4])
#else
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C" {
static jmp_buf s_jmpNoCPUID;
static void SigIllHandlerCPUID(int)
extern "C"
{
static jmp_buf s_jmpNoCPUID;
static void SigIllHandlerCPUID(int)
{
longjmp(s_jmpNoCPUID, 1);
}
}
static jmp_buf s_jmpNoSSE2;
static void SigIllHandlerSSE2(int)
{
static jmp_buf s_jmpNoSSE2;
static void SigIllHandlerSSE2(int)
{
longjmp(s_jmpNoSSE2, 1);
}
}
}
#endif
@ -97,9 +97,13 @@ bool CpuId(word32 input, word32 output[4])
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
if (oldHandler == SIG_ERR)
result = false;
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoCPUID))
result = false;
@ -119,6 +123,7 @@ bool CpuId(word32 input, word32 output[4])
);
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler);
return result;
#endif
@ -151,12 +156,16 @@ static bool TrySSE2()
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
if (oldHandler == SIG_ERR)
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoSSE2))
result = true;
result = false;
else
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
@ -167,6 +176,7 @@ static bool TrySSE2()
#endif
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler);
return result;
#endif
@ -298,16 +308,11 @@ void DetectX86Features()
*((volatile bool*)&g_x86DetectionDone) = true;
}
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
// http://stackoverflow.com/questions/26701262/how-to-check-the-existence-of-neon-on-arm
#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
bool g_ArmDetectionDone = false;
bool g_hasNEON = false, g_hasCRC32 = false, g_hasAES = false, g_hasSHA1 = false, g_hasSHA2 = false;
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
// The ARM equivalent of CPUID is reading a MSR. For example, fetch crypto capabilities with:
// The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
// Attempting to run the code results in a SIGILL and termination.
//
// #if defined(__arm64__) || defined(__aarch64__)
// word64 caps = 0; // Read ID_AA64ISAR0_EL1
// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
@ -315,12 +320,18 @@ word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
// word32 caps = 0; // Read ID_ISAR5_EL1
// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
// #endif
// The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
// Attempting to run the code results in a SIGILL and termination.
//
// The following does not work well either. Its appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
//
bool g_ArmDetectionDone = false;
bool g_hasNEON = false, g_hasCRC32 = false, g_hasAES = false, g_hasSHA1 = false, g_hasSHA2 = false;
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C" {
extern "C"
{
static jmp_buf s_jmpNoNEON;
static void SigIllHandlerNEON(int)
{
@ -357,51 +368,62 @@ static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
static const uint32_t v1[4] = {1,1,1,1};
uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1);
static const uint64_t v2[2] = {1,1};
uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(0);
uint32x4_t x3 = vdupq_n_u32(2);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(0);
uint64x2_t x4 = vdupq_n_u64(2);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
return result;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
if (oldHandler == SIG_ERR)
result = false;
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoNEON))
result = false;
else
{
static const uint32_t v1[4] = {1,1,1,1};
uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1);
static const uint64_t v2[2] = {1,1};
uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(0);
uint32x4_t x3 = {0,0,0,0};
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(0);
uint64x2_t x4 = {0,0};
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler);
return result;
# endif
@ -414,37 +436,48 @@ static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
word32 w=0, x=0; word16 y=0; byte z=0;
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
result = !!w;
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
return result;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
if (oldHandler == SIG_ERR)
result = false;
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoCRC32))
result = false;
else
{
word32 w=0, x=0; word16 y=0; byte z=0;
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
// Hack... GCC optimizes away the code and returns true
result = !!w;
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler);
return result;
# endif
@ -457,38 +490,47 @@ static bool TryAES()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
// AES encrypt and decrypt
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2);
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
return result;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
if (oldHandler == SIG_ERR)
result = false;
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoAES))
result = false;
else
{
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler);
return result;
# endif
@ -501,43 +543,54 @@ static bool TrySHA1()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
static const uint32x4_t data = vdupq_n_u32(0);
static const uint32_t hash = 0x0;
uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
uint32x4_t r1 = vsha1cq_u32 (data, hash, data);
uint32x4_t r2 = vsha1mq_u32 (data, hash, data);
uint32x4_t r3 = vsha1pq_u32 (data, hash, data);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3);
uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
uint32x4_t r5 = vsha1su1q_u32 (data1, data2);
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
return result;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
if (oldHandler == SIG_ERR)
result = false;
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoSHA1))
result = false;
else
{
static const uint32x4_t data = vdupq_n_u32(0);
static const uint32_t hash = 0x0;
uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
uint32x4_t r1 = vsha1cq_u32 (data, hash, data);
uint32x4_t r2 = vsha1mq_u32 (data, hash, data);
uint32x4_t r3 = vsha1pq_u32 (data, hash, data);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3);
uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
uint32x4_t r5 = vsha1su1q_u32 (data1, data2);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler);
return result;
# endif
@ -550,45 +603,52 @@ static bool TrySHA2()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
static const uint32x4_t data = vdupq_n_u32(0);
static const uint32x4_t hash = vdupq_n_u32(0);
uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
uint32x4_t r1 = vsha256hq_u32 (hash, hash, data);
uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data);
uint32x4_t r3 = vsha256su0q_u32 (data, data);
uint32x4_t r4 = vsha256su1q_u32 (data, data, data);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4);
uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
return result;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
if (oldHandler == SIG_ERR)
result = false;
return false;
volatile sigset_t oldMask;
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
return false;
if (setjmp(s_jmpNoSHA2))
result = false;
else
{
static const uint32x4_t data = vdupq_n_u32(0);
static const uint32x4_t hash = vdupq_n_u32(0);
uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
uint32x4_t r1 = vsha256hq_u32 (hash, hash, data);
uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data);
uint32x4_t r3 = vsha256su0q_u32 (data, data);
uint32x4_t r4 = vsha256su1q_u32 (data, data, data);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4);
uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
signal(SIGILL, oldHandler);
return result;
# endif

View File

@ -46,7 +46,8 @@ IS_NETBSD=$(uname -s | grep -i -c netbsd)
IS_X86=$(uname -m | egrep -i -c "(i386|i586|i686|amd64|x86_64)")
IS_X64=$(uname -m | egrep -i -c "(amd64|x86_64)")
IS_PPC=$(uname -m | egrep -i -c "(Power|PPC)")
IS_ARM=$(uname -m | egrep -i -c "arm")
IS_ARM32=$(uname -m | egrep -i -c "arm|aarch32")
IS_ARM64=$(uname -m | egrep -i -c "arm64|aarch64")
# We need to use the C++ compiler to determine if c++11 is available. Otherwise
# a mis-detection occurs on Mac OS X 10.9 and above. Below, we use the same
@ -128,27 +129,41 @@ fi
# Set to 0 if you don't have Intel multiarch
HAVE_INTEL_MULTIARCH=0
if [ "$IS_DARWIN" -ne "0" ] && [ "$IS_X86" -ne "0" ]; then
$CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
$CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
HAVE_INTEL_MULTIARCH=1
fi
fi
fi
# Set to 0 if you don't have PPC multiarch
HAVE_PPC_MULTIARCH=0
if [ "$IS_DARWIN" -ne "0" ] && [ "$IS_PPC" -ne "0" ]; then
$CXX -x -DCRYPTOPP_ADHOC_MAIN c++ -arch ppc -arch ppc64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
$CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
HAVE_PPC_MULTIARCH=1
fi
fi
fi
HAVE_X32=0
if [ "$IS_X64" -ne "0" ]; then
$CXX -x -DCRYPTOPP_ADHOC_MAIN c++ -mx32 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
$CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
HAVE_X32=1
fi
fi
# Set to 0 if you don't have ARMv8
HAVE_ARM_CRC=0
HAVE_ARM_CRYPTO=0
if [ "$IS_ARM32" -ne "0" ] || [ "$IS_ARM64" -ne "0" ]; then
$CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crc adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
HAVE_ARM_CRC=1
fi
$CXX -x c++ -DCRYPTOPP_ADHOC_MAIN -march=armv8-a+crypto adhoc.cpp.proto -o $TMP/adhoc.exe > /dev/null 2>&1
if [ "$?" -eq "0" ]; then
HAVE_ARM_CRYPTO=1
fi
fi
# Set to 0 if you don't have Valgrind. Valgrind tests take a long time...
@ -165,16 +180,16 @@ echo "HAVE_UBSAN: $HAVE_UBSAN" | tee -a "$TEST_RESULTS"
if [ "$HAVE_VALGRIND" -ne "0" ]; then
echo "HAVE_VALGRIND: $HAVE_VALGRIND" | tee -a "$TEST_RESULTS"
fi
if [ "$IS_DARWIN" -ne "0" ]; then
echo "IS_DARWIN: $IS_DARWIN" | tee -a "$TEST_RESULTS"
unset MallocScribble MallocPreScribble MallocGuardEdges
fi
if [ "$HAVE_INTEL_MULTIARCH" -ne "0" ]; then
echo "HAVE_INTEL_MULTIARCH: $HAVE_INTEL_MULTIARCH" | tee -a "$TEST_RESULTS"
fi
if [ "$HAVE_PPC_MULTIARCH" -ne "0" ]; then
echo "HAVE_PPC_MULTIARCH: $HAVE_PPC_MULTIARCH" | tee -a "$TEST_RESULTS"
fi
if [ "$IS_DARWIN" -ne "0" ]; then
echo "IS_DARWIN: $IS_DARWIN" | tee -a "$TEST_RESULTS"
unset MallocScribble MallocPreScribble MallocGuardEdges
fi
if [ "$IS_LINUX" -ne "0" ]; then
echo "IS_LINUX: $IS_LINUX" | tee -a "$TEST_RESULTS"
fi
@ -184,6 +199,16 @@ fi
if [ "$IS_MINGW" -ne "0" ]; then
echo "IS_MINGW: $IS_MINGW" | tee -a "$TEST_RESULTS"
fi
if [ "$IS_ARM64" -ne "0" ]; then
echo "IS_ARM64: $IS_ARM64" | tee -a "$TEST_RESULTS"
elif [ "$IS_ARM32" -ne "0" ]; then
echo "IS_ARM32: $IS_ARM32" | tee -a "$TEST_RESULTS"
fi
if [ "$IS_X64" -ne "0" ]; then
echo "IS_X64: $IS_X64" | tee -a "$TEST_RESULTS"
elif [ "$IS_X86" -ne "0" ]; then
echo "IS_X86: $IS_X86" | tee -a "$TEST_RESULTS"
fi
############################################
@ -1615,6 +1640,64 @@ if [ "$IS_DARWIN" -ne "0" ]; then
fi
fi
############################################
# ARM CRC32
if [ "$HAVE_ARM_CRC" -ne "0" ]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: ARM CRC32" | tee -a "$TEST_RESULTS"
echo
unset CXXFLAGS
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
export CXXFLAGS="-DNDEBUG -g2 -O2 -march=armv8-a+crc $ADD_CXXFLAGS"
"$MAKE" "${MAKEARGS[@]}" static cryptest.exe 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS"
else
./cryptest.exe v 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute validation suite" | tee -a "$TEST_RESULTS"
fi
./cryptest.exe tv all 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute test vectors" | tee -a "$TEST_RESULTS"
fi
fi
fi
############################################
# ARM Crypto
if [ "$HAVE_ARM_CRYPTO" -ne "0" ]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: ARM Crypto" | tee -a "$TEST_RESULTS"
echo
unset CXXFLAGS
"$MAKE" clean > /dev/null 2>&1
rm -f adhoc.cpp > /dev/null 2>&1
export CXXFLAGS="-DNDEBUG -g2 -O2 -march=armv8-a+crypto $ADD_CXXFLAGS"
"$MAKE" "${MAKEARGS[@]}" static cryptest.exe 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS"
else
./cryptest.exe v 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute validation suite" | tee -a "$TEST_RESULTS"
fi
./cryptest.exe tv all 2>&1 | tee -a "$TEST_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute test vectors" | tee -a "$TEST_RESULTS"
fi
fi
fi
############################################
# Benchmarks, c++03
if [ "$HAVE_CXX03" -ne "0" ]; then
@ -1631,7 +1714,7 @@ if [ "$HAVE_CXX03" -ne "0" ]; then
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS"
else
./cryptest.exe b 1 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS"
./cryptest.exe b 3 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute benchmarks" | tee -a "$BENCHMARK_RESULTS"
fi
@ -1656,7 +1739,7 @@ if [ "$HAVE_CXX11" -ne "0" ]; then
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to make cryptest.exe" | tee -a "$TEST_RESULTS"
else
./cryptest.exe b 1 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS"
./cryptest.exe b 3 "$CPU_FREQ" 2>&1 | tee -a "$BENCHMARK_RESULTS"
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
echo "ERROR: failed to execute benchmarks" | tee -a "$BENCHMARK_RESULTS"
fi