Whitespace check-in

We also simplified the CPU_ProbeNEON logic a bit to a vmov.u32 and vshl.u32.
pull/853/head
Jeffrey Walton 2019-05-21 02:21:15 -04:00
parent 40251d9b7f
commit e8603143dc
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
1 changed file with 92 additions and 107 deletions
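
As a rough standalone sketch (not the committed code), the simplified probe boils down to broadcasting 1 into each 32-bit lane with vdupq_n_u32 and shifting every lane left by 4 with vshlq_n_u32, then checking that each lane reads 16. The snippet assumes an ARM toolchain with NEON enabled (for example -mfpu=neon) and omits the __try/SIGILL guard the real probe needs.

// Hypothetical compile-and-run check, not part of the commit.
#include <arm_neon.h>
#include <cstdio>

int main()
{
    uint32x4_t x = vdupq_n_u32(1);      // {1, 1, 1, 1}
    uint32x4_t y = vshlq_n_u32(x, 4);   // {16, 16, 16, 16}

    // Read the lanes back with vgetq_lane_u32; all four should be 16.
    bool ok = (vgetq_lane_u32(y, 0) & vgetq_lane_u32(y, 1) &
               vgetq_lane_u32(y, 2) & vgetq_lane_u32(y, 3)) == 16;

    std::printf("NEON shift check: %s\n", ok ? "passed" : "failed");
    return ok ? 0 : 1;
}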

@@ -6,6 +6,7 @@
// ARMv8a NEON instructions. A separate source file is needed
// because additional CXXFLAGS are required to enable the
// appropriate instruction sets in some build configurations.
// For Linux and Unix additional flags are not required.
#include "pch.h"
#include "config.h"
@@ -64,7 +65,7 @@ bool CPU_ProbeARMv7()
return false;
}
return result;
# elif defined(__GNUC__) || defined(__clang__)
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
@@ -100,7 +101,7 @@ bool CPU_ProbeARMv7()
24: e12fff1e bx lr
#endif
int a;
volatile int a;
asm volatile (
".arm \n\t"
".inst 0xe3010234 \n\t" // movw r0, 0x1234
@@ -126,24 +127,14 @@ bool CPU_ProbeNEON()
return true;
#elif defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
return false;
#elif (CRYPTOPP_ARM_NEON_AVAILABLE)
#elif CRYPTOPP_BOOL_ARM32
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1);
uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = vdupq_n_u32(2);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = vdupq_n_u64(2);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
volatile uint32x4_t x = vdupq_n_u32(1);
volatile uint32x4_t y = vshlq_n_u32(x, 4);
return (y[0] & y[1] & y[2] & y[3]) == 16;
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
@@ -151,7 +142,6 @@ bool CPU_ProbeNEON()
}
return result;
# else
// longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
volatile bool result = true;
@@ -168,20 +158,15 @@ bool CPU_ProbeNEON()
result = false;
else
{
uint32_t v1[4] = {1,1,1,1};
uint32x4_t x1 = vld1q_u32(v1);
uint64_t v2[2] = {1,1};
uint64x2_t x2 = vld1q_u64(v2);
uint32x4_t x3 = {0,0,0,0};
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
uint64x2_t x4 = {0,0};
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
// This is risky... When we hand encode the instructions
// for vmov.u32 and vshl.u32 we get a SIGILL. Apparently
// we need more than just the instructions. Using
// intrinsics introduces the risk because the whole
// file gets built with ISA options, and the higher ISA
// may escape the try block with the SIGILL guard.
uint32x4_t x = vdupq_n_u32(1);
uint32x4_t y = vshlq_n_u32(x, 4);
return (y[0] & y[1] & y[2] & y[3]) == 16;
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);