Fix GCC optimizing away feature tests
parent
be572877f6
commit
89ec42b1f2
142
cpu.cpp
142
cpu.cpp
|
|
@ -48,19 +48,19 @@ bool CpuId(word32 input, word32 output[4])
|
|||
#else
|
||||
|
||||
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||
extern "C" {
|
||||
|
||||
static jmp_buf s_jmpNoCPUID;
|
||||
static void SigIllHandlerCPUID(int)
|
||||
extern "C"
|
||||
{
|
||||
static jmp_buf s_jmpNoCPUID;
|
||||
static void SigIllHandlerCPUID(int)
|
||||
{
|
||||
longjmp(s_jmpNoCPUID, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static jmp_buf s_jmpNoSSE2;
|
||||
static void SigIllHandlerSSE2(int)
|
||||
{
|
||||
static jmp_buf s_jmpNoSSE2;
|
||||
static void SigIllHandlerSSE2(int)
|
||||
{
|
||||
longjmp(s_jmpNoSSE2, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -97,10 +97,14 @@ bool CpuId(word32 input, word32 output[4])
|
|||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
|
||||
if (oldHandler == SIG_ERR)
|
||||
result = false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
||||
result = false;
|
||||
|
||||
if (setjmp(s_jmpNoCPUID))
|
||||
result = false;
|
||||
else
|
||||
|
|
@ -119,6 +123,7 @@ bool CpuId(word32 input, word32 output[4])
|
|||
);
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
#endif
|
||||
|
|
@ -151,7 +156,7 @@ static bool TrySSE2()
|
|||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
|
||||
if (oldHandler == SIG_ERR)
|
||||
return false;
|
||||
|
||||
|
|
@ -167,6 +172,7 @@ static bool TrySSE2()
|
|||
#endif
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
#endif
|
||||
|
|
@ -298,16 +304,11 @@ void DetectX86Features()
|
|||
*((volatile bool*)&g_x86DetectionDone) = true;
|
||||
}
|
||||
|
||||
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
|
||||
// http://stackoverflow.com/questions/26701262/how-to-check-the-existence-of-neon-on-arm
|
||||
#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
|
||||
|
||||
bool g_ArmDetectionDone = false;
|
||||
bool g_hasNEON = false, g_hasCRC32 = false, g_hasAES = false, g_hasSHA1 = false, g_hasSHA2 = false;
|
||||
|
||||
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
||||
|
||||
// The ARM equivalent of CPUID is reading a MSR. For example, fetch crypto capabilities with:
|
||||
// The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
|
||||
// Attempting to run the code results in a SIGILL and termination.
|
||||
//
|
||||
// #if defined(__arm64__) || defined(__aarch64__)
|
||||
// word64 caps = 0; // Read ID_AA64ISAR0_EL1
|
||||
// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
|
||||
|
|
@ -315,12 +316,18 @@ word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
|||
// word32 caps = 0; // Read ID_ISAR5_EL1
|
||||
// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
|
||||
// #endif
|
||||
// The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
|
||||
// Attempting to run the code results in a SIGILL and termination.
|
||||
//
|
||||
// The following does not work well either. Its appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64
|
||||
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
|
||||
//
|
||||
bool g_ArmDetectionDone = false;
|
||||
bool g_hasNEON = false, g_hasCRC32 = false, g_hasAES = false, g_hasSHA1 = false, g_hasSHA2 = false;
|
||||
|
||||
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
||||
|
||||
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||
extern "C" {
|
||||
|
||||
extern "C"
|
||||
{
|
||||
static jmp_buf s_jmpNoNEON;
|
||||
static void SigIllHandlerNEON(int)
|
||||
{
|
||||
|
|
@ -359,15 +366,15 @@ static bool TryNEON()
|
|||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
__try
|
||||
{
|
||||
static const uint32_t v1[4] = {1,1,1,1};
|
||||
uint32_t v1[4] = {1,1,1,1};
|
||||
uint32x4_t x1 = vld1q_u32(v1);
|
||||
static const uint64_t v2[2] = {1,1};
|
||||
uint64_t v2[2] = {1,1};
|
||||
uint64x2_t x2 = vld1q_u64(v2);
|
||||
|
||||
uint32x4_t x3 = vdupq_n_u32(0);
|
||||
uint32x4_t x3 = {0,0,0,0};
|
||||
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
|
||||
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
|
||||
uint64x2_t x4 = vdupq_n_u64(0);
|
||||
uint64x2_t x4 = {0,0};
|
||||
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
|
||||
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
|
||||
}
|
||||
|
|
@ -381,27 +388,35 @@ static bool TryNEON()
|
|||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
|
||||
if (oldHandler == SIG_ERR)
|
||||
result = false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
||||
result = false;
|
||||
|
||||
if (setjmp(s_jmpNoNEON))
|
||||
result = false;
|
||||
else
|
||||
{
|
||||
static const uint32_t v1[4] = {1,1,1,1};
|
||||
uint32_t v1[4] = {1,1,1,1};
|
||||
uint32x4_t x1 = vld1q_u32(v1);
|
||||
static const uint64_t v2[2] = {1,1};
|
||||
uint64_t v2[2] = {1,1};
|
||||
uint64x2_t x2 = vld1q_u64(v2);
|
||||
|
||||
uint32x4_t x3 = vdupq_n_u32(0);
|
||||
uint32x4_t x3 = {0,0,0,0};
|
||||
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
|
||||
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
|
||||
uint64x2_t x4 = vdupq_n_u64(0);
|
||||
uint64x2_t x4 = {0,0};
|
||||
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
|
||||
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
|
||||
|
||||
// Hack... GCC optimizes away the code and returns true
|
||||
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
# endif
|
||||
|
|
@ -431,10 +446,14 @@ static bool TryCRC32()
|
|||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
|
||||
if (oldHandler == SIG_ERR)
|
||||
result = false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
||||
result = false;
|
||||
|
||||
if (setjmp(s_jmpNoCRC32))
|
||||
result = false;
|
||||
else
|
||||
|
|
@ -443,8 +462,12 @@ static bool TryCRC32()
|
|||
w = __crc32cw(w,x);
|
||||
w = __crc32ch(w,y);
|
||||
w = __crc32cb(w,z);
|
||||
|
||||
// Hack... GCC optimizes away the code and returns true
|
||||
result = !!w;
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
# endif
|
||||
|
|
@ -460,7 +483,7 @@ static bool TryAES()
|
|||
__try
|
||||
{
|
||||
// AES encrypt and decrypt
|
||||
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
|
||||
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
|
||||
uint8x16_t r1 = vaeseq_u8(data, key);
|
||||
uint8x16_t r2 = vaesdq_u8(data, key);
|
||||
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2);
|
||||
|
|
@ -475,20 +498,27 @@ static bool TryAES()
|
|||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
|
||||
if (oldHandler == SIG_ERR)
|
||||
result = false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
||||
result = false;
|
||||
|
||||
if (setjmp(s_jmpNoAES))
|
||||
result = false;
|
||||
else
|
||||
{
|
||||
static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
|
||||
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
|
||||
uint8x16_t r1 = vaeseq_u8(data, key);
|
||||
uint8x16_t r2 = vaesdq_u8(data, key);
|
||||
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2);
|
||||
|
||||
// Hack... GCC optimizes away the code and returns true
|
||||
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
# endif
|
||||
|
|
@ -503,8 +533,8 @@ static bool TrySHA1()
|
|||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
__try
|
||||
{
|
||||
static const uint32x4_t data = vdupq_n_u32(0);
|
||||
static const uint32_t hash = 0x0;
|
||||
uint32x4_t data = {0,0,0,0};
|
||||
uint32_t hash = 0x0;
|
||||
|
||||
uint32x4_t r1 = vsha1cq_u32 (data, hash, data);
|
||||
uint32x4_t r2 = vsha1mq_u32 (data, hash, data);
|
||||
|
|
@ -521,23 +551,30 @@ static bool TrySHA1()
|
|||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
|
||||
if (oldHandler == SIG_ERR)
|
||||
result = false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
||||
result = false;
|
||||
|
||||
if (setjmp(s_jmpNoSHA1))
|
||||
result = false;
|
||||
else
|
||||
{
|
||||
static const uint32x4_t data = vdupq_n_u32(0);
|
||||
static const uint32_t hash = 0x0;
|
||||
uint32x4_t data = {0,0,0,0};
|
||||
uint32_t hash = 0x0;
|
||||
|
||||
uint32x4_t r1 = vsha1cq_u32 (data, hash, data);
|
||||
uint32x4_t r2 = vsha1mq_u32 (data, hash, data);
|
||||
uint32x4_t r3 = vsha1pq_u32 (data, hash, data);
|
||||
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3);
|
||||
|
||||
// Hack... GCC optimizes away the code and returns true
|
||||
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2));
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
# endif
|
||||
|
|
@ -552,8 +589,8 @@ static bool TrySHA2()
|
|||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
__try
|
||||
{
|
||||
static const uint32x4_t data = vdupq_n_u32(0);
|
||||
static const uint32x4_t hash = vdupq_n_u32(0);
|
||||
uint32x4_t data = {0,0,0,0};
|
||||
uint32x4_t hash = {0,0,0,0};
|
||||
|
||||
uint32x4_t r1 = vsha256hq_u32 (hash, hash, data);
|
||||
uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data);
|
||||
|
|
@ -571,24 +608,31 @@ static bool TrySHA2()
|
|||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
|
||||
if (oldHandler == SIG_ERR)
|
||||
result = false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
||||
result = false;
|
||||
|
||||
if (setjmp(s_jmpNoSHA2))
|
||||
result = false;
|
||||
else
|
||||
{
|
||||
static const uint32x4_t data = vdupq_n_u32(0);
|
||||
static const uint32x4_t hash = vdupq_n_u32(0);
|
||||
uint32x4_t data = {0,0,0,0};
|
||||
uint32x4_t hash = {0,0,0,0};
|
||||
|
||||
uint32x4_t r1 = vsha256hq_u32 (hash, hash, data);
|
||||
uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data);
|
||||
uint32x4_t r3 = vsha256su0q_u32 (data, data);
|
||||
uint32x4_t r4 = vsha256su1q_u32 (data, data, data);
|
||||
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4);
|
||||
|
||||
// Hack... GCC optimizes away the code and returns true
|
||||
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
# endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue