Add non-zero terms to the CRC detection code. Exercise all SHA1 instructions during the feature test. Apply the GCC optimization workaround to the MSVC code paths to avoid potential problems.

pull/157/merge
Jeffrey Walton 2016-05-30 06:37:08 -04:00
parent 89ec42b1f2
commit fd6ccce8ed
1 changed files with 45 additions and 33 deletions

78
cpu.cpp
View File

@ -364,6 +364,7 @@ static bool TryNEON()
{ {
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
uint32_t v1[4] = {1,1,1,1}; uint32_t v1[4] = {1,1,1,1};
@ -377,12 +378,14 @@ static bool TryNEON()
uint64x2_t x4 = {0,0}; uint64x2_t x4 = {0,0};
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1); x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
@ -429,18 +432,21 @@ static bool TryCRC32()
{ {
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
word32 w=0, x=0; word16 y=0; byte z=0; word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x); w = __crc32cw(w,x);
w = __crc32ch(w,y); w = __crc32ch(w,y);
w = __crc32cb(w,z); w = __crc32cb(w,z);
result = !!w;
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
@ -458,7 +464,7 @@ static bool TryCRC32()
result = false; result = false;
else else
{ {
word32 w=0, x=0; word16 y=0; byte z=0; word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32cw(w,x); w = __crc32cw(w,x);
w = __crc32ch(w,y); w = __crc32ch(w,y);
w = __crc32cb(w,z); w = __crc32cb(w,z);
@ -480,19 +486,21 @@ static bool TryAES()
{ {
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
// AES encrypt and decrypt // AES encrypt and decrypt
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0); uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key); uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key); uint8x16_t r2 = vaesdq_u8(data, key);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2);
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
@ -531,21 +539,24 @@ static bool TrySHA1()
{ {
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
uint32x4_t data = {0,0,0,0}; uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
uint32_t hash = 0x0;
uint32x4_t r1 = vsha1cq_u32 (data, hash, data); uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
uint32x4_t r2 = vsha1mq_u32 (data, hash, data); uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
uint32x4_t r3 = vsha1pq_u32 (data, hash, data); uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3); uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
uint32x4_t r5 = vsha1su1q_u32 (data1, data2);
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
@ -563,15 +574,16 @@ static bool TrySHA1()
result = false; result = false;
else else
{ {
uint32x4_t data = {0,0,0,0}; uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
uint32_t hash = 0x0;
uint32x4_t r1 = vsha1cq_u32 (data, hash, data); uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
uint32x4_t r2 = vsha1mq_u32 (data, hash, data); uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
uint32x4_t r3 = vsha1pq_u32 (data, hash, data); uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
uint32x4_t r5 = vsha1su1q_u32 (data1, data2);
// Hack... GCC optimizes away the code and returns true // Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2)); result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
} }
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
@ -587,22 +599,23 @@ static bool TrySHA2()
{ {
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try __try
{ {
uint32x4_t data = {0,0,0,0}; uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
uint32x4_t hash = {0,0,0,0};
uint32x4_t r1 = vsha256hq_u32 (hash, hash, data); uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data); uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
uint32x4_t r3 = vsha256su0q_u32 (data, data); uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
uint32x4_t r4 = vsha256su1q_u32 (data, data, data); uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);
CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4);
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
} }
__except (EXCEPTION_EXECUTE_HANDLER) __except (EXCEPTION_EXECUTE_HANDLER)
{ {
return false; return false;
} }
return true; return result;
# else # else
// longjmp and clobber warnings. Volatile is required. // longjmp and clobber warnings. Volatile is required.
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
@ -620,13 +633,12 @@ static bool TrySHA2()
result = false; result = false;
else else
{ {
uint32x4_t data = {0,0,0,0}; uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
uint32x4_t hash = {0,0,0,0};
uint32x4_t r1 = vsha256hq_u32 (hash, hash, data); uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data); uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
uint32x4_t r3 = vsha256su0q_u32 (data, data); uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
uint32x4_t r4 = vsha256su1q_u32 (data, data, data); uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);
// Hack... GCC optimizes away the code and returns true // Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3)); result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));