From 7c7e8aa8046ac6689f4a5468842e4333badebc6c Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 29 Jan 2017 03:08:19 -0500 Subject: [PATCH] Fix headers and data types for cpu.cpp (Issue 362) This is trickier than expected due to sporadic support for PMULL and PMULL2 among compilers --- cpu.cpp | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/cpu.cpp b/cpu.cpp index e8bf611b..e4d3b7d4 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -445,21 +445,19 @@ static bool TryPMULL() volatile bool result = true; __try { - const poly64_t a1={2}, b1={3}; - const poly64x2_t a2={4,5}, b2={6,7}; - const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0}; + const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0}; + const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0}, + b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0}; const poly128_t r1 = vmull_p64(a1, b1); - const poly128_t r2 = vmull_high_p64(a2, b2); - const poly128_t r3 = vmull_high_p64(a3, b3); + const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2)); - // Also see https://github.com/weidai11/cryptopp/issues/233. - const uint64x2_t& t1 = vreinterpretq_u64_p128(r1); // {6,0} - const uint64x2_t& t2 = vreinterpretq_u64_p128(r2); // {24,0} - const uint64x2_t& t3 = vreinterpretq_u64_p128(r3); // {bignum,bignum} + // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233. + const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum} + const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum} - result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b && - vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00); + result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 && + vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00); } __except (EXCEPTION_EXECUTE_HANDLER) { @@ -483,21 +481,19 @@ static bool TryPMULL() result = false; else { - const poly64_t a1={2}, b1={3}; - const poly64x2_t a2={4,5}, b2={6,7}; - const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0}; + const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0}; + const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0}, + b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0}; const poly128_t r1 = vmull_p64(a1, b1); - const poly128_t r2 = vmull_high_p64(a2, b2); - const poly128_t r3 = vmull_high_p64(a3, b3); + const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2)); - // Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233. - const uint64x2_t& t1 = (uint64x2_t)(r1); // {6,0} - const uint64x2_t& t2 = (uint64x2_t)(r2); // {24,0} - const uint64x2_t& t3 = (uint64x2_t)(r3); // {bignum,bignum} + // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233. + const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum} + const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum} - result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b && - vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00); + result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 && + vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00); } sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);