From 9b61d4143da890d23328c22db8d9454235e1ded7 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Tue, 5 Dec 2017 12:32:26 -0500 Subject: [PATCH] Add big- and little-endian rotates for Aarch32 and Aarch64 --- simon-simd.cpp | 24 ++++++++++++++++++++++++ speck-simd.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/simon-simd.cpp b/simon-simd.cpp index 7af34ac3..b9c4e337 100644 --- a/simon-simd.cpp +++ b/simon-simd.cpp @@ -86,8 +86,14 @@ inline uint32x4_t RotateRight32(const uint32x4_t& val) template <> inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val) { +#if defined(CRYPTOPP_BIG_ENDIAN) const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 }; const uint8x16_t mask = vld1q_u8(maskb); +#else + const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 }; + const uint8x16_t mask = vld1q_u8(maskb); +#endif + return vreinterpretq_u32_u8( vqtbl1q_u8(vreinterpretq_u8_u32(val), mask)); } @@ -96,8 +102,14 @@ inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val) template <> inline uint32x4_t RotateRight32<8>(const uint32x4_t& val) { +#if defined(CRYPTOPP_BIG_ENDIAN) const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 }; const uint8x16_t mask = vld1q_u8(maskb); +#else + const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 }; + const uint8x16_t mask = vld1q_u8(maskb); +#endif + return vreinterpretq_u32_u8( vqtbl1q_u8(vreinterpretq_u8_u32(val), mask)); } @@ -521,8 +533,14 @@ inline uint64x2_t RotateRight64(const uint64x2_t& val) template <> inline uint64x2_t RotateLeft64<8>(const uint64x2_t& val) { +#if defined(CRYPTOPP_BIG_ENDIAN) const uint8_t maskb[16] = { 14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7 }; const uint8x16_t mask = vld1q_u8(maskb); +#else + const uint8_t maskb[16] = { 7,0,1,2, 3,4,5,6, 15,8,9,10, 11,12,13,14 }; + const uint8x16_t mask = vld1q_u8(maskb); +#endif + return vreinterpretq_u64_u8( vqtbl1q_u8(vreinterpretq_u8_u64(val), mask)); } @@ -531,8 +549,14 @@ inline 
uint64x2_t RotateLeft64<8>(const uint64x2_t& val) template <> inline uint64x2_t RotateRight64<8>(const uint64x2_t& val) { +#if defined(CRYPTOPP_BIG_ENDIAN) const uint8_t maskb[16] = { 8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1 }; const uint8x16_t mask = vld1q_u8(maskb); +#else + const uint8_t maskb[16] = { 1,2,3,4, 5,6,7,0, 9,10,11,12, 13,14,15,8 }; + const uint8x16_t mask = vld1q_u8(maskb); +#endif + return vreinterpretq_u64_u8( vqtbl1q_u8(vreinterpretq_u8_u64(val), mask)); } diff --git a/speck-simd.cpp b/speck-simd.cpp index 41df95d4..e23d6aec 100644 --- a/speck-simd.cpp +++ b/speck-simd.cpp @@ -84,8 +84,14 @@ inline uint32x4_t RotateRight32(const uint32x4_t& val) template <> inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val) { +#if defined(CRYPTOPP_BIG_ENDIAN) const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 }; const uint8x16_t mask = vld1q_u8(maskb); +#else + const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 }; + const uint8x16_t mask = vld1q_u8(maskb); +#endif + return vreinterpretq_u32_u8( vqtbl1q_u8(vreinterpretq_u8_u32(val), mask)); } @@ -94,8 +100,14 @@ inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val) template <> inline uint32x4_t RotateRight32<8>(const uint32x4_t& val) { +#if defined(CRYPTOPP_BIG_ENDIAN) const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 }; const uint8x16_t mask = vld1q_u8(maskb); +#else + const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 }; + const uint8x16_t mask = vld1q_u8(maskb); +#endif + return vreinterpretq_u32_u8( vqtbl1q_u8(vreinterpretq_u8_u32(val), mask)); } @@ -491,8 +503,14 @@ inline uint64x2_t RotateRight64(const uint64x2_t& val) template <> inline uint64x2_t RotateLeft64<8>(const uint64x2_t& val) { +#if defined(CRYPTOPP_BIG_ENDIAN) const uint8_t maskb[16] = { 14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7 }; const uint8x16_t mask = vld1q_u8(maskb); +#else + const uint8_t maskb[16] = { 7,0,1,2, 3,4,5,6, 15,8,9,10, 11,12,13,14 }; + const 
uint8x16_t mask = vld1q_u8(maskb); +#endif + return vreinterpretq_u64_u8( vqtbl1q_u8(vreinterpretq_u8_u64(val), mask)); } @@ -501,8 +519,14 @@ inline uint64x2_t RotateLeft64<8>(const uint64x2_t& val) template <> inline uint64x2_t RotateRight64<8>(const uint64x2_t& val) { +#if defined(CRYPTOPP_BIG_ENDIAN) const uint8_t maskb[16] = { 8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1 }; const uint8x16_t mask = vld1q_u8(maskb); +#else + const uint8_t maskb[16] = { 1,2,3,4, 5,6,7,0, 9,10,11,12, 13,14,15,8 }; + const uint8x16_t mask = vld1q_u8(maskb); +#endif + return vreinterpretq_u64_u8( vqtbl1q_u8(vreinterpretq_u8_u64(val), mask)); }