Add big- and little-endian rotates for Aarch32 and Aarch64

pull/548/head
Jeffrey Walton 2017-12-05 12:32:26 -05:00
parent 9faa504a24
commit 9b61d4143d
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 48 additions and 0 deletions

View File

@ -86,8 +86,14 @@ inline uint32x4_t RotateRight32(const uint32x4_t& val)
// Specialization for an 8-bit left rotate of each 32-bit lane. A rotate by a
// whole byte is a pure byte permutation, so it is done with one table lookup
// (vqtbl1q_u8) instead of two shifts and an OR.
template <>
inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
{
    // Entry i of the table names the source byte that lands in byte i of the
    // result. The table is selected by the build's endianness macro.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8_t shuffle[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
#else
    // Little-endian: within each 4-byte lane, byte 0 (LSB) receives the old MSB.
    const uint8_t shuffle[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
#endif
    const uint8x16_t indices = vld1q_u8(shuffle);
    const uint8x16_t bytes = vreinterpretq_u8_u32(val);
    return vreinterpretq_u32_u8(vqtbl1q_u8(bytes, indices));
}
@ -96,8 +102,14 @@ inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
// Specialization for an 8-bit right rotate of each 32-bit lane. A rotate by a
// whole byte is a pure byte permutation, so it is done with one table lookup
// (vqtbl1q_u8) instead of two shifts and an OR.
template <>
inline uint32x4_t RotateRight32<8>(const uint32x4_t& val)
{
    // Entry i of the mask names the source byte that lands in byte i of the
    // result. The table is selected by the build's endianness macro.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
    const uint8x16_t mask = vld1q_u8(maskb);
#else
    // Little-endian: within each 4-byte lane, byte i receives old byte (i+1)%4.
    // The final lane must read 13,14,15,12; the previous 13,14,14,12 duplicated
    // index 14 and dropped 15, corrupting the last 32-bit word.
    const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 };
    const uint8x16_t mask = vld1q_u8(maskb);
#endif
    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}
@ -521,8 +533,14 @@ inline uint64x2_t RotateRight64(const uint64x2_t& val)
// Specialization for an 8-bit left rotate of each 64-bit lane. A rotate by a
// whole byte is a pure byte permutation, so it is done with one table lookup
// (vqtbl1q_u8) instead of two shifts and an OR.
template <>
inline uint64x2_t RotateLeft64<8>(const uint64x2_t& val)
{
    // Entry i of the table names the source byte that lands in byte i of the
    // result. The table is selected by the build's endianness macro.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8_t shuffle[16] = { 14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7 };
#else
    // Little-endian: within each 8-byte lane, byte 0 (LSB) receives the old MSB.
    const uint8_t shuffle[16] = { 7,0,1,2, 3,4,5,6, 15,8,9,10, 11,12,13,14 };
#endif
    const uint8x16_t indices = vld1q_u8(shuffle);
    const uint8x16_t bytes = vreinterpretq_u8_u64(val);
    return vreinterpretq_u64_u8(vqtbl1q_u8(bytes, indices));
}
@ -531,8 +549,14 @@ inline uint64x2_t RotateLeft64<8>(const uint64x2_t& val)
// Specialization for an 8-bit right rotate of each 64-bit lane. A rotate by a
// whole byte is a pure byte permutation, so it is done with one table lookup
// (vqtbl1q_u8) instead of two shifts and an OR.
template <>
inline uint64x2_t RotateRight64<8>(const uint64x2_t& val)
{
    // Entry i of the table names the source byte that lands in byte i of the
    // result. The table is selected by the build's endianness macro.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8_t shuffle[16] = { 8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1 };
#else
    // Little-endian: within each 8-byte lane, byte i receives old byte (i+1)%8.
    const uint8_t shuffle[16] = { 1,2,3,4, 5,6,7,0, 9,10,11,12, 13,14,15,8 };
#endif
    const uint8x16_t indices = vld1q_u8(shuffle);
    const uint8x16_t bytes = vreinterpretq_u8_u64(val);
    return vreinterpretq_u64_u8(vqtbl1q_u8(bytes, indices));
}

View File

@ -84,8 +84,14 @@ inline uint32x4_t RotateRight32(const uint32x4_t& val)
// 8-bit left rotate of every 32-bit lane, implemented as a single byte
// shuffle (vqtbl1q_u8) because rotating by a whole byte only moves bytes.
template <>
inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
{
    // Each table entry is the index of the source byte copied to that
    // position of the result; the layout depends on the endianness macro.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8_t table[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
#else
    // Little-endian: the old most-significant byte of a lane becomes its LSB.
    const uint8_t table[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
#endif
    const uint8x16_t selector = vld1q_u8(table);
    const uint8x16_t source = vreinterpretq_u8_u32(val);
    const uint8x16_t rotated = vqtbl1q_u8(source, selector);
    return vreinterpretq_u32_u8(rotated);
}
@ -94,8 +100,14 @@ inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
// 8-bit right rotate of every 32-bit lane, implemented as a single byte
// shuffle (vqtbl1q_u8) because rotating by a whole byte only moves bytes.
template <>
inline uint32x4_t RotateRight32<8>(const uint32x4_t& val)
{
    // Each mask entry is the index of the source byte copied to that
    // position of the result; the layout depends on the endianness macro.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
    const uint8x16_t mask = vld1q_u8(maskb);
#else
    // Little-endian: byte i of a lane takes old byte (i+1)%4 of that lane.
    // Last lane is 13,14,15,12. The earlier 13,14,14,12 repeated index 14
    // and never selected byte 15, producing a wrong fourth word.
    const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 };
    const uint8x16_t mask = vld1q_u8(maskb);
#endif
    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}
@ -491,8 +503,14 @@ inline uint64x2_t RotateRight64(const uint64x2_t& val)
// 8-bit left rotate of every 64-bit lane, implemented as a single byte
// shuffle (vqtbl1q_u8) because rotating by a whole byte only moves bytes.
template <>
inline uint64x2_t RotateLeft64<8>(const uint64x2_t& val)
{
    // Each table entry is the index of the source byte copied to that
    // position of the result; the layout depends on the endianness macro.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8_t table[16] = { 14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7 };
#else
    // Little-endian: the old most-significant byte of a lane becomes its LSB.
    const uint8_t table[16] = { 7,0,1,2, 3,4,5,6, 15,8,9,10, 11,12,13,14 };
#endif
    const uint8x16_t selector = vld1q_u8(table);
    const uint8x16_t source = vreinterpretq_u8_u64(val);
    const uint8x16_t rotated = vqtbl1q_u8(source, selector);
    return vreinterpretq_u64_u8(rotated);
}
@ -501,8 +519,14 @@ inline uint64x2_t RotateLeft64<8>(const uint64x2_t& val)
// 8-bit right rotate of every 64-bit lane, implemented as a single byte
// shuffle (vqtbl1q_u8) because rotating by a whole byte only moves bytes.
template <>
inline uint64x2_t RotateRight64<8>(const uint64x2_t& val)
{
    // Each table entry is the index of the source byte copied to that
    // position of the result; the layout depends on the endianness macro.
#if defined(CRYPTOPP_BIG_ENDIAN)
    const uint8_t table[16] = { 8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1 };
#else
    // Little-endian: byte i of a lane takes old byte (i+1)%8 of that lane.
    const uint8_t table[16] = { 1,2,3,4, 5,6,7,0, 9,10,11,12, 13,14,15,8 };
#endif
    const uint8x16_t selector = vld1q_u8(table);
    const uint8x16_t source = vreinterpretq_u8_u64(val);
    const uint8x16_t rotated = vqtbl1q_u8(source, selector);
    return vreinterpretq_u64_u8(rotated);
}