diff --git a/salsa.h b/salsa.h
index a491a056..b7b6aa06 100644
--- a/salsa.h
+++ b/salsa.h
@@ -9,8 +9,6 @@
 #include "strciphr.h"
 #include "secblock.h"
 
-// TODO: work around GCC 4.8+ issue with SSE2 ASM until the exact details are known
-// and fix is released. Duplicate with "valgrind ./cryptest.exe tv salsa"
 // "Inline assembly operands don't work with .intel_syntax", http://llvm.org/bugs/show_bug.cgi?id=24232
 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
 # define CRYPTOPP_DISABLE_SALSA_ASM
diff --git a/simon-simd.cpp b/simon-simd.cpp
index 5cb3ca8e..e6141018 100644
--- a/simon-simd.cpp
+++ b/simon-simd.cpp
@@ -98,14 +98,18 @@ inline uint64x2_t RotateRight64(const uint64x2_t& val)
 
 inline uint64x2_t Shuffle64(const uint64x2_t& val)
 {
+#if defined(CRYPTOPP_LITTLE_ENDIAN)
     return vreinterpretq_u64_u8(
         vrev64q_u8(vreinterpretq_u8_u64(val)));
+#else
+    return val;
+#endif
 }
 
-inline uint64x2_t SIMON128_f(const uint64x2_t& v)
+inline uint64x2_t SIMON128_f(const uint64x2_t& val)
 {
-    return veorq_u64(RotateLeft64<2>(v),
-        vandq_u64(RotateLeft64<1>(v), RotateLeft64<8>(v)));
+    return veorq_u64(RotateLeft64<2>(val),
+        vandq_u64(RotateLeft64<1>(val), RotateLeft64<8>(val)));
 }
 
 inline void SIMON128_Enc_Block(uint8x16_t &block0, const word64 *subkeys, unsigned int rounds)
@@ -476,7 +480,7 @@ template <>
 inline __m128i RotateLeft64<8>(const __m128i& val)
 {
     CRYPTOPP_ASSERT(R < 64);
-    const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
+    const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
     return _mm_shuffle_epi8(val, mask);
 }
 
@@ -485,7 +489,7 @@ template <>
 inline __m128i RotateRight64<8>(const __m128i& val)
 {
     CRYPTOPP_ASSERT(R < 64);
-    const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
+    const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
     return _mm_shuffle_epi8(val, mask);
 }
 
diff --git a/speck-simd.cpp b/speck-simd.cpp
index a04f39da..7061a832 100644
--- a/speck-simd.cpp
+++ b/speck-simd.cpp
@@ -63,16 +63,16 @@ const word32 s_one[] = {0, 0, 0, 1}; // uint32x4_t
 template <class W, class T>
 inline W UnpackHigh64(const T& a, const T& b)
 {
-    const uint64x1_t x = vget_high_u64((uint64x2_t)a);
-    const uint64x1_t y = vget_high_u64((uint64x2_t)b);
+    const uint64x1_t x(vget_high_u64((uint64x2_t)a));
+    const uint64x1_t y(vget_high_u64((uint64x2_t)b));
     return (W)vcombine_u64(x, y);
 }
 
 template <class W, class T>
 inline W UnpackLow64(const T& a, const T& b)
 {
-    const uint64x1_t x = vget_low_u64((uint64x2_t)a);
-    const uint64x1_t y = vget_low_u64((uint64x2_t)b);
+    const uint64x1_t x(vget_low_u64((uint64x2_t)a));
+    const uint64x1_t y(vget_low_u64((uint64x2_t)b));
     return (W)vcombine_u64(x, y);
 }
 
@@ -96,8 +96,12 @@ inline uint64x2_t RotateRight64(const uint64x2_t& val)
 
 inline uint64x2_t Shuffle64(const uint64x2_t& val)
 {
+#if defined(CRYPTOPP_LITTLE_ENDIAN)
     return vreinterpretq_u64_u8(
         vrev64q_u8(vreinterpretq_u8_u64(val)));
+#else
+    return val;
+#endif
 }
 
 inline void SPECK128_Enc_Block(uint8x16_t &block0, const word64 *subkeys, unsigned int rounds)
@@ -423,7 +427,7 @@ template <>
 inline __m128i RotateLeft64<8>(const __m128i& val)
 {
     CRYPTOPP_ASSERT(R < 64);
-    const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
+    const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
     return _mm_shuffle_epi8(val, mask);
 }
 
@@ -432,7 +436,7 @@ template <>
 inline __m128i RotateRight64<8>(const __m128i& val)
 {
     CRYPTOPP_ASSERT(R < 64);
-    const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
+    const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
     return _mm_shuffle_epi8(val, mask);
 }
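Annotation (not part of the patch): in both Shuffle64 hunks above, vrev64q_u8 reverses the bytes within each 64-bit lane, i.e. it byte-swaps each lane. The new CRYPTOPP_LITTLE_ENDIAN guard applies that swap only on little-endian targets and passes the value through unchanged on big-endian ones. A minimal scalar sketch of the intended behavior for one lane, assuming the CRYPTOPP_LITTLE_ENDIAN macro from the library's config headers; the helper names are mine:

#include <cstdint>

// bswap64 plays the role of vrev64q_u8 on a single 64-bit lane; the
// real code operates on a uint64x2_t, two lanes at once.
static uint64_t bswap64(uint64_t v)
{
    v = ((v & 0x00FF00FF00FF00FFULL) << 8)  | ((v >> 8)  & 0x00FF00FF00FF00FFULL);
    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
    return (v << 32) | (v >> 32);
}

static uint64_t Shuffle64_model(uint64_t v)
{
#if defined(CRYPTOPP_LITTLE_ENDIAN)
    return bswap64(v);  // little-endian host: reorder bytes
#else
    return v;           // big-endian host: already in the expected order
#endif
}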
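Annotation (not part of the patch): the v -> val rename in SIMON128_f is cosmetic. The function computes SIMON's round function, f(x) = ROTL(x,2) XOR (ROTL(x,1) AND ROTL(x,8)), on both 64-bit lanes at once. A scalar sketch for one lane, with names of my choosing:

#include <cstdint>

static uint64_t rotl64(uint64_t v, unsigned int r)
{
    return (v << r) | (v >> (64 - r));
}

// Mirrors the NEON code: veorq_u64 is XOR, vandq_u64 is AND, and the
// RotateLeft64<R> helpers rotate each 64-bit lane left by R bits.
static uint64_t SIMON128_f_model(uint64_t x)
{
    return rotl64(x, 2) ^ (rotl64(x, 1) & rotl64(x, 8));
}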
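Annotation (not part of the patch): in the RotateLeft64<8> and RotateRight64<8> specializations, _mm_shuffle_epi8 (SSSE3 pshufb) builds result byte i from source byte mask[i], and _mm_set_epi8 lists its arguments from byte 15 down to byte 0. The masks above therefore rotate each 64-bit lane by exactly one byte, left or right. A self-contained check against a scalar rotate, assuming an SSSE3-capable x86 compiler (build with -mssse3; rotl8 and main are mine):

#include <tmmintrin.h>  // SSSE3: _mm_shuffle_epi8
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint64_t rotl8(uint64_t v) { return (v << 8) | (v >> 56); }

int main()
{
    const uint64_t in[2] = { 0x0123456789ABCDEFULL, 0xFEDCBA9876543210ULL };
    __m128i x;
    std::memcpy(&x, in, sizeof(x));

    // Same mask as RotateLeft64<8>: each byte moves one position up
    // within its 64-bit lane, with byte 7 wrapping to byte 0 (and 15 to 8).
    const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
    x = _mm_shuffle_epi8(x, mask);

    uint64_t out[2];
    std::memcpy(out, &x, sizeof(out));
    std::printf("low lane ok: %d, high lane ok: %d\n",
                (int)(out[0] == rotl8(in[0])), (int)(out[1] == rotl8(in[1])));
    return 0;
}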