From 65806e5ee9d1d7141da95cc611d0cade7bf12ee3 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Tue, 3 Jul 2018 23:13:40 -0400 Subject: [PATCH] Switch back to _mm_load_sd to load subkeys We performed an intermediate copy while attempting to isolate GH #677. --- cham-simd.cpp | 45 +++++++++------------------------------------ 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/cham-simd.cpp b/cham-simd.cpp index ae851b2a..0e61cfff 100644 --- a/cham-simd.cpp +++ b/cham-simd.cpp @@ -334,7 +334,7 @@ inline void CHAM64_Enc_Block(__m128i &block0, for (int i=0; i(rounds); i+=4) { __m128i k, kr, t1, t2, t3, t4; - k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[i & MASK]))); + k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+0) & MASK]))); // Shuffle out key kr = _mm_shuffle_epi8(k, _mm_set_epi8(1,0,1,0, 1,0,1,0, 1,0,1,0, 1,0,1,0)); @@ -781,11 +781,7 @@ inline void CHAM128_Enc_Block(__m128i &block0, for (int i=0; i(rounds); i+=4) { __m128i k, k1, k2, t1, t2; - double x[2]; - - // Avoid casting among datatypes - std::memcpy(x, &subkeys[(i+0) & MASK], 16); - k = _mm_castpd_si128(_mm_loadu_pd(x)); + k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+0) & MASK]))); // Shuffle out two subkeys k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0)); @@ -803,9 +799,7 @@ inline void CHAM128_Enc_Block(__m128i &block0, counter = _mm_add_epi32(counter, increment); - // Avoid casting among datatypes - std::memcpy(x, &subkeys[(i+2) & MASK], 16); - k = _mm_castpd_si128(_mm_loadu_pd(x)); + k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+2) & MASK]))); // Shuffle out two subkeys k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0)); @@ -847,11 +841,7 @@ inline void CHAM128_Dec_Block(__m128i &block0, for (int i = static_cast(rounds)-1; i >= 0; i-=4) { __m128i k, k1, k2, t1, t2; - double x[2]; - - // Avoid casting among datatypes - std::memcpy(x, &subkeys[(i-1) & MASK], 16); - k = _mm_castpd_si128(_mm_loadu_pd(x)); + k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-1) & MASK]))); // Shuffle out two subkeys k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4)); @@ -870,10 +860,7 @@ inline void CHAM128_Dec_Block(__m128i &block0, c = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter); counter = _mm_sub_epi32(counter, decrement); - - // Avoid casting among datatypes - std::memcpy(x, &subkeys[(i-3) & MASK], 16); - k = _mm_castpd_si128(_mm_loadu_pd(x)); + k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-3) & MASK]))); // Shuffle out two subkeys k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4)); @@ -917,11 +904,7 @@ inline void CHAM128_Enc_4_Blocks(__m128i &block0, __m128i &block1, for (int i=0; i(rounds); i+=4) { __m128i k, k1, k2, t1, t2; - double x[2]; - - // Avoid casting among datatypes - std::memcpy(x, &subkeys[(i+0) & MASK], 16); - k = _mm_castpd_si128(_mm_loadu_pd(x)); + k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+0) & MASK]))); // Shuffle out two subkeys k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0)); @@ -938,10 +921,7 @@ inline void CHAM128_Enc_4_Blocks(__m128i &block0, __m128i &block1, b = RotateLeft32<1>(_mm_add_epi32(t1, t2)); counter = _mm_add_epi32(counter, increment); - - // Avoid casting among datatypes - std::memcpy(x, &subkeys[(i+2) & MASK], 16); - k = _mm_castpd_si128(_mm_loadu_pd(x)); + k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+2) & MASK]))); // Shuffle out two subkeys k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0)); @@ -986,11 +966,7 @@ inline void CHAM128_Dec_4_Blocks(__m128i &block0, __m128i &block1, for (int i = static_cast(rounds)-1; i >= 0; i-=4) { __m128i k, k1, k2, t1, t2; - double x[2]; - - // Avoid casting among datatypes - std::memcpy(x, &subkeys[(i-1) & MASK], 16); - k = _mm_castpd_si128(_mm_loadu_pd(x)); + k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-1) & MASK]))); // Shuffle out two subkeys k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4)); @@ -1009,10 +985,7 @@ inline void CHAM128_Dec_4_Blocks(__m128i &block0, __m128i &block1, c = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter); counter = _mm_sub_epi32(counter, decrement); - - // Avoid casting among datatypes - std::memcpy(x, &subkeys[(i-3) & MASK], 16); - k = _mm_castpd_si128(_mm_loadu_pd(x)); + k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-3) & MASK]))); // Shuffle out two subkeys k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));