Switch back to _mm_load_sd to load subkeys

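We performed an intermediate copy (std::memcpy of the subkeys into a double[2], followed by _mm_loadu_pd) while attempting to isolate GH #677. This commit removes that copy and loads the subkeys directly with _mm_load_sd again.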
pull/681/head^2
Jeffrey Walton 2018-07-03 23:13:40 -04:00
parent 78ece1b2aa
commit 65806e5ee9
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
1 changed file with 9 additions and 36 deletions

View File

@ -334,7 +334,7 @@ inline void CHAM64_Enc_Block(__m128i &block0,
for (int i=0; i<static_cast<int>(rounds); i+=4)
{
__m128i k, kr, t1, t2, t3, t4;
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[i & MASK])));
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+0) & MASK])));
// Shuffle out key
kr = _mm_shuffle_epi8(k, _mm_set_epi8(1,0,1,0, 1,0,1,0, 1,0,1,0, 1,0,1,0));
@ -781,11 +781,7 @@ inline void CHAM128_Enc_Block(__m128i &block0,
for (int i=0; i<static_cast<int>(rounds); i+=4)
{
__m128i k, k1, k2, t1, t2;
double x[2];
// Avoid casting among datatypes
std::memcpy(x, &subkeys[(i+0) & MASK], 16);
k = _mm_castpd_si128(_mm_loadu_pd(x));
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+0) & MASK])));
// Shuffle out two subkeys
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
@ -803,9 +799,7 @@ inline void CHAM128_Enc_Block(__m128i &block0,
counter = _mm_add_epi32(counter, increment);
// Avoid casting among datatypes
std::memcpy(x, &subkeys[(i+2) & MASK], 16);
k = _mm_castpd_si128(_mm_loadu_pd(x));
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+2) & MASK])));
// Shuffle out two subkeys
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
@ -847,11 +841,7 @@ inline void CHAM128_Dec_Block(__m128i &block0,
for (int i = static_cast<int>(rounds)-1; i >= 0; i-=4)
{
__m128i k, k1, k2, t1, t2;
double x[2];
// Avoid casting among datatypes
std::memcpy(x, &subkeys[(i-1) & MASK], 16);
k = _mm_castpd_si128(_mm_loadu_pd(x));
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-1) & MASK])));
// Shuffle out two subkeys
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
@ -870,10 +860,7 @@ inline void CHAM128_Dec_Block(__m128i &block0,
c = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
counter = _mm_sub_epi32(counter, decrement);
// Avoid casting among datatypes
std::memcpy(x, &subkeys[(i-3) & MASK], 16);
k = _mm_castpd_si128(_mm_loadu_pd(x));
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-3) & MASK])));
// Shuffle out two subkeys
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
@ -917,11 +904,7 @@ inline void CHAM128_Enc_4_Blocks(__m128i &block0, __m128i &block1,
for (int i=0; i<static_cast<int>(rounds); i+=4)
{
__m128i k, k1, k2, t1, t2;
double x[2];
// Avoid casting among datatypes
std::memcpy(x, &subkeys[(i+0) & MASK], 16);
k = _mm_castpd_si128(_mm_loadu_pd(x));
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+0) & MASK])));
// Shuffle out two subkeys
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
@ -938,10 +921,7 @@ inline void CHAM128_Enc_4_Blocks(__m128i &block0, __m128i &block1,
b = RotateLeft32<1>(_mm_add_epi32(t1, t2));
counter = _mm_add_epi32(counter, increment);
// Avoid casting among datatypes
std::memcpy(x, &subkeys[(i+2) & MASK], 16);
k = _mm_castpd_si128(_mm_loadu_pd(x));
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+2) & MASK])));
// Shuffle out two subkeys
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
@ -986,11 +966,7 @@ inline void CHAM128_Dec_4_Blocks(__m128i &block0, __m128i &block1,
for (int i = static_cast<int>(rounds)-1; i >= 0; i-=4)
{
__m128i k, k1, k2, t1, t2;
double x[2];
// Avoid casting among datatypes
std::memcpy(x, &subkeys[(i-1) & MASK], 16);
k = _mm_castpd_si128(_mm_loadu_pd(x));
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-1) & MASK])));
// Shuffle out two subkeys
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
@ -1009,10 +985,7 @@ inline void CHAM128_Dec_4_Blocks(__m128i &block0, __m128i &block1,
c = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
counter = _mm_sub_epi32(counter, decrement);
// Avoid casting among datatypes
std::memcpy(x, &subkeys[(i-3) & MASK], 16);
k = _mm_castpd_si128(_mm_loadu_pd(x));
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-3) & MASK])));
// Shuffle out two subkeys
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));