Switch back to _mm_load_sd to load subkeys
We performed an intermediate copy while attempting to isolate GH #677.
pull/681/head^2
parent
78ece1b2aa
commit
65806e5ee9
|
|
@ -334,7 +334,7 @@ inline void CHAM64_Enc_Block(__m128i &block0,
|
|||
for (int i=0; i<static_cast<int>(rounds); i+=4)
|
||||
{
|
||||
__m128i k, kr, t1, t2, t3, t4;
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[i & MASK])));
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+0) & MASK])));
|
||||
|
||||
// Shuffle out key
|
||||
kr = _mm_shuffle_epi8(k, _mm_set_epi8(1,0,1,0, 1,0,1,0, 1,0,1,0, 1,0,1,0));
|
||||
|
|
@ -781,11 +781,7 @@ inline void CHAM128_Enc_Block(__m128i &block0,
|
|||
for (int i=0; i<static_cast<int>(rounds); i+=4)
|
||||
{
|
||||
__m128i k, k1, k2, t1, t2;
|
||||
double x[2];
|
||||
|
||||
// Avoid casting among datatypes
|
||||
std::memcpy(x, &subkeys[(i+0) & MASK], 16);
|
||||
k = _mm_castpd_si128(_mm_loadu_pd(x));
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+0) & MASK])));
|
||||
|
||||
// Shuffle out two subkeys
|
||||
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
|
||||
|
|
@ -803,9 +799,7 @@ inline void CHAM128_Enc_Block(__m128i &block0,
|
|||
|
||||
counter = _mm_add_epi32(counter, increment);
|
||||
|
||||
// Avoid casting among datatypes
|
||||
std::memcpy(x, &subkeys[(i+2) & MASK], 16);
|
||||
k = _mm_castpd_si128(_mm_loadu_pd(x));
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+2) & MASK])));
|
||||
|
||||
// Shuffle out two subkeys
|
||||
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
|
||||
|
|
@ -847,11 +841,7 @@ inline void CHAM128_Dec_Block(__m128i &block0,
|
|||
for (int i = static_cast<int>(rounds)-1; i >= 0; i-=4)
|
||||
{
|
||||
__m128i k, k1, k2, t1, t2;
|
||||
double x[2];
|
||||
|
||||
// Avoid casting among datatypes
|
||||
std::memcpy(x, &subkeys[(i-1) & MASK], 16);
|
||||
k = _mm_castpd_si128(_mm_loadu_pd(x));
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-1) & MASK])));
|
||||
|
||||
// Shuffle out two subkeys
|
||||
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
|
||||
|
|
@ -870,10 +860,7 @@ inline void CHAM128_Dec_Block(__m128i &block0,
|
|||
c = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
|
||||
|
||||
counter = _mm_sub_epi32(counter, decrement);
|
||||
|
||||
// Avoid casting among datatypes
|
||||
std::memcpy(x, &subkeys[(i-3) & MASK], 16);
|
||||
k = _mm_castpd_si128(_mm_loadu_pd(x));
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-3) & MASK])));
|
||||
|
||||
// Shuffle out two subkeys
|
||||
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
|
||||
|
|
@ -917,11 +904,7 @@ inline void CHAM128_Enc_4_Blocks(__m128i &block0, __m128i &block1,
|
|||
for (int i=0; i<static_cast<int>(rounds); i+=4)
|
||||
{
|
||||
__m128i k, k1, k2, t1, t2;
|
||||
double x[2];
|
||||
|
||||
// Avoid casting among datatypes
|
||||
std::memcpy(x, &subkeys[(i+0) & MASK], 16);
|
||||
k = _mm_castpd_si128(_mm_loadu_pd(x));
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+0) & MASK])));
|
||||
|
||||
// Shuffle out two subkeys
|
||||
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
|
||||
|
|
@ -938,10 +921,7 @@ inline void CHAM128_Enc_4_Blocks(__m128i &block0, __m128i &block1,
|
|||
b = RotateLeft32<1>(_mm_add_epi32(t1, t2));
|
||||
|
||||
counter = _mm_add_epi32(counter, increment);
|
||||
|
||||
// Avoid casting among datatypes
|
||||
std::memcpy(x, &subkeys[(i+2) & MASK], 16);
|
||||
k = _mm_castpd_si128(_mm_loadu_pd(x));
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i+2) & MASK])));
|
||||
|
||||
// Shuffle out two subkeys
|
||||
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
|
||||
|
|
@ -986,11 +966,7 @@ inline void CHAM128_Dec_4_Blocks(__m128i &block0, __m128i &block1,
|
|||
for (int i = static_cast<int>(rounds)-1; i >= 0; i-=4)
|
||||
{
|
||||
__m128i k, k1, k2, t1, t2;
|
||||
double x[2];
|
||||
|
||||
// Avoid casting among datatypes
|
||||
std::memcpy(x, &subkeys[(i-1) & MASK], 16);
|
||||
k = _mm_castpd_si128(_mm_loadu_pd(x));
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-1) & MASK])));
|
||||
|
||||
// Shuffle out two subkeys
|
||||
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
|
||||
|
|
@ -1009,10 +985,7 @@ inline void CHAM128_Dec_4_Blocks(__m128i &block0, __m128i &block1,
|
|||
c = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
|
||||
|
||||
counter = _mm_sub_epi32(counter, decrement);
|
||||
|
||||
// Avoid casting among datatypes
|
||||
std::memcpy(x, &subkeys[(i-3) & MASK], 16);
|
||||
k = _mm_castpd_si128(_mm_loadu_pd(x));
|
||||
k = _mm_castpd_si128(_mm_load_sd((const double*)(&subkeys[(i-3) & MASK])));
|
||||
|
||||
// Shuffle out two subkeys
|
||||
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
|
||||
|
|
|
|||
Loading…
Reference in New Issue