Remove unneeded SIMON128 functions in simon64-simd.cpp

These functions look like artifacts left over from when we split simon-simd.cpp into simon64-simd.cpp and simon128-simd.cpp.
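For context, SIMON64 operates on 32-bit words while SIMON128 operates on 64-bit words, so the 64-bit helpers removed below can have no callers in simon64-simd.cpp. A minimal scalar sketch of the round function that both files vectorize, using illustrative rotl32/rotl64 helpers rather than the library's own rotate templates:

#include <cstdint>

// Illustrative rotate-left helpers (valid for 0 < r < word size).
inline uint32_t rotl32(uint32_t x, unsigned r) { return (x << r) | (x >> (32 - r)); }
inline uint64_t rotl64(uint64_t x, unsigned r) { return (x << r) | (x >> (64 - r)); }

// SIMON mixing function f(x) = (x <<< 1 & x <<< 8) ^ (x <<< 2) at each word size.
// Only the 32-bit flavor belongs in simon64-simd.cpp.
inline uint32_t SIMON64_f(uint32_t x)  { return (rotl32(x, 1) & rotl32(x, 8)) ^ rotl32(x, 2); }
inline uint64_t SIMON128_f(uint64_t x) { return (rotl64(x, 1) & rotl64(x, 8)) ^ rotl64(x, 2); }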
pull/730/head
Jeffrey Walton 2018-10-24 19:02:20 -04:00
parent 67f421174c
commit ecbf791015
GPG Key ID: B36AB348921B1838
1 changed file with 1 addition and 236 deletions


@@ -292,23 +292,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
// ***************************** IA-32 ***************************** //
#if (CRYPTOPP_SSSE3_AVAILABLE)
// Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670
#ifndef M128_CAST
# define M128_CAST(x) ((__m128i *)(void *)(x))
#endif
#ifndef CONST_M128_CAST
# define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
#endif
// GCC double casts, https://www.spinics.net/lists/gcchelp/msg47735.html
#ifndef DOUBLE_CAST
# define DOUBLE_CAST(x) ((double *)(void *)(x))
#endif
#ifndef CONST_DOUBLE_CAST
# define CONST_DOUBLE_CAST(x) ((const double *)(const void *)(x))
#endif
#if defined(CRYPTOPP_SSE41_AVAILABLE)
inline void Swap128(__m128i& a,__m128i& b)
{
@@ -321,225 +305,6 @@ inline void Swap128(__m128i& a,__m128i& b)
#endif
}
template <unsigned int R>
inline __m128i RotateLeft64(const __m128i& val)
{
#if defined(CRYPTOPP_AVX512_ROTATE)
return _mm_rol_epi64(val, R);
#elif defined(__XOP__)
return _mm_roti_epi64(val, R);
#else
return _mm_or_si128(
_mm_slli_epi64(val, R), _mm_srli_epi64(val, 64-R));
#endif
}
template <unsigned int R>
inline __m128i RotateRight64(const __m128i& val)
{
#if defined(CRYPTOPP_AVX512_ROTATE)
return _mm_ror_epi64(val, R);
#elif defined(__XOP__)
return _mm_roti_epi64(val, 64-R);
#else
return _mm_or_si128(
_mm_slli_epi64(val, 64-R), _mm_srli_epi64(val, R));
#endif
}
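// Scalar equivalent, for reference: RotateLeft64<R> computes
// (x << R) | (x >> (64-R)) independently in each 64-bit lane.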
// Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
template <>
inline __m128i RotateLeft64<8>(const __m128i& val)
{
#if defined(__XOP__)
return _mm_roti_epi64(val, 8);
#else
const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
return _mm_shuffle_epi8(val, mask);
#endif
}
// Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
template <>
inline __m128i RotateRight64<8>(const __m128i& val)
{
#if defined(__XOP__)
return _mm_roti_epi64(val, 64-8);
#else
const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
return _mm_shuffle_epi8(val, mask);
#endif
}
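// The SIMON mixing function f(x) = (x <<< 1 & x <<< 8) ^ (x <<< 2),
// computed on both 64-bit lanes at once.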
inline __m128i SIMON128_f(const __m128i& v)
{
return _mm_xor_si128(RotateLeft64<2>(v),
_mm_and_si128(RotateLeft64<1>(v), RotateLeft64<8>(v)));
}
inline void SIMON128_Enc_Block(__m128i &block0, __m128i &block1,
const word64 *subkeys, unsigned int rounds)
{
// [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
__m128i x1 = _mm_unpackhi_epi64(block0, block1);
__m128i y1 = _mm_unpacklo_epi64(block0, block1);
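// _mm_loaddup_pd broadcasts one 64-bit subkey into both lanes; the
// _mm_castpd_si128 is a bitwise reinterpret and costs no instructions.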
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
{
const __m128i rk1 = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i)));
y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk1);
const __m128i rk2 = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i+1)));
x1 = _mm_xor_si128(_mm_xor_si128(x1, SIMON128_f(y1)), rk2);
}
if (rounds & 1)
{
const __m128i rk = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+rounds-1)));
y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk);
Swap128(x1, y1);
}
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
block0 = _mm_unpacklo_epi64(y1, x1);
block1 = _mm_unpackhi_epi64(y1, x1);
}
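// The 6-block variants below run the same rounds on three independent
// x/y register pairs so work from different blocks can overlap in the pipeline.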
inline void SIMON128_Enc_6_Blocks(__m128i &block0, __m128i &block1,
__m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
const word64 *subkeys, unsigned int rounds)
{
// [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
__m128i x1 = _mm_unpackhi_epi64(block0, block1);
__m128i y1 = _mm_unpacklo_epi64(block0, block1);
__m128i x2 = _mm_unpackhi_epi64(block2, block3);
__m128i y2 = _mm_unpacklo_epi64(block2, block3);
__m128i x3 = _mm_unpackhi_epi64(block4, block5);
__m128i y3 = _mm_unpacklo_epi64(block4, block5);
for (int i = 0; i < static_cast<int>(rounds & ~1) - 1; i += 2)
{
const __m128i rk1 = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + i)));
y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk1);
y2 = _mm_xor_si128(_mm_xor_si128(y2, SIMON128_f(x2)), rk1);
y3 = _mm_xor_si128(_mm_xor_si128(y3, SIMON128_f(x3)), rk1);
const __m128i rk2 = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + i + 1)));
x1 = _mm_xor_si128(_mm_xor_si128(x1, SIMON128_f(y1)), rk2);
x2 = _mm_xor_si128(_mm_xor_si128(x2, SIMON128_f(y2)), rk2);
x3 = _mm_xor_si128(_mm_xor_si128(x3, SIMON128_f(y3)), rk2);
}
if (rounds & 1)
{
const __m128i rk = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + rounds - 1)));
y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk);
y2 = _mm_xor_si128(_mm_xor_si128(y2, SIMON128_f(x2)), rk);
y3 = _mm_xor_si128(_mm_xor_si128(y3, SIMON128_f(x3)), rk);
Swap128(x1, y1); Swap128(x2, y2); Swap128(x3, y3);
}
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
block0 = _mm_unpacklo_epi64(y1, x1);
block1 = _mm_unpackhi_epi64(y1, x1);
block2 = _mm_unpacklo_epi64(y2, x2);
block3 = _mm_unpackhi_epi64(y2, x2);
block4 = _mm_unpacklo_epi64(y3, x3);
block5 = _mm_unpackhi_epi64(y3, x3);
}
inline void SIMON128_Dec_Block(__m128i &block0, __m128i &block1,
const word64 *subkeys, unsigned int rounds)
{
// [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
__m128i x1 = _mm_unpackhi_epi64(block0, block1);
__m128i y1 = _mm_unpacklo_epi64(block0, block1);
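// Decryption walks the key schedule in reverse: undo the odd trailing
// round first (if any), then unwind two rounds per iteration.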
if (rounds & 1)
{
const __m128i rk = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + rounds - 1)));
Swap128(x1, y1);
y1 = _mm_xor_si128(_mm_xor_si128(y1, rk), SIMON128_f(x1));
rounds--;
}
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
{
const __m128i rk1 = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i+1)));
x1 = _mm_xor_si128(_mm_xor_si128(x1, SIMON128_f(y1)), rk1);
const __m128i rk2 = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys+i)));
y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk2);
}
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
block0 = _mm_unpacklo_epi64(y1, x1);
block1 = _mm_unpackhi_epi64(y1, x1);
}
inline void SIMON128_Dec_6_Blocks(__m128i &block0, __m128i &block1,
__m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
const word64 *subkeys, unsigned int rounds)
{
// [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
__m128i x1 = _mm_unpackhi_epi64(block0, block1);
__m128i y1 = _mm_unpacklo_epi64(block0, block1);
__m128i x2 = _mm_unpackhi_epi64(block2, block3);
__m128i y2 = _mm_unpacklo_epi64(block2, block3);
__m128i x3 = _mm_unpackhi_epi64(block4, block5);
__m128i y3 = _mm_unpacklo_epi64(block4, block5);
if (rounds & 1)
{
const __m128i rk = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + rounds - 1)));
Swap128(x1, y1); Swap128(x2, y2); Swap128(x3, y3);
y1 = _mm_xor_si128(_mm_xor_si128(y1, rk), SIMON128_f(x1));
y2 = _mm_xor_si128(_mm_xor_si128(y2, rk), SIMON128_f(x2));
y3 = _mm_xor_si128(_mm_xor_si128(y3, rk), SIMON128_f(x3));
rounds--;
}
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
{
const __m128i rk1 = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + i + 1)));
x1 = _mm_xor_si128(_mm_xor_si128(x1, SIMON128_f(y1)), rk1);
x2 = _mm_xor_si128(_mm_xor_si128(x2, SIMON128_f(y2)), rk1);
x3 = _mm_xor_si128(_mm_xor_si128(x3, SIMON128_f(y3)), rk1);
const __m128i rk2 = _mm_castpd_si128(
_mm_loaddup_pd(CONST_DOUBLE_CAST(subkeys + i)));
y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON128_f(x1)), rk2);
y2 = _mm_xor_si128(_mm_xor_si128(y2, SIMON128_f(x2)), rk2);
y3 = _mm_xor_si128(_mm_xor_si128(y3, SIMON128_f(x3)), rk2);
}
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
block0 = _mm_unpacklo_epi64(y1, x1);
block1 = _mm_unpackhi_epi64(y1, x1);
block2 = _mm_unpacklo_epi64(y2, x2);
block3 = _mm_unpackhi_epi64(y2, x2);
block4 = _mm_unpacklo_epi64(y3, x3);
block5 = _mm_unpackhi_epi64(y3, x3);
}
#endif // CRYPTOPP_SSSE3_AVAILABLE
#if defined(CRYPTOPP_SSE41_AVAILABLE)
template <unsigned int R>
inline __m128i RotateLeft32(const __m128i& val)
{