Add XOP aware SIMECK
parent
babdf8b38b
commit
67f421174c
|
|
@ -22,6 +22,10 @@
|
||||||
# include <tmmintrin.h>
|
# include <tmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__XOP__)
|
||||||
|
# include <ammintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__)
|
||||||
# define CRYPTOPP_AVX512_ROTATE 1
|
# define CRYPTOPP_AVX512_ROTATE 1
|
||||||
# include <immintrin.h>
|
# include <immintrin.h>
|
||||||
|
|
@ -44,6 +48,8 @@ inline __m128i RotateLeft32(const __m128i& val)
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_AVX512_ROTATE)
|
#if defined(CRYPTOPP_AVX512_ROTATE)
|
||||||
return _mm_rol_epi32(val, R);
|
return _mm_rol_epi32(val, R);
|
||||||
|
#elif defined(__XOP__)
|
||||||
|
return _mm_roti_epi32(val, R);
|
||||||
#else
|
#else
|
||||||
return _mm_or_si128(
|
return _mm_or_si128(
|
||||||
_mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
|
_mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
|
||||||
|
|
@ -55,6 +61,8 @@ inline __m128i RotateRight32(const __m128i& val)
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_AVX512_ROTATE)
|
#if defined(CRYPTOPP_AVX512_ROTATE)
|
||||||
return _mm_ror_epi32(val, R);
|
return _mm_ror_epi32(val, R);
|
||||||
|
#elif defined(__XOP__)
|
||||||
|
return _mm_roti_epi32(val, 32-R);
|
||||||
#else
|
#else
|
||||||
return _mm_or_si128(
|
return _mm_or_si128(
|
||||||
_mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
|
_mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
|
||||||
|
|
@ -65,16 +73,24 @@ inline __m128i RotateRight32(const __m128i& val)
|
||||||
template <>
|
template <>
|
||||||
inline __m128i RotateLeft32<8>(const __m128i& val)
|
inline __m128i RotateLeft32<8>(const __m128i& val)
|
||||||
{
|
{
|
||||||
|
#if defined(__XOP__)
|
||||||
|
return _mm_roti_epi32(val, 8);
|
||||||
|
#else
|
||||||
const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
|
const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
|
||||||
return _mm_shuffle_epi8(val, mask);
|
return _mm_shuffle_epi8(val, mask);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
|
// Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
|
||||||
template <>
|
template <>
|
||||||
inline __m128i RotateRight32<8>(const __m128i& val)
|
inline __m128i RotateRight32<8>(const __m128i& val)
|
||||||
{
|
{
|
||||||
|
#if defined(__XOP__)
|
||||||
|
return _mm_roti_epi32(val, 32-8);
|
||||||
|
#else
|
||||||
const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
|
const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
|
||||||
return _mm_shuffle_epi8(val, mask);
|
return _mm_shuffle_epi8(val, mask);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Unpack XMM words
|
/// \brief Unpack XMM words
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue