Review notes (free text ahead of the first "diff --git" line; not part of any hunk).

Purpose: when the compiler defines __XOP__, use the XOP rotate intrinsic
_mm_roti_epi32 for the 32-bit lane rotates in chacha-simd.cpp; otherwise keep
the existing SSE2 shift/or fallback and the SSSE3 byte-shuffle specializations
for rotate-by-8 and rotate-by-16.

Fixes relative to the patch as originally posted:
  * RotateLeft<8> and RotateLeft<16> are explicit specializations declared with
    "template <>", so no template parameter R is in scope inside them. The XOP
    branches now pass the literal counts 8 and 16 instead of R.
  * The closing guard comment read "SSE3"; the guard tests __SSSE3__.

NOTE(review): the posted text had lost its line breaks, indentation, blank
lines and every <...> token. The header names, "template <unsigned int R>",
the 4-space indentation and the blank context lines below were reconstructed
(blank lines from the hunk line counts). Confirm them against the target file
before applying. The index line is kept from the original post; the post-image
hash will differ because the added lines changed.

diff --git a/chacha-simd.cpp b/chacha-simd.cpp
index ce409173..21782164 100644
--- a/chacha-simd.cpp
+++ b/chacha-simd.cpp
@@ -26,6 +26,10 @@
 # include <tmmintrin.h>
 #endif
 
+#ifdef __XOP__
+# include <ammintrin.h>
+#endif
+
 #if (CRYPTOPP_ARM_NEON_AVAILABLE)
 # include <arm_neon.h>
 #endif
@@ -47,24 +51,36 @@ ANONYMOUS_NAMESPACE_BEGIN
 template <unsigned int R>
 inline __m128i RotateLeft(const __m128i val)
 {
+#ifdef __XOP__
+    return _mm_roti_epi32(val, R);
+#else
     return _mm_or_si128(_mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
+#endif
 }
 
-#ifdef __SSSE3__
+#if defined(__SSSE3__)
 template <>
 inline __m128i RotateLeft<8>(const __m128i val)
 {
+#ifdef __XOP__
+    return _mm_roti_epi32(val, 8);
+#else
     const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
     return _mm_shuffle_epi8(val, mask);
+#endif
 }
 
 template <>
 inline __m128i RotateLeft<16>(const __m128i val)
 {
+#ifdef __XOP__
+    return _mm_roti_epi32(val, 16);
+#else
     const __m128i mask = _mm_set_epi8(13,12,15,14, 9,8,11,10, 5,4,7,6, 1,0,3,2);
     return _mm_shuffle_epi8(val, mask);
-}
 #endif
+}
+#endif // SSSE3
 
 #endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
 