Summary of this patch (text before the first "diff --git" line is ignored by patch tools):

  chacha-simd.cpp
    * Pulls in the SSSE3 intrinsics header when CRYPTOPP_SSSE3_INTRIN_AVAILABLE
      or CRYPTOPP_SSSE3_ASM_AVAILABLE is set.
    * Adds RotateLeft<8> and RotateLeft<16> specializations, compiled only when
      __SSSE3__ is defined, that rotate each 32-bit lane with one
      _mm_shuffle_epi8 byte shuffle instead of two shifts and an OR.
  chacha.cpp
    * AlgorithmProvider() and GetOptimalBlockSize() no longer have a NEON branch.
    * OperateKeystream() advances `input` by 4*BYTES_PER_ITERATION only when
      `xorInput` is non-zero (`!!xorInput` evaluates to 0 or 1).

NOTE(review): the header names on the `# include` lines were missing from the
copy this was restored from. <tmmintrin.h> (SSSE3) and <arm_neon.h> (NEON) are
the headers that declare the intrinsics guarded by those blocks; the leading
SSE2 context line is assumed to be <emmintrin.h> -- confirm against the tree.
NOTE(review): indentation inside the hunks was reconstructed as 4 spaces --
confirm against the tree (apply with whitespace ignored if it differs).

diff --git a/chacha-simd.cpp b/chacha-simd.cpp
index c0ad8245..ce409173 100644
--- a/chacha-simd.cpp
+++ b/chacha-simd.cpp
@@ -9,7 +9,7 @@
 // SSE2 implementation based on Botan's chacha_sse2.cpp. Many thanks
 // to Jack Lloyd and the Botan team for allowing us to use it.
 //
-// ARMv8 Power7 is upcoming.
+// NEON and Power7 are upcoming.
 
 #include "pch.h"
 #include "config.h"
@@ -22,6 +22,10 @@
 # include <emmintrin.h>
 #endif
 
+#if (CRYPTOPP_SSSE3_INTRIN_AVAILABLE || CRYPTOPP_SSSE3_ASM_AVAILABLE)
+# include <tmmintrin.h>
+#endif
+
 #if (CRYPTOPP_ARM_NEON_AVAILABLE)
 # include <arm_neon.h>
 #endif
@@ -46,6 +50,22 @@ inline __m128i RotateLeft(const __m128i val)
     return _mm_or_si128(_mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
 }
 
+#ifdef __SSSE3__
+template <>
+inline __m128i RotateLeft<8>(const __m128i val)
+{
+    const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
+    return _mm_shuffle_epi8(val, mask);
+}
+
+template <>
+inline __m128i RotateLeft<16>(const __m128i val)
+{
+    const __m128i mask = _mm_set_epi8(13,12,15,14, 9,8,11,10, 5,4,7,6, 1,0,3,2);
+    return _mm_shuffle_epi8(val, mask);
+}
+#endif
+
 #endif  // CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
 
 ANONYMOUS_NAMESPACE_END
diff --git a/chacha.cpp b/chacha.cpp
index 080ccbfd..23fd3be8 100644
--- a/chacha.cpp
+++ b/chacha.cpp
@@ -33,10 +33,6 @@ std::string ChaCha_Policy::AlgorithmProvider() const
 #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
     if (HasSSE2())
         return "SSE2";
-#endif
-#if (CRYPTOPP_ARM_NEON_AVAILABLE)
-    if (HasNEON())
-        return "NEON";
 #endif
     return "C++";
 }
@@ -95,11 +91,6 @@ unsigned int ChaCha_Policy::GetOptimalBlockSize() const
     if (HasSSE2())
         return 4*BYTES_PER_ITERATION;
     else
-#endif
-#if (CRYPTOPP_ARM_NEON_AVAILABLE)
-    if (HasNEON())
-        return 4*BYTES_PER_ITERATION;
-    else
 #endif
     return BYTES_PER_ITERATION;
 }
@@ -122,7 +113,7 @@ void ChaCha_Policy::OperateKeystream(KeystreamOperation operation,
             if (m_state[12] < 4)
                 m_state[13]++;
 
-            input += 4*BYTES_PER_ITERATION;
+            input += !!xorInput*4*BYTES_PER_ITERATION;
             output += 4*BYTES_PER_ITERATION;
             iterationCount -= 4;
         }