Add SSSE3 rotates when available

This change recovers the remaining 0.1 to 0.15 cpb. The SSSE3 rotates should be enabled by building with -march=native.
pull/730/head
Jeffrey Walton 2018-10-24 15:34:54 -04:00
parent c43c47e590
commit b4c4c5aa14
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 22 additions and 11 deletions

View File

@ -9,7 +9,7 @@
// SSE2 implementation based on Botan's chacha_sse2.cpp. Many thanks // SSE2 implementation based on Botan's chacha_sse2.cpp. Many thanks
// to Jack Lloyd and the Botan team for allowing us to use it. // to Jack Lloyd and the Botan team for allowing us to use it.
// //
// ARMv8 Power7 is upcoming. // NEON and Power7 is upcoming.
#include "pch.h" #include "pch.h"
#include "config.h" #include "config.h"
@ -22,6 +22,10 @@
# include <emmintrin.h> # include <emmintrin.h>
#endif #endif
#if (CRYPTOPP_SSSE3_INTRIN_AVAILABLE || CRYPTOPP_SSSE3_ASM_AVAILABLE)
# include <tmmintrin.h>
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE) #if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include <arm_neon.h> # include <arm_neon.h>
#endif #endif
@ -46,6 +50,22 @@ inline __m128i RotateLeft(const __m128i val)
return _mm_or_si128(_mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R)); return _mm_or_si128(_mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
} }
#ifdef __SSSE3__
template <>
inline __m128i RotateLeft<8>(const __m128i val)
{
    // SSSE3 specialization: rotating each 32-bit word left by 8 bits is a
    // pure byte permutation, so a single byte shuffle stands in for the
    // shift/shift/or sequence of the generic template.
    // _mm_set_epi8 takes bytes from index 15 down to 0; within every word,
    // destination bytes 3,2,1,0 are fed from source bytes 2,1,0,3.
    return _mm_shuffle_epi8(val,
        _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3));
}
template <>
inline __m128i RotateLeft<16>(const __m128i val)
{
    // SSSE3 specialization: rotating each 32-bit word left by 16 bits swaps
    // its two 16-bit halves, which a single byte shuffle can do directly.
    // _mm_set_epi8 takes bytes from index 15 down to 0; within every word,
    // destination bytes 3,2,1,0 are fed from source bytes 1,0,3,2.
    return _mm_shuffle_epi8(val,
        _mm_set_epi8(13,12,15,14, 9,8,11,10, 5,4,7,6, 1,0,3,2));
}
#endif
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE #endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
ANONYMOUS_NAMESPACE_END ANONYMOUS_NAMESPACE_END

View File

@ -33,10 +33,6 @@ std::string ChaCha_Policy::AlgorithmProvider() const
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE) #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
if (HasSSE2()) if (HasSSE2())
return "SSE2"; return "SSE2";
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
if (HasNEON())
return "NEON";
#endif #endif
return "C++"; return "C++";
} }
@ -95,11 +91,6 @@ unsigned int ChaCha_Policy::GetOptimalBlockSize() const
if (HasSSE2()) if (HasSSE2())
return 4*BYTES_PER_ITERATION; return 4*BYTES_PER_ITERATION;
else else
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
if (HasNEON())
return 4*BYTES_PER_ITERATION;
else
#endif #endif
return BYTES_PER_ITERATION; return BYTES_PER_ITERATION;
} }
@ -122,7 +113,7 @@ void ChaCha_Policy::OperateKeystream(KeystreamOperation operation,
if (m_state[12] < 4) if (m_state[12] < 4)
m_state[13]++; m_state[13]++;
input += 4*BYTES_PER_ITERATION; input += !!xorInput*4*BYTES_PER_ITERATION;
output += 4*BYTES_PER_ITERATION; output += 4*BYTES_PER_ITERATION;
iterationCount -= 4; iterationCount -= 4;
} }