Add SSSE3 rotates when available
This change recovers the remaining 0.1 to 0.15 cpb (cycles per byte). It should be engaged when building with -march=native.
pull/730/head
parent
c43c47e590
commit
b4c4c5aa14
|
|
@ -9,7 +9,7 @@
|
||||||
// SSE2 implementation based on Botan's chacha_sse2.cpp. Many thanks
|
// SSE2 implementation based on Botan's chacha_sse2.cpp. Many thanks
|
||||||
// to Jack Lloyd and the Botan team for allowing us to use it.
|
// to Jack Lloyd and the Botan team for allowing us to use it.
|
||||||
//
|
//
|
||||||
// ARMv8 Power7 is upcoming.
|
// NEON and Power7 is upcoming.
|
||||||
|
|
||||||
#include "pch.h"
|
#include "pch.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
@ -22,6 +22,10 @@
|
||||||
# include <emmintrin.h>
|
# include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if (CRYPTOPP_SSSE3_INTRIN_AVAILABLE || CRYPTOPP_SSSE3_ASM_AVAILABLE)
|
||||||
|
# include <tmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||||
# include <arm_neon.h>
|
# include <arm_neon.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -46,6 +50,22 @@ inline __m128i RotateLeft(const __m128i val)
|
||||||
return _mm_or_si128(_mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
|
return _mm_or_si128(_mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __SSSE3__
|
||||||
|
template <>
|
||||||
|
inline __m128i RotateLeft<8>(const __m128i val)
|
||||||
|
{
|
||||||
|
const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
|
||||||
|
return _mm_shuffle_epi8(val, mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline __m128i RotateLeft<16>(const __m128i val)
|
||||||
|
{
|
||||||
|
const __m128i mask = _mm_set_epi8(13,12,15,14, 9,8,11,10, 5,4,7,6, 1,0,3,2);
|
||||||
|
return _mm_shuffle_epi8(val, mask);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
|
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
|
|
||||||
ANONYMOUS_NAMESPACE_END
|
ANONYMOUS_NAMESPACE_END
|
||||||
|
|
|
||||||
11
chacha.cpp
11
chacha.cpp
|
|
@ -33,10 +33,6 @@ std::string ChaCha_Policy::AlgorithmProvider() const
|
||||||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
||||||
if (HasSSE2())
|
if (HasSSE2())
|
||||||
return "SSE2";
|
return "SSE2";
|
||||||
#endif
|
|
||||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
|
||||||
if (HasNEON())
|
|
||||||
return "NEON";
|
|
||||||
#endif
|
#endif
|
||||||
return "C++";
|
return "C++";
|
||||||
}
|
}
|
||||||
|
|
@ -95,11 +91,6 @@ unsigned int ChaCha_Policy::GetOptimalBlockSize() const
|
||||||
if (HasSSE2())
|
if (HasSSE2())
|
||||||
return 4*BYTES_PER_ITERATION;
|
return 4*BYTES_PER_ITERATION;
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
|
||||||
if (HasNEON())
|
|
||||||
return 4*BYTES_PER_ITERATION;
|
|
||||||
else
|
|
||||||
#endif
|
#endif
|
||||||
return BYTES_PER_ITERATION;
|
return BYTES_PER_ITERATION;
|
||||||
}
|
}
|
||||||
|
|
@ -122,7 +113,7 @@ void ChaCha_Policy::OperateKeystream(KeystreamOperation operation,
|
||||||
if (m_state[12] < 4)
|
if (m_state[12] < 4)
|
||||||
m_state[13]++;
|
m_state[13]++;
|
||||||
|
|
||||||
input += 4*BYTES_PER_ITERATION;
|
input += !!xorInput*4*BYTES_PER_ITERATION;
|
||||||
output += 4*BYTES_PER_ITERATION;
|
output += 4*BYTES_PER_ITERATION;
|
||||||
iterationCount -= 4;
|
iterationCount -= 4;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue