Whitespace check-in
parent
b1050636a6
commit
b4b3623938
434
chacha-simd.cpp
434
chacha-simd.cpp
|
|
@ -118,9 +118,9 @@ template <unsigned int R>
|
||||||
// Rotates each 32-bit lane of val left by R bits (R is the enclosing
// template parameter). When __XOP__ is defined the single XOP rotate
// intrinsic is used; otherwise the rotate is composed from a left shift by
// R, a right shift by 32-R, and an OR of the two halves.
inline __m128i RotateLeft(const __m128i val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, R);
#else
    const __m128i shiftedUp = _mm_slli_epi32(val, R);
    const __m128i shiftedDown = _mm_srli_epi32(val, 32-R);
    return _mm_or_si128(shiftedUp, shiftedDown);
#endif
}
|
||||||
|
|
||||||
|
|
@ -129,10 +129,10 @@ template <>
|
||||||
// Specialization for a rotate by 8 bits. A rotate by a whole byte is a pure
// byte permutation inside each 32-bit lane, so without XOP it is done with a
// single byte shuffle instead of two shifts and an OR.
inline __m128i RotateLeft<8>(const __m128i val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 8);
#else
    // Per lane (low byte first): result bytes come from source bytes 3,0,1,2.
    return _mm_shuffle_epi8(val,
        _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3));
#endif
}
|
||||||
|
|
||||||
|
|
@ -140,10 +140,10 @@ template <>
|
||||||
// Specialization for a rotate by 16 bits. Swapping the two 16-bit halves of
// each 32-bit lane is a pure byte permutation, so without XOP it is done
// with a single byte shuffle instead of two shifts and an OR.
inline __m128i RotateLeft<16>(const __m128i val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 16);
#else
    // Per lane (low byte first): result bytes come from source bytes 2,3,0,1.
    return _mm_shuffle_epi8(val,
        _mm_set_epi8(13,12,15,14, 9,8,11,10, 5,4,7,6, 1,0,3,2));
#endif
}
|
||||||
#endif // SSE3
|
#endif // SSE3
|
||||||
|
|
@ -430,264 +430,264 @@ void ChaCha_OperateKeystream_NEON(const word32 *state, const byte* input, byte *
|
||||||
|
|
||||||
void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds, bool xorInput)
|
void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds, bool xorInput)
|
||||||
{
|
{
|
||||||
const __m128i* state_mm = reinterpret_cast<const __m128i*>(state);
|
const __m128i* state_mm = reinterpret_cast<const __m128i*>(state);
|
||||||
const __m128i* input_mm = reinterpret_cast<const __m128i*>(input);
|
const __m128i* input_mm = reinterpret_cast<const __m128i*>(input);
|
||||||
__m128i* output_mm = reinterpret_cast<__m128i*>(output);
|
__m128i* output_mm = reinterpret_cast<__m128i*>(output);
|
||||||
|
|
||||||
const __m128i state0 = _mm_load_si128(state_mm + 0);
|
const __m128i state0 = _mm_load_si128(state_mm + 0);
|
||||||
const __m128i state1 = _mm_load_si128(state_mm + 1);
|
const __m128i state1 = _mm_load_si128(state_mm + 1);
|
||||||
const __m128i state2 = _mm_load_si128(state_mm + 2);
|
const __m128i state2 = _mm_load_si128(state_mm + 2);
|
||||||
const __m128i state3 = _mm_load_si128(state_mm + 3);
|
const __m128i state3 = _mm_load_si128(state_mm + 3);
|
||||||
|
|
||||||
__m128i r0_0 = state0;
|
__m128i r0_0 = state0;
|
||||||
__m128i r0_1 = state1;
|
__m128i r0_1 = state1;
|
||||||
__m128i r0_2 = state2;
|
__m128i r0_2 = state2;
|
||||||
__m128i r0_3 = state3;
|
__m128i r0_3 = state3;
|
||||||
|
|
||||||
__m128i r1_0 = state0;
|
__m128i r1_0 = state0;
|
||||||
__m128i r1_1 = state1;
|
__m128i r1_1 = state1;
|
||||||
__m128i r1_2 = state2;
|
__m128i r1_2 = state2;
|
||||||
__m128i r1_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 1));
|
__m128i r1_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 1));
|
||||||
|
|
||||||
__m128i r2_0 = state0;
|
__m128i r2_0 = state0;
|
||||||
__m128i r2_1 = state1;
|
__m128i r2_1 = state1;
|
||||||
__m128i r2_2 = state2;
|
__m128i r2_2 = state2;
|
||||||
__m128i r2_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 2));
|
__m128i r2_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 2));
|
||||||
|
|
||||||
__m128i r3_0 = state0;
|
__m128i r3_0 = state0;
|
||||||
__m128i r3_1 = state1;
|
__m128i r3_1 = state1;
|
||||||
__m128i r3_2 = state2;
|
__m128i r3_2 = state2;
|
||||||
__m128i r3_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 3));
|
__m128i r3_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 3));
|
||||||
|
|
||||||
for (int i = static_cast<int>(rounds); i > 0; i -= 2)
|
for (int i = static_cast<int>(rounds); i > 0; i -= 2)
|
||||||
{
|
{
|
||||||
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
||||||
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
||||||
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
||||||
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
||||||
|
|
||||||
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
||||||
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
||||||
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
||||||
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
||||||
|
|
||||||
r0_3 = RotateLeft<16>(r0_3);
|
r0_3 = RotateLeft<16>(r0_3);
|
||||||
r1_3 = RotateLeft<16>(r1_3);
|
r1_3 = RotateLeft<16>(r1_3);
|
||||||
r2_3 = RotateLeft<16>(r2_3);
|
r2_3 = RotateLeft<16>(r2_3);
|
||||||
r3_3 = RotateLeft<16>(r3_3);
|
r3_3 = RotateLeft<16>(r3_3);
|
||||||
|
|
||||||
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
||||||
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
||||||
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
||||||
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
||||||
|
|
||||||
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
||||||
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
||||||
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
||||||
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
||||||
|
|
||||||
r0_1 = RotateLeft<12>(r0_1);
|
r0_1 = RotateLeft<12>(r0_1);
|
||||||
r1_1 = RotateLeft<12>(r1_1);
|
r1_1 = RotateLeft<12>(r1_1);
|
||||||
r2_1 = RotateLeft<12>(r2_1);
|
r2_1 = RotateLeft<12>(r2_1);
|
||||||
r3_1 = RotateLeft<12>(r3_1);
|
r3_1 = RotateLeft<12>(r3_1);
|
||||||
|
|
||||||
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
||||||
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
||||||
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
||||||
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
||||||
|
|
||||||
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
||||||
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
||||||
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
||||||
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
||||||
|
|
||||||
r0_3 = RotateLeft<8>(r0_3);
|
r0_3 = RotateLeft<8>(r0_3);
|
||||||
r1_3 = RotateLeft<8>(r1_3);
|
r1_3 = RotateLeft<8>(r1_3);
|
||||||
r2_3 = RotateLeft<8>(r2_3);
|
r2_3 = RotateLeft<8>(r2_3);
|
||||||
r3_3 = RotateLeft<8>(r3_3);
|
r3_3 = RotateLeft<8>(r3_3);
|
||||||
|
|
||||||
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
||||||
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
||||||
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
||||||
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
||||||
|
|
||||||
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
||||||
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
||||||
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
||||||
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
||||||
|
|
||||||
r0_1 = RotateLeft<7>(r0_1);
|
r0_1 = RotateLeft<7>(r0_1);
|
||||||
r1_1 = RotateLeft<7>(r1_1);
|
r1_1 = RotateLeft<7>(r1_1);
|
||||||
r2_1 = RotateLeft<7>(r2_1);
|
r2_1 = RotateLeft<7>(r2_1);
|
||||||
r3_1 = RotateLeft<7>(r3_1);
|
r3_1 = RotateLeft<7>(r3_1);
|
||||||
|
|
||||||
r0_1 = _mm_shuffle_epi32(r0_1, _MM_SHUFFLE(0, 3, 2, 1));
|
r0_1 = _mm_shuffle_epi32(r0_1, _MM_SHUFFLE(0, 3, 2, 1));
|
||||||
r0_2 = _mm_shuffle_epi32(r0_2, _MM_SHUFFLE(1, 0, 3, 2));
|
r0_2 = _mm_shuffle_epi32(r0_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
r0_3 = _mm_shuffle_epi32(r0_3, _MM_SHUFFLE(2, 1, 0, 3));
|
r0_3 = _mm_shuffle_epi32(r0_3, _MM_SHUFFLE(2, 1, 0, 3));
|
||||||
|
|
||||||
r1_1 = _mm_shuffle_epi32(r1_1, _MM_SHUFFLE(0, 3, 2, 1));
|
r1_1 = _mm_shuffle_epi32(r1_1, _MM_SHUFFLE(0, 3, 2, 1));
|
||||||
r1_2 = _mm_shuffle_epi32(r1_2, _MM_SHUFFLE(1, 0, 3, 2));
|
r1_2 = _mm_shuffle_epi32(r1_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
r1_3 = _mm_shuffle_epi32(r1_3, _MM_SHUFFLE(2, 1, 0, 3));
|
r1_3 = _mm_shuffle_epi32(r1_3, _MM_SHUFFLE(2, 1, 0, 3));
|
||||||
|
|
||||||
r2_1 = _mm_shuffle_epi32(r2_1, _MM_SHUFFLE(0, 3, 2, 1));
|
r2_1 = _mm_shuffle_epi32(r2_1, _MM_SHUFFLE(0, 3, 2, 1));
|
||||||
r2_2 = _mm_shuffle_epi32(r2_2, _MM_SHUFFLE(1, 0, 3, 2));
|
r2_2 = _mm_shuffle_epi32(r2_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
r2_3 = _mm_shuffle_epi32(r2_3, _MM_SHUFFLE(2, 1, 0, 3));
|
r2_3 = _mm_shuffle_epi32(r2_3, _MM_SHUFFLE(2, 1, 0, 3));
|
||||||
|
|
||||||
r3_1 = _mm_shuffle_epi32(r3_1, _MM_SHUFFLE(0, 3, 2, 1));
|
r3_1 = _mm_shuffle_epi32(r3_1, _MM_SHUFFLE(0, 3, 2, 1));
|
||||||
r3_2 = _mm_shuffle_epi32(r3_2, _MM_SHUFFLE(1, 0, 3, 2));
|
r3_2 = _mm_shuffle_epi32(r3_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(2, 1, 0, 3));
|
r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(2, 1, 0, 3));
|
||||||
|
|
||||||
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
||||||
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
||||||
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
||||||
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
||||||
|
|
||||||
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
||||||
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
||||||
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
||||||
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
||||||
|
|
||||||
r0_3 = RotateLeft<16>(r0_3);
|
r0_3 = RotateLeft<16>(r0_3);
|
||||||
r1_3 = RotateLeft<16>(r1_3);
|
r1_3 = RotateLeft<16>(r1_3);
|
||||||
r2_3 = RotateLeft<16>(r2_3);
|
r2_3 = RotateLeft<16>(r2_3);
|
||||||
r3_3 = RotateLeft<16>(r3_3);
|
r3_3 = RotateLeft<16>(r3_3);
|
||||||
|
|
||||||
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
||||||
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
||||||
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
||||||
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
||||||
|
|
||||||
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
||||||
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
||||||
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
||||||
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
||||||
|
|
||||||
r0_1 = RotateLeft<12>(r0_1);
|
r0_1 = RotateLeft<12>(r0_1);
|
||||||
r1_1 = RotateLeft<12>(r1_1);
|
r1_1 = RotateLeft<12>(r1_1);
|
||||||
r2_1 = RotateLeft<12>(r2_1);
|
r2_1 = RotateLeft<12>(r2_1);
|
||||||
r3_1 = RotateLeft<12>(r3_1);
|
r3_1 = RotateLeft<12>(r3_1);
|
||||||
|
|
||||||
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
||||||
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
||||||
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
||||||
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
||||||
|
|
||||||
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
||||||
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
||||||
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
||||||
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
||||||
|
|
||||||
r0_3 = RotateLeft<8>(r0_3);
|
r0_3 = RotateLeft<8>(r0_3);
|
||||||
r1_3 = RotateLeft<8>(r1_3);
|
r1_3 = RotateLeft<8>(r1_3);
|
||||||
r2_3 = RotateLeft<8>(r2_3);
|
r2_3 = RotateLeft<8>(r2_3);
|
||||||
r3_3 = RotateLeft<8>(r3_3);
|
r3_3 = RotateLeft<8>(r3_3);
|
||||||
|
|
||||||
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
||||||
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
||||||
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
||||||
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
||||||
|
|
||||||
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
||||||
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
||||||
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
||||||
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
||||||
|
|
||||||
r0_1 = RotateLeft<7>(r0_1);
|
r0_1 = RotateLeft<7>(r0_1);
|
||||||
r1_1 = RotateLeft<7>(r1_1);
|
r1_1 = RotateLeft<7>(r1_1);
|
||||||
r2_1 = RotateLeft<7>(r2_1);
|
r2_1 = RotateLeft<7>(r2_1);
|
||||||
r3_1 = RotateLeft<7>(r3_1);
|
r3_1 = RotateLeft<7>(r3_1);
|
||||||
|
|
||||||
r0_1 = _mm_shuffle_epi32(r0_1, _MM_SHUFFLE(2, 1, 0, 3));
|
r0_1 = _mm_shuffle_epi32(r0_1, _MM_SHUFFLE(2, 1, 0, 3));
|
||||||
r0_2 = _mm_shuffle_epi32(r0_2, _MM_SHUFFLE(1, 0, 3, 2));
|
r0_2 = _mm_shuffle_epi32(r0_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
r0_3 = _mm_shuffle_epi32(r0_3, _MM_SHUFFLE(0, 3, 2, 1));
|
r0_3 = _mm_shuffle_epi32(r0_3, _MM_SHUFFLE(0, 3, 2, 1));
|
||||||
|
|
||||||
r1_1 = _mm_shuffle_epi32(r1_1, _MM_SHUFFLE(2, 1, 0, 3));
|
r1_1 = _mm_shuffle_epi32(r1_1, _MM_SHUFFLE(2, 1, 0, 3));
|
||||||
r1_2 = _mm_shuffle_epi32(r1_2, _MM_SHUFFLE(1, 0, 3, 2));
|
r1_2 = _mm_shuffle_epi32(r1_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
r1_3 = _mm_shuffle_epi32(r1_3, _MM_SHUFFLE(0, 3, 2, 1));
|
r1_3 = _mm_shuffle_epi32(r1_3, _MM_SHUFFLE(0, 3, 2, 1));
|
||||||
|
|
||||||
r2_1 = _mm_shuffle_epi32(r2_1, _MM_SHUFFLE(2, 1, 0, 3));
|
r2_1 = _mm_shuffle_epi32(r2_1, _MM_SHUFFLE(2, 1, 0, 3));
|
||||||
r2_2 = _mm_shuffle_epi32(r2_2, _MM_SHUFFLE(1, 0, 3, 2));
|
r2_2 = _mm_shuffle_epi32(r2_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
r2_3 = _mm_shuffle_epi32(r2_3, _MM_SHUFFLE(0, 3, 2, 1));
|
r2_3 = _mm_shuffle_epi32(r2_3, _MM_SHUFFLE(0, 3, 2, 1));
|
||||||
|
|
||||||
r3_1 = _mm_shuffle_epi32(r3_1, _MM_SHUFFLE(2, 1, 0, 3));
|
r3_1 = _mm_shuffle_epi32(r3_1, _MM_SHUFFLE(2, 1, 0, 3));
|
||||||
r3_2 = _mm_shuffle_epi32(r3_2, _MM_SHUFFLE(1, 0, 3, 2));
|
r3_2 = _mm_shuffle_epi32(r3_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(0, 3, 2, 1));
|
r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(0, 3, 2, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
r0_0 = _mm_add_epi32(r0_0, state0);
|
r0_0 = _mm_add_epi32(r0_0, state0);
|
||||||
r0_1 = _mm_add_epi32(r0_1, state1);
|
r0_1 = _mm_add_epi32(r0_1, state1);
|
||||||
r0_2 = _mm_add_epi32(r0_2, state2);
|
r0_2 = _mm_add_epi32(r0_2, state2);
|
||||||
r0_3 = _mm_add_epi32(r0_3, state3);
|
r0_3 = _mm_add_epi32(r0_3, state3);
|
||||||
|
|
||||||
r1_0 = _mm_add_epi32(r1_0, state0);
|
r1_0 = _mm_add_epi32(r1_0, state0);
|
||||||
r1_1 = _mm_add_epi32(r1_1, state1);
|
r1_1 = _mm_add_epi32(r1_1, state1);
|
||||||
r1_2 = _mm_add_epi32(r1_2, state2);
|
r1_2 = _mm_add_epi32(r1_2, state2);
|
||||||
r1_3 = _mm_add_epi32(r1_3, state3);
|
r1_3 = _mm_add_epi32(r1_3, state3);
|
||||||
r1_3 = _mm_add_epi64(r1_3, _mm_set_epi32(0, 0, 0, 1));
|
r1_3 = _mm_add_epi64(r1_3, _mm_set_epi32(0, 0, 0, 1));
|
||||||
|
|
||||||
r2_0 = _mm_add_epi32(r2_0, state0);
|
r2_0 = _mm_add_epi32(r2_0, state0);
|
||||||
r2_1 = _mm_add_epi32(r2_1, state1);
|
r2_1 = _mm_add_epi32(r2_1, state1);
|
||||||
r2_2 = _mm_add_epi32(r2_2, state2);
|
r2_2 = _mm_add_epi32(r2_2, state2);
|
||||||
r2_3 = _mm_add_epi32(r2_3, state3);
|
r2_3 = _mm_add_epi32(r2_3, state3);
|
||||||
r2_3 = _mm_add_epi64(r2_3, _mm_set_epi32(0, 0, 0, 2));
|
r2_3 = _mm_add_epi64(r2_3, _mm_set_epi32(0, 0, 0, 2));
|
||||||
|
|
||||||
r3_0 = _mm_add_epi32(r3_0, state0);
|
r3_0 = _mm_add_epi32(r3_0, state0);
|
||||||
r3_1 = _mm_add_epi32(r3_1, state1);
|
r3_1 = _mm_add_epi32(r3_1, state1);
|
||||||
r3_2 = _mm_add_epi32(r3_2, state2);
|
r3_2 = _mm_add_epi32(r3_2, state2);
|
||||||
r3_3 = _mm_add_epi32(r3_3, state3);
|
r3_3 = _mm_add_epi32(r3_3, state3);
|
||||||
r3_3 = _mm_add_epi64(r3_3, _mm_set_epi32(0, 0, 0, 3));
|
r3_3 = _mm_add_epi64(r3_3, _mm_set_epi32(0, 0, 0, 3));
|
||||||
|
|
||||||
if (xorInput)
|
if (xorInput)
|
||||||
{
|
{
|
||||||
r0_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 0), r0_0);
|
r0_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 0), r0_0);
|
||||||
r0_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 1), r0_1);
|
r0_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 1), r0_1);
|
||||||
r0_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 2), r0_2);
|
r0_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 2), r0_2);
|
||||||
r0_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 3), r0_3);
|
r0_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 3), r0_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
_mm_storeu_si128(output_mm + 0, r0_0);
|
_mm_storeu_si128(output_mm + 0, r0_0);
|
||||||
_mm_storeu_si128(output_mm + 1, r0_1);
|
_mm_storeu_si128(output_mm + 1, r0_1);
|
||||||
_mm_storeu_si128(output_mm + 2, r0_2);
|
_mm_storeu_si128(output_mm + 2, r0_2);
|
||||||
_mm_storeu_si128(output_mm + 3, r0_3);
|
_mm_storeu_si128(output_mm + 3, r0_3);
|
||||||
|
|
||||||
if (xorInput)
|
if (xorInput)
|
||||||
{
|
{
|
||||||
r1_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 4), r1_0);
|
r1_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 4), r1_0);
|
||||||
r1_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 5), r1_1);
|
r1_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 5), r1_1);
|
||||||
r1_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 6), r1_2);
|
r1_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 6), r1_2);
|
||||||
r1_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 7), r1_3);
|
r1_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 7), r1_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
_mm_storeu_si128(output_mm + 4, r1_0);
|
_mm_storeu_si128(output_mm + 4, r1_0);
|
||||||
_mm_storeu_si128(output_mm + 5, r1_1);
|
_mm_storeu_si128(output_mm + 5, r1_1);
|
||||||
_mm_storeu_si128(output_mm + 6, r1_2);
|
_mm_storeu_si128(output_mm + 6, r1_2);
|
||||||
_mm_storeu_si128(output_mm + 7, r1_3);
|
_mm_storeu_si128(output_mm + 7, r1_3);
|
||||||
|
|
||||||
if (xorInput)
|
if (xorInput)
|
||||||
{
|
{
|
||||||
r2_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 8), r2_0);
|
r2_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 8), r2_0);
|
||||||
r2_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 9), r2_1);
|
r2_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 9), r2_1);
|
||||||
r2_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 10), r2_2);
|
r2_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 10), r2_2);
|
||||||
r2_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 11), r2_3);
|
r2_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 11), r2_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
_mm_storeu_si128(output_mm + 8, r2_0);
|
_mm_storeu_si128(output_mm + 8, r2_0);
|
||||||
_mm_storeu_si128(output_mm + 9, r2_1);
|
_mm_storeu_si128(output_mm + 9, r2_1);
|
||||||
_mm_storeu_si128(output_mm + 10, r2_2);
|
_mm_storeu_si128(output_mm + 10, r2_2);
|
||||||
_mm_storeu_si128(output_mm + 11, r2_3);
|
_mm_storeu_si128(output_mm + 11, r2_3);
|
||||||
|
|
||||||
if (xorInput)
|
if (xorInput)
|
||||||
{
|
{
|
||||||
r3_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 12), r3_0);
|
r3_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 12), r3_0);
|
||||||
r3_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 13), r3_1);
|
r3_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 13), r3_1);
|
||||||
r3_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 14), r3_2);
|
r3_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 14), r3_2);
|
||||||
r3_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 15), r3_3);
|
r3_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 15), r3_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
_mm_storeu_si128(output_mm + 12, r3_0);
|
_mm_storeu_si128(output_mm + 12, r3_0);
|
||||||
_mm_storeu_si128(output_mm + 13, r3_1);
|
_mm_storeu_si128(output_mm + 13, r3_1);
|
||||||
_mm_storeu_si128(output_mm + 14, r3_2);
|
_mm_storeu_si128(output_mm + 14, r3_2);
|
||||||
_mm_storeu_si128(output_mm + 15, r3_3);
|
_mm_storeu_si128(output_mm + 15, r3_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
|
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
|
|
|
||||||
212
chacha.cpp
212
chacha.cpp
|
|
@ -28,84 +28,84 @@ extern void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input,
|
||||||
#if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
#if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
void ChaCha_TestInstantiations()
|
void ChaCha_TestInstantiations()
|
||||||
{
|
{
|
||||||
ChaCha::Encryption x;
|
ChaCha::Encryption x;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
std::string ChaCha_Policy::AlgorithmProvider() const
|
std::string ChaCha_Policy::AlgorithmProvider() const
|
||||||
{
|
{
|
||||||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
||||||
if (HasSSE2())
|
if (HasSSE2())
|
||||||
return "SSE2";
|
return "SSE2";
|
||||||
#endif
|
#endif
|
||||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||||
if (HasNEON())
|
if (HasNEON())
|
||||||
return "NEON";
|
return "NEON";
|
||||||
#endif
|
#endif
|
||||||
return "C++";
|
return "C++";
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChaCha_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length)
|
void ChaCha_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length)
|
||||||
{
|
{
|
||||||
CRYPTOPP_UNUSED(params);
|
CRYPTOPP_UNUSED(params);
|
||||||
CRYPTOPP_ASSERT(length == 16 || length == 32);
|
CRYPTOPP_ASSERT(length == 16 || length == 32);
|
||||||
|
|
||||||
m_rounds = params.GetIntValueWithDefault(Name::Rounds(), 20);
|
m_rounds = params.GetIntValueWithDefault(Name::Rounds(), 20);
|
||||||
if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
|
if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
|
||||||
throw InvalidRounds(ChaCha::StaticAlgorithmName(), m_rounds);
|
throw InvalidRounds(ChaCha::StaticAlgorithmName(), m_rounds);
|
||||||
|
|
||||||
// "expand 16-byte k" or "expand 32-byte k"
|
// "expand 16-byte k" or "expand 32-byte k"
|
||||||
m_state[0] = 0x61707865;
|
m_state[0] = 0x61707865;
|
||||||
m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
|
m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
|
||||||
m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
|
m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
|
||||||
m_state[3] = 0x6b206574;
|
m_state[3] = 0x6b206574;
|
||||||
|
|
||||||
GetBlock<word32, LittleEndian> get1(key);
|
GetBlock<word32, LittleEndian> get1(key);
|
||||||
get1(m_state[4])(m_state[5])(m_state[6])(m_state[7]);
|
get1(m_state[4])(m_state[5])(m_state[6])(m_state[7]);
|
||||||
|
|
||||||
GetBlock<word32, LittleEndian> get2(key + ((length == 32) ? 16 : 0));
|
GetBlock<word32, LittleEndian> get2(key + ((length == 32) ? 16 : 0));
|
||||||
get2(m_state[8])(m_state[9])(m_state[10])(m_state[11]);
|
get2(m_state[8])(m_state[9])(m_state[10])(m_state[11]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Installs a new IV and restarts the key stream from block zero.
//
//   keystreamBuffer - not used.
//   IV              - 8 bytes, read as two little-endian 32-bit words into
//                     state words 14 and 15.
//   length          - asserted to be 8; otherwise not used.
//
// State words 12 and 13 (the block counter) are reset to zero.
void ChaCha_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length)
{
    CRYPTOPP_UNUSED(keystreamBuffer);
    CRYPTOPP_UNUSED(length);
    CRYPTOPP_ASSERT(length==8);

    GetBlock<word32, LittleEndian> ivReader(IV);

    m_state[13] = 0;
    m_state[12] = 0;
    ivReader(m_state[14])(m_state[15]);
}
|
||||||
|
|
||||||
// Positions the key stream at block number iterationCount.
//
// The block counter is 64 bits wide: state word 12 holds the low 32 bits and
// state word 13 the high 32 bits. This is the same layout OperateKeystream
// relies on when it increments m_state[12] and carries into m_state[13], and
// the layout the SSE2 kernel relies on when it does a 64-bit add on the low
// half of row 3. The two words were previously stored the other way round,
// so a seek to block 1 actually selected block 2^32.
void ChaCha_Policy::SeekToIteration(lword iterationCount)
{
    m_state[12] = static_cast<word32>(iterationCount);                      // low word
    m_state[13] = static_cast<word32>(SafeRightShift<32>(iterationCount));  // high word
}
|
||||||
|
|
||||||
unsigned int ChaCha_Policy::GetAlignment() const
|
unsigned int ChaCha_Policy::GetAlignment() const
|
||||||
{
|
{
|
||||||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
||||||
if (HasSSE2())
|
if (HasSSE2())
|
||||||
return 16;
|
return 16;
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
return GetAlignmentOf<word32>();
|
return GetAlignmentOf<word32>();
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int ChaCha_Policy::GetOptimalBlockSize() const
|
unsigned int ChaCha_Policy::GetOptimalBlockSize() const
|
||||||
{
|
{
|
||||||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
||||||
if (HasSSE2())
|
if (HasSSE2())
|
||||||
return 4*BYTES_PER_ITERATION;
|
return 4*BYTES_PER_ITERATION;
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||||
if (HasNEON())
|
if (HasNEON())
|
||||||
return 4*BYTES_PER_ITERATION;
|
return 4*BYTES_PER_ITERATION;
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
return BYTES_PER_ITERATION;
|
return BYTES_PER_ITERATION;
|
||||||
}
|
}
|
||||||
|
|
||||||
// OperateKeystream always produces a key stream. The key stream is written
|
// OperateKeystream always produces a key stream. The key stream is written
|
||||||
|
|
@ -115,91 +115,91 @@ void ChaCha_Policy::OperateKeystream(KeystreamOperation operation,
|
||||||
byte *output, const byte *input, size_t iterationCount)
|
byte *output, const byte *input, size_t iterationCount)
|
||||||
{
|
{
|
||||||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
|
||||||
if (HasSSE2())
|
if (HasSSE2())
|
||||||
{
|
{
|
||||||
while (iterationCount >= 4)
|
while (iterationCount >= 4)
|
||||||
{
|
{
|
||||||
bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
||||||
ChaCha_OperateKeystream_SSE2(m_state, input, output, m_rounds, xorInput);
|
ChaCha_OperateKeystream_SSE2(m_state, input, output, m_rounds, xorInput);
|
||||||
|
|
||||||
m_state[12] += 4;
|
m_state[12] += 4;
|
||||||
if (m_state[12] < 4)
|
if (m_state[12] < 4)
|
||||||
m_state[13]++;
|
m_state[13]++;
|
||||||
|
|
||||||
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
||||||
output += 4*BYTES_PER_ITERATION;
|
output += 4*BYTES_PER_ITERATION;
|
||||||
iterationCount -= 4;
|
iterationCount -= 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||||
if (HasNEON())
|
if (HasNEON())
|
||||||
{
|
{
|
||||||
while (iterationCount >= 4)
|
while (iterationCount >= 4)
|
||||||
{
|
{
|
||||||
bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
||||||
ChaCha_OperateKeystream_NEON(m_state, input, output, m_rounds, xorInput);
|
ChaCha_OperateKeystream_NEON(m_state, input, output, m_rounds, xorInput);
|
||||||
|
|
||||||
m_state[12] += 4;
|
m_state[12] += 4;
|
||||||
if (m_state[12] < 4)
|
if (m_state[12] < 4)
|
||||||
m_state[13]++;
|
m_state[13]++;
|
||||||
|
|
||||||
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
||||||
output += 4*BYTES_PER_ITERATION;
|
output += 4*BYTES_PER_ITERATION;
|
||||||
iterationCount -= 4;
|
iterationCount -= 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
while (iterationCount--)
|
while (iterationCount--)
|
||||||
{
|
{
|
||||||
word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
|
word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
|
||||||
|
|
||||||
x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
|
x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
|
||||||
x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
|
x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
|
||||||
x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
|
x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
|
||||||
x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
|
x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
|
||||||
|
|
||||||
for (int i = static_cast<int>(m_rounds); i > 0; i -= 2)
|
for (int i = static_cast<int>(m_rounds); i > 0; i -= 2)
|
||||||
{
|
{
|
||||||
CHACHA_QUARTER_ROUND(x0, x4, x8, x12);
|
CHACHA_QUARTER_ROUND(x0, x4, x8, x12);
|
||||||
CHACHA_QUARTER_ROUND(x1, x5, x9, x13);
|
CHACHA_QUARTER_ROUND(x1, x5, x9, x13);
|
||||||
CHACHA_QUARTER_ROUND(x2, x6, x10, x14);
|
CHACHA_QUARTER_ROUND(x2, x6, x10, x14);
|
||||||
CHACHA_QUARTER_ROUND(x3, x7, x11, x15);
|
CHACHA_QUARTER_ROUND(x3, x7, x11, x15);
|
||||||
|
|
||||||
CHACHA_QUARTER_ROUND(x0, x5, x10, x15);
|
CHACHA_QUARTER_ROUND(x0, x5, x10, x15);
|
||||||
CHACHA_QUARTER_ROUND(x1, x6, x11, x12);
|
CHACHA_QUARTER_ROUND(x1, x6, x11, x12);
|
||||||
CHACHA_QUARTER_ROUND(x2, x7, x8, x13);
|
CHACHA_QUARTER_ROUND(x2, x7, x8, x13);
|
||||||
CHACHA_QUARTER_ROUND(x3, x4, x9, x14);
|
CHACHA_QUARTER_ROUND(x3, x4, x9, x14);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
||||||
#define CHACHA_OUTPUT(x){\
|
#define CHACHA_OUTPUT(x){\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x1 + m_state[1]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x1 + m_state[1]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x2 + m_state[2]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x2 + m_state[2]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x3 + m_state[3]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x3 + m_state[3]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x5 + m_state[5]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x5 + m_state[5]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x6 + m_state[6]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x6 + m_state[6]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x7 + m_state[7]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x7 + m_state[7]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x9 + m_state[9]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x9 + m_state[9]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x10 + m_state[10]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x10 + m_state[10]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x11 + m_state[11]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x11 + m_state[11]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x13 + m_state[13]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x13 + m_state[13]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x14 + m_state[14]);\
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x14 + m_state[14]);\
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x15 + m_state[15]);}
|
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x15 + m_state[15]);}
|
||||||
|
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION);
|
CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION);
|
||||||
#undef CHACHA_OUTPUT
|
#undef CHACHA_OUTPUT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (++m_state[12] == 0)
|
if (++m_state[12] == 0)
|
||||||
m_state[13]++;
|
m_state[13]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_END
|
NAMESPACE_END
|
||||||
|
|
|
||||||
30
chacha.h
30
chacha.h
|
|
@ -22,9 +22,9 @@ NAMESPACE_BEGIN(CryptoPP)
|
||||||
/// \since Crypto++ 5.6.4
|
/// \since Crypto++ 5.6.4
|
||||||
struct ChaCha_Info : public VariableKeyLength<32, 16, 32, 16, SimpleKeyingInterface::UNIQUE_IV, 8>
|
struct ChaCha_Info : public VariableKeyLength<32, 16, 32, 16, SimpleKeyingInterface::UNIQUE_IV, 8>
|
||||||
{
|
{
|
||||||
static const char* StaticAlgorithmName() {
|
static const char* StaticAlgorithmName() {
|
||||||
return "ChaCha";
|
return "ChaCha";
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// \brief ChaCha stream cipher implementation
|
/// \brief ChaCha stream cipher implementation
|
||||||
|
|
@ -32,18 +32,18 @@ struct ChaCha_Info : public VariableKeyLength<32, 16, 32, 16, SimpleKeyingInterf
|
||||||
class CRYPTOPP_NO_VTABLE ChaCha_Policy : public AdditiveCipherConcretePolicy<word32, 16>
|
class CRYPTOPP_NO_VTABLE ChaCha_Policy : public AdditiveCipherConcretePolicy<word32, 16>
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
void CipherSetKey(const NameValuePairs &params, const byte *key, size_t length);
|
void CipherSetKey(const NameValuePairs &params, const byte *key, size_t length);
|
||||||
void OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount);
|
void OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount);
|
||||||
void CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length);
|
void CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length);
|
||||||
bool CipherIsRandomAccess() const {return true;}
|
bool CipherIsRandomAccess() const {return true;}
|
||||||
void SeekToIteration(lword iterationCount);
|
void SeekToIteration(lword iterationCount);
|
||||||
unsigned int GetAlignment() const;
|
unsigned int GetAlignment() const;
|
||||||
unsigned int GetOptimalBlockSize() const;
|
unsigned int GetOptimalBlockSize() const;
|
||||||
|
|
||||||
std::string AlgorithmProvider() const;
|
std::string AlgorithmProvider() const;
|
||||||
|
|
||||||
FixedSizeAlignedSecBlock<word32, 16> m_state;
|
FixedSizeAlignedSecBlock<word32, 16> m_state;
|
||||||
int m_rounds;
|
int m_rounds;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// \brief ChaCha stream cipher
|
/// \brief ChaCha stream cipher
|
||||||
|
|
@ -56,8 +56,8 @@ protected:
|
||||||
/// \since Crypto++ 5.6.4
|
/// \since Crypto++ 5.6.4
|
||||||
struct ChaCha : public ChaCha_Info, public SymmetricCipherDocumentation
|
struct ChaCha : public ChaCha_Info, public SymmetricCipherDocumentation
|
||||||
{
|
{
|
||||||
typedef SymmetricCipherFinal<ConcretePolicyHolder<ChaCha_Policy, AdditiveCipherTemplate<> >, ChaCha_Info > Encryption;
|
typedef SymmetricCipherFinal<ConcretePolicyHolder<ChaCha_Policy, AdditiveCipherTemplate<> >, ChaCha_Info > Encryption;
|
||||||
typedef Encryption Decryption;
|
typedef Encryption Decryption;
|
||||||
};
|
};
|
||||||
|
|
||||||
NAMESPACE_END
|
NAMESPACE_END
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue