From 6a5d2ab03d05cdde8d3fbf96fc2db9ac80b11e7e Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Tue, 23 Oct 2018 08:52:29 -0400 Subject: [PATCH] Remove unneeded params from ChaCha_OperateKeystream_SSE2 --- chacha-simd.cpp | 105 ++++++++++++++++++++++++------------------------ chacha.cpp | 5 +-- 2 files changed, 54 insertions(+), 56 deletions(-) diff --git a/chacha-simd.cpp b/chacha-simd.cpp index 9e07c3ec..f0eb6efa 100644 --- a/chacha-simd.cpp +++ b/chacha-simd.cpp @@ -54,35 +54,34 @@ NAMESPACE_BEGIN(CryptoPP) #if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE) -void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output, - const word32 *input, size_t iterationCount, unsigned int rounds) +void ChaCha_OperateKeystream_SSE2(const word32 *state, byte *message, unsigned int rounds) { - const __m128i* input_mm = reinterpret_cast<const __m128i*>(input); - __m128i* output_mm = reinterpret_cast<__m128i*>(output); + const __m128i* state_mm = reinterpret_cast<const __m128i*>(state); + __m128i* message_mm = reinterpret_cast<__m128i*>(message); - __m128i input0 = _mm_loadu_si128(input_mm); - __m128i input1 = _mm_loadu_si128(input_mm + 1); - __m128i input2 = _mm_loadu_si128(input_mm + 2); - __m128i input3 = _mm_loadu_si128(input_mm + 3); + const __m128i state0 = _mm_load_si128(state_mm); + const __m128i state1 = _mm_load_si128(state_mm + 1); + const __m128i state2 = _mm_load_si128(state_mm + 2); + const __m128i state3 = _mm_load_si128(state_mm + 3); - __m128i r0_0 = input0; - __m128i r0_1 = input1; - __m128i r0_2 = input2; - __m128i r0_3 = input3; + __m128i r0_0 = state0; + __m128i r0_1 = state1; + __m128i r0_2 = state2; + __m128i r0_3 = state3; - __m128i r1_0 = input0; - __m128i r1_1 = input1; - __m128i r1_2 = input2; + __m128i r1_0 = state0; + __m128i r1_1 = state1; + __m128i r1_2 = state2; __m128i r1_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 1)); - __m128i r2_0 = input0; - __m128i r2_1 = input1; - __m128i r2_2 = input2; + __m128i r2_0 = state0; + __m128i r2_1 = state1; + __m128i r2_2 = 
state2; __m128i r2_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 2)); - __m128i r3_0 = input0; - __m128i r3_1 = input1; - __m128i r3_2 = input2; + __m128i r3_0 = state0; + __m128i r3_1 = state1; + __m128i r3_2 = state2; __m128i r3_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 3)); for (int i = static_cast<int>(rounds); i > 0; i -= 2) @@ -240,48 +239,48 @@ void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output, r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(0, 3, 2, 1)); } - r0_0 = _mm_add_epi32(r0_0, input0); - r0_1 = _mm_add_epi32(r0_1, input1); - r0_2 = _mm_add_epi32(r0_2, input2); - r0_3 = _mm_add_epi32(r0_3, input3); + r0_0 = _mm_add_epi32(r0_0, state0); + r0_1 = _mm_add_epi32(r0_1, state1); + r0_2 = _mm_add_epi32(r0_2, state2); + r0_3 = _mm_add_epi32(r0_3, state3); - r1_0 = _mm_add_epi32(r1_0, input0); - r1_1 = _mm_add_epi32(r1_1, input1); - r1_2 = _mm_add_epi32(r1_2, input2); - r1_3 = _mm_add_epi32(r1_3, input3); + r1_0 = _mm_add_epi32(r1_0, state0); + r1_1 = _mm_add_epi32(r1_1, state1); + r1_2 = _mm_add_epi32(r1_2, state2); + r1_3 = _mm_add_epi32(r1_3, state3); r1_3 = _mm_add_epi64(r1_3, _mm_set_epi32(0, 0, 0, 1)); - r2_0 = _mm_add_epi32(r2_0, input0); - r2_1 = _mm_add_epi32(r2_1, input1); - r2_2 = _mm_add_epi32(r2_2, input2); - r2_3 = _mm_add_epi32(r2_3, input3); + r2_0 = _mm_add_epi32(r2_0, state0); + r2_1 = _mm_add_epi32(r2_1, state1); + r2_2 = _mm_add_epi32(r2_2, state2); + r2_3 = _mm_add_epi32(r2_3, state3); r2_3 = _mm_add_epi64(r2_3, _mm_set_epi32(0, 0, 0, 2)); - r3_0 = _mm_add_epi32(r3_0, input0); - r3_1 = _mm_add_epi32(r3_1, input1); - r3_2 = _mm_add_epi32(r3_2, input2); - r3_3 = _mm_add_epi32(r3_3, input3); + r3_0 = _mm_add_epi32(r3_0, state0); + r3_1 = _mm_add_epi32(r3_1, state1); + r3_2 = _mm_add_epi32(r3_2, state2); + r3_3 = _mm_add_epi32(r3_3, state3); r3_3 = _mm_add_epi64(r3_3, _mm_set_epi32(0, 0, 0, 3)); - _mm_storeu_si128(output_mm + 0, r0_0); - _mm_storeu_si128(output_mm + 1, r0_1); - _mm_storeu_si128(output_mm + 2, r0_2); - 
_mm_storeu_si128(output_mm + 3, r0_3); + _mm_storeu_si128(message_mm + 0, r0_0); + _mm_storeu_si128(message_mm + 1, r0_1); + _mm_storeu_si128(message_mm + 2, r0_2); + _mm_storeu_si128(message_mm + 3, r0_3); - _mm_storeu_si128(output_mm + 4, r1_0); - _mm_storeu_si128(output_mm + 5, r1_1); - _mm_storeu_si128(output_mm + 6, r1_2); - _mm_storeu_si128(output_mm + 7, r1_3); + _mm_storeu_si128(message_mm + 4, r1_0); + _mm_storeu_si128(message_mm + 5, r1_1); + _mm_storeu_si128(message_mm + 6, r1_2); + _mm_storeu_si128(message_mm + 7, r1_3); - _mm_storeu_si128(output_mm + 8, r2_0); - _mm_storeu_si128(output_mm + 9, r2_1); - _mm_storeu_si128(output_mm + 10, r2_2); - _mm_storeu_si128(output_mm + 11, r2_3); + _mm_storeu_si128(message_mm + 8, r2_0); + _mm_storeu_si128(message_mm + 9, r2_1); + _mm_storeu_si128(message_mm + 10, r2_2); + _mm_storeu_si128(message_mm + 11, r2_3); - _mm_storeu_si128(output_mm + 12, r3_0); - _mm_storeu_si128(output_mm + 13, r3_1); - _mm_storeu_si128(output_mm + 14, r3_2); - _mm_storeu_si128(output_mm + 15, r3_3); + _mm_storeu_si128(message_mm + 12, r3_0); + _mm_storeu_si128(message_mm + 13, r3_1); + _mm_storeu_si128(message_mm + 14, r3_2); + _mm_storeu_si128(message_mm + 15, r3_3); } #endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE diff --git a/chacha.cpp b/chacha.cpp index d93e3d93..6ae78c4b 100644 --- a/chacha.cpp +++ b/chacha.cpp @@ -12,8 +12,7 @@ NAMESPACE_BEGIN(CryptoPP) #if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE) -extern void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output, - const word32 *state, size_t iterationCount, unsigned int rounds); +extern void ChaCha_OperateKeystream_SSE2(const word32 *state, byte *message, unsigned int rounds); #endif #define CHACHA_QUARTER_ROUND(a,b,c,d) \ @@ -104,7 +103,7 @@ void ChaCha_Policy::OperateKeystream(KeystreamOperation operation, { while (iterationCount >= 4) { - ChaCha_OperateKeystream_SSE2(operation, output, m_state, iterationCount, m_rounds); + ChaCha_OperateKeystream_SSE2(m_state, 
output, m_rounds); if ((operation & INPUT_NULL) != INPUT_NULL) xorbuf(output, input, 4*BYTES_PER_ITERATION);