Remove unneeded params from ChaCha_OperateKeystream_SSE2

pull/730/head
Jeffrey Walton 2018-10-23 08:52:29 -04:00
parent 028a9f0494
commit 6a5d2ab03d
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 54 additions and 56 deletions

View File

@ -54,35 +54,34 @@ NAMESPACE_BEGIN(CryptoPP)
#if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE)
void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output,
const word32 *input, size_t iterationCount, unsigned int rounds)
void ChaCha_OperateKeystream_SSE2(const word32 *state, byte *message, unsigned int rounds)
{
const __m128i* input_mm = reinterpret_cast<const __m128i*>(input);
__m128i* output_mm = reinterpret_cast<__m128i*>(output);
const __m128i* state_mm = reinterpret_cast<const __m128i*>(state);
__m128i* message_mm = reinterpret_cast<__m128i*>(message);
__m128i input0 = _mm_loadu_si128(input_mm);
__m128i input1 = _mm_loadu_si128(input_mm + 1);
__m128i input2 = _mm_loadu_si128(input_mm + 2);
__m128i input3 = _mm_loadu_si128(input_mm + 3);
const __m128i state0 = _mm_load_si128(state_mm);
const __m128i state1 = _mm_load_si128(state_mm + 1);
const __m128i state2 = _mm_load_si128(state_mm + 2);
const __m128i state3 = _mm_load_si128(state_mm + 3);
__m128i r0_0 = input0;
__m128i r0_1 = input1;
__m128i r0_2 = input2;
__m128i r0_3 = input3;
__m128i r0_0 = state0;
__m128i r0_1 = state1;
__m128i r0_2 = state2;
__m128i r0_3 = state3;
__m128i r1_0 = input0;
__m128i r1_1 = input1;
__m128i r1_2 = input2;
__m128i r1_0 = state0;
__m128i r1_1 = state1;
__m128i r1_2 = state2;
__m128i r1_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 1));
__m128i r2_0 = input0;
__m128i r2_1 = input1;
__m128i r2_2 = input2;
__m128i r2_0 = state0;
__m128i r2_1 = state1;
__m128i r2_2 = state2;
__m128i r2_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 2));
__m128i r3_0 = input0;
__m128i r3_1 = input1;
__m128i r3_2 = input2;
__m128i r3_0 = state0;
__m128i r3_1 = state1;
__m128i r3_2 = state2;
__m128i r3_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 3));
for (int i = static_cast<int>(rounds); i > 0; i -= 2)
@ -240,48 +239,48 @@ void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output,
r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(0, 3, 2, 1));
}
r0_0 = _mm_add_epi32(r0_0, input0);
r0_1 = _mm_add_epi32(r0_1, input1);
r0_2 = _mm_add_epi32(r0_2, input2);
r0_3 = _mm_add_epi32(r0_3, input3);
r0_0 = _mm_add_epi32(r0_0, state0);
r0_1 = _mm_add_epi32(r0_1, state1);
r0_2 = _mm_add_epi32(r0_2, state2);
r0_3 = _mm_add_epi32(r0_3, state3);
r1_0 = _mm_add_epi32(r1_0, input0);
r1_1 = _mm_add_epi32(r1_1, input1);
r1_2 = _mm_add_epi32(r1_2, input2);
r1_3 = _mm_add_epi32(r1_3, input3);
r1_0 = _mm_add_epi32(r1_0, state0);
r1_1 = _mm_add_epi32(r1_1, state1);
r1_2 = _mm_add_epi32(r1_2, state2);
r1_3 = _mm_add_epi32(r1_3, state3);
r1_3 = _mm_add_epi64(r1_3, _mm_set_epi32(0, 0, 0, 1));
r2_0 = _mm_add_epi32(r2_0, input0);
r2_1 = _mm_add_epi32(r2_1, input1);
r2_2 = _mm_add_epi32(r2_2, input2);
r2_3 = _mm_add_epi32(r2_3, input3);
r2_0 = _mm_add_epi32(r2_0, state0);
r2_1 = _mm_add_epi32(r2_1, state1);
r2_2 = _mm_add_epi32(r2_2, state2);
r2_3 = _mm_add_epi32(r2_3, state3);
r2_3 = _mm_add_epi64(r2_3, _mm_set_epi32(0, 0, 0, 2));
r3_0 = _mm_add_epi32(r3_0, input0);
r3_1 = _mm_add_epi32(r3_1, input1);
r3_2 = _mm_add_epi32(r3_2, input2);
r3_3 = _mm_add_epi32(r3_3, input3);
r3_0 = _mm_add_epi32(r3_0, state0);
r3_1 = _mm_add_epi32(r3_1, state1);
r3_2 = _mm_add_epi32(r3_2, state2);
r3_3 = _mm_add_epi32(r3_3, state3);
r3_3 = _mm_add_epi64(r3_3, _mm_set_epi32(0, 0, 0, 3));
_mm_storeu_si128(output_mm + 0, r0_0);
_mm_storeu_si128(output_mm + 1, r0_1);
_mm_storeu_si128(output_mm + 2, r0_2);
_mm_storeu_si128(output_mm + 3, r0_3);
_mm_storeu_si128(message_mm + 0, r0_0);
_mm_storeu_si128(message_mm + 1, r0_1);
_mm_storeu_si128(message_mm + 2, r0_2);
_mm_storeu_si128(message_mm + 3, r0_3);
_mm_storeu_si128(output_mm + 4, r1_0);
_mm_storeu_si128(output_mm + 5, r1_1);
_mm_storeu_si128(output_mm + 6, r1_2);
_mm_storeu_si128(output_mm + 7, r1_3);
_mm_storeu_si128(message_mm + 4, r1_0);
_mm_storeu_si128(message_mm + 5, r1_1);
_mm_storeu_si128(message_mm + 6, r1_2);
_mm_storeu_si128(message_mm + 7, r1_3);
_mm_storeu_si128(output_mm + 8, r2_0);
_mm_storeu_si128(output_mm + 9, r2_1);
_mm_storeu_si128(output_mm + 10, r2_2);
_mm_storeu_si128(output_mm + 11, r2_3);
_mm_storeu_si128(message_mm + 8, r2_0);
_mm_storeu_si128(message_mm + 9, r2_1);
_mm_storeu_si128(message_mm + 10, r2_2);
_mm_storeu_si128(message_mm + 11, r2_3);
_mm_storeu_si128(output_mm + 12, r3_0);
_mm_storeu_si128(output_mm + 13, r3_1);
_mm_storeu_si128(output_mm + 14, r3_2);
_mm_storeu_si128(output_mm + 15, r3_3);
_mm_storeu_si128(message_mm + 12, r3_0);
_mm_storeu_si128(message_mm + 13, r3_1);
_mm_storeu_si128(message_mm + 14, r3_2);
_mm_storeu_si128(message_mm + 15, r3_3);
}
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE

View File

@ -12,8 +12,7 @@
NAMESPACE_BEGIN(CryptoPP)
#if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE)
extern void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output,
const word32 *state, size_t iterationCount, unsigned int rounds);
extern void ChaCha_OperateKeystream_SSE2(const word32 *state, byte *message, unsigned int rounds);
#endif
#define CHACHA_QUARTER_ROUND(a,b,c,d) \
@ -104,7 +103,7 @@ void ChaCha_Policy::OperateKeystream(KeystreamOperation operation,
{
while (iterationCount >= 4)
{
ChaCha_OperateKeystream_SSE2(operation, output, m_state, iterationCount, m_rounds);
ChaCha_OperateKeystream_SSE2(m_state, output, m_rounds);
if ((operation & INPUT_NULL) != INPUT_NULL)
xorbuf(output, input, 4*BYTES_PER_ITERATION);