Remove unneeded params from ChaCha_OperateKeystream_SSE2
parent
028a9f0494
commit
6a5d2ab03d
105
chacha-simd.cpp
105
chacha-simd.cpp
|
|
@ -54,35 +54,34 @@ NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
#if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
#if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||||
|
|
||||||
void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output,
|
void ChaCha_OperateKeystream_SSE2(const word32 *state, byte *message, unsigned int rounds)
|
||||||
const word32 *input, size_t iterationCount, unsigned int rounds)
|
|
||||||
{
|
{
|
||||||
const __m128i* input_mm = reinterpret_cast<const __m128i*>(input);
|
const __m128i* state_mm = reinterpret_cast<const __m128i*>(state);
|
||||||
__m128i* output_mm = reinterpret_cast<__m128i*>(output);
|
__m128i* message_mm = reinterpret_cast<__m128i*>(message);
|
||||||
|
|
||||||
__m128i input0 = _mm_loadu_si128(input_mm);
|
const __m128i state0 = _mm_load_si128(state_mm);
|
||||||
__m128i input1 = _mm_loadu_si128(input_mm + 1);
|
const __m128i state1 = _mm_load_si128(state_mm + 1);
|
||||||
__m128i input2 = _mm_loadu_si128(input_mm + 2);
|
const __m128i state2 = _mm_load_si128(state_mm + 2);
|
||||||
__m128i input3 = _mm_loadu_si128(input_mm + 3);
|
const __m128i state3 = _mm_load_si128(state_mm + 3);
|
||||||
|
|
||||||
__m128i r0_0 = input0;
|
__m128i r0_0 = state0;
|
||||||
__m128i r0_1 = input1;
|
__m128i r0_1 = state1;
|
||||||
__m128i r0_2 = input2;
|
__m128i r0_2 = state2;
|
||||||
__m128i r0_3 = input3;
|
__m128i r0_3 = state3;
|
||||||
|
|
||||||
__m128i r1_0 = input0;
|
__m128i r1_0 = state0;
|
||||||
__m128i r1_1 = input1;
|
__m128i r1_1 = state1;
|
||||||
__m128i r1_2 = input2;
|
__m128i r1_2 = state2;
|
||||||
__m128i r1_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 1));
|
__m128i r1_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 1));
|
||||||
|
|
||||||
__m128i r2_0 = input0;
|
__m128i r2_0 = state0;
|
||||||
__m128i r2_1 = input1;
|
__m128i r2_1 = state1;
|
||||||
__m128i r2_2 = input2;
|
__m128i r2_2 = state2;
|
||||||
__m128i r2_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 2));
|
__m128i r2_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 2));
|
||||||
|
|
||||||
__m128i r3_0 = input0;
|
__m128i r3_0 = state0;
|
||||||
__m128i r3_1 = input1;
|
__m128i r3_1 = state1;
|
||||||
__m128i r3_2 = input2;
|
__m128i r3_2 = state2;
|
||||||
__m128i r3_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 3));
|
__m128i r3_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 3));
|
||||||
|
|
||||||
for (int i = static_cast<int>(rounds); i > 0; i -= 2)
|
for (int i = static_cast<int>(rounds); i > 0; i -= 2)
|
||||||
|
|
@ -240,48 +239,48 @@ void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output,
|
||||||
r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(0, 3, 2, 1));
|
r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(0, 3, 2, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
r0_0 = _mm_add_epi32(r0_0, input0);
|
r0_0 = _mm_add_epi32(r0_0, state0);
|
||||||
r0_1 = _mm_add_epi32(r0_1, input1);
|
r0_1 = _mm_add_epi32(r0_1, state1);
|
||||||
r0_2 = _mm_add_epi32(r0_2, input2);
|
r0_2 = _mm_add_epi32(r0_2, state2);
|
||||||
r0_3 = _mm_add_epi32(r0_3, input3);
|
r0_3 = _mm_add_epi32(r0_3, state3);
|
||||||
|
|
||||||
r1_0 = _mm_add_epi32(r1_0, input0);
|
r1_0 = _mm_add_epi32(r1_0, state0);
|
||||||
r1_1 = _mm_add_epi32(r1_1, input1);
|
r1_1 = _mm_add_epi32(r1_1, state1);
|
||||||
r1_2 = _mm_add_epi32(r1_2, input2);
|
r1_2 = _mm_add_epi32(r1_2, state2);
|
||||||
r1_3 = _mm_add_epi32(r1_3, input3);
|
r1_3 = _mm_add_epi32(r1_3, state3);
|
||||||
r1_3 = _mm_add_epi64(r1_3, _mm_set_epi32(0, 0, 0, 1));
|
r1_3 = _mm_add_epi64(r1_3, _mm_set_epi32(0, 0, 0, 1));
|
||||||
|
|
||||||
r2_0 = _mm_add_epi32(r2_0, input0);
|
r2_0 = _mm_add_epi32(r2_0, state0);
|
||||||
r2_1 = _mm_add_epi32(r2_1, input1);
|
r2_1 = _mm_add_epi32(r2_1, state1);
|
||||||
r2_2 = _mm_add_epi32(r2_2, input2);
|
r2_2 = _mm_add_epi32(r2_2, state2);
|
||||||
r2_3 = _mm_add_epi32(r2_3, input3);
|
r2_3 = _mm_add_epi32(r2_3, state3);
|
||||||
r2_3 = _mm_add_epi64(r2_3, _mm_set_epi32(0, 0, 0, 2));
|
r2_3 = _mm_add_epi64(r2_3, _mm_set_epi32(0, 0, 0, 2));
|
||||||
|
|
||||||
r3_0 = _mm_add_epi32(r3_0, input0);
|
r3_0 = _mm_add_epi32(r3_0, state0);
|
||||||
r3_1 = _mm_add_epi32(r3_1, input1);
|
r3_1 = _mm_add_epi32(r3_1, state1);
|
||||||
r3_2 = _mm_add_epi32(r3_2, input2);
|
r3_2 = _mm_add_epi32(r3_2, state2);
|
||||||
r3_3 = _mm_add_epi32(r3_3, input3);
|
r3_3 = _mm_add_epi32(r3_3, state3);
|
||||||
r3_3 = _mm_add_epi64(r3_3, _mm_set_epi32(0, 0, 0, 3));
|
r3_3 = _mm_add_epi64(r3_3, _mm_set_epi32(0, 0, 0, 3));
|
||||||
|
|
||||||
_mm_storeu_si128(output_mm + 0, r0_0);
|
_mm_storeu_si128(message_mm + 0, r0_0);
|
||||||
_mm_storeu_si128(output_mm + 1, r0_1);
|
_mm_storeu_si128(message_mm + 1, r0_1);
|
||||||
_mm_storeu_si128(output_mm + 2, r0_2);
|
_mm_storeu_si128(message_mm + 2, r0_2);
|
||||||
_mm_storeu_si128(output_mm + 3, r0_3);
|
_mm_storeu_si128(message_mm + 3, r0_3);
|
||||||
|
|
||||||
_mm_storeu_si128(output_mm + 4, r1_0);
|
_mm_storeu_si128(message_mm + 4, r1_0);
|
||||||
_mm_storeu_si128(output_mm + 5, r1_1);
|
_mm_storeu_si128(message_mm + 5, r1_1);
|
||||||
_mm_storeu_si128(output_mm + 6, r1_2);
|
_mm_storeu_si128(message_mm + 6, r1_2);
|
||||||
_mm_storeu_si128(output_mm + 7, r1_3);
|
_mm_storeu_si128(message_mm + 7, r1_3);
|
||||||
|
|
||||||
_mm_storeu_si128(output_mm + 8, r2_0);
|
_mm_storeu_si128(message_mm + 8, r2_0);
|
||||||
_mm_storeu_si128(output_mm + 9, r2_1);
|
_mm_storeu_si128(message_mm + 9, r2_1);
|
||||||
_mm_storeu_si128(output_mm + 10, r2_2);
|
_mm_storeu_si128(message_mm + 10, r2_2);
|
||||||
_mm_storeu_si128(output_mm + 11, r2_3);
|
_mm_storeu_si128(message_mm + 11, r2_3);
|
||||||
|
|
||||||
_mm_storeu_si128(output_mm + 12, r3_0);
|
_mm_storeu_si128(message_mm + 12, r3_0);
|
||||||
_mm_storeu_si128(output_mm + 13, r3_1);
|
_mm_storeu_si128(message_mm + 13, r3_1);
|
||||||
_mm_storeu_si128(output_mm + 14, r3_2);
|
_mm_storeu_si128(message_mm + 14, r3_2);
|
||||||
_mm_storeu_si128(output_mm + 15, r3_3);
|
_mm_storeu_si128(message_mm + 15, r3_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,7 @@
|
||||||
NAMESPACE_BEGIN(CryptoPP)
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
#if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
#if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||||
extern void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output,
|
extern void ChaCha_OperateKeystream_SSE2(const word32 *state, byte *message, unsigned int rounds);
|
||||||
const word32 *state, size_t iterationCount, unsigned int rounds);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define CHACHA_QUARTER_ROUND(a,b,c,d) \
|
#define CHACHA_QUARTER_ROUND(a,b,c,d) \
|
||||||
|
|
@ -104,7 +103,7 @@ void ChaCha_Policy::OperateKeystream(KeystreamOperation operation,
|
||||||
{
|
{
|
||||||
while (iterationCount >= 4)
|
while (iterationCount >= 4)
|
||||||
{
|
{
|
||||||
ChaCha_OperateKeystream_SSE2(operation, output, m_state, iterationCount, m_rounds);
|
ChaCha_OperateKeystream_SSE2(m_state, output, m_rounds);
|
||||||
|
|
||||||
if ((operation & INPUT_NULL) != INPUT_NULL)
|
if ((operation & INPUT_NULL) != INPUT_NULL)
|
||||||
xorbuf(output, input, 4*BYTES_PER_ITERATION);
|
xorbuf(output, input, 4*BYTES_PER_ITERATION);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue