Clear lgtm findings
parent
43b01973b1
commit
6a11f00768
|
|
@ -38,6 +38,7 @@
|
|||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||
# include <xmmintrin.h>
|
||||
# include <emmintrin.h>
|
||||
# include "sse_simd.h"
|
||||
#endif
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
|
@ -565,14 +566,10 @@ void ChaCha_OperateKeystream_NEON(const word32 *state, const byte* input, byte *
|
|||
|
||||
void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds)
|
||||
{
|
||||
const __m128i* state_mm = reinterpret_cast<const __m128i*>(state);
|
||||
const __m128i* input_mm = reinterpret_cast<const __m128i*>(input);
|
||||
__m128i* output_mm = reinterpret_cast<__m128i*>(output);
|
||||
|
||||
const __m128i state0 = _mm_load_si128(state_mm + 0);
|
||||
const __m128i state1 = _mm_load_si128(state_mm + 1);
|
||||
const __m128i state2 = _mm_load_si128(state_mm + 2);
|
||||
const __m128i state3 = _mm_load_si128(state_mm + 3);
|
||||
const __m128i state0 = load_m128i<0>(state);
|
||||
const __m128i state1 = load_m128i<1>(state);
|
||||
const __m128i state2 = load_m128i<2>(state);
|
||||
const __m128i state3 = load_m128i<3>(state);
|
||||
|
||||
__m128i r0_0 = state0;
|
||||
__m128i r0_1 = state1;
|
||||
|
|
@ -772,57 +769,57 @@ void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *
|
|||
r3_3 = _mm_add_epi32(r3_3, state3);
|
||||
r3_3 = _mm_add_epi64(r3_3, _mm_set_epi32(0, 0, 0, 3));
|
||||
|
||||
if (input_mm)
|
||||
if (input)
|
||||
{
|
||||
r0_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 0), r0_0);
|
||||
r0_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 1), r0_1);
|
||||
r0_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 2), r0_2);
|
||||
r0_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 3), r0_3);
|
||||
r0_0 = _mm_xor_si128(load_m128i<0>(input), r0_0);
|
||||
r0_1 = _mm_xor_si128(load_m128i<1>(input), r0_1);
|
||||
r0_2 = _mm_xor_si128(load_m128i<2>(input), r0_2);
|
||||
r0_3 = _mm_xor_si128(load_m128i<3>(input), r0_3);
|
||||
}
|
||||
|
||||
_mm_storeu_si128(output_mm + 0, r0_0);
|
||||
_mm_storeu_si128(output_mm + 1, r0_1);
|
||||
_mm_storeu_si128(output_mm + 2, r0_2);
|
||||
_mm_storeu_si128(output_mm + 3, r0_3);
|
||||
store_m128i<0>(output, r0_0);
|
||||
store_m128i<1>(output, r0_1);
|
||||
store_m128i<2>(output, r0_2);
|
||||
store_m128i<3>(output, r0_3);
|
||||
|
||||
if (input_mm)
|
||||
if (input)
|
||||
{
|
||||
r1_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 4), r1_0);
|
||||
r1_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 5), r1_1);
|
||||
r1_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 6), r1_2);
|
||||
r1_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 7), r1_3);
|
||||
r1_0 = _mm_xor_si128(load_m128i<4>(input), r1_0);
|
||||
r1_1 = _mm_xor_si128(load_m128i<5>(input), r1_1);
|
||||
r1_2 = _mm_xor_si128(load_m128i<6>(input), r1_2);
|
||||
r1_3 = _mm_xor_si128(load_m128i<7>(input), r1_3);
|
||||
}
|
||||
|
||||
_mm_storeu_si128(output_mm + 4, r1_0);
|
||||
_mm_storeu_si128(output_mm + 5, r1_1);
|
||||
_mm_storeu_si128(output_mm + 6, r1_2);
|
||||
_mm_storeu_si128(output_mm + 7, r1_3);
|
||||
store_m128i<4>(output, r1_0);
|
||||
store_m128i<5>(output, r1_1);
|
||||
store_m128i<6>(output, r1_2);
|
||||
store_m128i<7>(output, r1_3);
|
||||
|
||||
if (input_mm)
|
||||
if (input)
|
||||
{
|
||||
r2_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 8), r2_0);
|
||||
r2_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 9), r2_1);
|
||||
r2_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 10), r2_2);
|
||||
r2_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 11), r2_3);
|
||||
r2_0 = _mm_xor_si128(load_m128i< 8>(input), r2_0);
|
||||
r2_1 = _mm_xor_si128(load_m128i< 9>(input), r2_1);
|
||||
r2_2 = _mm_xor_si128(load_m128i<10>(input), r2_2);
|
||||
r2_3 = _mm_xor_si128(load_m128i<11>(input), r2_3);
|
||||
}
|
||||
|
||||
_mm_storeu_si128(output_mm + 8, r2_0);
|
||||
_mm_storeu_si128(output_mm + 9, r2_1);
|
||||
_mm_storeu_si128(output_mm + 10, r2_2);
|
||||
_mm_storeu_si128(output_mm + 11, r2_3);
|
||||
store_m128i< 8>(output, r2_0);
|
||||
store_m128i< 9>(output, r2_1);
|
||||
store_m128i<10>(output, r2_2);
|
||||
store_m128i<11>(output, r2_3);
|
||||
|
||||
if (input_mm)
|
||||
if (input)
|
||||
{
|
||||
r3_0 = _mm_xor_si128(_mm_loadu_si128(input_mm + 12), r3_0);
|
||||
r3_1 = _mm_xor_si128(_mm_loadu_si128(input_mm + 13), r3_1);
|
||||
r3_2 = _mm_xor_si128(_mm_loadu_si128(input_mm + 14), r3_2);
|
||||
r3_3 = _mm_xor_si128(_mm_loadu_si128(input_mm + 15), r3_3);
|
||||
r3_0 = _mm_xor_si128(load_m128i<12>(input), r3_0);
|
||||
r3_1 = _mm_xor_si128(load_m128i<13>(input), r3_1);
|
||||
r3_2 = _mm_xor_si128(load_m128i<14>(input), r3_2);
|
||||
r3_3 = _mm_xor_si128(load_m128i<15>(input), r3_3);
|
||||
}
|
||||
|
||||
_mm_storeu_si128(output_mm + 12, r3_0);
|
||||
_mm_storeu_si128(output_mm + 13, r3_1);
|
||||
_mm_storeu_si128(output_mm + 14, r3_2);
|
||||
_mm_storeu_si128(output_mm + 15, r3_3);
|
||||
store_m128i<12>(output, r3_0);
|
||||
store_m128i<13>(output, r3_1);
|
||||
store_m128i<14>(output, r3_2);
|
||||
store_m128i<15>(output, r3_3);
|
||||
}
|
||||
|
||||
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||
|
|
|
|||
43
sse_simd.h
43
sse_simd.h
|
|
@ -20,6 +20,27 @@ NAMESPACE_BEGIN(CryptoPP)
|
|||
|
||||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||
|
||||
/// \brief Load 16 bytes from an arbitrarily aligned address.
/// \tparam T the element type behind \p ptr; the bytes are reinterpreted
///   as a raw 128-bit vector, so T's layout is irrelevant.
/// \param ptr source address; no alignment requirement (unaligned load).
/// \return the 128-bit value read from \p ptr
template <class T>
inline __m128i load_m128i(T* ptr)
{
    __m128i* const src = reinterpret_cast<__m128i*>(ptr);
    return _mm_loadu_si128(src);
}
|
||||
|
||||
/// \brief Load 16 bytes from an arbitrarily aligned, read-only address.
/// \tparam T the element type behind \p ptr; the bytes are reinterpreted
///   as a raw 128-bit vector, so T's layout is irrelevant.
/// \param ptr source address; no alignment requirement (unaligned load).
/// \return the 128-bit value read from \p ptr
template <class T>
inline __m128i load_m128i(const T* ptr)
{
    const __m128i* const src = reinterpret_cast<const __m128i*>(ptr);
    return _mm_loadu_si128(src);
}
|
||||
|
||||
/// \brief Store a 128-bit vector to an arbitrarily aligned address.
/// \tparam T the element type behind \p ptr; the destination is written
///   as 16 raw bytes, so T's layout is irrelevant.
/// \param ptr destination address; no alignment requirement (unaligned store).
/// \param val the 128-bit value to write
template <class T>
inline void store_m128i(T* ptr, __m128i val)
{
    // _mm_storeu_si128 returns void; invoke it as a plain statement instead
    // of `return _mm_storeu_si128(...)`, which misleadingly suggests a value
    // is produced.
    _mm_storeu_si128(reinterpret_cast<__m128i*>(ptr), val);
}
|
||||
|
||||
// N specifies the nth 128-bit element
|
||||
template <unsigned int N, class T>
|
||||
inline __m128i load_m128i(T* ptr)
|
||||
|
|
@ -46,10 +67,32 @@ inline void store_m128i(T* ptr, __m128i val)
|
|||
return _mm_storeu_si128(
|
||||
reinterpret_cast<__m128i*>(ptr+SCALE*N), val);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_AVX2_AVAILABLE)
|
||||
|
||||
/// \brief Load 32 bytes from an arbitrarily aligned address.
/// \tparam T the element type behind \p ptr; the bytes are reinterpreted
///   as a raw 256-bit vector, so T's layout is irrelevant.
/// \param ptr source address; no alignment requirement (unaligned load).
/// \return the 256-bit value read from \p ptr
template <class T>
inline __m256i load_m256i(T* ptr)
{
    __m256i* const src = reinterpret_cast<__m256i*>(ptr);
    return _mm256_loadu_si256(src);
}
|
||||
|
||||
/// \brief Load 32 bytes from an arbitrarily aligned, read-only address.
/// \tparam T the element type behind \p ptr; the bytes are reinterpreted
///   as a raw 256-bit vector, so T's layout is irrelevant.
/// \param ptr source address; no alignment requirement (unaligned load).
/// \return the 256-bit value read from \p ptr
template <class T>
inline __m256i load_m256i(const T* ptr)
{
    const __m256i* const src = reinterpret_cast<const __m256i*>(ptr);
    return _mm256_loadu_si256(src);
}
|
||||
|
||||
/// \brief Store a 256-bit vector to an arbitrarily aligned address.
/// \tparam T the element type behind \p ptr; the destination is written
///   as 32 raw bytes, so T's layout is irrelevant.
/// \param ptr destination address; no alignment requirement (unaligned store).
/// \param val the 256-bit value to write
template <class T>
inline void store_m256i(T* ptr, __m256i val)
{
    // _mm256_storeu_si256 returns void; invoke it as a plain statement instead
    // of `return _mm256_storeu_si256(...)`, which misleadingly suggests a
    // value is produced.
    _mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), val);
}
|
||||
|
||||
// N specifies the nth 256-bit element
|
||||
template <unsigned int N, class T>
|
||||
inline __m256i load_m256i(T* ptr)
|
||||
|
|
|
|||
Loading…
Reference in New Issue