Cleanup casts due to Clang

parent f02bf91ee5
commit 863bf9133c

blake2.cpp (96 changed lines)
@@ -35,6 +35,10 @@ inline __m128i MM_SET_EPI64X(const word64 a, const word64 b)
 # define MM_SET_EPI64X(a, b) _mm_set_epi64x(a, b)
 #endif

+// Clang casts
+#define M128I_CAST(x) ((__m128i *)(void *)(x))
+#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))
+
 // C/C++ implementation
 static void BLAKE2_CXX_Compress32(const byte* input, BLAKE2_State<word32, false>& state);
 static void BLAKE2_CXX_Compress64(const byte* input, BLAKE2_State<word64, true>& state);
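The two macros added here centralize a cast pattern repeated throughout the file. Clang, typically under -Wcast-align, flags a direct cast from a byte or word pointer to __m128i* because the destination type carries a stricter (16-byte) alignment requirement; hopping through void* first sidesteps the diagnostic, and _mm_loadu_si128 tolerates unaligned addresses regardless. A minimal sketch of the two spellings (the typedef is a stand-in for Crypto++'s byte, not part of the commit):

    #include <emmintrin.h>

    typedef unsigned char byte;  // stand-in for Crypto++'s byte

    __m128i LoadBlock(const byte* p)
    {
        // Direct cast: Clang's -Wcast-align flags the jump from 1-byte
        // to 16-byte required alignment.
        // return _mm_loadu_si128((const __m128i*)p);

        // Two-step cast through void*, i.e. what CONST_M128I_CAST(p)
        // expands to; the unaligned load itself was always legal.
        return _mm_loadu_si128((const __m128i*)(const void*)p);
    }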
@@ -626,10 +630,10 @@ static void BLAKE2_SSE2_Compress32(const byte* input, BLAKE2_State<word32, false>& state)
 __m128i buf1,buf2,buf3,buf4;
 __m128i ff0,ff1;

-row1 = ff0 = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
-row2 = ff1 = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
+row1 = ff0 = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+row2 = ff1 = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
 row3 = _mm_setr_epi32(BLAKE2S_IV(0),BLAKE2S_IV(1),BLAKE2S_IV(2),BLAKE2S_IV(3));
-row4 = _mm_xor_si128(_mm_setr_epi32(BLAKE2S_IV(4),BLAKE2S_IV(5),BLAKE2S_IV(6),BLAKE2S_IV(7)),_mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
+row4 = _mm_xor_si128(_mm_setr_epi32(BLAKE2S_IV(4),BLAKE2S_IV(5),BLAKE2S_IV(6),BLAKE2S_IV(7)),_mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
 buf1 = _mm_set_epi32(m6,m4,m2,m0);
 row1 = _mm_add_epi32(_mm_add_epi32(row1,buf1),row2);
 row4 = _mm_xor_si128(row4,row1);
@@ -1030,8 +1034,8 @@ static void BLAKE2_SSE2_Compress32(const byte* input, BLAKE2_State<word32, false>& state)
 row3 = _mm_shuffle_epi32(row3,_MM_SHUFFLE(1,0,3,2));
 row2 = _mm_shuffle_epi32(row2,_MM_SHUFFLE(2,1,0,3));

-_mm_storeu_si128((__m128i *)(void*)(&state.h[0]),_mm_xor_si128(ff0,_mm_xor_si128(row1,row3)));
-_mm_storeu_si128((__m128i *)(void*)(&state.h[4]),_mm_xor_si128(ff1,_mm_xor_si128(row2,row4)));
+_mm_storeu_si128(M128I_CAST(&state.h[0]),_mm_xor_si128(ff0,_mm_xor_si128(row1,row3)));
+_mm_storeu_si128(M128I_CAST(&state.h[4]),_mm_xor_si128(ff1,_mm_xor_si128(row2,row4)));
 }

 # if (__SUNPRO_CC != 0x5120)
@@ -1045,14 +1049,14 @@ static void BLAKE2_SSE2_Compress64(const byte* input, BLAKE2_State<word64, true>& state)
 __m128i row3l, row3h, row4l, row4h;
 __m128i b0, b1, t0, t1;

-row1l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
-row1h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[2]));
-row2l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
-row2h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[6]));
-row3l = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(0)));
-row3h = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(2)));
-row4l = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(4))), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
-row4h = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(6))), _mm_loadu_si128((const __m128i*)(const void*)(&state.f[0])));
+row1l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+row1h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[2]));
+row2l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
+row2h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[6]));
+row3l = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(0)));
+row3h = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(2)));
+row4l = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(4))), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
+row4h = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(6))), _mm_loadu_si128(CONST_M128I_CAST(&state.f[0])));

 b0 = MM_SET_EPI64X(m2, m0);
 b1 = MM_SET_EPI64X(m6, m4);
@@ -1918,13 +1922,13 @@ static void BLAKE2_SSE2_Compress64(const byte* input, BLAKE2_State<word64, true>& state)

 row1l = _mm_xor_si128(row3l, row1l);
 row1h = _mm_xor_si128(row3h, row1h);
-_mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])), row1l));
-_mm_storeu_si128((__m128i *)(void*)(&state.h[2]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])), row1h));
+_mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[0])), row1l));
+_mm_storeu_si128(M128I_CAST(&state.h[2]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[2])), row1h));

 row2l = _mm_xor_si128(row4l, row2l);
 row2h = _mm_xor_si128(row4h, row2h);
-_mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])), row2l));
-_mm_storeu_si128((__m128i *)(void*)(&state.h[6]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])), row2h));
+_mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[4])), row2l));
+_mm_storeu_si128(M128I_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[6])), row2h));
 }
 # endif // (__SUNPRO_CC != 0x5120)
 #endif // CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
@@ -1941,15 +1945,15 @@ static void BLAKE2_SSE4_Compress32(const byte* input, BLAKE2_State<word32, false>& state)
 const __m128i r8 = _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1);
 const __m128i r16 = _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);

-const __m128i m0 = _mm_loadu_si128((const __m128i*)(const void*)(input + 00));
-const __m128i m1 = _mm_loadu_si128((const __m128i*)(const void*)(input + 16));
-const __m128i m2 = _mm_loadu_si128((const __m128i*)(const void*)(input + 32));
-const __m128i m3 = _mm_loadu_si128((const __m128i*)(const void*)(input + 48));
+const __m128i m0 = _mm_loadu_si128(CONST_M128I_CAST(input + 00));
+const __m128i m1 = _mm_loadu_si128(CONST_M128I_CAST(input + 16));
+const __m128i m2 = _mm_loadu_si128(CONST_M128I_CAST(input + 32));
+const __m128i m3 = _mm_loadu_si128(CONST_M128I_CAST(input + 48));

-row1 = ff0 = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
-row2 = ff1 = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
+row1 = ff0 = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+row2 = ff1 = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
 row3 = _mm_setr_epi32(BLAKE2S_IV(0), BLAKE2S_IV(1), BLAKE2S_IV(2), BLAKE2S_IV(3));
-row4 = _mm_xor_si128(_mm_setr_epi32(BLAKE2S_IV(4), BLAKE2S_IV(5), BLAKE2S_IV(6), BLAKE2S_IV(7)), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
+row4 = _mm_xor_si128(_mm_setr_epi32(BLAKE2S_IV(4), BLAKE2S_IV(5), BLAKE2S_IV(6), BLAKE2S_IV(7)), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
 buf1 = _mm_castps_si128((_mm_shuffle_ps(_mm_castsi128_ps((m0)), _mm_castsi128_ps((m1)), _MM_SHUFFLE(2,0,2,0))));

 row1 = _mm_add_epi32(_mm_add_epi32(row1, buf1), row2);
@@ -2481,8 +2485,8 @@ static void BLAKE2_SSE4_Compress32(const byte* input, BLAKE2_State<word32, false>& state)
 row3 = _mm_shuffle_epi32(row3, _MM_SHUFFLE(1,0,3,2));
 row2 = _mm_shuffle_epi32(row2, _MM_SHUFFLE(2,1,0,3));

-_mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(ff0, _mm_xor_si128(row1, row3)));
-_mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(ff1, _mm_xor_si128(row2, row4)));
+_mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(ff0, _mm_xor_si128(row1, row3)));
+_mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(ff1, _mm_xor_si128(row2, row4)));
 }

 static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State<word64, true>& state)
@@ -2496,23 +2500,23 @@ static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State<word64, true>& state)
 const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
 const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);

-const __m128i m0 = _mm_loadu_si128((const __m128i*)(const void*)(input + 00));
-const __m128i m1 = _mm_loadu_si128((const __m128i*)(const void*)(input + 16));
-const __m128i m2 = _mm_loadu_si128((const __m128i*)(const void*)(input + 32));
-const __m128i m3 = _mm_loadu_si128((const __m128i*)(const void*)(input + 48));
-const __m128i m4 = _mm_loadu_si128((const __m128i*)(const void*)(input + 64));
-const __m128i m5 = _mm_loadu_si128((const __m128i*)(const void*)(input + 80));
-const __m128i m6 = _mm_loadu_si128((const __m128i*)(const void*)(input + 96));
-const __m128i m7 = _mm_loadu_si128((const __m128i*)(const void*)(input + 112));
+const __m128i m0 = _mm_loadu_si128(CONST_M128I_CAST(input + 00));
+const __m128i m1 = _mm_loadu_si128(CONST_M128I_CAST(input + 16));
+const __m128i m2 = _mm_loadu_si128(CONST_M128I_CAST(input + 32));
+const __m128i m3 = _mm_loadu_si128(CONST_M128I_CAST(input + 48));
+const __m128i m4 = _mm_loadu_si128(CONST_M128I_CAST(input + 64));
+const __m128i m5 = _mm_loadu_si128(CONST_M128I_CAST(input + 80));
+const __m128i m6 = _mm_loadu_si128(CONST_M128I_CAST(input + 96));
+const __m128i m7 = _mm_loadu_si128(CONST_M128I_CAST(input + 112));

-row1l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
-row1h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[2]));
-row2l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
-row2h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[6]));
-row3l = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(0)));
-row3h = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(2)));
-row4l = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(4))), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
-row4h = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(6))), _mm_loadu_si128((const __m128i*)(const void*)(&state.f[0])));
+row1l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+row1h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[2]));
+row2l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
+row2h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[6]));
+row3l = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(0)));
+row3h = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(2)));
+row4l = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(4))), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
+row4h = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(6))), _mm_loadu_si128(CONST_M128I_CAST(&state.f[0])));

 b0 = _mm_unpacklo_epi64(m0, m1);
 b1 = _mm_unpacklo_epi64(m2, m3);
@@ -3451,13 +3455,13 @@ static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State<word64, true>& state)

 row1l = _mm_xor_si128(row3l, row1l);
 row1h = _mm_xor_si128(row3h, row1h);
-_mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])), row1l));
-_mm_storeu_si128((__m128i *)(void*)(&state.h[2]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])), row1h));
+_mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[0])), row1l));
+_mm_storeu_si128(M128I_CAST(&state.h[2]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[2])), row1h));

 row2l = _mm_xor_si128(row4l, row2l);
 row2h = _mm_xor_si128(row4h, row2h);
-_mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])), row2l));
-_mm_storeu_si128((__m128i *)(void*)(&state.h[6]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])), row2h));
+_mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[4])), row2l));
+_mm_storeu_si128(M128I_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[6])), row2h));
 }
 #endif // CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
gcm.cpp (38 changed lines)
@@ -27,6 +27,10 @@
 # undef CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
 #endif

+// Clang casts
+#define M128I_CAST(x) ((__m128i *)(void *)(x))
+#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))
+
 #include "gcm.h"
 #include "cpu.h"
@@ -199,12 +203,12 @@ inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
 // SunCC 5.14 crash (bewildering since asserts are not in effect in release builds)
 // Also see http://github.com/weidai11/cryptopp/issues/226 and http://github.com/weidai11/cryptopp/issues/284
 # if __SUNPRO_CC
-*(__m128i *)(void *)a = _mm_xor_si128(*(__m128i *)(void *)b, *(__m128i *)(void *)c);
+*M128I_CAST(a) = _mm_xor_si128(*M128I_CAST(b), *M128I_CAST(c));
 # elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
 CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<__m128i>()));
 CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<__m128i>()));
 CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<__m128i>()));
-*(__m128i *)(void *)a = _mm_xor_si128(*(__m128i *)(void *)b, *(__m128i *)(void *)c);
+*M128I_CAST(a) = _mm_xor_si128(*M128I_CAST(b), *M128I_CAST(c));
 # else
 asm ("movdqa %1, %%xmm0; pxor %2, %%xmm0; movdqa %%xmm0, %0;" : "=m" (a[0]) : "m"(b[0]), "m"(c[0]));
 # endif
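Unlike the _mm_loadu_si128 call sites elsewhere in this commit, SSE2_Xor16 dereferences the cast pointer directly, which is an aligned 128-bit access; that is why the intrinsics branch asserts 16-byte alignment of a, b, and c. A minimal sketch of the aligned usage, with alignas(16) standing in for Crypto++'s CRYPTOPP_ALIGN_DATA(16) and a local byte typedef (both assumptions, not part of the commit):

    #include <emmintrin.h>
    #include <cstring>

    typedef unsigned char byte;                   // stand-in for Crypto++'s byte
    #define M128I_CAST(x) ((__m128i *)(void *)(x))

    int main()
    {
        alignas(16) byte a[16], b[16], c[16];     // plays the role of CRYPTOPP_ALIGN_DATA(16)
        std::memset(b, 0x0F, 16);
        std::memset(c, 0xF0, 16);

        // The void* hop quiets Clang, but the dereference is still an
        // aligned 128-bit load/store; a misaligned buffer could fault here.
        *M128I_CAST(a) = _mm_xor_si128(*M128I_CAST(b), *M128I_CAST(c));
        return (a[0] == 0xFF) ? 0 : 1;
    }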
@@ -237,7 +241,7 @@ static const word64 s_clmulConstants64[] = {
 W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607),
 W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f)};

-static const __m128i *s_clmulConstants = (const __m128i *)(const void *)s_clmulConstants64;
+static const __m128i *s_clmulConstants = CONST_M128I_CAST(s_clmulConstants64);
 static const unsigned int s_clmulTableSizeInBlocks = 8;

 inline __m128i CLMUL_Reduce(__m128i c0, __m128i c1, __m128i c2, const __m128i &r)
@@ -369,16 +373,16 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
 if (HasCLMUL())
 {
 const __m128i r = s_clmulConstants[0];
-__m128i h0 = _mm_shuffle_epi8(_mm_load_si128((__m128i *)(void *)hashKey), s_clmulConstants[1]);
+__m128i h0 = _mm_shuffle_epi8(_mm_load_si128(M128I_CAST(hashKey)), s_clmulConstants[1]);
 __m128i h = h0;

 for (i=0; i<tableSize; i+=32)
 {
 __m128i h1 = CLMUL_GF_Mul(h, h0, r);
-_mm_storel_epi64((__m128i *)(void *)(mulTable+i), h);
-_mm_storeu_si128((__m128i *)(void *)(mulTable+i+16), h1);
-_mm_storeu_si128((__m128i *)(void *)(mulTable+i+8), h);
-_mm_storel_epi64((__m128i *)(void *)(mulTable+i+8), h1);
+_mm_storel_epi64(M128I_CAST(mulTable+i), h);
+_mm_storeu_si128(M128I_CAST(mulTable+i+16), h1);
+_mm_storeu_si128(M128I_CAST(mulTable+i+8), h);
+_mm_storel_epi64(M128I_CAST(mulTable+i+8), h1);
 h = CLMUL_GF_Mul(h1, h0, r);
 }
@@ -517,7 +521,7 @@ inline void GCM_Base::ReverseHashBufferIfNeeded()
 #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
 if (HasCLMUL())
 {
-__m128i &x = *(__m128i *)(void *)HashBuffer();
+__m128i &x = *M128I_CAST(HashBuffer());
 x = _mm_shuffle_epi8(x, s_clmulConstants[1]);
 }
 #elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
@@ -608,14 +612,14 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
 #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
 if (HasCLMUL())
 {
-const __m128i *mulTable = (const __m128i *)(const void *)MulTable();
-__m128i x = _mm_load_si128((__m128i *)(void *)HashBuffer());
+const __m128i *mulTable = CONST_M128I_CAST(MulTable());
+__m128i x = _mm_load_si128(M128I_CAST(HashBuffer()));
 const __m128i r = s_clmulConstants[0], mask1 = s_clmulConstants[1], mask2 = s_clmulConstants[2];

 while (len >= 16)
 {
 size_t s = UnsignedMin(len/16, s_clmulTableSizeInBlocks), i=0;
-__m128i d1, d2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-1)*16)), mask2);
+__m128i d1, d2 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data+(s-1)*16)), mask2);
 __m128i c0 = _mm_setzero_si128();
 __m128i c1 = _mm_setzero_si128();
 __m128i c2 = _mm_setzero_si128();
@@ -628,7 +632,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)

 if (++i == s)
 {
-d1 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)data), mask1);
+d1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data)), mask1);
 d1 = _mm_xor_si128(d1, x);
 c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d1, h0, 0));
 c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d1, h1, 1));
@@ -637,7 +641,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
 break;
 }

-d1 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-i)*16-8)), mask2);
+d1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data+(s-i)*16-8)), mask2);
 c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d2, h0, 1));
 c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d1, h1, 1));
 d2 = _mm_xor_si128(d2, d1);
@@ -645,7 +649,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)

 if (++i == s)
 {
-d1 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)data), mask1);
+d1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data)), mask1);
 d1 = _mm_xor_si128(d1, x);
 c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d1, h0, 0x10));
 c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d1, h1, 0x11));
@@ -654,7 +658,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
 break;
 }

-d2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-i)*16-8)), mask1);
+d2 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data+(s-i)*16-8)), mask1);
 c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d1, h0, 0x10));
 c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d2, h1, 0x10));
 d1 = _mm_xor_si128(d1, d2);
@@ -667,7 +671,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
 x = CLMUL_Reduce(c0, c1, c2, r);
 }

-_mm_store_si128((__m128i *)(void *)HashBuffer(), x);
+_mm_store_si128(M128I_CAST(HashBuffer()), x);
 return len;
 }
 #elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
rijndael.cpp (60 changed lines)
@@ -95,6 +95,10 @@ static void Rijndael_Dec_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byt
 # define MAYBE_CONST const
 #endif

+// Clang casts
+#define M128I_CAST(x) ((__m128i *)(void *)(x))
+#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))
+
 #if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
 # if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
 namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];}
@@ -244,7 +248,7 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
 const word32 *ro = rcLE, *rc = rcLE;
 CRYPTOPP_UNUSED(ro);

-__m128i temp = _mm_loadu_si128((__m128i *)(void *)(userKey+keylen-16));
+__m128i temp = _mm_loadu_si128(M128I_CAST(userKey+keylen-16));
 memcpy(rk, userKey, keylen);

 while (true)
@@ -300,16 +304,16 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
 // SunCC 12.1 - 12.3 fail to consume the swap; while SunCC 12.4 consumes it without -std=c++11.
 vec_swap(*(__m128i *)(rk), *(__m128i *)(rk+4*m_rounds));
 #else
-std::swap(*(__m128i *)(void *)(rk), *(__m128i *)(void *)(rk+4*m_rounds));
+std::swap(*M128I_CAST(rk), *M128I_CAST(rk+4*m_rounds));
 #endif
 for (i = 4, j = 4*m_rounds-4; i < j; i += 4, j -= 4)
 {
-temp = _mm_aesimc_si128(*(__m128i *)(void *)(rk+i));
-*(__m128i *)(void *)(rk+i) = _mm_aesimc_si128(*(__m128i *)(void *)(rk+j));
-*(__m128i *)(void *)(rk+j) = temp;
+temp = _mm_aesimc_si128(*M128I_CAST(rk+i));
+*M128I_CAST(rk+i) = _mm_aesimc_si128(*M128I_CAST(rk+j));
+*M128I_CAST(rk+j) = temp;
 }

-*(__m128i *)(void *)(rk+i) = _mm_aesimc_si128(*(__m128i *)(void *)(rk+i));
+*M128I_CAST(rk+i) = _mm_aesimc_si128(*M128I_CAST(rk+i));
 }

 return;
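The hunk above is the AES-NI key-schedule inversion: aesdec-based decryption consumes the encryption round keys in reverse order, with InverseMixColumns (_mm_aesimc_si128) applied to every round key except the first and last. A standalone sketch of the same transform; the helper name and the word32 typedef are hypothetical stand-ins for illustration:

    #include <wmmintrin.h>   // _mm_aesimc_si128 (compile with -maes)
    #include <algorithm>

    typedef unsigned int word32;                  // stand-in for Crypto++'s word32
    #define M128I_CAST(x) ((__m128i *)(void *)(x))

    void InvertAesSchedule(word32 *rk, unsigned int rounds)  // hypothetical helper
    {
        // Swap the outermost round keys; they are used without InvMixColumns.
        std::swap(*M128I_CAST(rk), *M128I_CAST(rk + 4*rounds));

        // Swap the inner keys pairwise, applying InverseMixColumns to each.
        unsigned int i, j;
        for (i = 4, j = 4*rounds - 4; i < j; i += 4, j -= 4)
        {
            __m128i t = _mm_aesimc_si128(*M128I_CAST(rk + i));
            *M128I_CAST(rk + i) = _mm_aesimc_si128(*M128I_CAST(rk + j));
            *M128I_CAST(rk + j) = t;
        }

        // The middle key (i == j) still needs InverseMixColumns in place.
        *M128I_CAST(rk + i) = _mm_aesimc_si128(*M128I_CAST(rk + i));
    }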
@@ -1203,23 +1207,23 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128
 {
 while (length >= 4*blockSize)
 {
-__m128i block0 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks), block1, block2, block3;
+__m128i block0 = _mm_loadu_si128(CONST_M128I_CAST(inBlocks)), block1, block2, block3;
 if (flags & BlockTransformation::BT_InBlockIsCounter)
 {
-const __m128i be1 = *(const __m128i *)(const void *)s_one;
+const __m128i be1 = *CONST_M128I_CAST(s_one);
 block1 = _mm_add_epi32(block0, be1);
 block2 = _mm_add_epi32(block1, be1);
 block3 = _mm_add_epi32(block2, be1);
-_mm_storeu_si128((__m128i *)(void *)inBlocks, _mm_add_epi32(block3, be1));
+_mm_storeu_si128(M128I_CAST(inBlocks), _mm_add_epi32(block3, be1));
 }
 else
 {
 inBlocks += inIncrement;
-block1 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks);
+block1 = _mm_loadu_si128(CONST_M128I_CAST(inBlocks));
 inBlocks += inIncrement;
-block2 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks);
+block2 = _mm_loadu_si128(CONST_M128I_CAST(inBlocks));
 inBlocks += inIncrement;
-block3 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks);
+block3 = _mm_loadu_si128(CONST_M128I_CAST(inBlocks));
 inBlocks += inIncrement;
 }
@@ -1227,13 +1231,13 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128
 {
 // Coverity finding, appears to be false positive. Assert the condition.
 CRYPTOPP_ASSERT(xorBlocks);
-block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 xorBlocks += xorIncrement;
-block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block1 = _mm_xor_si128(block1, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 xorBlocks += xorIncrement;
-block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block2 = _mm_xor_si128(block2, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 xorBlocks += xorIncrement;
-block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block3 = _mm_xor_si128(block3, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 xorBlocks += xorIncrement;
 }
@@ -1241,23 +1245,23 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128

 if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
 {
-block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 xorBlocks += xorIncrement;
-block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block1 = _mm_xor_si128(block1, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 xorBlocks += xorIncrement;
-block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block2 = _mm_xor_si128(block2, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 xorBlocks += xorIncrement;
-block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block3 = _mm_xor_si128(block3, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 xorBlocks += xorIncrement;
 }

-_mm_storeu_si128((__m128i *)(void *)outBlocks, block0);
+_mm_storeu_si128(M128I_CAST(outBlocks), block0);
 outBlocks += outIncrement;
-_mm_storeu_si128((__m128i *)(void *)outBlocks, block1);
+_mm_storeu_si128(M128I_CAST(outBlocks), block1);
 outBlocks += outIncrement;
-_mm_storeu_si128((__m128i *)(void *)outBlocks, block2);
+_mm_storeu_si128(M128I_CAST(outBlocks), block2);
 outBlocks += outIncrement;
-_mm_storeu_si128((__m128i *)(void *)outBlocks, block3);
+_mm_storeu_si128(M128I_CAST(outBlocks), block3);
 outBlocks += outIncrement;

 length -= 4*blockSize;
@@ -1266,10 +1270,10 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128

 while (length >= blockSize)
 {
-__m128i block = _mm_loadu_si128((const __m128i *)(const void *)inBlocks);
+__m128i block = _mm_loadu_si128(CONST_M128I_CAST(inBlocks));

 if (flags & BlockTransformation::BT_XorInput)
-block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block = _mm_xor_si128(block, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));

 if (flags & BlockTransformation::BT_InBlockIsCounter)
 const_cast<byte *>(inBlocks)[15]++;
@@ -1277,9 +1281,9 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128
 func1(block, subkeys, rounds);

 if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
-block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+block = _mm_xor_si128(block, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));

-_mm_storeu_si128((__m128i *)(void *)outBlocks, block);
+_mm_storeu_si128(M128I_CAST(outBlocks), block);

 inBlocks += inIncrement;
 outBlocks += outIncrement;