From 863bf9133c81933f4417fa9de49850a7c181158c Mon Sep 17 00:00:00 2001
From: Jeffrey Walton
Date: Sun, 13 Aug 2017 06:32:09 -0400
Subject: [PATCH] Cleanup casts due to Clang

---
 blake2.cpp   | 96 +++++++++++++++++++++++++++-------------------
 gcm.cpp      | 38 +++++++++++----------
 rijndael.cpp | 60 +++++++++++++++++---------------
 3 files changed, 103 insertions(+), 91 deletions(-)

diff --git a/blake2.cpp b/blake2.cpp
index 09a3794d..6313fec1 100644
--- a/blake2.cpp
+++ b/blake2.cpp
@@ -35,6 +35,10 @@ inline __m128i MM_SET_EPI64X(const word64 a, const word64 b)
 # define MM_SET_EPI64X(a, b) _mm_set_epi64x(a, b)
 #endif
 
+// Clang casts
+#define M128I_CAST(x) ((__m128i *)(void *)(x))
+#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))
+
 // C/C++ implementation
 static void BLAKE2_CXX_Compress32(const byte* input, BLAKE2_State<word32, false>& state);
 static void BLAKE2_CXX_Compress64(const byte* input, BLAKE2_State<word64, true>& state);
@@ -626,10 +630,10 @@ static void BLAKE2_SSE2_Compress32(const byte* input, BLAKE2_State
     __m128i row3l, row3h, row4l, row4h;
     __m128i b0, b1, t0, t1;
 
-    row1l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
-    row1h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[2]));
-    row2l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
-    row2h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[6]));
-    row3l = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(0)));
-    row3h = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(2)));
-    row4l = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(4))), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
-    row4h = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(6))), _mm_loadu_si128((const __m128i*)(const void*)(&state.f[0])));
+    row1l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+    row1h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[2]));
+    row2l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
+    row2h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[6]));
+    row3l = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(0)));
+    row3h = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(2)));
+    row4l = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(4))), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
+    row4h = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(6))), _mm_loadu_si128(CONST_M128I_CAST(&state.f[0])));
 
     b0 = MM_SET_EPI64X(m2, m0);
     b1 = MM_SET_EPI64X(m6, m4);
@@ -1918,13 +1922,13 @@ static void BLAKE2_SSE2_Compress64(const byte* input, BLAKE2_State
     row1l = _mm_xor_si128(row3l, row1l);
     row1h = _mm_xor_si128(row3h, row1h);
-    _mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])), row1l));
-    _mm_storeu_si128((__m128i *)(void*)(&state.h[2]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])), row1h));
+    _mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[0])), row1l));
+    _mm_storeu_si128(M128I_CAST(&state.h[2]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[2])), row1h));
 
     row2l = _mm_xor_si128(row4l, row2l);
     row2h = _mm_xor_si128(row4h, row2h);
-    _mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])), row2l));
-    _mm_storeu_si128((__m128i *)(void*)(&state.h[6]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])), row2h));
+    _mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[4])), row2l));
+    _mm_storeu_si128(M128I_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[6])), row2h));
 }
 # endif // (__SUNPRO_CC != 0x5120)
 #endif // CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
@@ -1941,15 +1945,15 @@ static void BLAKE2_SSE4_Compress32(const byte* input, BLAKE2_State& state)
@@ -2496,23 +2500,23 @@ static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State
     const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
     const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
 
-    const __m128i m0 = _mm_loadu_si128((const __m128i*)(const void*)(input + 00));
-    const __m128i m1 = _mm_loadu_si128((const __m128i*)(const void*)(input + 16));
-    const __m128i m2 = _mm_loadu_si128((const __m128i*)(const void*)(input + 32));
-    const __m128i m3 = _mm_loadu_si128((const __m128i*)(const void*)(input + 48));
-    const __m128i m4 = _mm_loadu_si128((const __m128i*)(const void*)(input + 64));
-    const __m128i m5 = _mm_loadu_si128((const __m128i*)(const void*)(input + 80));
-    const __m128i m6 = _mm_loadu_si128((const __m128i*)(const void*)(input + 96));
-    const __m128i m7 = _mm_loadu_si128((const __m128i*)(const void*)(input + 112));
+    const __m128i m0 = _mm_loadu_si128(CONST_M128I_CAST(input + 00));
+    const __m128i m1 = _mm_loadu_si128(CONST_M128I_CAST(input + 16));
+    const __m128i m2 = _mm_loadu_si128(CONST_M128I_CAST(input + 32));
+    const __m128i m3 = _mm_loadu_si128(CONST_M128I_CAST(input + 48));
+    const __m128i m4 = _mm_loadu_si128(CONST_M128I_CAST(input + 64));
+    const __m128i m5 = _mm_loadu_si128(CONST_M128I_CAST(input + 80));
+    const __m128i m6 = _mm_loadu_si128(CONST_M128I_CAST(input + 96));
+    const __m128i m7 = _mm_loadu_si128(CONST_M128I_CAST(input + 112));
 
-    row1l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
-    row1h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[2]));
-    row2l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
-    row2h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[6]));
-    row3l = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(0)));
-    row3h = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(2)));
-    row4l = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(4))), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
-    row4h = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(6))), _mm_loadu_si128((const __m128i*)(const void*)(&state.f[0])));
+    row1l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+    row1h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[2]));
+    row2l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
+    row2h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[6]));
+    row3l = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(0)));
+    row3h = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(2)));
+    row4l = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(4))), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
+    row4h = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(6))), _mm_loadu_si128(CONST_M128I_CAST(&state.f[0])));
 
     b0 = _mm_unpacklo_epi64(m0, m1);
     b1 = _mm_unpacklo_epi64(m2, m3);
@@ -3451,13 +3455,13 @@ static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State
     row1l = _mm_xor_si128(row3l, row1l);
     row1h = _mm_xor_si128(row3h, row1h);
-    _mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])), row1l));
-    _mm_storeu_si128((__m128i *)(void*)(&state.h[2]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])), row1h));
+    _mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[0])), row1l));
+    _mm_storeu_si128(M128I_CAST(&state.h[2]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[2])), row1h));
 
     row2l = _mm_xor_si128(row4l, row2l);
     row2h = _mm_xor_si128(row4h, row2h);
-    _mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])), row2l));
-    _mm_storeu_si128((__m128i *)(void*)(&state.h[6]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])), row2h));
+    _mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[4])), row2l));
+    _mm_storeu_si128(M128I_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[6])), row2h));
 }
 
 #endif // CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
diff --git a/gcm.cpp b/gcm.cpp
index 0f06730f..b27dba29 100644
--- a/gcm.cpp
+++ b/gcm.cpp
@@ -27,6 +27,10 @@
 # undef CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
 #endif
 
+// Clang casts
+#define M128I_CAST(x) ((__m128i *)(void *)(x))
+#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))
+
 #include "gcm.h"
 #include "cpu.h"
@@ -199,12 +203,12 @@ inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
     // SunCC 5.14 crash (bewildering since asserts are not in effect in release builds)
     // Also see http://github.com/weidai11/cryptopp/issues/226 and http://github.com/weidai11/cryptopp/issues/284
 # if __SUNPRO_CC
-    *(__m128i *)(void *)a = _mm_xor_si128(*(__m128i *)(void *)b, *(__m128i *)(void *)c);
+    *M128I_CAST(a) = _mm_xor_si128(*M128I_CAST(b), *M128I_CAST(c));
 # elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
     CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<__m128i>()));
     CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<__m128i>()));
     CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<__m128i>()));
-    *(__m128i *)(void *)a = _mm_xor_si128(*(__m128i *)(void *)b, *(__m128i *)(void *)c);
+    *M128I_CAST(a) = _mm_xor_si128(*M128I_CAST(b), *M128I_CAST(c));
 # else
     asm ("movdqa %1, %%xmm0; pxor %2, %%xmm0; movdqa %%xmm0, %0;" : "=m" (a[0]) : "m"(b[0]), "m"(c[0]));
 # endif
@@ -237,7 +241,7 @@ static const word64 s_clmulConstants64[] = {
     W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607),
     W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f)};
 
-static const __m128i *s_clmulConstants = (const __m128i *)(const void *)s_clmulConstants64;
+static const __m128i *s_clmulConstants = CONST_M128I_CAST(s_clmulConstants64);
 static const unsigned int s_clmulTableSizeInBlocks = 8;
 
 inline __m128i CLMUL_Reduce(__m128i c0, __m128i c1, __m128i c2, const __m128i &r)
@@ -369,16 +373,16 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
     if (HasCLMUL())
     {
         const __m128i r = s_clmulConstants[0];
-        __m128i h0 = _mm_shuffle_epi8(_mm_load_si128((__m128i *)(void *)hashKey), s_clmulConstants[1]);
+        __m128i h0 = _mm_shuffle_epi8(_mm_load_si128(M128I_CAST(hashKey)), s_clmulConstants[1]);
         __m128i h = h0;
 
         for (i=0; i= 16)
     {
         size_t s = UnsignedMin(len/16, s_clmulTableSizeInBlocks), i=0;
-        __m128i d1, d2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-1)*16)), mask2);
+        __m128i d1, d2 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data+(s-1)*16)), mask2);
         __m128i c0 = _mm_setzero_si128();
         __m128i c1 = _mm_setzero_si128();
         __m128i c2 = _mm_setzero_si128();
@@ -628,7 +632,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
         if (++i == s)
         {
-            d1 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)data), mask1);
+            d1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data)), mask1);
             d1 = _mm_xor_si128(d1, x);
             c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d1, h0, 0));
             c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d1, h1, 1));
@@ -637,7 +641,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
             break;
         }
 
-        d1 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-i)*16-8)), mask2);
+        d1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data+(s-i)*16-8)), mask2);
         c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d2, h0, 1));
         c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d1, h1, 1));
         d2 = _mm_xor_si128(d2, d1);
@@ -645,7 +649,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
         if (++i == s)
         {
-            d1 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)data), mask1);
+            d1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data)), mask1);
             d1 = _mm_xor_si128(d1, x);
             c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d1, h0, 0x10));
             c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d1, h1, 0x11));
@@ -654,7 +658,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
             break;
         }
 
-        d2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(const void *)(data+(s-i)*16-8)), mask1);
+        d2 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128I_CAST(data+(s-i)*16-8)), mask1);
         c0 = _mm_xor_si128(c0, _mm_clmulepi64_si128(d1, h0, 0x10));
         c2 = _mm_xor_si128(c2, _mm_clmulepi64_si128(d2, h1, 0x10));
         d1 = _mm_xor_si128(d1, d2);
@@ -667,7 +671,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
         x = CLMUL_Reduce(c0, c1, c2, r);
     }
 
-    _mm_store_si128((__m128i *)(void *)HashBuffer(), x);
+    _mm_store_si128(M128I_CAST(HashBuffer()), x);
     return len;
 }
 #elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
diff --git a/rijndael.cpp b/rijndael.cpp
index eaaffb0f..fce3d737 100644
--- a/rijndael.cpp
+++ b/rijndael.cpp
@@ -95,6 +95,10 @@ static void Rijndael_Dec_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byt
 # define MAYBE_CONST const
 #endif
 
+// Clang casts
+#define M128I_CAST(x) ((__m128i *)(void *)(x))
+#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))
+
 #if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
 # if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
 namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];}
@@ -244,7 +248,7 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
         const word32 *ro = rcLE, *rc = rcLE;
         CRYPTOPP_UNUSED(ro);
 
-        __m128i temp = _mm_loadu_si128((__m128i *)(void *)(userKey+keylen-16));
+        __m128i temp = _mm_loadu_si128(M128I_CAST(userKey+keylen-16));
         memcpy(rk, userKey, keylen);
 
         while (true)
@@ -300,16 +304,16 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
             // SunCC 12.1 - 12.3 fail to consume the swap; while SunCC 12.4 consumes it without -std=c++11.
             vec_swap(*(__m128i *)(rk), *(__m128i *)(rk+4*m_rounds));
 #else
-            std::swap(*(__m128i *)(void *)(rk), *(__m128i *)(void *)(rk+4*m_rounds));
+            std::swap(*M128I_CAST(rk), *M128I_CAST(rk+4*m_rounds));
 #endif
             for (i = 4, j = 4*m_rounds-4; i < j; i += 4, j -= 4)
             {
-                temp = _mm_aesimc_si128(*(__m128i *)(void *)(rk+i));
-                *(__m128i *)(void *)(rk+i) = _mm_aesimc_si128(*(__m128i *)(void *)(rk+j));
-                *(__m128i *)(void *)(rk+j) = temp;
+                temp = _mm_aesimc_si128(*M128I_CAST(rk+i));
+                *M128I_CAST(rk+i) = _mm_aesimc_si128(*M128I_CAST(rk+j));
+                *M128I_CAST(rk+j) = temp;
             }
 
-            *(__m128i *)(void *)(rk+i) = _mm_aesimc_si128(*(__m128i *)(void *)(rk+i));
+            *M128I_CAST(rk+i) = _mm_aesimc_si128(*M128I_CAST(rk+i));
         }
 
         return;
@@ -1203,23 +1207,23 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128
 {
     while (length >= 4*blockSize)
     {
-        __m128i block0 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks), block1, block2, block3;
+        __m128i block0 = _mm_loadu_si128(CONST_M128I_CAST(inBlocks)), block1, block2, block3;
         if (flags & BlockTransformation::BT_InBlockIsCounter)
         {
-            const __m128i be1 = *(const __m128i *)(const void *)s_one;
+            const __m128i be1 = *CONST_M128I_CAST(s_one);
             block1 = _mm_add_epi32(block0, be1);
             block2 = _mm_add_epi32(block1, be1);
             block3 = _mm_add_epi32(block2, be1);
-            _mm_storeu_si128((__m128i *)(void *)inBlocks, _mm_add_epi32(block3, be1));
+            _mm_storeu_si128(M128I_CAST(inBlocks), _mm_add_epi32(block3, be1));
         }
         else
         {
             inBlocks += inIncrement;
-            block1 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks);
+            block1 = _mm_loadu_si128(CONST_M128I_CAST(inBlocks));
             inBlocks += inIncrement;
-            block2 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks);
+            block2 = _mm_loadu_si128(CONST_M128I_CAST(inBlocks));
             inBlocks += inIncrement;
-            block3 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks);
+            block3 = _mm_loadu_si128(CONST_M128I_CAST(inBlocks));
             inBlocks += inIncrement;
         }
 
@@ -1227,13 +1231,13 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128
         {
             // Coverity finding, appears to be false positive. Assert the condition.
             CRYPTOPP_ASSERT(xorBlocks);
-            block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
             xorBlocks += xorIncrement;
-            block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block1 = _mm_xor_si128(block1, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
             xorBlocks += xorIncrement;
-            block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block2 = _mm_xor_si128(block2, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
             xorBlocks += xorIncrement;
-            block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block3 = _mm_xor_si128(block3, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
             xorBlocks += xorIncrement;
         }
 
@@ -1241,23 +1245,23 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128
         if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
         {
-            block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block0 = _mm_xor_si128(block0, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
             xorBlocks += xorIncrement;
-            block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block1 = _mm_xor_si128(block1, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
             xorBlocks += xorIncrement;
-            block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block2 = _mm_xor_si128(block2, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
             xorBlocks += xorIncrement;
-            block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block3 = _mm_xor_si128(block3, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
             xorBlocks += xorIncrement;
         }
 
-        _mm_storeu_si128((__m128i *)(void *)outBlocks, block0);
+        _mm_storeu_si128(M128I_CAST(outBlocks), block0);
         outBlocks += outIncrement;
-        _mm_storeu_si128((__m128i *)(void *)outBlocks, block1);
+        _mm_storeu_si128(M128I_CAST(outBlocks), block1);
         outBlocks += outIncrement;
-        _mm_storeu_si128((__m128i *)(void *)outBlocks, block2);
+        _mm_storeu_si128(M128I_CAST(outBlocks), block2);
         outBlocks += outIncrement;
-        _mm_storeu_si128((__m128i *)(void *)outBlocks, block3);
+        _mm_storeu_si128(M128I_CAST(outBlocks), block3);
         outBlocks += outIncrement;
 
         length -= 4*blockSize;
@@ -1266,10 +1270,10 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128
     while (length >= blockSize)
     {
-        __m128i block = _mm_loadu_si128((const __m128i *)(const void *)inBlocks);
+        __m128i block = _mm_loadu_si128(CONST_M128I_CAST(inBlocks));
 
         if (flags & BlockTransformation::BT_XorInput)
-            block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block = _mm_xor_si128(block, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 
         if (flags & BlockTransformation::BT_InBlockIsCounter)
            const_cast<byte *>(inBlocks)[15]++;
@@ -1277,9 +1281,9 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128
         func1(block, subkeys, rounds);
 
         if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
-            block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks));
+            block = _mm_xor_si128(block, _mm_loadu_si128(CONST_M128I_CAST(xorBlocks)));
 
-        _mm_storeu_si128((__m128i *)(void *)outBlocks, block);
+        _mm_storeu_si128(M128I_CAST(outBlocks), block);
 
         inBlocks += inIncrement;
         outBlocks += outIncrement;
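
Usage sketch (illustrative only, not part of the patch): both macros funnel the pointer through void* before the __m128i* conversion, which is the usual way to quiet Clang's pointer-cast diagnostics (for example -Wcast-align) at SSE load/store call sites without changing the generated code. The Xor16 helper below is hypothetical and stands in for the byte*-to-__m128i* call sites the diff touches in blake2.cpp, gcm.cpp and rijndael.cpp.

#include <emmintrin.h>  // SSE2: __m128i, _mm_loadu_si128, _mm_xor_si128, _mm_storeu_si128

// Same macros as introduced by the patch.
#define M128I_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))

// Hypothetical helper: XOR 16 bytes of 'b' into 'a' using unaligned SSE2
// loads/stores. The macros hide the byte* -> void* -> __m128i* double cast.
static void Xor16(unsigned char *a, const unsigned char *b)
{
    const __m128i va = _mm_loadu_si128(CONST_M128I_CAST(a));
    const __m128i vb = _mm_loadu_si128(CONST_M128I_CAST(b));
    _mm_storeu_si128(M128I_CAST(a), _mm_xor_si128(va, vb));
}

Centralizing the two casts in one macro per constness keeps each call site short, which is most of what the diff above changes.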