diff --git a/chacha_avx.cpp b/chacha_avx.cpp index 72dc42c2..cdf50266 100644 --- a/chacha_avx.cpp +++ b/chacha_avx.cpp @@ -30,13 +30,6 @@ // Squash MS LNK4221 and libtool warnings extern const char CHACHA_AVX_FNAME[] = __FILE__; -// Sun Studio 12.4 OK, 12.5 and 12.6 compile error. -#if (__SUNPRO_CC >= 0x5140) && (__SUNPRO_CC <= 0x5150) -# define MAYBE_CONST -#else -# define MAYBE_CONST const -#endif - // VS2017 and global optimization bug. TODO, figure out when // we can re-enable full optimizations for VS2017. Also see // https://github.com/weidai11/cryptopp/issues/649 and diff --git a/sse_simd.h b/sse_simd.h index 0effad47..fe3a0332 100644 --- a/sse_simd.h +++ b/sse_simd.h @@ -20,43 +20,40 @@ NAMESPACE_BEGIN(CryptoPP) #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) -template -inline __m128i load_m128i(T* ptr) +template +inline __m128i load_m128i(const byte* ptr) { + enum { SCALE=sizeof(__m128i)/sizeof(byte) }; return _mm_loadu_si128( - reinterpret_cast<__m128i*>(ptr)); + const_cast<__m128i*>( // SunCC workaround + reinterpret_cast(ptr+SCALE*N))); } -template -inline __m128i load_m128i(const T* ptr) +template +inline __m128i load_m128i(const word16* ptr) { + enum { SCALE=sizeof(__m128i)/sizeof(word16) }; return _mm_loadu_si128( - reinterpret_cast(ptr)); + const_cast<__m128i*>( // SunCC workaround + reinterpret_cast(ptr+SCALE*N))); } -template -inline void store_m128i(T* ptr, __m128i val) +template +inline __m128i load_m128i(const word32* ptr) { - return _mm_storeu_si128( - reinterpret_cast<__m128i*>(ptr), val); + enum { SCALE=sizeof(__m128i)/sizeof(word32) }; + return _mm_loadu_si128( + const_cast<__m128i*>( // SunCC workaround + reinterpret_cast(ptr+SCALE*N))); } -// N specifies the nth 128-bit element -template -inline __m128i load_m128i(T* ptr) +template +inline __m128i load_m128i(const word64* ptr) { - enum { SCALE=sizeof(__m128i)/sizeof(T) }; + enum { SCALE=sizeof(__m128i)/sizeof(word64) }; return _mm_loadu_si128( - reinterpret_cast<__m128i*>(ptr+SCALE*N)); -} - -// N specifies the nth 128-bit element -template -inline __m128i load_m128i(const T* ptr) -{ - enum { SCALE=sizeof(__m128i)/sizeof(T) }; - return _mm_loadu_si128( - reinterpret_cast(ptr+SCALE*N)); + const_cast<__m128i*>( // SunCC workaround + reinterpret_cast(ptr+SCALE*N))); } // N specifies the nth 128-bit element @@ -72,43 +69,40 @@ inline void store_m128i(T* ptr, __m128i val) #if (CRYPTOPP_AVX2_AVAILABLE) -template -inline __m256i load_m256i(T* ptr) +template +inline __m256i load_m256i(const byte* ptr) { + enum { SCALE=sizeof(__m256i)/sizeof(byte) }; return _mm256_loadu_si256( - reinterpret_cast<__m256i*>(ptr)); + const_cast<__m256i*>( // SunCC workaround + reinterpret_cast(ptr+SCALE*N))); } -template -inline __m256i load_m256i(const T* ptr) +template +inline __m256i load_m256i(const word16* ptr) { + enum { SCALE=sizeof(__m256i)/sizeof(word16) }; return _mm256_loadu_si256( - reinterpret_cast(ptr)); + const_cast<__m256i*>( // SunCC workaround + reinterpret_cast(ptr+SCALE*N))); } -template -inline void store_m256i(T* ptr, __m256i val) +template +inline __m256i load_m256i(const word32* ptr) { - return _mm256_storeu_si256( - reinterpret_cast<__m256i*>(ptr), val); + enum { SCALE=sizeof(__m256i)/sizeof(word32) }; + return _mm256_loadu_si256( + const_cast<__m256i*>( // SunCC workaround + reinterpret_cast(ptr+SCALE*N))); } -// N specifies the nth 256-bit element -template -inline __m256i load_m256i(T* ptr) +template +inline __m256i load_m256i(const word64* ptr) { - enum { SCALE=sizeof(__m256i)/sizeof(T) }; + enum { SCALE=sizeof(__m256i)/sizeof(word64) }; return _mm256_loadu_si256( - reinterpret_cast<__m256i*>(ptr+SCALE*N)); -} - -// N specifies the nth 256-bit element -template -inline __m256i load_m256i(const T* ptr) -{ - enum { SCALE=sizeof(__m256i)/sizeof(T) }; - return _mm256_loadu_si256( - reinterpret_cast(ptr+SCALE*N)); + const_cast<__m256i*>( // SunCC workaround + reinterpret_cast(ptr+SCALE*N))); } // N specifies the nth 256-bit element