diff --git a/GNUmakefile b/GNUmakefile index ae19f145..49ad24ef 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -882,8 +882,6 @@ gcm-simd.o : gcm-simd.cpp $(CXX) $(strip $(CXXFLAGS) $(GCM_FLAG) -c) $< # AESNI or ARMv7a/ARMv8a available -rijndael.o : rijndael.cpp - $(CXX) $(strip $(CXXFLAGS) $(AES_FLAG) -c) $< rijndael-simd.o : rijndael-simd.cpp $(CXX) $(strip $(CXXFLAGS) $(AES_FLAG) -c) $< diff --git a/blake2.cpp b/blake2.cpp index cbc6e706..16442928 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -114,7 +114,7 @@ typedef void (*pfnCompress64)(const byte*, BLAKE2_State&); pfnCompress64 InitializeCompress64Fn() { #if CRYPTOPP_SSE42_AVAILABLE - if (HasSSE4()) + if (HasSSE42()) return &BLAKE2_Compress64_SSE4; else #endif @@ -136,7 +136,7 @@ pfnCompress64 InitializeCompress64Fn() pfnCompress32 InitializeCompress32Fn() { #if CRYPTOPP_SSE42_AVAILABLE - if (HasSSE4()) + if (HasSSE42()) return &BLAKE2_Compress32_SSE4; else #endif diff --git a/cpu.cpp b/cpu.cpp index c3b6de61..26851a2b 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -185,8 +185,9 @@ static bool TrySSE2() } bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false; -bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false; -bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false; +bool CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false; +bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false, CRYPTOPP_SECTION_INIT g_hasSSE42 = false; +bool CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false; bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false; bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT 
g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false; bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false; @@ -225,26 +226,14 @@ void DetectX86Features() if (!CpuId(1, cpuid2)) return; - g_hasMMX = (cpuid2[3] & (1 << 23)) != 0; if ((cpuid2[3] & (1 << 26)) != 0) g_hasSSE2 = TrySSE2(); g_hasSSSE3 = g_hasSSE2 && (cpuid2[2] & (1<<9)); - g_hasSSE4 = g_hasSSE2 && ((cpuid2[2] & (1<<19)) && (cpuid2[2] & (1<<20))); + g_hasSSE41 = g_hasSSE2 && (cpuid2[2] & (1<<19)); + g_hasSSE42 = g_hasSSE2 && (cpuid2[2] & (1<<20)); g_hasAESNI = g_hasSSE2 && (cpuid2[2] & (1<<25)); g_hasCLMUL = g_hasSSE2 && (cpuid2[2] & (1<<1)); - if ((cpuid2[3] & (1 << 25)) != 0) - g_hasISSE = true; - else - { - CpuId(0x080000000, cpuid3); - if (cpuid3[0] >= 0x080000001) - { - CpuId(0x080000001, cpuid3); - g_hasISSE = (cpuid3[3] & (1 << 22)) != 0; - } - } - if (IsIntel(cpuid1)) { static const unsigned int RDRAND_FLAG = (1 << 30); diff --git a/cpu.h b/cpu.h index 2c458682..763d5a01 100644 --- a/cpu.h +++ b/cpu.h @@ -57,11 +57,10 @@ NAMESPACE_BEGIN(CryptoPP) #ifndef CRYPTOPP_DOXYGEN_PROCESSING // These should not be used directly extern CRYPTOPP_DLL bool g_x86DetectionDone; -extern CRYPTOPP_DLL bool g_hasMMX; -extern CRYPTOPP_DLL bool g_hasISSE; extern CRYPTOPP_DLL bool g_hasSSE2; extern CRYPTOPP_DLL bool g_hasSSSE3; -extern CRYPTOPP_DLL bool g_hasSSE4; +extern CRYPTOPP_DLL bool g_hasSSE41; +extern CRYPTOPP_DLL bool g_hasSSE42; extern CRYPTOPP_DLL bool g_hasAESNI; extern CRYPTOPP_DLL bool g_hasCLMUL; extern CRYPTOPP_DLL bool g_hasSHA; @@ -79,36 +78,6 @@ CRYPTOPP_DLL void CRYPTOPP_API DetectX86Features(); CRYPTOPP_DLL bool CRYPTOPP_API CpuId(word32 input, word32 output[4]); #endif // CRYPTOPP_DOXYGEN_PROCESSING -//! \brief Determines MMX availability -//! \returns true if MMX is determined to be available, false otherwise -//! \details MMX, SSE and SSE2 are core processor features for x86_64, and -//! 
the function always returns true for the platform. -inline bool HasMMX() -{ -#if CRYPTOPP_BOOL_X64 - return true; -#else - if (!g_x86DetectionDone) - DetectX86Features(); - return g_hasMMX; -#endif -} - -//! \brief Determines SSE availability -//! \returns true if SSE is determined to be available, false otherwise -//! \details MMX, SSE and SSE2 are core processor features for x86_64, and -//! the function always returns true for the platform. -inline bool HasISSE() -{ -#if CRYPTOPP_BOOL_X64 - return true; -#else - if (!g_x86DetectionDone) - DetectX86Features(); - return g_hasISSE; -#endif -} - //! \brief Determines SSE2 availability //! \returns true if SSE2 is determined to be available, false otherwise //! \details MMX, SSE and SSE2 are core processor features for x86_64, and @@ -135,14 +104,24 @@ inline bool HasSSSE3() return g_hasSSSE3; } -//! \brief Determines SSE4 availability -//! \returns true if SSE4.1 and SSE4.2 are determined to be available, false otherwise -//! \details HasSSE4() is a runtime check performed using CPUID which requires both SSE4.1 and SSE4.2 -inline bool HasSSE4() +//! \brief Determines SSE4.1 availability +//! \returns true if SSE4.1 is determined to be available, false otherwise +//! \details HasSSE41() is a runtime check performed using CPUID +inline bool HasSSE41() { if (!g_x86DetectionDone) DetectX86Features(); - return g_hasSSE4; + return g_hasSSE41; +} + +//! \brief Determines SSE4.2 availability +//! \returns true if SSE4.2 is determined to be available, false otherwise +//! \details HasSSE42() is a runtime check performed using CPUID +inline bool HasSSE42() +{ + if (!g_x86DetectionDone) + DetectX86Features(); + return g_hasSSE42; } //! 
\brief Determines AES-NI availability diff --git a/crc.cpp b/crc.cpp index da0d1c3a..21153d3d 100644 --- a/crc.cpp +++ b/crc.cpp @@ -298,7 +298,7 @@ CRC32C::CRC32C() void CRC32C::Update(const byte *s, size_t n) { #if (CRYPTOPP_SSE42_AVAILABLE) - if (HasSSE4()) + if (HasSSE42()) { CRC32C_Update_SSE42(s, n, m_crc); return; diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp index a6575c34..39a4dd96 100644 --- a/rijndael-simd.cpp +++ b/rijndael-simd.cpp @@ -15,7 +15,7 @@ # undef CRYPTOPP_ARM_AES_AVAILABLE #endif -#if (CRYPTOPP_SSE42_AVAILABLE) -# include "nmmintrin.h" +#if (CRYPTOPP_SSE41_AVAILABLE) +# include "smmintrin.h" #endif @@ -37,6 +37,13 @@ # define EXCEPTION_EXECUTE_HANDLER 1 #endif +// Hack for SunCC, http://github.com/weidai11/cryptopp/issues/224 +#if (__SUNPRO_CC >= 0x5130) +# define MAYBE_CONST +#else +# define MAYBE_CONST const +#endif + NAMESPACE_BEGIN(CryptoPP) #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY @@ -110,6 +117,199 @@ bool CPU_TryAES_ARMV8() #endif // CRYPTOPP_ARM_AES_AVAILABLE #if (CRYPTOPP_AESNI_AVAILABLE) +void AESNI_Enc_Block(__m128i &block, MAYBE_CONST __m128i *subkeys, unsigned int rounds) +{ + block = _mm_xor_si128(block, subkeys[0]); + for (unsigned int i=1; i +inline size_t Rijndael_AdvancedProcessBlocks_AESNI(F1 func1, F4 func4, MAYBE_CONST __m128i *subkeys, unsigned int rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) +{ + size_t blockSize = 16; + size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : blockSize; + size_t xorIncrement = xorBlocks ? blockSize : 0; + size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 
0 : blockSize; + + if (flags & BlockTransformation::BT_ReverseDirection) + { + CRYPTOPP_ASSERT(length % blockSize == 0); + inBlocks += length - blockSize; + xorBlocks += length - blockSize; + outBlocks += length - blockSize; + inIncrement = 0-inIncrement; + xorIncrement = 0-xorIncrement; + outIncrement = 0-outIncrement; + } + + if (flags & BlockTransformation::BT_AllowParallel) + { + while (length >= 4*blockSize) + { + __m128i block0 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks), block1, block2, block3; + if (flags & BlockTransformation::BT_InBlockIsCounter) + { + const __m128i be1 = *(const __m128i *)(const void *)s_one; + block1 = _mm_add_epi32(block0, be1); + block2 = _mm_add_epi32(block1, be1); + block3 = _mm_add_epi32(block2, be1); + _mm_storeu_si128((__m128i *)(void *)inBlocks, _mm_add_epi32(block3, be1)); + } + else + { + inBlocks += inIncrement; + block1 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks); + inBlocks += inIncrement; + block2 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks); + inBlocks += inIncrement; + block3 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks); + inBlocks += inIncrement; + } + + if (flags & BlockTransformation::BT_XorInput) + { + // Coverity finding, appears to be false positive. Assert the condition. 
+ CRYPTOPP_ASSERT(xorBlocks); + block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + xorBlocks += xorIncrement; + block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + xorBlocks += xorIncrement; + block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + xorBlocks += xorIncrement; + block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + xorBlocks += xorIncrement; + } + + func4(block0, block1, block2, block3, subkeys, rounds); + + if (xorBlocks && !(flags & BlockTransformation::BT_XorInput)) + { + block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + xorBlocks += xorIncrement; + block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + xorBlocks += xorIncrement; + block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + xorBlocks += xorIncrement; + block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + xorBlocks += xorIncrement; + } + + _mm_storeu_si128((__m128i *)(void *)outBlocks, block0); + outBlocks += outIncrement; + _mm_storeu_si128((__m128i *)(void *)outBlocks, block1); + outBlocks += outIncrement; + _mm_storeu_si128((__m128i *)(void *)outBlocks, block2); + outBlocks += outIncrement; + _mm_storeu_si128((__m128i *)(void *)outBlocks, block3); + outBlocks += outIncrement; + + length -= 4*blockSize; + } + } + + while (length >= blockSize) + { + __m128i block = _mm_loadu_si128((const __m128i *)(const void *)inBlocks); + + if (flags & BlockTransformation::BT_XorInput) + block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + + if (flags & BlockTransformation::BT_InBlockIsCounter) + const_cast<byte *>(inBlocks)[15]++; + + func1(block, subkeys, rounds); + + if (xorBlocks && !(flags & BlockTransformation::BT_XorInput)) + block = 
_mm_xor_si128(block, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); + + _mm_storeu_si128((__m128i *)(void *)outBlocks, block); + + inBlocks += inIncrement; + outBlocks += outIncrement; + xorBlocks += xorIncrement; + length -= blockSize; + } + + return length; +} + +size_t Rijndael_AdvancedProcessBlocks_Enc_AESNI(MAYBE_CONST __m128i *subkeys, unsigned int rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) +{ + return Rijndael_AdvancedProcessBlocks_AESNI(AESNI_Enc_Block, AESNI_Enc_4_Blocks, + subkeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); +} + +size_t Rijndael_AdvancedProcessBlocks_Dec_AESNI(MAYBE_CONST __m128i *subkeys, unsigned int rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) +{ + return Rijndael_AdvancedProcessBlocks_AESNI(AESNI_Dec_Block, AESNI_Dec_4_Blocks, + subkeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); +} + void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32 *rk) { const unsigned rounds = keyLen/4 + 6; @@ -188,7 +388,7 @@ void Rijndael_UncheckedSetKeyRev_SSE4_AESNI(word32 *key, unsigned int rounds) *(__m128i *)(void *)(key+j) = temp; } - *(__m128i *)(void *)(key+i) = _mm_aesimc_si128(*(__m128i *)(void *)(key+i)); + *(__m128i *)(void *)(key+i) = _mm_aesimc_si128(*(__m128i *)(void *)(key+i)); } #endif // CRYPTOPP_AESNI_AVAILABLE diff --git a/rijndael.cpp b/rijndael.cpp index f54fec60..bea3b45e 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -228,6 +228,11 @@ void Rijndael::Base::FillDecTable() #if (CRYPTOPP_AESNI_AVAILABLE) extern void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32* rk); extern void Rijndael_UncheckedSetKeyRev_SSE4_AESNI(word32 *key, unsigned int rounds); + +extern size_t Rijndael_AdvancedProcessBlocks_Enc_AESNI(MAYBE_CONST __m128i *subkeys, unsigned int rounds, + const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t 
length, word32 flags); +extern size_t Rijndael_AdvancedProcessBlocks_Dec_AESNI(MAYBE_CONST __m128i *subkeys, unsigned int rounds, + const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); #endif void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, const NameValuePairs &) @@ -239,10 +244,12 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c word32 *rk = m_key; -#if (CRYPTOPP_AESNI_AVAILABLE && CRYPTOPP_SSE42_AVAILABLE && (!defined(_MSC_VER) || _MSC_VER >= 1600 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)) +#if (CRYPTOPP_AESNI_AVAILABLE && CRYPTOPP_SSE41_AVAILABLE && (!defined(_MSC_VER) || _MSC_VER >= 1600 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)) // MSVC 2008 SP1 generates bad code for _mm_extract_epi32() when compiling for X64 - if (HasAESNI() && HasSSE4()) + if (HasAESNI() && HasSSE41()) { + // TODO: Add non-SSE4.1 variant for low-end Atoms. The low-end + // Atoms have SSE2-SSSE3 and AES-NI, but not SSE4.1 or SSE4.2. 
Rijndael_UncheckedSetKey_SSE4_AESNI(userKey, keyLen, rk); if (!IsForwardTransformation()) Rijndael_UncheckedSetKeyRev_SSE4_AESNI(m_key, m_rounds); @@ -336,7 +343,8 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock if (HasAESNI()) #endif { - return (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0); + (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0); + return; } #endif @@ -1024,190 +1032,6 @@ static inline bool AliasedWithTable(const byte *begin, const byte *end) return (s0 < t1 || s1 <= t1) || (s0 >= t0 || s1 > t0); } -#if CRYPTOPP_AESNI_AVAILABLE - -inline void AESNI_Enc_Block(__m128i &block, MAYBE_CONST __m128i *subkeys, unsigned int rounds) -{ - block = _mm_xor_si128(block, subkeys[0]); - for (unsigned int i=1; i -inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, MAYBE_CONST __m128i *subkeys, unsigned int rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) -{ - size_t blockSize = 16; - size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : blockSize; - size_t xorIncrement = xorBlocks ? blockSize : 0; - size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 
0 : blockSize; - - if (flags & BlockTransformation::BT_ReverseDirection) - { - CRYPTOPP_ASSERT(length % blockSize == 0); - inBlocks += length - blockSize; - xorBlocks += length - blockSize; - outBlocks += length - blockSize; - inIncrement = 0-inIncrement; - xorIncrement = 0-xorIncrement; - outIncrement = 0-outIncrement; - } - - if (flags & BlockTransformation::BT_AllowParallel) - { - while (length >= 4*blockSize) - { - __m128i block0 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks), block1, block2, block3; - if (flags & BlockTransformation::BT_InBlockIsCounter) - { - const __m128i be1 = *(const __m128i *)(const void *)s_one; - block1 = _mm_add_epi32(block0, be1); - block2 = _mm_add_epi32(block1, be1); - block3 = _mm_add_epi32(block2, be1); - _mm_storeu_si128((__m128i *)(void *)inBlocks, _mm_add_epi32(block3, be1)); - } - else - { - inBlocks += inIncrement; - block1 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks); - inBlocks += inIncrement; - block2 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks); - inBlocks += inIncrement; - block3 = _mm_loadu_si128((const __m128i *)(const void *)inBlocks); - inBlocks += inIncrement; - } - - if (flags & BlockTransformation::BT_XorInput) - { - // Coverity finding, appears to be false positive. Assert the condition. 
- CRYPTOPP_ASSERT(xorBlocks); - block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - xorBlocks += xorIncrement; - block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - xorBlocks += xorIncrement; - block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - xorBlocks += xorIncrement; - block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - xorBlocks += xorIncrement; - } - - func4(block0, block1, block2, block3, subkeys, rounds); - - if (xorBlocks && !(flags & BlockTransformation::BT_XorInput)) - { - block0 = _mm_xor_si128(block0, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - xorBlocks += xorIncrement; - block1 = _mm_xor_si128(block1, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - xorBlocks += xorIncrement; - block2 = _mm_xor_si128(block2, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - xorBlocks += xorIncrement; - block3 = _mm_xor_si128(block3, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - xorBlocks += xorIncrement; - } - - _mm_storeu_si128((__m128i *)(void *)outBlocks, block0); - outBlocks += outIncrement; - _mm_storeu_si128((__m128i *)(void *)outBlocks, block1); - outBlocks += outIncrement; - _mm_storeu_si128((__m128i *)(void *)outBlocks, block2); - outBlocks += outIncrement; - _mm_storeu_si128((__m128i *)(void *)outBlocks, block3); - outBlocks += outIncrement; - - length -= 4*blockSize; - } - } - - while (length >= blockSize) - { - __m128i block = _mm_loadu_si128((const __m128i *)(const void *)inBlocks); - - if (flags & BlockTransformation::BT_XorInput) - block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - - if (flags & BlockTransformation::BT_InBlockIsCounter) - const_cast<byte *>(inBlocks)[15]++; - - func1(block, subkeys, rounds); - - if (xorBlocks && !(flags & BlockTransformation::BT_XorInput)) - block = 
_mm_xor_si128(block, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - - _mm_storeu_si128((__m128i *)(void *)outBlocks, block); - - inBlocks += inIncrement; - outBlocks += outIncrement; - xorBlocks += xorIncrement; - length -= blockSize; - } - - return length; -} -#endif - #if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86 struct Locals { @@ -1229,7 +1053,9 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo { #if CRYPTOPP_AESNI_AVAILABLE if (HasAESNI()) - return AESNI_AdvancedProcessBlocks(AESNI_Enc_Block, AESNI_Enc_4_Blocks, (MAYBE_CONST __m128i *)(const void *)m_key.begin(), m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); + return Rijndael_AdvancedProcessBlocks_Enc_AESNI((MAYBE_CONST __m128i *)(const void *)m_key.begin(), + m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); + #endif #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) @@ -1291,7 +1117,8 @@ size_t Rijndael::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo { #if CRYPTOPP_AESNI_AVAILABLE if (HasAESNI()) - return AESNI_AdvancedProcessBlocks(AESNI_Dec_Block, AESNI_Dec_4_Blocks, (MAYBE_CONST __m128i *)(const void *)m_key.begin(), m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); + return Rijndael_AdvancedProcessBlocks_Dec_AESNI((MAYBE_CONST __m128i *)(const void *)m_key.begin(), + m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); #endif return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags); diff --git a/validat1.cpp b/validat1.cpp index 3c50b7d4..be903af1 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -339,15 +339,14 @@ bool TestSettings() std::cout << std::endl; #ifdef CRYPTOPP_CPUID_AVAILABLE - bool hasMMX = HasMMX(); - bool hasISSE = HasISSE(); bool hasSSE2 = HasSSE2(); bool hasSSSE3 = HasSSSE3(); - bool hasSSE4 = HasSSE4(); + bool hasSSE41 = HasSSE41(); + bool hasSSE42 = HasSSE42(); 
bool isP4 = IsP4(); int cacheLineSize = GetCacheLineSize(); - if ((isP4 && (!hasMMX || !hasSSE2)) || (hasSSE2 && !hasMMX) || (cacheLineSize < 16 || cacheLineSize > 256 || !IsPowerOf2(cacheLineSize))) + if (cacheLineSize < 16 || cacheLineSize > 256 || !IsPowerOf2(cacheLineSize)) { std::cout << "FAILED: "; pass = false; @@ -355,7 +354,7 @@ bool TestSettings() else std::cout << "passed: "; - std::cout << "hasMMX == " << hasMMX << ", hasISSE == " << hasISSE << ", hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4 == " << hasSSE4; + std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41 << ", hasSSE4.2 == " << hasSSE42; std::cout << ", hasAESNI == " << HasAESNI() << ", hasCLMUL == " << HasCLMUL() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED(); std::cout << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4 << ", cacheLineSize == " << cacheLineSize << std::endl; diff --git a/whrlpool.cpp b/whrlpool.cpp index 1278ad8a..a947b813 100644 --- a/whrlpool.cpp +++ b/whrlpool.cpp @@ -409,7 +409,7 @@ static const word64 Whirlpool_C[4*256+R] = { void Whirlpool::Transform(word64 *digest, const word64 *block) { #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - if (HasISSE()) + if (HasSSE2()) { // MMX version has the same structure as C version below #ifdef __GNUC__