diff --git a/adv-simd.h b/adv-simd.h index 3ee46fb6..b3d8a847 100644 --- a/adv-simd.h +++ b/adv-simd.h @@ -8,16 +8,18 @@ // // There are 8 templates provided in this file. The number following the // function name is the block size of the cipher. The name following that -// is the acceleration and arrangement. For example SSE1x4 means Intel SSE -// using two encrypt (or decrypt) functions: one that operates on 1 block, -// and one that operates on 4 blocks. +// is the acceleration and arrangement. For example 4x1_SSE means Intel SSE +// using two encrypt (or decrypt) functions: one that operates on 4 blocks, +// and one that operates on 1 block. // -// * AdvancedProcessBlocks64_SSE1x4 -// * AdvancedProcessBlocks128_SSE1x4 -// * AdvancedProcessBlocks64_SSE2x6 -// * AdvancedProcessBlocks128_SSE2x6 -// * AdvancedProcessBlocks64_NEON2x6 -// * AdvancedProcessBlocks128_NEON2x6 +// * AdvancedProcessBlocks64_4x1_SSE +// * AdvancedProcessBlocks128_4x1_SSE +// * AdvancedProcessBlocks64_6x2_SSE +// * AdvancedProcessBlocks128_6x2_SSE +// * AdvancedProcessBlocks64_6x2_NEON +// * AdvancedProcessBlocks128_6x2_NEON +// * AdvancedProcessBlocks64_6x2_ALTIVEC +// * AdvancedProcessBlocks128_6x2_ALTIVEC // #ifndef CRYPTOPP_ADVANCED_SIMD_TEMPLATES @@ -36,6 +38,10 @@ # include #endif +#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) +# include "ppc-simd.h" +#endif + // https://www.spinics.net/lists/gcchelp/msg47735.html and // https://www.spinics.net/lists/gcchelp/msg47749.html #if (CRYPTOPP_GCC_VERSION >= 40900) @@ -88,7 +94,7 @@ ANONYMOUS_NAMESPACE_END NAMESPACE_BEGIN(CryptoPP) template -inline size_t AdvancedProcessBlocks64_NEON2x6(F2 func2, F6 func6, +inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6, const word32 *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { @@ -455,7 +461,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6, } template -size_t AdvancedProcessBlocks128_NEON2x6(F2 func2, F6 func6, +size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6, const word64 *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { @@ -690,7 +696,7 @@ ANONYMOUS_NAMESPACE_END NAMESPACE_BEGIN(CryptoPP) template -inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_SSE2x6(F2 func2, F6 func6, +inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6, const word32 *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { @@ -924,7 +930,7 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_SSE2x6(F2 func2, F6 func6, } template -inline size_t AdvancedProcessBlocks128_SSE2x6(F2 func2, F6 func6, +inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6, const word64 *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { @@ -1109,7 +1115,7 @@ inline size_t AdvancedProcessBlocks128_SSE2x6(F2 func2, F6 func6, } template -inline size_t AdvancedProcessBlocks128_SSE1x4(F1 func1, F4 func4, +inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4, MAYBE_CONST word32 *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { @@ -1232,4 +1238,144 @@ NAMESPACE_END // CryptoPP #endif // CRYPTOPP_SSSE3_AVAILABLE +// *********************** Altivec/Power 4 ********************** // + +#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) + +ANONYMOUS_NAMESPACE_BEGIN + +using CryptoPP::uint32x4_p; + +#if defined(CRYPTOPP_LITTLE_ENDIAN) +const uint32x4_p s_one = {1,0,0,0}; +#else +const uint32x4_p s_one = {0,0,0,1}; +#endif + +ANONYMOUS_NAMESPACE_END + +NAMESPACE_BEGIN(CryptoPP) + +template +size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *subKeys, size_t rounds, + const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) +{ + CRYPTOPP_ASSERT(subKeys); + CRYPTOPP_ASSERT(inBlocks); + CRYPTOPP_ASSERT(outBlocks); + CRYPTOPP_ASSERT(length >= 16); + + const ptrdiff_t blockSize = 16; + + ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize; + ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0; + ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : blockSize; + + if (flags & BT_ReverseDirection) + { + inBlocks += length - blockSize; + xorBlocks += length - blockSize; + outBlocks += length - blockSize; + inIncrement = 0-inIncrement; + xorIncrement = 0-xorIncrement; + outIncrement = 0-outIncrement; + } + + if (flags & BT_AllowParallel) + { + while (length >= 6*blockSize) + { + VectorType block0, block1, block2, block3, block4, block5, temp; + block0 = VectorLoad(inBlocks); + + if (flags & BT_InBlockIsCounter) + { + block1 = VectorAdd(block0, s_one); + block2 = VectorAdd(block1, s_one); + block3 = VectorAdd(block2, s_one); + block4 = VectorAdd(block3, s_one); + block5 = VectorAdd(block4, s_one); + temp = VectorAdd(block5, s_one); + VectorStore(temp, const_cast(inBlocks)); + } + else + { + const int inc = static_cast(inIncrement); + block1 = VectorLoad(1*inc, inBlocks); + block2 = VectorLoad(2*inc, inBlocks); + block3 = VectorLoad(3*inc, inBlocks); + block4 = VectorLoad(4*inc, inBlocks); + block5 = VectorLoad(5*inc, inBlocks); + inBlocks += 6*inc; + } + + if (flags & BT_XorInput) + { + const int inc = static_cast(xorIncrement); + block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks)); + block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks)); + block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks)); + block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks)); + block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks)); + block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks)); + xorBlocks += 6*inc; + } + + func6(block0, block1, block2, block3, block4, block5, subKeys, rounds); + + if (xorBlocks && !(flags & BT_XorInput)) + { + const int inc = static_cast(xorIncrement); + block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks)); + block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks)); + block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks)); + block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks)); + block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks)); + block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks)); + xorBlocks += 6*inc; + } + + const int inc = static_cast(outIncrement); + VectorStore(block0, outBlocks+0*inc); + VectorStore(block1, outBlocks+1*inc); + VectorStore(block2, outBlocks+2*inc); + VectorStore(block3, outBlocks+3*inc); + VectorStore(block4, outBlocks+4*inc); + VectorStore(block5, outBlocks+5*inc); + + outBlocks += 6*inc; + length -= 6*blockSize; + } + } + + while (length >= blockSize) + { + VectorType block = VectorLoad(inBlocks); + + if (flags & BT_XorInput) + block = VectorXor(block, VectorLoad(xorBlocks)); + + if (flags & BT_InBlockIsCounter) + const_cast(inBlocks)[15]++; + + func1(block, subKeys, rounds); + + if (xorBlocks && !(flags & BT_XorInput)) + block = VectorXor(block, VectorLoad(xorBlocks)); + + VectorStore(block, outBlocks); + + inBlocks += inIncrement; + outBlocks += outIncrement; + xorBlocks += xorIncrement; + length -= blockSize; + } + + return length; +} + +NAMESPACE_END // CryptoPP + +#endif // CRYPTOPP_ALTIVEC_AVAILABLE + #endif // CRYPTOPP_ADVANCED_SIMD_TEMPLATES diff --git a/ppc-simd.h b/ppc-simd.h index 880d15d4..d1f04607 100644 --- a/ppc-simd.h +++ b/ppc-simd.h @@ -29,9 +29,12 @@ NAMESPACE_BEGIN(CryptoPP) #if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING) -typedef __vector unsigned char uint8x16_p; -typedef __vector unsigned int uint32x4_p; -#if defined(CRYPTOPP_POWER5_AVAILABLE) +typedef __vector char int8x16_p; +typedef __vector unsigned char uint8x16_p; +typedef __vector unsigned short uint16x8_p; +typedef __vector unsigned int uint32x4_p; + +#if defined(CRYPTOPP_POWER8_AVAILABLE) typedef __vector unsigned long long uint64x2_p; #endif diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp index f6445b98..0c5c7c52 100644 --- a/rijndael-simd.cpp +++ b/rijndael-simd.cpp @@ -517,7 +517,7 @@ size_t Rijndael_Enc_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro MAYBE_CONST byte* ib = MAYBE_UNCONST_CAST(byte*, inBlocks); MAYBE_CONST byte* xb = MAYBE_UNCONST_CAST(byte*, xorBlocks); - return AdvancedProcessBlocks128_SSE1x4(AESNI_Enc_Block, AESNI_Enc_4_Blocks, + return AdvancedProcessBlocks128_4x1_SSE(AESNI_Enc_Block, AESNI_Enc_4_Blocks, sk, rounds, ib, xb, outBlocks, length, flags); } @@ -528,7 +528,7 @@ size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro MAYBE_CONST byte* ib = MAYBE_UNCONST_CAST(byte*, inBlocks); MAYBE_CONST byte* xb = MAYBE_UNCONST_CAST(byte*, xorBlocks); - return AdvancedProcessBlocks128_SSE1x4(AESNI_Dec_Block, AESNI_Dec_4_Blocks, + return AdvancedProcessBlocks128_4x1_SSE(AESNI_Dec_Block, AESNI_Dec_4_Blocks, sk, rounds, ib, xb, outBlocks, length, flags); } @@ -702,129 +702,6 @@ static inline void POWER8_Dec_6_Blocks(VectorType &block0, VectorType &block1, block5 = VectorDecryptLast(block5, k); } -template -size_t Rijndael_AdvancedProcessBlocks_POWER8(F1 func1, F6 func6, const word32 *subKeys, size_t rounds, - const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) -{ - CRYPTOPP_ASSERT(subKeys); - CRYPTOPP_ASSERT(inBlocks); - CRYPTOPP_ASSERT(outBlocks); - CRYPTOPP_ASSERT(length >= 16); - - const size_t blockSize = 16; - size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : blockSize; - size_t xorIncrement = xorBlocks ? blockSize : 0; - size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : blockSize; - - if (flags & BlockTransformation::BT_ReverseDirection) - { - inBlocks += length - blockSize; - xorBlocks += length - blockSize; - outBlocks += length - blockSize; - inIncrement = 0-inIncrement; - xorIncrement = 0-xorIncrement; - outIncrement = 0-outIncrement; - } - - if (flags & BlockTransformation::BT_AllowParallel) - { - while (length >= 6*blockSize) - { -#if defined(CRYPTOPP_LITTLE_ENDIAN) - const VectorType one = (VectorType)((uint64x2_p){1,0}); -#else - const VectorType one = (VectorType)((uint64x2_p){0,1}); -#endif - - VectorType block0, block1, block2, block3, block4, block5, temp; - block0 = VectorLoad(inBlocks); - - if (flags & BlockTransformation::BT_InBlockIsCounter) - { - block1 = VectorAdd(block0, one); - block2 = VectorAdd(block1, one); - block3 = VectorAdd(block2, one); - block4 = VectorAdd(block3, one); - block5 = VectorAdd(block4, one); - temp = VectorAdd(block5, one); - VectorStore(temp, const_cast(inBlocks)); - } - else - { - const int inc = static_cast(inIncrement); - block1 = VectorLoad(1*inc, inBlocks); - block2 = VectorLoad(2*inc, inBlocks); - block3 = VectorLoad(3*inc, inBlocks); - block4 = VectorLoad(4*inc, inBlocks); - block5 = VectorLoad(5*inc, inBlocks); - inBlocks += 6*inc; - } - - if (flags & BlockTransformation::BT_XorInput) - { - const int inc = static_cast(xorIncrement); - block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks)); - block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks)); - block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks)); - block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks)); - block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks)); - block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks)); - xorBlocks += 6*inc; - } - - func6(block0, block1, block2, block3, block4, block5, subKeys, rounds); - - if (xorBlocks && !(flags & BlockTransformation::BT_XorInput)) - { - const int inc = static_cast(xorIncrement); - block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks)); - block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks)); - block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks)); - block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks)); - block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks)); - block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks)); - xorBlocks += 6*inc; - } - - const int inc = static_cast(outIncrement); - VectorStore(block0, outBlocks+0*inc); - VectorStore(block1, outBlocks+1*inc); - VectorStore(block2, outBlocks+2*inc); - VectorStore(block3, outBlocks+3*inc); - VectorStore(block4, outBlocks+4*inc); - VectorStore(block5, outBlocks+5*inc); - - outBlocks += 6*inc; - length -= 6*blockSize; - } - } - - while (length >= blockSize) - { - VectorType block = VectorLoad(inBlocks); - - if (flags & BlockTransformation::BT_XorInput) - block = VectorXor(block, VectorLoad(xorBlocks)); - - if (flags & BlockTransformation::BT_InBlockIsCounter) - const_cast(inBlocks)[15]++; - - func1(block, subKeys, rounds); - - if (xorBlocks && !(flags & BlockTransformation::BT_XorInput)) - block = VectorXor(block, VectorLoad(xorBlocks)); - - VectorStore(block, outBlocks); - - inBlocks += inIncrement; - outBlocks += outIncrement; - xorBlocks += xorIncrement; - length -= blockSize; - } - - return length; -} - ANONYMOUS_NAMESPACE_END // We still need rcon and Se to fallback to C/C++ for AES-192 and AES-256. @@ -925,17 +802,17 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32* } } -size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subKeys, size_t rounds, +size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return Rijndael_AdvancedProcessBlocks_POWER8(POWER8_Enc_Block, POWER8_Enc_6_Blocks, + return AdvancedProcessBlocks128_6x1_ALTIVEC(POWER8_Enc_Block, POWER8_Enc_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } -size_t Rijndael_Dec_AdvancedProcessBlocks_POWER8(const word32 *subKeys, size_t rounds, +size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return Rijndael_AdvancedProcessBlocks_POWER8(POWER8_Dec_Block, POWER8_Dec_6_Blocks, + return AdvancedProcessBlocks128_6x1_ALTIVEC(POWER8_Dec_Block, POWER8_Dec_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } diff --git a/rijndael.cpp b/rijndael.cpp index 9074cec9..4ffe2a4b 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -304,9 +304,9 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si extern void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32* rk, const word32* rc, const byte* Se); -extern size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds, +extern size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subkeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); -extern size_t Rijndael_Dec_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds, +extern size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subkeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); #endif @@ -1139,7 +1139,7 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo #endif #if CRYPTOPP_POWER8_AES_AVAILABLE if (HasAES()) - return Rijndael_Enc_AdvancedProcessBlocks_POWER8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); + return Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); #endif #if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) @@ -1207,7 +1207,7 @@ size_t Rijndael::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo #endif #if CRYPTOPP_POWER8_AES_AVAILABLE if (HasAES()) - return Rijndael_Dec_AdvancedProcessBlocks_POWER8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); + return Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); #endif return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags); diff --git a/simon-simd.cpp b/simon-simd.cpp index e3a39b01..26b41c73 100644 --- a/simon-simd.cpp +++ b/simon-simd.cpp @@ -1155,14 +1155,14 @@ NAMESPACE_BEGIN(CryptoPP) size_t SIMON64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks64_NEON2x6(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks, + return AdvancedProcessBlocks64_6x2_NEON(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } size_t SIMON64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks64_NEON2x6(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks, + return AdvancedProcessBlocks64_6x2_NEON(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } #endif // CRYPTOPP_ARM_NEON_AVAILABLE @@ -1171,14 +1171,14 @@ size_t SIMON64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t roun size_t SIMON128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks128_NEON2x6(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks, + return AdvancedProcessBlocks128_6x2_NEON(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } size_t SIMON128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks128_NEON2x6(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks, + return AdvancedProcessBlocks128_6x2_NEON(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } #endif // CRYPTOPP_ARM_NEON_AVAILABLE @@ -1189,14 +1189,14 @@ size_t SIMON128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rou size_t SIMON64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks64_SSE2x6(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks, + return AdvancedProcessBlocks64_6x2_SSE(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } size_t SIMON64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks64_SSE2x6(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks, + return AdvancedProcessBlocks64_6x2_SSE(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } #endif @@ -1205,14 +1205,14 @@ size_t SIMON64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rou size_t SIMON128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks128_SSE2x6(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks, + return AdvancedProcessBlocks128_6x2_SSE(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } size_t SIMON128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks128_SSE2x6(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks, + return AdvancedProcessBlocks128_6x2_SSE(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } #endif // CRYPTOPP_SSSE3_AVAILABLE diff --git a/speck-simd.cpp b/speck-simd.cpp index ae340c38..b946460a 100644 --- a/speck-simd.cpp +++ b/speck-simd.cpp @@ -1056,14 +1056,14 @@ NAMESPACE_BEGIN(CryptoPP) size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks64_NEON2x6(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks, + return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks64_NEON2x6(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks, + return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } #endif @@ -1072,14 +1072,14 @@ size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t roun size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks128_NEON2x6(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks, + return AdvancedProcessBlocks128_6x2_NEON(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks128_NEON2x6(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks, + return AdvancedProcessBlocks128_6x2_NEON(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } #endif // CRYPTOPP_ARM_NEON_AVAILABLE @@ -1090,14 +1090,14 @@ size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rou size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks64_SSE2x6(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks, + return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks64_SSE2x6(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks, + return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } #endif @@ -1106,14 +1106,14 @@ size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rou size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks128_SSE2x6(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks, + return AdvancedProcessBlocks128_6x2_SSE(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } size_t SPECK128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) { - return AdvancedProcessBlocks128_SSE2x6(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks, + return AdvancedProcessBlocks128_6x2_SSE(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags); } #endif // CRYPTOPP_SSSE3_AVAILABLE