From 9d6008161981f1456bf6b1bc35740445f1b7ce0e Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Tue, 14 Aug 2018 01:22:17 -0400 Subject: [PATCH] Cleanup SPECK-128 code The check-in provides more unification and sets the pattern used with SPECK-64 --- speck-simd.cpp | 117 ++++++++++++++++++++----------------------------- 1 file changed, 47 insertions(+), 70 deletions(-) diff --git a/speck-simd.cpp b/speck-simd.cpp index c564a716..f22a88bf 100644 --- a/speck-simd.cpp +++ b/speck-simd.cpp @@ -871,16 +871,11 @@ inline uint64x2_p RotateRight64(const uint64x2_p val) return vec_rl(val, m); } -inline uint64x2_p SwapWords(const uint64x2_p val) -{ - return VectorSwapWords(val); -} - void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds) { #if defined(CRYPTOPP_BIG_ENDIAN) - const uint8x16_p m1 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; - const uint8x16_p m2 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; + const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; + const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; @@ -890,11 +885,6 @@ void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r uint64x2_p x1 = (uint64x2_p)vec_perm(block, block, m1); uint64x2_p y1 = (uint64x2_p)vec_perm(block, block, m2); -#if defined(CRYPTOPP_BIG_ENDIAN) - x1 = SwapWords(x1); - y1 = SwapWords(y1); -#endif - for (int i=0; i < static_cast<int>(rounds); ++i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); @@ -908,19 +898,22 @@ void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r } #if defined(CRYPTOPP_BIG_ENDIAN) - // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... 
- block = (uint32x4_p)vec_perm(y1, x1, m1); + const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; + //const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else - // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block = (uint32x4_p)vec_perm(x1, y1, m1); + const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; + //const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif + + // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... + block = (uint32x4_p)vec_perm(x1, y1, m3); } void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds) { #if defined(CRYPTOPP_BIG_ENDIAN) - const uint8x16_p m1 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; - const uint8x16_p m2 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; + const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; + const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; @@ -930,11 +923,6 @@ void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r uint64x2_p x1 = (uint64x2_p)vec_perm(block, block, m1); uint64x2_p y1 = (uint64x2_p)vec_perm(block, block, m2); -#if defined(CRYPTOPP_BIG_ENDIAN) - x1 = SwapWords(x1); - y1 = SwapWords(y1); -#endif - for (int i = static_cast<int>(rounds-1); i >= 0; --i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); @@ -947,12 +935,15 @@ void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r } #if defined(CRYPTOPP_BIG_ENDIAN) - // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block = (uint32x4_p)vec_perm(y1, x1, m1); + const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; + //const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else - // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... 
- block = (uint32x4_p)vec_perm(x1, y1, m1); + const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; + //const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif + + // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... + block = (uint32x4_p)vec_perm(x1, y1, m3); } void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -960,8 +951,8 @@ void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, uint32x4_p &block5, const word64 *subkeys, unsigned int rounds) { #if defined(CRYPTOPP_BIG_ENDIAN) - const uint8x16_p m1 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; - const uint8x16_p m2 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; + const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; + const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; @@ -975,11 +966,6 @@ void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, uint64x2_p x3 = (uint64x2_p)vec_perm(block4, block5, m1); uint64x2_p y3 = (uint64x2_p)vec_perm(block4, block5, m2); -#if defined(CRYPTOPP_BIG_ENDIAN) - x1 = SwapWords(x1); x2 = SwapWords(x2); x3 = SwapWords(x3); - y1 = SwapWords(y1); y2 = SwapWords(y2); y3 = SwapWords(y3); -#endif - for (int i=0; i < static_cast<int>(rounds); ++i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); @@ -1003,22 +989,20 @@ void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, } #if defined(CRYPTOPP_BIG_ENDIAN) - // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... 
- block0 = (uint32x4_p)vec_perm(y1, x1, m1); - block1 = (uint32x4_p)vec_perm(y1, x1, m2); - block2 = (uint32x4_p)vec_perm(y2, x2, m1); - block3 = (uint32x4_p)vec_perm(y2, x2, m2); - block4 = (uint32x4_p)vec_perm(y3, x3, m1); - block5 = (uint32x4_p)vec_perm(y3, x3, m2); + const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; + const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else - // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block0 = (uint32x4_p)vec_perm(x1, y1, m1); - block1 = (uint32x4_p)vec_perm(x1, y1, m2); - block2 = (uint32x4_p)vec_perm(x2, y2, m1); - block3 = (uint32x4_p)vec_perm(x2, y2, m2); - block4 = (uint32x4_p)vec_perm(x3, y3, m1); - block5 = (uint32x4_p)vec_perm(x3, y3, m2); + const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; + const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif + + // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... + block0 = (uint32x4_p)vec_perm(x1, y1, m3); + block1 = (uint32x4_p)vec_perm(x1, y1, m4); + block2 = (uint32x4_p)vec_perm(x2, y2, m3); + block3 = (uint32x4_p)vec_perm(x2, y2, m4); + block4 = (uint32x4_p)vec_perm(x3, y3, m3); + block5 = (uint32x4_p)vec_perm(x3, y3, m4); } void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -1026,8 +1010,8 @@ void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, uint32x4_p &block5, const word64 *subkeys, unsigned int rounds) { #if defined(CRYPTOPP_BIG_ENDIAN) - const uint8x16_p m1 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; - const uint8x16_p m2 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; + const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; + const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; @@ -1041,11 +1025,6 @@ void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, uint64x2_p x3 = 
(uint64x2_p)vec_perm(block4, block5, m1); uint64x2_p y3 = (uint64x2_p)vec_perm(block4, block5, m2); -#if defined(CRYPTOPP_BIG_ENDIAN) - x1 = SwapWords(x1); x2 = SwapWords(x2); x3 = SwapWords(x3); - y1 = SwapWords(y1); y2 = SwapWords(y2); y3 = SwapWords(y3); -#endif - for (int i = static_cast<int>(rounds-1); i >= 0; --i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); @@ -1069,25 +1048,23 @@ void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, } #if defined(CRYPTOPP_BIG_ENDIAN) - // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block0 = (uint32x4_p)vec_perm(y1, x1, m1); - block1 = (uint32x4_p)vec_perm(y1, x1, m2); - block2 = (uint32x4_p)vec_perm(y2, x2, m1); - block3 = (uint32x4_p)vec_perm(y2, x2, m2); - block4 = (uint32x4_p)vec_perm(y3, x3, m1); - block5 = (uint32x4_p)vec_perm(y3, x3, m2); + const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; + const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else - // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block0 = (uint32x4_p)vec_perm(x1, y1, m1); - block1 = (uint32x4_p)vec_perm(x1, y1, m2); - block2 = (uint32x4_p)vec_perm(x2, y2, m1); - block3 = (uint32x4_p)vec_perm(x2, y2, m2); - block4 = (uint32x4_p)vec_perm(x3, y3, m1); - block5 = (uint32x4_p)vec_perm(x3, y3, m2); + const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; + const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif + + // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... + block0 = (uint32x4_p)vec_perm(x1, y1, m3); + block1 = (uint32x4_p)vec_perm(x1, y1, m4); + block2 = (uint32x4_p)vec_perm(x2, y2, m3); + block3 = (uint32x4_p)vec_perm(x2, y2, m4); + block4 = (uint32x4_p)vec_perm(x3, y3, m3); + block5 = (uint32x4_p)vec_perm(x3, y3, m4); } -#endif +#endif // POWER8 ANONYMOUS_NAMESPACE_END