Cleanup SPECK-128 code
The check-in provides more unification and sets the pattern used with SPECK-64.
pull/703/head
parent
7c70b39d18
commit
9d60081619
117
speck-simd.cpp
117
speck-simd.cpp
|
|
@ -871,16 +871,11 @@ inline uint64x2_p RotateRight64(const uint64x2_p val)
|
|||
return vec_rl(val, m);
|
||||
}
|
||||
|
||||
// SwapWords: thin forwarding wrapper. Passes `val` unchanged to
// VectorSwapWords() and returns whatever that helper returns.
// NOTE(review): VectorSwapWords is defined outside this view; presumably it
// exchanges the two 64-bit lanes of the vector -- confirm against its definition.
inline uint64x2_p SwapWords(const uint64x2_p val)
|
||||
{
|
||||
return VectorSwapWords(val);
|
||||
}
|
||||
|
||||
void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds)
|
||||
{
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
const uint8x16_p m1 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
const uint8x16_p m2 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
|
||||
const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
|
||||
#else
|
||||
const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
|
|
@ -890,11 +885,6 @@ void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r
|
|||
uint64x2_p x1 = (uint64x2_p)vec_perm(block, block, m1);
|
||||
uint64x2_p y1 = (uint64x2_p)vec_perm(block, block, m2);
|
||||
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
x1 = SwapWords(x1);
|
||||
y1 = SwapWords(y1);
|
||||
#endif
|
||||
|
||||
for (int i=0; i < static_cast<int>(rounds); ++i)
|
||||
{
|
||||
const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]);
|
||||
|
|
@ -908,19 +898,22 @@ void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r
|
|||
}
|
||||
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block = (uint32x4_p)vec_perm(y1, x1, m1);
|
||||
const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
|
||||
//const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
|
||||
#else
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block = (uint32x4_p)vec_perm(x1, y1, m1);
|
||||
const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
//const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
#endif
|
||||
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block = (uint32x4_p)vec_perm(x1, y1, m3);
|
||||
}
|
||||
|
||||
void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds)
|
||||
{
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
const uint8x16_p m1 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
const uint8x16_p m2 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
|
||||
const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
|
||||
#else
|
||||
const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
|
|
@ -930,11 +923,6 @@ void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r
|
|||
uint64x2_p x1 = (uint64x2_p)vec_perm(block, block, m1);
|
||||
uint64x2_p y1 = (uint64x2_p)vec_perm(block, block, m2);
|
||||
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
x1 = SwapWords(x1);
|
||||
y1 = SwapWords(y1);
|
||||
#endif
|
||||
|
||||
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
|
||||
{
|
||||
const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]);
|
||||
|
|
@ -947,12 +935,15 @@ void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r
|
|||
}
|
||||
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block = (uint32x4_p)vec_perm(y1, x1, m1);
|
||||
const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
|
||||
//const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
|
||||
#else
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block = (uint32x4_p)vec_perm(x1, y1, m1);
|
||||
const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
//const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
#endif
|
||||
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block = (uint32x4_p)vec_perm(x1, y1, m3);
|
||||
}
|
||||
|
||||
void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
||||
|
|
@ -960,8 +951,8 @@ void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
uint32x4_p &block5, const word64 *subkeys, unsigned int rounds)
|
||||
{
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
const uint8x16_p m1 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
const uint8x16_p m2 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
|
||||
const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
|
||||
#else
|
||||
const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
|
|
@ -975,11 +966,6 @@ void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
uint64x2_p x3 = (uint64x2_p)vec_perm(block4, block5, m1);
|
||||
uint64x2_p y3 = (uint64x2_p)vec_perm(block4, block5, m2);
|
||||
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
x1 = SwapWords(x1); x2 = SwapWords(x2); x3 = SwapWords(x3);
|
||||
y1 = SwapWords(y1); y2 = SwapWords(y2); y3 = SwapWords(y3);
|
||||
#endif
|
||||
|
||||
for (int i=0; i < static_cast<int>(rounds); ++i)
|
||||
{
|
||||
const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]);
|
||||
|
|
@ -1003,22 +989,20 @@ void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
}
|
||||
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block0 = (uint32x4_p)vec_perm(y1, x1, m1);
|
||||
block1 = (uint32x4_p)vec_perm(y1, x1, m2);
|
||||
block2 = (uint32x4_p)vec_perm(y2, x2, m1);
|
||||
block3 = (uint32x4_p)vec_perm(y2, x2, m2);
|
||||
block4 = (uint32x4_p)vec_perm(y3, x3, m1);
|
||||
block5 = (uint32x4_p)vec_perm(y3, x3, m2);
|
||||
const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
|
||||
const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
|
||||
#else
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block0 = (uint32x4_p)vec_perm(x1, y1, m1);
|
||||
block1 = (uint32x4_p)vec_perm(x1, y1, m2);
|
||||
block2 = (uint32x4_p)vec_perm(x2, y2, m1);
|
||||
block3 = (uint32x4_p)vec_perm(x2, y2, m2);
|
||||
block4 = (uint32x4_p)vec_perm(x3, y3, m1);
|
||||
block5 = (uint32x4_p)vec_perm(x3, y3, m2);
|
||||
const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
#endif
|
||||
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block0 = (uint32x4_p)vec_perm(x1, y1, m3);
|
||||
block1 = (uint32x4_p)vec_perm(x1, y1, m4);
|
||||
block2 = (uint32x4_p)vec_perm(x2, y2, m3);
|
||||
block3 = (uint32x4_p)vec_perm(x2, y2, m4);
|
||||
block4 = (uint32x4_p)vec_perm(x3, y3, m3);
|
||||
block5 = (uint32x4_p)vec_perm(x3, y3, m4);
|
||||
}
|
||||
|
||||
void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
||||
|
|
@ -1026,8 +1010,8 @@ void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
uint32x4_p &block5, const word64 *subkeys, unsigned int rounds)
|
||||
{
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
const uint8x16_p m1 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
const uint8x16_p m2 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
|
||||
const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
|
||||
#else
|
||||
const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
|
|
@ -1041,11 +1025,6 @@ void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
uint64x2_p x3 = (uint64x2_p)vec_perm(block4, block5, m1);
|
||||
uint64x2_p y3 = (uint64x2_p)vec_perm(block4, block5, m2);
|
||||
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
x1 = SwapWords(x1); x2 = SwapWords(x2); x3 = SwapWords(x3);
|
||||
y1 = SwapWords(y1); y2 = SwapWords(y2); y3 = SwapWords(y3);
|
||||
#endif
|
||||
|
||||
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
|
||||
{
|
||||
const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]);
|
||||
|
|
@ -1069,25 +1048,23 @@ void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
}
|
||||
|
||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block0 = (uint32x4_p)vec_perm(y1, x1, m1);
|
||||
block1 = (uint32x4_p)vec_perm(y1, x1, m2);
|
||||
block2 = (uint32x4_p)vec_perm(y2, x2, m1);
|
||||
block3 = (uint32x4_p)vec_perm(y2, x2, m2);
|
||||
block4 = (uint32x4_p)vec_perm(y3, x3, m1);
|
||||
block5 = (uint32x4_p)vec_perm(y3, x3, m2);
|
||||
const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8};
|
||||
const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0};
|
||||
#else
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block0 = (uint32x4_p)vec_perm(x1, y1, m1);
|
||||
block1 = (uint32x4_p)vec_perm(x1, y1, m2);
|
||||
block2 = (uint32x4_p)vec_perm(x2, y2, m1);
|
||||
block3 = (uint32x4_p)vec_perm(x2, y2, m2);
|
||||
block4 = (uint32x4_p)vec_perm(x3, y3, m1);
|
||||
block5 = (uint32x4_p)vec_perm(x3, y3, m2);
|
||||
const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16};
|
||||
const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24};
|
||||
#endif
|
||||
|
||||
// [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
|
||||
block0 = (uint32x4_p)vec_perm(x1, y1, m3);
|
||||
block1 = (uint32x4_p)vec_perm(x1, y1, m4);
|
||||
block2 = (uint32x4_p)vec_perm(x2, y2, m3);
|
||||
block3 = (uint32x4_p)vec_perm(x2, y2, m4);
|
||||
block4 = (uint32x4_p)vec_perm(x3, y3, m3);
|
||||
block5 = (uint32x4_p)vec_perm(x3, y3, m4);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // POWER8
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue