Add packed 32-bit Shuffle specializations for ChaCha on Power8
parent
542140621a
commit
a7615a8c7c
|
|
@ -262,29 +262,29 @@ inline uint32x4_p RotateRight(const uint32x4_p val)
|
||||||
template <unsigned int S>
|
template <unsigned int S>
|
||||||
inline uint32x4_p Shuffle(const uint32x4_p& val)
|
inline uint32x4_p Shuffle(const uint32x4_p& val)
|
||||||
{
|
{
|
||||||
switch (S%4)
|
CRYPTOPP_ASSERT(0);
|
||||||
{
|
return val;
|
||||||
case 1:
|
}
|
||||||
{
|
|
||||||
const uint8x16_p mask = {4,5,6,7, 8,9,10,11, 12,13,14,15, 0,1,2,3};
|
template <>
|
||||||
return vec_perm(val, val, mask);
|
inline uint32x4_p Shuffle<1>(const uint32x4_p& val)
|
||||||
}
|
{
|
||||||
case 2:
|
const uint8x16_p mask = {4,5,6,7, 8,9,10,11, 12,13,14,15, 0,1,2,3};
|
||||||
{
|
return vec_perm(val, val, mask);
|
||||||
const uint8x16_p mask = {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7};
|
}
|
||||||
return vec_perm(val, val, mask);
|
|
||||||
}
|
template <>
|
||||||
case 3:
|
inline uint32x4_p Shuffle<2>(const uint32x4_p& val)
|
||||||
{
|
{
|
||||||
const uint8x16_p mask = {12,13,14,15, 0,1,2,3, 4,5,6,7, 8,9,10,11};
|
const uint8x16_p mask = {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7};
|
||||||
return vec_perm(val, val, mask);
|
return vec_perm(val, val, mask);
|
||||||
}
|
}
|
||||||
default:
|
|
||||||
{
|
template <>
|
||||||
CRYPTOPP_ASSERT(0);
|
inline uint32x4_p Shuffle<3>(const uint32x4_p& val)
|
||||||
return val;
|
{
|
||||||
}
|
const uint8x16_p mask = {12,13,14,15, 0,1,2,3, 4,5,6,7, 8,9,10,11};
|
||||||
}
|
return vec_perm(val, val, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper to perform 64-bit addition across two elements of 32-bit vectors
|
// Helper to perform 64-bit addition across two elements of 32-bit vectors
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue