Remove non-const cast from POWER8 loads and stores

Also see the discussion at https://github.com/noloader/POWER8-crypto/issues/2
pull/603/head
Jeffrey Walton 2018-03-20 15:02:47 -04:00
parent 244abbe41c
commit 9a52edcfdb
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 39 additions and 39 deletions

View File

@@ -47,14 +47,14 @@ inline uint32x4_p VectorLoad(const byte src[16])
     uint8x16_p data;
     if (IsAlignedOn(src, 16))
     {
-        data = vec_ld(0, (uint8_t*)src);
+        data = vec_ld(0, src);
     }
     else
     {
         // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
-        const uint8x16_p perm = vec_lvsl(0, (uint8_t*)src);
-        const uint8x16_p low = vec_ld(0, (uint8_t*)src);
-        const uint8x16_p high = vec_ld(15, (uint8_t*)src);
+        const uint8x16_p perm = vec_lvsl(0, src);
+        const uint8x16_p low = vec_ld(0, src);
+        const uint8x16_p high = vec_ld(15, src);
         data = vec_perm(low, high, perm);
     }
@@ -77,7 +77,7 @@ inline void VectorStore(const uint32x4_p data, byte dest[16])
     if (IsAlignedOn(dest, 16))
     {
-        vec_st(t1, 0, (uint8_t*) dest);
+        vec_st(t1, 0, dest);
     }
     else
     {
@@ -147,12 +147,12 @@ inline T Reverse(const T& src)
 inline uint32x4_p VectorLoadBE(const uint8_t src[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl_be(0, (uint8_t*)src);
+    return (uint32x4_p)vec_xl_be(0, src);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    return (uint32x4_p)Reverse(vec_vsx_ld(0, (uint8_t*)src));
+    return (uint32x4_p)Reverse(vec_vsx_ld(0, src));
 # else
-    return (uint32x4_p)vec_vsx_ld(0, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(0, src);
 # endif
 #endif
 }
@@ -168,12 +168,12 @@ inline uint32x4_p VectorLoadBE(const uint8_t src[16])
 inline uint32x4_p VectorLoadBE(int off, const uint8_t src[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl_be(off, (uint8_t*)src);
+    return (uint32x4_p)vec_xl_be(off, src);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    return (uint32x4_p)Reverse(vec_vsx_ld(off, (uint8_t*)src));
+    return (uint32x4_p)Reverse(vec_vsx_ld(off, src));
 # else
-    return (uint32x4_p)vec_vsx_ld(off, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(off, src);
 # endif
 #endif
 }
@@ -187,7 +187,7 @@ inline uint32x4_p VectorLoadBE(int off, const uint8_t src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VectorLoad(const byte src[16])
 {
-    return (uint32x4_p)VectorLoadBE((uint8_t*)src);
+    return (uint32x4_p)VectorLoadBE(src);
 }

 /// \brief Loads a vector from a byte array
@@ -200,7 +200,7 @@ inline uint32x4_p VectorLoad(const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VectorLoad(int off, const byte src[16])
 {
-    return (uint32x4_p)VectorLoadBE(off, (uint8_t*)src);
+    return (uint32x4_p)VectorLoadBE(off, src);
 }

 /// \brief Loads a vector from a byte array
@@ -213,9 +213,9 @@ inline uint32x4_p VectorLoad(int off, const byte src[16])
 inline uint32x4_p VectorLoadKey(const byte src[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl(0, (uint8_t*)src);
+    return (uint32x4_p)vec_xl(0, src);
 #else
-    return (uint32x4_p)vec_vsx_ld(0, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(0, src);
 #endif
 }
@@ -229,9 +229,9 @@ inline uint32x4_p VectorLoadKey(const byte src[16])
 inline uint32x4_p VectorLoadKey(const word32 src[4])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl(0, (uint8_t*)src);
+    return (uint32x4_p)vec_xl(0, src);
 #else
-    return (uint32x4_p)vec_vsx_ld(0, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(0, src);
 #endif
 }
@@ -246,9 +246,9 @@ inline uint32x4_p VectorLoadKey(const word32 src[4])
 inline uint32x4_p VectorLoadKey(int off, const byte src[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl(off, (uint8_t*)src);
+    return (uint32x4_p)vec_xl(off, src);
 #else
-    return (uint32x4_p)vec_vsx_ld(off, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(off, src);
 #endif
 }
@@ -265,12 +265,12 @@ template <class T>
 inline void VectorStoreBE(const T& src, uint8_t dest[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    vec_xst_be((uint8x16_p)src, 0, (uint8_t*)dest);
+    vec_xst_be((uint8x16_p)src, 0, dest);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), 0, (uint8_t*)dest);
+    vec_vsx_st(Reverse((uint8x16_p)src), 0, dest);
 # else
-    vec_vsx_st((uint8x16_p)src, 0, (uint8_t*)dest);
+    vec_vsx_st((uint8x16_p)src, 0, dest);
 # endif
 #endif
 }
@@ -288,12 +288,12 @@ template <class T>
 inline void VectorStoreBE(const T& src, int off, uint8_t dest[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    vec_xst_be((uint8x16_p)src, off, (uint8_t*)dest);
+    vec_xst_be((uint8x16_p)src, off, dest);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), off, (uint8_t*)dest);
+    vec_vsx_st(Reverse((uint8x16_p)src), off, dest);
 # else
-    vec_vsx_st((uint8x16_p)src, off, (uint8_t*)dest);
+    vec_vsx_st((uint8x16_p)src, off, dest);
 # endif
 #endif
 }
@@ -311,12 +311,12 @@ inline void VectorStore(const T& src, byte dest[16])
 {
     // Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
 #if defined(CRYPTOPP_XLC_VERSION)
-    vec_xst_be((uint8x16_p)src, 0, (uint8_t*)dest);
+    vec_xst_be((uint8x16_p)src, 0, dest);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), 0, (uint8_t*)dest);
+    vec_vsx_st(Reverse((uint8x16_p)src), 0, dest);
 # else
-    vec_vsx_st((uint8x16_p)src, 0, (uint8_t*)dest);
+    vec_vsx_st((uint8x16_p)src, 0, dest);
 # endif
 #endif
 }
@@ -335,12 +335,12 @@ inline void VectorStore(const T& src, int off, byte dest[16])
 {
     // Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
 #if defined(CRYPTOPP_XLC_VERSION)
-    vec_xst_be((uint8x16_p)src, off, (uint8_t*)dest);
+    vec_xst_be((uint8x16_p)src, off, dest);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), off, (uint8_t*)dest);
+    vec_vsx_st(Reverse((uint8x16_p)src), off, dest);
 # else
-    vec_vsx_st((uint8x16_p)src, off, (uint8_t*)dest);
+    vec_vsx_st((uint8x16_p)src, off, dest);
 # endif
 #endif
 }

View File

@@ -1006,7 +1006,7 @@ uint32x4_p8 VEC_XL_BE(int offset, const uint8_t* data)
 template <class T> static inline
 uint32x4_p8 VectorLoad32x4(const T* data, int offset)
 {
-    return (uint32x4_p8)vec_ld(offset, (uint8_t*)data);
+    return (uint32x4_p8)vec_ld(offset, data);
 }

 // Unaligned load
@@ -1014,9 +1014,9 @@ template <class T> static inline
 uint32x4_p8 VectorLoad32x4u(const T* data, int offset)
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p8)vec_xl(offset, (uint8_t*)data);
+    return (uint32x4_p8)vec_xl(offset, data);
 #else
-    return (uint32x4_p8)vec_vsx_ld(offset, (uint8_t*)data);
+    return (uint32x4_p8)vec_vsx_ld(offset, data);
 #endif
 }
@@ -1024,7 +1024,7 @@ uint32x4_p8 VectorLoad32x4u(const T* data, int offset)
 template <class T> static inline
 void VectorStore32x4(const uint32x4_p8 val, T* data, int offset)
 {
-    vec_st((uint8x16_p8)val, offset, (uint8_t*)data);
+    vec_st((uint8x16_p8)val, offset, data);
 }

 // Unaligned store
@@ -1314,7 +1314,7 @@ uint64x2_p8 VectorPermute64x2(const uint64x2_p8 val, const uint8x16_p8 mask)
 template <class T> static inline
 uint64x2_p8 VectorLoad64x2(const T* data, int offset)
 {
-    return (uint64x2_p8)vec_ld(offset, (uint8_t*)data);
+    return (uint64x2_p8)vec_ld(offset, data);
 }

 // Unaligned load
@@ -1322,9 +1322,9 @@ template <class T> static inline
 uint64x2_p8 VectorLoad64x2u(const T* data, int offset)
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint64x2_p8)vec_xl(offset, (uint8_t*)data);
+    return (uint64x2_p8)vec_xl(offset, data);
 #else
-    return (uint64x2_p8)vec_vsx_ld(offset, (uint8_t*)data);
+    return (uint64x2_p8)vec_vsx_ld(offset, data);
 #endif
 }
@@ -1332,7 +1332,7 @@ uint64x2_p8 VectorLoad64x2u(const T* data, int offset)
 template <class T> static inline
 void VectorStore64x2(const uint64x2_p8 val, T* data, int offset)
 {
-    vec_st((uint8x16_p8)val, offset, (uint8_t*)data);
+    vec_st((uint8x16_p8)val, offset, data);
 }

 // Unaligned store