From 064650d31eb51408907d1015e5e7c0e69edbf88a Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 23 Nov 2018 23:58:30 -0500 Subject: [PATCH] Avoid IsAlignedOn in VecLoad_ALTIVEC and VecStore_ALTIVEC --- ppc_simd.h | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/ppc_simd.h b/ppc_simd.h index e57a609f..5cd812bb 100644 --- a/ppc_simd.h +++ b/ppc_simd.h @@ -137,18 +137,18 @@ inline T VecReverse(const T data) /// \param src the byte array /// \details Loads a vector in native endian format from a byte array. /// \details VecLoad_ALTIVEC() uses vec_ld if the effective address -/// of dest is aligned, and uses vec_lvsl and vec_perm -/// otherwise. -/// vec_lvsl and vec_perm are relatively expensive so you should -/// provide aligned memory adresses. -/// \details VecLoad_ALTIVEC() is used automatically when POWER7 or above -/// and unaligned loads is not available. +/// of src is aligned. If unaligned it uses vec_lvsl, +/// vec_ld, vec_perm and src. The fixups using +/// vec_lvsl and vec_perm are relatively expensive so +/// you should provide aligned memory addresses. /// \par Wraps /// vec_ld, vec_lvsl, vec_perm /// \since Crypto++ 6.0 inline uint32x4_p VecLoad_ALTIVEC(const byte src[16]) { - if (IsAlignedOn(src, 16)) + // Avoid IsAlignedOn for convenience. + uintptr_t eff = reinterpret_cast<uintptr_t>(src)+0; + if (eff % 16 == 0) { return (uint32x4_p)vec_ld(0, src); } @@ -167,16 +167,18 @@ inline uint32x4_p VecLoad_ALTIVEC(const byte src[16]) /// \param off offset into the src byte array /// \details Loads a vector in native endian format from a byte array. /// \details VecLoad_ALTIVEC() uses vec_ld if the effective address -/// of dest is aligned, and uses vec_lvsl and vec_perm -/// otherwise. -/// vec_lvsl and vec_perm are relatively expensive so you should -/// provide aligned memory adresses. +/// of src is aligned. If unaligned it uses vec_lvsl, +/// vec_ld, vec_perm and src. 
+/// \details The fixups using vec_lvsl and vec_perm are +/// relatively expensive so you should provide aligned memory addresses. /// \par Wraps /// vec_ld, vec_lvsl, vec_perm /// \since Crypto++ 6.0 inline uint32x4_p VecLoad_ALTIVEC(int off, const byte src[16]) { - if (IsAlignedOn(src, 16)) + // Avoid IsAlignedOn for convenience. + uintptr_t eff = reinterpret_cast<uintptr_t>(src)+off; + if (eff % 16 == 0) { return (uint32x4_p)vec_ld(off, src); } @@ -217,10 +219,10 @@ inline uint32x4_p VecLoad(const byte src[16]) #endif } -/// \brief Loads a vector from a word array -/// \param src the word array -/// \param off offset into the word array -/// \details VecLoad loads a vector in from a word array. +/// \brief Loads a vector from a byte array +/// \param src the byte array +/// \param off offset into the byte array +/// \details VecLoad loads a vector in from a byte array. /// \details VecLoad uses POWER7's vec_xl or /// vec_vsx_ld if available. The instructions do not require /// aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 @@ -300,7 +302,7 @@ inline uint64x2_p VecLoad(const word64 src[2]) return (uint64x2_p)VecLoad((const byte*)src); } -/// \brief Loads a vector from a byte array +/// \brief Loads a vector from a word array /// \param src the word array /// \param off offset into the word array /// \details VecLoad loads a vector in from a word array. @@ -421,7 +423,9 @@ inline uint32x4_p VecLoadBE(int off, const byte src[16]) template inline void VecStore_ALTIVEC(const T data, byte dest[16]) { - if (IsAlignedOn(dest, 16)) + // Avoid IsAlignedOn for convenience. + uintptr_t eff = reinterpret_cast<uintptr_t>(dest)+0; + if (eff % 16 == 0) { vec_st((uint8x16_p)data, 0, dest); } @@ -458,7 +462,9 @@ inline void VecStore_ALTIVEC(const T data, byte dest[16]) template inline void VecStore_ALTIVEC(const T data, int off, byte dest[16]) { - if (IsAlignedOn(dest, 16)) + // Avoid IsAlignedOn for convenience. 
+ uintptr_t eff = reinterpret_cast<uintptr_t>(dest)+off; + if (eff % 16 == 0) { vec_st((uint8x16_p)data, off, dest); }