From 2f26de7aabce2b30165a426b312acd7ffad07b1f Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 19 Nov 2018 04:49:51 -0500 Subject: [PATCH] Add 64-bit element loads and stores --- ppc_simd.h | 110 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 95 insertions(+), 15 deletions(-) diff --git a/ppc_simd.h b/ppc_simd.h index 6b745037..f588dd12 100644 --- a/ppc_simd.h +++ b/ppc_simd.h @@ -156,10 +156,10 @@ inline uint32x4_p VecLoad(const byte src[16]) #endif } -/// \brief Loads a vector from a byte array -/// \param src the byte array -/// \param off offset into the byte array -/// \details VecLoad loads a vector in from a byte array. +/// \brief Loads a vector from a word array +/// \param src the word array +/// \param off offset into the word array +/// \details VecLoad loads a vector in from a word array. /// \details VecLoad uses POWER7's vec_xl or /// vec_vsx_ld if available. The instructions do not require /// aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 @@ -181,9 +181,9 @@ inline uint32x4_p VecLoad(int off, const byte src[16]) #endif } -/// \brief Loads a vector from a byte array -/// \param src the byte array -/// \details VecLoad loads a vector in from a byte array. +/// \brief Loads a vector from a word array +/// \param src the word array +/// \details VecLoad loads a vector in from a word array. /// \details VecLoad uses POWER7's vec_xl or /// vec_vsx_ld if available. The instructions do not require /// aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 @@ -197,10 +197,10 @@ inline uint32x4_p VecLoad(const word32 src[4]) return VecLoad((const byte*)src); } -/// \brief Loads a vector from a byte array -/// \param src the byte array -/// \param off offset into the byte array -/// \details VecLoad loads a vector in from a byte array. 
+/// \brief Loads a vector from a word array +/// \param src the word array +/// \param off offset into the word array +/// \details VecLoad loads a vector in from a word array. /// \details VecLoad uses POWER7's vec_xl or /// vec_vsx_ld if available. The instructions do not require /// aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 @@ -214,6 +214,43 @@ inline uint32x4_p VecLoad(int off, const word32 src[4]) return VecLoad(off, (const byte*)src); } +#if defined(_ARCH_PWR8) + +/// \brief Loads a vector from a word array +/// \param src the word array +/// \details VecLoad loads a vector in from a word array. +/// \details VecLoad uses POWER7's vec_xl or +/// vec_vsx_ld if available. The instructions do not require +/// aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 +/// is not available. VecLoad_ALTIVEC() can be relatively expensive if +/// extra instructions are required to fix up unaligned effective memory +/// addresses. +/// \note VecLoad does not require an aligned array. +/// \since Crypto++ 8.0 +inline uint64x2_p VecLoad(const word64 src[2]) +{ + return (uint64x2_p)VecLoad((const byte*)src); +} + +/// \brief Loads a vector from a word array +/// \param src the word array +/// \param off offset into the word array +/// \details VecLoad loads a vector in from a word array. +/// \details VecLoad uses POWER7's vec_xl or +/// vec_vsx_ld if available. The instructions do not require +/// aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 +/// is not available. VecLoad_ALTIVEC() can be relatively expensive if +/// extra instructions are required to fix up unaligned effective memory +/// addresses. +/// \note VecLoad does not require an aligned array. 
+/// \since Crypto++ 8.0 +inline uint64x2_p VecLoad(int off, const word64 src[2]) +{ + return (uint64x2_p)VecLoad(off, (const byte*)src); +} + +#endif // _ARCH_PWR8 + /// \brief Loads a vector from a byte array /// \param src the byte array /// \details VecLoadBE loads a vector in from a byte array. VecLoadBE @@ -412,8 +449,8 @@ inline void VecStore(const T data, int off, byte dest[16]) /// \brief Stores a vector to a word array /// \tparam T vector type /// \param data the vector -/// \param dest the byte array -/// \details VecStore stores a vector to a byte array. +/// \param dest the word array +/// \details VecStore stores a vector to a word array. /// \details VecStore uses POWER7's vec_xst or /// vec_vsx_st if available. The instructions do not require /// aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 @@ -432,8 +469,8 @@ inline void VecStore(const T data, word32 dest[4]) /// \tparam T vector type /// \param data the vector /// \param off the byte offset into the array -/// \param dest the byte array -/// \details VecStore stores a vector to a byte array. +/// \param dest the word array +/// \details VecStore stores a vector to a word array. /// \details VecStore uses POWER7's vec_xst or /// vec_vsx_st if available. The instructions do not require /// aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 @@ -448,6 +485,49 @@ inline void VecStore(const T data, int off, word32 dest[4]) VecStore((uint8x16_p)data, off, (byte*)dest); } +#if defined(_ARCH_PWR8) + +/// \brief Stores a vector to a word array +/// \tparam T vector type +/// \param data the vector +/// \param dest the word array +/// \details VecStore stores a vector to a word array. +/// \details VecStore uses POWER7's vec_xst or +/// vec_vsx_st if available. The instructions do not require +/// aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 +/// is not available. 
VecStore_ALTIVEC() can be relatively expensive if +/// extra instructions are required to fix up unaligned effective memory +/// addresses. +/// \note VecStore does not require an aligned array. +/// \since Crypto++ 8.0 +template<class T> +inline void VecStore(const T data, word64 dest[2]) +{ + VecStore((uint8x16_p)data, 0, (byte*)dest); +} + +/// \brief Stores a vector to a word array +/// \tparam T vector type +/// \param data the vector +/// \param off the byte offset into the array +/// \param dest the word array +/// \details VecStore stores a vector to a word array. +/// \details VecStore uses POWER7's vec_xst or +/// vec_vsx_st if available. The instructions do not require +/// aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 +/// is not available. VecStore_ALTIVEC() can be relatively expensive if +/// extra instructions are required to fix up unaligned effective memory +/// addresses. +/// \note VecStore does not require an aligned array. +/// \since Crypto++ 8.0 +template<class T> +inline void VecStore(const T data, int off, word64 dest[2]) +{ + VecStore((uint8x16_p)data, off, (byte*)dest); +} + +#endif // _ARCH_PWR8 + /// \brief Stores a vector to a byte array /// \tparam T vector type /// \param src the vector