diff --git a/ppc_simd.h b/ppc_simd.h
index 1e5ec5e7..7dcd37eb 100644
--- a/ppc_simd.h
+++ b/ppc_simd.h
@@ -67,6 +67,13 @@ inline T Reverse(const T src)
/// \brief Loads a vector from a byte array
/// \param src the byte array
/// \details Loads a vector in native endian format from a byte array.
+/// \details VectorLoad_ALTIVEC() uses vec_ld if the effective address
+/// of src is aligned, and uses vec_lvsl and vec_perm
+/// otherwise.
+/// vec_lvsl and vec_perm are relatively expensive so you should
+/// provide aligned memory addresses.
+/// \details VectorLoad_ALTIVEC() is used automatically when POWER7 or above
+/// and unaligned loads are not available.
/// \note VectorLoad does not require an aligned array.
/// \since Crypto++ 6.0
inline uint32x4_p VectorLoad_ALTIVEC(const byte src[16])
@@ -89,6 +96,11 @@ inline uint32x4_p VectorLoad_ALTIVEC(const byte src[16])
/// \param src the byte array
/// \param off offset into the src byte array
/// \details Loads a vector in native endian format from a byte array.
+/// \details VectorLoad_ALTIVEC() uses vec_ld if the effective address
+/// of src is aligned, and uses vec_lvsl and vec_perm
+/// otherwise.
+/// vec_lvsl and vec_perm are relatively expensive so you should
+/// provide aligned memory addresses.
/// \note VectorLoad does not require an aligned array.
/// \since Crypto++ 6.0
inline uint32x4_p VectorLoad_ALTIVEC(int off, const byte src[16])
@@ -110,6 +122,11 @@ inline uint32x4_p VectorLoad_ALTIVEC(int off, const byte src[16])
/// \brief Loads a vector from a byte array
/// \param src the byte array
/// \details Loads a vector in native endian format from a byte array.
+/// \details VectorLoad uses POWER7's vec_xl or
+/// vec_vsx_ld if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorLoad_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorLoad_ALTIVEC() is relatively expensive.
/// \note VectorLoad does not require an aligned array.
/// \since Crypto++ 6.0
inline uint32x4_p VectorLoad(const byte src[16])
@@ -129,6 +146,11 @@ inline uint32x4_p VectorLoad(const byte src[16])
/// \param src the byte array
/// \param off offset into the byte array
/// \details Loads a vector in native endian format from a byte array.
+/// \details VectorLoad uses POWER7's vec_xl or
+/// vec_vsx_ld if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorLoad_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorLoad_ALTIVEC() is relatively expensive.
/// \note VectorLoad does not require an aligned array.
/// \since Crypto++ 6.0
inline uint32x4_p VectorLoad(int off, const byte src[16])
@@ -147,8 +169,13 @@ inline uint32x4_p VectorLoad(int off, const byte src[16])
/// \brief Loads a vector from a byte array
/// \param src the byte array
/// \details Loads a vector in native endian format from a byte array.
+/// \details VectorLoad uses POWER7's vec_xl or
+/// vec_vsx_ld if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorLoad_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorLoad_ALTIVEC() is relatively expensive.
/// \note VectorLoad does not require an aligned array.
-/// \since Crypto++ 6.0
+/// \since Crypto++ 8.0
inline uint32x4_p VectorLoad(const word32 src[4])
{
return VectorLoad((const byte*)src);
@@ -159,7 +186,7 @@ inline uint32x4_p VectorLoad(const word32 src[4])
/// \param off offset into the byte array
/// \details Loads a vector in native endian format from a byte array.
/// \note VectorLoad does not require an aligned array.
-/// \since Crypto++ 6.0
+/// \since Crypto++ 8.0
inline uint32x4_p VectorLoad(int off, const word32 src[4])
{
return VectorLoad(off, (const byte*)src);
@@ -169,6 +196,11 @@ inline uint32x4_p VectorLoad(int off, const word32 src[4])
/// \param src the byte array
/// \details Loads a vector in big endian format from a byte array.
/// VectorLoadBE will swap all bytes on little endian systems.
+/// \details VectorLoadBE uses POWER7's vec_xl or
+/// vec_vsx_ld if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorLoad_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorLoad_ALTIVEC() is relatively expensive.
/// \note VectorLoadBE() does not require an aligned array.
/// \since Crypto++ 6.0
inline uint32x4_p VectorLoadBE(const byte src[16])
@@ -197,6 +229,11 @@ inline uint32x4_p VectorLoadBE(const byte src[16])
/// \param off offset into the src byte array
/// \details Loads a vector in big endian format from a byte array.
/// VectorLoadBE will swap all bytes on little endian systems.
+/// \details VectorLoadBE uses POWER7's vec_xl or
+/// vec_vsx_ld if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorLoad_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorLoad_ALTIVEC() is relatively expensive.
/// \note VectorLoadBE does not require an aligned array.
/// \since Crypto++ 6.0
inline uint32x4_p VectorLoadBE(int off, const byte src[16])
@@ -222,6 +259,19 @@ inline uint32x4_p VectorLoadBE(int off, const byte src[16])
//////////////////////// Stores ////////////////////////
+/// \brief Stores a vector to a byte array
+/// \tparam T vector type
+/// \param data the vector
+/// \param dest the byte array
+/// \details Stores a vector in native endian format to a byte array.
+/// \details VectorStore_ALTIVEC() uses vec_st if the effective address
+/// of dest is aligned, and uses vec_ste otherwise.
+/// vec_ste is relatively expensive so you should provide aligned
+/// memory addresses.
+/// \details VectorStore_ALTIVEC() is used automatically when POWER7 or above
+/// and unaligned stores are not available.
+/// \note VectorStore does not require an aligned array.
+/// \since Crypto++ 8.0
template
inline void VectorStore_ALTIVEC(const T data, byte dest[16])
{
@@ -244,6 +294,20 @@ inline void VectorStore_ALTIVEC(const T data, byte dest[16])
}
}
+/// \brief Stores a vector to a byte array
+/// \tparam T vector type
+/// \param data the vector
+/// \param off the byte offset into the array
+/// \param dest the byte array
+/// \details Stores a vector in native endian format to a byte array.
+/// \details VectorStore_ALTIVEC() uses vec_st if the effective address
+/// of dest is aligned, and uses vec_ste otherwise.
+/// vec_ste is relatively expensive so you should provide aligned
+/// memory addresses.
+/// \details VectorStore_ALTIVEC() is used automatically when POWER7 or above
+/// and unaligned stores are not available.
+/// \note VectorStore does not require an aligned array.
+/// \since Crypto++ 8.0
template
inline void VectorStore_ALTIVEC(const T data, int off, byte dest[16])
{
@@ -271,6 +335,11 @@ inline void VectorStore_ALTIVEC(const T data, int off, byte dest[16])
/// \param data the vector
/// \param dest the byte array
/// \details Stores a vector in native endian format to a byte array.
+/// \details VectorStore uses POWER7's vec_xst or
+/// vec_vsx_st if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorStore_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorStore_ALTIVEC() is relatively expensive.
/// \note VectorStore does not require an aligned array.
/// \since Crypto++ 6.0
template
@@ -293,6 +362,11 @@ inline void VectorStore(const T data, byte dest[16])
/// \param off the byte offset into the array
/// \param dest the byte array
/// \details Stores a vector in native endian format to a byte array.
+/// \details VectorStore uses POWER7's vec_xst or
+/// vec_vsx_st if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorStore_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorStore_ALTIVEC() is relatively expensive.
/// \note VectorStore does not require an aligned array.
/// \since Crypto++ 6.0
template
@@ -314,6 +388,11 @@ inline void VectorStore(const T data, int off, byte dest[16])
/// \param data the vector
/// \param dest the byte array
/// \details Stores a vector in native endian format to a byte array.
+/// \details VectorStore uses POWER7's vec_xst or
+/// vec_vsx_st if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorStore_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorStore_ALTIVEC() is relatively expensive.
/// \note VectorStore does not require an aligned array.
/// \since Crypto++ 8.0
template
@@ -328,6 +407,11 @@ inline void VectorStore(const T data, word32 dest[4])
/// \param off the byte offset into the array
/// \param dest the byte array
/// \details Stores a vector in native endian format to a byte array.
+/// \details VectorStore uses POWER7's vec_xst or
+/// vec_vsx_st if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorStore_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorStore_ALTIVEC() is relatively expensive.
/// \note VectorStore does not require an aligned array.
/// \since Crypto++ 8.0
template
@@ -342,6 +426,11 @@ inline void VectorStore(const T data, int off, word32 dest[4])
/// \param dest the byte array
/// \details Stores a vector in big endian format to a byte array.
/// VectorStoreBE will swap all bytes on little endian systems.
+/// \details VectorStoreBE uses POWER7's vec_xst or
+/// vec_vsx_st if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorStore_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorStore_ALTIVEC() is relatively expensive.
/// \note VectorStoreBE does not require an aligned array.
/// \since Crypto++ 6.0
template
@@ -373,6 +462,11 @@ inline void VectorStoreBE(const T src, byte dest[16])
/// \param dest the byte array
/// \details Stores a vector in big endian format to a byte array.
/// VectorStoreBE will swap all bytes on little endian systems.
+/// \details VectorStoreBE uses POWER7's vec_xst or
+/// vec_vsx_st if available. The instructions do not require
+/// an aligned memory address.
+/// \details VectorStore_ALTIVEC() is used if POWER7 or above
+/// is not available. VectorStore_ALTIVEC() is relatively expensive.
/// \note VectorStoreBE does not require an aligned array.
/// \since Crypto++ 6.0
template