From 064650d31eb51408907d1015e5e7c0e69edbf88a Mon Sep 17 00:00:00 2001
From: Jeffrey Walton <noloader@gmail.com>
Date: Fri, 23 Nov 2018 23:58:30 -0500
Subject: [PATCH] Avoid IsAlignedOn in VecLoad_ALTIVEC and VecStore_ALTIVEC

---
 ppc_simd.h | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)
diff --git a/ppc_simd.h b/ppc_simd.h
index e57a609f..5cd812bb 100644
--- a/ppc_simd.h
+++ b/ppc_simd.h
@@ -137,18 +137,18 @@ inline T VecReverse(const T data)
 /// \param src the byte array
 /// \details Loads a vector in native endian format from a byte array.
 /// \details VecLoad_ALTIVEC() uses <tt>vec_ld</tt> if the effective address
-///   of <tt>dest</tt> is aligned, and uses <tt>vec_lvsl</tt> and <tt>vec_perm</tt>
-///   otherwise.
-///   <tt>vec_lvsl</tt> and <tt>vec_perm</tt> are relatively expensive so you should
-///   provide aligned memory adresses.
-/// \details VecLoad_ALTIVEC() is used automatically when POWER7 or above
-///   and unaligned loads is not available.
+///   of <tt>src</tt> is aligned. If unaligned, it uses <tt>vec_lvsl</tt>,
+///   <tt>vec_ld</tt> and <tt>vec_perm</tt> to load from <tt>src</tt>. The fixups using
+///   <tt>vec_lvsl</tt> and <tt>vec_perm</tt> are relatively expensive so
+///   you should provide aligned memory addresses.
 /// \par Wraps
 ///   vec_ld, vec_lvsl, vec_perm
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoad_ALTIVEC(const byte src[16])
 {
-    if (IsAlignedOn(src, 16))
+    // Avoid IsAlignedOn for convenience.
+    uintptr_t eff = reinterpret_cast<uintptr_t>(src)+0;
+    if (eff % 16 == 0)
     {
         return (uint32x4_p)vec_ld(0, src);
     }
@@ -167,16 +167,18 @@ inline uint32x4_p VecLoad_ALTIVEC(const byte src[16])
 /// \param off offset into the src byte array
 /// \details Loads a vector in native endian format from a byte array.
 /// \details VecLoad_ALTIVEC() uses <tt>vec_ld</tt> if the effective address
-///   of <tt>dest</tt> is aligned, and uses <tt>vec_lvsl</tt> and <tt>vec_perm</tt>
-///   otherwise.
-///   <tt>vec_lvsl</tt> and <tt>vec_perm</tt> are relatively expensive so you should
-///   provide aligned memory adresses.
+///   of <tt>src+off</tt> is aligned. If unaligned, it uses <tt>vec_lvsl</tt>,
+///   <tt>vec_ld</tt> and <tt>vec_perm</tt> to load from <tt>src</tt>.
+/// \details The fixups using <tt>vec_lvsl</tt> and <tt>vec_perm</tt> are
+///   relatively expensive so you should provide aligned memory addresses.
 /// \par Wraps
 ///   vec_ld, vec_lvsl, vec_perm
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoad_ALTIVEC(int off, const byte src[16])
 {
-    if (IsAlignedOn(src, 16))
+    // Avoid IsAlignedOn for convenience.
+    uintptr_t eff = reinterpret_cast<uintptr_t>(src)+off;
+    if (eff % 16 == 0)
     {
         return (uint32x4_p)vec_ld(off, src);
     }
@@ -217,10 +219,10 @@ inline uint32x4_p VecLoad(const byte src[16])
 #endif
 }
 
-/// \brief Loads a vector from a word array
-/// \param src the word array
-/// \param off offset into the word array
-/// \details VecLoad loads a vector in from a word array.
+/// \brief Loads a vector from a byte array
+/// \param src the byte array
+/// \param off offset into the byte array
+/// \details VecLoad loads a vector from a byte array.
 /// \details VecLoad uses POWER7's <tt>vec_xl</tt> or
 ///   <tt>vec_vsx_ld</tt> if available. The instructions do not require
 ///   aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7
@@ -300,7 +302,7 @@ inline uint64x2_p VecLoad(const word64 src[2])
     return (uint64x2_p)VecLoad((const byte*)src);
 }
 
-/// \brief Loads a vector from a byte array
+/// \brief Loads a vector from a word array
 /// \param src the word array
 /// \param off offset into the word array
 /// \details VecLoad loads a vector in from a word array.
@@ -421,7 +423,9 @@ inline uint32x4_p VecLoadBE(int off, const byte src[16])
 template<class T>
 inline void VecStore_ALTIVEC(const T data, byte dest[16])
 {
-    if (IsAlignedOn(dest, 16))
+    // Avoid IsAlignedOn for convenience.
+    uintptr_t eff = reinterpret_cast<uintptr_t>(dest)+0;
+    if (eff % 16 == 0)
     {
         vec_st((uint8x16_p)data, 0,  dest);
     }
@@ -458,7 +462,9 @@ inline void VecStore_ALTIVEC(const T data, byte dest[16])
 template<class T>
 inline void VecStore_ALTIVEC(const T data, int off, byte dest[16])
 {
-    if (IsAlignedOn(dest, 16))
+    // Avoid IsAlignedOn for convenience.
+    uintptr_t eff = reinterpret_cast<uintptr_t>(dest)+off;
+    if (eff % 16 == 0)
     {
         vec_st((uint8x16_p)data, off,  dest);
     }