Remove non-const cast from POWER8 loads and stores
Also see the discussion at https://github.com/noloader/POWER8-crypto/issues/2
parent 244abbe41c
commit 9a52edcfdb
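The (uint8_t*) casts removed below also cast away const from the caller's buffer. The AltiVec/VSX load built-ins provide const-qualified pointer overloads, so the casts are unnecessary and only hide const-correctness mistakes. A minimal sketch of the point, assuming a GCC or XLC altivec.h that supplies the const overloads (LoadBlock is an illustrative name, not library code):

    #include <altivec.h>

    typedef __vector unsigned char uint8x16_p;

    // vec_ld accepts a pointer to const elements, so a const-correct
    // signature compiles without any cast.
    inline uint8x16_p LoadBlock(const unsigned char src[16])
    {
        return vec_ld(0, src);
        // was: vec_ld(0, (uint8_t*)src);  // compiled, but silently dropped const
    }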
ppc-simd.h | 62
@@ -47,14 +47,14 @@ inline uint32x4_p VectorLoad(const byte src[16])
     uint8x16_p data;
     if (IsAlignedOn(src, 16))
     {
-        data = vec_ld(0, (uint8_t*)src);
+        data = vec_ld(0, src);
     }
     else
     {
         // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
-        const uint8x16_p perm = vec_lvsl(0, (uint8_t*)src);
-        const uint8x16_p low = vec_ld(0, (uint8_t*)src);
-        const uint8x16_p high = vec_ld(15, (uint8_t*)src);
+        const uint8x16_p perm = vec_lvsl(0, src);
+        const uint8x16_p low = vec_ld(0, src);
+        const uint8x16_p high = vec_ld(15, src);
         data = vec_perm(low, high, perm);
     }
 
@@ -77,7 +77,7 @@ inline void VectorStore(const uint32x4_p data, byte dest[16])
 
     if (IsAlignedOn(dest, 16))
     {
-        vec_st(t1, 0, (uint8_t*) dest);
+        vec_st(t1, 0, dest);
     }
     else
     {
@@ -147,12 +147,12 @@ inline T Reverse(const T& src)
 inline uint32x4_p VectorLoadBE(const uint8_t src[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl_be(0, (uint8_t*)src);
+    return (uint32x4_p)vec_xl_be(0, src);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    return (uint32x4_p)Reverse(vec_vsx_ld(0, (uint8_t*)src));
+    return (uint32x4_p)Reverse(vec_vsx_ld(0, src));
 # else
-    return (uint32x4_p)vec_vsx_ld(0, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(0, src);
 # endif
 #endif
 }
@@ -168,12 +168,12 @@ inline uint32x4_p VectorLoadBE(const uint8_t src[16])
 inline uint32x4_p VectorLoadBE(int off, const uint8_t src[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl_be(off, (uint8_t*)src);
+    return (uint32x4_p)vec_xl_be(off, src);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    return (uint32x4_p)Reverse(vec_vsx_ld(off, (uint8_t*)src));
+    return (uint32x4_p)Reverse(vec_vsx_ld(off, src));
 # else
-    return (uint32x4_p)vec_vsx_ld(off, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(off, src);
 # endif
 #endif
 }
@@ -187,7 +187,7 @@ inline uint32x4_p VectorLoadBE(int off, const uint8_t src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VectorLoad(const byte src[16])
 {
-    return (uint32x4_p)VectorLoadBE((uint8_t*)src);
+    return (uint32x4_p)VectorLoadBE(src);
 }
 
 /// \brief Loads a vector from a byte array
@@ -200,7 +200,7 @@ inline uint32x4_p VectorLoad(const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VectorLoad(int off, const byte src[16])
 {
-    return (uint32x4_p)VectorLoadBE(off, (uint8_t*)src);
+    return (uint32x4_p)VectorLoadBE(off, src);
 }
 
 /// \brief Loads a vector from a byte array
@@ -213,9 +213,9 @@ inline uint32x4_p VectorLoad(int off, const byte src[16])
 inline uint32x4_p VectorLoadKey(const byte src[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl(0, (uint8_t*)src);
+    return (uint32x4_p)vec_xl(0, src);
 #else
-    return (uint32x4_p)vec_vsx_ld(0, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(0, src);
 #endif
 }
 
@@ -229,9 +229,9 @@ inline uint32x4_p VectorLoadKey(const byte src[16])
 inline uint32x4_p VectorLoadKey(const word32 src[4])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl(0, (uint8_t*)src);
+    return (uint32x4_p)vec_xl(0, src);
 #else
-    return (uint32x4_p)vec_vsx_ld(0, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(0, src);
 #endif
 }
 
@@ -246,9 +246,9 @@ inline uint32x4_p VectorLoadKey(const word32 src[4])
 inline uint32x4_p VectorLoadKey(int off, const byte src[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p)vec_xl(off, (uint8_t*)src);
+    return (uint32x4_p)vec_xl(off, src);
 #else
-    return (uint32x4_p)vec_vsx_ld(off, (uint8_t*)src);
+    return (uint32x4_p)vec_vsx_ld(off, src);
 #endif
 }
 
@@ -265,12 +265,12 @@ template <class T>
 inline void VectorStoreBE(const T& src, uint8_t dest[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    vec_xst_be((uint8x16_p)src, 0, (uint8_t*)dest);
+    vec_xst_be((uint8x16_p)src, 0, dest);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), 0, (uint8_t*)dest);
+    vec_vsx_st(Reverse((uint8x16_p)src), 0, dest);
 # else
-    vec_vsx_st((uint8x16_p)src, 0, (uint8_t*)dest);
+    vec_vsx_st((uint8x16_p)src, 0, dest);
 # endif
 #endif
 }
@@ -288,12 +288,12 @@ template <class T>
 inline void VectorStoreBE(const T& src, int off, uint8_t dest[16])
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    vec_xst_be((uint8x16_p)src, off, (uint8_t*)dest);
+    vec_xst_be((uint8x16_p)src, off, dest);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), off, (uint8_t*)dest);
+    vec_vsx_st(Reverse((uint8x16_p)src), off, dest);
 # else
-    vec_vsx_st((uint8x16_p)src, off, (uint8_t*)dest);
+    vec_vsx_st((uint8x16_p)src, off, dest);
 # endif
 #endif
 }
@@ -311,12 +311,12 @@ inline void VectorStore(const T& src, byte dest[16])
 {
     // Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
 #if defined(CRYPTOPP_XLC_VERSION)
-    vec_xst_be((uint8x16_p)src, 0, (uint8_t*)dest);
+    vec_xst_be((uint8x16_p)src, 0, dest);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), 0, (uint8_t*)dest);
+    vec_vsx_st(Reverse((uint8x16_p)src), 0, dest);
 # else
-    vec_vsx_st((uint8x16_p)src, 0, (uint8_t*)dest);
+    vec_vsx_st((uint8x16_p)src, 0, dest);
 # endif
 #endif
 }
@@ -335,12 +335,12 @@ inline void VectorStore(const T& src, int off, byte dest[16])
 {
     // Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
 #if defined(CRYPTOPP_XLC_VERSION)
-    vec_xst_be((uint8x16_p)src, off, (uint8_t*)dest);
+    vec_xst_be((uint8x16_p)src, off, dest);
 #else
 # if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), off, (uint8_t*)dest);
+    vec_vsx_st(Reverse((uint8x16_p)src), off, dest);
 # else
-    vec_vsx_st((uint8x16_p)src, off, (uint8_t*)dest);
+    vec_vsx_st((uint8x16_p)src, off, dest);
 # endif
 #endif
 }
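A note on the first ppc-simd.h hunk: it keeps the classic AltiVec unaligned-load idiom from the ALTIVEC PEM linked in the comment. vec_ld ignores the low four address bits, so an unaligned 16-byte block is rebuilt from the two aligned quadwords that straddle it, spliced with a permute mask from vec_lvsl. A sketch under the same assumptions as above (LoadUnaligned is an illustrative name, not library code):

    // Rebuild an unaligned 16-byte vector from two aligned loads.
    inline uint8x16_p LoadUnaligned(const unsigned char* src)
    {
        const uint8x16_p perm = vec_lvsl(0, src);  // permute mask from src's low 4 bits
        const uint8x16_p low  = vec_ld(0, src);    // aligned quadword holding the first bytes
        const uint8x16_p high = vec_ld(15, src);   // aligned quadword holding the last byte
        return vec_perm(low, high, perm);          // splice the two into the unaligned view
    }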
sha-simd.cpp | 16
@@ -1006,7 +1006,7 @@ uint32x4_p8 VEC_XL_BE(int offset, const uint8_t* data)
 template <class T> static inline
 uint32x4_p8 VectorLoad32x4(const T* data, int offset)
 {
-    return (uint32x4_p8)vec_ld(offset, (uint8_t*)data);
+    return (uint32x4_p8)vec_ld(offset, data);
 }
 
 // Unaligned load
@@ -1014,9 +1014,9 @@ template <class T> static inline
 uint32x4_p8 VectorLoad32x4u(const T* data, int offset)
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint32x4_p8)vec_xl(offset, (uint8_t*)data);
+    return (uint32x4_p8)vec_xl(offset, data);
 #else
-    return (uint32x4_p8)vec_vsx_ld(offset, (uint8_t*)data);
+    return (uint32x4_p8)vec_vsx_ld(offset, data);
 #endif
 }
 
@@ -1024,7 +1024,7 @@ uint32x4_p8 VectorLoad32x4u(const T* data, int offset)
 template <class T> static inline
 void VectorStore32x4(const uint32x4_p8 val, T* data, int offset)
 {
-    vec_st((uint8x16_p8)val, offset, (uint8_t*)data);
+    vec_st((uint8x16_p8)val, offset, data);
 }
 
 // Unaligned store
@@ -1314,7 +1314,7 @@ uint64x2_p8 VectorPermute64x2(const uint64x2_p8 val, const uint8x16_p8 mask)
 template <class T> static inline
 uint64x2_p8 VectorLoad64x2(const T* data, int offset)
 {
-    return (uint64x2_p8)vec_ld(offset, (uint8_t*)data);
+    return (uint64x2_p8)vec_ld(offset, data);
 }
 
 // Unaligned load
@@ -1322,9 +1322,9 @@ template <class T> static inline
 uint64x2_p8 VectorLoad64x2u(const T* data, int offset)
 {
 #if defined(CRYPTOPP_XLC_VERSION)
-    return (uint64x2_p8)vec_xl(offset, (uint8_t*)data);
+    return (uint64x2_p8)vec_xl(offset, data);
 #else
-    return (uint64x2_p8)vec_vsx_ld(offset, (uint8_t*)data);
+    return (uint64x2_p8)vec_vsx_ld(offset, data);
 #endif
 }
 
@@ -1332,7 +1332,7 @@ uint64x2_p8 VectorLoad64x2u(const T* data, int offset)
 template <class T> static inline
 void VectorStore64x2(const uint64x2_p8 val, T* data, int offset)
 {
-    vec_st((uint8x16_p8)val, offset, (uint8_t*)data);
+    vec_st((uint8x16_p8)val, offset, data);
 }
 
 // Unaligned store
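The little-endian branches above pair vec_vsx_ld/vec_vsx_st with a byte reversal so the big-endian wire format is preserved. For reference, a Reverse helper like the one these hunks call typically looks like this (illustrative sketch; the header's actual Reverse is a template over its vector types):

    template <class T>
    inline T Reverse(const T src)
    {
        // Permute bytes 15..0 to flip the byte order within the vector.
        const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
        return (T)vec_perm((uint8x16_p)src, (uint8x16_p)src, mask);
    }

With that in place, VectorLoadBE followed by VectorStoreBE round-trips a 16-byte buffer unchanged on both big- and little-endian targets.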