diff --git a/gcm-simd.cpp b/gcm-simd.cpp index c7c958b4..f58a4e05 100644 --- a/gcm-simd.cpp +++ b/gcm-simd.cpp @@ -192,7 +192,6 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) using CryptoPP::uint8x16_p; using CryptoPP::uint64x2_p; using CryptoPP::VectorXor; -using CryptoPP::VectorShiftLeft; using CryptoPP::VectorShiftRight; inline uint64x2_p VMULL_P64(uint64x2_p a, uint64x2_p b) @@ -208,14 +207,12 @@ inline uint64x2_p VMULL_P64(uint64x2_p a, uint64x2_p b) inline uint64x2_p VMULL_HIGH_P64(uint64x2_p a, uint64x2_p b) { #if defined(__xlc__) || defined(__xlC__) - const uint64x2_p z = VectorXor(a, a); - const uint64x2_p s = VectorShiftRight<8>(a, z); - const uint64x2_p t = VectorShiftRight<8>(b, z); + const uint64x2_p s = VectorShiftRight<8>(a); + const uint64x2_p t = VectorShiftRight<8>(b); return __vpmsumd (s, t); #else - const uint64x2_p z = VectorXor(a, a); - const uint64x2_p s = VectorShiftRight<8>(a, z); - const uint64x2_p t = VectorShiftRight<8>(b, z); + const uint64x2_p s = VectorShiftRight<8>(a); + const uint64x2_p t = VectorShiftRight<8>(b); return __builtin_crypto_vpmsumd (s, t); #endif } diff --git a/ppc-simd.h b/ppc-simd.h index 181a8252..e076cc94 100644 --- a/ppc-simd.h +++ b/ppc-simd.h @@ -119,6 +119,36 @@ inline T1 VectorAdd(const T1& vec1, const T2& vec2) return (T1)vec_add(vec1, (T1)vec2); } +/// \brief Shift a vector left +/// \tparam C shift byte count +/// \tparam T vector type +/// \param vec the vector +/// \details VectorShiftLeft() returns a new vector after shifting the +/// concatenation of the zero vector and the source vector by the specified +/// number of bytes. The return vector is the same type as vec. +/// \details On big endian machines VectorShiftLeft() is vec_sld(a, z, +/// c). On little endian machines VectorShiftLeft() is translated to +/// vec_sld(z, a, 16-c). You should always call the function as +/// if on a big endian machine as shown below. +///
+/// uint8x16_p r1 = VectorLoad(ptr); +/// uint8x16_p r5 = VectorShiftLeft<12>(r1); +///+/// \sa Is vec_sld +/// endian sensitive? on Stack Overflow +/// \since Crypto++ 6.0 +template
+/// uint8x16_p r1 = VectorLoad(ptr); +/// uint8x16_p r5 = VectorShiftRight<12>(r1); +///+/// \sa Is vec_sld +/// endian sensitive? on Stack Overflow +/// \since Crypto++ 6.0 +template
/// uint8x16_p r0 = {0};
@@ -176,7 +230,7 @@ inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
template <unsigned int C, class T1, class T2>
inline T1 VectorShiftRight(const T1& vec1, const T2& vec2)
{
- return VectorShiftLeft<16-C>(vec1, vec2);
+ return (T1)VectorShiftLeft<16-C>(vec2, vec1);
}
#endif // POWER4 and above