Improve support for early XLC compilers on POWER7 (GH #742)

pull/748/head
Jeffrey Walton 2018-11-19 18:28:50 -05:00
parent 7274827f19
commit 0df98b049a
GPG Key ID: B36AB348921B1838
1 changed file with 72 additions and 25 deletions


@@ -39,6 +39,21 @@
 # define __CRYPTO__ 1
 #endif
+// Hack to detect early XLC compilers. XLC compilers for POWER7
+// use vec_xlw4 and vec_xstw4. XLC compilers for POWER8 and above
+// use vec_xl and vec_xst. The way to tell the difference is,
+// POWER7 XLC compilers are version 12.x and earlier. The open
+// question is, how to handle early Clang compilers for POWER7.
+// We know the latest Clang compilers support vec_xl and vec_xst.
+// 0x0d00 is hex for version 0x0d (13) and 0x00 (0), or 13.0.
+#if defined(__xlc__) && (__xlc__ < 0x0d00)
+# define __old_xlc__ 1
+#endif
+#if defined(__xlC__) && (__xlC__ < 0x0d00)
+# define __old_xlC__ 1
+#endif
 // VecLoad_ALTIVEC and VecStore_ALTIVEC are
 // too noisy on modern compilers
 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
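For readers unfamiliar with the encoding: IBM's XL compilers report their version as a hexadecimal value with the major release in the high byte and the minor release in the low byte, so anything below 0x0d00 predates XLC 13.0 and gets the POWER7-era intrinsics. A minimal sketch of the cutoff, using illustrative version values that are not taken from the commit:

#include <cstdio>

int main()
{
    // Illustrative values: 0x0c01 ~ XLC 12.1 (POWER7 era), 0x0d01 ~ XLC 13.1
    // (POWER8 era). Values below 0x0d00 would select the vec_xlw4/vec_xstw4
    // code paths guarded by __old_xlc__/__old_xlC__ in the patch above.
    const unsigned int versions[] = { 0x0c01, 0x0d01, 0x1001 };
    for (unsigned int i = 0; i < 3; ++i)
        std::printf("0x%04x -> %s\n", versions[i],
                    versions[i] < 0x0d00 ? "old XLC: vec_xlw4/vec_xstw4"
                                         : "newer: vec_xl/vec_xst");
    return 0;
}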
@@ -65,15 +80,15 @@ typedef __vector unsigned long long uint64x2_p;
 /// \brief Reverse bytes in a vector
 /// \tparam T vector type
-/// \param src the vector
+/// \param data the vector
 /// \returns vector
 /// \details VecReverse() reverses the bytes in a vector
 /// \since Crypto++ 6.0
 template <class T>
-inline T VecReverse(const T src)
+inline T VecReverse(const T data)
 {
     const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
-    return (T)vec_perm(src, src, mask);
+    return (T)vec_perm(data, data, mask);
 }
 //////////////////////// Loads ////////////////////////
@@ -147,7 +162,9 @@ inline uint32x4_p VecLoad_ALTIVEC(int off, const byte src[16])
 inline uint32x4_p VecLoad(const byte src[16])
 {
 #if defined(_ARCH_PWR7)
-# if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+# if defined(__old_xlc__) || defined(__old_xlC__)
+    return (uint32x4_p)vec_xlw4(0, (byte*)src);
+# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
     return (uint32x4_p)vec_xl(0, (byte*)src);
 # else
     return (uint32x4_p)vec_vsx_ld(0, (byte*)src);
@@ -172,7 +189,9 @@ inline uint32x4_p VecLoad(const byte src[16])
 inline uint32x4_p VecLoad(int off, const byte src[16])
 {
 #if defined(_ARCH_PWR7)
-# if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+# if defined(__old_xlc__) || defined(__old_xlC__)
+    return (uint32x4_p)vec_xlw4(off, (byte*)src);
+# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
     return (uint32x4_p)vec_xl(off, (byte*)src);
 # else
     return (uint32x4_p)vec_vsx_ld(off, (byte*)src);
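As a usage note (not part of the commit), the VecLoad/VecStore wrappers hide this compiler dispatch from callers. A minimal sketch, assuming Crypto++'s ppc_simd.h is on the include path and that VecXor from the same header is available:

#include "ppc_simd.h"

using namespace CryptoPP;

// XOR two 16-byte blocks through the dispatched wrappers. On old XLC the
// loads compile down to vec_xlw4, on newer XLC/Clang to vec_xl, and on
// GCC to vec_vsx_ld; the stores follow the same pattern.
inline void XorBlock(byte out[16], const byte a[16], const byte b[16])
{
    const uint32x4_p va = VecLoad(a);
    const uint32x4_p vb = VecLoad(b);
    VecStore(VecXor(va, vb), out);
}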
@@ -269,7 +288,13 @@ inline uint64x2_p VecLoad(int off, const word64 src[2])
 inline uint32x4_p VecLoadBE(const byte src[16])
 {
 #if defined(_ARCH_PWR7)
-# if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+# if defined(__old_xlc__) || defined(__old_xlC__)
+#  if (CRYPTOPP_BIG_ENDIAN)
+    return (uint32x4_p)vec_xlw4(0, (byte*)src);
+#  else
+    return (uint32x4_p)VecReverse(vec_xlw4(0, (byte*)src));
+#  endif
+# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
     return (uint32x4_p)vec_xl_be(0, (byte*)src);
 # else
 #  if (CRYPTOPP_BIG_ENDIAN)
@@ -303,7 +328,13 @@ inline uint32x4_p VecLoadBE(const byte src[16])
 inline uint32x4_p VecLoadBE(int off, const byte src[16])
 {
 #if defined(_ARCH_PWR7)
-# if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+# if defined(__old_xlc__) || defined(__old_xlC__)
+#  if (CRYPTOPP_BIG_ENDIAN)
+    return (uint32x4_p)vec_xlw4(off, (byte*)src);
+#  else
+    return (uint32x4_p)VecReverse(vec_xlw4(off, (byte*)src));
+#  endif
+# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
     return (uint32x4_p)vec_xl_be(off, (byte*)src);
 # else
 #  if (CRYPTOPP_BIG_ENDIAN)
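The little-endian branches above emulate a big-endian load by performing a native load and then byte-reversing with VecReverse. A small self-check sketch (illustrative, not part of the commit; the comparison is only expected to hold on a little-endian target):

#include "ppc_simd.h"
#include <cstring>

using namespace CryptoPP;

// On little-endian POWER, VecLoadBE(src) should match VecReverse(VecLoad(src)).
inline bool BigEndianLoadAgrees(const byte src[16])
{
    byte a[16], b[16];
    VecStore(VecLoadBE(src), a);            // dispatched big-endian load
    VecStore(VecReverse(VecLoad(src)), b);  // manual emulation: load, then reverse
    return std::memcmp(a, b, 16) == 0;
}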
@@ -411,13 +442,15 @@ template<class T>
 inline void VecStore(const T data, byte dest[16])
 {
 #if defined(_ARCH_PWR7)
-# if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+# if defined(__old_xlc__) || defined(__old_xlC__)
+    vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
+# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
     vec_xst((uint8x16_p)data, 0, (byte*)dest);
 # else
     vec_vsx_st((uint8x16_p)data, 0, (byte*)dest);
 # endif
 #else
-    return VecStore_ALTIVEC(data, 0, dest);
+    VecStore(data, dest);
 #endif
 }
@@ -439,13 +472,15 @@ template<class T>
 inline void VecStore(const T data, int off, byte dest[16])
 {
 #if defined(_ARCH_PWR7)
-# if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+# if defined(__old_xlc__) || defined(__old_xlC__)
+    vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
+# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
     vec_xst((uint8x16_p)data, off, (byte*)dest);
 # else
     vec_vsx_st((uint8x16_p)data, off, (byte*)dest);
 # endif
 #else
-    return VecStore_ALTIVEC(data, off, dest);
+    VecStore(data, off, dest);
 #endif
 }
@@ -548,23 +583,29 @@ inline void VecStore(const T data, int off, word64 dest[2])
 /// \note VecStore does not require an aligned array.
 /// \since Crypto++ 6.0
 template <class T>
-inline void VecStoreBE(const T src, byte dest[16])
+inline void VecStoreBE(const T data, byte dest[16])
 {
 #if defined(_ARCH_PWR7)
-# if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    vec_xst_be((uint8x16_p)src, 0, (byte*)dest);
+# if defined(__old_xlc__) || defined(__old_xlC__)
+#  if (CRYPTOPP_BIG_ENDIAN)
+    vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
+#  else
+    vec_xstw4((uint8x16_p)VecReverse(data), 0, (byte*)dest);
+#  endif
+# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+    vec_xst_be((uint8x16_p)data, 0, (byte*)dest);
 # else
 #  if (CRYPTOPP_BIG_ENDIAN)
-    vec_vsx_st((uint8x16_p)src, 0, (byte*)dest);
+    vec_vsx_st((uint8x16_p)data, 0, (byte*)dest);
 #  else
-    vec_vsx_st((uint8x16_p)VecReverse(src), 0, (byte*)dest);
+    vec_vsx_st((uint8x16_p)VecReverse(data), 0, (byte*)dest);
 #  endif
 # endif
 #else // _ARCH_PWR7
 # if (CRYPTOPP_BIG_ENDIAN)
-    VecStore((uint8x16_p)src, (byte*)dest);
+    VecStore(data, dest);
 # else
-    VecStore((uint8x16_p)VecReverse(src), (byte*)dest);
+    VecStore(VecReverse(data), dest);
 # endif
 #endif // _ARCH_PWR7
 }
@@ -585,23 +626,29 @@ inline void VecStoreBE(const T src, byte dest[16])
 /// \note VecStore does not require an aligned array.
 /// \since Crypto++ 6.0
 template <class T>
-inline void VecStoreBE(const T src, int off, byte dest[16])
+inline void VecStoreBE(const T data, int off, byte dest[16])
 {
 #if defined(_ARCH_PWR7)
-# if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    vec_xst_be((uint8x16_p)src, off, (byte*)dest);
+# if defined(__old_xlc__) || defined(__old_xlC__)
+#  if (CRYPTOPP_BIG_ENDIAN)
+    vec_xstw4((uint8x16_p)data, off, (byte*)dest);
+#  else
+    vec_xstw4((uint8x16_p)VecReverse(data), off, (byte*)dest);
+#  endif
+# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+    vec_xst_be((uint8x16_p)data, off, (byte*)dest);
 # else
 #  if (CRYPTOPP_BIG_ENDIAN)
-    vec_vsx_st((uint8x16_p)src, off, (byte*)dest);
+    vec_vsx_st((uint8x16_p)data, off, (byte*)dest);
 #  else
-    vec_vsx_st((uint8x16_p)VecReverse(src), off, (byte*)dest);
+    vec_vsx_st((uint8x16_p)VecReverse(data), off, (byte*)dest);
 #  endif
 # endif
 #else // _ARCH_PWR7
 # if (CRYPTOPP_BIG_ENDIAN)
-    VecStore((uint8x16_p)src, off, (byte*)dest);
+    VecStore(data, dest);
 # else
-    VecStore((uint8x16_p)VecReverse(src), off, (byte*)dest);
+    VecStore(VecReverse(data), dest);
 # endif
 #endif // _ARCH_PWR7
 }