Update comments
parent
f52a141f6e
commit
66b3ba5d66
|
|
@ -1,17 +1,20 @@
|
|||
#include <altivec.h>
|
||||
|
||||
// This follows ppc_simd.h. XLC compilers for POWER7 use vec_xlw4 and vec_xstw4.
|
||||
// XLC compilers for POWER8 and above use vec_xl and vec_xst. The way to tell
|
||||
// the difference is, POWER7 XLC compilers are version 12.x and earlier.
|
||||
// The open question is, how to handle early Clang compilers for POWER7.
|
||||
// We know the latest Clang compilers support vec_xl and vec_xst.
|
||||
// This follows ppc_simd.h. XLC compilers for POWER7 use vec_xlw4 and
|
||||
// vec_xstw4. Some XLC compilers for POWER7 and above use vec_xl and
|
||||
// vec_xst. The way to tell the difference is, XLC compilers version
|
||||
// 13.0 and earlier use vec_xlw4 and vec_xstw4. XLC compilers 13.1
|
||||
// and later use vec_xl and vec_xst. The open question is, how to
|
||||
// handle early Clang compilers for POWER7. We know the latest Clang
|
||||
// compilers support vec_xl and vec_xst. Also see
|
||||
// https://www-01.ibm.com/support/docview.wss?uid=swg21683541
|
||||
|
||||
#if defined(__xlc__) && (__xlc__ < 0x0d00)
|
||||
# define __old_xlc__ 1
|
||||
#if defined(__xlc__) && (__xlc__ < 0x0d01)
|
||||
# define __early_xlc__ 1
|
||||
#endif
|
||||
|
||||
#if defined(__xlC__) && (__xlC__ < 0x0d00)
|
||||
# define __old_xlC__ 1
|
||||
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
||||
# define __early_xlC__ 1
|
||||
#endif
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
|
|
@ -19,15 +22,15 @@ int main(int argc, char* argv[])
|
|||
__vector unsigned char x;
|
||||
unsigned char res[16];
|
||||
|
||||
#if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
#if defined(_ARCH_PWR7) && (defined(__early_xlc__) || defined(__early_xlC__))
|
||||
x=vec_xlw4(0, (unsigned char*)argv[0]);
|
||||
x=vec_add(x,x);
|
||||
vec_xstw4(x, 0, res);
|
||||
#elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
|
||||
#elif defined(_ARCH_PWR7) && (defined(__xlc__) || defined(__xlC__) || defined(__clang__))
|
||||
x=vec_xl(0, (unsigned char*)argv[0]);
|
||||
x=vec_add(x,x);
|
||||
vec_xst(x, 0, res);
|
||||
#elif defined(__GNUC__)
|
||||
#elif defined(_ARCH_PWR7) && defined(__GNUC__)
|
||||
x=vec_vsx_ld(0, (unsigned char*)argv[0]);
|
||||
x=vec_add(x,x);
|
||||
vec_vsx_st(x, 0, res);
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ bool CPU_ProbePower7()
|
|||
byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];
|
||||
|
||||
// Specifically call the VSX loads and stores
|
||||
#if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
#if defined(__early_xlc__) || defined(__early_xlC__)
|
||||
vec_xstw4(vec_xlw4(0, b1+3), 0, b2+1);
|
||||
#elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
|
||||
vec_xst(vec_xl(0, b1+3), 0, b2+1);
|
||||
|
|
|
|||
39
ppc_simd.h
39
ppc_simd.h
|
|
@ -39,19 +39,20 @@
|
|||
# define __CRYPTO__ 1
|
||||
#endif
|
||||
|
||||
// Hack to detect early XLC compilers. XLC compilers for POWER7
|
||||
// use vec_xlw4 and vec_xstw4. XLC compilers for POWER8 and above
|
||||
// use vec_xl and vec_xst. The way to tell the difference is,
|
||||
// POWER7 XLC compilers are version 12.x and earlier. The open
|
||||
// question is, how to handle early Clang compilers for POWER7.
|
||||
// We know the latest Clang compilers support vec_xl and vec_xst.
|
||||
// 0x0d00 is hex for version 0x0d (13) and 0x00 (0), or 13.0.
|
||||
// Hack to detect early XLC compilers. XLC compilers for POWER7 use
|
||||
// vec_xlw4 and vec_xstw4. Some XLC compilers for POWER7 and above
|
||||
// use vec_xl and vec_xst. The way to tell the difference is, XLC
|
||||
// compilers version 13.0 and earlier use vec_xlw4 and vec_xstw4.
|
||||
// XLC compilers 13.1 and later use vec_xl and vec_xst. The open
|
||||
// question is, how to handle early Clang compilers for POWER7. We
|
||||
// know the latest Clang compilers support vec_xl and vec_xst. Also
|
||||
// see https://www-01.ibm.com/support/docview.wss?uid=swg21683541.
|
||||
|
||||
#if defined(__xlc__) && (__xlc__ < 0x0d00)
|
||||
# define __old_xlc__ 1
|
||||
#if defined(__xlc__) && (__xlc__ < 0x0d01)
|
||||
# define __early_xlc__ 1
|
||||
#endif
|
||||
#if defined(__xlC__) && (__xlC__ < 0x0d00)
|
||||
# define __old_xlC__ 1
|
||||
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
||||
# define __early_xlC__ 1
|
||||
#endif
|
||||
|
||||
// VecLoad_ALTIVEC and VecStore_ALTIVEC are
|
||||
|
|
@ -162,7 +163,7 @@ inline uint32x4_p VecLoad_ALTIVEC(int off, const byte src[16])
|
|||
inline uint32x4_p VecLoad(const byte src[16])
|
||||
{
|
||||
#if defined(_ARCH_PWR7)
|
||||
# if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
# if defined(__early_xlc__) || defined(__early_xlC__)
|
||||
return (uint32x4_p)vec_xlw4(0, (byte*)src);
|
||||
# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
|
||||
return (uint32x4_p)vec_xl(0, (byte*)src);
|
||||
|
|
@ -189,7 +190,7 @@ inline uint32x4_p VecLoad(const byte src[16])
|
|||
inline uint32x4_p VecLoad(int off, const byte src[16])
|
||||
{
|
||||
#if defined(_ARCH_PWR7)
|
||||
# if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
# if defined(__early_xlc__) || defined(__early_xlC__)
|
||||
return (uint32x4_p)vec_xlw4(off, (byte*)src);
|
||||
# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
|
||||
return (uint32x4_p)vec_xl(off, (byte*)src);
|
||||
|
|
@ -288,7 +289,7 @@ inline uint64x2_p VecLoad(int off, const word64 src[2])
|
|||
inline uint32x4_p VecLoadBE(const byte src[16])
|
||||
{
|
||||
#if defined(_ARCH_PWR7)
|
||||
# if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
# if defined(__early_xlc__) || defined(__early_xlC__)
|
||||
# if (CRYPTOPP_BIG_ENDIAN)
|
||||
return (uint32x4_p)vec_xlw4(0, (byte*)src);
|
||||
# else
|
||||
|
|
@ -328,7 +329,7 @@ inline uint32x4_p VecLoadBE(const byte src[16])
|
|||
inline uint32x4_p VecLoadBE(int off, const byte src[16])
|
||||
{
|
||||
#if defined(_ARCH_PWR7)
|
||||
# if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
# if defined(__early_xlc__) || defined(__early_xlC__)
|
||||
# if (CRYPTOPP_BIG_ENDIAN)
|
||||
return (uint32x4_p)vec_xlw4(off, (byte*)src);
|
||||
# else
|
||||
|
|
@ -442,7 +443,7 @@ template<class T>
|
|||
inline void VecStore(const T data, byte dest[16])
|
||||
{
|
||||
#if defined(_ARCH_PWR7)
|
||||
# if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
# if defined(__early_xlc__) || defined(__early_xlC__)
|
||||
vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
|
||||
# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
|
||||
vec_xst((uint8x16_p)data, 0, (byte*)dest);
|
||||
|
|
@ -472,7 +473,7 @@ template<class T>
|
|||
inline void VecStore(const T data, int off, byte dest[16])
|
||||
{
|
||||
#if defined(_ARCH_PWR7)
|
||||
# if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
# if defined(__early_xlc__) || defined(__early_xlC__)
|
||||
vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
|
||||
# elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
|
||||
vec_xst((uint8x16_p)data, off, (byte*)dest);
|
||||
|
|
@ -586,7 +587,7 @@ template <class T>
|
|||
inline void VecStoreBE(const T data, byte dest[16])
|
||||
{
|
||||
#if defined(_ARCH_PWR7)
|
||||
# if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
# if defined(__early_xlc__) || defined(__early_xlC__)
|
||||
# if (CRYPTOPP_BIG_ENDIAN)
|
||||
vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
|
||||
# else
|
||||
|
|
@ -629,7 +630,7 @@ template <class T>
|
|||
inline void VecStoreBE(const T data, int off, byte dest[16])
|
||||
{
|
||||
#if defined(_ARCH_PWR7)
|
||||
# if defined(__old_xlc__) || defined(__old_xlC__)
|
||||
# if defined(__early_xlc__) || defined(__early_xlC__)
|
||||
# if (CRYPTOPP_BIG_ENDIAN)
|
||||
vec_xstw4((uint8x16_p)data, off, (byte*)dest);
|
||||
# else
|
||||
|
|
|
|||
Loading…
Reference in New Issue