Patch note (text before the first "diff --git" line is ignored by patch tools):
  * VecReverse gains a POWER9 path that uses vec_revb; other targets keep the vec_perm byte shuffle.
  * VecStoreBE(data, off, word32*) now forwards `off` to the (data, off, byte*) overload;
    previously the offset argument was dropped and the offset-less overload was called.
  * Two -/+ pairs below are textually identical: they were whitespace-only changes whose exact
    whitespace could not be recovered from the flattened text.
  * NOTE(review): `template <class T>` and all indentation are reconstructed; confirm against the
    target files before applying.

diff --git a/ppc_power8.cpp b/ppc_power8.cpp
index 4f1a0834..cd8f2531 100644
--- a/ppc_power8.cpp
+++ b/ppc_power8.cpp
@@ -64,7 +64,7 @@ bool CPU_ProbePower8()
     {
         // POWER8 added 64-bit SIMD operations
         const word64 x = W64LIT(0xffffffffffffffff);
-        word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];
+        word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];
 
         // Specifically call the VSX loads and stores with 64-bit types
         #if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
diff --git a/ppc_simd.h b/ppc_simd.h
index b68c31f9..e6a955b3 100644
--- a/ppc_simd.h
+++ b/ppc_simd.h
@@ -123,8 +123,12 @@ inline uint32x4_p VecOne()
 template <class T>
 inline T VecReverse(const T data)
 {
+#if (_ARCH_PWR9)
+    return (T)vec_revb((uint8x16_p)data);
+#else
     const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
     return (T)vec_perm(data, data, mask);
+#endif
 }
 
 //////////////////////// Loads ////////////////////////
@@ -723,7 +727,7 @@ inline void VecStoreBE(const T data, int off, byte dest[16])
 template <class T>
 inline void VecStoreBE(const T data, word32 dest[4])
 {
-    return VecStoreBE((uint8x16_p)data, (byte*)dest);
+    return VecStoreBE((uint8x16_p)data, (byte*)dest);
 }
 
 /// \brief Stores a vector to a word array
@@ -745,7 +749,7 @@ inline void VecStoreBE(const T data, word32 dest[4])
 template <class T>
 inline void VecStoreBE(const T data, int off, word32 dest[4])
 {
-    return VecStoreBE((uint8x16_p)data, (byte*)dest);
+    return VecStoreBE((uint8x16_p)data, off, (byte*)dest);
 }
 
 //////////////////////// Miscellaneous ////////////////////////