From 1dd0e321a6e092c0472b36d75ea06bfa3750b281 Mon Sep 17 00:00:00 2001
From: Jeffrey Walton <noloader@gmail.com>
Date: Sun, 5 Aug 2018 05:39:42 -0400
Subject: [PATCH] Rework Makefile and ppc-simd.h for XLC and LLVM front-end
 changes

---
 GNUmakefile       | 138 +++++++++++++--------
 ppc-simd.cpp      | 244 +-----------------------------------
 ppc-simd.h        | 308 ++++++++++++++++++++++++----------------------
 rijndael-simd.cpp | 158 ++++++++++++++++++++++--
 sha-simd.cpp      |  90 ++++++++++++++
 5 files changed, 492 insertions(+), 446 deletions(-)

diff --git a/GNUmakefile b/GNUmakefile
index bfacbf57..5a738847 100755
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -101,6 +101,16 @@ ifeq ($(wildcard adhoc.cpp),)
 $(shell cp adhoc.cpp.proto adhoc.cpp)
 endif
 
+# Fixup AIX. Derive the PowerPC word size from the kernel bit mode.
+ifeq ($(IS_AIX),1)
+  BITNESS=$(shell getconf KERNEL_BITMODE)
+  ifeq ($(BITNESS),64)
+    IS_PPC64=1
+  else
+    IS_PPC32=1
+  endif
+endif
+
 ###########################################################
 #####                General Variables                #####
 ###########################################################
@@ -400,63 +410,93 @@ ifeq ($(IS_ARMV8),1)
   endif
 endif
 
-# PowerPC and PowerPC-64. Altivec is available with Power4
-ifneq ($(IS_PPC32)$(IS_PPC64)$(IS_AIX),000)
+# PowerPC and PowerPC-64. Altivec is available with Power4.
+# The tests below are crafted for IBM XLC and the LLVM front-end.
+# XLC/LLVM only supplies POWER8, so we set the flags for XLC/LLVM
+# and lower them if POWER7 or ALTIVEC is available. I've got a
+# feeling LLVM is going to cause a lot of trouble.
+ifneq ($(IS_PPC32)$(IS_PPC64),00)
+  HAVE_POWER8 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power8 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
+  ifneq ($(HAVE_POWER8),0)
+    POWER8_FLAG = -mcpu=power8 -maltivec
+    AES_FLAG = $(POWER8_FLAG)
+    GCM_FLAG = $(POWER8_FLAG)
+    SHA_FLAG = $(POWER8_FLAG)
+    SM4_FLAG = $(POWER8_FLAG)
+  endif
+
+  # GCC and some compatibles
+  HAVE_POWER7 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power7 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c '_ARCH_PWR7')
+  ifneq ($(HAVE_POWER7),0)
+    POWER7_FLAG = -mcpu=power7 -maltivec
+    ARIA_FLAG = $(POWER7_FLAG)
+    BLAKE2_FLAG = $(POWER7_FLAG)
+    CHAM_FLAG = $(POWER7_FLAG)
+    LEA_FLAG = $(POWER7_FLAG)
+    SIMON_FLAG = $(POWER7_FLAG)
+    SPECK_FLAG = $(POWER7_FLAG)
+    SIMECK_FLAG = $(POWER7_FLAG)
+  endif
+
   # GCC and some compatibles
   HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power4 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
   ifneq ($(HAVE_ALTIVEC),0)
     ALTIVEC_FLAG = -mcpu=power4 -maltivec
-    ARIA_FLAG = -mcpu=power4 -maltivec
-    BLAKE2_FLAG = -mcpu=power4 -maltivec
-    CHAM_FLAG = -mcpu=power4 -maltivec
-    LEA_FLAG = -mcpu=power4 -maltivec
-    SIMON_FLAG = -mcpu=power4 -maltivec
-    SPECK_FLAG = -mcpu=power4 -maltivec
-    SIMECK_FLAG = -mcpu=power4 -maltivec
-    SM4_FLAG = -mcpu=power7 -maltivec
-  endif
-  # GCC and some compatibles
-  HAVE_CRYPTO = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power8 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
-  ifneq ($(HAVE_CRYPTO),0)
-    ALTIVEC_FLAG = -mcpu=power8 -maltivec
-    AES_FLAG = -mcpu=power8 -maltivec
-    GCM_FLAG = -mcpu=power8 -maltivec
-    SHA_FLAG = -mcpu=power8 -maltivec
-    CHAM_FLAG = -mcpu=power8 -maltivec
-    LEA_FLAG = -mcpu=power8 -maltivec
-    SIMON_FLAG = -mcpu=power8 -maltivec
-    SPECK_FLAG = -mcpu=power8 -maltivec
-    SIMECK_FLAG = -mcpu=power8 -maltivec
-    SM4_FLAG = -mcpu=power8 -maltivec
   endif
+
   # IBM XL C/C++
-  HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr7 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
+  HAVE_POWER8 = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr8 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
+  ifneq ($(HAVE_POWER8),0)
+    POWER8_FLAG = -qarch=pwr8 -qaltivec
+    AES_FLAG = $(POWER8_FLAG)
+    GCM_FLAG = $(POWER8_FLAG)
+    SHA_FLAG = $(POWER8_FLAG)
+    SM4_FLAG = $(POWER8_FLAG)
+  endif
+
+  # IBM XL C/C++
+  HAVE_POWER7 = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr7 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR7')
+  ifneq ($(HAVE_POWER7),0)
+    POWER7_FLAG = -qarch=pwr7 -qaltivec
+    ARIA_FLAG = $(POWER7_FLAG)
+    BLAKE2_FLAG = $(POWER7_FLAG)
+    CHAM_FLAG = $(POWER7_FLAG)
+    LEA_FLAG = $(POWER7_FLAG)
+    SIMECK_FLAG = $(POWER7_FLAG)
+    SIMON_FLAG = $(POWER7_FLAG)
+    SPECK_FLAG = $(POWER7_FLAG)
+  endif
+
+  # IBM XL C/C++
+  HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr6 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
   ifneq ($(HAVE_ALTIVEC),0)
-    ALTIVEC_FLAG = -qarch=pwr7 -qaltivec
-    ARIA_FLAG = -qarch=pwr7 -qaltivec
-    BLAKE2_FLAG = -qarch=pwr7 -qaltivec
-    CHAM_FLAG = -qarch=pwr7 -qaltivec
-    LEA_FLAG = -qarch=pwr7 -qaltivec
-    SIMECK_FLAG = -qarch=pwr7 -qaltivec
-    SIMON_FLAG = -qarch=pwr7 -qaltivec
-    SPECK_FLAG = -qarch=pwr7 -qaltivec
-    SM4_FLAG = -qarch=pwr7 -qaltivec
+    ALTIVEC_FLAG = -qarch=pwr6 -qaltivec
   endif
-  # IBM XL C/C++
-  HAVE_CRYPTO = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr8 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
-  ifneq ($(HAVE_CRYPTO),0)
-    ALTIVEC_FLAG = -qarch=pwr8 -qaltivec
-    AES_FLAG = -qarch=pwr8 -qaltivec
-    GCM_FLAG = -qarch=pwr8 -qaltivec
-    SHA_FLAG = -qarch=pwr8 -qaltivec
-    ARIA_FLAG = -qarch=pwr8 -qaltivec
-    BLAKE2_FLAG = -qarch=pwr8 -qaltivec
-    CHAM_FLAG = -qarch=pwr8 -qaltivec
-    LEA_FLAG = -qarch=pwr8 -qaltivec
-    SIMECK_FLAG = -qarch=pwr8 -qaltivec
-    SIMON_FLAG = -qarch=pwr8 -qaltivec
-    SPECK_FLAG = -qarch=pwr8 -qaltivec
-    SM4_FLAG = -qarch=pwr8 -qaltivec
+
+  # LLVM front-ends only provide Power8. It really jams us up
+  # for ppc-simd.cpp which needs ALTIVEC/POWER4. We have similar
+  # problems with {lea|cham|simon|speck|...}-simd.cpp and POWER7.
+  HAVE_LLVM = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -E adhoc.cpp 2>&1 | $(GREP) -i -c '__llvm__')
+  ifneq ($(HAVE_LLVM),0)
+    POWER7_FLAG = $(POWER8_FLAG)
+    ARIA_FLAG = $(POWER8_FLAG)
+    BLAKE2_FLAG = $(POWER8_FLAG)
+    CHAM_FLAG = $(POWER8_FLAG)
+    LEA_FLAG = $(POWER8_FLAG)
+    SIMECK_FLAG = $(POWER8_FLAG)
+    SIMON_FLAG = $(POWER8_FLAG)
+    SPECK_FLAG = $(POWER8_FLAG)
+    ALTIVEC_FLAG = $(POWER8_FLAG)
+  endif
+
+  ifeq ($(ALTIVEC_FLAG),)
+    CXXFLAGS += -DCRYPTOPP_DISABLE_ALTIVEC
+  endif
+  ifeq ($(POWER7_FLAG),)
+    CXXFLAGS += -DCRYPTOPP_DISABLE_POWER7
+  endif
+  ifeq ($(POWER8_FLAG),)
+    CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8
   endif
 endif
 
diff --git a/ppc-simd.cpp b/ppc-simd.cpp
index 56542e38..dd4f5c95 100644
--- a/ppc-simd.cpp
+++ b/ppc-simd.cpp
@@ -6,14 +6,6 @@
 //    is needed because additional CXXFLAGS are required to enable the
 //    appropriate instructions sets in some build configurations.
 
-// TODO: Bob Wilkinson reported we are misdetecting CRYPTOPP_POWER8_AVAILABLE.
-//    The problem is, the updated compiler supports them but the down-level
-//    assembler and linker do not. We will probably need to fix it through
-//    the makefile, similar to the way x86 AES and SHA are handled. For the time
-//    being CRYPTOPP_DISABLE_POWER8 will have to be applied manually. Another
-//    twist is, we don't have access to a test machine and it must be fixed
-//    for two compilers (IBM XL C/C++ and GCC). Ugh...
-
 #include "pch.h"
 #include "config.h"
 #include "stdcpp.h"
@@ -53,7 +45,7 @@ bool CPU_ProbeAltivec()
 {
 #if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
     return false;
-#elif (CRYPTOPP_ALTIVEC_AVAILABLE) || (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
+#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
 # if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
 
     // longjmp and clobber warnings. Volatile is required.
@@ -96,239 +88,5 @@ bool CPU_ProbeAltivec()
 #endif  // CRYPTOPP_ALTIVEC_AVAILABLE
 }
 
-bool CPU_ProbePower7()
-{
-#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
-    return false;
-#elif (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
-# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
-
-    // longjmp and clobber warnings. Volatile is required.
-    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
-    volatile int result = false;
-
-    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
-    if (oldHandler == SIG_ERR)
-        return false;
-
-    volatile sigset_t oldMask;
-    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
-        return false;
-
-    if (setjmp(s_jmpSIGILL))
-        result = false;
-    else
-    {
-        // POWER7 added unaligned loads and store operations
-        byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];
-
-        // Specifically call the VSX loads and stores
-        #if defined(__xlc__) || defined(__xlC__)
-        vec_xst(vec_xl(0, b1+3), 0, b2+1);
-        #else
-        vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1);
-        #endif
-
-        result = (0 == std::memcmp(b1+3, b2+1, 16));
-    }
-
-    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
-    signal(SIGILL, oldHandler);
-    return result;
-# endif
-#else
-    return false;
-#endif  // CRYPTOPP_POWER7_AVAILABLE
-}
-
-bool CPU_ProbePower8()
-{
-#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
-    return false;
-#elif (CRYPTOPP_POWER8_AVAILABLE)
-# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
-
-    // longjmp and clobber warnings. Volatile is required.
-    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
-    volatile int result = true;
-
-    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
-    if (oldHandler == SIG_ERR)
-        return false;
-
-    volatile sigset_t oldMask;
-    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
-        return false;
-
-    if (setjmp(s_jmpSIGILL))
-        result = false;
-    else
-    {
-        // POWER8 added 64-bit SIMD operations
-        const word64 x = W64LIT(0xffffffffffffffff);
-        word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];
-
-        // Specifically call the VSX loads and stores
-        #if defined(__xlc__) || defined(__xlC__)
-        const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1);
-        const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2);
-        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
-        vec_xst((uint8x16_p)v3, 0, (byte*)w3);
-        #else
-        const uint64x2_p v1 = (uint64x2_p)vec_vsx_ld(0, (byte*)w1);
-        const uint64x2_p v2 = (uint64x2_p)vec_vsx_ld(0, (byte*)w2);
-        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
-        vec_vsx_st((uint8x16_p)v3, 0, (byte*)w3);
-        #endif
-
-        // Relies on integer wrap
-        result = (w3[0] == 3 && w3[1] == 5);
-    }
-
-    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
-    signal(SIGILL, oldHandler);
-    return result;
-# endif
-#else
-    return false;
-#endif  // CRYPTOPP_POWER8_AVAILABLE
-}
-
-bool CPU_ProbeAES()
-{
-#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
-    return false;
-#elif (CRYPTOPP_POWER8_AVAILABLE)
-# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
-
-    // longjmp and clobber warnings. Volatile is required.
-    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
-    volatile int result = true;
-
-    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
-    if (oldHandler == SIG_ERR)
-        return false;
-
-    volatile sigset_t oldMask;
-    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
-        return false;
-
-    if (setjmp(s_jmpSIGILL))
-        result = false;
-    else
-    {
-        byte key[16] = {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1,
-                        0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05};
-        byte state[16] = {0x19, 0x3d, 0xe3, 0xb3, 0xa0, 0xf4, 0xe2, 0x2b,
-                          0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08};
-        byte r[16] = {255}, z[16] = {};
-
-        uint8x16_p k = (uint8x16_p)VectorLoad(0, key);
-        uint8x16_p s = (uint8x16_p)VectorLoad(0, state);
-        s = VectorEncrypt(s, k);
-        s = VectorEncryptLast(s, k);
-        s = VectorDecrypt(s, k);
-        s = VectorDecryptLast(s, k);
-        VectorStore(s, r);
-
-        result = (0 != std::memcmp(r, z, 16));
-    }
-
-    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
-    signal(SIGILL, oldHandler);
-    return result;
-# endif
-#else
-    return false;
-#endif  // CRYPTOPP_ALTIVEC_AVAILABLE
-}
-
-bool CPU_ProbeSHA256()
-{
-#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
-    return false;
-#elif (CRYPTOPP_POWER8_AVAILABLE)
-# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
-
-    // longjmp and clobber warnings. Volatile is required.
-    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
-    volatile int result = false;
-
-    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
-    if (oldHandler == SIG_ERR)
-        return false;
-
-    volatile sigset_t oldMask;
-    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
-        return false;
-
-    if (setjmp(s_jmpSIGILL))
-        result = false;
-    else
-    {
-        byte r[16], z[16] = {0};
-        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
-
-        x = VectorSHA256<0,0>(x);
-        x = VectorSHA256<0,1>(x);
-        x = VectorSHA256<1,0>(x);
-        x = VectorSHA256<1,1>(x);
-        VectorStore(x, r);
-
-        result = (0 == std::memcmp(r, z, 16));
-    }
-
-    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
-    signal(SIGILL, oldHandler);
-    return result;
-# endif
-#else
-    return false;
-#endif  // CRYPTOPP_ALTIVEC_AVAILABLE
-}
-
-bool CPU_ProbeSHA512()
-{
-#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
-    return false;
-#elif (CRYPTOPP_POWER8_AVAILABLE)
-# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
-
-    // longjmp and clobber warnings. Volatile is required.
-    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
-    volatile int result = false;
-
-    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
-    if (oldHandler == SIG_ERR)
-        return false;
-
-    volatile sigset_t oldMask;
-    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
-        return false;
-
-    if (setjmp(s_jmpSIGILL))
-        result = false;
-    else
-    {
-        byte r[16], z[16] = {0};
-        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
-
-        x = VectorSHA512<0,0>(x);
-        x = VectorSHA512<0,1>(x);
-        x = VectorSHA512<1,0>(x);
-        x = VectorSHA512<1,1>(x);
-        VectorStore(x, r);
-
-        result = (0 == std::memcmp(r, z, 16));
-    }
-
-    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
-    signal(SIGILL, oldHandler);
-    return result;
-# endif
-#else
-    return false;
-#endif  // CRYPTOPP_POWER8_AVAILABLE
-}
 # endif  // CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64
 NAMESPACE_END
diff --git a/ppc-simd.h b/ppc-simd.h
index 08426839..7e7ef52e 100644
--- a/ppc-simd.h
+++ b/ppc-simd.h
@@ -19,6 +19,26 @@
 #include "config.h"
 #include "misc.h"
 
+// We are boxed into undefining macros like CRYPTOPP_POWER8_AVAILABLE.
+// We set CRYPTOPP_POWER8_AVAILABLE based on compiler versions because
+// we needed them for the SIMD and non-SIMD files. When the SIMD file is
+// compiled it may only get -mcpu=power4 or -mcpu=power7, so the POWER7
+// or POWER8 stuff is not actually available when this header is included.
+#if !defined(__ALTIVEC__)
+# undef CRYPTOPP_ALTIVEC_AVAILABLE
+#endif
+
+#if !defined(_ARCH_PWR7)
+# undef CRYPTOPP_POWER7_AVAILABLE
+#endif
+
+#if !(defined(_ARCH_PWR8) || defined(_ARCH_PWR9) || defined(__CRYPTO) || defined(__CRYPTO__))
+# undef CRYPTOPP_POWER8_AVAILABLE
+# undef CRYPTOPP_POWER8_AES_AVAILABLE
+# undef CRYPTOPP_POWER8_SHA_AVAILABLE
+# undef CRYPTOPP_POWER8_PMULL_AVAILABLE
+#endif  // POWER8 not enabled for this translation unit
+
 #if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
 # include <altivec.h>
 # undef vector
@@ -28,84 +48,112 @@
 
 NAMESPACE_BEGIN(CryptoPP)
 
+// Datatypes
 #if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
-
 typedef __vector unsigned char   uint8x16_p;
 typedef __vector unsigned short  uint16x8_p;
 typedef __vector unsigned int    uint32x4_p;
-
-#if defined(CRYPTOPP_POWER8_AVAILABLE)
+#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
 typedef __vector unsigned long long uint64x2_p;
 #endif
+#endif  // ALTIVEC/POWER4 datatypes
 
-#endif  // CRYPTOPP_ALTIVEC_AVAILABLE
+// POWER4 and above
+#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
 
-#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) && !defined(CRYPTOPP_POWER7_AVAILABLE)
-
-inline uint32x4_p VectorLoad(const byte src[16])
+/// \brief Reverse a vector
+/// \tparam T vector type
+/// \param src the vector
+/// \details Reverse() endian swaps the bytes in a vector
+/// \sa Reverse(), VectorLoadBE(), VectorLoad(), VectorLoadKey()
+/// \since Crypto++ 6.0
+template <class T>
+inline T Reverse(const T& src)
 {
-    uint8x16_p data;
-    if (IsAlignedOn(src, 16))
-    {
-        data = vec_ld(0, src);
-    }
-    else
-    {
-        // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
-        const uint8x16_p perm = vec_lvsl(0, src);
-        const uint8x16_p low = vec_ld(0, src);
-        const uint8x16_p high = vec_ld(15, src);
-        data = vec_perm(low, high, perm);
-    }
-
-#if defined(CRYPTOPP_BIG_ENDIAN)
-    return (uint32x4_p)data;
-#else
     const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
-    return (uint32x4_p)vec_perm(data, data, mask);
-#endif
+    return vec_perm(src, src, mask);
 }
 
-inline void VectorStore(const uint32x4_p data, byte dest[16])
+/// \brief Permutes two vectors
+/// \tparam T1 vector type of vec1, vec2 and the result
+/// \tparam T2 vector type of mask
+/// \param vec1 the first vector
+/// \param vec2 the second vector
+/// \param mask vector mask
+/// \details VectorPermute returns a new vector from vec1 and vec2
+///   based on mask. mask is cast to an uint8x16_p type vector. The
+///   return vector is the same type as vec1.
+/// \since Crypto++ 6.0
+template <class T1, class T2>
+inline T1 VectorPermute(const T1& vec1, const T1& vec2, const T2& mask)
+{
+    return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask);
+}
+
+/// \brief XOR two vectors
+/// \tparam T1 vector type of vec1 and the result
+/// \tparam T2 vector type of vec2
+/// \param vec1 the first vector
+/// \param vec2 the second vector
+/// \details VectorXor returns vec1 XOR vec2. vec2 is cast to the same
+///   type as vec1. The return vector is the same type as vec1.
+/// \since Crypto++ 6.0
+template <class T1, class T2>
+inline T1 VectorXor(const T1& vec1, const T2& vec2)
+{
+    return (T1)vec_xor(vec1, (T1)vec2);
+}
+
+/// \brief Add two vectors
+/// \tparam T1 vector type of vec1 and the result
+/// \tparam T2 vector type of vec2
+/// \param vec1 the first vector
+/// \param vec2 the second vector
+/// \details VectorAdd returns the vec_add() sum of vec1 and vec2.
+///   vec2 is cast to the same type as vec1. The return vector
+///   is the same type as vec1.
+/// \since Crypto++ 6.0
+template <class T1, class T2>
+inline T1 VectorAdd(const T1& vec1, const T2& vec2)
+{
+    return (T1)vec_add(vec1, (T1)vec2);
+}
+
+/// \brief Shift two vectors left
+/// \tparam C shift byte count
+/// \tparam T1 vector type
+/// \tparam T2 vector type
+/// \param vec1 the first vector
+/// \param vec2 the second vector
+/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
+///   new vector after shifting the concatenation by the specified number
+///   of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
+///   vector is the same type as vec1.
+/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
+///   c)</tt>. On little endian machines VectorShiftLeft() is translated to
+///   <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
+///   if on a big endian machine as shown below.
+/// <pre>
+///    uint8x16_p r0 = {0};
+///    uint8x16_p r1 = VectorLoad(ptr);
+///    uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
+/// </pre>
+/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
+///   endian sensitive?</A> on Stack Overflow
+/// \since Crypto++ 6.0
+template <unsigned int C, class T1, class T2>
+inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
 {
 #if defined(CRYPTOPP_LITTLE_ENDIAN)
-    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
-    const uint8x16_p t1 = (uint8x16_p)vec_perm(data, data, mask);
+    return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, 16-C);
 #else
-    const uint8x16_p t1 = (uint8x16_p)data;
+    return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, C);
 #endif
-
-    if (IsAlignedOn(dest, 16))
-    {
-        vec_st(t1, 0,  dest);
-    }
-    else
-    {
-        // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
-        const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
-        vec_ste((uint8x16_p) t2,  0, (unsigned char*) dest);
-        vec_ste((uint16x8_p) t2,  1, (unsigned short*)dest);
-        vec_ste((uint32x4_p) t2,  3, (unsigned int*)  dest);
-        vec_ste((uint32x4_p) t2,  4, (unsigned int*)  dest);
-        vec_ste((uint32x4_p) t2,  8, (unsigned int*)  dest);
-        vec_ste((uint32x4_p) t2, 12, (unsigned int*)  dest);
-        vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
-        vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
-    }
 }
 
-inline uint32x4_p VectorXor(const uint32x4_p vec1, const uint32x4_p vec2)
-{
-    return vec_xor(vec1, vec2);
-}
-
-inline uint32x4_p VectorAdd(const uint32x4_p vec1, const uint32x4_p vec2)
-{
-    return vec_add(vec1, vec2);
-}
-
-#endif
+#endif  // POWER4 and above
 
+// POWER7/POWER4 load and store
 #if defined(CRYPTOPP_POWER7_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
 
 /// \brief Reverse a 16-byte array
@@ -124,19 +172,6 @@ inline void ReverseByteArrayLE(byte src[16])
 #endif
 }
 
-/// \brief Reverse a vector
-/// \tparam T vector type
-/// \param src the vector
-/// \details Reverse() endian swaps the bytes in a vector
-/// \sa Reverse(), VectorLoadBE(), VectorLoad(), VectorLoadKey()
-/// \since Crypto++ 6.0
-template <class T>
-inline T Reverse(const T& src)
-{
-    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
-    return vec_perm(src, src, mask);
-}
-
 /// \brief Loads a vector from a byte array
 /// \param src the byte array
 /// \details Loads a vector in big endian format from a byte array.
@@ -346,86 +381,65 @@ inline void VectorStore(const T& src, int off, byte dest[16])
 #endif
 }
 
-/// \brief Permutes two vectors
-/// \tparam T1 vector type
-/// \tparam T2 vector type
-/// \param vec1 the first vector
-/// \param vec2 the second vector
-/// \param mask vector mask
-/// \details VectorPermute returns a new vector from vec1 and vec2
-///   based on mask. mask is an uint8x16_p type vector. The return
-///   vector is the same type as vec1.
-/// \since Crypto++ 6.0
-template <class T1, class T2>
-inline T1 VectorPermute(const T1& vec1, const T1& vec2, const T2& mask)
-{
-    return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask);
-}
+#else  // not CRYPTOPP_POWER7_AVAILABLE
 
-/// \brief XOR two vectors
-/// \tparam T1 vector type
-/// \tparam T2 vector type
-/// \param vec1 the first vector
-/// \param vec2 the second vector
-/// \details VectorXor returns a new vector from vec1 and vec2. The return
-///   vector is the same type as vec1.
-/// \since Crypto++ 6.0
-template <class T1, class T2>
-inline T1 VectorXor(const T1& vec1, const T2& vec2)
+// POWER7 is not available. Slow Altivec loads and stores.
+inline uint32x4_p VectorLoad(const byte src[16])
 {
-    return (T1)vec_xor(vec1, (T1)vec2);
-}
+    uint8x16_p data;
+    if (IsAlignedOn(src, 16))
+    {
+        data = vec_ld(0, src);
+    }
+    else
+    {
+        // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
+        const uint8x16_p perm = vec_lvsl(0, src);
+        const uint8x16_p low = vec_ld(0, src);
+        const uint8x16_p high = vec_ld(15, src);
+        data = vec_perm(low, high, perm);
+    }
 
-/// \brief Add two vector
-/// \tparam T1 vector type
-/// \tparam T2 vector type
-/// \param vec1 the first vector
-/// \param vec2 the second vector
-/// \details VectorAdd returns a new vector from vec1 and vec2.
-///   vec2 is cast to the same type as vec1. The return vector
-///   is the same type as vec1.
-/// \since Crypto++ 6.0
-template <class T1, class T2>
-inline T1 VectorAdd(const T1& vec1, const T2& vec2)
-{
-    return (T1)vec_add(vec1, (T1)vec2);
-}
-
-/// \brief Shift two vectors left
-/// \tparam C shift byte count
-/// \tparam T1 vector type
-/// \tparam T2 vector type
-/// \param vec1 the first vector
-/// \param vec2 the second vector
-/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
-///   new vector after shifting the concatenation by the specified number
-///   of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
-///   vector is the same type as vec1.
-/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
-///   c)</tt>. On little endian machines VectorShiftLeft() is translated to
-///   <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
-///   if on a big endian machine as shown below.
-/// <pre>
-///    uint8x16_p r0 = {0};
-///    uint8x16_p r1 = VectorLoad(ptr);
-///    uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
-/// </pre>
-/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
-///   endian sensitive?</A> on Stack Overflow
-/// \since Crypto++ 6.0
-template <unsigned int C, class T1, class T2>
-inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
-{
-#if defined(CRYPTOPP_LITTLE_ENDIAN)
-    return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, 16-C);
+#if defined(CRYPTOPP_BIG_ENDIAN)
+    return (uint32x4_p)data;
 #else
-    return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, C);
+    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
+    return (uint32x4_p)vec_perm(data, data, mask);
 #endif
 }
 
-#endif  // CRYPTOPP_POWER7_AVAILABLE
+inline void VectorStore(const uint32x4_p data, byte dest[16])
+{
+#if defined(CRYPTOPP_LITTLE_ENDIAN)
+    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
+    const uint8x16_p t1 = (uint8x16_p)vec_perm(data, data, mask);
+#else
+    const uint8x16_p t1 = (uint8x16_p)data;
+#endif
 
-#if defined(CRYPTOPP_POWER8_AES_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
+    if (IsAlignedOn(dest, 16))
+    {
+        vec_st(t1, 0,  dest);
+    }
+    else
+    {
+        // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
+        const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
+        vec_ste((uint8x16_p) t2,  0, (unsigned char*) dest);
+        vec_ste((uint16x8_p) t2,  1, (unsigned short*)dest);
+        vec_ste((uint32x4_p) t2,  3, (unsigned int*)  dest);
+        vec_ste((uint32x4_p) t2,  4, (unsigned int*)  dest);
+        vec_ste((uint32x4_p) t2,  8, (unsigned int*)  dest);
+        vec_ste((uint32x4_p) t2, 12, (unsigned int*)  dest);
+        vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
+        vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
+    }
+}
+
+#endif  // POWER4/POWER7 load and store
+
+// POWER8 crypto
+#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
 
 /// \brief One round of AES encryption
 /// \tparam T1 vector type
@@ -507,9 +521,9 @@ inline T1 VectorDecryptLast(const T1& state, const T2& key)
 #endif
 }
 
-#endif  // CRYPTOPP_POWER8_AES_AVAILABLE
+#endif  // POWER8 crypto
 
-#if defined(CRYPTOPP_POWER8_SHA_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
+#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
 
 /// \brief SHA256 Sigma functions
 /// \tparam func function
@@ -551,7 +565,7 @@ inline T VectorSHA512(const T& vec)
 #endif
 }
 
-#endif  // CRYPTOPP_POWER8_SHA_AVAILABLE
+#endif  // POWER8 crypto
 
 NAMESPACE_END
 
diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp
index 6abfe3e5..8b98c1ce 100644
--- a/rijndael-simd.cpp
+++ b/rijndael-simd.cpp
@@ -25,13 +25,6 @@
 #include "misc.h"
 #include "adv-simd.h"
 
-// We set CRYPTOPP_POWER8_CRYPTO_AVAILABLE based on compiler version.
-// If the crypto is not available, then we have to disable it here.
-#if !(defined(__CRYPTO) || defined(_ARCH_PWR8) || defined(_ARCH_PWR9))
-# undef CRYPTOPP_POWER8_CRYPTO_AVAILABLE
-# undef CRYPTOPP_POWER8_AES_AVAILABLE
-#endif
-
 #if (CRYPTOPP_AESNI_AVAILABLE)
 # include <smmintrin.h>
 # include <wmmintrin.h>
@@ -68,6 +61,8 @@ extern const char RIJNDAEL_SIMD_FNAME[] = __FILE__;
 
 NAMESPACE_BEGIN(CryptoPP)
 
+// ************************* Feature Probes ************************* //
+
 #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
 extern "C" {
     typedef void (*SigHandler)(int);
@@ -142,6 +137,155 @@ bool CPU_ProbeAES()
 }
 #endif  // ARM32 or ARM64
 
+#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
+/// \brief Probe for POWER7 unaligned VSX load and store with SigIllHandler installed for SIGILL
+/// \returns true if the VSX round trip runs and reproduces the 16 source bytes, false otherwise
+bool CPU_ProbePower7()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif ((CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)) && defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+    volatile int result = false;
+
+    // Read the mask first; a failure here returns before SigIllHandler is installed.
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+        return false;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        // POWER7 added unaligned loads and store operations
+        byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];
+
+        // Specifically call the VSX loads and stores
+        #if defined(__xlc__) || defined(__xlC__)
+        vec_xst(vec_xl(0, b1+3), 0, b2+1);
+        #else
+        vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1);
+        #endif
+
+        result = (0 == std::memcmp(b1+3, b2+1, 16));
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+#else
+    return false;
+#endif  // POWER7 or POWER8 with GNU-style inline assembly
+}
+
+/// \brief Probe for POWER8 64-bit vector addition with SigIllHandler installed for SIGILL
+/// \returns true if the 64-bit vec_add runs and produces the expected sums, false otherwise
+bool CPU_ProbePower8()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif (CRYPTOPP_POWER8_AVAILABLE) && defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+    volatile int result = true;
+
+    // Read the mask first; a failure here returns before SigIllHandler is installed.
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+        return false;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        // POWER8 added 64-bit SIMD operations
+        const word64 x = W64LIT(0xffffffffffffffff);
+        word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];
+
+        // Specifically call the VSX loads and stores
+        #if defined(__xlc__) || defined(__xlC__)
+        const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1);
+        const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2);
+        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
+        vec_xst((uint8x16_p)v3, 0, (byte*)w3);
+        #else
+        const uint64x2_p v1 = (uint64x2_p)vec_vsx_ld(0, (byte*)w1);
+        const uint64x2_p v2 = (uint64x2_p)vec_vsx_ld(0, (byte*)w2);
+        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
+        vec_vsx_st((uint8x16_p)v3, 0, (byte*)w3);
+        #endif
+
+        // Relies on integer wrap: all-ones + 4 wraps to 3, all-ones + 6 wraps to 5
+        result = (w3[0] == 3 && w3[1] == 5);
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+#else
+    return false;
+#endif  // CRYPTOPP_POWER8_AVAILABLE with GNU-style inline assembly
+}
+
+/// \brief Probe for the POWER8 AES vector operations with SigIllHandler installed for SIGILL
+/// \returns true if the four AES round calls run and the stored state is not all zero, false otherwise
+bool CPU_ProbeAES()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif (CRYPTOPP_POWER8_AES_AVAILABLE) && defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+    volatile int result = true;
+
+    // Read the mask first; a failure here returns before SigIllHandler is installed.
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+        return false;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        byte key[16] = {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1,
+                        0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05};
+        byte state[16] = {0x19, 0x3d, 0xe3, 0xb3, 0xa0, 0xf4, 0xe2, 0x2b,
+                          0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08};
+        byte r[16] = {255}, z[16] = {};
+
+        uint8x16_p k = (uint8x16_p)VectorLoad(0, key);
+        uint8x16_p s = (uint8x16_p)VectorLoad(0, state);
+        s = VectorEncrypt(s, k);
+        s = VectorEncryptLast(s, k);
+        s = VectorDecrypt(s, k);
+        s = VectorDecryptLast(s, k);
+        VectorStore(s, r);
+
+        result = (0 != std::memcmp(r, z, 16));
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+#else
+    return false;
+#endif  // CRYPTOPP_POWER8_AES_AVAILABLE with GNU-style inline assembly
+}
+#endif  // PPC32 or PPC64
+
 // ***************************** ARMv8 ***************************** //
 
 #if (CRYPTOPP_ARM_AES_AVAILABLE)
diff --git a/sha-simd.cpp b/sha-simd.cpp
index 09442279..c5b4b122 100644
--- a/sha-simd.cpp
+++ b/sha-simd.cpp
@@ -185,6 +185,96 @@ bool CPU_ProbeSHA2()
 }
 #endif  // ARM32 or ARM64
 
+#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
+/// \brief Probe for the POWER8 SHA-256 vector operations with SigIllHandler installed for SIGILL
+/// \returns true if the four VectorSHA256 calls on a zero vector run and store all zeros, false otherwise
+bool CPU_ProbeSHA256()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif (CRYPTOPP_POWER8_AVAILABLE) && defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+    volatile int result = false;
+
+    // Read the mask first; a failure here returns before SigIllHandler is installed.
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+        return false;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        byte r[16], z[16] = {0};
+        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
+
+        x = VectorSHA256<0,0>(x);
+        x = VectorSHA256<0,1>(x);
+        x = VectorSHA256<1,0>(x);
+        x = VectorSHA256<1,1>(x);
+        VectorStore(x, r);
+
+        result = (0 == std::memcmp(r, z, 16));
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+#else
+    return false;
+#endif  // CRYPTOPP_POWER8_AVAILABLE with GNU-style inline assembly
+}
+
+/// \brief Probe for the POWER8 SHA-512 vector operations with SigIllHandler installed for SIGILL
+/// \returns true if the four VectorSHA512 calls on a zero vector run and store all zeros, false otherwise
+bool CPU_ProbeSHA512()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif (CRYPTOPP_POWER8_AVAILABLE) && defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+    volatile int result = false;
+
+    // Read the mask first; a failure here returns before SigIllHandler is installed.
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+        return false;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        byte r[16], z[16] = {0};
+        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
+
+        x = VectorSHA512<0,0>(x);
+        x = VectorSHA512<0,1>(x);
+        x = VectorSHA512<1,0>(x);
+        x = VectorSHA512<1,1>(x);
+        VectorStore(x, r);
+
+        result = (0 == std::memcmp(r, z, 16));
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+#else
+    return false;
+#endif  // CRYPTOPP_POWER8_AVAILABLE with GNU-style inline assembly
+}
+#endif  // PPC32 or PPC64
+
 // ***************** Intel x86 SHA ********************
 
 // provided by sha.cpp, 16-byte aigned