diff --git a/ppc-simd.cpp b/ppc-simd.cpp
index c6859d09..a0dcbb47 100644
--- a/ppc-simd.cpp
+++ b/ppc-simd.cpp
@@ -59,7 +59,7 @@ bool CPU_ProbeAltivec()
 {
 #if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
 	return false;
-#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
+#elif (CRYPTOPP_ALTIVEC_AVAILABLE) || (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
 # if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
 
 	// longjmp and clobber warnings. Volatile is required.
@@ -78,14 +78,17 @@ bool CPU_ProbeAltivec()
 		result = false;
 	else
 	{
+		CRYPTOPP_ALIGN_DATA(16)
 		const byte b1[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+		CRYPTOPP_ALIGN_DATA(16)
 		const byte b2[16] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
-		byte b3[16];
+		CRYPTOPP_ALIGN_DATA(16) byte b3[16];
 
-		const uint8x16_p v1 = (uint8x16_p)VectorLoad(0, b1);
-		const uint8x16_p v2 = (uint8x16_p)VectorLoad(0, b2);
-		const uint8x16_p v3 = (uint8x16_p)VectorXor(v1, v2);
-		VectorStore(v3, b3);
+		// Specifically call the Altivec loads and stores
+		const uint8x16_p v1 = (uint8x16_p)vec_ld(0, (byte*)b1);
+		const uint8x16_p v2 = (uint8x16_p)vec_ld(0, (byte*)b2);
+		const uint8x16_p v3 = (uint8x16_p)vec_xor(v1, v2);
+		vec_st(v3, 0, b3);
 
 		result = (0 == std::memcmp(b2, b3, 16));
 	}
@@ -103,7 +106,7 @@ bool CPU_ProbePower7()
 {
 #if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
 	return false;
-#elif (CRYPTOPP_POWER7_AVAILABLE)
+#elif (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
 # if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
 
 	// longjmp and clobber warnings. Volatile is required.
@@ -125,7 +128,6 @@ bool CPU_ProbePower7()
 		byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];
 		const uint8x16_p v1 = (uint8x16_p)VectorLoad(0, b1+3);
 		VectorStore(v1, b2+1);
-
 		result = (0 == std::memcmp(b1+3, b2+1, 16));
 	}
 
@@ -164,12 +166,12 @@ bool CPU_ProbePower8()
 		// POWER8 added 64-bit SIMD operations
 		const word64 m = W64LIT(0xffffffffffffffff);
 		word64 w1[2] = {m, m}, w2[2] = {3, 4}, w3[2];
-		const uint64x2_p v1 = (uint64x2_p)VectorLoad(0, w1);
-		const uint64x2_p v2 = (uint64x2_p)VectorLoad(0, w2);
-		VectorStore(VectorAdd(v1, v2), w3);
+		const uint64x2_p v1 = (uint64x2_p)VectorLoad(0, (byte*)w1);
+		const uint64x2_p v2 = (uint64x2_p)VectorLoad(0, (byte*)w2);
 
-		// The 64-bit add will overflow.
-		result = (w3[0] == 2 && w3[1] == 3);
+		word64 w[2];
+		VectorStore(VectorAdd(v1, v2), (byte*)w);
+		result = (w[0] == 2 && w[1] == 3);
 	}
 
 	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
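
For context, all three probes in this patch follow the same SIGILL-trap pattern: install a SIGILL handler, siglongjmp back out if the target instruction faults, and otherwise verify the result of one SIMD operation. Below is a minimal, self-contained sketch of that pattern, assuming a POSIX platform with GCC-style AltiVec support; ProbeAltivecSketch and SigIllHandler are illustrative names, not the library's own, and this is not the library's exact code.

    #include <altivec.h>
    #include <setjmp.h>
    #include <signal.h>
    #include <cstring>

    static sigjmp_buf s_env;

    static void SigIllHandler(int)
    {
        // Returning normally from a SIGILL handler would re-execute the
        // faulting instruction, so jump back past the probe instead.
        siglongjmp(s_env, 1);
    }

    bool ProbeAltivecSketch()
    {
        // Volatile so the value is not cached in a register clobbered
        // by siglongjmp (the same reason noted in the patch's comments).
        volatile bool result = true;

        void (*oldHandler)(int) = signal(SIGILL, SigIllHandler);
        if (oldHandler == SIG_ERR)
            return false;

        if (sigsetjmp(s_env, 1))
            result = false;  // SIGILL fired: AltiVec is not available
        else
        {
            // vec_ld/vec_st truncate the effective address to a 16-byte
            // boundary, so the buffers must be 16-byte aligned -- the
            // reason the patch adds CRYPTOPP_ALIGN_DATA(16) above.
            __attribute__((aligned(16))) const unsigned char b1[16] = {0};
            __attribute__((aligned(16))) const unsigned char b2[16] =
                {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
            __attribute__((aligned(16))) unsigned char b3[16];

            const __vector unsigned char v1 = vec_ld(0, b1);
            const __vector unsigned char v2 = vec_ld(0, b2);
            vec_st(vec_xor(v1, v2), 0, b3);

            // 0 XOR 1 is 1 in every lane, so b3 must equal b2.
            result = (0 == std::memcmp(b2, b3, 16));
        }

        signal(SIGILL, oldHandler);
        return result;
    }

The alignment note also explains why the patch switches the Altivec probe from the VectorLoad/VectorStore wrappers to direct vec_ld/vec_st calls: the wrappers may compile down to POWER7 unaligned loads on newer targets, while the probe must exercise the baseline Altivec instructions themselves.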