From 397ccd7e49de949bcf3451254c6b9599cabe6445 Mon Sep 17 00:00:00 2001
From: Jeffrey Walton <noloader@gmail.com>
Date: Wed, 13 Sep 2017 03:59:25 -0400
Subject: [PATCH] remove commented-out code for Power8

---
 rijndael-simd.cpp | 54 ++++++++++++++++++++---------------------------
 1 file changed, 23 insertions(+), 31 deletions(-)
diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp
index 229d3b6b..b9f272ee 100644
--- a/rijndael-simd.cpp
+++ b/rijndael-simd.cpp
@@ -939,16 +939,18 @@ inline VectorType VectorLoad(int off, const byte src[16])
 
 // Loads an aligned byte array, does not perform an endian conversion.
 //  This function presumes the subkey table is correct endianess.
-inline VectorType VectorLoadKey(const byte vec[16])
+inline VectorType VectorLoadKey(const byte src[16])
 {
-	return (VectorType)vec_ld(0, vec);
+	CRYPTOPP_ASSERT(IsAlignedOn(src, 16));  // checks the 16-byte alignment this loader presumes of src
+	return (VectorType)vec_ld(0, src);
 }
 
 // Loads an aligned byte array, does not perform an endian conversion.
 //  This function presumes the subkey table is correct endianess.
-inline VectorType VectorLoadKey(int off, const byte vec[16])
+inline VectorType VectorLoadKey(int off, const byte src[16])
 {
-	return (VectorType)vec_ld(off, vec);
+	CRYPTOPP_ASSERT(IsAlignedOn(src+off, 16));  // vec_ld(off, src) addresses src+off, so check the effective address rather than src alone
+	return (VectorType)vec_ld(off, src);
 }
 
 // Stores to a mis-aligned byte array, performs an endian conversion.
@@ -1150,7 +1152,7 @@ size_t Rijndael_AdvancedProcessBlocks_POWER8(F1 func1, F4 func4, const word32 *s
 			if (flags & BlockTransformation::BT_InBlockIsCounter)
 			{
 #if defined(IS_LITTLE_ENDIAN)
-				const VectorType one = {1};
+				const VectorType one = (VectorType)((uint64x2_p8){1,0});
 #else
 				const VectorType one = (VectorType)((uint64x2_p8){0,1});
 #endif
@@ -1162,45 +1164,35 @@ size_t Rijndael_AdvancedProcessBlocks_POWER8(F1 func1, F4 func4, const word32 *s
 			}
 			else
 			{
-				//inBlocks += inIncrement;
-				block1 = VectorLoad(1*inIncrement, inBlocks);
-				//inBlocks += inIncrement;
-				block2 = VectorLoad(2*inIncrement, inBlocks);
-				//inBlocks += inIncrement;
-				block3 = VectorLoad(3*inIncrement, inBlocks);
-				//inBlocks += inIncrement;
+				const int inc = static_cast<int>(inIncrement);
+				block1 = VectorLoad(1*inc, inBlocks);
+				block2 = VectorLoad(2*inc, inBlocks);
+				block3 = VectorLoad(3*inc, inBlocks);
 				inBlocks += 4*inIncrement;
 			}
 
 			if (flags & BlockTransformation::BT_XorInput)
 			{
-				block0 = VectorXor(block0, VectorLoad(0*xorIncrement, xorBlocks));
-				//xorBlocks += xorIncrement;
-				block1 = VectorXor(block1, VectorLoad(1*xorIncrement, xorBlocks));
-				//xorBlocks += xorIncrement;
-				block2 = VectorXor(block2, VectorLoad(2*xorIncrement, xorBlocks));
-				//xorBlocks += xorIncrement;
-				block3 = VectorXor(block3, VectorLoad(3*xorIncrement, xorBlocks));
-				//xorBlocks += xorIncrement;
-				xorBlocks += 4*xorIncrement;
+				const int inc = static_cast<int>(xorIncrement);
+				block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks));
+				block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks));
+				block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks));
+				block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks));
+				xorBlocks += 4*inc;
 			}
 
 			func4(block0, block1, block2, block3, subKeys, rounds);
 
 			if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
 			{
-				block0 = VectorXor(block0, VectorLoad(0*xorIncrement, xorBlocks));
-				//xorBlocks += xorIncrement;
-				block1 = VectorXor(block1, VectorLoad(1*xorIncrement, xorBlocks));
-				//xorBlocks += xorIncrement;
-				block2 = VectorXor(block2, VectorLoad(2*xorIncrement, xorBlocks));
-				//xorBlocks += xorIncrement;
-				block3 = VectorXor(block3, VectorLoad(3*xorIncrement, xorBlocks));
-				//xorBlocks += xorIncrement;
-				xorBlocks += 4*xorIncrement;
+				const int inc = static_cast<int>(xorIncrement);
+				block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks));
+				block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks));
+				block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks));
+				block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks));
+				xorBlocks += 4*inc;
 			}
 
-			// I can't get Store to run faster using indexed offsets
 			VectorStore(block0, outBlocks);
 			outBlocks += outIncrement;
 			VectorStore(block1, outBlocks);