Update comments

2018-08-09 18:18:40 -04:00 · 2018-08-09 18:18:40 -04:00 · 989c3bfbf2
parent 99ab11d1ed
commit 989c3bfbf2
3 changed files with 32 additions and 18 deletions
--- a/gcm-simd.cpp
+++ b/gcm-simd.cpp
@ -206,30 +206,45 @@ INLINE uint64x2_p VMULL_00(const uint64x2_p& a, const uint64x2_p& b)
 // _mm_clmulepi64_si128(a, b, 0x01)
 INLINE uint64x2_p VMULL_01(const uint64x2_p& a, const uint64x2_p& b)
 {
+    // Small speedup: 'a' is passed unmasked. VectorGetHigh(b) leaves the high
+    // dword of its result 0, and that 0 makes the high product in vpmsumd 0,
+    // so the high dword of 'a' is "don't care".
 #if defined(__xlc__) || defined(__xlC__)
-    return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
+    // Was: return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
+    return AdjustBE(__vpmsumd (a, VectorGetHigh(b)));
 #else
-    return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
+    // Was: return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
+    return AdjustBE(__builtin_crypto_vpmsumd (a, VectorGetHigh(b)));
 #endif
 }

 // _mm_clmulepi64_si128(a, b, 0x10)
 INLINE uint64x2_p VMULL_10(const uint64x2_p& a, const uint64x2_p& b)
 {
+    // Small speedup: 'b' is passed unmasked. VectorGetHigh(a) leaves the high
+    // dword of its result 0, and that 0 makes the high product in vpmsumd 0,
+    // so the high dword of 'b' is "don't care".
 #if defined(__xlc__) || defined(__xlC__)
-    return AdjustBE(__vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
+    // Was: return AdjustBE(__vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
+    return AdjustBE(__vpmsumd (VectorGetHigh(a), b));
 #else
-    return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
+    // Was: return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
+    return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), b));
 #endif
 }

 // _mm_clmulepi64_si128(a, b, 0x11)
 INLINE uint64x2_p VMULL_11(const uint64x2_p& a, const uint64x2_p& b)
 {
+    // Small speedup: 'b' is passed unmasked. VectorGetLow(a) leaves the high
+    // dword of its result 0, and that 0 makes the high product in vpmsumd 0,
+    // so the high dword of 'b' is "don't care".
 #if defined(__xlc__) || defined(__xlC__)
-    return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetLow(b)));
+    // Was: return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetLow(b)));
+    return AdjustBE(__vpmsumd (VectorGetLow(a), b));
 #else
-    return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetLow(b)));
+    // Was: return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetLow(b)));
+    return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), b));
 #endif
 }
 #endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
@ -592,7 +607,7 @@ __m128i _mm_clmulepi64_si128(const __m128i &a, const __m128i &b, int i)
 }
 #endif  // Testing

-// SunCC 5.11-5.15 compiler crash. Make the function INLINE
+// SunCC 5.11-5.15 compiler crash. Make the function inline
 // and parameters non-const. Also see GH #188 and GH #224.
 inline __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128i& r)
 {
@ -622,8 +637,8 @@ inline __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128
    return _mm_xor_si128(c2, c1);
 }

-// SunCC 5.13-5.14 compiler crash. Don't make the function INLINE.
-// This is in contrast to GCM_Reduce_CLMUL, which must be INLINE.
+// SunCC 5.13-5.14 compiler crash. Don't make the function inline.
+// This is in contrast to GCM_Reduce_CLMUL, which must be inline.
 __m128i GCM_Multiply_CLMUL(const __m128i &x, const __m128i &h, const __m128i &r)
 {
    const __m128i c0 = _mm_clmulepi64_si128(x,h,0);
@ -739,10 +754,9 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
 #if CRYPTOPP_ALTIVEC_AVAILABLE
 void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c)
 {
-    // *UINT64X2_CAST(a) = veorq_u64(*CONST_UINT64X2_CAST(b), *CONST_UINT64X2_CAST(c));
    VectorStore(VectorXor(VectorLoad(b), VectorLoad(c)), a);
 }
-#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
+#endif  // CRYPTOPP_ALTIVEC_AVAILABLE

 #if CRYPTOPP_POWER8_VMULL_AVAILABLE

--- a/ppc-simd.h
+++ b/ppc-simd.h
@ -331,7 +331,7 @@ inline T VectorRotateRight(const T& vec)
 /// \returns vector created from low dword
 /// \details VectorGetLow() extracts the low dword from a vector. The low dword
 ///   is composed of the least significant bits and occupies bytes 8 through 15
-///   when viewed as a big endian array. The returned array is the same type as
+///   when viewed as a big endian array. The returned vector is the same type as
 ///   the original vector and padded with 0's in the most significant bit positions.
 template <class T>
 inline T VectorGetLow(const T& val)
@ -344,17 +344,18 @@ inline T VectorGetLow(const T& val)
 /// \brief Extract a dword from a vector
 /// \tparam T vector type
 /// \param val the vector
-/// \returns vector created from low dword
+/// \returns vector created from high dword
 /// \details VectorGetHigh() extracts the high dword from a vector. The high dword
 ///   is composed of the most significant bits and occupies bytes 0 through 7
-///   when viewed as a big endian array. The returned array is the same type as
+///   when viewed as a big endian array. The returned vector is the same type as
 ///   the original vector and padded with 0's in the most significant bit positions.
 template <class T>
 inline T VectorGetHigh(const T& val)
 {
-    const T zero = {0};
-    const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 0,1,2,3, 4,5,6,7 };
-    return (T)vec_perm(val, zero, mask);
+    //const T zero = {0};
+    //const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 0,1,2,3, 4,5,6,7 };
+    //return (T)vec_perm(val, zero, mask);
+    return VectorShiftRight<8>(val);  // replaces the vec_perm form above; presumably the same zero-padded result - TODO confirm
 }

 /// \brief Compare two vectors
--- a/test.cpp
+++ b/test.cpp
@ -993,7 +993,6 @@ bool Validate(int alg, bool thorough, const char *seedInput)
 	case 9994: result = TestHuffmanCodes(); break;
 	// http://github.com/weidai11/cryptopp/issues/346
 	case 9993: result = TestASN1Parse(); break;
-
 # if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
 	case 9992: result = TestAltivecOps(); break;
 # endif