Update comments

pull/703/head
Jeffrey Walton 2018-08-09 18:18:40 -04:00
parent 99ab11d1ed
commit 989c3bfbf2
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
3 changed files with 32 additions and 18 deletions

View File

@ -206,30 +206,45 @@ INLINE uint64x2_p VMULL_00(const uint64x2_p& a, const uint64x2_p& b)
// _mm_clmulepi64_si128(a, b, 0x01)
// POWER8 stand-in for the x86 intrinsic call named above. The product comes
// from the vpmsumd builtin and is passed through AdjustBE() before returning.
// \param a first multiplicand
// \param b second multiplicand; always reduced with VectorGetHigh()
// \returns AdjustBE() applied to the vpmsumd result
INLINE uint64x2_p VMULL_01(const uint64x2_p& a, const uint64x2_p& b)
{
// Small speedup. VectorGetHigh(b) ensures the high dword of 'b' is 0.
// The 0 used in the vmull yields 0 for the high product, so the high
// dword of 'a' is "don't care".
// NOTE(review): each preprocessor branch below holds two live return
// statements. As written only the first one (both operands masked) can
// execute and the 'a'-unmasked return after it is unreachable. The hunk
// header suggests these are the old and new lines of a diff shown together;
// confirm which single return is intended.
#if defined(__xlc__) || defined(__xlC__)
// Compilers defining __xlc__/__xlC__ spell the builtin __vpmsumd.
return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
// return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
return AdjustBE(__vpmsumd (a, VectorGetHigh(b)));
#else
// Every other compiler uses the __builtin_crypto_vpmsumd spelling.
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
// return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
return AdjustBE(__builtin_crypto_vpmsumd (a, VectorGetHigh(b)));
#endif
}
// _mm_clmulepi64_si128(a, b, 0x10)
// POWER8 stand-in for the x86 intrinsic call named above. The product comes
// from the vpmsumd builtin and is passed through AdjustBE() before returning.
// \param a first multiplicand; always reduced with VectorGetHigh()
// \param b second multiplicand
// \returns AdjustBE() applied to the vpmsumd result
INLINE uint64x2_p VMULL_10(const uint64x2_p& a, const uint64x2_p& b)
{
// Small speedup. VectorGetHigh(a) ensures the high dword of 'a' is 0.
// The 0 used in the vmull yields 0 for the high product, so the high
// dword of 'b' is "don't care".
// NOTE(review): each preprocessor branch below holds two live return
// statements. As written only the first one (both operands masked) can
// execute and the 'b'-unmasked return after it is unreachable. The hunk
// header suggests these are the old and new lines of a diff shown together;
// confirm which single return is intended.
#if defined(__xlc__) || defined(__xlC__)
// Compilers defining __xlc__/__xlC__ spell the builtin __vpmsumd.
return AdjustBE(__vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
// return AdjustBE(__vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
return AdjustBE(__vpmsumd (VectorGetHigh(a), b));
#else
// Every other compiler uses the __builtin_crypto_vpmsumd spelling.
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
// return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), b));
#endif
}
// _mm_clmulepi64_si128(a, b, 0x11)
// POWER8 stand-in for the x86 intrinsic call named above. The product comes
// from the vpmsumd builtin and is passed through AdjustBE() before returning.
// \param a first multiplicand; always reduced with VectorGetLow()
// \param b second multiplicand
// \returns AdjustBE() applied to the vpmsumd result
INLINE uint64x2_p VMULL_11(const uint64x2_p& a, const uint64x2_p& b)
{
// Small speedup. VectorGetLow(a) ensures the high dword of 'a' is 0.
// The 0 used in the vmull yields 0 for the high product, so the high
// dword of 'b' is "don't care".
// NOTE(review): each preprocessor branch below holds two live return
// statements. As written only the first one (both operands masked) can
// execute and the 'b'-unmasked return after it is unreachable. The hunk
// header suggests these are the old and new lines of a diff shown together;
// confirm which single return is intended.
#if defined(__xlc__) || defined(__xlC__)
// Compilers defining __xlc__/__xlC__ spell the builtin __vpmsumd.
return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetLow(b)));
// return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetLow(b)));
return AdjustBE(__vpmsumd (VectorGetLow(a), b));
#else
// Every other compiler uses the __builtin_crypto_vpmsumd spelling.
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetLow(b)));
// return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetLow(b)));
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), b));
#endif
}
#endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
@ -592,7 +607,7 @@ __m128i _mm_clmulepi64_si128(const __m128i &a, const __m128i &b, int i)
}
#endif // Testing
// SunCC 5.11-5.15 compiler crash. Make the function INLINE
// SunCC 5.11-5.15 compiler crash. Make the function inline
// and parameters non-const. Also see GH #188 and GH #224.
inline __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128i& r)
{
@ -622,8 +637,8 @@ inline __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128
return _mm_xor_si128(c2, c1);
}
// SunCC 5.13-5.14 compiler crash. Don't make the function INLINE.
// This is in contrast to GCM_Reduce_CLMUL, which must be INLINE.
// SunCC 5.13-5.14 compiler crash. Don't make the function inline.
// This is in contrast to GCM_Reduce_CLMUL, which must be inline.
__m128i GCM_Multiply_CLMUL(const __m128i &x, const __m128i &h, const __m128i &r)
{
const __m128i c0 = _mm_clmulepi64_si128(x,h,0);
@ -739,10 +754,9 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
#if CRYPTOPP_ALTIVEC_AVAILABLE
// XORs the vector loaded from 'b' with the vector loaded from 'c' and stores
// the result at 'a'.
// \param a destination buffer
// \param b first source buffer
// \param c second source buffer
// NOTE(review): the name implies 16 bytes per buffer; the actual width and any
// alignment requirement come from VectorLoad/VectorStore, which are defined
// outside this view -- confirm.
void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c)
{
// Leftover from a NEON-style variant (veorq_u64); not used by this Altivec path.
// *UINT64X2_CAST(a) = veorq_u64(*CONST_UINT64X2_CAST(b), *CONST_UINT64X2_CAST(c));
VectorStore(VectorXor(VectorLoad(b), VectorLoad(c)), a);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
#if CRYPTOPP_POWER8_VMULL_AVAILABLE

View File

@ -331,7 +331,7 @@ inline T VectorRotateRight(const T& vec)
/// \returns vector created from low dword
/// \details VectorGetLow() extracts the low dword from a vector. The low dword
/// is composed of the least significant bits and occupies bytes 8 through 15
/// when viewed as a big endian array. The returned array is the same type as
/// when viewed as a big endian array. The return vector is the same type as
/// the original vector and padded with 0's in the most significant bit positions.
template <class T>
inline T VectorGetLow(const T& val)
@ -344,17 +344,18 @@ inline T VectorGetLow(const T& val)
/// \brief Extract a dword from a vector
/// \tparam T vector type
/// \param val the vector
/// \returns vector created from low dword
/// \returns vector created from high dword
/// \details VectorGetHigh() extracts the high dword from a vector. The high dword
/// is composed of the most significant bits and occupies bytes 0 through 7
/// when viewed as a big endian array. The returned array is the same type as
/// when viewed as a big endian array. The return vector is the same type as
/// the original vector and padded with 0's in the most significant bit positions.
template <class T>
inline T VectorGetHigh(const T& val)
{
// NOTE(review): two implementations appear in this body. As written the
// permute-based return executes and the VectorShiftRight return after it is
// unreachable. This looks like the old and new lines of a diff shown
// together; confirm which one is intended.
//
// Permute-based version: mask entries 0..7 select bytes 0-7 of 'val' for
// result bytes 8-15. Entry 16 presumably selects a byte of 'zero' (the
// second vec_perm operand) for result bytes 0-7 -- confirm against the
// vec_perm reference.
const T zero = {0};
const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 0,1,2,3, 4,5,6,7 };
return (T)vec_perm(val, zero, mask);
// Commented-out copy of the permute-based version, kept for reference.
//const T zero = {0};
//const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 0,1,2,3, 4,5,6,7 };
//return (T)vec_perm(val, zero, mask);
// Shift-based version. VectorShiftRight is defined outside this view;
// presumably <8> shifts by 8 bytes and fills with zeros -- confirm.
return VectorShiftRight<8>(val);
}
/// \brief Compare two vectors

View File

@ -993,7 +993,6 @@ bool Validate(int alg, bool thorough, const char *seedInput)
case 9994: result = TestHuffmanCodes(); break;
// http://github.com/weidai11/cryptopp/issues/346
case 9993: result = TestASN1Parse(); break;
# if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
case 9992: result = TestAltivecOps(); break;
# endif