Update comments
parent
99ab11d1ed
commit
989c3bfbf2
36
gcm-simd.cpp
36
gcm-simd.cpp
|
|
@ -206,30 +206,45 @@ INLINE uint64x2_p VMULL_00(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
// _mm_clmulepi64_si128(a, b, 0x01)
|
// _mm_clmulepi64_si128(a, b, 0x01)
|
||||||
INLINE uint64x2_p VMULL_01(const uint64x2_p& a, const uint64x2_p& b)
|
INLINE uint64x2_p VMULL_01(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
{
|
{
|
||||||
|
// Small speedup. VectorGetHigh(b) ensures the high dword of 'b' is 0.
|
||||||
|
// The 0 used in the vmull yields 0 for the high product, so the high
|
||||||
|
// dword of 'a' is "don't care".
|
||||||
#if defined(__xlc__) || defined(__xlC__)
|
#if defined(__xlc__) || defined(__xlC__)
|
||||||
return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
|
// return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
|
||||||
|
return AdjustBE(__vpmsumd (a, VectorGetHigh(b)));
|
||||||
#else
|
#else
|
||||||
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
|
// return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
|
||||||
|
return AdjustBE(__builtin_crypto_vpmsumd (a, VectorGetHigh(b)));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// _mm_clmulepi64_si128(a, b, 0x10)
|
// _mm_clmulepi64_si128(a, b, 0x10)
|
||||||
INLINE uint64x2_p VMULL_10(const uint64x2_p& a, const uint64x2_p& b)
|
INLINE uint64x2_p VMULL_10(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
{
|
{
|
||||||
|
// Small speedup. VectorGetHigh(a) ensures the high dword of 'a' is 0.
|
||||||
|
// The 0 used in the vmull yields 0 for the high product, so the high
|
||||||
|
// dword of 'b' is "don't care".
|
||||||
#if defined(__xlc__) || defined(__xlC__)
|
#if defined(__xlc__) || defined(__xlC__)
|
||||||
return AdjustBE(__vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
|
// return AdjustBE(__vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
|
||||||
|
return AdjustBE(__vpmsumd (VectorGetHigh(a), b));
|
||||||
#else
|
#else
|
||||||
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
|
// return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
|
||||||
|
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), b));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// _mm_clmulepi64_si128(a, b, 0x11)
|
// _mm_clmulepi64_si128(a, b, 0x11)
|
||||||
INLINE uint64x2_p VMULL_11(const uint64x2_p& a, const uint64x2_p& b)
|
INLINE uint64x2_p VMULL_11(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
{
|
{
|
||||||
|
// Small speedup. VectorGetLow(a) ensures the high dword of 'a' is 0.
|
||||||
|
// The 0 used in the vmull yields 0 for the high product, so the high
|
||||||
|
// dword of 'b' is "don't care".
|
||||||
#if defined(__xlc__) || defined(__xlC__)
|
#if defined(__xlc__) || defined(__xlC__)
|
||||||
return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetLow(b)));
|
// return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetLow(b)));
|
||||||
|
return AdjustBE(__vpmsumd (VectorGetLow(a), b));
|
||||||
#else
|
#else
|
||||||
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetLow(b)));
|
// return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetLow(b)));
|
||||||
|
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), b));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
|
#endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
|
||||||
|
|
@ -592,7 +607,7 @@ __m128i _mm_clmulepi64_si128(const __m128i &a, const __m128i &b, int i)
|
||||||
}
|
}
|
||||||
#endif // Testing
|
#endif // Testing
|
||||||
|
|
||||||
// SunCC 5.11-5.15 compiler crash. Make the function INLINE
|
// SunCC 5.11-5.15 compiler crash. Make the function inline
|
||||||
// and parameters non-const. Also see GH #188 and GH #224.
|
// and parameters non-const. Also see GH #188 and GH #224.
|
||||||
inline __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128i& r)
|
inline __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128i& r)
|
||||||
{
|
{
|
||||||
|
|
@ -622,8 +637,8 @@ inline __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128
|
||||||
return _mm_xor_si128(c2, c1);
|
return _mm_xor_si128(c2, c1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// SunCC 5.13-5.14 compiler crash. Don't make the function INLINE.
|
// SunCC 5.13-5.14 compiler crash. Don't make the function inline.
|
||||||
// This is in contrast to GCM_Reduce_CLMUL, which must be INLINE.
|
// This is in contrast to GCM_Reduce_CLMUL, which must be inline.
|
||||||
__m128i GCM_Multiply_CLMUL(const __m128i &x, const __m128i &h, const __m128i &r)
|
__m128i GCM_Multiply_CLMUL(const __m128i &x, const __m128i &h, const __m128i &r)
|
||||||
{
|
{
|
||||||
const __m128i c0 = _mm_clmulepi64_si128(x,h,0);
|
const __m128i c0 = _mm_clmulepi64_si128(x,h,0);
|
||||||
|
|
@ -739,10 +754,9 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
|
||||||
#if CRYPTOPP_ALTIVEC_AVAILABLE
|
#if CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c)
|
void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c)
|
||||||
{
|
{
|
||||||
// *UINT64X2_CAST(a) = veorq_u64(*CONST_UINT64X2_CAST(b), *CONST_UINT64X2_CAST(c));
|
|
||||||
VectorStore(VectorXor(VectorLoad(b), VectorLoad(c)), a);
|
VectorStore(VectorXor(VectorLoad(b), VectorLoad(c)), a);
|
||||||
}
|
}
|
||||||
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
|
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
|
|
||||||
#if CRYPTOPP_POWER8_VMULL_AVAILABLE
|
#if CRYPTOPP_POWER8_VMULL_AVAILABLE
|
||||||
|
|
||||||
|
|
|
||||||
13
ppc-simd.h
13
ppc-simd.h
|
|
@ -331,7 +331,7 @@ inline T VectorRotateRight(const T& vec)
|
||||||
/// \returns vector created from low dword
|
/// \returns vector created from low dword
|
||||||
/// \details VectorGetLow() extracts the low dword from a vector. The low dword
|
/// \details VectorGetLow() extracts the low dword from a vector. The low dword
|
||||||
/// is composed of the least significant bits and occupies bytes 8 through 15
|
/// is composed of the least significant bits and occupies bytes 8 through 15
|
||||||
/// when viewed as a big endian array. The returned array is the same type as
|
/// when viewed as a big-endian array. The returned vector is the same type as
|
||||||
/// the original vector and padded with 0's in the most significant bit positions.
|
/// the original vector and padded with 0's in the most significant bit positions.
|
||||||
template <class T>
|
template <class T>
|
||||||
inline T VectorGetLow(const T& val)
|
inline T VectorGetLow(const T& val)
|
||||||
|
|
@ -344,17 +344,18 @@ inline T VectorGetLow(const T& val)
|
||||||
/// \brief Extract a dword from a vector
|
/// \brief Extract a dword from a vector
|
||||||
/// \tparam T vector type
|
/// \tparam T vector type
|
||||||
/// \param val the vector
|
/// \param val the vector
|
||||||
/// \returns vector created from low dword
|
/// \returns vector created from high dword
|
||||||
/// \details VectorGetHigh() extracts the high dword from a vector. The high dword
|
/// \details VectorGetHigh() extracts the high dword from a vector. The high dword
|
||||||
/// is composed of the most significant bits and occupies bytes 0 through 7
|
/// is composed of the most significant bits and occupies bytes 0 through 7
|
||||||
/// when viewed as a big endian array. The returned array is the same type as
|
/// when viewed as a big-endian array. The returned vector is the same type as
|
||||||
/// the original vector and padded with 0's in the most significant bit positions.
|
/// the original vector and padded with 0's in the most significant bit positions.
|
||||||
template <class T>
|
template <class T>
|
||||||
inline T VectorGetHigh(const T& val)
|
inline T VectorGetHigh(const T& val)
|
||||||
{
|
{
|
||||||
const T zero = {0};
|
//const T zero = {0};
|
||||||
const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 0,1,2,3, 4,5,6,7 };
|
//const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 0,1,2,3, 4,5,6,7 };
|
||||||
return (T)vec_perm(val, zero, mask);
|
//return (T)vec_perm(val, zero, mask);
|
||||||
|
return VectorShiftRight<8>(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Compare two vectors
|
/// \brief Compare two vectors
|
||||||
|
|
|
||||||
1
test.cpp
1
test.cpp
|
|
@ -993,7 +993,6 @@ bool Validate(int alg, bool thorough, const char *seedInput)
|
||||||
case 9994: result = TestHuffmanCodes(); break;
|
case 9994: result = TestHuffmanCodes(); break;
|
||||||
// http://github.com/weidai11/cryptopp/issues/346
|
// http://github.com/weidai11/cryptopp/issues/346
|
||||||
case 9993: result = TestASN1Parse(); break;
|
case 9993: result = TestASN1Parse(); break;
|
||||||
|
|
||||||
# if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
|
# if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
case 9992: result = TestAltivecOps(); break;
|
case 9992: result = TestAltivecOps(); break;
|
||||||
# endif
|
# endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue