Update comments
parent
99ab11d1ed
commit
989c3bfbf2
36
gcm-simd.cpp
36
gcm-simd.cpp
|
|
@ -206,30 +206,45 @@ INLINE uint64x2_p VMULL_00(const uint64x2_p& a, const uint64x2_p& b)
|
|||
// _mm_clmulepi64_si128(a, b, 0x01)
|
||||
INLINE uint64x2_p VMULL_01(const uint64x2_p& a, const uint64x2_p& b)
|
||||
{
|
||||
// Small speedup. VectorGetHigh(b) ensures the high dword of 'b' is 0.
|
||||
// The 0 used in the vmull yields 0 for the high product, so the high
|
||||
// dword of 'a' is "don't care".
|
||||
#if defined(__xlc__) || defined(__xlC__)
|
||||
return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
|
||||
// return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
|
||||
return AdjustBE(__vpmsumd (a, VectorGetHigh(b)));
|
||||
#else
|
||||
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
|
||||
// return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetHigh(b)));
|
||||
return AdjustBE(__builtin_crypto_vpmsumd (a, VectorGetHigh(b)));
|
||||
#endif
|
||||
}
|
||||
|
||||
// _mm_clmulepi64_si128(a, b, 0x10)
|
||||
INLINE uint64x2_p VMULL_10(const uint64x2_p& a, const uint64x2_p& b)
|
||||
{
|
||||
// Small speedup. VectorGetHigh(a) ensures the high dword of 'a' is 0.
|
||||
// The 0 used in the vmull yields 0 for the high product, so the high
|
||||
// dword of 'b' is "don't care".
|
||||
#if defined(__xlc__) || defined(__xlC__)
|
||||
return AdjustBE(__vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
|
||||
// return AdjustBE(__vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
|
||||
return AdjustBE(__vpmsumd (VectorGetHigh(a), b));
|
||||
#else
|
||||
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
|
||||
// return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetLow(b)));
|
||||
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetHigh(a), b));
|
||||
#endif
|
||||
}
|
||||
|
||||
// _mm_clmulepi64_si128(a, b, 0x11)
|
||||
INLINE uint64x2_p VMULL_11(const uint64x2_p& a, const uint64x2_p& b)
|
||||
{
|
||||
// Small speedup. VectorGetLow(a) ensures the high dword of 'a' is 0.
|
||||
// The 0 used in the vmull yields 0 for the high product, so the high
|
||||
// dword of 'b' is "don't care".
|
||||
#if defined(__xlc__) || defined(__xlC__)
|
||||
return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetLow(b)));
|
||||
// return AdjustBE(__vpmsumd (VectorGetLow(a), VectorGetLow(b)));
|
||||
return AdjustBE(__vpmsumd (VectorGetLow(a), b));
|
||||
#else
|
||||
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetLow(b)));
|
||||
// return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), VectorGetLow(b)));
|
||||
return AdjustBE(__builtin_crypto_vpmsumd (VectorGetLow(a), b));
|
||||
#endif
|
||||
}
|
||||
#endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
|
||||
|
|
@ -592,7 +607,7 @@ __m128i _mm_clmulepi64_si128(const __m128i &a, const __m128i &b, int i)
|
|||
}
|
||||
#endif // Testing
|
||||
|
||||
// SunCC 5.11-5.15 compiler crash. Make the function INLINE
|
||||
// SunCC 5.11-5.15 compiler crash. Make the function inline
|
||||
// and parameters non-const. Also see GH #188 and GH #224.
|
||||
inline __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128i& r)
|
||||
{
|
||||
|
|
@ -622,8 +637,8 @@ inline __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128
|
|||
return _mm_xor_si128(c2, c1);
|
||||
}
|
||||
|
||||
// SunCC 5.13-5.14 compiler crash. Don't make the function INLINE.
|
||||
// This is in contrast to GCM_Reduce_CLMUL, which must be INLINE.
|
||||
// SunCC 5.13-5.14 compiler crash. Don't make the function inline.
|
||||
// This is in contrast to GCM_Reduce_CLMUL, which must be inline.
|
||||
__m128i GCM_Multiply_CLMUL(const __m128i &x, const __m128i &h, const __m128i &r)
|
||||
{
|
||||
const __m128i c0 = _mm_clmulepi64_si128(x,h,0);
|
||||
|
|
@ -739,10 +754,9 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
|
|||
#if CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c)
|
||||
{
|
||||
// *UINT64X2_CAST(a) = veorq_u64(*CONST_UINT64X2_CAST(b), *CONST_UINT64X2_CAST(c));
|
||||
VectorStore(VectorXor(VectorLoad(b), VectorLoad(c)), a);
|
||||
}
|
||||
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
|
||||
#if CRYPTOPP_POWER8_VMULL_AVAILABLE
|
||||
|
||||
|
|
|
|||
13
ppc-simd.h
13
ppc-simd.h
|
|
@ -331,7 +331,7 @@ inline T VectorRotateRight(const T& vec)
|
|||
/// \returns vector created from low dword
|
||||
/// \details VectorGetLow() extracts the low dword from a vector. The low dword
|
||||
/// is composed of the least significant bits and occupies bytes 8 through 15
|
||||
/// when viewed as a big endian array. The returned array is the same type as
|
||||
/// when viewed as a big endian array. The return vector is the same type as
|
||||
/// the original vector and padded with 0's in the most significant bit positions.
|
||||
template <class T>
|
||||
inline T VectorGetLow(const T& val)
|
||||
|
|
@ -344,17 +344,18 @@ inline T VectorGetLow(const T& val)
|
|||
/// \brief Extract a dword from a vector
|
||||
/// \tparam T vector type
|
||||
/// \param val the vector
|
||||
/// \returns vector created from low dword
|
||||
/// \returns vector created from high dword
|
||||
/// \details VectorGetHigh() extracts the high dword from a vector. The high dword
|
||||
/// is composed of the most significant bits and occupies bytes 0 through 7
|
||||
/// when viewed as a big endian array. The returned array is the same type as
|
||||
/// when viewed as a big endian array. The return vector is the same type as
|
||||
/// the original vector and padded with 0's in the most significant bit positions.
|
||||
template <class T>
|
||||
inline T VectorGetHigh(const T& val)
|
||||
{
|
||||
const T zero = {0};
|
||||
const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 0,1,2,3, 4,5,6,7 };
|
||||
return (T)vec_perm(val, zero, mask);
|
||||
//const T zero = {0};
|
||||
//const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 0,1,2,3, 4,5,6,7 };
|
||||
//return (T)vec_perm(val, zero, mask);
|
||||
return VectorShiftRight<8>(val);
|
||||
}
|
||||
|
||||
/// \brief Compare two vectors
|
||||
|
|
|
|||
1
test.cpp
1
test.cpp
|
|
@ -993,7 +993,6 @@ bool Validate(int alg, bool thorough, const char *seedInput)
|
|||
case 9994: result = TestHuffmanCodes(); break;
|
||||
// http://github.com/weidai11/cryptopp/issues/346
|
||||
case 9993: result = TestASN1Parse(); break;
|
||||
|
||||
# if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
case 9992: result = TestAltivecOps(); break;
|
||||
# endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue