Cleanup VPMSUM probes

parent 2ec9c9963c
commit 194307308c

gcm-simd.cpp | 81
@@ -67,31 +67,6 @@ ANONYMOUS_NAMESPACE_BEGIN
 
 // ************************* Miscellaneous ************************* //
 
-// GCC 4.8 is missing PMULL gear
-#if (CRYPTOPP_ARM_PMULL_AVAILABLE)
-# if (CRYPTOPP_GCC_VERSION >= 40800) && (CRYPTOPP_GCC_VERSION < 49000)
-inline poly128_t VMULL_P64(poly64_t a, poly64_t b)
-{
-    return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
-}
-
-inline poly128_t VMULL_HIGH_P64(poly64x2_t a, poly64x2_t b)
-{
-    return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
-}
-# else
-inline poly128_t VMULL_P64(poly64_t a, poly64_t b)
-{
-    return vmull_p64(a, b);
-}
-
-inline poly128_t VMULL_HIGH_P64(poly64x2_t a, poly64x2_t b)
-{
-    return vmull_high_p64(a, b);
-}
-# endif
-#endif
-
 #if CRYPTOPP_ARM_PMULL_AVAILABLE
 #if defined(__GNUC__)
 // Schneiders, Hovsmith and O'Rourke used this trick.
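Context for the surviving ARM path (not part of the commit): the "Schneiders, Hovsmith and O'Rourke" comment appears to refer to issuing PMULL through inline assembly rather than relying on the vmull_p64 intrinsic that GCC 4.8 lacks. A minimal sketch of that style of wrapper, assuming AArch64 with a GCC-compatible compiler; the name PMULL_LOW and the exact asm are illustrative, not the file's code:

    #include <arm_neon.h>

    // Carry-less multiply of the low 64-bit lanes of a and b, issued as a
    // raw PMULL instruction so no polynomial intrinsics are required.
    inline uint64x2_t PMULL_LOW(uint64x2_t a, uint64x2_t b)
    {
        uint64x2_t r;
        __asm__ __volatile__("pmull %0.1q, %1.1d, %2.1d"
                             : "=w"(r) : "w"(a), "w"(b));
        return r;
    }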
@@ -189,31 +164,55 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
 #endif // CRYPTOPP_ARM_PMULL_AVAILABLE
 
 #if CRYPTOPP_POWER8_PMULL_AVAILABLE
-using CryptoPP::uint8x16_p;
 using CryptoPP::uint64x2_p;
-using CryptoPP::VectorXor;
+using CryptoPP::VectorAnd;
 using CryptoPP::VectorShiftRight;
 
-inline uint64x2_p VMULL_P64(uint64x2_p a, uint64x2_p b)
+inline uint64x2_p VMULL_00(uint64x2_p a, uint64x2_p b)
 {
     // Multiplies low dwords
 #if defined(__xlc__) || defined(__xlC__)
-    return __vpmsumd (a, b);
+    const uint64x2_p m = {0xffffffffffffffffull, 0};
+    return __vpmsumd (VectorAnd(a, m), VectorAnd(b, m));
 #else
-    return __builtin_crypto_vpmsumd (a, b);
+    const uint64x2_p m = {0xffffffffffffffffull, 0};
+    return __builtin_crypto_vpmsumd (VectorAnd(a, m), VectorAnd(b, m));
 #endif
 }
 
-inline uint64x2_p VMULL_HIGH_P64(uint64x2_p a, uint64x2_p b)
+inline uint64x2_p VMULL_01(uint64x2_p a, uint64x2_p b)
 {
+    // Multiplies high and low dwords
 #if defined(__xlc__) || defined(__xlC__)
-    const uint64x2_p s = VectorShiftRight<8>(a);
-    const uint64x2_p t = VectorShiftRight<8>(b);
-    return __vpmsumd (s, t);
+    const uint64x2_p m = {0xffffffffffffffffull, 0};
+    return __vpmsumd (VectorAnd(a, m), VectorShiftRight<8>(b));
 #else
-    const uint64x2_p s = VectorShiftRight<8>(a);
-    const uint64x2_p t = VectorShiftRight<8>(b);
-    return __builtin_crypto_vpmsumd (s, t);
+    const uint64x2_p m = {0xffffffffffffffffull, 0};
+    return __builtin_crypto_vpmsumd (VectorAnd(a, m), VectorShiftRight<8>(b));
+#endif
+}
+
+inline uint64x2_p VMULL_10(uint64x2_p a, uint64x2_p b)
+{
+    // Multiplies high and low dwords
+#if defined(__xlc__) || defined(__xlC__)
+    const uint64x2_p m = {0xffffffffffffffffull, 0};
+    return __vpmsumd (VectorShiftRight<8>(a), VectorAnd(b, m));
+#else
+    const uint64x2_p m = {0xffffffffffffffffull, 0};
+    return __builtin_crypto_vpmsumd (VectorShiftRight<8>(a), VectorAnd(b, m));
+#endif
+}
+
+inline uint64x2_p VMULL_11(uint64x2_p a, uint64x2_p b)
+{
+    // Multiplies high dwords
+#if defined(__xlc__) || defined(__xlC__)
+    const uint64x2_p m = {0, 0xffffffffffffffffull};
+    return __vpmsumd (VectorAnd(a, m), VectorAnd(b, m));
+#else
+    const uint64x2_p m = {0, 0xffffffffffffffffull};
+    return __builtin_crypto_vpmsumd (VectorAnd(a, m), VectorAnd(b, m));
 #endif
 }
 #endif // CRYPTOPP_POWER8_PMULL_AVAILABLE
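Context for the POWER8 helpers above (not part of the commit): vpmsumd carry-lessly multiplies each pair of corresponding 64-bit doublewords and XORs the two 128-bit products, so ANDing one lane of an operand to zero, as VMULL_00/01/10/11 now do, leaves exactly one 64x64 product. A small scalar sketch of that selection, using only standard C++; clmul64 and the sample values are illustrative, not library code:

    #include <cstdint>
    #include <cstdio>

    // 64x64 -> 128-bit carry-less (GF(2)[x]) product, returned as hi:lo.
    static void clmul64(uint64_t a, uint64_t b, uint64_t& lo, uint64_t& hi)
    {
        lo = hi = 0;
        for (int i = 0; i < 64; ++i)
            if ((b >> i) & 1) {
                lo ^= a << i;
                if (i) hi ^= a >> (64 - i);
            }
    }

    int main()
    {
        // Model of two uint64x2_p operands: x = {x0, x1}, y = {y0, y1}.
        const uint64_t x[2] = {0x1122334455667788ull, 0x99aabbccddeeff00ull};
        const uint64_t y[2] = {0x0123456789abcdefull, 0xfedcba9876543210ull};

        // vpmsumd(x, y) == clmul(x0, y0) XOR clmul(x1, y1).  Zeroing one lane
        // drops its product, which is how VMULL_00 (low dwords) and VMULL_11
        // (high dwords) each pick out a single 64x64 multiplication.
        uint64_t lo, hi;
        clmul64(x[0], y[0], lo, hi);   // the product VMULL_00 selects
        std::printf("00: %016llx%016llx\n", (unsigned long long)hi, (unsigned long long)lo);
        clmul64(x[1], y[1], lo, hi);   // the product VMULL_11 selects
        std::printf("11: %016llx%016llx\n", (unsigned long long)hi, (unsigned long long)lo);
        return 0;
    }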
@@ -293,8 +292,8 @@ bool CPU_ProbePMULL()
         b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,
             0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
 
-    const poly128_t r1 = VMULL_P64(a1, b1);
-    const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2));
+    const poly128_t r1 = VMULL_00(a1, b1);
+    const poly128_t r2 = VMULL_11((poly64x2_t)(a2), (poly64x2_t)(b2));
 
     // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
     const uint64x2_t t1 = (uint64x2_t)(r1);  // {bignum,bignum}
@@ -344,8 +343,8 @@ bool CPU_ProbePMULL()
         b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,
             0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
 
-    const uint64x2_p r1 = VMULL_P64(a1, b1);
-    const uint64x2_p r2 = VMULL_HIGH_P64((uint64x2_p)(a2), (uint64x2_p)(b2));
+    const uint64x2_p r1 = VMULL_00(a1, b1);
+    const uint64x2_p r2 = VMULL_11((uint64x2_p)(a2), (uint64x2_p)(b2));
 
     word64 w1[2], w2[2];
     VectorStore(r1, (byte*)w1); VectorStore(r2, (byte*)w2);