Switch to vec_mergeh and vec_mergel for PowerPC in binary curves
This saves about 4 instructions in VMULL
pull/795/head
parent
e3bc76832c
commit
76b7390840
|
|
@ -317,8 +317,6 @@ using CryptoPP::VecOr;
|
||||||
using CryptoPP::VecXor;
|
using CryptoPP::VecXor;
|
||||||
using CryptoPP::VecAnd;
|
using CryptoPP::VecAnd;
|
||||||
|
|
||||||
using CryptoPP::VecGetLow;
|
|
||||||
using CryptoPP::VecGetHigh;
|
|
||||||
using CryptoPP::VecPermute;
|
using CryptoPP::VecPermute;
|
||||||
using CryptoPP::VecShiftLeft;
|
using CryptoPP::VecShiftLeft;
|
||||||
using CryptoPP::VecShiftRight;
|
using CryptoPP::VecShiftRight;
|
||||||
|
|
@ -336,24 +334,26 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val)
|
||||||
// _mm_clmulepi64_si128(a, b, 0x00)
|
// _mm_clmulepi64_si128(a, b, 0x00)
|
||||||
inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b)
|
inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
{
|
{
|
||||||
|
const uint64x2_p z={0};
|
||||||
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
|
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
|
||||||
return VMULL2LE(__vpmsumd (VecGetHigh(a), VecGetHigh(b)));
|
return VMULL2LE(__vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
|
||||||
#elif defined(__clang__)
|
#elif defined(__clang__)
|
||||||
return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
|
return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
|
||||||
#else
|
#else
|
||||||
return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
|
return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// _mm_clmulepi64_si128(a, b, 0x11)
|
// _mm_clmulepi64_si128(a, b, 0x11)
|
||||||
inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b)
|
inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
{
|
{
|
||||||
|
const uint64x2_p z={0};
|
||||||
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
|
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
|
||||||
return VMULL2LE(__vpmsumd (VecGetLow(a), b));
|
return VMULL2LE(__vpmsumd (vec_mergel(z, a), b));
|
||||||
#elif defined(__clang__)
|
#elif defined(__clang__)
|
||||||
return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetLow(a), b));
|
return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergel(z, a), b));
|
||||||
#else
|
#else
|
||||||
return VMULL2LE(__builtin_crypto_vpmsumd (VecGetLow(a), b));
|
return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergel(z, a), b));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue