Update documentation
parent
f95638ef0c
commit
f510b3498c
121
ppc_simd.h
121
ppc_simd.h
|
|
@ -864,46 +864,9 @@ inline void VecStoreBE(const T data, int off, word32 dest[4])
|
||||||
|
|
||||||
//@}
|
//@}
|
||||||
|
|
||||||
/// \name OTHER OPERATIONS
|
/// \name LOGICAL OPERATIONS
|
||||||
//@{
|
//@{
|
||||||
|
|
||||||
/// \brief Permutes a vector
|
|
||||||
/// \tparam T1 type of the vector and of the result
|
|
||||||
/// \tparam T2 type of the mask
|
|
||||||
/// \param vec the vector
|
|
||||||
/// \param mask vector mask
|
|
||||||
/// \returns the permuted vector, cast to T1
|
|
||||||
/// \details VecPermute() returns a new vector from vec based on
|
|
||||||
/// mask. vec is passed as both vec_perm inputs and mask is cast to
|
|
||||||
/// uint8x16_p. The return vector is the same type as vec.
|
|
||||||
/// \par Wraps
|
|
||||||
/// vec_perm
|
|
||||||
/// \since Crypto++ 6.0
|
|
||||||
template <class T1, class T2>
|
|
||||||
inline T1 VecPermute(const T1 vec, const T2 mask)
|
|
||||||
{
|
|
||||||
return (T1)vec_perm(vec, vec, (uint8x16_p)mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Permutes two vectors
|
|
||||||
/// \tparam T1 type of both vectors and of the result
|
|
||||||
/// \tparam T2 type of the mask
|
|
||||||
/// \param vec1 the first vector
|
|
||||||
/// \param vec2 the second vector
|
|
||||||
/// \param mask vector mask
|
|
||||||
/// \returns the permuted vector, cast to T1
|
|
||||||
/// \details VecPermute() returns a new vector from vec1 and vec2
|
|
||||||
/// based on mask. mask is cast to uint8x16_p before use. The return
|
|
||||||
/// vector is the same type as vec1.
|
|
||||||
/// \par Wraps
|
|
||||||
/// vec_perm
|
|
||||||
/// \since Crypto++ 6.0
|
|
||||||
template <class T1, class T2>
|
|
||||||
inline T1 VecPermute(const T1 vec1, const T1 vec2, const T2 mask)
|
|
||||||
{
|
|
||||||
return (T1)vec_perm(vec1, (T1)vec2, (uint8x16_p)mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief AND two vectors
|
/// \brief AND two vectors
|
||||||
/// \tparam T1 vector type
|
/// \tparam T1 vector type
|
||||||
/// \tparam T2 vector type
|
/// \tparam T2 vector type
|
||||||
|
|
@ -955,6 +918,11 @@ inline T1 VecXor(const T1 vec1, const T2 vec2)
|
||||||
return (T1)vec_xor(vec1, (T1)vec2);
|
return (T1)vec_xor(vec1, (T1)vec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//@}
|
||||||
|
|
||||||
|
/// \name ARITHMETIC OPERATIONS
|
||||||
|
//@{
|
||||||
|
|
||||||
/// \brief Add two vectors
|
/// \brief Add two vectors
|
||||||
/// \tparam T1 vector type
|
/// \tparam T1 vector type
|
||||||
/// \tparam T2 vector type
|
/// \tparam T2 vector type
|
||||||
|
|
@ -1021,6 +989,48 @@ inline uint32x4_p VecAdd64(const uint32x4_p& vec1, const uint32x4_p& vec2)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//@}
|
||||||
|
|
||||||
|
/// \name OTHER OPERATIONS
|
||||||
|
//@{
|
||||||
|
|
||||||
|
/// \brief Permutes a vector
|
||||||
|
/// \tparam T1 type of the vector and of the result
|
||||||
|
/// \tparam T2 type of the mask
|
||||||
|
/// \param vec the vector
|
||||||
|
/// \param mask vector mask
|
||||||
|
/// \returns the permuted vector, cast to T1
|
||||||
|
/// \details VecPermute() returns a new vector from vec based on
|
||||||
|
/// mask. vec is passed as both vec_perm inputs and mask is cast to
|
||||||
|
/// uint8x16_p. The return vector is the same type as vec.
|
||||||
|
/// \par Wraps
|
||||||
|
/// vec_perm
|
||||||
|
/// \since Crypto++ 6.0
|
||||||
|
template <class T1, class T2>
|
||||||
|
inline T1 VecPermute(const T1 vec, const T2 mask)
|
||||||
|
{
|
||||||
|
return (T1)vec_perm(vec, vec, (uint8x16_p)mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Permutes two vectors
|
||||||
|
/// \tparam T1 type of both vectors and of the result
|
||||||
|
/// \tparam T2 type of the mask
|
||||||
|
/// \param vec1 the first vector
|
||||||
|
/// \param vec2 the second vector
|
||||||
|
/// \param mask vector mask
|
||||||
|
/// \returns the permuted vector, cast to T1
|
||||||
|
/// \details VecPermute() returns a new vector from vec1 and vec2
|
||||||
|
/// based on mask. mask is cast to uint8x16_p before use. The return
|
||||||
|
/// vector is the same type as vec1.
|
||||||
|
/// \par Wraps
|
||||||
|
/// vec_perm
|
||||||
|
/// \since Crypto++ 6.0
|
||||||
|
template <class T1, class T2>
|
||||||
|
inline T1 VecPermute(const T1 vec1, const T1 vec2, const T2 mask)
|
||||||
|
{
|
||||||
|
return (T1)vec_perm(vec1, (T1)vec2, (uint8x16_p)mask);
|
||||||
|
}
|
||||||
|
|
||||||
/// \brief Shift a vector left
|
/// \brief Shift a vector left
|
||||||
/// \tparam C shift byte count
|
/// \tparam C shift byte count
|
||||||
/// \tparam T vector type
|
/// \tparam T vector type
|
||||||
|
|
@ -1441,7 +1451,7 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val)
|
||||||
/// \param a the first term
|
/// \param a the first term
|
||||||
/// \param b the second term
|
/// \param b the second term
|
||||||
/// \returns vector product
|
/// \returns vector product
|
||||||
/// \details VecPolyMultiply00LE perform polynomial multiplication and presents
|
/// \details VecPolyMultiply00LE performs polynomial multiplication and presents
|
||||||
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x00)</tt>.
|
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x00)</tt>.
|
||||||
/// The <tt>0x00</tt> indicates the low 64-bits of <tt>a</tt> and <tt>b</tt>
|
/// The <tt>0x00</tt> indicates the low 64-bits of <tt>a</tt> and <tt>b</tt>
|
||||||
/// are multiplied.
|
/// are multiplied.
|
||||||
|
|
@ -1465,7 +1475,7 @@ inline uint64x2_p VecPolyMultiply00LE(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
/// \param a the first term
|
/// \param a the first term
|
||||||
/// \param b the second term
|
/// \param b the second term
|
||||||
/// \returns vector product
|
/// \returns vector product
|
||||||
/// \details VecPolyMultiply01LE perform polynomial multiplication and presents
|
/// \details VecPolyMultiply01LE performs polynomial multiplication and presents
|
||||||
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x01)</tt>.
|
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x01)</tt>.
|
||||||
/// The <tt>0x01</tt> indicates the low 64-bits of <tt>a</tt> and high
|
/// The <tt>0x01</tt> indicates the low 64-bits of <tt>a</tt> and high
|
||||||
/// 64-bits of <tt>b</tt> are multiplied.
|
/// 64-bits of <tt>b</tt> are multiplied.
|
||||||
|
|
@ -1489,7 +1499,7 @@ inline uint64x2_p VecPolyMultiply01LE(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
/// \param a the first term
|
/// \param a the first term
|
||||||
/// \param b the second term
|
/// \param b the second term
|
||||||
/// \returns vector product
|
/// \returns vector product
|
||||||
/// \details VecPolyMultiply10LE perform polynomial multiplication and presents
|
/// \details VecPolyMultiply10LE performs polynomial multiplication and presents
|
||||||
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x10)</tt>.
|
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x10)</tt>.
|
||||||
/// The <tt>0x10</tt> indicates the high 64-bits of <tt>a</tt> and low
|
/// The <tt>0x10</tt> indicates the high 64-bits of <tt>a</tt> and low
|
||||||
/// 64-bits of <tt>b</tt> are multiplied.
|
/// 64-bits of <tt>b</tt> are multiplied.
|
||||||
|
|
@ -1513,7 +1523,7 @@ inline uint64x2_p VecPolyMultiply10LE(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
/// \param a the first term
|
/// \param a the first term
|
||||||
/// \param b the second term
|
/// \param b the second term
|
||||||
/// \returns vector product
|
/// \returns vector product
|
||||||
/// \details VecPolyMultiply11LE perform polynomial multiplication and presents
|
/// \details VecPolyMultiply11LE performs polynomial multiplication and presents
|
||||||
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x11)</tt>.
|
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x11)</tt>.
|
||||||
/// The <tt>0x11</tt> indicates the high 64-bits of <tt>a</tt> and <tt>b</tt>
|
/// The <tt>0x11</tt> indicates the high 64-bits of <tt>a</tt> and <tt>b</tt>
|
||||||
/// are multiplied.
|
/// are multiplied.
|
||||||
|
|
@ -1533,6 +1543,33 @@ inline uint64x2_p VecPolyMultiply11LE(const uint64x2_p& a, const uint64x2_p& b)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \brief Polynomial multiplication
|
||||||
|
/// \tparam T the vector type
|
||||||
|
/// \param a the first term
|
||||||
|
/// \param b the second term
|
||||||
|
/// \returns vector product, cast to T
|
||||||
|
/// \details VecPolyMultiply performs polynomial multiplication. POWER8
|
||||||
|
/// polynomial multiplication multiplies the high and low terms, and then XORs
|
||||||
|
/// the high and low products. That is, the result is <tt>ah*bh XOR al*bl</tt>.
|
||||||
|
/// This behavior differs from Intel's polynomial multiplication.
|
||||||
|
/// To obtain a single product without the XOR, set one of the high or
|
||||||
|
/// low terms to 0. For example, setting <tt>ah=0</tt> results in <tt>0*bh
|
||||||
|
/// XOR al*bl = al*bl</tt>.
|
||||||
|
/// \par Wraps
|
||||||
|
/// __vpmsumd (IBM XL C/C++), __builtin_altivec_crypto_vpmsumd (Clang) and __builtin_crypto_vpmsumd (all other compilers).
|
||||||
|
/// \since Crypto++ 8.1
|
||||||
|
template <class T>
|
||||||
|
inline T VecPolyMultiply(const T& a, const T& b)
|
||||||
|
{
|
||||||
|
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
|
||||||
|
return (T)__vpmsumd (a, b);
|
||||||
|
#elif defined(__clang__)
|
||||||
|
return (T)__builtin_altivec_crypto_vpmsumd (a, b);
|
||||||
|
#else
|
||||||
|
return (T)__builtin_crypto_vpmsumd (a, b);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
//@}
|
//@}
|
||||||
|
|
||||||
/// \name AES ENCRYPTION
|
/// \name AES ENCRYPTION
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue