diff --git a/arm_simd.h b/arm_simd.h
index 454f1a8f..42018070 100644
--- a/arm_simd.h
+++ b/arm_simd.h
@@ -20,6 +20,18 @@
#if CRYPTOPP_ARM_PMULL_AVAILABLE
+/// \brief Polynomial multiplication
+/// \param a the first term
+/// \param b the second term
+/// \returns vector product
+/// \details PMULL_00() performs polynomial multiplication and presents
+/// the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x00).
+/// The 0x00 indicates the low 64-bits of a and b
+/// are multiplied.
+/// \note An Intel XMM register is composed of 128-bits. The leftmost bit
+///   is MSB and numbered 127, while the rightmost bit is LSB and
+/// numbered 0.
+/// \since Crypto++ 8.0
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
#if defined(_MSC_VER)
@@ -38,6 +50,18 @@ inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
#endif
}
+/// \brief Polynomial multiplication
+/// \param a the first term
+/// \param b the second term
+/// \returns vector product
+/// \details PMULL_01() performs polynomial multiplication and presents
+/// the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x01).
+/// The 0x01 indicates the low 64-bits of a and high
+/// 64-bits of b are multiplied.
+/// \note An Intel XMM register is composed of 128-bits. The leftmost bit
+///   is MSB and numbered 127, while the rightmost bit is LSB and
+/// numbered 0.
+/// \since Crypto++ 8.0
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
#if defined(_MSC_VER)
@@ -56,6 +80,18 @@ inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
#endif
}
+/// \brief Polynomial multiplication
+/// \param a the first term
+/// \param b the second term
+/// \returns vector product
+/// \details PMULL_10() performs polynomial multiplication and presents
+/// the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x10).
+/// The 0x10 indicates the high 64-bits of a and low
+/// 64-bits of b are multiplied.
+/// \note An Intel XMM register is composed of 128-bits. The leftmost bit
+///   is MSB and numbered 127, while the rightmost bit is LSB and
+/// numbered 0.
+/// \since Crypto++ 8.0
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
#if defined(_MSC_VER)
@@ -74,6 +110,18 @@ inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
#endif
}
+/// \brief Polynomial multiplication
+/// \param a the first term
+/// \param b the second term
+/// \returns vector product
+/// \details PMULL_11() performs polynomial multiplication and presents
+/// the result like Intel's c = _mm_clmulepi64_si128(a, b, 0x11).
+/// The 0x11 indicates the high 64-bits of a and b
+/// are multiplied.
+/// \note An Intel XMM register is composed of 128-bits. The leftmost bit
+///   is MSB and numbered 127, while the rightmost bit is LSB and
+/// numbered 0.
+/// \since Crypto++ 8.0
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
#if defined(_MSC_VER)
@@ -92,6 +140,14 @@ inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
#endif
}
+/// \brief Vector extraction
+/// \param a the first term
+/// \param b the second term
+/// \param c the byte count
+/// \returns vector
+/// \details VEXT_U8() extracts the first c bytes of vector
+/// a and the remaining bytes in b.
+/// \since Crypto++ 8.0
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
#if defined(_MSC_VER)
@@ -105,10 +161,18 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
#endif
}
-// https://github.com/weidai11/cryptopp/issues/366
+/// \brief Vector extraction
+/// \tparam C the byte count
+/// \param a the first term
+/// \param b the second term
+/// \returns vector
+/// \details VEXT_U8() extracts the first C bytes of vector
+/// a and the remaining bytes in b.
+/// \since Crypto++ 8.0
template
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{
+ // https://github.com/weidai11/cryptopp/issues/366
#if defined(_MSC_VER)
return (uint64x2_t)vextq_u8(
vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);