diff --git a/arm_simd.h b/arm_simd.h
index 454f1a8f..42018070 100644
--- a/arm_simd.h
+++ b/arm_simd.h
@@ -20,6 +20,18 @@
 
 #if CRYPTOPP_ARM_PMULL_AVAILABLE
 
+/// \brief Polynomial multiplication
+/// \param a the first term
+/// \param b the second term
+/// \returns vector product
+/// \details PMULL_00() performs polynomial multiplication and presents
+///  the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x00)</tt>.
+///  The <tt>0x00</tt> indicates the low 64-bits of <tt>a</tt> and <tt>b</tt>
+///  are multiplied.
+/// \note An Intel XMM register is composed of 128 bits. The leftmost bit
+///  is MSB and numbered 127, while the rightmost bit is LSB and
+///  numbered 0.
+/// \since Crypto++ 8.0
 inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
 {
 #if defined(_MSC_VER)
@@ -38,6 +50,18 @@ inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
 #endif
 }
 
+/// \brief Polynomial multiplication
+/// \param a the first term
+/// \param b the second term
+/// \returns vector product
+/// \details PMULL_01() performs polynomial multiplication and presents
+///  the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x01)</tt>.
+///  The <tt>0x01</tt> indicates the low 64-bits of <tt>a</tt> and high
+///  64-bits of <tt>b</tt> are multiplied.
+/// \note An Intel XMM register is composed of 128 bits. The leftmost bit
+///  is MSB and numbered 127, while the rightmost bit is LSB and
+///  numbered 0.
+/// \since Crypto++ 8.0
 inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
 {
 #if defined(_MSC_VER)
@@ -56,6 +80,18 @@ inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
 #endif
 }
 
+/// \brief Polynomial multiplication
+/// \param a the first term
+/// \param b the second term
+/// \returns vector product
+/// \details PMULL_10() performs polynomial multiplication and presents
+///  the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x10)</tt>.
+///  The <tt>0x10</tt> indicates the high 64-bits of <tt>a</tt> and low
+///  64-bits of <tt>b</tt> are multiplied.
+/// \note An Intel XMM register is composed of 128 bits. The leftmost bit
+///  is MSB and numbered 127, while the rightmost bit is LSB and
+///  numbered 0.
+/// \since Crypto++ 8.0
 inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
 {
 #if defined(_MSC_VER)
@@ -74,6 +110,18 @@ inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
 #endif
 }
 
+/// \brief Polynomial multiplication
+/// \param a the first term
+/// \param b the second term
+/// \returns vector product
+/// \details PMULL_11() performs polynomial multiplication and presents
+///  the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x11)</tt>.
+///  The <tt>0x11</tt> indicates the high 64-bits of <tt>a</tt> and <tt>b</tt>
+///  are multiplied.
+/// \note An Intel XMM register is composed of 128 bits. The leftmost bit
+///  is MSB and numbered 127, while the rightmost bit is LSB and
+///  numbered 0.
+/// \since Crypto++ 8.0
 inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
 {
 #if defined(_MSC_VER)
@@ -92,6 +140,14 @@ inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
 #endif
 }
 
+/// \brief Vector extraction
+/// \param a the first term
+/// \param b the second term
+/// \param c the byte count
+/// \returns vector
+/// \details VEXT_U8() extracts the first <tt>c</tt> bytes of vector
+///  <tt>a</tt> and the remaining bytes in <tt>b</tt>.
+/// \since Crypto++ 8.0
 inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
 {
 #if defined(_MSC_VER)
@@ -105,10 +161,18 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
 #endif
 }
 
-// https://github.com/weidai11/cryptopp/issues/366
+/// \brief Vector extraction
+/// \tparam C the byte count
+/// \param a the first term
+/// \param b the second term
+/// \returns vector
+/// \details VEXT_U8() returns bytes <tt>C</tt> through 15 of vector
+///  <tt>a</tt> followed by the first <tt>C</tt> bytes of <tt>b</tt>.
+/// \since Crypto++ 8.0
 template <unsigned int C>
 inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
 {
+    // https://github.com/weidai11/cryptopp/issues/366
 #if defined(_MSC_VER)
     return (uint64x2_t)vextq_u8(
         vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);