diff --git a/adv-simd.h b/adv-simd.h
index 0fde6a81..d1832421 100644
--- a/adv-simd.h
+++ b/adv-simd.h
@@ -9,11 +9,17 @@
 //    acceleration. After several implementations we noticed a lot of copy and
 //    paste occuring. adv-simd.h provides a template to avoid the copy and paste.
 //
-//    There are 10 templates provided in this file. The number following the
-//    function name is the block size of the cipher. The name following that
-//    is the acceleration and arrangement. For example 4x1_SSE means Intel SSE
-//    using two encrypt (or decrypt) functions: one that operates on 4 blocks,
-//    and one that operates on 1 block.
+//    There are 11 templates provided in this file. The number following the
+//    function name, 64 or 128, is the block size. The name following the block
+//    size is the arrangement and acceleration. For example 4x1_SSE means Intel
+//    SSE using two encrypt (or decrypt) functions: one that operates on 4 SIMD
+//    words, and one that operates on 1 SIMD word.
+//
+//    The distinction between SIMD words and cipher blocks is important
+//    because 64-bit ciphers use two cipher blocks for one SIMD word. For
+//    example, AdvancedProcessBlocks64_6x2_ALTIVEC operates on 6 and 2 SIMD
+//    words, which is 12 and 4 cipher blocks. The function will do the right
+//    thing even if there is only one 64-bit block to encrypt.
 //
 //      * AdvancedProcessBlocks64_2x1_SSE
 //      * AdvancedProcessBlocks64_4x1_SSE
@@ -1640,7 +1646,7 @@ inline size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
 
     if (flags & BT_AllowParallel)
     {
-        while (length >= 4 * xmmBlockSize)
+        while (length >= 4*xmmBlockSize)
         {
             __m128i block0, block1, block2, block3;
             if (flags & BT_InBlockIsCounter)
@@ -1713,7 +1719,7 @@ inline size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
             _mm_storeu_si128(M128_CAST(outBlocks), block3);
             outBlocks = PtrAdd(outBlocks, outIncrement);
 
-            length -= 4 * xmmBlockSize;
+            length -= 4*xmmBlockSize;
         }
     }
 
@@ -1859,6 +1865,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
                 block4 = VectorAdd(s_two, block3);
                 block5 = VectorAdd(s_two, block4);
 
+                // Update the counter in the caller: 6 SIMD words is 12 cipher blocks.
                 const_cast<byte*>(inBlocks)[7] += 12;
             }
             else
@@ -1948,6 +1955,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
                 // increment by {2,2}.
                 block1 = VectorAdd(s_two, block0);
 
+                // Update the counter in the caller: 2 SIMD words is 4 cipher blocks.
                 const_cast<byte*>(inBlocks)[7] += 4;
             }
             else
@@ -2022,6 +2030,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
                 block = VectorXor(block, x);
             }
 
+            // Update the counter in the caller.
             if (flags & BT_InBlockIsCounter)
                 const_cast<byte *>(inBlocks)[7]++;