diff --git a/blake2-simd.cpp b/blake2-simd.cpp
index 371aa7e3..961bdc0c 100644
--- a/blake2-simd.cpp
+++ b/blake2-simd.cpp
@@ -1465,8 +1465,9 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2_State<word64, true>& sta
     // Permute masks. High is element 0 (most significant), low is
     // element 1 (least significant). We use vec_mergeh(a,b) for
     // vec_perm(a,b,HH_MASK) and vec_mergel(a,b) for vec_perm(a,b,LL_MASK).
-    // Benchmarks don't show we profit up to 0.4 cpb. The code that uses
-    // vec_mergeh and vec_mergel is about 880 bytes shorter on ppc64le.
+    // Benchmarks show a gain of up to 0.4 cpb. The code that uses
+    // vec_mergeh and vec_mergel is about 880 bytes shorter and frees
+    // up two vector registers on ppc64le.
 
     // const uint8x16_p HH_MASK = { 0,1,2,3,4,5,6,7,       16,17,18,19,20,21,22,23 };
     // const uint8x16_p LL_MASK = { 8,9,10,11,12,13,14,15, 24,25,26,27,28,29,30,31 };