diff --git a/blake2-simd.cpp b/blake2-simd.cpp index 371aa7e3..961bdc0c 100644 --- a/blake2-simd.cpp +++ b/blake2-simd.cpp @@ -1465,8 +1465,9 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2_State& sta // Permute masks. High is element 0 (most significant), low is // element 1 (least significant). We use vec_mergeh(a,b) for // vec_perm(a,b,HH_MASK) and vec_mergel(a,b) for vec_perm(a,b,LL_MASK). - // Benchmarks don't show we profit up to 0.4 cpb. The code that uses - // vec_mergeh and vec_mergel is about 880 bytes shorter on ppc64le. + // Benchmarks show we profit up to 0.4 cpb. The code that uses + // vec_mergeh and vec_mergel is about 880 bytes shorter, and frees + // up two vector registers on ppc64le. // const uint8x16_p HH_MASK = { 0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23 }; // const uint8x16_p LL_MASK = { 8,9,10,11,12,13,14,15, 24,25,26,27,28,29,30,31 };