Update comments

pull/703/head
Jeffrey Walton 2018-08-14 05:15:32 -04:00
parent d221336f42
commit 93149e4c25
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
1 changed file with 16 additions and 7 deletions

View File

@ -9,11 +9,17 @@
// acceleration. After several implementations we noticed a lot of copy and // acceleration. After several implementations we noticed a lot of copy and
// paste occurring. adv-simd.h provides a template to avoid the copy and paste. // paste occurring. adv-simd.h provides a template to avoid the copy and paste.
// //
// There are 10 templates provided in this file. The number following the // There are 11 templates provided in this file. The number following the
// function name is the block size of the cipher. The name following that // function name, 64 or 128, is the block size. The name following the block
// is the acceleration and arrangement. For example 4x1_SSE means Intel SSE // size is the arrangement and acceleration. For example 4x1_SSE means Intel
// using two encrypt (or decrypt) functions: one that operates on 4 blocks, // SSE using two encrypt (or decrypt) functions: one that operates on 4 SIMD
// and one that operates on 1 block. // words, and one that operates on 1 SIMD word.
//
// The distinction between SIMD words versus cipher blocks is important
// because 64-bit ciphers use two cipher blocks for one SIMD word. For
// example, AdvancedProcessBlocks64_6x2_ALTIVEC operates on 6 and 2 SIMD
// words, which is 12 and 4 cipher blocks. The function will do the right
// thing even if there is only one 64-bit block to encrypt.
// //
// * AdvancedProcessBlocks64_2x1_SSE // * AdvancedProcessBlocks64_2x1_SSE
// * AdvancedProcessBlocks64_4x1_SSE // * AdvancedProcessBlocks64_4x1_SSE
@ -1640,7 +1646,7 @@ inline size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
if (flags & BT_AllowParallel) if (flags & BT_AllowParallel)
{ {
while (length >= 4 * xmmBlockSize) while (length >= 4*xmmBlockSize)
{ {
__m128i block0, block1, block2, block3; __m128i block0, block1, block2, block3;
if (flags & BT_InBlockIsCounter) if (flags & BT_InBlockIsCounter)
@ -1713,7 +1719,7 @@ inline size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
_mm_storeu_si128(M128_CAST(outBlocks), block3); _mm_storeu_si128(M128_CAST(outBlocks), block3);
outBlocks = PtrAdd(outBlocks, outIncrement); outBlocks = PtrAdd(outBlocks, outIncrement);
length -= 4 * xmmBlockSize; length -= 4*xmmBlockSize;
} }
} }
@ -1859,6 +1865,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
block4 = VectorAdd(s_two, block3); block4 = VectorAdd(s_two, block3);
block5 = VectorAdd(s_two, block4); block5 = VectorAdd(s_two, block4);
// Update the counter in the caller.
const_cast<byte*>(inBlocks)[7] += 12; const_cast<byte*>(inBlocks)[7] += 12;
} }
else else
@ -1948,6 +1955,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
// increment by {2,2}. // increment by {2,2}.
block1 = VectorAdd(s_two, block0); block1 = VectorAdd(s_two, block0);
// Update the counter in the caller.
const_cast<byte*>(inBlocks)[7] += 4; const_cast<byte*>(inBlocks)[7] += 4;
} }
else else
@ -2022,6 +2030,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
block = VectorXor(block, x); block = VectorXor(block, x);
} }
// Update the counter in the caller.
if (flags & BT_InBlockIsCounter) if (flags & BT_InBlockIsCounter)
const_cast<byte *>(inBlocks)[7]++; const_cast<byte *>(inBlocks)[7]++;