Update comments
parent
d221336f42
commit
93149e4c25
23
adv-simd.h
23
adv-simd.h
|
|
@ -9,11 +9,17 @@
|
||||||
// acceleration. After several implementations we noticed a lot of copy and
|
// acceleration. After several implementations we noticed a lot of copy and
|
||||||
// paste occurring. adv-simd.h provides a template to avoid the copy and paste.
|
// paste occurring. adv-simd.h provides a template to avoid the copy and paste.
|
||||||
//
|
//
|
||||||
// There are 10 templates provided in this file. The number following the
|
// There are 11 templates provided in this file. The number following the
|
||||||
// function name is the block size of the cipher. The name following that
|
// function name, 64 or 128, is the block size. The name following the block
|
||||||
// is the acceleration and arrangement. For example 4x1_SSE means Intel SSE
|
// size is the arrangement and acceleration. For example 4x1_SSE means Intel
|
||||||
// using two encrypt (or decrypt) functions: one that operates on 4 blocks,
|
// SSE using two encrypt (or decrypt) functions: one that operates on 4 SIMD
|
||||||
// and one that operates on 1 block.
|
// words, and one that operates on 1 SIMD word.
|
||||||
|
//
|
||||||
|
// The distinction between SIMD words versus cipher blocks is important
|
||||||
|
// because 64-bit ciphers use two cipher blocks for one SIMD word. For
|
||||||
|
// example, AdvancedProcessBlocks64_6x2_ALTIVEC operates on 6 and 2 SIMD
|
||||||
|
// words, which is 12 and 4 cipher blocks. The function will do the right
|
||||||
|
// thing even if there is only one 64-bit block to encrypt.
|
||||||
//
|
//
|
||||||
// * AdvancedProcessBlocks64_2x1_SSE
|
// * AdvancedProcessBlocks64_2x1_SSE
|
||||||
// * AdvancedProcessBlocks64_4x1_SSE
|
// * AdvancedProcessBlocks64_4x1_SSE
|
||||||
|
|
@ -1640,7 +1646,7 @@ inline size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
|
||||||
|
|
||||||
if (flags & BT_AllowParallel)
|
if (flags & BT_AllowParallel)
|
||||||
{
|
{
|
||||||
while (length >= 4 * xmmBlockSize)
|
while (length >= 4*xmmBlockSize)
|
||||||
{
|
{
|
||||||
__m128i block0, block1, block2, block3;
|
__m128i block0, block1, block2, block3;
|
||||||
if (flags & BT_InBlockIsCounter)
|
if (flags & BT_InBlockIsCounter)
|
||||||
|
|
@ -1713,7 +1719,7 @@ inline size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
|
||||||
_mm_storeu_si128(M128_CAST(outBlocks), block3);
|
_mm_storeu_si128(M128_CAST(outBlocks), block3);
|
||||||
outBlocks = PtrAdd(outBlocks, outIncrement);
|
outBlocks = PtrAdd(outBlocks, outIncrement);
|
||||||
|
|
||||||
length -= 4 * xmmBlockSize;
|
length -= 4*xmmBlockSize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1859,6 +1865,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
|
||||||
block4 = VectorAdd(s_two, block3);
|
block4 = VectorAdd(s_two, block3);
|
||||||
block5 = VectorAdd(s_two, block4);
|
block5 = VectorAdd(s_two, block4);
|
||||||
|
|
||||||
|
// Update the counter in the caller.
|
||||||
const_cast<byte*>(inBlocks)[7] += 12;
|
const_cast<byte*>(inBlocks)[7] += 12;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
@ -1948,6 +1955,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
|
||||||
// increment by {2,2}.
|
// increment by {2,2}.
|
||||||
block1 = VectorAdd(s_two, block0);
|
block1 = VectorAdd(s_two, block0);
|
||||||
|
|
||||||
|
// Update the counter in the caller.
|
||||||
const_cast<byte*>(inBlocks)[7] += 4;
|
const_cast<byte*>(inBlocks)[7] += 4;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
@ -2022,6 +2030,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
|
||||||
block = VectorXor(block, x);
|
block = VectorXor(block, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update the counter in the caller.
|
||||||
if (flags & BT_InBlockIsCounter)
|
if (flags & BT_InBlockIsCounter)
|
||||||
const_cast<byte *>(inBlocks)[7]++;
|
const_cast<byte *>(inBlocks)[7]++;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue