Update documentation
parent
d0b5dac162
commit
527613df22
|
|
@ -495,11 +495,12 @@ inline size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
|
||||||
/// \tparam F4 function to process 4 128-bit blocks
|
/// \tparam F4 function to process 4 128-bit blocks
|
||||||
/// \tparam W word type of the subkey table
|
/// \tparam W word type of the subkey table
|
||||||
/// \tparam V vector type of the NEON datatype
|
/// \tparam V vector type of the NEON datatype
|
||||||
/// \details AdvancedProcessBlocks128_6x2_NEON processes 4 and 1 NEON SIMD words
|
/// \details AdvancedProcessBlocks128_4x1_NEON processes 4 and 1 NEON SIMD words
|
||||||
/// at a time.
|
/// at a time.
|
||||||
/// \details The subkey type is usually word32 or word64. V is the vector type and it is
|
/// \details The subkey type is usually word32 or word64. V is the vector type and it is
|
||||||
/// usually uint32x4_t or uint64x2_t. F1, F4, W and V must use the same word and
|
/// usually uint32x4_t or uint64x2_t. F1, F4, W and V must use the same word and
|
||||||
/// vector type.
|
/// vector type. The V parameter is used to avoid template argument
|
||||||
|
/// deduction/substitution failures.
|
||||||
template <typename F1, typename F4, typename W, typename V>
|
template <typename F1, typename F4, typename W, typename V>
|
||||||
inline size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
|
inline size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
|
||||||
const V& unused, const W *subKeys, size_t rounds, const byte *inBlocks,
|
const V& unused, const W *subKeys, size_t rounds, const byte *inBlocks,
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,7 @@ inline __m128i UnpackXMM<0>(const __m128i& a, const __m128i& b, const __m128i& c
|
||||||
{
|
{
|
||||||
// The shuffle converts to and from little-endian for SSE. A specialized
|
// The shuffle converts to and from little-endian for SSE. A specialized
|
||||||
// CHAM implementation can avoid the shuffle by framing the data for
|
// CHAM implementation can avoid the shuffle by framing the data for
|
||||||
// encryption, decrementryption and benchmarks. The library cannot take the
|
// encryption, decryption and benchmarks. The library cannot take the
|
||||||
// speed-up because of the byte oriented API.
|
// speed-up because of the byte oriented API.
|
||||||
const __m128i r1 = _mm_unpacklo_epi16(a, b);
|
const __m128i r1 = _mm_unpacklo_epi16(a, b);
|
||||||
const __m128i r2 = _mm_unpacklo_epi16(c, d);
|
const __m128i r2 = _mm_unpacklo_epi16(c, d);
|
||||||
|
|
@ -102,7 +102,7 @@ inline __m128i UnpackXMM<1>(const __m128i& a, const __m128i& b, const __m128i& c
|
||||||
{
|
{
|
||||||
// The shuffle converts to and from little-endian for SSE. A specialized
|
// The shuffle converts to and from little-endian for SSE. A specialized
|
||||||
// CHAM implementation can avoid the shuffle by framing the data for
|
// CHAM implementation can avoid the shuffle by framing the data for
|
||||||
// encryption, decrementryption and benchmarks. The library cannot take the
|
// encryption, decryption and benchmarks. The library cannot take the
|
||||||
// speed-up because of the byte oriented API.
|
// speed-up because of the byte oriented API.
|
||||||
const __m128i r1 = _mm_unpacklo_epi16(a, b);
|
const __m128i r1 = _mm_unpacklo_epi16(a, b);
|
||||||
const __m128i r2 = _mm_unpacklo_epi16(c, d);
|
const __m128i r2 = _mm_unpacklo_epi16(c, d);
|
||||||
|
|
@ -121,7 +121,7 @@ inline __m128i UnpackXMM<2>(const __m128i& a, const __m128i& b, const __m128i& c
|
||||||
{
|
{
|
||||||
// The shuffle converts to and from little-endian for SSE. A specialized
|
// The shuffle converts to and from little-endian for SSE. A specialized
|
||||||
// CHAM implementation can avoid the shuffle by framing the data for
|
// CHAM implementation can avoid the shuffle by framing the data for
|
||||||
// encryption, decrementryption and benchmarks. The library cannot take the
|
// encryption, decryption and benchmarks. The library cannot take the
|
||||||
// speed-up because of the byte oriented API.
|
// speed-up because of the byte oriented API.
|
||||||
const __m128i r1 = _mm_unpacklo_epi16(a, b);
|
const __m128i r1 = _mm_unpacklo_epi16(a, b);
|
||||||
const __m128i r2 = _mm_unpacklo_epi16(c, d);
|
const __m128i r2 = _mm_unpacklo_epi16(c, d);
|
||||||
|
|
@ -140,7 +140,7 @@ inline __m128i UnpackXMM<3>(const __m128i& a, const __m128i& b, const __m128i& c
|
||||||
{
|
{
|
||||||
// The shuffle converts to and from little-endian for SSE. A specialized
|
// The shuffle converts to and from little-endian for SSE. A specialized
|
||||||
// CHAM implementation can avoid the shuffle by framing the data for
|
// CHAM implementation can avoid the shuffle by framing the data for
|
||||||
// encryption, decrementryption and benchmarks. The library cannot take the
|
// encryption, decryption and benchmarks. The library cannot take the
|
||||||
// speed-up because of the byte oriented API.
|
// speed-up because of the byte oriented API.
|
||||||
const __m128i r1 = _mm_unpacklo_epi16(a, b);
|
const __m128i r1 = _mm_unpacklo_epi16(a, b);
|
||||||
const __m128i r2 = _mm_unpacklo_epi16(c, d);
|
const __m128i r2 = _mm_unpacklo_epi16(c, d);
|
||||||
|
|
@ -159,7 +159,7 @@ inline __m128i UnpackXMM<4>(const __m128i& a, const __m128i& b, const __m128i& c
|
||||||
{
|
{
|
||||||
// The shuffle converts to and from little-endian for SSE. A specialized
|
// The shuffle converts to and from little-endian for SSE. A specialized
|
||||||
// CHAM implementation can avoid the shuffle by framing the data for
|
// CHAM implementation can avoid the shuffle by framing the data for
|
||||||
// encryption, decrementryption and benchmarks. The library cannot take the
|
// encryption, decryption and benchmarks. The library cannot take the
|
||||||
// speed-up because of the byte oriented API.
|
// speed-up because of the byte oriented API.
|
||||||
const __m128i r1 = _mm_unpackhi_epi16(a, b);
|
const __m128i r1 = _mm_unpackhi_epi16(a, b);
|
||||||
const __m128i r2 = _mm_unpackhi_epi16(c, d);
|
const __m128i r2 = _mm_unpackhi_epi16(c, d);
|
||||||
|
|
@ -178,7 +178,7 @@ inline __m128i UnpackXMM<5>(const __m128i& a, const __m128i& b, const __m128i& c
|
||||||
{
|
{
|
||||||
// The shuffle converts to and from little-endian for SSE. A specialized
|
// The shuffle converts to and from little-endian for SSE. A specialized
|
||||||
// CHAM implementation can avoid the shuffle by framing the data for
|
// CHAM implementation can avoid the shuffle by framing the data for
|
||||||
// encryption, decrementryption and benchmarks. The library cannot take the
|
// encryption, decryption and benchmarks. The library cannot take the
|
||||||
// speed-up because of the byte oriented API.
|
// speed-up because of the byte oriented API.
|
||||||
const __m128i r1 = _mm_unpackhi_epi16(a, b);
|
const __m128i r1 = _mm_unpackhi_epi16(a, b);
|
||||||
const __m128i r2 = _mm_unpackhi_epi16(c, d);
|
const __m128i r2 = _mm_unpackhi_epi16(c, d);
|
||||||
|
|
@ -197,7 +197,7 @@ inline __m128i UnpackXMM<6>(const __m128i& a, const __m128i& b, const __m128i& c
|
||||||
{
|
{
|
||||||
// The shuffle converts to and from little-endian for SSE. A specialized
|
// The shuffle converts to and from little-endian for SSE. A specialized
|
||||||
// CHAM implementation can avoid the shuffle by framing the data for
|
// CHAM implementation can avoid the shuffle by framing the data for
|
||||||
// encryption, decrementryption and benchmarks. The library cannot take the
|
// encryption, decryption and benchmarks. The library cannot take the
|
||||||
// speed-up because of the byte oriented API.
|
// speed-up because of the byte oriented API.
|
||||||
const __m128i r1 = _mm_unpackhi_epi16(a, b);
|
const __m128i r1 = _mm_unpackhi_epi16(a, b);
|
||||||
const __m128i r2 = _mm_unpackhi_epi16(c, d);
|
const __m128i r2 = _mm_unpackhi_epi16(c, d);
|
||||||
|
|
@ -216,7 +216,7 @@ inline __m128i UnpackXMM<7>(const __m128i& a, const __m128i& b, const __m128i& c
|
||||||
{
|
{
|
||||||
// The shuffle converts to and from little-endian for SSE. A specialized
|
// The shuffle converts to and from little-endian for SSE. A specialized
|
||||||
// CHAM implementation can avoid the shuffle by framing the data for
|
// CHAM implementation can avoid the shuffle by framing the data for
|
||||||
// encryption, decrementryption and benchmarks. The library cannot take the
|
// encryption, decryption and benchmarks. The library cannot take the
|
||||||
// speed-up because of the byte oriented API.
|
// speed-up because of the byte oriented API.
|
||||||
const __m128i r1 = _mm_unpackhi_epi16(a, b);
|
const __m128i r1 = _mm_unpackhi_epi16(a, b);
|
||||||
const __m128i r2 = _mm_unpackhi_epi16(c, d);
|
const __m128i r2 = _mm_unpackhi_epi16(c, d);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue