Update comments

2018-08-14 20:04:28 -04:00 · 2018-08-14 20:04:28 -04:00 · 9a78b92429
parent cf0c487c2a
commit 9a78b92429
1 changed files with 0 additions and 60 deletions
--- a/simon-simd.cpp
+++ b/simon-simd.cpp
@ -140,10 +140,6 @@ inline uint32x4_t SIMON64_f(const uint32x4_t& val)
 inline void SIMON64_Enc_Block(uint32x4_t &block1, uint32x4_t &block0,
    const word32 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following. If only a single block is available then
-    // a Zero block is provided to promote vectorizations.
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
@ -173,10 +169,6 @@ inline void SIMON64_Enc_Block(uint32x4_t &block1, uint32x4_t &block0,
 inline void SIMON64_Dec_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following. If only a single block is available then
-    // a Zero block is provided to promote vectorizations.
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
@ -208,10 +200,6 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following. If only a single block is available then
-    // a Zero block is provided to promote vectorizations.
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
@ -256,10 +244,6 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following. If only a single block is available then
-    // a Zero block is provided to promote vectorizations.
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
@ -380,9 +364,6 @@ inline uint64x2_t SIMON128_f(const uint64x2_t& val)
 inline void SIMON128_Enc_Block(uint64x2_t &block0, uint64x2_t &block1,
    const word64 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following.
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint64x2_t x1 = UnpackHigh64(block0, block1);
    uint64x2_t y1 = UnpackLow64(block0, block1);
@ -413,9 +394,6 @@ inline void SIMON128_Enc_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
    uint64x2_t &block2, uint64x2_t &block3, uint64x2_t &block4, uint64x2_t &block5,
    const word64 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following.
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint64x2_t x1 = UnpackHigh64(block0, block1);
    uint64x2_t y1 = UnpackLow64(block0, block1);
@ -459,9 +437,6 @@ inline void SIMON128_Enc_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
 inline void SIMON128_Dec_Block(uint64x2_t &block0, uint64x2_t &block1,
    const word64 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following.
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint64x2_t x1 = UnpackHigh64(block0, block1);
    uint64x2_t y1 = UnpackLow64(block0, block1);
@ -493,9 +468,6 @@ inline void SIMON128_Dec_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
    uint64x2_t &block2, uint64x2_t &block3, uint64x2_t &block4, uint64x2_t &block5,
    const word64 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following.
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint64x2_t x1 = UnpackHigh64(block0, block1);
    uint64x2_t y1 = UnpackLow64(block0, block1);
@ -617,9 +589,6 @@ inline __m128i SIMON128_f(const __m128i& v)
 inline void SIMON128_Enc_Block(__m128i &block0, __m128i &block1,
    const word64 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following.
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
@ -653,9 +622,6 @@ inline void SIMON128_Enc_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word64 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following.
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
@ -701,9 +667,6 @@ inline void SIMON128_Enc_6_Blocks(__m128i &block0, __m128i &block1,
 inline void SIMON128_Dec_Block(__m128i &block0, __m128i &block1,
    const word64 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following.
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
@ -738,9 +701,6 @@ inline void SIMON128_Dec_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word64 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following.
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    __m128i x1 = _mm_unpackhi_epi64(block0, block1);
    __m128i y1 = _mm_unpacklo_epi64(block0, block1);
@ -828,10 +788,6 @@ inline __m128i SIMON64_f(const __m128i& v)
 inline void SIMON64_Enc_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following. Thanks to Peter Cordes for help with the
-    // SSE permutes below.
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
@ -854,7 +810,6 @@ inline void SIMON64_Enc_Block(__m128i &block0, __m128i &block1,
        Swap128(x1, y1);
    }

-    // The is roughly the SSE equivalent to ARM vzp32
    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
@ -863,10 +818,6 @@ inline void SIMON64_Enc_Block(__m128i &block0, __m128i &block1,
 inline void SIMON64_Dec_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following. Thanks to Peter Cordes for help with the
-    // SSE permutes below.
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
@ -890,7 +841,6 @@ inline void SIMON64_Dec_Block(__m128i &block0, __m128i &block1,
        y1 = _mm_xor_si128(_mm_xor_si128(y1, SIMON64_f(x1)), rk2);
    }

-    // The is roughly the SSE equivalent to ARM vzp32
    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
@ -900,10 +850,6 @@ inline void SIMON64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following. Thanks to Peter Cordes for help with the
-    // SSE permutes below.
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
@ -942,7 +888,6 @@ inline void SIMON64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
        Swap128(x1, y1); Swap128(x2, y2); Swap128(x3, y3);
    }

-    // The is roughly the SSE equivalent to ARM vzp32
    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
@ -956,10 +901,6 @@ inline void SIMON64_Dec_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
 {
-    // Rearrange the data for vectorization. The incoming data was read into
-    // a little-endian word array. Depending on the number of blocks it needs to
-    // be permuted to the following. Thanks to Peter Cordes for help with the
-    // SSE permutes below.
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
@ -999,7 +940,6 @@ inline void SIMON64_Dec_6_Blocks(__m128i &block0, __m128i &block1,
        y3 = _mm_xor_si128(_mm_xor_si128(y3, SIMON64_f(x3)), rk2);
    }

-    // The is roughly the SSE equivalent to ARM vzp32
    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);