Fix ODR violation in AdvancedProcessBlocks_{ARCH} (GH #585)

The ALTIVEC function required an inline declaration; the lack of inline caused the self-test failure. Two NEON functions needed the same fix. We also cleaned up the constants in unnamed namespaces by moving them into the functions that use them.
2018-02-20 13:17:05 -05:00 · 2018-02-20 13:17:05 -05:00 · 33c10bc027
parent b6fec08da1
commit 33c10bc027
2 changed files with 75 additions and 69 deletions
--- a/adv-simd.h
+++ b/adv-simd.h
@ -23,6 +23,7 @@
 //

 #ifndef CRYPTOPP_ADVANCED_SIMD_TEMPLATES
+#define CRYPTOPP_ADVANCED_SIMD_TEMPLATES

 #include "config.h"
 #include "misc.h"
@ -70,29 +71,6 @@ ANONYMOUS_NAMESPACE_END

 #if defined(CRYPTOPP_ARM_NEON_AVAILABLE)

-ANONYMOUS_NAMESPACE_BEGIN
-
-using CryptoPP::word32;
-using CryptoPP::word64;
-
-#if defined(CRYPTOPP_LITTLE_ENDIAN)
-const word32 s_zero32x4[]   = {0, 0, 0, 0};
-const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
-const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
-#else
-const word32 s_zero32x4[]   = {0, 0, 0, 0};
-const word32 s_one32x4_1b[] = {0, 0, 0, 1};
-const word32 s_one32x4_2b[] = {0, 2, 0, 2};
-#endif
-
-#if defined(CRYPTOPP_LITTLE_ENDIAN)
-const word32 s_one32x4[] = {0, 0, 0, 1<<24};
-#else
-const word32 s_one32x4[] = {0, 0, 0, 1};
-#endif
-
-ANONYMOUS_NAMESPACE_END
-
 NAMESPACE_BEGIN(CryptoPP)

 template <typename F2, typename F6>
@ -105,6 +83,18 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
    CRYPTOPP_ASSERT(outBlocks);
    CRYPTOPP_ASSERT(length >= 8);

+#if defined(CRYPTOPP_LITTLE_ENDIAN)
+    const word32 s_zero32x4[]   = {0, 0, 0, 0};
+    const word32 s_one32x4[]    = {0, 0, 0, 1<<24};
+    const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
+    const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
+#else
+    const word32 s_zero32x4[]   = {0, 0, 0, 0};
+    const word32 s_one32x4[]    = {0, 0, 0, 1};
+    const word32 s_one32x4_1b[] = {0, 0, 0, 1};
+    const word32 s_one32x4_2b[] = {0, 2, 0, 2};
+#endif
+
    const ptrdiff_t blockSize = 8;
    const ptrdiff_t neonBlockSize = 16;

@ -328,7 +318,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
 }

 template <typename F1, typename F6>
-size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
+inline size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
            const word32 *subKeys, size_t rounds, const byte *inBlocks,
            const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@ -337,6 +327,18 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
    CRYPTOPP_ASSERT(outBlocks);
    CRYPTOPP_ASSERT(length >= 16);

+#if defined(CRYPTOPP_LITTLE_ENDIAN)
+    const word32 s_zero32x4[]   = {0, 0, 0, 0};
+    const word32 s_one32x4[]    = {0, 0, 0, 1<<24};
+    const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
+    const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
+#else
+    const word32 s_zero32x4[]   = {0, 0, 0, 0};
+    const word32 s_one32x4[]    = {0, 0, 0, 1};
+    const word32 s_one32x4_1b[] = {0, 0, 0, 1};
+    const word32 s_one32x4_2b[] = {0, 2, 0, 2};
+#endif
+
    const ptrdiff_t blockSize = 16;
    // const ptrdiff_t neonBlockSize = 16;

@ -471,7 +473,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
 }

 template <typename F2, typename F6>
-size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
+inline size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
            const word64 *subKeys, size_t rounds, const byte *inBlocks,
            const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@ -480,6 +482,18 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
    CRYPTOPP_ASSERT(outBlocks);
    CRYPTOPP_ASSERT(length >= 16);

+#if defined(CRYPTOPP_LITTLE_ENDIAN)
+    const word32 s_zero32x4[]   = {0, 0, 0, 0};
+    const word32 s_one32x4[]    = {0, 0, 0, 1<<24};
+    const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
+    const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
+#else
+    const word32 s_zero32x4[]   = {0, 0, 0, 0};
+    const word32 s_one32x4[]    = {0, 0, 0, 1};
+    const word32 s_one32x4_1b[] = {0, 0, 0, 1};
+    const word32 s_one32x4_2b[] = {0, 2, 0, 2};
+#endif
+
    const ptrdiff_t blockSize = 16;
    // const ptrdiff_t neonBlockSize = 16;

@ -692,21 +706,6 @@ NAMESPACE_END  // CryptoPP
 # define CONST_DOUBLE_CAST(x) ((const double *)(const void *)(x))
 #endif

-ANONYMOUS_NAMESPACE_BEGIN
-
-using CryptoPP::word32;
-using CryptoPP::word64;
-
-CRYPTOPP_ALIGN_DATA(16)
-const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
-CRYPTOPP_ALIGN_DATA(16)
-const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
-
-CRYPTOPP_ALIGN_DATA(16)
-const word32 s_one32x4[] = {0, 0, 0, 1<<24};
-
-ANONYMOUS_NAMESPACE_END
-
 NAMESPACE_BEGIN(CryptoPP)

 template <typename F2, typename F6>
@ -719,6 +718,13 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
    CRYPTOPP_ASSERT(outBlocks);
    CRYPTOPP_ASSERT(length >= 8);

+    CRYPTOPP_ALIGN_DATA(16)
+    const word32 s_one32x4[] = {0, 0, 0, 1<<24};
+    CRYPTOPP_ALIGN_DATA(16)
+    const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
+    CRYPTOPP_ALIGN_DATA(16)
+    const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
+
    const ptrdiff_t blockSize = 8;
    const ptrdiff_t xmmBlockSize = 16;

@ -953,6 +959,13 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
    CRYPTOPP_ASSERT(outBlocks);
    CRYPTOPP_ASSERT(length >= 16);

+    CRYPTOPP_ALIGN_DATA(16)
+    const word32 s_one32x4[] = {0, 0, 0, 1<<24};
+    CRYPTOPP_ALIGN_DATA(16)
+    const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
+    CRYPTOPP_ALIGN_DATA(16)
+    const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
+
    const ptrdiff_t blockSize = 16;
    // const ptrdiff_t xmmBlockSize = 16;

@ -1138,6 +1151,13 @@ inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
    CRYPTOPP_ASSERT(outBlocks);
    CRYPTOPP_ASSERT(length >= 16);

+    CRYPTOPP_ALIGN_DATA(16)
+    const word32 s_one32x4[] = {0, 0, 0, 1<<24};
+    CRYPTOPP_ALIGN_DATA(16)
+    const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
+    CRYPTOPP_ALIGN_DATA(16)
+    const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
+
    const ptrdiff_t blockSize = 16;
    // const ptrdiff_t xmmBlockSize = 16;

@ -1258,29 +1278,26 @@ NAMESPACE_END  // CryptoPP

 #if defined(CRYPTOPP_ALTIVEC_AVAILABLE)

-ANONYMOUS_NAMESPACE_BEGIN
-
-using CryptoPP::uint32x4_p;
-
-#if defined(CRYPTOPP_LITTLE_ENDIAN)
-const uint32x4_p s_one = {1,0,0,0};
-#else
-const uint32x4_p s_one = {0,0,0,1};
-#endif
-
-ANONYMOUS_NAMESPACE_END
-
 NAMESPACE_BEGIN(CryptoPP)

 template <typename F1, typename F6>
-size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *subKeys, size_t rounds,
-    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
+inline size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6,
+        const word32 *subKeys, size_t rounds, const byte *inBlocks,
+        const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
    CRYPTOPP_ASSERT(subKeys);
    CRYPTOPP_ASSERT(inBlocks);
    CRYPTOPP_ASSERT(outBlocks);
    CRYPTOPP_ASSERT(length >= 16);

+#if defined(CRYPTOPP_LITTLE_ENDIAN)
+    const uint32x4_p s_zero = {0,0,0,0};
+    const uint32x4_p s_one  = {1,0,0,0};
+#else
+    const uint32x4_p s_zero = {0,0,0,0};
+    const uint32x4_p s_one = {0,0,0,1};
+#endif
+
    const ptrdiff_t blockSize = 16;
    // const ptrdiff_t vexBlockSize = 16;

--- a/rijndael-simd.cpp
+++ b/rijndael-simd.cpp
@ -547,15 +547,6 @@ static const uint32_t s_rconBE[] = {
    0x1B000000, 0x36000000
 };

-/* Permute mask */
-static const uint32_t s_mask[4] = {
-#if defined(CRYPTOPP_LITTLE_ENDIAN)
-    0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
-#else
-    0x0d0e0f0c,0x0d0e0f0c,0x0d0e0f0c,0x0d0e0f0c
-#endif
-};
-
 static inline void POWER8_Enc_Block(uint32x4_p &block, const word32 *subkeys, unsigned int rounds)
 {
    CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16));
@ -713,12 +704,10 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32*
    unsigned int i=0;
    for (i=0; i<rounds; i+=2, rk+=8)
    {
-        uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
-        uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
-        d1 = vec_perm(d1, zero, mask);
-        d2 = vec_perm(d2, zero, mask);
-        vec_vsx_st(d1,  0, (uint8_t*)rk);
-        vec_vsx_st(d2, 16, (uint8_t*)rk);
+        const uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
+        const uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
+        vec_vsx_st(vec_perm(d1, zero, mask),  0, (uint8_t*)rk);
+        vec_vsx_st(vec_perm(d2, zero, mask), 16, (uint8_t*)rk);
    }

    for ( ; i<rounds+1; i++, rk+=4)