Fix ODR violation in AdvancedProcessBlocks_{ARCH} (GH #585)

The ALTIVEC function required an inline declaration; the lack of inline caused the self-test failure. Two NEON functions needed the same fix. We also cleaned up the constants in unnamed namespaces by moving them into the functions that use them.
pull/594/head
Jeffrey Walton 2018-02-20 13:17:05 -05:00
parent b6fec08da1
commit 33c10bc027
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 75 additions and 69 deletions

View File

@ -23,6 +23,7 @@
//
#ifndef CRYPTOPP_ADVANCED_SIMD_TEMPLATES
#define CRYPTOPP_ADVANCED_SIMD_TEMPLATES
#include "config.h"
#include "misc.h"
@ -70,29 +71,6 @@ ANONYMOUS_NAMESPACE_END
#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
ANONYMOUS_NAMESPACE_BEGIN
using CryptoPP::word32;
using CryptoPP::word64;
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const word32 s_zero32x4[] = {0, 0, 0, 0};
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
#else
const word32 s_zero32x4[] = {0, 0, 0, 0};
const word32 s_one32x4_1b[] = {0, 0, 0, 1};
const word32 s_one32x4_2b[] = {0, 2, 0, 2};
#endif
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
#else
const word32 s_one32x4[] = {0, 0, 0, 1};
#endif
ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
template <typename F2, typename F6>
@ -105,6 +83,18 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 8);
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const word32 s_zero32x4[] = {0, 0, 0, 0};
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
#else
const word32 s_zero32x4[] = {0, 0, 0, 0};
const word32 s_one32x4[] = {0, 0, 0, 1};
const word32 s_one32x4_1b[] = {0, 0, 0, 1};
const word32 s_one32x4_2b[] = {0, 2, 0, 2};
#endif
const ptrdiff_t blockSize = 8;
const ptrdiff_t neonBlockSize = 16;
@ -328,7 +318,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
}
template <typename F1, typename F6>
size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
inline size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
const word32 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@ -337,6 +327,18 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const word32 s_zero32x4[] = {0, 0, 0, 0};
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
#else
const word32 s_zero32x4[] = {0, 0, 0, 0};
const word32 s_one32x4[] = {0, 0, 0, 1};
const word32 s_one32x4_1b[] = {0, 0, 0, 1};
const word32 s_one32x4_2b[] = {0, 2, 0, 2};
#endif
const ptrdiff_t blockSize = 16;
// const ptrdiff_t neonBlockSize = 16;
@ -471,7 +473,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
}
template <typename F2, typename F6>
size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
inline size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
const word64 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@ -480,6 +482,18 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const word32 s_zero32x4[] = {0, 0, 0, 0};
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
#else
const word32 s_zero32x4[] = {0, 0, 0, 0};
const word32 s_one32x4[] = {0, 0, 0, 1};
const word32 s_one32x4_1b[] = {0, 0, 0, 1};
const word32 s_one32x4_2b[] = {0, 2, 0, 2};
#endif
const ptrdiff_t blockSize = 16;
// const ptrdiff_t neonBlockSize = 16;
@ -692,21 +706,6 @@ NAMESPACE_END // CryptoPP
# define CONST_DOUBLE_CAST(x) ((const double *)(const void *)(x))
#endif
ANONYMOUS_NAMESPACE_BEGIN
using CryptoPP::word32;
using CryptoPP::word64;
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
template <typename F2, typename F6>
@ -719,6 +718,13 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 8);
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
const ptrdiff_t blockSize = 8;
const ptrdiff_t xmmBlockSize = 16;
@ -953,6 +959,13 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
const ptrdiff_t blockSize = 16;
// const ptrdiff_t xmmBlockSize = 16;
@ -1138,6 +1151,13 @@ inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
const ptrdiff_t blockSize = 16;
// const ptrdiff_t xmmBlockSize = 16;
@ -1258,29 +1278,26 @@ NAMESPACE_END // CryptoPP
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
ANONYMOUS_NAMESPACE_BEGIN
using CryptoPP::uint32x4_p;
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const uint32x4_p s_one = {1,0,0,0};
#else
const uint32x4_p s_one = {0,0,0,1};
#endif
ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
template <typename F1, typename F6>
size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
inline size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6,
const word32 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
CRYPTOPP_ASSERT(subKeys);
CRYPTOPP_ASSERT(inBlocks);
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const uint32x4_p s_zero = {0,0,0,0};
const uint32x4_p s_one = {1,0,0,0};
#else
const uint32x4_p s_zero = {0,0,0,0};
const uint32x4_p s_one = {0,0,0,1};
#endif
const ptrdiff_t blockSize = 16;
// const ptrdiff_t vexBlockSize = 16;

View File

@ -547,15 +547,6 @@ static const uint32_t s_rconBE[] = {
0x1B000000, 0x36000000
};
/* Permute mask */
static const uint32_t s_mask[4] = {
#if defined(CRYPTOPP_LITTLE_ENDIAN)
0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
#else
0x0d0e0f0c,0x0d0e0f0c,0x0d0e0f0c,0x0d0e0f0c
#endif
};
static inline void POWER8_Enc_Block(uint32x4_p &block, const word32 *subkeys, unsigned int rounds)
{
CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16));
@ -713,12 +704,10 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32*
unsigned int i=0;
for (i=0; i<rounds; i+=2, rk+=8)
{
uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
d1 = vec_perm(d1, zero, mask);
d2 = vec_perm(d2, zero, mask);
vec_vsx_st(d1, 0, (uint8_t*)rk);
vec_vsx_st(d2, 16, (uint8_t*)rk);
const uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
const uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
vec_vsx_st(vec_perm(d1, zero, mask), 0, (uint8_t*)rk);
vec_vsx_st(vec_perm(d2, zero, mask), 16, (uint8_t*)rk);
}
for ( ; i<rounds+1; i++, rk+=4)