Fix ODR violation in AdvancedProcessBlocks_{ARCH} (GH #585)
The ALTIVEC function required an inline declaration; the lack of inline caused the self-test failure. Two NEON functions needed the same. We also cleaned up constants in unnamed namespaces.
pull/594/head
parent
b6fec08da1
commit
33c10bc027
125
adv-simd.h
125
adv-simd.h
|
|
@ -23,6 +23,7 @@
|
|||
//
|
||||
|
||||
#ifndef CRYPTOPP_ADVANCED_SIMD_TEMPLATES
|
||||
#define CRYPTOPP_ADVANCED_SIMD_TEMPLATES
|
||||
|
||||
#include "config.h"
|
||||
#include "misc.h"
|
||||
|
|
@ -70,29 +71,6 @@ ANONYMOUS_NAMESPACE_END
|
|||
|
||||
#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||
|
||||
ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
using CryptoPP::word32;
|
||||
using CryptoPP::word64;
|
||||
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
const word32 s_zero32x4[] = {0, 0, 0, 0};
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
|
||||
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
|
||||
#else
|
||||
const word32 s_zero32x4[] = {0, 0, 0, 0};
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1};
|
||||
const word32 s_one32x4_2b[] = {0, 2, 0, 2};
|
||||
#endif
|
||||
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
|
||||
#else
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1};
|
||||
#endif
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
template <typename F2, typename F6>
|
||||
|
|
@ -105,6 +83,18 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
|
|||
CRYPTOPP_ASSERT(outBlocks);
|
||||
CRYPTOPP_ASSERT(length >= 8);
|
||||
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
const word32 s_zero32x4[] = {0, 0, 0, 0};
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
|
||||
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
|
||||
#else
|
||||
const word32 s_zero32x4[] = {0, 0, 0, 0};
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1};
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1};
|
||||
const word32 s_one32x4_2b[] = {0, 2, 0, 2};
|
||||
#endif
|
||||
|
||||
const ptrdiff_t blockSize = 8;
|
||||
const ptrdiff_t neonBlockSize = 16;
|
||||
|
||||
|
|
@ -328,7 +318,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
|
|||
}
|
||||
|
||||
template <typename F1, typename F6>
|
||||
size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
|
||||
inline size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
|
||||
const word32 *subKeys, size_t rounds, const byte *inBlocks,
|
||||
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
|
||||
{
|
||||
|
|
@ -337,6 +327,18 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
|
|||
CRYPTOPP_ASSERT(outBlocks);
|
||||
CRYPTOPP_ASSERT(length >= 16);
|
||||
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
const word32 s_zero32x4[] = {0, 0, 0, 0};
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
|
||||
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
|
||||
#else
|
||||
const word32 s_zero32x4[] = {0, 0, 0, 0};
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1};
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1};
|
||||
const word32 s_one32x4_2b[] = {0, 2, 0, 2};
|
||||
#endif
|
||||
|
||||
const ptrdiff_t blockSize = 16;
|
||||
// const ptrdiff_t neonBlockSize = 16;
|
||||
|
||||
|
|
@ -471,7 +473,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
|
|||
}
|
||||
|
||||
template <typename F2, typename F6>
|
||||
size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
|
||||
inline size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
|
||||
const word64 *subKeys, size_t rounds, const byte *inBlocks,
|
||||
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
|
||||
{
|
||||
|
|
@ -480,6 +482,18 @@ size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
|
|||
CRYPTOPP_ASSERT(outBlocks);
|
||||
CRYPTOPP_ASSERT(length >= 16);
|
||||
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
const word32 s_zero32x4[] = {0, 0, 0, 0};
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
|
||||
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
|
||||
#else
|
||||
const word32 s_zero32x4[] = {0, 0, 0, 0};
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1};
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1};
|
||||
const word32 s_one32x4_2b[] = {0, 2, 0, 2};
|
||||
#endif
|
||||
|
||||
const ptrdiff_t blockSize = 16;
|
||||
// const ptrdiff_t neonBlockSize = 16;
|
||||
|
||||
|
|
@ -692,21 +706,6 @@ NAMESPACE_END // CryptoPP
|
|||
# define CONST_DOUBLE_CAST(x) ((const double *)(const void *)(x))
|
||||
#endif
|
||||
|
||||
ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
using CryptoPP::word32;
|
||||
using CryptoPP::word64;
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
template <typename F2, typename F6>
|
||||
|
|
@ -719,6 +718,13 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
|
|||
CRYPTOPP_ASSERT(outBlocks);
|
||||
CRYPTOPP_ASSERT(length >= 8);
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
|
||||
|
||||
const ptrdiff_t blockSize = 8;
|
||||
const ptrdiff_t xmmBlockSize = 16;
|
||||
|
||||
|
|
@ -953,6 +959,13 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
|
|||
CRYPTOPP_ASSERT(outBlocks);
|
||||
CRYPTOPP_ASSERT(length >= 16);
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
|
||||
|
||||
const ptrdiff_t blockSize = 16;
|
||||
// const ptrdiff_t xmmBlockSize = 16;
|
||||
|
||||
|
|
@ -1138,6 +1151,13 @@ inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
|
|||
CRYPTOPP_ASSERT(outBlocks);
|
||||
CRYPTOPP_ASSERT(length >= 16);
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4[] = {0, 0, 0, 1<<24};
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4_1b[] = {0, 0, 0, 1<<24};
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one32x4_2b[] = {0, 2<<24, 0, 2<<24};
|
||||
|
||||
const ptrdiff_t blockSize = 16;
|
||||
// const ptrdiff_t xmmBlockSize = 16;
|
||||
|
||||
|
|
@ -1258,29 +1278,26 @@ NAMESPACE_END // CryptoPP
|
|||
|
||||
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
|
||||
ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
using CryptoPP::uint32x4_p;
|
||||
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
const uint32x4_p s_one = {1,0,0,0};
|
||||
#else
|
||||
const uint32x4_p s_one = {0,0,0,1};
|
||||
#endif
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
template <typename F1, typename F6>
|
||||
size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *subKeys, size_t rounds,
|
||||
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
|
||||
inline size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6,
|
||||
const word32 *subKeys, size_t rounds, const byte *inBlocks,
|
||||
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
|
||||
{
|
||||
CRYPTOPP_ASSERT(subKeys);
|
||||
CRYPTOPP_ASSERT(inBlocks);
|
||||
CRYPTOPP_ASSERT(outBlocks);
|
||||
CRYPTOPP_ASSERT(length >= 16);
|
||||
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
const uint32x4_p s_zero = {0,0,0,0};
|
||||
const uint32x4_p s_one = {1,0,0,0};
|
||||
#else
|
||||
const uint32x4_p s_zero = {0,0,0,0};
|
||||
const uint32x4_p s_one = {0,0,0,1};
|
||||
#endif
|
||||
|
||||
const ptrdiff_t blockSize = 16;
|
||||
// const ptrdiff_t vexBlockSize = 16;
|
||||
|
||||
|
|
|
|||
|
|
@ -547,15 +547,6 @@ static const uint32_t s_rconBE[] = {
|
|||
0x1B000000, 0x36000000
|
||||
};
|
||||
|
||||
/* Permute mask */
|
||||
static const uint32_t s_mask[4] = {
|
||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
|
||||
#else
|
||||
0x0d0e0f0c,0x0d0e0f0c,0x0d0e0f0c,0x0d0e0f0c
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline void POWER8_Enc_Block(uint32x4_p &block, const word32 *subkeys, unsigned int rounds)
|
||||
{
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16));
|
||||
|
|
@ -713,12 +704,10 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32*
|
|||
unsigned int i=0;
|
||||
for (i=0; i<rounds; i+=2, rk+=8)
|
||||
{
|
||||
uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
|
||||
uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
|
||||
d1 = vec_perm(d1, zero, mask);
|
||||
d2 = vec_perm(d2, zero, mask);
|
||||
vec_vsx_st(d1, 0, (uint8_t*)rk);
|
||||
vec_vsx_st(d2, 16, (uint8_t*)rk);
|
||||
const uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rk);
|
||||
const uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rk);
|
||||
vec_vsx_st(vec_perm(d1, zero, mask), 0, (uint8_t*)rk);
|
||||
vec_vsx_st(vec_perm(d2, zero, mask), 16, (uint8_t*)rk);
|
||||
}
|
||||
|
||||
for ( ; i<rounds+1; i++, rk+=4)
|
||||
|
|
|
|||
Loading…
Reference in New Issue