Migrate from Power7 to Power8 unaligned loads

pull/826/head
Jeffrey Walton 2019-04-27 18:02:38 -04:00
parent d451751eb2
commit 3a8f87490a
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
13 changed files with 86 additions and 86 deletions

View File

@ -443,26 +443,26 @@ ifeq ($(DETECT_FEATURES),1)
ifneq ($(IS_ARM32),0)
TPROG = TestPrograms/test_arm_neon.cxx
TOPT = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
TOPT = -march=armv7-a -mfpu=neon
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
NEON_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
ARIA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
AES_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
CRC_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
BLAKE2B_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
BLAKE2S_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
CHACHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
CHAM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
LEA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
SHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
SIMECK_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
SIMON64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
SIMON128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
SPECK64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
SPECK128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
SM4_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
NEON_FLAG = -march=armv7-a -mfpu=neon
ARIA_FLAG = -march=armv7-a -mfpu=neon
AES_FLAG = -march=armv7-a -mfpu=neon
CRC_FLAG = -march=armv7-a -mfpu=neon
GCM_FLAG = -march=armv7-a -mfpu=neon
BLAKE2B_FLAG = -march=armv7-a -mfpu=neon
BLAKE2S_FLAG = -march=armv7-a -mfpu=neon
CHACHA_FLAG = -march=armv7-a -mfpu=neon
CHAM_FLAG = -march=armv7-a -mfpu=neon
LEA_FLAG = -march=armv7-a -mfpu=neon
SHA_FLAG = -march=armv7-a -mfpu=neon
SIMECK_FLAG = -march=armv7-a -mfpu=neon
SIMON64_FLAG = -march=armv7-a -mfpu=neon
SIMON128_FLAG = -march=armv7-a -mfpu=neon
SPECK64_FLAG = -march=armv7-a -mfpu=neon
SPECK128_FLAG = -march=armv7-a -mfpu=neon
SM4_FLAG = -march=armv7-a -mfpu=neon
else
CXXFLAGS += -DCRYPTOPP_DISABLE_ASM
endif
@ -621,12 +621,16 @@ ifeq ($(DETECT_FEATURES),1)
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
BLAKE2B_FLAG = $(POWER8_FLAG)
BLAKE2S_FLAG = $(POWER8_FLAG)
CHACHA_FLAG = $(POWER8_FLAG)
CRC_FLAG = $(POWER8_FLAG)
GCM_FLAG = $(POWER8_FLAG)
GF2N_FLAG = $(POWER8_FLAG)
AES_FLAG = $(POWER8_FLAG)
SHA_FLAG = $(POWER8_FLAG)
SHACAL2_FLAG = $(POWER8_FLAG)
SIMON64_FLAG = $(POWER8_FLAG)
SPECK64_FLAG = $(POWER8_FLAG)
SIMON128_FLAG = $(POWER8_FLAG)
SPECK128_FLAG = $(POWER8_FLAG)
else
@ -641,13 +645,9 @@ ifeq ($(DETECT_FEATURES),1)
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
ARIA_FLAG = $(POWER7_FLAG)
BLAKE2S_FLAG = $(POWER7_FLAG)
CHACHA_FLAG = $(POWER7_FLAG)
CHAM_FLAG = $(POWER7_FLAG)
LEA_FLAG = $(POWER7_FLAG)
SIMECK_FLAG = $(POWER7_FLAG)
SIMON64_FLAG = $(POWER7_FLAG)
SPECK64_FLAG = $(POWER7_FLAG)
else
POWER7_FLAG =
endif
@ -698,8 +698,8 @@ ifeq ($(DETECT_FEATURES),1)
endif
endif
# Drop to Power4 if Power7 not available
ifeq ($(POWER7_FLAG),)
# Drop to Power4 if Power8 not available
ifeq ($(POWER8_FLAG),)
ifneq ($(ALTIVEC_FLAG),)
BLAKE2S_FLAG = $(ALTIVEC_FLAG)
CHACHA_FLAG = $(ALTIVEC_FLAG)
@ -1428,7 +1428,7 @@ endif # Dependencies
# Cryptogams ARM asm implementation.
aes_armv4.o : aes_armv4.S
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -mfloat-abi=$(FP_ABI) -c) $<
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -c) $<
# SSSE3 or NEON available
aria_simd.o : aria_simd.cpp

View File

@ -38,8 +38,8 @@
// https://github.com/weidai11/cryptopp/issues/743
#if defined(__xlC__) && (__xlC__ < 0x0d01)
# define CRYPTOPP_DISABLE_ALTIVEC 1
# define CRYPTOPP_POWER7_ALTIVEC 1
# undef CRYPTOPP_POWER7_AVAILABLE
# define CRYPTOPP_POWER8_ALTIVEC 1
# undef CRYPTOPP_POWER8_AVAILABLE
# undef CRYPTOPP_ALTIVEC_AVAILABLE
#endif
@ -171,8 +171,8 @@ extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state);
extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state);
#endif
#if CRYPTOPP_POWER7_AVAILABLE
extern void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state);
#if CRYPTOPP_POWER8_AVAILABLE
extern void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state);
#elif CRYPTOPP_ALTIVEC_AVAILABLE
extern void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state);
#endif
@ -233,8 +233,8 @@ unsigned int BLAKE2s::OptimalDataAlignment() const
return 4;
else
#endif
#if (CRYPTOPP_POWER7_AVAILABLE)
if (HasPower7())
#if (CRYPTOPP_POWER8_AVAILABLE)
if (HasPower8())
return 16;
else
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
@ -257,8 +257,8 @@ std::string BLAKE2s::AlgorithmProvider() const
return "NEON";
else
#endif
#if (CRYPTOPP_POWER7_AVAILABLE)
if (HasPower7())
#if (CRYPTOPP_POWER8_AVAILABLE)
if (HasPower8())
return "Power7";
else
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
@ -690,10 +690,10 @@ void BLAKE2s::Compress(const byte *input)
return BLAKE2_Compress32_NEON(input, m_state);
}
#endif
#if CRYPTOPP_POWER7_AVAILABLE
if(HasPower7())
#if CRYPTOPP_POWER8_AVAILABLE
if(HasPower8())
{
return BLAKE2_Compress32_POWER7(input, m_state);
return BLAKE2_Compress32_POWER8(input, m_state);
}
#elif CRYPTOPP_ALTIVEC_AVAILABLE
if(HasAltivec())

View File

@ -28,7 +28,7 @@
// https://github.com/weidai11/cryptopp/issues/743
#if defined(__xlC__) && (__xlC__ < 0x0d01)
# define CRYPTOPP_DISABLE_ALTIVEC 1
# undef CRYPTOPP_POWER7_AVAILABLE
# undef CRYPTOPP_POWER8_AVAILABLE
# undef CRYPTOPP_ALTIVEC_AVAILABLE
#endif

View File

@ -38,8 +38,8 @@
// https://github.com/weidai11/cryptopp/issues/743
#if defined(__xlC__) && (__xlC__ < 0x0d01)
# define CRYPTOPP_DISABLE_ALTIVEC 1
# define CRYPTOPP_POWER7_ALTIVEC 1
# undef CRYPTOPP_POWER7_AVAILABLE
# define CRYPTOPP_POWER8_ALTIVEC 1
# undef CRYPTOPP_POWER8_AVAILABLE
# undef CRYPTOPP_ALTIVEC_AVAILABLE
#endif
@ -692,7 +692,7 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state)
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
inline uint32x4_p VecLoad32(const void* p)
{
@ -1015,11 +1015,11 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state)
VecStore32LE(state.h()+0, VecXor(ff0, VecXor(row1, row3)));
VecStore32LE(state.h()+4, VecXor(ff1, VecXor(row2, row4)));
}
#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
#if (CRYPTOPP_POWER7_AVAILABLE)
#if (CRYPTOPP_POWER8_AVAILABLE)
void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state)
void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state)
{
BLAKE2_Compress32_CORE(input, state);
}

View File

@ -28,8 +28,8 @@ extern void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input,
extern void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds);
#endif
#if (CRYPTOPP_POWER7_AVAILABLE)
extern void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds);
#if (CRYPTOPP_POWER8_AVAILABLE)
extern void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds);
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
extern void ChaCha_OperateKeystream_ALTIVEC(const word32 *state, const byte* input, byte *output, unsigned int rounds);
#endif
@ -153,13 +153,13 @@ void ChaCha_OperateKeystream(KeystreamOperation operation,
}
#endif
#if (CRYPTOPP_POWER7_AVAILABLE)
if (HasPower7())
#if (CRYPTOPP_POWER8_AVAILABLE)
if (HasPower8())
{
while (iterationCount >= 4 && MultiBlockSafe(state[12], 4))
{
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
ChaCha_OperateKeystream_POWER7(state, xorInput ? input : NULLPTR, output, rounds);
ChaCha_OperateKeystream_POWER8(state, xorInput ? input : NULLPTR, output, rounds);
// MultiBlockSafe avoids overflow on the counter words
state[12] += 4;
@ -267,8 +267,8 @@ std::string ChaCha_AlgorithmProvider()
return "NEON";
else
#endif
#if (CRYPTOPP_POWER7_AVAILABLE)
if (HasPower7())
#if (CRYPTOPP_POWER8_AVAILABLE)
if (HasPower8())
return "Power7";
else
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)

View File

@ -209,7 +209,7 @@ inline __m128i RotateLeft<16>(const __m128i val)
#if (CRYPTOPP_ALTIVEC_AVAILABLE)
// ChaCha_OperateKeystream_POWER7 is optimized for POWER7. However, Altivec
// ChaCha_OperateKeystream_POWER8 is optimized for POWER7. However, Altivec
// is supported by using vec_ld and vec_st, and using a composite VecAdd
// that supports 64-bit element adds. vec_ld and vec_st add significant
// overhead when memory is not aligned. Despite the drawbacks Altivec
@ -827,7 +827,7 @@ void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
// ChaCha_OperateKeystream_CORE will use either POWER7 or ALTIVEC,
// depending on the flags used to compile this source file. The
@ -1096,11 +1096,11 @@ inline void ChaCha_OperateKeystream_CORE(const word32 *state, const byte* input,
VecStore32LE(output + 15*16, r3_3);
}
#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
#if (CRYPTOPP_POWER7_AVAILABLE)
#if (CRYPTOPP_POWER8_AVAILABLE)
void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds)
void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds)
{
ChaCha_OperateKeystream_CORE(state, input, output, rounds);
}

22
gcm.cpp
View File

@ -75,8 +75,8 @@ extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
#endif
#if CRYPTOPP_POWER7_AVAILABLE
extern void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c);
#if CRYPTOPP_POWER8_AVAILABLE
extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
#endif
#if CRYPTOPP_CLMUL_AVAILABLE
@ -213,11 +213,11 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (k=1; k<j; k++)
GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
else
#elif CRYPTOPP_POWER7_AVAILABLE
if (HasPower7())
#elif CRYPTOPP_POWER8_AVAILABLE
if (HasPower8())
for (j=2; j<=0x80; j*=2)
for (k=1; k<j; k++)
GCM_Xor16_POWER7(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
else
#endif
for (j=2; j<=0x80; j*=2)
@ -277,13 +277,13 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
}
else
#elif CRYPTOPP_POWER7_AVAILABLE
if (HasPower7())
#elif CRYPTOPP_POWER8_AVAILABLE
if (HasPower8())
for (j=2; j<=8; j*=2)
for (k=1; k<j; k++)
{
GCM_Xor16_POWER7(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
GCM_Xor16_POWER7(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
}
else
#endif
@ -369,8 +369,8 @@ unsigned int GCM_Base::OptimalDataAlignment() const
HasSSE2() ? 16 :
#elif CRYPTOPP_ARM_NEON_AVAILABLE
HasNEON() ? 4 :
#elif CRYPTOPP_POWER7_AVAILABLE
HasPower7() ? 16 :
#elif CRYPTOPP_POWER8_AVAILABLE
HasPower8() ? 16 :
#endif
GetBlockCipher().OptimalDataAlignment();
}

View File

@ -569,12 +569,12 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
// ***************************** POWER8 ***************************** //
#if CRYPTOPP_POWER7_AVAILABLE
void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c)
#if CRYPTOPP_POWER8_AVAILABLE
void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c)
{
VecStore(VecXor(VecLoad(b), VecLoad(c)), a);
}
#endif // CRYPTOPP_POWER7_AVAILABLE
#endif // CRYPTOPP_POWER8_AVAILABLE
#if CRYPTOPP_POWER8_VMULL_AVAILABLE

View File

@ -255,8 +255,8 @@ std::string SIMON64::Base::AlgorithmProvider() const
if (HasNEON())
return "NEON";
# endif
# if (CRYPTOPP_POWER7_AVAILABLE)
if (HasPower7())
# if (CRYPTOPP_POWER8_AVAILABLE)
if (HasPower8())
return "Power7";
# endif
# if (CRYPTOPP_ALTIVEC_AVAILABLE)

View File

@ -44,7 +44,7 @@
# include <arm_acle.h>
#endif
#if defined(CRYPTOPP_POWER7_AVAILABLE)
#if defined(CRYPTOPP_POWER8_AVAILABLE)
# include "adv_simd.h"
# include "ppc_simd.h"
#endif

View File

@ -576,7 +576,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk1 = vec_splats(subkeys[i]);
const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
#else
@ -592,7 +592,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
if (rounds & 1)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
#else
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@ -634,7 +634,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
if (rounds & 1)
{
std::swap(x1, y1);
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
#else
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@ -647,7 +647,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
const uint32x4_p rk2 = vec_splats(subkeys[i]);
#else
@ -696,7 +696,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk1 = vec_splats(subkeys[i]);
const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
#else
@ -717,7 +717,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
if (rounds & 1)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
#else
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@ -771,7 +771,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
{
std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3);
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
#else
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@ -786,7 +786,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
const uint32x4_p rk2 = vec_splats(subkeys[i]);
#else

View File

@ -235,8 +235,8 @@ std::string SPECK64::Base::AlgorithmProvider() const
if (HasNEON())
return "NEON";
# endif
# if (CRYPTOPP_POWER7_AVAILABLE)
if (HasPower7())
# if (CRYPTOPP_POWER8_AVAILABLE)
if (HasPower8())
return "Power7";
# endif
# if (CRYPTOPP_ALTIVEC_AVAILABLE)

View File

@ -517,7 +517,7 @@ void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
for (int i=0; i < static_cast<int>(rounds); ++i)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk = vec_splats(subkeys[i]);
#else
// subkeys has extra elements so memory backs the last subkey
@ -564,7 +564,7 @@ void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk = vec_splats(subkeys[i]);
#else
// subkeys has extra elements so memory backs the last subkey
@ -616,7 +616,7 @@ void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
for (int i=0; i < static_cast<int>(rounds); ++i)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk = vec_splats(subkeys[i]);
#else
// subkeys has extra elements so memory backs the last subkey
@ -685,7 +685,7 @@ void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
{
#if CRYPTOPP_POWER7_AVAILABLE
#if CRYPTOPP_POWER8_AVAILABLE
const uint32x4_p rk = vec_splats(subkeys[i]);
#else
// subkeys has extra elements so memory backs the last subkey