Migrate from Power7 to Power8 unaligned loads
parent
d451751eb2
commit
3a8f87490a
50
GNUmakefile
50
GNUmakefile
|
|
@ -443,26 +443,26 @@ ifeq ($(DETECT_FEATURES),1)
|
|||
ifneq ($(IS_ARM32),0)
|
||||
|
||||
TPROG = TestPrograms/test_arm_neon.cxx
|
||||
TOPT = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
TOPT = -march=armv7-a -mfpu=neon
|
||||
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
|
||||
ifeq ($(strip $(HAVE_OPT)),0)
|
||||
NEON_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
ARIA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
AES_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
CRC_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
BLAKE2B_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
BLAKE2S_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
CHACHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
CHAM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
LEA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
SHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
SIMECK_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
SIMON64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
SIMON128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
SPECK64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
SPECK128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
SM4_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
||||
NEON_FLAG = -march=armv7-a -mfpu=neon
|
||||
ARIA_FLAG = -march=armv7-a -mfpu=neon
|
||||
AES_FLAG = -march=armv7-a -mfpu=neon
|
||||
CRC_FLAG = -march=armv7-a -mfpu=neon
|
||||
GCM_FLAG = -march=armv7-a -mfpu=neon
|
||||
BLAKE2B_FLAG = -march=armv7-a -mfpu=neon
|
||||
BLAKE2S_FLAG = -march=armv7-a -mfpu=neon
|
||||
CHACHA_FLAG = -march=armv7-a -mfpu=neon
|
||||
CHAM_FLAG = -march=armv7-a -mfpu=neon
|
||||
LEA_FLAG = -march=armv7-a -mfpu=neon
|
||||
SHA_FLAG = -march=armv7-a -mfpu=neon
|
||||
SIMECK_FLAG = -march=armv7-a -mfpu=neon
|
||||
SIMON64_FLAG = -march=armv7-a -mfpu=neon
|
||||
SIMON128_FLAG = -march=armv7-a -mfpu=neon
|
||||
SPECK64_FLAG = -march=armv7-a -mfpu=neon
|
||||
SPECK128_FLAG = -march=armv7-a -mfpu=neon
|
||||
SM4_FLAG = -march=armv7-a -mfpu=neon
|
||||
else
|
||||
CXXFLAGS += -DCRYPTOPP_DISABLE_ASM
|
||||
endif
|
||||
|
|
@ -621,12 +621,16 @@ ifeq ($(DETECT_FEATURES),1)
|
|||
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
|
||||
ifeq ($(strip $(HAVE_OPT)),0)
|
||||
BLAKE2B_FLAG = $(POWER8_FLAG)
|
||||
BLAKE2S_FLAG = $(POWER8_FLAG)
|
||||
CHACHA_FLAG = $(POWER8_FLAG)
|
||||
CRC_FLAG = $(POWER8_FLAG)
|
||||
GCM_FLAG = $(POWER8_FLAG)
|
||||
GF2N_FLAG = $(POWER8_FLAG)
|
||||
AES_FLAG = $(POWER8_FLAG)
|
||||
SHA_FLAG = $(POWER8_FLAG)
|
||||
SHACAL2_FLAG = $(POWER8_FLAG)
|
||||
SIMON64_FLAG = $(POWER8_FLAG)
|
||||
SPECK64_FLAG = $(POWER8_FLAG)
|
||||
SIMON128_FLAG = $(POWER8_FLAG)
|
||||
SPECK128_FLAG = $(POWER8_FLAG)
|
||||
else
|
||||
|
|
@ -641,13 +645,9 @@ ifeq ($(DETECT_FEATURES),1)
|
|||
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
|
||||
ifeq ($(strip $(HAVE_OPT)),0)
|
||||
ARIA_FLAG = $(POWER7_FLAG)
|
||||
BLAKE2S_FLAG = $(POWER7_FLAG)
|
||||
CHACHA_FLAG = $(POWER7_FLAG)
|
||||
CHAM_FLAG = $(POWER7_FLAG)
|
||||
LEA_FLAG = $(POWER7_FLAG)
|
||||
SIMECK_FLAG = $(POWER7_FLAG)
|
||||
SIMON64_FLAG = $(POWER7_FLAG)
|
||||
SPECK64_FLAG = $(POWER7_FLAG)
|
||||
else
|
||||
POWER7_FLAG =
|
||||
endif
|
||||
|
|
@ -698,8 +698,8 @@ ifeq ($(DETECT_FEATURES),1)
|
|||
endif
|
||||
endif
|
||||
|
||||
# Drop to Power4 if Power7 not available
|
||||
ifeq ($(POWER7_FLAG),)
|
||||
# Drop to Power4 if Power8 not available
|
||||
ifeq ($(POWER8_FLAG),)
|
||||
ifneq ($(ALTIVEC_FLAG),)
|
||||
BLAKE2S_FLAG = $(ALTIVEC_FLAG)
|
||||
CHACHA_FLAG = $(ALTIVEC_FLAG)
|
||||
|
|
@ -1428,7 +1428,7 @@ endif # Dependencies
|
|||
|
||||
# Cryptogams ARM asm implementation.
|
||||
aes_armv4.o : aes_armv4.S
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -mfloat-abi=$(FP_ABI) -c) $<
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -c) $<
|
||||
|
||||
# SSSE3 or NEON available
|
||||
aria_simd.o : aria_simd.cpp
|
||||
|
|
|
|||
22
blake2.cpp
22
blake2.cpp
|
|
@ -38,8 +38,8 @@
|
|||
// https://github.com/weidai11/cryptopp/issues/743
|
||||
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
||||
# define CRYPTOPP_DISABLE_ALTIVEC 1
|
||||
# define CRYPTOPP_POWER7_ALTIVEC 1
|
||||
# undef CRYPTOPP_POWER7_AVAILABLE
|
||||
# define CRYPTOPP_POWER8_ALTIVEC 1
|
||||
# undef CRYPTOPP_POWER8_AVAILABLE
|
||||
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
#endif
|
||||
|
||||
|
|
@ -171,8 +171,8 @@ extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state);
|
|||
extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state);
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
extern void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state);
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
extern void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state);
|
||||
#elif CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
extern void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state);
|
||||
#endif
|
||||
|
|
@ -233,8 +233,8 @@ unsigned int BLAKE2s::OptimalDataAlignment() const
|
|||
return 4;
|
||||
else
|
||||
#endif
|
||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
||||
if (HasPower7())
|
||||
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||
if (HasPower8())
|
||||
return 16;
|
||||
else
|
||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
|
|
@ -257,8 +257,8 @@ std::string BLAKE2s::AlgorithmProvider() const
|
|||
return "NEON";
|
||||
else
|
||||
#endif
|
||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
||||
if (HasPower7())
|
||||
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||
if (HasPower8())
|
||||
return "Power7";
|
||||
else
|
||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
|
|
@ -690,10 +690,10 @@ void BLAKE2s::Compress(const byte *input)
|
|||
return BLAKE2_Compress32_NEON(input, m_state);
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
if(HasPower7())
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
if(HasPower8())
|
||||
{
|
||||
return BLAKE2_Compress32_POWER7(input, m_state);
|
||||
return BLAKE2_Compress32_POWER8(input, m_state);
|
||||
}
|
||||
#elif CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
if(HasAltivec())
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
// https://github.com/weidai11/cryptopp/issues/743
|
||||
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
||||
# define CRYPTOPP_DISABLE_ALTIVEC 1
|
||||
# undef CRYPTOPP_POWER7_AVAILABLE
|
||||
# undef CRYPTOPP_POWER8_AVAILABLE
|
||||
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -38,8 +38,8 @@
|
|||
// https://github.com/weidai11/cryptopp/issues/743
|
||||
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
||||
# define CRYPTOPP_DISABLE_ALTIVEC 1
|
||||
# define CRYPTOPP_POWER7_ALTIVEC 1
|
||||
# undef CRYPTOPP_POWER7_AVAILABLE
|
||||
# define CRYPTOPP_POWER8_ALTIVEC 1
|
||||
# undef CRYPTOPP_POWER8_AVAILABLE
|
||||
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
#endif
|
||||
|
||||
|
|
@ -692,7 +692,7 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state)
|
|||
}
|
||||
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
|
||||
#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
|
||||
inline uint32x4_p VecLoad32(const void* p)
|
||||
{
|
||||
|
|
@ -1015,11 +1015,11 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state)
|
|||
VecStore32LE(state.h()+0, VecXor(ff0, VecXor(row1, row3)));
|
||||
VecStore32LE(state.h()+4, VecXor(ff1, VecXor(row2, row4)));
|
||||
}
|
||||
#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
|
||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
||||
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||
|
||||
void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state)
|
||||
void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state)
|
||||
{
|
||||
BLAKE2_Compress32_CORE(input, state);
|
||||
}
|
||||
|
|
|
|||
14
chacha.cpp
14
chacha.cpp
|
|
@ -28,8 +28,8 @@ extern void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input,
|
|||
extern void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
||||
extern void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
||||
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||
extern void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
extern void ChaCha_OperateKeystream_ALTIVEC(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
||||
#endif
|
||||
|
|
@ -153,13 +153,13 @@ void ChaCha_OperateKeystream(KeystreamOperation operation,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
||||
if (HasPower7())
|
||||
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||
if (HasPower8())
|
||||
{
|
||||
while (iterationCount >= 4 && MultiBlockSafe(state[12], 4))
|
||||
{
|
||||
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
||||
ChaCha_OperateKeystream_POWER7(state, xorInput ? input : NULLPTR, output, rounds);
|
||||
ChaCha_OperateKeystream_POWER8(state, xorInput ? input : NULLPTR, output, rounds);
|
||||
|
||||
// MultiBlockSafe avoids overflow on the counter words
|
||||
state[12] += 4;
|
||||
|
|
@ -267,8 +267,8 @@ std::string ChaCha_AlgorithmProvider()
|
|||
return "NEON";
|
||||
else
|
||||
#endif
|
||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
||||
if (HasPower7())
|
||||
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||
if (HasPower8())
|
||||
return "Power7";
|
||||
else
|
||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ inline __m128i RotateLeft<16>(const __m128i val)
|
|||
|
||||
#if (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
|
||||
// ChaCha_OperateKeystream_POWER7 is optimized for POWER7. However, Altivec
|
||||
// ChaCha_OperateKeystream_POWER8 is optimized for POWER8. However, Altivec
|
||||
// is supported by using vec_ld and vec_st, and using a composite VecAdd
|
||||
// that supports 64-bit element adds. vec_ld and vec_st add significant
|
||||
// overhead when memory is not aligned. Despite the drawbacks Altivec
|
||||
|
|
@ -827,7 +827,7 @@ void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *
|
|||
|
||||
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||
|
||||
#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
|
||||
// ChaCha_OperateKeystream_CORE will use either POWER8 or ALTIVEC,
|
||||
// depending on the flags used to compile this source file. The
|
||||
|
|
@ -1096,11 +1096,11 @@ inline void ChaCha_OperateKeystream_CORE(const word32 *state, const byte* input,
|
|||
VecStore32LE(output + 15*16, r3_3);
|
||||
}
|
||||
|
||||
#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
|
||||
|
||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
||||
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||
|
||||
void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds)
|
||||
void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds)
|
||||
{
|
||||
ChaCha_OperateKeystream_CORE(state, input, output, rounds);
|
||||
}
|
||||
|
|
|
|||
22
gcm.cpp
22
gcm.cpp
|
|
@ -75,8 +75,8 @@ extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
|
|||
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
extern void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c);
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_CLMUL_AVAILABLE
|
||||
|
|
@ -213,11 +213,11 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
|||
for (k=1; k<j; k++)
|
||||
GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
|
||||
else
|
||||
#elif CRYPTOPP_POWER7_AVAILABLE
|
||||
if (HasPower7())
|
||||
#elif CRYPTOPP_POWER8_AVAILABLE
|
||||
if (HasPower8())
|
||||
for (j=2; j<=0x80; j*=2)
|
||||
for (k=1; k<j; k++)
|
||||
GCM_Xor16_POWER7(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
|
||||
GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
|
||||
else
|
||||
#endif
|
||||
for (j=2; j<=0x80; j*=2)
|
||||
|
|
@ -277,13 +277,13 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
|||
GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
|
||||
}
|
||||
else
|
||||
#elif CRYPTOPP_POWER7_AVAILABLE
|
||||
if (HasPower7())
|
||||
#elif CRYPTOPP_POWER8_AVAILABLE
|
||||
if (HasPower8())
|
||||
for (j=2; j<=8; j*=2)
|
||||
for (k=1; k<j; k++)
|
||||
{
|
||||
GCM_Xor16_POWER7(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
|
||||
GCM_Xor16_POWER7(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
|
||||
GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
|
||||
GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
|
@ -369,8 +369,8 @@ unsigned int GCM_Base::OptimalDataAlignment() const
|
|||
HasSSE2() ? 16 :
|
||||
#elif CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
HasNEON() ? 4 :
|
||||
#elif CRYPTOPP_POWER7_AVAILABLE
|
||||
HasPower7() ? 16 :
|
||||
#elif CRYPTOPP_POWER8_AVAILABLE
|
||||
HasPower8() ? 16 :
|
||||
#endif
|
||||
GetBlockCipher().OptimalDataAlignment();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -569,12 +569,12 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
|
|||
|
||||
// ***************************** POWER8 ***************************** //
|
||||
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c)
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c)
|
||||
{
|
||||
VecStore(VecXor(VecLoad(b), VecLoad(c)), a);
|
||||
}
|
||||
#endif // CRYPTOPP_POWER7_AVAILABLE
|
||||
#endif // CRYPTOPP_POWER8_AVAILABLE
|
||||
|
||||
#if CRYPTOPP_POWER8_VMULL_AVAILABLE
|
||||
|
||||
|
|
|
|||
|
|
@ -255,8 +255,8 @@ std::string SIMON64::Base::AlgorithmProvider() const
|
|||
if (HasNEON())
|
||||
return "NEON";
|
||||
# endif
|
||||
# if (CRYPTOPP_POWER7_AVAILABLE)
|
||||
if (HasPower7())
|
||||
# if (CRYPTOPP_POWER8_AVAILABLE)
|
||||
if (HasPower8())
|
||||
return "Power7";
|
||||
# endif
|
||||
# if (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@
|
|||
# include <arm_acle.h>
|
||||
#endif
|
||||
|
||||
#if defined(CRYPTOPP_POWER7_AVAILABLE)
|
||||
#if defined(CRYPTOPP_POWER8_AVAILABLE)
|
||||
# include "adv_simd.h"
|
||||
# include "ppc_simd.h"
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -576,7 +576,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk1 = vec_splats(subkeys[i]);
|
||||
const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
|
||||
#else
|
||||
|
|
@ -592,7 +592,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
if (rounds & 1)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
||||
#else
|
||||
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
||||
|
|
@ -634,7 +634,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
|
|||
if (rounds & 1)
|
||||
{
|
||||
std::swap(x1, y1);
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
||||
#else
|
||||
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
||||
|
|
@ -647,7 +647,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
|
||||
const uint32x4_p rk2 = vec_splats(subkeys[i]);
|
||||
#else
|
||||
|
|
@ -696,7 +696,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk1 = vec_splats(subkeys[i]);
|
||||
const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
|
||||
#else
|
||||
|
|
@ -717,7 +717,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
if (rounds & 1)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
||||
#else
|
||||
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
||||
|
|
@ -771,7 +771,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
{
|
||||
std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3);
|
||||
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
||||
#else
|
||||
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
||||
|
|
@ -786,7 +786,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
|
||||
const uint32x4_p rk2 = vec_splats(subkeys[i]);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -235,8 +235,8 @@ std::string SPECK64::Base::AlgorithmProvider() const
|
|||
if (HasNEON())
|
||||
return "NEON";
|
||||
# endif
|
||||
# if (CRYPTOPP_POWER7_AVAILABLE)
|
||||
if (HasPower7())
|
||||
# if (CRYPTOPP_POWER8_AVAILABLE)
|
||||
if (HasPower8())
|
||||
return "Power7";
|
||||
# endif
|
||||
# if (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||
|
|
|
|||
|
|
@ -517,7 +517,7 @@ void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
for (int i=0; i < static_cast<int>(rounds); ++i)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk = vec_splats(subkeys[i]);
|
||||
#else
|
||||
// subkeys has extra elements so memory backs the last subkey
|
||||
|
|
@ -564,7 +564,7 @@ void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk = vec_splats(subkeys[i]);
|
||||
#else
|
||||
// subkeys has extra elements so memory backs the last subkey
|
||||
|
|
@ -616,7 +616,7 @@ void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
for (int i=0; i < static_cast<int>(rounds); ++i)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk = vec_splats(subkeys[i]);
|
||||
#else
|
||||
// subkeys has extra elements so memory backs the last subkey
|
||||
|
|
@ -685,7 +685,7 @@ void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
|||
|
||||
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
|
||||
{
|
||||
#if CRYPTOPP_POWER7_AVAILABLE
|
||||
#if CRYPTOPP_POWER8_AVAILABLE
|
||||
const uint32x4_p rk = vec_splats(subkeys[i]);
|
||||
#else
|
||||
// subkeys has extra elements so memory backs the last subkey
|
||||
|
|
|
|||
Loading…
Reference in New Issue