Use PowerPC unaligned loads and stores with Power8 (GH #825, PR #826)

Use PowerPC unaligned loads and stores with Power8. Formerly we used Power7 as the floor because the IBM POWER Architecture manuals state that unaligned loads and stores are available on POWER7. However, some compilers generate bad code for unaligned loads and stores under `-march=power7`, so bump the floor to a known-good ISA, Power8.
pull/828/head
Jeffrey Walton 2019-04-27 20:35:01 -04:00 committed by GitHub
parent d451751eb2
commit 39418a8512
15 changed files with 122 additions and 148 deletions
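For readers outside the PowerPC world, the distinction driving this change is sketched below. This is an illustrative example, not code from the commit: vec_ld is the classic Altivec load that silently masks off the low four address bits, while the VSX built-in vec_vsx_ld performs a true unaligned load. The commit's point is that only -march=power8 makes the VSX path trustworthy across the compilers in the test matrix; LoadUnaligned is a hypothetical name.

// Illustrative sketch only (not part of this commit). LoadUnaligned is a
// hypothetical helper showing the two load strategies the library chooses
// between: the VSX unaligned load versus the classic Altivec
// aligned-load-plus-permute fallback.
#include <altivec.h>

typedef __vector unsigned int  uint32x4_p;
typedef __vector unsigned char uint8x16_p;

inline uint32x4_p LoadUnaligned(const unsigned char src[16])
{
#if defined(_ARCH_PWR8)
    // True unaligned load; safe to rely on at -march=power8 and above.
    return (uint32x4_p)vec_vsx_ld(0, src);
#else
    // vec_ld ignores the low 4 address bits, so an unaligned pointer
    // silently loads from the wrong place. Fix up with two aligned
    // loads and a permute (big-endian element order assumed).
    const uint8x16_p perm = vec_lvsl(0, src);
    const uint8x16_p low  = vec_ld(0, src);
    const uint8x16_p high = vec_ld(15, src);
    return (uint32x4_p)vec_perm(low, high, perm);
#endif
}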


@@ -10,8 +10,6 @@ SHELL = /bin/sh
 # If needed
 TMPDIR ?= /tmp
-# Used for ARMv7 and NEON.
-FP_ABI ?= hard
 # Used for feature tests
 TOUT ?= a.out
 TOUT := $(strip $(TOUT))
@@ -443,26 +441,26 @@ ifeq ($(DETECT_FEATURES),1)
 ifneq ($(IS_ARM32),0)
   TPROG = TestPrograms/test_arm_neon.cxx
-  TOPT = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
+  TOPT = -march=armv7-a -mfpu=neon
   HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
   ifeq ($(strip $(HAVE_OPT)),0)
-    NEON_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    ARIA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    AES_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    CRC_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    BLAKE2B_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    BLAKE2S_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    CHACHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    CHAM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    LEA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    SHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    SIMECK_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    SIMON64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    SIMON128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    SPECK64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    SPECK128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-    SM4_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
+    NEON_FLAG = -march=armv7-a -mfpu=neon
+    ARIA_FLAG = -march=armv7-a -mfpu=neon
+    AES_FLAG = -march=armv7-a -mfpu=neon
+    CRC_FLAG = -march=armv7-a -mfpu=neon
+    GCM_FLAG = -march=armv7-a -mfpu=neon
+    BLAKE2B_FLAG = -march=armv7-a -mfpu=neon
+    BLAKE2S_FLAG = -march=armv7-a -mfpu=neon
+    CHACHA_FLAG = -march=armv7-a -mfpu=neon
+    CHAM_FLAG = -march=armv7-a -mfpu=neon
+    LEA_FLAG = -march=armv7-a -mfpu=neon
+    SHA_FLAG = -march=armv7-a -mfpu=neon
+    SIMECK_FLAG = -march=armv7-a -mfpu=neon
+    SIMON64_FLAG = -march=armv7-a -mfpu=neon
+    SIMON128_FLAG = -march=armv7-a -mfpu=neon
+    SPECK64_FLAG = -march=armv7-a -mfpu=neon
+    SPECK128_FLAG = -march=armv7-a -mfpu=neon
+    SM4_FLAG = -march=armv7-a -mfpu=neon
   else
     CXXFLAGS += -DCRYPTOPP_DISABLE_ASM
   endif
@@ -620,13 +618,21 @@ ifeq ($(DETECT_FEATURES),1)
   TOPT = $(POWER8_FLAG)
   HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
   ifeq ($(strip $(HAVE_OPT)),0)
+    AES_FLAG = $(POWER8_FLAG)
+    ARIA_FLAG = $(POWER8_FLAG)
     BLAKE2B_FLAG = $(POWER8_FLAG)
+    BLAKE2S_FLAG = $(POWER8_FLAG)
+    CHACHA_FLAG = $(POWER8_FLAG)
+    CHAM_FLAG = $(POWER8_FLAG)
     CRC_FLAG = $(POWER8_FLAG)
     GCM_FLAG = $(POWER8_FLAG)
     GF2N_FLAG = $(POWER8_FLAG)
-    AES_FLAG = $(POWER8_FLAG)
+    LEA_FLAG = $(POWER8_FLAG)
     SHA_FLAG = $(POWER8_FLAG)
     SHACAL2_FLAG = $(POWER8_FLAG)
+    SIMECK_FLAG = $(POWER8_FLAG)
+    SIMON64_FLAG = $(POWER8_FLAG)
+    SPECK64_FLAG = $(POWER8_FLAG)
     SIMON128_FLAG = $(POWER8_FLAG)
     SPECK128_FLAG = $(POWER8_FLAG)
   else
@@ -639,16 +645,7 @@ ifeq ($(DETECT_FEATURES),1)
   TPROG = TestPrograms/test_ppc_power7.cxx
   TOPT = $(POWER7_FLAG)
   HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
-  ifeq ($(strip $(HAVE_OPT)),0)
-    ARIA_FLAG = $(POWER7_FLAG)
-    BLAKE2S_FLAG = $(POWER7_FLAG)
-    CHACHA_FLAG = $(POWER7_FLAG)
-    CHAM_FLAG = $(POWER7_FLAG)
-    LEA_FLAG = $(POWER7_FLAG)
-    SIMECK_FLAG = $(POWER7_FLAG)
-    SIMON64_FLAG = $(POWER7_FLAG)
-    SPECK64_FLAG = $(POWER7_FLAG)
-  else
+  ifneq ($(strip $(HAVE_OPT)),0)
     POWER7_FLAG =
   endif
@@ -691,18 +688,12 @@ ifeq ($(DETECT_FEATURES),1)
   #####################################################################
   # Fixups for algorithms that can drop to a lower ISA, if needed
-  # Drop to Power7 if Power8 is not available.
+  # Drop to Power4 if Power8 not available
   ifeq ($(POWER8_FLAG),)
-    ifneq ($(POWER7_FLAG),)
-      GCM_FLAG = $(POWER7_FLAG)
-    endif
-  endif
-  # Drop to Power4 if Power7 not available
-  ifeq ($(POWER7_FLAG),)
     ifneq ($(ALTIVEC_FLAG),)
       BLAKE2S_FLAG = $(ALTIVEC_FLAG)
       CHACHA_FLAG = $(ALTIVEC_FLAG)
+      GCM_FLAG = $(ALTIVEC_FLAG)
       SIMON64_FLAG = $(ALTIVEC_FLAG)
       SPECK64_FLAG = $(ALTIVEC_FLAG)
     endif
@@ -1428,7 +1419,7 @@ endif # Dependencies
 # Cryptogams ARM asm implementation.
 aes_armv4.o : aes_armv4.S
-	$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -mfloat-abi=$(FP_ABI) -c) $<
+	$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -c) $<
 # SSSE3 or NEON available
 aria_simd.o : aria_simd.cpp
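The DETECT_FEATURES blocks above work by compiling a small test program with the candidate flag and treating empty compiler output as success (HAVE_OPT counts the words the compiler prints). A probe translation unit might look like the following; this is a hypothetical sketch in the spirit of the TestPrograms sources, not the repository's actual test file.

// Hypothetical compile-only probe (illustrative, not the real
// TestPrograms file). If this translation unit compiles cleanly with
// $(POWER8_FLAG), the Makefile enables the POWER8 code paths; any
// diagnostic output disables them. It is never meant to be executed.
#include <altivec.h>

int main(int argc, char* argv[])
{
    // vec_xl is the unaligned VSX load this commit standardizes on.
    __vector unsigned int v = vec_xl(0, (unsigned int*)(void*)argv[0]);
    v = vec_add(v, v);
    return vec_extract(v, 0) & 1;
}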


@@ -38,8 +38,8 @@
 // https://github.com/weidai11/cryptopp/issues/743
 #if defined(__xlC__) && (__xlC__ < 0x0d01)
 # define CRYPTOPP_DISABLE_ALTIVEC 1
-# define CRYPTOPP_POWER7_ALTIVEC 1
 # undef CRYPTOPP_POWER7_AVAILABLE
+# undef CRYPTOPP_POWER8_AVAILABLE
 # undef CRYPTOPP_ALTIVEC_AVAILABLE
 #endif
@@ -171,8 +171,8 @@ extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state);
 extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state);
 #endif
-#if CRYPTOPP_POWER7_AVAILABLE
-extern void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state);
+#if CRYPTOPP_POWER8_AVAILABLE
+extern void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state);
 #elif CRYPTOPP_ALTIVEC_AVAILABLE
 extern void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state);
 #endif
@@ -233,8 +233,8 @@ unsigned int BLAKE2s::OptimalDataAlignment() const
         return 4;
     else
 #endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
+#if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
         return 16;
     else
 #elif (CRYPTOPP_ALTIVEC_AVAILABLE)
@@ -257,9 +257,9 @@ std::string BLAKE2s::AlgorithmProvider() const
         return "NEON";
     else
 #endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
-        return "Power7";
+#if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
+        return "Power8";
     else
 #elif (CRYPTOPP_ALTIVEC_AVAILABLE)
     if (HasAltivec())
@@ -690,10 +690,10 @@ void BLAKE2s::Compress(const byte *input)
         return BLAKE2_Compress32_NEON(input, m_state);
     }
 #endif
-#if CRYPTOPP_POWER7_AVAILABLE
-    if(HasPower7())
+#if CRYPTOPP_POWER8_AVAILABLE
+    if(HasPower8())
     {
-        return BLAKE2_Compress32_POWER7(input, m_state);
+        return BLAKE2_Compress32_POWER8(input, m_state);
     }
 #elif CRYPTOPP_ALTIVEC_AVAILABLE
     if(HasAltivec())


@@ -29,6 +29,7 @@
 #if defined(__xlC__) && (__xlC__ < 0x0d01)
 # define CRYPTOPP_DISABLE_ALTIVEC 1
 # undef CRYPTOPP_POWER7_AVAILABLE
+# undef CRYPTOPP_POWER8_AVAILABLE
 # undef CRYPTOPP_ALTIVEC_AVAILABLE
 #endif


@@ -38,8 +38,8 @@
 // https://github.com/weidai11/cryptopp/issues/743
 #if defined(__xlC__) && (__xlC__ < 0x0d01)
 # define CRYPTOPP_DISABLE_ALTIVEC 1
-# define CRYPTOPP_POWER7_ALTIVEC 1
 # undef CRYPTOPP_POWER7_AVAILABLE
+# undef CRYPTOPP_POWER8_AVAILABLE
 # undef CRYPTOPP_ALTIVEC_AVAILABLE
 #endif
@@ -692,7 +692,7 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state)
 }
 #endif // CRYPTOPP_ARM_NEON_AVAILABLE
-#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
+#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
 inline uint32x4_p VecLoad32(const void* p)
 {
@@ -838,7 +838,7 @@ inline uint32x4_p VectorSet32(const uint32x4_p a, const uint32x4_p b,
     const uint32x4_p t0 = VectorSet32<W,X>(a, b);
     const uint32x4_p t1 = VectorSet32<Y,Z>(c, d);
-    // Power7 follows SSE2's implementation, and this is _mm_set_epi32.
+    // PowerPC follows SSE2's implementation, and this is _mm_set_epi32.
     const uint8x16_p mask = {20,21,22,23, 16,17,18,19, 4,5,6,7, 0,1,2,3};
     return VecPermute(t0, t1, mask);
 }
@@ -1015,11 +1015,11 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state)
     VecStore32LE(state.h()+0, VecXor(ff0, VecXor(row1, row3)));
     VecStore32LE(state.h()+4, VecXor(ff1, VecXor(row2, row4)));
 }
-#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
+#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
-#if (CRYPTOPP_POWER7_AVAILABLE)
-void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state)
+#if (CRYPTOPP_POWER8_AVAILABLE)
+void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state)
 {
     BLAKE2_Compress32_CORE(input, state);
 }


@@ -28,8 +28,8 @@ extern void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input,
 extern void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds);
 #endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
-extern void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds);
+#if (CRYPTOPP_POWER8_AVAILABLE)
+extern void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds);
 #elif (CRYPTOPP_ALTIVEC_AVAILABLE)
 extern void ChaCha_OperateKeystream_ALTIVEC(const word32 *state, const byte* input, byte *output, unsigned int rounds);
 #endif
@@ -153,13 +153,13 @@ void ChaCha_OperateKeystream(KeystreamOperation operation,
     }
 #endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
+#if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
     {
         while (iterationCount >= 4 && MultiBlockSafe(state[12], 4))
         {
             const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
-            ChaCha_OperateKeystream_POWER7(state, xorInput ? input : NULLPTR, output, rounds);
+            ChaCha_OperateKeystream_POWER8(state, xorInput ? input : NULLPTR, output, rounds);
             // MultiBlockSafe avoids overflow on the counter words
             state[12] += 4;
@@ -267,9 +267,9 @@ std::string ChaCha_AlgorithmProvider()
         return "NEON";
     else
 #endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
-        return "Power7";
+#if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
+        return "Power8";
     else
 #elif (CRYPTOPP_ALTIVEC_AVAILABLE)
     if (HasAltivec())


@@ -209,7 +209,7 @@ inline __m128i RotateLeft<16>(const __m128i val)
 #if (CRYPTOPP_ALTIVEC_AVAILABLE)
-// ChaCha_OperateKeystream_POWER7 is optimized for POWER7. However, Altivec
+// ChaCha_OperateKeystream_POWER8 is optimized for POWER7. However, Altivec
 // is supported by using vec_ld and vec_st, and using a composite VecAdd
 // that supports 64-bit element adds. vec_ld and vec_st add significant
 // overhead when memory is not aligned. Despite the drawbacks Altivec
@@ -827,7 +827,7 @@ void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *
 #endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
-#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
+#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
 // ChaCha_OperateKeystream_CORE will use either POWER7 or ALTIVEC,
 // depending on the flags used to compile this source file. The
@@ -1096,11 +1096,11 @@ inline void ChaCha_OperateKeystream_CORE(const word32 *state, const byte* input,
     VecStore32LE(output + 15*16, r3_3);
 }
-#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
+#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
-#if (CRYPTOPP_POWER7_AVAILABLE)
-void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds)
+#if (CRYPTOPP_POWER8_AVAILABLE)
+void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds)
 {
     ChaCha_OperateKeystream_CORE(state, input, output, rounds);
 }


@@ -74,24 +74,6 @@
 // Also see https://bugs.llvm.org/show_bug.cgi?id=39895 .
 // #define CRYPTOPP_DISABLE_MIXED_ASM 1
-// Several compilers discard SIMD code that loads unaligned data. The symptom
-// is often self test failures and UBsan findings for unaligned loads. For
-// example, Power7 can load unaligned data using vec_vsx_ld but some versions
-// of GCC and Clang require 16-byte aligned data when using the builtin.
-// It is not limited to SSE and PowerPC code. Define this to disable
-// Crypto++ code that uses potentially problematic builtins or intrinsics.
-// Also see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88234 and
-// https://bugs.llvm.org/show_bug.cgi?id=39704
-// #define CRYPTOPP_BUGGY_SIMD_LOAD_AND_STORE 1
-// This list will probably grow over time as more compilers are identified.
-#if defined(CRYPTOPP_BUGGY_SIMD_LOAD_AND_STORE)
-# define CRYPTOPP_DISABLE_LEA_SIMD 1
-# define CRYPTOPP_DISABLE_SIMON_SIMD 1
-# define CRYPTOPP_DISABLE_SPECK_SIMD 1
-# define CRYPTOPP_DISABLE_SM4_SIMD 1
-#endif
 // Define CRYPTOPP_NO_CXX11 to avoid C++11 related features shown at the
 // end of this file. Some compilers and standard C++ headers advertise C++11
 // but they are really just C++03 with some additional C++11 headers and

gcm.cpp

@@ -75,8 +75,8 @@ extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
 extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
 #endif
-#if CRYPTOPP_POWER7_AVAILABLE
-extern void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c);
+#if CRYPTOPP_POWER8_AVAILABLE
+extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
 #endif
 #if CRYPTOPP_CLMUL_AVAILABLE
@@ -213,11 +213,11 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
             for (k=1; k<j; k++)
                 GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
     else
-#elif CRYPTOPP_POWER7_AVAILABLE
-    if (HasPower7())
+#elif CRYPTOPP_POWER8_AVAILABLE
+    if (HasPower8())
         for (j=2; j<=0x80; j*=2)
             for (k=1; k<j; k++)
-                GCM_Xor16_POWER7(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
+                GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
     else
 #endif
         for (j=2; j<=0x80; j*=2)
@@ -277,13 +277,13 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
                 GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
             }
     else
-#elif CRYPTOPP_POWER7_AVAILABLE
-    if (HasPower7())
+#elif CRYPTOPP_POWER8_AVAILABLE
+    if (HasPower8())
         for (j=2; j<=8; j*=2)
             for (k=1; k<j; k++)
             {
-                GCM_Xor16_POWER7(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
-                GCM_Xor16_POWER7(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
+                GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
+                GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
             }
     else
 #endif
@@ -369,8 +369,8 @@ unsigned int GCM_Base::OptimalDataAlignment() const
         HasSSE2() ? 16 :
 #elif CRYPTOPP_ARM_NEON_AVAILABLE
         HasNEON() ? 4 :
-#elif CRYPTOPP_POWER7_AVAILABLE
-        HasPower7() ? 16 :
+#elif CRYPTOPP_POWER8_AVAILABLE
+        HasPower8() ? 16 :
 #endif
         GetBlockCipher().OptimalDataAlignment();
 }


@@ -569,12 +569,12 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
 // ***************************** POWER8 ***************************** //
-#if CRYPTOPP_POWER7_AVAILABLE
-void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c)
+#if CRYPTOPP_POWER8_AVAILABLE
+void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c)
 {
     VecStore(VecXor(VecLoad(b), VecLoad(c)), a);
 }
-#endif // CRYPTOPP_POWER7_AVAILABLE
+#endif // CRYPTOPP_POWER8_AVAILABLE
 #if CRYPTOPP_POWER8_VMULL_AVAILABLE


@@ -57,7 +57,7 @@
 //
 // inline uint32x4_p VecLoad(const byte src[16])
 // {
-// #if defined(_ARCH_PWR7)
+// #if defined(_ARCH_PWR8)
 //     return (uint32x4_p) *(uint8x16_p*)((byte*)src);
 // #else
 //     return VecLoad_ALTIVEC(src);
@@ -128,7 +128,7 @@ typedef __vector unsigned short uint16x8_p;
 /// \since Crypto++ 6.0
 typedef __vector unsigned int uint32x4_p;
-#if defined(_ARCH_PWR7) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
+#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
 /// \brief Vector of 64-bit elements
 /// \details uint64x2_p is available on POWER7 and above. Some supporting
 /// functions, like 64-bit <tt>vec_add</tt> (<tt>vaddudm</tt>), did not
@@ -137,7 +137,7 @@ typedef __vector unsigned int uint32x4_p;
 /// __vector unsigned long long
 /// \since Crypto++ 6.0
 typedef __vector unsigned long long uint64x2_p;
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 /// \brief The 0 vector
 /// \returns a 32-bit vector of 0's
@@ -252,7 +252,7 @@ inline uint32x4_p VecLoad_ALTIVEC(int off, const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoad(const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     return (uint32x4_p)vec_xlw4(0, (byte*)src);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -280,7 +280,7 @@ inline uint32x4_p VecLoad(const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoad(int off, const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     return (uint32x4_p)vec_xlw4(off, (byte*)src);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -328,7 +328,7 @@ inline uint32x4_p VecLoad(int off, const word32 src[4])
     return VecLoad(off, (const byte*)src);
 }
-#if defined(_ARCH_PWR7) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
+#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
 /// \brief Loads a vector from a word array
 /// \param src the word array
@@ -367,7 +367,7 @@ inline uint64x2_p VecLoad(int off, const word64 src[2])
     return (uint64x2_p)VecLoad(off, (const byte*)src);
 }
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 /// \brief Loads a vector from an aligned byte array
 /// \param src the byte array
@@ -382,7 +382,7 @@ inline uint64x2_p VecLoad(int off, const word64 src[2])
 /// \since Crypto++ 8.0
 inline uint32x4_p VecLoadAligned(const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     return (uint32x4_p)vec_xlw4(0, (byte*)src);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -390,10 +390,10 @@ inline uint32x4_p VecLoadAligned(const byte src[16])
 # else
     return (uint32x4_p)vec_vsx_ld(0, (byte*)src);
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
     CRYPTOPP_ASSERT(((uintptr_t)src) % 16 == 0);
     return (uint32x4_p)vec_ld(0, (byte*)src);
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }
 /// \brief Loads a vector from an aligned byte array
@@ -410,7 +410,7 @@ inline uint32x4_p VecLoadAligned(const byte src[16])
 /// \since Crypto++ 8.0
 inline uint32x4_p VecLoadAligned(int off, const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     return (uint32x4_p)vec_xlw4(off, (byte*)src);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -418,10 +418,10 @@ inline uint32x4_p VecLoadAligned(int off, const byte src[16])
 # else
     return (uint32x4_p)vec_vsx_ld(off, (byte*)src);
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
     CRYPTOPP_ASSERT((((uintptr_t)src)+off) % 16 == 0);
     return (uint32x4_p)vec_ld(off, (byte*)src);
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }
 /// \brief Loads a vector from a byte array
@@ -439,7 +439,7 @@ inline uint32x4_p VecLoadAligned(int off, const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoadBE(const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
 # if (CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)vec_xlw4(0, (byte*)src);
@@ -455,13 +455,13 @@ inline uint32x4_p VecLoadBE(const byte src[16])
     return (uint32x4_p)VecReverse(vec_vsx_ld(0, (byte*)src));
 # endif
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
 # if (CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)VecLoad((const byte*)src);
 # else
     return (uint32x4_p)VecReverse(VecLoad((const byte*)src));
 # endif
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }
 /// \brief Loads a vector from a byte array
@@ -480,7 +480,7 @@ inline uint32x4_p VecLoadBE(const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoadBE(int off, const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
 # if (CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)vec_xlw4(off, (byte*)src);
@@ -496,13 +496,13 @@ inline uint32x4_p VecLoadBE(int off, const byte src[16])
     return (uint32x4_p)VecReverse(vec_vsx_ld(off, (byte*)src));
 # endif
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
 # if (CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)VecLoad(off, (const byte*)src);
 # else
     return (uint32x4_p)VecReverse(VecLoad(off, (const byte*)src));
 # endif
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }
 //@}
@@ -604,7 +604,7 @@ inline void VecStore_ALTIVEC(const T data, int off, byte dest[16])
 template<class T>
 inline void VecStore(const T data, byte dest[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -635,7 +635,7 @@ inline void VecStore(const T data, byte dest[16])
 template<class T>
 inline void VecStore(const T data, int off, byte dest[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     vec_xstw4((uint8x16_p)data, off, (byte*)dest);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -750,7 +750,7 @@ inline void VecStore(const T data, int off, word64 dest[2])
 template <class T>
 inline void VecStoreBE(const T data, byte dest[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
 # if (CRYPTOPP_BIG_ENDIAN)
     vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
@@ -766,13 +766,13 @@ inline void VecStoreBE(const T data, byte dest[16])
     vec_vsx_st((uint8x16_p)VecReverse(data), 0, (byte*)dest);
 # endif
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
 # if (CRYPTOPP_BIG_ENDIAN)
     VecStore_ALTIVEC((uint8x16_p)data, 0, (byte*)dest);
 # else
     VecStore_ALTIVEC((uint8x16_p)VecReverse(data), 0, (byte*)dest);
 # endif
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }
 /// \brief Stores a vector to a byte array
@@ -794,7 +794,7 @@ inline void VecStoreBE(const T data, byte dest[16])
 template <class T>
 inline void VecStoreBE(const T data, int off, byte dest[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
 # if (CRYPTOPP_BIG_ENDIAN)
     vec_xstw4((uint8x16_p)data, off, (byte*)dest);
@@ -810,13 +810,13 @@ inline void VecStoreBE(const T data, int off, byte dest[16])
     vec_vsx_st((uint8x16_p)VecReverse(data), off, (byte*)dest);
 # endif
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
 # if (CRYPTOPP_BIG_ENDIAN)
     VecStore_ALTIVEC((uint8x16_p)data, off, (byte*)dest);
 # else
     VecStore_ALTIVEC((uint8x16_p)VecReverse(data), off, (byte*)dest);
 # endif
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }
 /// \brief Stores a vector to a word array
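To see the gating above in action, here is a short usage sketch of the VecLoad/VecStore wrappers. It mirrors the GCM_Xor16_POWER8 body shown in the gcm_simd.cpp hunk earlier: on a POWER8 target the wrappers compile to single unaligned VSX loads and stores, and below that they fall back to the aligned-load-plus-permute Altivec sequence. Xor16 is a hypothetical free function, not library code.

// Usage sketch (assumes the Crypto++ headers; Xor16 is a hypothetical
// helper modeled on GCM_Xor16_POWER8 from the hunk above).
#include "ppc_simd.h"

void Xor16(CryptoPP::byte* a, const CryptoPP::byte* b, const CryptoPP::byte* c)
{
    using namespace CryptoPP;
    // Unaligned-tolerant loads: single VSX loads on _ARCH_PWR8,
    // VecLoad_ALTIVEC's aligned-load-plus-permute below that.
    const uint32x4_p vb = VecLoad(b);
    const uint32x4_p vc = VecLoad(c);
    VecStore(VecXor(vb, vc), a);    // write the 16-byte XOR back out
}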


@@ -255,9 +255,9 @@ std::string SIMON64::Base::AlgorithmProvider() const
     if (HasNEON())
         return "NEON";
 # endif
-# if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
-        return "Power7";
+# if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
+        return "Power8";
 # endif
 # if (CRYPTOPP_ALTIVEC_AVAILABLE)
     if (HasAltivec())


@@ -44,7 +44,7 @@
 # include <arm_acle.h>
 #endif
-#if defined(CRYPTOPP_POWER7_AVAILABLE)
+#if defined(CRYPTOPP_POWER8_AVAILABLE)
 # include "adv_simd.h"
 # include "ppc_simd.h"
 #endif


@@ -576,7 +576,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk1 = vec_splats(subkeys[i]);
         const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
 #else
@@ -592,7 +592,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
     if (rounds & 1)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
 #else
         const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@@ -634,7 +634,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
     if (rounds & 1)
     {
         std::swap(x1, y1);
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
 #else
         const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@@ -647,7 +647,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
         const uint32x4_p rk2 = vec_splats(subkeys[i]);
 #else
@@ -696,7 +696,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk1 = vec_splats(subkeys[i]);
         const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
 #else
@@ -717,7 +717,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     if (rounds & 1)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
 #else
         const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@@ -771,7 +771,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     {
         std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3);
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
 #else
         const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@@ -786,7 +786,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
         const uint32x4_p rk2 = vec_splats(subkeys[i]);
 #else


@@ -235,9 +235,9 @@ std::string SPECK64::Base::AlgorithmProvider() const
     if (HasNEON())
         return "NEON";
 # endif
-# if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
-        return "Power7";
+# if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
+        return "Power8";
 # endif
 # if (CRYPTOPP_ALTIVEC_AVAILABLE)
     if (HasAltivec())


@@ -517,7 +517,7 @@ void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
     for (int i=0; i < static_cast<int>(rounds); ++i)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[i]);
 #else
         // subkeys has extra elements so memory backs the last subkey
@@ -564,7 +564,7 @@ void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = static_cast<int>(rounds-1); i >= 0; --i)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[i]);
 #else
         // subkeys has extra elements so memory backs the last subkey
@@ -616,7 +616,7 @@ void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     for (int i=0; i < static_cast<int>(rounds); ++i)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[i]);
 #else
         // subkeys has extra elements so memory backs the last subkey
@@ -685,7 +685,7 @@ void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[i]);
 #else
         // subkeys has extra elements so memory backs the last subkey
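The Simon and Speck hunks above repeatedly toggle the same pattern: on POWER8 a round key is broadcast with vec_splats, while the Altivec path loads the subkey's neighborhood from memory and replicates one 32-bit word with a permute, which is why the comments note the subkey table keeps extra backing elements. A hedged sketch of the two paths, with SplatSubkey as a hypothetical name:

// Hedged sketch; SplatSubkey is a hypothetical helper, not library code.
// Big-endian element order is assumed for the Altivec path.
#include <altivec.h>

typedef __vector unsigned int  uint32x4_p;
typedef __vector unsigned char uint8x16_p;

inline uint32x4_p SplatSubkey(const unsigned int* subkeys, int i)
{
#if defined(_ARCH_PWR8)
    // Direct scalar broadcast of subkeys[i] into all four lanes.
    return vec_splats(subkeys[i]);
#else
    // Unaligned-safe Altivec load of the 16 bytes at subkeys+i
    // (vec_ld masks the low address bits, hence the lvsl fixup) ...
    const unsigned char* p = (const unsigned char*)(subkeys + i);
    const uint8x16_p perm = vec_lvsl(0, p);
    const uint8x16_p lo = vec_ld(0, p);
    const uint8x16_p hi = vec_ld(15, p);
    const uint8x16_p v  = vec_perm(lo, hi, perm);
    // ... then replicate word 0 into every lane. The subkey table needs
    // extra trailing elements so this 16-byte read stays in bounds.
    const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
    return (uint32x4_p)vec_perm(v, v, m);
#endif
}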