diff --git a/GNUmakefile b/GNUmakefile
index 548fe2bf..e7b7b3a6 100755
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -10,8 +10,6 @@ SHELL = /bin/sh
 # If needed
 TMPDIR ?= /tmp

-# Used for ARMv7 and NEON.
-FP_ABI ?= hard
 # Used for feature tests
 TOUT ?= a.out
 TOUT := $(strip $(TOUT))
@@ -443,26 +441,26 @@ ifeq ($(DETECT_FEATURES),1)
   ifneq ($(IS_ARM32),0)
     TPROG = TestPrograms/test_arm_neon.cxx
-    TOPT = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
+    TOPT = -march=armv7-a -mfpu=neon
     HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
     ifeq ($(strip $(HAVE_OPT)),0)
-      NEON_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      ARIA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      AES_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      CRC_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      BLAKE2B_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      BLAKE2S_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      CHACHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      CHAM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      LEA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      SHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      SIMECK_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      SIMON64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      SIMON128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      SPECK64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      SPECK128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
-      SM4_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
+      NEON_FLAG = -march=armv7-a -mfpu=neon
+      ARIA_FLAG = -march=armv7-a -mfpu=neon
+      AES_FLAG = -march=armv7-a -mfpu=neon
+      CRC_FLAG = -march=armv7-a -mfpu=neon
+      GCM_FLAG = -march=armv7-a -mfpu=neon
+      BLAKE2B_FLAG = -march=armv7-a -mfpu=neon
+      BLAKE2S_FLAG = -march=armv7-a -mfpu=neon
+      CHACHA_FLAG = -march=armv7-a -mfpu=neon
+      CHAM_FLAG = -march=armv7-a -mfpu=neon
+      LEA_FLAG = -march=armv7-a -mfpu=neon
+      SHA_FLAG = -march=armv7-a -mfpu=neon
+      SIMECK_FLAG = -march=armv7-a -mfpu=neon
+      SIMON64_FLAG = -march=armv7-a -mfpu=neon
+      SIMON128_FLAG = -march=armv7-a -mfpu=neon
+      SPECK64_FLAG = -march=armv7-a -mfpu=neon
+      SPECK128_FLAG = -march=armv7-a -mfpu=neon
+      SM4_FLAG = -march=armv7-a -mfpu=neon
     else
       CXXFLAGS += -DCRYPTOPP_DISABLE_ASM
     endif
@@ -620,13 +618,21 @@ ifeq ($(DETECT_FEATURES),1)
     TOPT = $(POWER8_FLAG)
     HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
     ifeq ($(strip $(HAVE_OPT)),0)
+      AES_FLAG = $(POWER8_FLAG)
+      ARIA_FLAG = $(POWER8_FLAG)
       BLAKE2B_FLAG = $(POWER8_FLAG)
+      BLAKE2S_FLAG = $(POWER8_FLAG)
+      CHACHA_FLAG = $(POWER8_FLAG)
+      CHAM_FLAG = $(POWER8_FLAG)
       CRC_FLAG = $(POWER8_FLAG)
       GCM_FLAG = $(POWER8_FLAG)
       GF2N_FLAG = $(POWER8_FLAG)
-      AES_FLAG = $(POWER8_FLAG)
+      LEA_FLAG = $(POWER8_FLAG)
       SHA_FLAG = $(POWER8_FLAG)
       SHACAL2_FLAG = $(POWER8_FLAG)
+      SIMECK_FLAG = $(POWER8_FLAG)
+      SIMON64_FLAG = $(POWER8_FLAG)
+      SPECK64_FLAG = $(POWER8_FLAG)
       SIMON128_FLAG = $(POWER8_FLAG)
       SPECK128_FLAG = $(POWER8_FLAG)
     else
@@ -639,16 +645,7 @@ ifeq ($(DETECT_FEATURES),1)
     TPROG = TestPrograms/test_ppc_power7.cxx
     TOPT = $(POWER7_FLAG)
     HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
-    ifeq ($(strip $(HAVE_OPT)),0)
-      ARIA_FLAG = $(POWER7_FLAG)
-      BLAKE2S_FLAG = $(POWER7_FLAG)
-      CHACHA_FLAG = $(POWER7_FLAG)
-      CHAM_FLAG = $(POWER7_FLAG)
-      LEA_FLAG = $(POWER7_FLAG)
-      SIMECK_FLAG = $(POWER7_FLAG)
-      SIMON64_FLAG = $(POWER7_FLAG)
-      SPECK64_FLAG = $(POWER7_FLAG)
-    else
+    ifneq ($(strip $(HAVE_OPT)),0)
       POWER7_FLAG =
     endif
@@ -691,18 +688,12 @@ ifeq ($(DETECT_FEATURES),1)
   #####################################################################
   # Fixups for algorithms that can drop to a lower ISA, if needed

-  # Drop to Power7 if Power8 is not available.
+  # Drop to Power4 if Power8 not available
   ifeq ($(POWER8_FLAG),)
-    ifneq ($(POWER7_FLAG),)
-      GCM_FLAG = $(POWER7_FLAG)
-    endif
-  endif
-
-  # Drop to Power4 if Power7 not available
-  ifeq ($(POWER7_FLAG),)
     ifneq ($(ALTIVEC_FLAG),)
       BLAKE2S_FLAG = $(ALTIVEC_FLAG)
       CHACHA_FLAG = $(ALTIVEC_FLAG)
+      GCM_FLAG = $(ALTIVEC_FLAG)
       SIMON64_FLAG = $(ALTIVEC_FLAG)
       SPECK64_FLAG = $(ALTIVEC_FLAG)
     endif
@@ -1428,7 +1419,7 @@ endif # Dependencies
 # Cryptogams ARM asm implementation.
 aes_armv4.o : aes_armv4.S
-	$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -mfloat-abi=$(FP_ABI) -c) $<
+	$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -c) $<

 # SSSE3 or NEON available
 aria_simd.o : aria_simd.cpp
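Note: all of the *_FLAG assignments above hang off the same feature-detection idiom: the makefile compiles a tiny test program with the candidate flags and treats empty compiler output (HAVE_OPT of 0 words) as success. The sketch below shows the kind of translation unit a POWER8 probe can use; it is written in the spirit of TestPrograms/test_ppc_power8.cxx, not quoted from it, so the real file may differ. vec_add on 64-bit elements lowers to vaddudm, a POWER8 instruction, so this compiles only when the full POWER8 ISA is accepted.

    // Hypothetical POWER8 probe, in the spirit of
    // TestPrograms/test_ppc_power8.cxx (actual contents may differ).
    // vec_add on 64-bit elements requires vaddudm (POWER8), so the
    // unit compiles only when -mcpu=power8 or equivalent works.
    #include <altivec.h>
    int main(int argc, char* argv[])
    {
        __vector unsigned long long x = {1, 2};
        __vector unsigned long long y = vec_add(x, x);
        return (int)vec_extract(y, 0);
    }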
diff --git a/blake2.cpp b/blake2.cpp
index 733082d7..f8872619 100644
--- a/blake2.cpp
+++ b/blake2.cpp
@@ -38,8 +38,8 @@
 // https://github.com/weidai11/cryptopp/issues/743
 #if defined(__xlC__) && (__xlC__ < 0x0d01)
 # define CRYPTOPP_DISABLE_ALTIVEC 1
-# define CRYPTOPP_POWER7_ALTIVEC 1
 # undef CRYPTOPP_POWER7_AVAILABLE
+# undef CRYPTOPP_POWER8_AVAILABLE
 # undef CRYPTOPP_ALTIVEC_AVAILABLE
 #endif
@@ -171,8 +171,8 @@ extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state);
 extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state);
 #endif

-#if CRYPTOPP_POWER7_AVAILABLE
-extern void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state);
+#if CRYPTOPP_POWER8_AVAILABLE
+extern void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state);
 #elif CRYPTOPP_ALTIVEC_AVAILABLE
 extern void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state);
 #endif
@@ -233,8 +233,8 @@ unsigned int BLAKE2s::OptimalDataAlignment() const
         return 4;
     else
 #endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
+#if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
         return 16;
     else
 #elif (CRYPTOPP_ALTIVEC_AVAILABLE)
@@ -257,9 +257,9 @@ std::string BLAKE2s::AlgorithmProvider() const
         return "NEON";
     else
 #endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
-        return "Power7";
+#if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
+        return "Power8";
     else
 #elif (CRYPTOPP_ALTIVEC_AVAILABLE)
     if (HasAltivec())
@@ -690,10 +690,10 @@ void BLAKE2s::Compress(const byte *input)
         return BLAKE2_Compress32_NEON(input, m_state);
     }
 #endif
-#if CRYPTOPP_POWER7_AVAILABLE
-    if(HasPower7())
+#if CRYPTOPP_POWER8_AVAILABLE
+    if(HasPower8())
     {
-        return BLAKE2_Compress32_POWER7(input, m_state);
+        return BLAKE2_Compress32_POWER8(input, m_state);
     }
 #elif CRYPTOPP_ALTIVEC_AVAILABLE
     if(HasAltivec())
diff --git a/blake2b_simd.cpp b/blake2b_simd.cpp
index 9a220bd1..13272d21 100644
--- a/blake2b_simd.cpp
+++ b/blake2b_simd.cpp
@@ -29,6 +29,7 @@
 #if defined(__xlC__) && (__xlC__ < 0x0d01)
 # define CRYPTOPP_DISABLE_ALTIVEC 1
 # undef CRYPTOPP_POWER7_AVAILABLE
+# undef CRYPTOPP_POWER8_AVAILABLE
 # undef CRYPTOPP_ALTIVEC_AVAILABLE
 #endif
diff --git a/blake2s_simd.cpp b/blake2s_simd.cpp
index 4b46c525..ff461da8 100644
--- a/blake2s_simd.cpp
+++ b/blake2s_simd.cpp
@@ -38,8 +38,8 @@
 // https://github.com/weidai11/cryptopp/issues/743
 #if defined(__xlC__) && (__xlC__ < 0x0d01)
 # define CRYPTOPP_DISABLE_ALTIVEC 1
-# define CRYPTOPP_POWER7_ALTIVEC 1
 # undef CRYPTOPP_POWER7_AVAILABLE
+# undef CRYPTOPP_POWER8_AVAILABLE
 # undef CRYPTOPP_ALTIVEC_AVAILABLE
 #endif
@@ -692,7 +692,7 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state)
 }
 #endif // CRYPTOPP_ARM_NEON_AVAILABLE

-#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
+#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)

 inline uint32x4_p VecLoad32(const void* p)
 {
@@ -838,7 +838,7 @@ inline uint32x4_p VectorSet32(const uint32x4_p a, const uint32x4_p b,
     const uint32x4_p t0 = VectorSet32(a, b);
     const uint32x4_p t1 = VectorSet32(c, d);

-    // Power7 follows SSE2's implementation, and this is _mm_set_epi32.
+    // PowerPC follows SSE2's implementation, and this is _mm_set_epi32.
     const uint8x16_p mask = {20,21,22,23, 16,17,18,19, 4,5,6,7, 0,1,2,3};
     return VecPermute(t0, t1, mask);
 }
@@ -1015,11 +1015,11 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state)
     VecStore32LE(state.h()+0, VecXor(ff0, VecXor(row1, row3)));
     VecStore32LE(state.h()+4, VecXor(ff1, VecXor(row2, row4)));
 }
-#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
+#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE

-#if (CRYPTOPP_POWER7_AVAILABLE)
+#if (CRYPTOPP_POWER8_AVAILABLE)

-void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state)
+void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state)
 {
     BLAKE2_Compress32_CORE(input, state);
 }
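Note: the blake2.cpp changes keep the usual Crypto++ dispatch shape: compile-time availability macros decide which probes exist, and the first runtime probe that passes wins. A minimal standalone mirror of that shape, with stand-in compress routines that are not library API (HasPower8/HasAltivec are the real cpu.h probes):

    // Minimal mirror of the BLAKE2s::Compress dispatch above.
    // Compress32_* are illustrative stand-ins, not Crypto++ API.
    #include "cpu.h"
    typedef unsigned char byte;
    static void Compress32_POWER8(const byte*) { /* SIMD path */ }
    static void Compress32_ALTIVEC(const byte*) { /* SIMD path */ }
    static void Compress32_CXX(const byte*) { /* portable path */ }

    void Compress32(const byte* input)
    {
    #if CRYPTOPP_POWER8_AVAILABLE
        if (CryptoPP::HasPower8()) { Compress32_POWER8(input); return; }
    #elif CRYPTOPP_ALTIVEC_AVAILABLE
        if (CryptoPP::HasAltivec()) { Compress32_ALTIVEC(input); return; }
    #endif
        Compress32_CXX(input);
    }

The #elif mirrors the patch: a POWER8 build compiles only the POWER8 body, and the Altivec body is compiled only when POWER8 is unavailable at build time.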
diff --git a/chacha.cpp b/chacha.cpp
index dccde301..58fd2201 100644
--- a/chacha.cpp
+++ b/chacha.cpp
@@ -28,8 +28,8 @@ extern void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input,
 extern void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds);
 #endif

-#if (CRYPTOPP_POWER7_AVAILABLE)
-extern void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds);
+#if (CRYPTOPP_POWER8_AVAILABLE)
+extern void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds);
 #elif (CRYPTOPP_ALTIVEC_AVAILABLE)
 extern void ChaCha_OperateKeystream_ALTIVEC(const word32 *state, const byte* input, byte *output, unsigned int rounds);
 #endif
@@ -153,13 +153,13 @@ void ChaCha_OperateKeystream(KeystreamOperation operation,
     }
 #endif

-#if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
+#if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
     {
         while (iterationCount >= 4 && MultiBlockSafe(state[12], 4))
         {
             const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
-            ChaCha_OperateKeystream_POWER7(state, xorInput ? input : NULLPTR, output, rounds);
+            ChaCha_OperateKeystream_POWER8(state, xorInput ? input : NULLPTR, output, rounds);

             // MultiBlockSafe avoids overflow on the counter words
             state[12] += 4;
@@ -267,9 +267,9 @@ std::string ChaCha_AlgorithmProvider()
         return "NEON";
     else
 #endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
-        return "Power7";
+#if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
+        return "Power8";
     else
 #elif (CRYPTOPP_ALTIVEC_AVAILABLE)
     if (HasAltivec())
diff --git a/chacha_simd.cpp b/chacha_simd.cpp
index 65316384..9fd6b0f1 100644
--- a/chacha_simd.cpp
+++ b/chacha_simd.cpp
@@ -209,7 +209,7 @@ inline __m128i RotateLeft<16>(const __m128i val)

 #if (CRYPTOPP_ALTIVEC_AVAILABLE)

-// ChaCha_OperateKeystream_POWER7 is optimized for POWER7. However, Altivec
+// ChaCha_OperateKeystream_POWER8 is optimized for POWER7. However, Altivec
 // is supported by using vec_ld and vec_st, and using a composite VecAdd
 // that supports 64-bit element adds. vec_ld and vec_st add significant
 // overhead when memory is not aligned. Despite the drawbacks Altivec
@@ -827,7 +827,7 @@ void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *

 #endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE

-#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
+#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)

 // ChaCha_OperateKeystream_CORE will use either POWER7 or ALTIVEC,
 // depending on the flags used to compile this source file. The
@@ -1096,11 +1096,11 @@ inline void ChaCha_OperateKeystream_CORE(const word32 *state, const byte* input,
     VecStore32LE(output + 15*16, r3_3);
 }

-#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
+#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE

-#if (CRYPTOPP_POWER7_AVAILABLE)
+#if (CRYPTOPP_POWER8_AVAILABLE)

-void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds)
+void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds)
 {
     ChaCha_OperateKeystream_CORE(state, input, output, rounds);
 }
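Note: the POWER8 keystream routine consumes four blocks per call, so the dispatch loop must ensure the 32-bit counter word state[12] cannot wrap inside a batch; that is what the MultiBlockSafe guard in the loop condition checks. A standalone equivalent of the guard (the real helper lives in chacha.cpp and may differ in detail):

    #include <cstdint>

    // Illustrative equivalent of the MultiBlockSafe guard used above:
    // a multi-block SIMD step must not straddle a wrap of the low
    // 32-bit counter word, since each lane derives its counter from
    // state[12] plus the lane index.
    inline bool MultiBlockSafe(uint32_t ctrLow, unsigned int blocks)
    {
        return 0xFFFFFFFFu - ctrLow >= blocks;  // room to add 'blocks'
    }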
diff --git a/config.h b/config.h
index 3642b3cf..283c4090 100644
--- a/config.h
+++ b/config.h
@@ -74,24 +74,6 @@
 // Also see https://bugs.llvm.org/show_bug.cgi?id=39895 .
 // #define CRYPTOPP_DISABLE_MIXED_ASM 1

-// Several compilers discard SIMD code that loads unaligned data. The symptom
-// is often self test failures and UBsan findings for unaligned loads. For
-// example, Power7 can load unaligned data using vec_vsx_ld but some versions
-// of GCC and Clang require 16-byte aligned data when using the builtin.
-// It is not limited to SSE and PowerPC code. Define this to disable
-// Crypto++ code that uses potentially problematic builtins or intrinsics.
-// Also see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88234 and
-// https://bugs.llvm.org/show_bug.cgi?id=39704
-// #define CRYPTOPP_BUGGY_SIMD_LOAD_AND_STORE 1
-
-// This list will probably grow over time as more compilers are identified.
-#if defined(CRYPTOPP_BUGGY_SIMD_LOAD_AND_STORE)
-# define CRYPTOPP_DISABLE_LEA_SIMD 1
-# define CRYPTOPP_DISABLE_SIMON_SIMD 1
-# define CRYPTOPP_DISABLE_SPECK_SIMD 1
-# define CRYPTOPP_DISABLE_SM4_SIMD 1
-#endif
-
 // Define CRYPTOPP_NO_CXX11 to avoid C++11 related features shown at the
 // end of this file. Some compilers and standard C++ headers advertise C++11
 // but they are really just C++03 with some additional C++11 headers and
diff --git a/gcm.cpp b/gcm.cpp
index d1c3011c..c7c51071 100644
--- a/gcm.cpp
+++ b/gcm.cpp
@@ -75,8 +75,8 @@ extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
 extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
 #endif

-#if CRYPTOPP_POWER7_AVAILABLE
-extern void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c);
+#if CRYPTOPP_POWER8_AVAILABLE
+extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
 #endif

 #if CRYPTOPP_CLMUL_AVAILABLE
@@ -213,11 +213,11 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
 		for (k=1; kvec_add (vaddudm), did not
diff --git a/ppc_simd.h b/ppc_simd.h
--- a/ppc_simd.h
+++ b/ppc_simd.h
@@ -137,7 +137,7 @@ typedef __vector unsigned int uint32x4_p;
 /// __vector unsigned long long
 /// \since Crypto++ 6.0
 typedef __vector unsigned long long uint64x2_p;
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8

 /// \brief The 0 vector
 /// \returns a 32-bit vector of 0's
@@ -252,7 +252,7 @@ inline uint32x4_p VecLoad_ALTIVEC(int off, const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoad(const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     return (uint32x4_p)vec_xlw4(0, (byte*)src);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -280,7 +280,7 @@ inline uint32x4_p VecLoad(const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoad(int off, const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     return (uint32x4_p)vec_xlw4(off, (byte*)src);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -328,7 +328,7 @@ inline uint32x4_p VecLoad(int off, const word32 src[4])
     return VecLoad(off, (const byte*)src);
 }

-#if defined(_ARCH_PWR7) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
+#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

 /// \brief Loads a vector from a word array
 /// \param src the word array
@@ -367,7 +367,7 @@ inline uint64x2_p VecLoad(int off, const word64 src[2])
     return (uint64x2_p)VecLoad(off, (const byte*)src);
 }

-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8

 /// \brief Loads a vector from an aligned byte array
 /// \param src the byte array
@@ -382,7 +382,7 @@ inline uint64x2_p VecLoad(int off, const word64 src[2])
 /// \since Crypto++ 8.0
 inline uint32x4_p VecLoadAligned(const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     return (uint32x4_p)vec_xlw4(0, (byte*)src);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -390,10 +390,10 @@ inline uint32x4_p VecLoadAligned(const byte src[16])
 # else
     return (uint32x4_p)vec_vsx_ld(0, (byte*)src);
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
     CRYPTOPP_ASSERT(((uintptr_t)src) % 16 == 0);
     return (uint32x4_p)vec_ld(0, (byte*)src);
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }

 /// \brief Loads a vector from an aligned byte array
@@ -410,7 +410,7 @@ inline uint32x4_p VecLoadAligned(const byte src[16])
 /// \since Crypto++ 8.0
 inline uint32x4_p VecLoadAligned(int off, const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     return (uint32x4_p)vec_xlw4(off, (byte*)src);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -418,10 +418,10 @@ inline uint32x4_p VecLoadAligned(int off, const byte src[16])
 # else
     return (uint32x4_p)vec_vsx_ld(off, (byte*)src);
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
     CRYPTOPP_ASSERT((((uintptr_t)src)+off) % 16 == 0);
     return (uint32x4_p)vec_ld(off, (byte*)src);
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }

 /// \brief Loads a vector from a byte array
@@ -439,7 +439,7 @@ inline uint32x4_p VecLoadAligned(int off, const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoadBE(const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
 #  if (CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)vec_xlw4(0, (byte*)src);
@@ -455,13 +455,13 @@ inline uint32x4_p VecLoadBE(const byte src[16])
     return (uint32x4_p)VecReverse(vec_vsx_ld(0, (byte*)src));
 #  endif
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
 # if (CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)VecLoad((const byte*)src);
 # else
     return (uint32x4_p)VecReverse(VecLoad((const byte*)src));
 # endif
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }

 /// \brief Loads a vector from a byte array
@@ -480,7 +480,7 @@ inline uint32x4_p VecLoadBE(const byte src[16])
 /// \since Crypto++ 6.0
 inline uint32x4_p VecLoadBE(int off, const byte src[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
 #  if (CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)vec_xlw4(off, (byte*)src);
@@ -496,13 +496,13 @@ inline uint32x4_p VecLoadBE(int off, const byte src[16])
     return (uint32x4_p)VecReverse(vec_vsx_ld(off, (byte*)src));
 #  endif
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
 # if (CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)VecLoad(off, (const byte*)src);
 # else
     return (uint32x4_p)VecReverse(VecLoad(off, (const byte*)src));
 # endif
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }

 //@}
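Note: with the load wrappers now gated on _ARCH_PWR8, unaligned loads go through the VSX builtins (the vec_xlw4/vec_vsx_ld family), while pre-POWER8 builds fall back to VecLoad_ALTIVEC and its permute fixup for vec_ld's implicit 16-byte alignment. A small usage sketch, assuming a PowerPC build with ppc_simd.h on the include path:

    // Usage sketch for the wrappers above (assumes a PowerPC build).
    // On POWER8 the unaligned load is a single VSX load; on older
    // targets VecLoad_ALTIVEC must patch up vec_ld's alignment, which
    // is why aligned data is still preferred on the Altivec path.
    #include "ppc_simd.h"
    using namespace CryptoPP;

    void Demo()
    {
        CRYPTOPP_ALIGN_DATA(16) byte buf[32] = {0};
        uint32x4_p a = VecLoadAligned(buf);  // aligned load
        uint32x4_p u = VecLoad(buf + 1);     // unaligned, fine on POWER8
        VecStore(VecXor(a, u), buf + 16);    // store through the wrapper
    }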
@@ -604,7 +604,7 @@ inline void VecStore_ALTIVEC(const T data, int off, byte dest[16])
 template <class T>
 inline void VecStore(const T data, byte dest[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -635,7 +635,7 @@ inline void VecStore(const T data, byte dest[16])
 template <class T>
 inline void VecStore(const T data, int off, byte dest[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
     vec_xstw4((uint8x16_p)data, off, (byte*)dest);
 # elif defined(__xlc__) || defined(__xlC__) || defined(__clang__)
@@ -750,7 +750,7 @@ inline void VecStore(const T data, int off, word64 dest[2])
 template <class T>
 inline void VecStoreBE(const T data, byte dest[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
 #  if (CRYPTOPP_BIG_ENDIAN)
     vec_xstw4((uint8x16_p)data, 0, (byte*)dest);
@@ -766,13 +766,13 @@ inline void VecStoreBE(const T data, byte dest[16])
     vec_vsx_st((uint8x16_p)VecReverse(data), 0, (byte*)dest);
 #  endif
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
 # if (CRYPTOPP_BIG_ENDIAN)
     VecStore_ALTIVEC((uint8x16_p)data, 0, (byte*)dest);
 # else
     VecStore_ALTIVEC((uint8x16_p)VecReverse(data), 0, (byte*)dest);
 # endif
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }

 /// \brief Stores a vector to a byte array
@@ -794,7 +794,7 @@ inline void VecStoreBE(const T data, byte dest[16])
 template <class T>
 inline void VecStoreBE(const T data, int off, byte dest[16])
 {
-#if defined(_ARCH_PWR7)
+#if defined(_ARCH_PWR8)
 # if defined(__early_xlc__) || defined(__early_xlC__)
 #  if (CRYPTOPP_BIG_ENDIAN)
     vec_xstw4((uint8x16_p)data, off, (byte*)dest);
@@ -810,13 +810,13 @@ inline void VecStoreBE(const T data, int off, byte dest[16])
     vec_vsx_st((uint8x16_p)VecReverse(data), off, (byte*)dest);
 #  endif
 # endif
-#else // _ARCH_PWR7
+#else // _ARCH_PWR8
 # if (CRYPTOPP_BIG_ENDIAN)
     VecStore_ALTIVEC((uint8x16_p)data, off, (byte*)dest);
 # else
     VecStore_ALTIVEC((uint8x16_p)VecReverse(data), off, (byte*)dest);
 # endif
-#endif // _ARCH_PWR7
+#endif // _ARCH_PWR8
 }

 /// \brief Stores a vector to a word array
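Note: the BE store wrappers mirror the loads: on little-endian targets they reverse lanes before the store so the byte image in memory is the same regardless of endianness. A sketch of that contract (assumes a PowerPC build of Crypto++; illustrative, not library code):

    // Endian contract of VecLoadBE/VecStoreBE above: a BE round-trip
    // reproduces the input bytes on both big- and little-endian
    // PowerPC. Assumes a PowerPC build with ppc_simd.h available.
    #include "ppc_simd.h"
    #include <cstring>
    using namespace CryptoPP;

    bool RoundTripStable()
    {
        byte in[16], out[16];
        for (unsigned int i = 0; i < 16; ++i)
            in[i] = (byte)i;
        const uint32x4_p v = VecLoadBE(in);  // big-endian element load
        VecStoreBE(v, out);                  // restores original order
        return std::memcmp(in, out, 16) == 0;
    }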
diff --git a/simon.cpp b/simon.cpp
index 099cd7a2..d508d974 100644
--- a/simon.cpp
+++ b/simon.cpp
@@ -255,9 +255,9 @@ std::string SIMON64::Base::AlgorithmProvider() const
     if (HasNEON())
         return "NEON";
 # endif
-# if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
-        return "Power7";
+# if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
+        return "Power8";
 # endif
 # if (CRYPTOPP_ALTIVEC_AVAILABLE)
     if (HasAltivec())
diff --git a/simon128_simd.cpp b/simon128_simd.cpp
index 7b3f8518..df59ffb2 100644
--- a/simon128_simd.cpp
+++ b/simon128_simd.cpp
@@ -44,7 +44,7 @@
 # include
 #endif

-#if defined(CRYPTOPP_POWER7_AVAILABLE)
+#if defined(CRYPTOPP_POWER8_AVAILABLE)
 # include "adv_simd.h"
 # include "ppc_simd.h"
 #endif
diff --git a/simon64_simd.cpp b/simon64_simd.cpp
index 3ad26bcd..3863e779 100644
--- a/simon64_simd.cpp
+++ b/simon64_simd.cpp
@@ -576,7 +576,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk1 = vec_splats(subkeys[i]);
         const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
 #else
@@ -592,7 +592,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
     if (rounds & 1)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
 #else
         const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@@ -634,7 +634,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
     if (rounds & 1)
     {
         std::swap(x1, y1);
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
 #else
         const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@@ -647,7 +647,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
         const uint32x4_p rk2 = vec_splats(subkeys[i]);
 #else
@@ -696,7 +696,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk1 = vec_splats(subkeys[i]);
         const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
 #else
@@ -717,7 +717,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     if (rounds & 1)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
 #else
         const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@@ -771,7 +771,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     {
         std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3);
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
 #else
         const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
@@ -786,7 +786,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
         const uint32x4_p rk2 = vec_splats(subkeys[i]);
 #else
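Note: the only functional difference between the two sides of these hunks is how a 32-bit subkey is broadcast to all four lanes. POWER8 can vec_splats the scalar directly; the Altivec path loads 16 bytes (the key schedule keeps spare tail elements so the last subkey is memory-backed) and replicates word 0 with a permute. Isolated for clarity, under the same assumptions as the hunks:

    // The two subkey-broadcast strategies from the SIMON/SPECK hunks,
    // isolated. Assumes a PowerPC build; on the Altivec path 'subkeys'
    // must have memory backing past the last element, as the comments
    // in the hunks note.
    #include "ppc_simd.h"
    using namespace CryptoPP;

    inline uint32x4_p BroadcastSubkey(const word32* subkeys, int i)
    {
    #if CRYPTOPP_POWER8_AVAILABLE
        return vec_splats(subkeys[i]);         // one splat on POWER8
    #else
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys + i);  // 16-byte load
        return VecPermute(rk, rk, m);          // replicate word 0
    #endif
    }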
diff --git a/speck.cpp b/speck.cpp
index 61fac2cf..151bac78 100644
--- a/speck.cpp
+++ b/speck.cpp
@@ -235,9 +235,9 @@ std::string SPECK64::Base::AlgorithmProvider() const
     if (HasNEON())
         return "NEON";
 # endif
-# if (CRYPTOPP_POWER7_AVAILABLE)
-    if (HasPower7())
-        return "Power7";
+# if (CRYPTOPP_POWER8_AVAILABLE)
+    if (HasPower8())
+        return "Power8";
 # endif
 # if (CRYPTOPP_ALTIVEC_AVAILABLE)
     if (HasAltivec())
diff --git a/speck64_simd.cpp b/speck64_simd.cpp
index 885acad4..08aaaa19 100644
--- a/speck64_simd.cpp
+++ b/speck64_simd.cpp
@@ -517,7 +517,7 @@ void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
     for (int i=0; i < static_cast<int>(rounds); ++i)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[i]);
 #else
         // subkeys has extra elements so memory backs the last subkey
@@ -564,7 +564,7 @@ void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = static_cast<int>(rounds-1); i >= 0; --i)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[i]);
 #else
         // subkeys has extra elements so memory backs the last subkey
@@ -616,7 +616,7 @@ void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     for (int i=0; i < static_cast<int>(rounds); ++i)
     {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
         const uint32x4_p rk = vec_splats(subkeys[i]);
 #else
         // subkeys has extra elements so memory backs the last subkey
@@ -685,7 +685,7 @@ void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
     for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
-#if CRYPTOPP_POWER7_AVAILABLE
+#if CRYPTOPP_POWER8_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
 #else
        // subkeys has extra elements so memory backs the last subkey
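Note: after this change the PowerPC providers report "Power8" where they used to report "Power7". A quick way to see which path a given build and machine dispatch to (AlgorithmProvider is available on any Crypto++ algorithm object; the provider strings below are the ones that appear in this patch, plus the portable fallback):

    // Prints the provider the new dispatch selects, e.g. "Power8",
    // "Altivec", "NEON", or "C++", depending on the build and the
    // CPU the program runs on. Links against Crypto++.
    #include "speck.h"
    #include <iostream>

    int main()
    {
        CryptoPP::SPECK64::Encryption enc;
        std::cout << enc.AlgorithmProvider() << std::endl;
        return 0;
    }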