diff --git a/GNUmakefile b/GNUmakefile index 548fe2bf..e05761fe 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -443,26 +443,26 @@ ifeq ($(DETECT_FEATURES),1) ifneq ($(IS_ARM32),0) TPROG = TestPrograms/test_arm_neon.cxx - TOPT = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon + TOPT = -march=armv7-a -mfpu=neon HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) ifeq ($(strip $(HAVE_OPT)),0) - NEON_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - ARIA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - AES_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - CRC_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - BLAKE2B_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - BLAKE2S_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - CHACHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - CHAM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - LEA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - SHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - SIMECK_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - SIMON64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - SIMON128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - SPECK64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - SPECK128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon - SM4_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon + NEON_FLAG = -march=armv7-a -mfpu=neon + ARIA_FLAG = -march=armv7-a -mfpu=neon + AES_FLAG = -march=armv7-a -mfpu=neon + CRC_FLAG = -march=armv7-a -mfpu=neon + GCM_FLAG = -march=armv7-a -mfpu=neon + BLAKE2B_FLAG = -march=armv7-a -mfpu=neon + BLAKE2S_FLAG = -march=armv7-a -mfpu=neon + CHACHA_FLAG = -march=armv7-a -mfpu=neon + CHAM_FLAG = -march=armv7-a -mfpu=neon + LEA_FLAG = -march=armv7-a -mfpu=neon + SHA_FLAG = -march=armv7-a -mfpu=neon + SIMECK_FLAG = -march=armv7-a -mfpu=neon + SIMON64_FLAG = -march=armv7-a -mfpu=neon + SIMON128_FLAG = -march=armv7-a -mfpu=neon + SPECK64_FLAG = -march=armv7-a -mfpu=neon + SPECK128_FLAG = -march=armv7-a -mfpu=neon + SM4_FLAG = -march=armv7-a -mfpu=neon else CXXFLAGS += -DCRYPTOPP_DISABLE_ASM endif @@ -621,12 +621,16 @@ ifeq ($(DETECT_FEATURES),1) HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) ifeq ($(strip $(HAVE_OPT)),0) BLAKE2B_FLAG = $(POWER8_FLAG) + BLAKE2S_FLAG = $(POWER8_FLAG) + CHACHA_FLAG = $(POWER8_FLAG) CRC_FLAG = $(POWER8_FLAG) GCM_FLAG = $(POWER8_FLAG) GF2N_FLAG = $(POWER8_FLAG) AES_FLAG = $(POWER8_FLAG) SHA_FLAG = $(POWER8_FLAG) SHACAL2_FLAG = $(POWER8_FLAG) + SIMON64_FLAG = $(POWER8_FLAG) + SPECK64_FLAG = $(POWER8_FLAG) SIMON128_FLAG = $(POWER8_FLAG) SPECK128_FLAG = $(POWER8_FLAG) else @@ -641,13 +645,9 @@ ifeq ($(DETECT_FEATURES),1) HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) ifeq ($(strip $(HAVE_OPT)),0) ARIA_FLAG = $(POWER7_FLAG) - BLAKE2S_FLAG = $(POWER7_FLAG) - CHACHA_FLAG = $(POWER7_FLAG) CHAM_FLAG = $(POWER7_FLAG) LEA_FLAG = $(POWER7_FLAG) SIMECK_FLAG = $(POWER7_FLAG) - SIMON64_FLAG = $(POWER7_FLAG) - SPECK64_FLAG = $(POWER7_FLAG) else POWER7_FLAG = endif @@ -698,8 +698,8 @@ ifeq ($(DETECT_FEATURES),1) endif endif - # Drop to Power4 if Power7 not available - ifeq ($(POWER7_FLAG),) + # Drop to Power4 if Power8 not available + ifeq ($(POWER8_FLAG),) ifneq ($(ALTIVEC_FLAG),) BLAKE2S_FLAG = $(ALTIVEC_FLAG) CHACHA_FLAG = $(ALTIVEC_FLAG) @@ -1428,7 +1428,7 @@ endif # Dependencies # Cryptogams ARM asm implementation. aes_armv4.o : aes_armv4.S - $(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -mfloat-abi=$(FP_ABI) -c) $< + $(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -c) $< # SSSE3 or NEON available aria_simd.o : aria_simd.cpp diff --git a/blake2.cpp b/blake2.cpp index 733082d7..92d3d7f4 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -38,8 +38,8 @@ // https://github.com/weidai11/cryptopp/issues/743 #if defined(__xlC__) && (__xlC__ < 0x0d01) # define CRYPTOPP_DISABLE_ALTIVEC 1 -# define CRYPTOPP_POWER7_ALTIVEC 1 -# undef CRYPTOPP_POWER7_AVAILABLE +# define CRYPTOPP_POWER8_ALTIVEC 1 +# undef CRYPTOPP_POWER8_AVAILABLE # undef CRYPTOPP_ALTIVEC_AVAILABLE #endif @@ -171,8 +171,8 @@ extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state); extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state); #endif -#if CRYPTOPP_POWER7_AVAILABLE -extern void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state); +#if CRYPTOPP_POWER8_AVAILABLE +extern void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state); #elif CRYPTOPP_ALTIVEC_AVAILABLE extern void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state); #endif @@ -233,8 +233,8 @@ unsigned int BLAKE2s::OptimalDataAlignment() const return 4; else #endif -#if (CRYPTOPP_POWER7_AVAILABLE) - if (HasPower7()) +#if (CRYPTOPP_POWER8_AVAILABLE) + if (HasPower8()) return 16; else #elif (CRYPTOPP_ALTIVEC_AVAILABLE) @@ -257,8 +257,8 @@ std::string BLAKE2s::AlgorithmProvider() const return "NEON"; else #endif -#if (CRYPTOPP_POWER7_AVAILABLE) - if (HasPower7()) +#if (CRYPTOPP_POWER8_AVAILABLE) + if (HasPower8()) return "Power7"; else #elif (CRYPTOPP_ALTIVEC_AVAILABLE) @@ -690,10 +690,10 @@ void BLAKE2s::Compress(const byte *input) return BLAKE2_Compress32_NEON(input, m_state); } #endif -#if CRYPTOPP_POWER7_AVAILABLE - if(HasPower7()) +#if CRYPTOPP_POWER8_AVAILABLE + if(HasPower8()) { - return BLAKE2_Compress32_POWER7(input, m_state); + return BLAKE2_Compress32_POWER8(input, m_state); } #elif CRYPTOPP_ALTIVEC_AVAILABLE if(HasAltivec()) diff --git a/blake2b_simd.cpp b/blake2b_simd.cpp index 9a220bd1..ff138321 100644 --- a/blake2b_simd.cpp +++ b/blake2b_simd.cpp @@ -28,7 +28,7 @@ // https://github.com/weidai11/cryptopp/issues/743 #if defined(__xlC__) && (__xlC__ < 0x0d01) # define CRYPTOPP_DISABLE_ALTIVEC 1 -# undef CRYPTOPP_POWER7_AVAILABLE +# undef CRYPTOPP_POWER8_AVAILABLE # undef CRYPTOPP_ALTIVEC_AVAILABLE #endif diff --git a/blake2s_simd.cpp b/blake2s_simd.cpp index 4b46c525..827916cd 100644 --- a/blake2s_simd.cpp +++ b/blake2s_simd.cpp @@ -38,8 +38,8 @@ // https://github.com/weidai11/cryptopp/issues/743 #if defined(__xlC__) && (__xlC__ < 0x0d01) # define CRYPTOPP_DISABLE_ALTIVEC 1 -# define CRYPTOPP_POWER7_ALTIVEC 1 -# undef CRYPTOPP_POWER7_AVAILABLE +# define CRYPTOPP_POWER8_ALTIVEC 1 +# undef CRYPTOPP_POWER8_AVAILABLE # undef CRYPTOPP_ALTIVEC_AVAILABLE #endif @@ -692,7 +692,7 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state) } #endif // CRYPTOPP_ARM_NEON_AVAILABLE -#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE) +#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE) inline uint32x4_p VecLoad32(const void* p) { @@ -1015,11 +1015,11 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state) VecStore32LE(state.h()+0, VecXor(ff0, VecXor(row1, row3))); VecStore32LE(state.h()+4, VecXor(ff1, VecXor(row2, row4))); } -#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE +#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE -#if (CRYPTOPP_POWER7_AVAILABLE) +#if (CRYPTOPP_POWER8_AVAILABLE) -void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state) +void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state) { BLAKE2_Compress32_CORE(input, state); } diff --git a/chacha.cpp b/chacha.cpp index dccde301..0e88467d 100644 --- a/chacha.cpp +++ b/chacha.cpp @@ -28,8 +28,8 @@ extern void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input, extern void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds); #endif -#if (CRYPTOPP_POWER7_AVAILABLE) -extern void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds); +#if (CRYPTOPP_POWER8_AVAILABLE) +extern void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds); #elif (CRYPTOPP_ALTIVEC_AVAILABLE) extern void ChaCha_OperateKeystream_ALTIVEC(const word32 *state, const byte* input, byte *output, unsigned int rounds); #endif @@ -153,13 +153,13 @@ void ChaCha_OperateKeystream(KeystreamOperation operation, } #endif -#if (CRYPTOPP_POWER7_AVAILABLE) - if (HasPower7()) +#if (CRYPTOPP_POWER8_AVAILABLE) + if (HasPower8()) { while (iterationCount >= 4 && MultiBlockSafe(state[12], 4)) { const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL; - ChaCha_OperateKeystream_POWER7(state, xorInput ? input : NULLPTR, output, rounds); + ChaCha_OperateKeystream_POWER8(state, xorInput ? input : NULLPTR, output, rounds); // MultiBlockSafe avoids overflow on the counter words state[12] += 4; @@ -267,8 +267,8 @@ std::string ChaCha_AlgorithmProvider() return "NEON"; else #endif -#if (CRYPTOPP_POWER7_AVAILABLE) - if (HasPower7()) +#if (CRYPTOPP_POWER8_AVAILABLE) + if (HasPower8()) return "Power7"; else #elif (CRYPTOPP_ALTIVEC_AVAILABLE) diff --git a/chacha_simd.cpp b/chacha_simd.cpp index 65316384..9fd6b0f1 100644 --- a/chacha_simd.cpp +++ b/chacha_simd.cpp @@ -209,7 +209,7 @@ inline __m128i RotateLeft<16>(const __m128i val) #if (CRYPTOPP_ALTIVEC_AVAILABLE) -// ChaCha_OperateKeystream_POWER7 is optimized for POWER7. However, Altivec +// ChaCha_OperateKeystream_POWER8 is optimized for POWER7. However, Altivec // is supported by using vec_ld and vec_st, and using a composite VecAdd // that supports 64-bit element adds. vec_ld and vec_st add significant // overhead when memory is not aligned. Despite the drawbacks Altivec @@ -827,7 +827,7 @@ void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte * #endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE -#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE) +#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE) // ChaCha_OperateKeystream_CORE will use either POWER7 or ALTIVEC, // depending on the flags used to compile this source file. The @@ -1096,11 +1096,11 @@ inline void ChaCha_OperateKeystream_CORE(const word32 *state, const byte* input, VecStore32LE(output + 15*16, r3_3); } -#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE +#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE -#if (CRYPTOPP_POWER7_AVAILABLE) +#if (CRYPTOPP_POWER8_AVAILABLE) -void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds) +void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds) { ChaCha_OperateKeystream_CORE(state, input, output, rounds); } diff --git a/gcm.cpp b/gcm.cpp index d1c3011c..c7c51071 100644 --- a/gcm.cpp +++ b/gcm.cpp @@ -75,8 +75,8 @@ extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c); extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c); #endif -#if CRYPTOPP_POWER7_AVAILABLE -extern void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c); +#if CRYPTOPP_POWER8_AVAILABLE +extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c); #endif #if CRYPTOPP_CLMUL_AVAILABLE @@ -213,11 +213,11 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const for (k=1; k #endif -#if defined(CRYPTOPP_POWER7_AVAILABLE) +#if defined(CRYPTOPP_POWER8_AVAILABLE) # include "adv_simd.h" # include "ppc_simd.h" #endif diff --git a/simon64_simd.cpp b/simon64_simd.cpp index 3ad26bcd..3863e779 100644 --- a/simon64_simd.cpp +++ b/simon64_simd.cpp @@ -576,7 +576,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, for (int i = 0; i < static_cast(rounds & ~1)-1; i += 2) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk1 = vec_splats(subkeys[i]); const uint32x4_p rk2 = vec_splats(subkeys[i+1]); #else @@ -592,7 +592,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, if (rounds & 1) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[rounds-1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; @@ -634,7 +634,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, if (rounds & 1) { std::swap(x1, y1); -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[rounds-1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; @@ -647,7 +647,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, for (int i = static_cast(rounds-2); i >= 0; i -= 2) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk1 = vec_splats(subkeys[i+1]); const uint32x4_p rk2 = vec_splats(subkeys[i]); #else @@ -696,7 +696,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, for (int i = 0; i < static_cast(rounds & ~1)-1; i += 2) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk1 = vec_splats(subkeys[i]); const uint32x4_p rk2 = vec_splats(subkeys[i+1]); #else @@ -717,7 +717,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, if (rounds & 1) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[rounds-1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; @@ -771,7 +771,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, { std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3); -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[rounds-1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; @@ -786,7 +786,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, for (int i = static_cast(rounds-2); i >= 0; i -= 2) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk1 = vec_splats(subkeys[i+1]); const uint32x4_p rk2 = vec_splats(subkeys[i]); #else diff --git a/speck.cpp b/speck.cpp index 61fac2cf..d58a2e38 100644 --- a/speck.cpp +++ b/speck.cpp @@ -235,8 +235,8 @@ std::string SPECK64::Base::AlgorithmProvider() const if (HasNEON()) return "NEON"; # endif -# if (CRYPTOPP_POWER7_AVAILABLE) - if (HasPower7()) +# if (CRYPTOPP_POWER8_AVAILABLE) + if (HasPower8()) return "Power7"; # endif # if (CRYPTOPP_ALTIVEC_AVAILABLE) diff --git a/speck64_simd.cpp b/speck64_simd.cpp index 885acad4..08aaaa19 100644 --- a/speck64_simd.cpp +++ b/speck64_simd.cpp @@ -517,7 +517,7 @@ void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, for (int i=0; i < static_cast(rounds); ++i) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[i]); #else // subkeys has extra elements so memory backs the last subkey @@ -564,7 +564,7 @@ void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, for (int i = static_cast(rounds-1); i >= 0; --i) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[i]); #else // subkeys has extra elements so memory backs the last subkey @@ -616,7 +616,7 @@ void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, for (int i=0; i < static_cast(rounds); ++i) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[i]); #else // subkeys has extra elements so memory backs the last subkey @@ -685,7 +685,7 @@ void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, for (int i = static_cast(rounds-1); i >= 0; --i) { -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_POWER8_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[i]); #else // subkeys has extra elements so memory backs the last subkey