From 3725c8411b7b278faddde1e88b11cb5b584a9531 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 18 Nov 2018 16:35:02 -0500 Subject: [PATCH] Move CPU_ProbePower7 and CPU_ProbePower8 into their own source files (GH #742) --- Filelist.txt | 2 + GNUmakefile | 10 +++- ppc_power7.cpp | 88 +++++++++++++++++++++++++++ ppc_power8.cpp | 144 ++++++++++++++++++++++++++++++++++++++++++++ rijndael_simd.cpp | 149 ---------------------------------------------- 5 files changed, 243 insertions(+), 150 deletions(-) create mode 100644 ppc_power7.cpp create mode 100644 ppc_power8.cpp diff --git a/Filelist.txt b/Filelist.txt index 876e99bb..c7f83a90 100644 --- a/Filelist.txt +++ b/Filelist.txt @@ -226,6 +226,8 @@ poly1305.cpp poly1305.h polynomi.cpp polynomi.h +ppc_power7.cpp +ppc_power8.cpp ppc_simd.h ppc_simd.cpp pssr.cpp diff --git a/GNUmakefile b/GNUmakefile index 5d72a471..507aec96 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -1440,10 +1440,18 @@ lea_simd.o : lea_simd.cpp neon_simd.o : neon_simd.cpp $(CXX) $(strip $(CXXFLAGS) $(NEON_FLAG) -c) $< -# AltiVec, Power7, Power8 available +# AltiVec available ppc_simd.o : ppc_simd.cpp $(CXX) $(strip $(CXXFLAGS) $(ALTIVEC_FLAG) -c) $< +# Power7 available +ppc_power7.o : ppc_power7.cpp + $(CXX) $(strip $(CXXFLAGS) $(POWER7_FLAG) -c) $< + +# Power8 available +ppc_power8.o : ppc_power8.cpp + $(CXX) $(strip $(CXXFLAGS) $(POWER8_FLAG) -c) $< + # AESNI or ARMv7a/ARMv8a available rijndael_simd.o : rijndael_simd.cpp $(CXX) $(strip $(CXXFLAGS) $(AES_FLAG) -c) $< diff --git a/ppc_power7.cpp b/ppc_power7.cpp new file mode 100644 index 00000000..0f838cd9 --- /dev/null +++ b/ppc_power7.cpp @@ -0,0 +1,88 @@ +// ppc_power7.cpp - written and placed in the public domain by +// Jeffrey Walton, Uri Blumenthal and Marcel Raad. +// +// This source file uses intrinsics and built-ins to gain access to +// Power7 instructions. 
A separate source file is needed because
+//    additional CXXFLAGS are required to enable the appropriate
+//    instruction sets in some build configurations.
+
+#include "pch.h"
+#include "config.h"
+
+#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
+# include <signal.h>
+# include <setjmp.h>
+#endif
+
+#if defined(_ARCH_PWR7)
+# include "ppc_simd.h"
+#endif
+
+// Squash MS LNK4221 and libtool warnings
+extern const char PPC_POWER7_FNAME[] = __FILE__;
+
+NAMESPACE_BEGIN(CryptoPP)
+
+// ************************* Feature Probes ************************* //
+
+#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
+extern "C" {
+    typedef void (*SigHandler)(int);
+
+    static jmp_buf s_jmpSIGILL;
+    static void SigIllHandler(int)
+    {
+        longjmp(s_jmpSIGILL, 1);
+    }
+}
+#endif // CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
+
+#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
+// Runtime probe for Power7: performs an unaligned VSX load/store while SIGILL
+// is trapped. Returns false if it faults or if the probe is compiled out.
+bool CPU_ProbePower7()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif (_ARCH_PWR7) && defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+    volatile int result = false;
+
+    // Save the mask before installing the handler so a failure here cannot leave SigIllHandler installed.
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+        return false;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        // POWER7 added unaligned loads and store operations
+        byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];
+
+        // Specifically call the VSX loads and stores
+        #if defined(__xlc__) || defined(__xlC__)
+        vec_xst(vec_xl(0, b1+3), 0, b2+1);
+        #else
+        vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1);
+        #endif
+
+        result = (0 == std::memcmp(b1+3, b2+1, 16));
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+#else
+    return false;
+#endif // _ARCH_PWR7
+}
+
+#endif // PPC32 or PPC64
+
+NAMESPACE_END
diff --git a/ppc_power8.cpp b/ppc_power8.cpp
new file mode 100644
index 00000000..17ae638c
--- /dev/null
+++ b/ppc_power8.cpp
@@ -0,0 +1,144 @@
+// ppc_power8.cpp - written and placed in the public domain by
+//                  Jeffrey Walton, Uri Blumenthal and Marcel Raad.
+//
+//    This source file uses intrinsics and built-ins to gain access to
+//    Power8 instructions. A separate source file is needed because
+//    additional CXXFLAGS are required to enable the appropriate
+//    instruction sets in some build configurations.
+
+#include "pch.h"
+#include "config.h"
+
+#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
+# include <signal.h>
+# include <setjmp.h>
+#endif
+
+#if defined(_ARCH_PWR8) || defined(__CRYPTO__)
+# include "ppc_simd.h"
+#endif
+
+// Squash MS LNK4221 and libtool warnings
+extern const char PPC_POWER8_FNAME[] = __FILE__;
+
+NAMESPACE_BEGIN(CryptoPP)
+
+// ************************* Feature Probes ************************* //
+
+#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
+extern "C" {
+    typedef void (*SigHandler)(int);
+
+    static jmp_buf s_jmpSIGILL;
+    static void SigIllHandler(int)
+    {
+        longjmp(s_jmpSIGILL, 1);
+    }
+}
+#endif // CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
+
+#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
+// Runtime probe for Power8: performs a 64-bit vector add while SIGILL is
+// trapped. Returns false if it faults or if the probe is compiled out.
+bool CPU_ProbePower8()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif (_ARCH_PWR8) && defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+    volatile int result = true;
+
+    // Save the mask before installing the handler so a failure here cannot leave SigIllHandler installed.
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+        return false;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        // POWER8 added 64-bit SIMD operations
+        const word64 x = W64LIT(0xffffffffffffffff);
+        word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];
+
+        // Specifically call the VSX loads and stores
+        #if defined(__xlc__) || defined(__xlC__)
+        const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1);
+        const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2);
+        const uint64x2_p v3 = VecAdd(v1, v2); // 64-bit add
+        vec_xst((uint8x16_p)v3, 0, (byte*)w3);
+        #else
+        const uint64x2_p v1 = (uint64x2_p)vec_vsx_ld(0, (byte*)w1);
+        const uint64x2_p v2 = (uint64x2_p)vec_vsx_ld(0, (byte*)w2);
+        const uint64x2_p v3 = VecAdd(v1, v2); // 64-bit add
+        vec_vsx_st((uint8x16_p)v3, 0, (byte*)w3);
+        #endif
+
+        // Relies on integer wrap
+        result = (w3[0] == 3 && w3[1] == 5);
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+#else
+    return false;
+#endif // _ARCH_PWR8
+}
+
+// Runtime probe for the Power8 AES instructions: runs the VecEncrypt/VecDecrypt
+// helpers while SIGILL is trapped. Returns false on a fault or if compiled out.
+bool CPU_ProbeAES()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif (__CRYPTO__) && defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+    volatile int result = true;
+
+    // Save the mask before installing the handler so a failure here cannot leave SigIllHandler installed.
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+        return false;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        byte key[16] = {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1,
+                        0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05};
+        byte state[16] = {0x19, 0x3d, 0xe3, 0xb3, 0xa0, 0xf4, 0xe2, 0x2b,
+                          0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08};
+        byte r[16] = {255}, z[16] = {};
+
+        uint8x16_p k = (uint8x16_p)VecLoad(0, key);
+        uint8x16_p s = (uint8x16_p)VecLoad(0, state);
+        s = VecEncrypt(s, k);
+        s = VecEncryptLast(s, k);
+        s = VecDecrypt(s, k);
+        s = VecDecryptLast(s, k);
+        VecStore(s, r);
+
+        result = (0 != std::memcmp(r, z, 16));
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+#else
+    return false;
+#endif // __CRYPTO__
+}
+#endif // PPC32 or PPC64
+
+NAMESPACE_END
diff --git a/rijndael_simd.cpp b/rijndael_simd.cpp
index f4c9a288..1369307f 100644
--- a/rijndael_simd.cpp
+++ b/rijndael_simd.cpp
@@ -137,155 +137,6 @@ bool CPU_ProbeAES()
 }
 #endif // ARM32 or ARM64
 
-#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
- bool CPU_ProbePower7()
-{
-#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
-    return false;
-#elif (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
-# if
defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY) - - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile int result = false; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); - if (oldHandler == SIG_ERR) - return false; - - volatile sigset_t oldMask; - if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) - return false; - - if (setjmp(s_jmpSIGILL)) - result = false; - else - { - // POWER7 added unaligned loads and store operations - byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17]; - - // Specifically call the VSX loads and stores - #if defined(__xlc__) || defined(__xlC__) - vec_xst(vec_xl(0, b1+3), 0, b2+1); - #else - vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1); - #endif - - result = (0 == std::memcmp(b1+3, b2+1, 16)); - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); - signal(SIGILL, oldHandler); - return result; -# endif -#else - return false; -#endif // CRYPTOPP_POWER7_AVAILABLE -} - -bool CPU_ProbePower8() -{ -#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) - return false; -#elif (CRYPTOPP_POWER8_AVAILABLE) -# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY) - - // longjmp and clobber warnings. Volatile is required. 
- // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile int result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); - if (oldHandler == SIG_ERR) - return false; - - volatile sigset_t oldMask; - if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) - return false; - - if (setjmp(s_jmpSIGILL)) - result = false; - else - { - // POWER8 added 64-bit SIMD operations - const word64 x = W64LIT(0xffffffffffffffff); - word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2]; - - // Specifically call the VSX loads and stores - #if defined(__xlc__) || defined(__xlC__) - const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1); - const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2); - const uint64x2_p v3 = VecAdd(v1, v2); // 64-bit add - vec_xst((uint8x16_p)v3, 0, (byte*)w3); - #else - const uint64x2_p v1 = (uint64x2_p)vec_vsx_ld(0, (byte*)w1); - const uint64x2_p v2 = (uint64x2_p)vec_vsx_ld(0, (byte*)w2); - const uint64x2_p v3 = VecAdd(v1, v2); // 64-bit add - vec_vsx_st((uint8x16_p)v3, 0, (byte*)w3); - #endif - - // Relies on integer wrap - result = (w3[0] == 3 && w3[1] == 5); - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); - signal(SIGILL, oldHandler); - return result; -# endif -#else - return false; -#endif // CRYPTOPP_POWER8_AVAILABLE -} - -bool CPU_ProbeAES() -{ -#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) - return false; -#elif (CRYPTOPP_POWER8_AES_AVAILABLE) -# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY) - - // longjmp and clobber warnings. Volatile is required. 
- // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 - volatile int result = true; - - volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); - if (oldHandler == SIG_ERR) - return false; - - volatile sigset_t oldMask; - if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) - return false; - - if (setjmp(s_jmpSIGILL)) - result = false; - else - { - byte key[16] = {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1, - 0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05}; - byte state[16] = {0x19, 0x3d, 0xe3, 0xb3, 0xa0, 0xf4, 0xe2, 0x2b, - 0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08}; - byte r[16] = {255}, z[16] = {}; - - uint8x16_p k = (uint8x16_p)VecLoad(0, key); - uint8x16_p s = (uint8x16_p)VecLoad(0, state); - s = VecEncrypt(s, k); - s = VecEncryptLast(s, k); - s = VecDecrypt(s, k); - s = VecDecryptLast(s, k); - VecStore(s, r); - - result = (0 != std::memcmp(r, z, 16)); - } - - sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); - signal(SIGILL, oldHandler); - return result; -# endif -#else - return false; -#endif // CRYPTOPP_POWER8_AES_AVAILABLE -} -#endif // PPC32 or PPC64 - // ***************************** ARMv8 ***************************** // #if (CRYPTOPP_ARM_AES_AVAILABLE)