From 3129ad4d705f88d5ddf72ef9975dac8d08fa7fe9 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 19 Nov 2018 02:28:29 -0500 Subject: [PATCH] Fix LLVM Clang compile on PowerPC --- GNUmakefile | 73 +++++++++++++++++++++++++++++++++++++++++------ blake2s_simd.cpp | 7 +++-- gcm_simd.cpp | 21 ++++++++++---- ppc_power7.cpp | 2 +- ppc_power8.cpp | 2 +- ppc_simd.h | 34 ++++++++++++++++------ rijndael_simd.cpp | 2 +- sha_simd.cpp | 50 ++++++-------------------------- 8 files changed, 124 insertions(+), 67 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index c6ee8c6b..8af5503a 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -71,8 +71,8 @@ IS_MINGW := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "MinGW") IS_CYGWIN := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "Cygwin") IS_DARWIN := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "Darwin") IS_NETBSD := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "NetBSD") -IS_AIX := $(shell echo "$(UNAMEX)" | $(GREP) -i -c "aix") -IS_SUN := $(shell echo "$(UNAMEX)" | $(GREP) -i -c "SunOS") +IS_AIX := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "aix") +IS_SUN := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "SunOS") SUN_COMPILER := $(shell $(CXX) -V 2>&1 | $(GREP) -i -c -E 'CC: (Sun|Studio)') GCC_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(GREP) -v -E '(llvm|clang)' | $(GREP) -i -c -E '(gcc|g\+\+)') @@ -118,8 +118,8 @@ endif # Fixup AIX ifeq ($(IS_AIX),1) TPROG = TestPrograms/test_64bit.cxx - HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TPROG) -o $(TOUT) 2>&1 | $(GREP) -i -c -E $(BAD_RESULT)) - ifeq ($(HAVE_OPT),0) + HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) + ifeq ($(strip $(HAVE_OPT)),0) IS_PPC64=1 else IS_PPC32=1 @@ -623,17 +623,51 @@ ifeq ($(DETECT_FEATURES),1) # endif #endif + ##################################################################### + # AES is a separate submodule of POWER8 due to possible export + # restrictions by the government. It is the reason LLVM choose + # different intrinsics than GCC and XLC. 
+ + TPROG = TestPrograms/test_ppc_aes.cxx + TOPT = $(POWER9_FLAG) + HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) + ifeq ($(strip $(HAVE_OPT)),0) + AES_FLAG = $(POWER9_FLAG) + endif + + TPROG = TestPrograms/test_ppc_aes.cxx + TOPT = $(POWER8_FLAG) + HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) + ifeq ($(strip $(HAVE_OPT)),0) + AES_FLAG = $(POWER8_FLAG) + endif + + TPROG = TestPrograms/test_ppc_sha.cxx + TOPT = $(POWER9_FLAG) + HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) + ifeq ($(strip $(HAVE_OPT)),0) + SHA_FLAG = $(POWER9_FLAG) + endif + + TPROG = TestPrograms/test_ppc_sha.cxx + TOPT = $(POWER8_FLAG) + HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) + ifeq ($(strip $(HAVE_OPT)),0) + SHA_FLAG = $(POWER8_FLAG) + endif + + ##################################################################### + # Looking for a POWER8 option + TPROG = TestPrograms/test_ppc_power8.cxx TOPT = $(POWER9_FLAG) HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) ifeq ($(strip $(HAVE_OPT)),0) ALTIVEC_FLAG = $(POWER9_FLAG) - AES_FLAG = $(POWER9_FLAG) BLAKE2B_FLAG = $(POWER9_FLAG) BLAKE2S_FLAG = $(POWER9_FLAG) CHACHA_FLAG = $(POWER9_FLAG) GCM_FLAG = $(POWER9_FLAG) - SHA_FLAG = $(POWER9_FLAG) SM4_FLAG = $(POWER9_FLAG) SIMON64_FLAG = $(POWER9_FLAG) SIMON128_FLAG = $(POWER9_FLAG) @@ -648,12 +682,10 @@ ifeq ($(DETECT_FEATURES),1) HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) ifeq ($(strip $(HAVE_OPT)),0) ALTIVEC_FLAG = $(POWER8_FLAG) - AES_FLAG = $(POWER8_FLAG) BLAKE2B_FLAG = $(POWER8_FLAG) BLAKE2S_FLAG = $(POWER8_FLAG) CHACHA_FLAG = $(POWER8_FLAG) GCM_FLAG = $(POWER8_FLAG) - SHA_FLAG = $(POWER8_FLAG) SM4_FLAG = $(POWER8_FLAG) SIMON64_FLAG = $(POWER8_FLAG) SIMON128_FLAG = $(POWER8_FLAG) @@ -663,6 +695,9 @@ ifeq ($(DETECT_FEATURES),1) POWER8_FLAG = endif + ##################################################################### + # Looking for a POWER7 option + TPROG = TestPrograms/test_ppc_power7.cxx TOPT = $(POWER7_FLAG) HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) @@ -680,6 +715,9 @@ ifeq ($(DETECT_FEATURES),1) POWER7_FLAG = endif + ##################################################################### + # Looking for an Altivec option + TPROG = TestPrograms/test_ppc_altivec.cxx TOPT = $(POWER6_FLAG) HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) @@ -707,6 +745,9 @@ ifeq ($(DETECT_FEATURES),1) POWER4_FLAG = endif + ##################################################################### + # Fixups for algorithms that can drop to a lower ISA, if needed + # Drop to Power7 if Power8 is not available. 
ifeq ($(POWER8_FLAG),) GCM_FLAG = $(POWER7_FLAG) @@ -720,6 +761,9 @@ ifeq ($(DETECT_FEATURES),1) SPECK64_FLAG = $(ALTIVEC_FLAG) endif + ##################################################################### + # Fixups for missing ISAs + ifeq ($(ALTIVEC_FLAG),) CXXFLAGS += -DCRYPTOPP_DISABLE_ALTIVEC else ifeq ($(POWER9_FLAG)$(POWER8_FLAG)$(POWER7_FLAG),) @@ -728,6 +772,19 @@ ifeq ($(DETECT_FEATURES),1) CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8 endif + ##################################################################### + # Fixups for missing crypto + + ifneq ($(POWER9_FLAG)$(POWER8_FLAG),) + ifeq ($(AES_FLAG),) + CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8_AES + endif + ifeq ($(SHA_FLAG),) + CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8_SHA + endif + # CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8_VMULL + endif + # DETECT_FEATURES endif diff --git a/blake2s_simd.cpp b/blake2s_simd.cpp index af4aba61..a481b447 100644 --- a/blake2s_simd.cpp +++ b/blake2s_simd.cpp @@ -812,6 +812,9 @@ inline uint32x4_p VectorSet32(const uint32x4_p a, const uint32x4_p b) const uint8x16_p mask = {12,13,14,15, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; return VecPermute(a, VecShiftLeftOctet<12>(b), mask); } + + // Quiet IBM XLC warning + return VecXor(a, a); } template @@ -1005,14 +1008,14 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state) void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state) { - BLAKE2_Compress32_CORE(input, state); + BLAKE2_Compress32_CORE(input, state); } #elif (CRYPTOPP_ALTIVEC_AVAILABLE) void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state) { - BLAKE2_Compress32_CORE(input, state); + BLAKE2_Compress32_CORE(input, state); } #endif diff --git a/gcm_simd.cpp b/gcm_simd.cpp index b701f398..78e842b5 100644 --- a/gcm_simd.cpp +++ b/gcm_simd.cpp @@ -64,7 +64,7 @@ extern const char GCM_SIMD_FNAME[] = __FILE__; ANONYMOUS_NAMESPACE_BEGIN -// ************************* Miscellaneous ************************* // +// *************************** ARM NEON *************************** // #if CRYPTOPP_ARM_PMULL_AVAILABLE #if defined(__GNUC__) @@ -168,7 +168,10 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) #endif // Microsoft and compatibles #endif // CRYPTOPP_ARM_PMULL_AVAILABLE +// ************************** Power 8 Crypto ************************** // + #if CRYPTOPP_POWER8_VMULL_AVAILABLE + using CryptoPP::uint32x4_p; using CryptoPP::uint64x2_p; using CryptoPP::VecGetLow; @@ -201,8 +204,10 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val) // _mm_clmulepi64_si128(a, b, 0x00) inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return VMULL2LE(__vpmsumd (VecGetHigh(a), VecGetHigh(b))); +#elif defined(__clang__) + return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b))); #else return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b))); #endif @@ -214,8 +219,10 @@ inline uint64x2_p VMULL_01LE(const uint64x2_p& a, const uint64x2_p& b) // Small speedup. VecGetHigh(b) ensures the high dword of 'b' is 0. // The 0 used in the vmull yields 0 for the high product, so the high // dword of 'a' is "don't care". 
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return VMULL2LE(__vpmsumd (a, VecGetHigh(b))); +#elif defined(__clang__) + return VMULL2LE(__builtin_altivec_crypto_vpmsumd (a, VecGetHigh(b))); #else return VMULL2LE(__builtin_crypto_vpmsumd (a, VecGetHigh(b))); #endif @@ -227,8 +234,10 @@ inline uint64x2_p VMULL_10LE(const uint64x2_p& a, const uint64x2_p& b) // Small speedup. VecGetHigh(a) ensures the high dword of 'a' is 0. // The 0 used in the vmull yields 0 for the high product, so the high // dword of 'b' is "don't care". -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return VMULL2LE(__vpmsumd (VecGetHigh(a), b)); +#elif defined(__clang__) + return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), b)); #else return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), b)); #endif @@ -240,8 +249,10 @@ inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b) // Small speedup. VecGetLow(a) ensures the high dword of 'a' is 0. // The 0 used in the vmull yields 0 for the high product, so the high // dword of 'b' is "don't care". -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return VMULL2LE(__vpmsumd (VecGetLow(a), b)); +#elif defined(__clang__) + return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetLow(a), b)); #else return VMULL2LE(__builtin_crypto_vpmsumd (VecGetLow(a), b)); #endif diff --git a/ppc_power7.cpp b/ppc_power7.cpp index 0f838cd9..751ce0c5 100644 --- a/ppc_power7.cpp +++ b/ppc_power7.cpp @@ -65,7 +65,7 @@ extern "C" { byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17]; // Specifically call the VSX loads and stores - #if defined(__xlc__) || defined(__xlC__) + #if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) vec_xst(vec_xl(0, b1+3), 0, b2+1); #else vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1); diff --git a/ppc_power8.cpp b/ppc_power8.cpp index 17ae638c..3b9c2581 100644 --- a/ppc_power8.cpp +++ b/ppc_power8.cpp @@ -66,7 +66,7 @@ bool CPU_ProbePower8() word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2]; // Specifically call the VSX loads and stores - #if defined(__xlc__) || defined(__xlC__) + #if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1); const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2); const uint64x2_p v3 = VecAdd(v1, v2); // 64-bit add diff --git a/ppc_simd.h b/ppc_simd.h index 6bdb0e52..6b745037 100644 --- a/ppc_simd.h +++ b/ppc_simd.h @@ -32,6 +32,12 @@ # undef bool #endif +// IBM XLC on AIX does not define __CRYPTO__ like it should. More LLVM goodness. 
+#if defined(_AIX) && defined(__xlC__) +# undef __CRYPTO__ +# define __CRYPTO__ 1 +#endif + // VecLoad_ALTIVEC and VecStore_ALTIVEC are // too noisy on modern compilers #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE @@ -879,7 +885,7 @@ inline bool VecNotEqual(const T1 vec1, const T2 vec2) //////////////////////// Power8 Crypto //////////////////////// -#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING) +#if defined(__CRYPTO__) || defined(CRYPTOPP_DOXYGEN_PROCESSING) /// \brief One round of AES encryption /// \tparam T1 vector type @@ -893,8 +899,10 @@ inline bool VecNotEqual(const T1 vec1, const T2 vec2) template inline T1 VecEncrypt(const T1 state, const T2 key) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return (T1)__vcipher((uint8x16_p)state, (uint8x16_p)key); +#elif defined(__clang__) + return (T1)__builtin_altivec_crypto_vcipher((uint64x2_p)state, (uint64x2_p)key); #elif defined(__GNUC__) return (T1)__builtin_crypto_vcipher((uint64x2_p)state, (uint64x2_p)key); #else @@ -914,8 +922,10 @@ inline T1 VecEncrypt(const T1 state, const T2 key) template inline T1 VecEncryptLast(const T1 state, const T2 key) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return (T1)__vcipherlast((uint8x16_p)state, (uint8x16_p)key); +#elif defined(__clang__) + return (T1)__builtin_altivec_crypto_vcipherlast((uint64x2_p)state, (uint64x2_p)key); #elif defined(__GNUC__) return (T1)__builtin_crypto_vcipherlast((uint64x2_p)state, (uint64x2_p)key); #else @@ -935,8 +945,10 @@ inline T1 VecEncryptLast(const T1 state, const T2 key) template inline T1 VecDecrypt(const T1 state, const T2 key) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return (T1)__vncipher((uint8x16_p)state, (uint8x16_p)key); +#elif defined(__clang__) + return (T1)__builtin_altivec_crypto_vncipher((uint64x2_p)state, (uint64x2_p)key); #elif defined(__GNUC__) return (T1)__builtin_crypto_vncipher((uint64x2_p)state, (uint64x2_p)key); #else @@ -956,8 +968,10 @@ inline T1 VecDecrypt(const T1 state, const T2 key) template inline T1 VecDecryptLast(const T1 state, const T2 key) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return (T1)__vncipherlast((uint8x16_p)state, (uint8x16_p)key); +#elif defined(__clang__) + return (T1)__builtin_altivec_crypto_vncipherlast((uint64x2_p)state, (uint64x2_p)key); #elif defined(__GNUC__) return (T1)__builtin_crypto_vncipherlast((uint64x2_p)state, (uint64x2_p)key); #else @@ -977,8 +991,10 @@ inline T1 VecDecryptLast(const T1 state, const T2 key) template inline T VecSHA256(const T vec) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return (T)__vshasigmaw((uint32x4_p)vec, func, subfunc); +#elif defined(__clang__) + return (T)__builtin_altivec_crypto_vshasigmaw((uint32x4_p)vec, func, subfunc); #elif defined(__GNUC__) return (T)__builtin_crypto_vshasigmaw((uint32x4_p)vec, func, subfunc); #else @@ -998,8 +1014,10 @@ inline T VecSHA256(const T vec) template inline T VecSHA512(const T vec) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) +#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) return (T)__vshasigmad((uint64x2_p)vec, func, subfunc); +#elif defined(__clang__) + return 
(T)__builtin_altivec_crypto_vshasigmad((uint64x2_p)vec, func, subfunc); #elif defined(__GNUC__) return (T)__builtin_crypto_vshasigmad((uint64x2_p)vec, func, subfunc); #else @@ -1007,7 +1025,7 @@ inline T VecSHA512(const T vec) #endif } -#endif // _ARCH_PWR8 +#endif // __CRYPTO__ #endif // _ALTIVEC_ diff --git a/rijndael_simd.cpp b/rijndael_simd.cpp index 1369307f..9fde6c8c 100644 --- a/rijndael_simd.cpp +++ b/rijndael_simd.cpp @@ -529,7 +529,7 @@ size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro #endif // CRYPTOPP_AESNI_AVAILABLE -// ***************************** Power 8 ***************************** // +// ************************** Power 8 Crypto ************************** // #if (CRYPTOPP_POWER8_AES_AVAILABLE) diff --git a/sha_simd.cpp b/sha_simd.cpp index 1a7214a9..84fe14a5 100644 --- a/sha_simd.cpp +++ b/sha_simd.cpp @@ -222,7 +222,7 @@ bool CPU_ProbeSHA256() else { byte r[16], z[16] = {0}; - uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); + uint8x16_p x = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; x = VecSHA256<0,0>(x); x = VecSHA256<0,1>(x); @@ -1142,41 +1142,25 @@ uint32x4_p8 VectorMaj(const uint32x4_p8 x, const uint32x4_p8 y, const uint32x4_p static inline uint32x4_p8 Vector_sigma0(const uint32x4_p8 val) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return __vshasigmaw(val, 0, 0); -#else - return __builtin_crypto_vshasigmaw(val, 0, 0); -#endif + return VecSHA256<0,0>(val); } static inline uint32x4_p8 Vector_sigma1(const uint32x4_p8 val) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return __vshasigmaw(val, 0, 0xf); -#else - return __builtin_crypto_vshasigmaw(val, 0, 0xf); -#endif + return VecSHA256<0,0xf>(val); } static inline uint32x4_p8 VectorSigma0(const uint32x4_p8 val) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return __vshasigmaw(val, 1, 0); -#else - return __builtin_crypto_vshasigmaw(val, 1, 0); -#endif + return VecSHA256<1,0>(val); } static inline uint32x4_p8 VectorSigma1(const uint32x4_p8 val) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return __vshasigmaw(val, 1, 0xf); -#else - return __builtin_crypto_vshasigmaw(val, 1, 0xf); -#endif + return VecSHA256<1,0xf>(val); } static inline @@ -1417,41 +1401,25 @@ uint64x2_p8 VectorMaj(const uint64x2_p8 x, const uint64x2_p8 y, const uint64x2_p static inline uint64x2_p8 Vector_sigma0(const uint64x2_p8 val) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return __vshasigmad(val, 0, 0); -#else - return __builtin_crypto_vshasigmad(val, 0, 0); -#endif + return VecSHA512<0,0>(val); } static inline uint64x2_p8 Vector_sigma1(const uint64x2_p8 val) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return __vshasigmad(val, 0, 0xf); -#else - return __builtin_crypto_vshasigmad(val, 0, 0xf); -#endif + return VecSHA512<0,0xf>(val); } static inline uint64x2_p8 VectorSigma0(const uint64x2_p8 val) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return __vshasigmad(val, 1, 0); -#else - return __builtin_crypto_vshasigmad(val, 1, 0); -#endif + return VecSHA512<1,0>(val); } static inline uint64x2_p8 VectorSigma1(const uint64x2_p8 val) { -#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return __vshasigmad(val, 1, 0xf); -#else - return __builtin_crypto_vshasigmad(val, 1, 0xf); -#endif + return VecSHA512<1,0xf>(val); } static inline
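
Note on the intrinsic dispatch: the ppc_simd.h and gcm_simd.cpp hunks all follow the same pattern. IBM XL C/C++ (detected here as __ibmxl__, or _AIX plus __xlC__ for the legacy front end) exposes the POWER8 in-core crypto instructions as __vcipher, __vpmsumd and __vshasigmaw/__vshasigmad; GCC exposes them as __builtin_crypto_* builtins; LLVM Clang, at least the releases this patch targets, rejects the XL spelling and is handled through its __builtin_altivec_crypto_* builtins instead, which is why the old "__xlc__ || __xlC__ || __clang__" test is split into separate XL and Clang branches. Below is a condensed, standalone sketch of that three-way dispatch, not the library's actual header: the wrapper name VecEncryptRound is made up for this note, the typedefs mirror the uint8x16_p/uint64x2_p names from ppc_simd.h, and it assumes a POWER8 target (e.g. -mcpu=power8) with <altivec.h> available.

#include <altivec.h>

typedef __vector unsigned char      uint8x16_p;
typedef __vector unsigned long long uint64x2_p;

// One AES encryption round: dispatch to the compiler-specific
// spelling of the POWER8 vcipher instruction.
inline uint8x16_p VecEncryptRound(uint8x16_p state, uint8x16_p key)
{
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
    // IBM XL C/C++ names the intrinsic __vcipher and takes byte vectors.
    return (uint8x16_p)__vcipher(state, key);
#elif defined(__clang__)
    // LLVM Clang provides an "altivec_crypto" builtin on 64-bit lanes.
    return (uint8x16_p)__builtin_altivec_crypto_vcipher(
        (uint64x2_p)state, (uint64x2_p)key);
#elif defined(__GNUC__)
    // GCC's builtin also operates on 64-bit lanes.
    return (uint8x16_p)__builtin_crypto_vcipher(
        (uint64x2_p)state, (uint64x2_p)key);
#else
    return state;  // no POWER8 crypto support on this compiler (illustrative fallback)
#endif
}

The same split is applied to __vpmsumd (the carry-less multiply used by GCM) and to the SHA sigma builtins; centralizing the dispatch behind VecSHA256/VecSHA512 in ppc_simd.h is what lets sha_simd.cpp drop its per-function #if ladders in the final hunk. On the build side, the GNUmakefile hunks probe AES and SHA support separately (TestPrograms/test_ppc_aes.cxx and test_ppc_sha.cxx) so a compiler that accepts the POWER8 flag but lacks the crypto builtins gets CRYPTOPP_DISABLE_POWER8_AES or CRYPTOPP_DISABLE_POWER8_SHA defined rather than failing to compile.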