Fix LLVM Clang compile on PowerPC

pull/748/head
Jeffrey Walton 2018-11-19 02:28:29 -05:00
parent c9f1a26024
commit 3129ad4d70
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
8 changed files with 124 additions and 67 deletions

View File

@ -71,8 +71,8 @@ IS_MINGW := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "MinGW")
IS_CYGWIN := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "Cygwin")
IS_DARWIN := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "Darwin")
IS_NETBSD := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "NetBSD")
IS_AIX := $(shell echo "$(UNAMEX)" | $(GREP) -i -c "aix")
IS_SUN := $(shell echo "$(UNAMEX)" | $(GREP) -i -c "SunOS")
IS_AIX := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "aix")
IS_SUN := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "SunOS")
SUN_COMPILER := $(shell $(CXX) -V 2>&1 | $(GREP) -i -c -E 'CC: (Sun|Studio)')
GCC_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(GREP) -v -E '(llvm|clang)' | $(GREP) -i -c -E '(gcc|g\+\+)')
@ -118,8 +118,8 @@ endif
# Fixup AIX
ifeq ($(IS_AIX),1)
TPROG = TestPrograms/test_64bit.cxx
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TPROG) -o $(TOUT) 2>&1 | $(GREP) -i -c -E $(BAD_RESULT))
ifeq ($(HAVE_OPT),0)
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
IS_PPC64=1
else
IS_PPC32=1
@ -623,17 +623,51 @@ ifeq ($(DETECT_FEATURES),1)
# endif
#endif
#####################################################################
# AES is a separate submodule of POWER8 due to possible export
# restrictions by the government. It is the reason LLVM chose
# different intrinsics than GCC and XLC.
TPROG = TestPrograms/test_ppc_aes.cxx
TOPT = $(POWER9_FLAG)
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
AES_FLAG = $(POWER9_FLAG)
endif
TPROG = TestPrograms/test_ppc_aes.cxx
TOPT = $(POWER8_FLAG)
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
AES_FLAG = $(POWER8_FLAG)
endif
TPROG = TestPrograms/test_ppc_sha.cxx
TOPT = $(POWER9_FLAG)
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
SHA_FLAG = $(POWER9_FLAG)
endif
TPROG = TestPrograms/test_ppc_sha.cxx
TOPT = $(POWER8_FLAG)
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
SHA_FLAG = $(POWER8_FLAG)
endif
#####################################################################
# Looking for a POWER8 option
TPROG = TestPrograms/test_ppc_power8.cxx
TOPT = $(POWER9_FLAG)
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
ALTIVEC_FLAG = $(POWER9_FLAG)
AES_FLAG = $(POWER9_FLAG)
BLAKE2B_FLAG = $(POWER9_FLAG)
BLAKE2S_FLAG = $(POWER9_FLAG)
CHACHA_FLAG = $(POWER9_FLAG)
GCM_FLAG = $(POWER9_FLAG)
SHA_FLAG = $(POWER9_FLAG)
SM4_FLAG = $(POWER9_FLAG)
SIMON64_FLAG = $(POWER9_FLAG)
SIMON128_FLAG = $(POWER9_FLAG)
@ -648,12 +682,10 @@ ifeq ($(DETECT_FEATURES),1)
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
ifeq ($(strip $(HAVE_OPT)),0)
ALTIVEC_FLAG = $(POWER8_FLAG)
AES_FLAG = $(POWER8_FLAG)
BLAKE2B_FLAG = $(POWER8_FLAG)
BLAKE2S_FLAG = $(POWER8_FLAG)
CHACHA_FLAG = $(POWER8_FLAG)
GCM_FLAG = $(POWER8_FLAG)
SHA_FLAG = $(POWER8_FLAG)
SM4_FLAG = $(POWER8_FLAG)
SIMON64_FLAG = $(POWER8_FLAG)
SIMON128_FLAG = $(POWER8_FLAG)
@ -663,6 +695,9 @@ ifeq ($(DETECT_FEATURES),1)
POWER8_FLAG =
endif
#####################################################################
# Looking for a POWER7 option
TPROG = TestPrograms/test_ppc_power7.cxx
TOPT = $(POWER7_FLAG)
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
@ -680,6 +715,9 @@ ifeq ($(DETECT_FEATURES),1)
POWER7_FLAG =
endif
#####################################################################
# Looking for an Altivec option
TPROG = TestPrograms/test_ppc_altivec.cxx
TOPT = $(POWER6_FLAG)
HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
@ -707,6 +745,9 @@ ifeq ($(DETECT_FEATURES),1)
POWER4_FLAG =
endif
#####################################################################
# Fixups for algorithms that can drop to a lower ISA, if needed
# Drop to Power7 if Power8 is not available.
ifeq ($(POWER8_FLAG),)
GCM_FLAG = $(POWER7_FLAG)
@ -720,6 +761,9 @@ ifeq ($(DETECT_FEATURES),1)
SPECK64_FLAG = $(ALTIVEC_FLAG)
endif
#####################################################################
# Fixups for missing ISAs
ifeq ($(ALTIVEC_FLAG),)
CXXFLAGS += -DCRYPTOPP_DISABLE_ALTIVEC
else ifeq ($(POWER9_FLAG)$(POWER8_FLAG)$(POWER7_FLAG),)
@ -728,6 +772,19 @@ ifeq ($(DETECT_FEATURES),1)
CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8
endif
#####################################################################
# Fixups for missing crypto
ifneq ($(POWER9_FLAG)$(POWER8_FLAG),)
ifeq ($(AES_FLAG),)
CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8_AES
endif
ifeq ($(SHA_FLAG),)
CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8_SHA
endif
# CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8_VMULL
endif
# DETECT_FEATURES
endif

View File

@ -812,6 +812,9 @@ inline uint32x4_p VectorSet32(const uint32x4_p a, const uint32x4_p b)
const uint8x16_p mask = {12,13,14,15, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC};
return VecPermute(a, VecShiftLeftOctet<12>(b), mask);
}
// Quiet IBM XLC warning
return VecXor(a, a);
}
template <unsigned int E1, unsigned int E2, unsigned int E3, unsigned int E4>
@ -1005,14 +1008,14 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state)
void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state)
{
BLAKE2_Compress32_CORE(input, state);
BLAKE2_Compress32_CORE(input, state);
}
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state)
{
BLAKE2_Compress32_CORE(input, state);
BLAKE2_Compress32_CORE(input, state);
}
#endif

View File

@ -64,7 +64,7 @@ extern const char GCM_SIMD_FNAME[] = __FILE__;
ANONYMOUS_NAMESPACE_BEGIN
// ************************* Miscellaneous ************************* //
// *************************** ARM NEON *************************** //
#if CRYPTOPP_ARM_PMULL_AVAILABLE
#if defined(__GNUC__)
@ -168,7 +168,10 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
#endif // Microsoft and compatibles
#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
// ************************** Power 8 Crypto ************************** //
#if CRYPTOPP_POWER8_VMULL_AVAILABLE
using CryptoPP::uint32x4_p;
using CryptoPP::uint64x2_p;
using CryptoPP::VecGetLow;
@ -201,8 +204,10 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val)
// _mm_clmulepi64_si128(a, b, 0x00)
inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return VMULL2LE(__vpmsumd (VecGetHigh(a), VecGetHigh(b)));
#elif defined(__clang__)
return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
#else
return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
#endif
@ -214,8 +219,10 @@ inline uint64x2_p VMULL_01LE(const uint64x2_p& a, const uint64x2_p& b)
// Small speedup. VecGetHigh(b) ensures the high dword of 'b' is 0.
// The 0 used in the vmull yields 0 for the high product, so the high
// dword of 'a' is "don't care".
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return VMULL2LE(__vpmsumd (a, VecGetHigh(b)));
#elif defined(__clang__)
return VMULL2LE(__builtin_altivec_crypto_vpmsumd (a, VecGetHigh(b)));
#else
return VMULL2LE(__builtin_crypto_vpmsumd (a, VecGetHigh(b)));
#endif
@ -227,8 +234,10 @@ inline uint64x2_p VMULL_10LE(const uint64x2_p& a, const uint64x2_p& b)
// Small speedup. VecGetHigh(a) ensures the high dword of 'a' is 0.
// The 0 used in the vmull yields 0 for the high product, so the high
// dword of 'b' is "don't care".
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return VMULL2LE(__vpmsumd (VecGetHigh(a), b));
#elif defined(__clang__)
return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), b));
#else
return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), b));
#endif
@ -240,8 +249,10 @@ inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b)
// Small speedup. VecGetLow(a) ensures the high dword of 'a' is 0.
// The 0 used in the vmull yields 0 for the high product, so the high
// dword of 'b' is "don't care".
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return VMULL2LE(__vpmsumd (VecGetLow(a), b));
#elif defined(__clang__)
return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetLow(a), b));
#else
return VMULL2LE(__builtin_crypto_vpmsumd (VecGetLow(a), b));
#endif

View File

@ -65,7 +65,7 @@ extern "C" {
byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];
// Specifically call the VSX loads and stores
#if defined(__xlc__) || defined(__xlC__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
vec_xst(vec_xl(0, b1+3), 0, b2+1);
#else
vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1);

View File

@ -66,7 +66,7 @@ bool CPU_ProbePower8()
word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];
// Specifically call the VSX loads and stores
#if defined(__xlc__) || defined(__xlC__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1);
const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2);
const uint64x2_p v3 = VecAdd(v1, v2); // 64-bit add

View File

@ -32,6 +32,12 @@
# undef bool
#endif
// IBM XLC on AIX does not define __CRYPTO__ like it should. More LLVM goodness.
#if defined(_AIX) && defined(__xlC__)
# undef __CRYPTO__
# define __CRYPTO__ 1
#endif
// VecLoad_ALTIVEC and VecStore_ALTIVEC are
// too noisy on modern compilers
#if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
@ -879,7 +885,7 @@ inline bool VecNotEqual(const T1 vec1, const T2 vec2)
//////////////////////// Power8 Crypto ////////////////////////
#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
#if defined(__CRYPTO__) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
/// \brief One round of AES encryption
/// \tparam T1 vector type
@ -893,8 +899,10 @@ inline bool VecNotEqual(const T1 vec1, const T2 vec2)
template <class T1, class T2>
inline T1 VecEncrypt(const T1 state, const T2 key)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return (T1)__vcipher((uint8x16_p)state, (uint8x16_p)key);
#elif defined(__clang__)
return (T1)__builtin_altivec_crypto_vcipher((uint64x2_p)state, (uint64x2_p)key);
#elif defined(__GNUC__)
return (T1)__builtin_crypto_vcipher((uint64x2_p)state, (uint64x2_p)key);
#else
@ -914,8 +922,10 @@ inline T1 VecEncrypt(const T1 state, const T2 key)
template <class T1, class T2>
inline T1 VecEncryptLast(const T1 state, const T2 key)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return (T1)__vcipherlast((uint8x16_p)state, (uint8x16_p)key);
#elif defined(__clang__)
return (T1)__builtin_altivec_crypto_vcipherlast((uint64x2_p)state, (uint64x2_p)key);
#elif defined(__GNUC__)
return (T1)__builtin_crypto_vcipherlast((uint64x2_p)state, (uint64x2_p)key);
#else
@ -935,8 +945,10 @@ inline T1 VecEncryptLast(const T1 state, const T2 key)
template <class T1, class T2>
inline T1 VecDecrypt(const T1 state, const T2 key)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return (T1)__vncipher((uint8x16_p)state, (uint8x16_p)key);
#elif defined(__clang__)
return (T1)__builtin_altivec_crypto_vncipher((uint64x2_p)state, (uint64x2_p)key);
#elif defined(__GNUC__)
return (T1)__builtin_crypto_vncipher((uint64x2_p)state, (uint64x2_p)key);
#else
@ -956,8 +968,10 @@ inline T1 VecDecrypt(const T1 state, const T2 key)
template <class T1, class T2>
inline T1 VecDecryptLast(const T1 state, const T2 key)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return (T1)__vncipherlast((uint8x16_p)state, (uint8x16_p)key);
#elif defined(__clang__)
return (T1)__builtin_altivec_crypto_vncipherlast((uint64x2_p)state, (uint64x2_p)key);
#elif defined(__GNUC__)
return (T1)__builtin_crypto_vncipherlast((uint64x2_p)state, (uint64x2_p)key);
#else
@ -977,8 +991,10 @@ inline T1 VecDecryptLast(const T1 state, const T2 key)
template <int func, int subfunc, class T>
inline T VecSHA256(const T vec)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return (T)__vshasigmaw((uint32x4_p)vec, func, subfunc);
#elif defined(__clang__)
return (T)__builtin_altivec_crypto_vshasigmaw((uint32x4_p)vec, func, subfunc);
#elif defined(__GNUC__)
return (T)__builtin_crypto_vshasigmaw((uint32x4_p)vec, func, subfunc);
#else
@ -998,8 +1014,10 @@ inline T VecSHA256(const T vec)
template <int func, int subfunc, class T>
inline T VecSHA512(const T vec)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
return (T)__vshasigmad((uint64x2_p)vec, func, subfunc);
#elif defined(__clang__)
return (T)__builtin_altivec_crypto_vshasigmad((uint64x2_p)vec, func, subfunc);
#elif defined(__GNUC__)
return (T)__builtin_crypto_vshasigmad((uint64x2_p)vec, func, subfunc);
#else
@ -1007,7 +1025,7 @@ inline T VecSHA512(const T vec)
#endif
}
#endif // _ARCH_PWR8
#endif // __CRYPTO__
#endif // _ALTIVEC_

View File

@ -529,7 +529,7 @@ size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro
#endif // CRYPTOPP_AESNI_AVAILABLE
// ***************************** Power 8 ***************************** //
// ************************** Power 8 Crypto ************************** //
#if (CRYPTOPP_POWER8_AES_AVAILABLE)

View File

@ -222,7 +222,7 @@ bool CPU_ProbeSHA256()
else
{
byte r[16], z[16] = {0};
uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
uint8x16_p x = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
x = VecSHA256<0,0>(x);
x = VecSHA256<0,1>(x);
@ -1142,41 +1142,25 @@ uint32x4_p8 VectorMaj(const uint32x4_p8 x, const uint32x4_p8 y, const uint32x4_p
static inline
uint32x4_p8 Vector_sigma0(const uint32x4_p8 val)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
return __vshasigmaw(val, 0, 0);
#else
return __builtin_crypto_vshasigmaw(val, 0, 0);
#endif
return VecSHA256<0,0>(val);
}
static inline
uint32x4_p8 Vector_sigma1(const uint32x4_p8 val)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
return __vshasigmaw(val, 0, 0xf);
#else
return __builtin_crypto_vshasigmaw(val, 0, 0xf);
#endif
return VecSHA256<0,0xf>(val);
}
static inline
uint32x4_p8 VectorSigma0(const uint32x4_p8 val)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
return __vshasigmaw(val, 1, 0);
#else
return __builtin_crypto_vshasigmaw(val, 1, 0);
#endif
return VecSHA256<1,0>(val);
}
static inline
uint32x4_p8 VectorSigma1(const uint32x4_p8 val)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
return __vshasigmaw(val, 1, 0xf);
#else
return __builtin_crypto_vshasigmaw(val, 1, 0xf);
#endif
return VecSHA256<1,0xf>(val);
}
static inline
@ -1417,41 +1401,25 @@ uint64x2_p8 VectorMaj(const uint64x2_p8 x, const uint64x2_p8 y, const uint64x2_p
static inline
uint64x2_p8 Vector_sigma0(const uint64x2_p8 val)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
return __vshasigmad(val, 0, 0);
#else
return __builtin_crypto_vshasigmad(val, 0, 0);
#endif
return VecSHA512<0,0>(val);
}
static inline
uint64x2_p8 Vector_sigma1(const uint64x2_p8 val)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
return __vshasigmad(val, 0, 0xf);
#else
return __builtin_crypto_vshasigmad(val, 0, 0xf);
#endif
return VecSHA512<0,0xf>(val);
}
static inline
uint64x2_p8 VectorSigma0(const uint64x2_p8 val)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
return __vshasigmad(val, 1, 0);
#else
return __builtin_crypto_vshasigmad(val, 1, 0);
#endif
return VecSHA512<1,0>(val);
}
static inline
uint64x2_p8 VectorSigma1(const uint64x2_p8 val)
{
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
return __vshasigmad(val, 1, 0xf);
#else
return __builtin_crypto_vshasigmad(val, 1, 0xf);
#endif
return VecSHA512<1,0xf>(val);
}
static inline