Fix LLVM Clang compile on PowerPC
parent c9f1a26024
commit 3129ad4d70
GNUmakefile (73 changed lines)
@@ -71,8 +71,8 @@ IS_MINGW := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "MinGW")
 IS_CYGWIN := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "Cygwin")
 IS_DARWIN := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "Darwin")
 IS_NETBSD := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "NetBSD")
-IS_AIX := $(shell echo "$(UNAMEX)" | $(GREP) -i -c "aix")
-IS_SUN := $(shell echo "$(UNAMEX)" | $(GREP) -i -c "SunOS")
+IS_AIX := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "aix")
+IS_SUN := $(shell echo "$(SYSTEMX)" | $(GREP) -i -c "SunOS")

 SUN_COMPILER := $(shell $(CXX) -V 2>&1 | $(GREP) -i -c -E 'CC: (Sun|Studio)')
 GCC_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(GREP) -v -E '(llvm|clang)' | $(GREP) -i -c -E '(gcc|g\+\+)')
@@ -118,8 +118,8 @@ endif
 # Fixup AIX
 ifeq ($(IS_AIX),1)
 TPROG = TestPrograms/test_64bit.cxx
-HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TPROG) -o $(TOUT) 2>&1 | $(GREP) -i -c -E $(BAD_RESULT))
-ifeq ($(HAVE_OPT),0)
+HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
+ifeq ($(strip $(HAVE_OPT)),0)
 IS_PPC64=1
 else
 IS_PPC32=1
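Note on the detection idiom: the probe now relies on compiler silence rather than grepping for $(BAD_RESULT). A clean compile of $(TPROG) produces no output, so `tr ' ' '\n' | wc -l` reports 0 words and the makefile enables the feature; any diagnostic at all makes the count non-zero. A minimal sketch of what a 64-bit probe in the spirit of TestPrograms/test_64bit.cxx could contain (illustrative only, not the shipped file; the array name is hypothetical):

    // Compiles cleanly only when pointers are at least 8 bytes wide.
    // On a 32-bit target the array bound is negative, the compiler
    // emits an error, and the makefile sees non-empty output.
    int require_64bit[sizeof(void*) >= 8 ? 1 : -1];

    int main(int, char**)
    {
        return 0;
    }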
@@ -623,17 +623,51 @@ ifeq ($(DETECT_FEATURES),1)
 # endif
 #endif

+#####################################################################
+# AES is a separate submodule of POWER8 due to possible export
+# restrictions by the government. It is the reason LLVM choose
+# different intrinsics than GCC and XLC.
+
+TPROG = TestPrograms/test_ppc_aes.cxx
+TOPT = $(POWER9_FLAG)
+HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
+ifeq ($(strip $(HAVE_OPT)),0)
+AES_FLAG = $(POWER9_FLAG)
+endif
+
+TPROG = TestPrograms/test_ppc_aes.cxx
+TOPT = $(POWER8_FLAG)
+HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
+ifeq ($(strip $(HAVE_OPT)),0)
+AES_FLAG = $(POWER8_FLAG)
+endif
+
+TPROG = TestPrograms/test_ppc_sha.cxx
+TOPT = $(POWER9_FLAG)
+HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
+ifeq ($(strip $(HAVE_OPT)),0)
+SHA_FLAG = $(POWER9_FLAG)
+endif
+
+TPROG = TestPrograms/test_ppc_sha.cxx
+TOPT = $(POWER8_FLAG)
+HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
+ifeq ($(strip $(HAVE_OPT)),0)
+SHA_FLAG = $(POWER8_FLAG)
+endif
+
+#####################################################################
+# Looking for a POWER8 option
+
 TPROG = TestPrograms/test_ppc_power8.cxx
 TOPT = $(POWER9_FLAG)
 HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
 ifeq ($(strip $(HAVE_OPT)),0)
 ALTIVEC_FLAG = $(POWER9_FLAG)
-AES_FLAG = $(POWER9_FLAG)
 BLAKE2B_FLAG = $(POWER9_FLAG)
 BLAKE2S_FLAG = $(POWER9_FLAG)
 CHACHA_FLAG = $(POWER9_FLAG)
 GCM_FLAG = $(POWER9_FLAG)
-SHA_FLAG = $(POWER9_FLAG)
 SM4_FLAG = $(POWER9_FLAG)
 SIMON64_FLAG = $(POWER9_FLAG)
 SIMON128_FLAG = $(POWER9_FLAG)
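The dedicated probes above exist because Clang accepts -mcpu=power8/-mcpu=power9 yet spells the in-core crypto builtins differently from GCC and XLC, so passing the plain POWER8 test no longer proves the AES and SHA intrinsics compile. A hedged sketch of what a probe along the lines of TestPrograms/test_ppc_aes.cxx might contain (the shipped file may differ):

    #include <altivec.h>

    typedef __vector unsigned char      uint8x16_p;
    typedef __vector unsigned long long uint64x2_p;

    int main(int, char**)
    {
        uint64x2_p k = {2, 3}, s = {1, 1};
    #if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
        // IBM XL C/C++ spelling; operates on vector unsigned char
        s = (uint64x2_p)__vcipher((uint8x16_p)s, (uint8x16_p)k);
    #elif defined(__clang__)
        s = __builtin_altivec_crypto_vcipher(s, k);
    #elif defined(__GNUC__)
        s = __builtin_crypto_vcipher(s, k);
    #else
        int fail[-1];  // unknown compiler: force the probe to fail
    #endif
        return (int)vec_extract(s, 0);
    }

If the probe fails under both $(POWER9_FLAG) and $(POWER8_FLAG), AES_FLAG stays empty and the fixups later in the file add -DCRYPTOPP_DISABLE_POWER8_AES.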
@@ -648,12 +682,10 @@ ifeq ($(DETECT_FEATURES),1)
 HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
 ifeq ($(strip $(HAVE_OPT)),0)
 ALTIVEC_FLAG = $(POWER8_FLAG)
-AES_FLAG = $(POWER8_FLAG)
 BLAKE2B_FLAG = $(POWER8_FLAG)
 BLAKE2S_FLAG = $(POWER8_FLAG)
 CHACHA_FLAG = $(POWER8_FLAG)
 GCM_FLAG = $(POWER8_FLAG)
-SHA_FLAG = $(POWER8_FLAG)
 SM4_FLAG = $(POWER8_FLAG)
 SIMON64_FLAG = $(POWER8_FLAG)
 SIMON128_FLAG = $(POWER8_FLAG)
@@ -663,6 +695,9 @@ ifeq ($(DETECT_FEATURES),1)
 POWER8_FLAG =
 endif

+#####################################################################
+# Looking for a POWER7 option
+
 TPROG = TestPrograms/test_ppc_power7.cxx
 TOPT = $(POWER7_FLAG)
 HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
@@ -680,6 +715,9 @@ ifeq ($(DETECT_FEATURES),1)
 POWER7_FLAG =
 endif

+#####################################################################
+# Looking for an Altivec option
+
 TPROG = TestPrograms/test_ppc_altivec.cxx
 TOPT = $(POWER6_FLAG)
 HAVE_OPT = $(shell $(CXX) $(CXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
@@ -707,6 +745,9 @@ ifeq ($(DETECT_FEATURES),1)
 POWER4_FLAG =
 endif

+#####################################################################
+# Fixups for algorithms that can drop to a lower ISA, if needed
+
 # Drop to Power7 if Power8 is not available.
 ifeq ($(POWER8_FLAG),)
 GCM_FLAG = $(POWER7_FLAG)
@@ -720,6 +761,9 @@ ifeq ($(DETECT_FEATURES),1)
 SPECK64_FLAG = $(ALTIVEC_FLAG)
 endif

+#####################################################################
+# Fixups for missing ISAs
+
 ifeq ($(ALTIVEC_FLAG),)
 CXXFLAGS += -DCRYPTOPP_DISABLE_ALTIVEC
 else ifeq ($(POWER9_FLAG)$(POWER8_FLAG)$(POWER7_FLAG),)
@@ -728,6 +772,19 @@ ifeq ($(DETECT_FEATURES),1)
 CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8
 endif

+#####################################################################
+# Fixups for missing crypto
+
+ifneq ($(POWER9_FLAG)$(POWER8_FLAG),)
+ifeq ($(AES_FLAG),)
+CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8_AES
+endif
+ifeq ($(SHA_FLAG),)
+CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8_SHA
+endif
+# CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8_VMULL
+endif
+
 # DETECT_FEATURES
 endif
blake2s_simd.cpp

@@ -812,6 +812,9 @@ inline uint32x4_p VectorSet32(const uint32x4_p a, const uint32x4_p b)
 const uint8x16_p mask = {12,13,14,15, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC};
 return VecPermute(a, VecShiftLeftOctet<12>(b), mask);
 }
+
+// Quiet IBM XLC warning
+return VecXor(a, a);
 }

 template <unsigned int E1, unsigned int E2, unsigned int E3, unsigned int E4>
@@ -1005,14 +1008,14 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state)

 void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state)
 {
     BLAKE2_Compress32_CORE(input, state);
 }

 #elif (CRYPTOPP_ALTIVEC_AVAILABLE)

 void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state)
 {
     BLAKE2_Compress32_CORE(input, state);
 }

 #endif
gcm_simd.cpp (21 changed lines)
@@ -64,7 +64,7 @@ extern const char GCM_SIMD_FNAME[] = __FILE__;

 ANONYMOUS_NAMESPACE_BEGIN

-// ************************* Miscellaneous ************************* //
+// *************************** ARM NEON *************************** //

 #if CRYPTOPP_ARM_PMULL_AVAILABLE
 #if defined(__GNUC__)
@@ -168,7 +168,10 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
 #endif // Microsoft and compatibles
 #endif // CRYPTOPP_ARM_PMULL_AVAILABLE

+// ************************** Power 8 Crypto ************************** //
+
 #if CRYPTOPP_POWER8_VMULL_AVAILABLE

 using CryptoPP::uint32x4_p;
 using CryptoPP::uint64x2_p;
 using CryptoPP::VecGetLow;
@@ -201,8 +204,10 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val)
 // _mm_clmulepi64_si128(a, b, 0x00)
 inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return VMULL2LE(__vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+#elif defined(__clang__)
+    return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
 #else
     return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
 #endif
@@ -214,8 +219,10 @@ inline uint64x2_p VMULL_01LE(const uint64x2_p& a, const uint64x2_p& b)
 // Small speedup. VecGetHigh(b) ensures the high dword of 'b' is 0.
 // The 0 used in the vmull yields 0 for the high product, so the high
 // dword of 'a' is "don't care".
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return VMULL2LE(__vpmsumd (a, VecGetHigh(b)));
+#elif defined(__clang__)
+    return VMULL2LE(__builtin_altivec_crypto_vpmsumd (a, VecGetHigh(b)));
 #else
     return VMULL2LE(__builtin_crypto_vpmsumd (a, VecGetHigh(b)));
 #endif
@@ -227,8 +234,10 @@ inline uint64x2_p VMULL_10LE(const uint64x2_p& a, const uint64x2_p& b)
 // Small speedup. VecGetHigh(a) ensures the high dword of 'a' is 0.
 // The 0 used in the vmull yields 0 for the high product, so the high
 // dword of 'b' is "don't care".
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return VMULL2LE(__vpmsumd (VecGetHigh(a), b));
+#elif defined(__clang__)
+    return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), b));
 #else
     return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), b));
 #endif
@@ -240,8 +249,10 @@ inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b)
 // Small speedup. VecGetLow(a) ensures the high dword of 'a' is 0.
 // The 0 used in the vmull yields 0 for the high product, so the high
 // dword of 'b' is "don't care".
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return VMULL2LE(__vpmsumd (VecGetLow(a), b));
+#elif defined(__clang__)
+    return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetLow(a), b));
 #else
     return VMULL2LE(__builtin_crypto_vpmsumd (VecGetLow(a), b));
 #endif
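Each VMULL_* helper now dispatches three ways instead of lumping Clang in with XLC, which previously sent Clang to __vpmsumd, a builtin it does not provide. IBM XL C/C++ (detected via __ibmxl__, or __xlC__ together with _AIX) keeps __vpmsumd, Clang gets __builtin_altivec_crypto_vpmsumd, and GCC keeps __builtin_crypto_vpmsumd. The pattern, condensed into one hedged sketch (the wrapper name is illustrative):

    #include <altivec.h>

    typedef __vector unsigned long long uint64x2_p;

    // One 64x64 -> 128-bit carry-less multiply (vpmsumd), routed to
    // whichever spelling the compiler understands.
    inline uint64x2_p VPMSUMD(const uint64x2_p a, const uint64x2_p b)
    {
    #if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
        return __vpmsumd(a, b);
    #elif defined(__clang__)
        return __builtin_altivec_crypto_vpmsumd(a, b);
    #else
        return __builtin_crypto_vpmsumd(a, b);
    #endif
    }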
ppc_power7.cpp

@@ -65,7 +65,7 @@ extern "C" {
 byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];

 // Specifically call the VSX loads and stores
-#if defined(__xlc__) || defined(__xlC__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
 vec_xst(vec_xl(0, b1+3), 0, b2+1);
 #else
 vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1);
ppc_power8.cpp

@@ -66,7 +66,7 @@ bool CPU_ProbePower8()
 word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];

 // Specifically call the VSX loads and stores
-#if defined(__xlc__) || defined(__xlC__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
 const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1);
 const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2);
 const uint64x2_p v3 = VecAdd(v1, v2); // 64-bit add
ppc_simd.h (34 changed lines)
@@ -32,6 +32,12 @@
 # undef bool
 #endif

+// IBM XLC on AIX does not define __CRYPTO__ like it should. More LLVM goodness.
+#if defined(_AIX) && defined(__xlC__)
+# undef __CRYPTO__
+# define __CRYPTO__ 1
+#endif
+
 // VecLoad_ALTIVEC and VecStore_ALTIVEC are
 // too noisy on modern compilers
 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
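The force-define keeps the Power8 crypto section of this header visible to XLC on AIX: GCC and Clang define __CRYPTO__ themselves when the target has the Power8 in-core crypto feature, and the guard later in the file now keys on that macro instead of _ARCH_PWR8. A small, hedged self-check that compiles anywhere:

    #include <cstdio>

    // Mirror of the header's fixup: XLC on AIX implements the Power8
    // in-core crypto instructions but does not advertise __CRYPTO__.
    #if defined(_AIX) && defined(__xlC__)
    # undef  __CRYPTO__
    # define __CRYPTO__ 1
    #endif

    int main()
    {
    #if defined(__CRYPTO__)
        std::printf("Power8 in-core crypto builtins: advertised\n");
    #else
        std::printf("Power8 in-core crypto builtins: not advertised\n");
    #endif
        return 0;
    }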
@@ -879,7 +885,7 @@ inline bool VecNotEqual(const T1 vec1, const T2 vec2)

 //////////////////////// Power8 Crypto ////////////////////////

-#if defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
+#if defined(__CRYPTO__) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

 /// \brief One round of AES encryption
 /// \tparam T1 vector type
@@ -893,8 +899,10 @@ inline bool VecNotEqual(const T1 vec1, const T2 vec2)
 template <class T1, class T2>
 inline T1 VecEncrypt(const T1 state, const T2 key)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return (T1)__vcipher((uint8x16_p)state, (uint8x16_p)key);
+#elif defined(__clang__)
+    return (T1)__builtin_altivec_crypto_vcipher((uint64x2_p)state, (uint64x2_p)key);
 #elif defined(__GNUC__)
     return (T1)__builtin_crypto_vcipher((uint64x2_p)state, (uint64x2_p)key);
 #else
@@ -914,8 +922,10 @@ inline T1 VecEncrypt(const T1 state, const T2 key)
 template <class T1, class T2>
 inline T1 VecEncryptLast(const T1 state, const T2 key)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return (T1)__vcipherlast((uint8x16_p)state, (uint8x16_p)key);
+#elif defined(__clang__)
+    return (T1)__builtin_altivec_crypto_vcipherlast((uint64x2_p)state, (uint64x2_p)key);
 #elif defined(__GNUC__)
     return (T1)__builtin_crypto_vcipherlast((uint64x2_p)state, (uint64x2_p)key);
 #else
@@ -935,8 +945,10 @@ inline T1 VecEncryptLast(const T1 state, const T2 key)
 template <class T1, class T2>
 inline T1 VecDecrypt(const T1 state, const T2 key)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return (T1)__vncipher((uint8x16_p)state, (uint8x16_p)key);
+#elif defined(__clang__)
+    return (T1)__builtin_altivec_crypto_vncipher((uint64x2_p)state, (uint64x2_p)key);
 #elif defined(__GNUC__)
     return (T1)__builtin_crypto_vncipher((uint64x2_p)state, (uint64x2_p)key);
 #else
@@ -956,8 +968,10 @@ inline T1 VecDecrypt(const T1 state, const T2 key)
 template <class T1, class T2>
 inline T1 VecDecryptLast(const T1 state, const T2 key)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return (T1)__vncipherlast((uint8x16_p)state, (uint8x16_p)key);
+#elif defined(__clang__)
+    return (T1)__builtin_altivec_crypto_vncipherlast((uint64x2_p)state, (uint64x2_p)key);
 #elif defined(__GNUC__)
     return (T1)__builtin_crypto_vncipherlast((uint64x2_p)state, (uint64x2_p)key);
 #else
@@ -977,8 +991,10 @@ inline T1 VecDecryptLast(const T1 state, const T2 key)
 template <int func, int subfunc, class T>
 inline T VecSHA256(const T vec)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return (T)__vshasigmaw((uint32x4_p)vec, func, subfunc);
+#elif defined(__clang__)
+    return (T)__builtin_altivec_crypto_vshasigmaw((uint32x4_p)vec, func, subfunc);
 #elif defined(__GNUC__)
     return (T)__builtin_crypto_vshasigmaw((uint32x4_p)vec, func, subfunc);
 #else
@@ -998,8 +1014,10 @@ inline T VecSHA256(const T vec)
 template <int func, int subfunc, class T>
 inline T VecSHA512(const T vec)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
+#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
     return (T)__vshasigmad((uint64x2_p)vec, func, subfunc);
+#elif defined(__clang__)
+    return (T)__builtin_altivec_crypto_vshasigmad((uint64x2_p)vec, func, subfunc);
 #elif defined(__GNUC__)
     return (T)__builtin_crypto_vshasigmad((uint64x2_p)vec, func, subfunc);
 #else
@@ -1007,7 +1025,7 @@ inline T VecSHA512(const T vec)
 #endif
 }

-#endif // _ARCH_PWR8
+#endif // __CRYPTO__

 #endif // _ALTIVEC_
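With the dispatch fixed inside the wrappers, callers stay compiler-agnostic. A hedged usage sketch of the VecEncrypt family for one AES block (assumes a Power8 toolchain and the Crypto++ sources on the include path; the function name and key-schedule layout are illustrative):

    #include "ppc_simd.h"

    using CryptoPP::uint8x16_p;

    // Encrypt one 16-byte block with an expanded key schedule of
    // rounds+1 subkeys (rounds is 10/12/14 for AES-128/192/256).
    inline uint8x16_p AesEncryptBlock(uint8x16_p block,
                                      const uint8x16_p* subkeys,
                                      unsigned int rounds)
    {
        block = CryptoPP::VecXor(block, subkeys[0]);              // whitening
        for (unsigned int i = 1; i < rounds; ++i)
            block = CryptoPP::VecEncrypt(block, subkeys[i]);      // vcipher
        return CryptoPP::VecEncryptLast(block, subkeys[rounds]);  // vcipherlast
    }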
rijndael_simd.cpp

@@ -529,7 +529,7 @@ size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro

 #endif // CRYPTOPP_AESNI_AVAILABLE

-// ***************************** Power 8 ***************************** //
+// ************************** Power 8 Crypto ************************** //

 #if (CRYPTOPP_POWER8_AES_AVAILABLE)
sha_simd.cpp (50 changed lines)
@@ -222,7 +222,7 @@ bool CPU_ProbeSHA256()
 else
 {
     byte r[16], z[16] = {0};
-    uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
+    uint8x16_p x = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

     x = VecSHA256<0,0>(x);
     x = VecSHA256<0,1>(x);
@@ -1142,41 +1142,25 @@ uint32x4_p8 VectorMaj(const uint32x4_p8 x, const uint32x4_p8 y, const uint32x4_p
 static inline
 uint32x4_p8 Vector_sigma0(const uint32x4_p8 val)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    return __vshasigmaw(val, 0, 0);
-#else
-    return __builtin_crypto_vshasigmaw(val, 0, 0);
-#endif
+    return VecSHA256<0,0>(val);
 }

 static inline
 uint32x4_p8 Vector_sigma1(const uint32x4_p8 val)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    return __vshasigmaw(val, 0, 0xf);
-#else
-    return __builtin_crypto_vshasigmaw(val, 0, 0xf);
-#endif
+    return VecSHA256<0,0xf>(val);
 }

 static inline
 uint32x4_p8 VectorSigma0(const uint32x4_p8 val)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    return __vshasigmaw(val, 1, 0);
-#else
-    return __builtin_crypto_vshasigmaw(val, 1, 0);
-#endif
+    return VecSHA256<1,0>(val);
 }

 static inline
 uint32x4_p8 VectorSigma1(const uint32x4_p8 val)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    return __vshasigmaw(val, 1, 0xf);
-#else
-    return __builtin_crypto_vshasigmaw(val, 1, 0xf);
-#endif
+    return VecSHA256<1,0xf>(val);
 }

 static inline
@@ -1417,41 +1401,25 @@ uint64x2_p8 VectorMaj(const uint64x2_p8 x, const uint64x2_p8 y, const uint64x2_p
 static inline
 uint64x2_p8 Vector_sigma0(const uint64x2_p8 val)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    return __vshasigmad(val, 0, 0);
-#else
-    return __builtin_crypto_vshasigmad(val, 0, 0);
-#endif
+    return VecSHA512<0,0>(val);
 }

 static inline
 uint64x2_p8 Vector_sigma1(const uint64x2_p8 val)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    return __vshasigmad(val, 0, 0xf);
-#else
-    return __builtin_crypto_vshasigmad(val, 0, 0xf);
-#endif
+    return VecSHA512<0,0xf>(val);
 }

 static inline
 uint64x2_p8 VectorSigma0(const uint64x2_p8 val)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    return __vshasigmad(val, 1, 0);
-#else
-    return __builtin_crypto_vshasigmad(val, 1, 0);
-#endif
+    return VecSHA512<1,0>(val);
 }

 static inline
 uint64x2_p8 VectorSigma1(const uint64x2_p8 val)
 {
-#if defined(__xlc__) || defined(__xlC__) || defined(__clang__)
-    return __vshasigmad(val, 1, 0xf);
-#else
-    return __builtin_crypto_vshasigmad(val, 1, 0xf);
-#endif
+    return VecSHA512<1,0xf>(val);
 }

 static inline
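The rewritten sigma helpers show the payoff of the header changes: the compiler dispatch now lives once in ppc_simd.h, and sha_simd.cpp collapses onto the VecSHA256/VecSHA512 templates. As the removed bodies show, the template arguments map straight onto vshasigmaw/vshasigmad: the first selects the lowercase sigma (0) or uppercase Sigma (1) family, the second is the subfunction mask (0 for sigma0/Sigma0, 0xf for sigma1/Sigma1). A hedged sketch of the call pattern (the demo function name is illustrative; assumes a Power8 toolchain):

    #include "ppc_simd.h"

    using CryptoPP::uint32x4_p;

    // Apply each SHA-256 sigma form to a vector of message words,
    // the way the compression loop in sha_simd.cpp uses them.
    inline uint32x4_p Sha256SigmaDemo(uint32x4_p w)
    {
        w = CryptoPP::VecSHA256<0,0>(w);      // sigma0
        w = CryptoPP::VecSHA256<0,0xf>(w);    // sigma1
        w = CryptoPP::VecSHA256<1,0>(w);      // Sigma0
        return CryptoPP::VecSHA256<1,0xf>(w); // Sigma1
    }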