diff --git a/GNUmakefile b/GNUmakefile index e043cb63..7b68286d 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -266,6 +266,13 @@ endif # -DCRYPTOPP_DISABLE_SSSE3 # Begin SunCC ifeq ($(SUN_COMPILER),1) + COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse2 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal") + ifeq ($(COUNT),0) + AES_FLAG = -xarch=sse2 -D__SSE2__=1 + GCM_FLAG = -xarch=sse2 -D__SSE2__=1 + SHA_FLAG = -xarch=sse2 -D__SSE2__=1 + LDFLAGS += -xarch=sse2 + endif COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=ssse3 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal") ifeq ($(COUNT),0) SSSE3_FLAG = -xarch=ssse3 -D__SSSE3__=1 @@ -396,9 +403,12 @@ ifneq ($(IS_PPC32)$(IS_PPC64)$(IS_AIX),000) ALTIVEC_FLAG = -mcpu=power4 -maltivec ARIA_FLAG = -mcpu=power4 -maltivec BLAKE2_FLAG = -mcpu=power4 -maltivec + CHAM_FLAG = -mcpu=power4 -maltivec + LEA_FLAG = -mcpu=power4 -maltivec SIMON_FLAG = -mcpu=power4 -maltivec - SIMECK_FLAG = -mcpu=power4 -maltivec SPECK_FLAG = -mcpu=power4 -maltivec + SIMECK_FLAG = -mcpu=power4 -maltivec + SM4_FLAG = -mcpu=power4 -maltivec endif # GCC and some compatibles HAVE_CRYPTO = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -mcpu=power8 -maltivec -dM -E - 2>/dev/null | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO') @@ -407,9 +417,12 @@ ifneq ($(IS_PPC32)$(IS_PPC64)$(IS_AIX),000) AES_FLAG = -mcpu=power8 -maltivec GCM_FLAG = -mcpu=power8 -maltivec SHA_FLAG = -mcpu=power8 -maltivec - SIMECK_FLAG = -mcpu=power8 -maltivec + CHAM_FLAG = -mcpu=power8 -maltivec + LEA_FLAG = -mcpu=power8 -maltivec SIMON_FLAG = -mcpu=power8 -maltivec SPECK_FLAG = -mcpu=power8 -maltivec + SIMECK_FLAG = -mcpu=power8 -maltivec + SM4_FLAG = -mcpu=power8 -maltivec endif # IBM XL C/C++ HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr7 -qaltivec -E adhoc.cpp.proto 2>/dev/null | $(GREP) -i -c '__ALTIVEC__') @@ -417,9 +430,12 @@ ifneq ($(IS_PPC32)$(IS_PPC64)$(IS_AIX),000) ALTIVEC_FLAG = -qarch=pwr7 -qaltivec ARIA_FLAG = -qarch=pwr7 -qaltivec BLAKE2_FLAG = 
-qarch=pwr7 -qaltivec + CHAM_FLAG = -qarch=pwr7 -qaltivec + LEA_FLAG = -qarch=pwr7 -qaltivec SIMECK_FLAG = -qarch=pwr7 -qaltivec SIMON_FLAG = -qarch=pwr7 -qaltivec SPECK_FLAG = -qarch=pwr7 -qaltivec + SM4_FLAG = -qarch=pwr7 -qaltivec endif # IBM XL C/C++ HAVE_CRYPTO = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr8 -qaltivec -E adhoc.cpp.proto 2>/dev/null | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO') @@ -430,9 +446,12 @@ ifneq ($(IS_PPC32)$(IS_PPC64)$(IS_AIX),000) SHA_FLAG = -qarch=pwr8 -qaltivec ARIA_FLAG = -qarch=pwr8 -qaltivec BLAKE2_FLAG = -qarch=pwr8 -qaltivec + CHAM_FLAG = -qarch=pwr8 -qaltivec + LEA_FLAG = -qarch=pwr8 -qaltivec SIMECK_FLAG = -qarch=pwr8 -qaltivec SIMON_FLAG = -qarch=pwr8 -qaltivec SPECK_FLAG = -qarch=pwr8 -qaltivec + SM4_FLAG = -qarch=pwr8 -qaltivec endif endif @@ -442,10 +461,6 @@ ifeq ($(XLC_COMPILER),1) ifeq ($(findstring -qrtti,$(CXXFLAGS)),) CXXFLAGS += -qrtti endif - # -fPIC causes link errors dues to unknown option - ifneq ($(findstring -fPIC,$(CXXFLAGS)),) - CXXFLAGS := $(CXXFLAGS:-fPIC=-qpic) - endif HAVE_BITS=$(shell echo $(CXXFLAGS) | $(GREP) -i -c -E '\-q32|\-q64') ifeq ($(IS_PPC64)$(XLC_COMPILER)$(HAVE_BITS),110) CXXFLAGS += -q64 @@ -480,6 +495,11 @@ ifeq ($(SUN_COMPILER),1) CXXFLAGS := $(subst -fPIC,-KPIC,$(CXXFLAGS)) endif +# Replace -fPIC if present. IBM XL C/C++ uses -qpic +ifeq ($(XLC_COMPILER),1) + CXXFLAGS := $(subst -fPIC,-qpic,$(CXXFLAGS)) +endif + # Add -pipe for everything except IBM XL C/C++, SunCC and ARM. 
# Allow ARM-64 because they seems to have >1 GB of memory ifeq ($(XLC_COMPILER)$(SUN_COMPILER)$(IS_ARM32),000) diff --git a/ppc-simd.h b/ppc-simd.h index ddfa24ce..c33358a6 100644 --- a/ppc-simd.h +++ b/ppc-simd.h @@ -147,7 +147,7 @@ inline T Reverse(const T& src) inline uint32x4_p VectorLoadBE(const uint8_t src[16]) { #if defined(CRYPTOPP_XLC_VERSION) - return (uint32x4_p)vec_xl_be(0, src); + return (uint32x4_p)vec_xl_be(0, (byte*)src); #else # if defined(CRYPTOPP_LITTLE_ENDIAN) return (uint32x4_p)Reverse(vec_vsx_ld(0, src)); @@ -168,7 +168,7 @@ inline uint32x4_p VectorLoadBE(const uint8_t src[16]) inline uint32x4_p VectorLoadBE(int off, const uint8_t src[16]) { #if defined(CRYPTOPP_XLC_VERSION) - return (uint32x4_p)vec_xl_be(off, src); + return (uint32x4_p)vec_xl_be(off, (byte*)src); #else # if defined(CRYPTOPP_LITTLE_ENDIAN) return (uint32x4_p)Reverse(vec_vsx_ld(off, src)); @@ -213,7 +213,7 @@ inline uint32x4_p VectorLoad(int off, const byte src[16]) inline uint32x4_p VectorLoadKey(const byte src[16]) { #if defined(CRYPTOPP_XLC_VERSION) - return (uint32x4_p)vec_xl(0, src); + return (uint32x4_p)vec_xl(0, (byte*)src); #else return (uint32x4_p)vec_vsx_ld(0, src); #endif @@ -229,7 +229,7 @@ inline uint32x4_p VectorLoadKey(const byte src[16]) inline uint32x4_p VectorLoadKey(const word32 src[4]) { #if defined(CRYPTOPP_XLC_VERSION) - return (uint32x4_p)vec_xl(0, src); + return (uint32x4_p)vec_xl(0, (byte*)src); #else return (uint32x4_p)vec_vsx_ld(0, src); #endif @@ -246,7 +246,7 @@ inline uint32x4_p VectorLoadKey(const word32 src[4]) inline uint32x4_p VectorLoadKey(int off, const byte src[16]) { #if defined(CRYPTOPP_XLC_VERSION) - return (uint32x4_p)vec_xl(off, src); + return (uint32x4_p)vec_xl(off, (byte*)src); #else return (uint32x4_p)vec_vsx_ld(off, src); #endif @@ -424,7 +424,7 @@ inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2) #endif // CRYPTOPP_POWER7_AVAILABLE -#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING) +#if 
defined(CRYPTOPP_POWER8_AES_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING) /// \brief One round of AES encryption /// \tparam T1 vector type @@ -506,6 +506,10 @@ inline T1 VectorDecryptLast(const T1& state, const T2& key) #endif } +#endif // CRYPTOPP_POWER8_AES_AVAILABLE + +#if defined(CRYPTOPP_POWER8_SHA_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING) + /// \brief SHA256 Sigma functions /// \tparam func function /// \tparam subfunc sub-function @@ -546,7 +550,7 @@ inline T VectorSHA512(const T& vec) #endif } -#endif // CRYPTOPP_POWER8_AVAILABLE +#endif // CRYPTOPP_POWER8_SHA_AVAILABLE NAMESPACE_END diff --git a/sha-simd.cpp b/sha-simd.cpp index b2639840..30d6068d 100644 --- a/sha-simd.cpp +++ b/sha-simd.cpp @@ -990,7 +990,7 @@ typedef __vector unsigned long long uint64x2_p8; uint32x4_p8 VEC_XL_BE(int offset, const uint8_t* data) { #if defined(CRYPTOPP_XLC_VERSION) - return vec_xl_be(offset, data); + return (uint32x4_p8)vec_xl_be(offset, (uint8_t*)data); #else uint32x4_p8 res; __asm(" lxvd2x %x0, %1, %2 \n\t" @@ -1016,7 +1016,7 @@ template static inline uint32x4_p8 VectorLoad32x4u(const T* data, int offset) { #if defined(CRYPTOPP_XLC_VERSION) - return (uint32x4_p8)vec_xl(offset, data); + return (uint32x4_p8)vec_xl(offset, (uint8_t*)data); #else return (uint32x4_p8)vec_vsx_ld(offset, data); #endif @@ -1324,7 +1324,7 @@ template static inline uint64x2_p8 VectorLoad64x2u(const T* data, int offset) { #if defined(CRYPTOPP_XLC_VERSION) - return (uint64x2_p8)vec_xl(offset, (const uint8_t*)data); + return (uint64x2_p8)vec_xl(offset, (uint8_t*)data); #else return (uint64x2_p8)vec_vsx_ld(offset, (const uint8_t*)data); #endif