diff --git a/Filelist.txt b/Filelist.txt index 911cdc81..b26a1ea5 100644 --- a/Filelist.txt +++ b/Filelist.txt @@ -120,6 +120,7 @@ fips140.h fipsalgt.cpp fipstest.cpp fltrimpl.h +gcm-simd.cpp gcm.cpp gcm.h gf256.cpp @@ -204,6 +205,7 @@ poly1305.cpp poly1305.h polynomi.cpp polynomi.h +ppc-simd.cpp pssr.cpp pssr.h pubkey.cpp diff --git a/GNUmakefile b/GNUmakefile index 41a2afb2..40f77788 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -334,20 +334,32 @@ ifeq ($(IS_ARMV8),1) endif # PowerPC and PowerPC-64 -ifneq ($(IS_PPC32)$(IS_PPC64),00) +ifneq ($(IS_PPC32)$(IS_PPC64)$(IS_AIX),000) # GCC and some compatibles - HAVE_CRYPTO = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -mcpu=power8 -dM -E - 2>/dev/null | $(GREP) -i -c __CRYPTO) - ifeq ($(HAVE_CRYPTO),1) - AES_FLAG = -mcpu=power8 - GCM_FLAG = -mcpu=power8 - SHA_FLAG = -mcpu=power8 + HAVE_ALTIVEC = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -maltivec -dM -E - 2>/dev/null | $(GREP) -i -c '__ALTIVEC__') + ifneq ($(HAVE_ALTIVEC),0) + ALTIVEC_FLAG = -maltivec + endif + # GCC and some compatibles + HAVE_CRYPTO = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -mcpu=power8 -maltivec -mvsx -dM -E - 2>/dev/null | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO') + ifneq ($(HAVE_CRYPTO),0) + AES_FLAG = -mcpu=power8 -maltivec -mvsx + GCM_FLAG = -mcpu=power8 -maltivec -mvsx + SHA_FLAG = -mcpu=power8 -maltivec -mvsx + ALTIVEC_FLAG = -mcpu=power8 -maltivec -mvsx endif # IBM XL C/C++ - HAVE_CRYPTO = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr8 -qaltivec rijndael.cpp -dM -E 2>/dev/null | $(GREP) -i -c __CRYPTO) - ifeq ($(HAVE_CRYPTO),1) + HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qaltivec -E adhoc.cpp.proto 2>/dev/null | $(GREP) -i -c '__ALTIVEC__') + ifneq ($(HAVE_ALTIVEC),0) + ALTIVEC_FLAG = -qaltivec + endif + # IBM XL C/C++ + HAVE_CRYPTO = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr8 -qaltivec -E adhoc.cpp.proto 2>/dev/null | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO') + ifneq ($(HAVE_CRYPTO),0) AES_FLAG = 
-qarch=pwr8 -qaltivec GCM_FLAG = -qarch=pwr8 -qaltivec SHA_FLAG = -qarch=pwr8 -qaltivec + ALTIVEC_FLAG = -qarch=pwr8 -qaltivec endif endif @@ -924,10 +936,6 @@ endif aria-simd.o : aria-simd.cpp $(CXX) $(strip $(CXXFLAGS) $(ARIA_FLAG) -c) $< -# SSE4.2 or NEON available -neon-simd.o : neon-simd.cpp - $(CXX) $(strip $(CXXFLAGS) $(NEON_FLAG) -c) $< - # SSE4.2 or ARMv8a available blake2-simd.o : blake2-simd.cpp $(CXX) $(strip $(CXXFLAGS) $(BLAKE2_FLAG) -c) $< @@ -940,6 +948,14 @@ crc-simd.o : crc-simd.cpp gcm-simd.o : gcm-simd.cpp $(CXX) $(strip $(CXXFLAGS) $(GCM_FLAG) -c) $< +# NEON available +neon-simd.o : neon-simd.cpp + $(CXX) $(strip $(CXXFLAGS) $(NEON_FLAG) -c) $< + +# AltiVec, Power7, Power8 available +ppc-simd.o : ppc-simd.cpp + $(CXX) $(strip $(CXXFLAGS) $(ALTIVEC_FLAG) -c) $< + # AESNI or ARMv7a/ARMv8a available rijndael-simd.o : rijndael-simd.cpp $(CXX) $(strip $(CXXFLAGS) $(AES_FLAG) -c) $< diff --git a/config.h b/config.h index 41753c3a..14641ccc 100644 --- a/config.h +++ b/config.h @@ -10,22 +10,22 @@ // define this if running on a big-endian CPU #if !defined(IS_LITTLE_ENDIAN) && !defined(IS_BIG_ENDIAN) && (defined(__BIG_ENDIAN__) || (defined(__s390__) || defined(__s390x__) || defined(__zarch__)) || (defined(__m68k__) || defined(__MC68K__)) || defined(__sparc) || defined(__sparc__) || defined(__hppa__) || defined(__MIPSEB__) || defined(__ARMEB__) || (defined(__MWERKS__) && !defined(__INTEL__))) -# define IS_BIG_ENDIAN +# define IS_BIG_ENDIAN 1 #endif // define this if running on a little-endian CPU // big endian will be assumed if IS_LITTLE_ENDIAN is not defined #if !defined(IS_BIG_ENDIAN) && !defined(IS_LITTLE_ENDIAN) -# define IS_LITTLE_ENDIAN +# define IS_LITTLE_ENDIAN 1 #endif -// Sanity checks. Some processors have more than big-, little- and bi-endian modes. PDP mode, where order results in "4312", should -// raise red flags immediately. Additionally, mis-classified machines, like (previosuly) S/390, should raise red flags immediately. 
+// Sanity checks. Some processors have more than big, little and bi-endian modes. PDP mode, where order results in "4312", should +// raise red flags immediately. Additionally, mis-classified machines, like (previously) S/390, should raise red flags immediately. #if defined(IS_BIG_ENDIAN) && defined(__GNUC__) && defined(__BYTE_ORDER__) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) -# error "IS_BIG_ENDIAN is set, but __BYTE_ORDER__ does not equal __ORDER_BIG_ENDIAN__" +# error "IS_BIG_ENDIAN is set, but __BYTE_ORDER__ is not __ORDER_BIG_ENDIAN__" #endif #if defined(IS_LITTLE_ENDIAN) && defined(__GNUC__) && defined(__BYTE_ORDER__) && (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__) -# error "IS_LITTLE_ENDIAN is set, but __BYTE_ORDER__ does not equal __ORDER_LITTLE_ENDIAN__" +# error "IS_LITTLE_ENDIAN is set, but __BYTE_ORDER__ is not __ORDER_LITTLE_ENDIAN__" #endif // Define this if you want to disable all OS-dependent features, @@ -232,6 +232,10 @@ const lword LWORD_MAX = W64LIT(0xffffffffffffffff); #define CRYPTOPP_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) #endif +#if defined(__xlc__) || defined(__xlC__) + #define CRYPTOPP_XLC_VERSION ((__xlC__ / 256) * 10000 + (__xlC__ % 256) * 100) +#endif + // Apple and LLVM's Clang. Apple Clang version 7.0 roughly equals LLVM Clang version 3.7 #if defined(__clang__ ) && defined(__apple_build_version__) #define CRYPTOPP_APPLE_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) @@ -299,7 +303,7 @@ NAMESPACE_END #ifndef CRYPTOPP_L1_CACHE_LINE_SIZE // This should be a lower bound on the L1 cache line size. It's used for defense against timing attacks. // Also see http://stackoverflow.com/questions/794632/programmatically-get-the-cache-line-size. 
- #if defined(_M_X64) || defined(__x86_64__) || (__arm64__) || (__aarch64__) + #if defined(_M_X64) || defined(__x86_64__) || defined(__arm64__) || defined(__aarch64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) #define CRYPTOPP_L1_CACHE_LINE_SIZE 64 #else // L1 cache line size is 32 on Pentium III and earlier @@ -422,9 +426,9 @@ NAMESPACE_END #endif // AltiVec and Power8 crypto -#if defined(__powerpc64__) +#if defined(__powerpc64__) || defined(_ARCH_PPC64) #define CRYPTOPP_BOOL_PPC64 1 -#elif defined(__powerpc__) +#elif defined(__powerpc__) || defined(_ARCH_PPC) #define CRYPTOPP_BOOL_PPC32 1 #endif @@ -585,9 +589,22 @@ NAMESPACE_END #if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) +// An old Apple G5 with GCC 4.01 has AltiVec. +#if !defined(CRYPTOPP_ALTIVEC_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) +# if defined(__ALTIVEC__) || (CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40000) +# define CRYPTOPP_ALTIVEC_AVAILABLE 1 +# endif +#endif + +#if !defined(CRYPTOPP_POWER8_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) +# if defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800) +# define CRYPTOPP_POWER8_AVAILABLE 1 +# endif +#endif + #if !defined(CRYPTOPP_POWER8_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) -# if defined(__CRYPTO__) || defined(__ALTIVEC__) || defined(__POWER8_VECTOR__) -//# define CRYPTOPP_POWER8_AES_AVAILABLE 1 +# if defined(__CRYPTO__) || defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800) +# define CRYPTOPP_POWER8_AES_AVAILABLE 1 //# define CRYPTOPP_POWER8_SHA_AVAILABLE 1 //# define CRYPTOPP_POWER8_CRC_AVAILABLE 1 # endif diff --git a/cpu.cpp b/cpu.cpp index f886c726..5cb2b597 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -13,6 +13,10 @@ #include "misc.h" #include "stdcpp.h" +#ifdef _AIX +# include +#endif + #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY # include # include @@ -316,7 +320,7 @@ void DetectX86Features() g_x86DetectionDone = true; } -// 
*************************** ARM-32, Aarch32 and Aarch64 CPUs *************************** +// *************************** ARM-32, Aarch32 and Aarch64 *************************** #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) @@ -553,6 +557,115 @@ void DetectArmFeatures() g_ArmDetectionDone = true; } +// *************************** PowerPC and PowerPC64 *************************** + +#elif (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) + +#if defined(__linux__) +# include +# ifndef PPC_FEATURE_HAS_ALTIVEC +# define PPC_FEATURE_HAS_ALTIVEC 0x10000000 +# endif +# ifndef PPC_FEATURE_ARCH_2_06 +# define PPC_FEATURE_ARCH_2_06 0x00000100 +# endif +# ifndef PPC_FEATURE2_ARCH_2_07 +# define PPC_FEATURE2_ARCH_2_07 0x80000000 +# endif +#endif + +bool CRYPTOPP_SECTION_INIT g_PowerpcDetectionDone = false; +bool CRYPTOPP_SECTION_INIT g_hasAltivec = false, CRYPTOPP_SECTION_INIT g_hasPower8 = false; +bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false; +word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + +// Can't use bool return type because early Apple systems, +// like G5's, perform '#define bool __bool' in . 
+extern int CPU_ProbeAltivec(); +extern int CPU_ProbePower8(); +extern int CPU_ProbeAES(); +extern int CPU_ProbeSHA1(); +extern int CPU_ProbeSHA2(); + +inline bool CPU_QueryAltivec() +{ +#if defined(__linux__) + if (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC) + return true; +#endif + return false; +} + +#if 0 +inline bool CPU_QueryPower7() +{ + // Power7 and ISA 2.06 +#if defined(__linux__) + if (getauxval(AT_HWCAP) & PPC_FEATURE_ARCH_2_06) + return true; +#endif + return false; +} +#endif + +inline bool CPU_QueryPower8() +{ + // Power8 and ISA 2.07 provide in-core crypto +#if defined(__linux__) + if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) + return true; +#endif + return false; +} + +inline bool CPU_QueryAES() +{ + // Power8 and ISA 2.07 provide in-core crypto +#if defined(__linux__) + if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) + return true; +#endif + return false; +} + +inline bool CPU_QuerySHA1() +{ + // Power8 and ISA 2.07 provide in-core crypto +#if defined(__linux__) + if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) + return true; +#endif + return false; +} +inline bool CPU_QuerySHA2() +{ + // Power8 and ISA 2.07 provide in-core crypto +#if defined(__linux__) + if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) + return true; +#endif + return false; +} + +void DetectPowerpcFeatures() +{ + // The CPU_ProbeXXX's return false for OSes which + // can't tolerate SIGILL-based probes, like Apple + g_hasAltivec = CPU_QueryAltivec() || CPU_ProbeAltivec(); + g_hasPower8 = CPU_QueryPower8() || CPU_ProbePower8(); + //g_hasPMULL = CPU_QueryPMULL() || CPU_ProbePMULL(); + g_hasAES = CPU_QueryAES() || CPU_ProbeAES(); + g_hasSHA1 = CPU_QuerySHA1() || CPU_ProbeSHA1(); + g_hasSHA2 = CPU_QuerySHA2() || CPU_ProbeSHA2(); + +#ifdef _AIX + // /usr/include/sys/systemcfg.h + g_cacheLineSize = getsystemcfg(SC_L1C_DLS); +#endif + + g_PowerpcDetectionDone = true; +} + #endif NAMESPACE_END @@ -567,6 +680,8 @@ struct InitializeCpu CryptoPP::DetectX86Features(); #elif 
CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64 CryptoPP::DetectArmFeatures(); +#elif CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64 + CryptoPP::DetectPowerpcFeatures(); #endif } }; diff --git a/cpu.h b/cpu.h index 1d53ad73..8b8e72c3 100644 --- a/cpu.h +++ b/cpu.h @@ -49,6 +49,8 @@ NAMESPACE_BEGIN(CryptoPP) +// ***************************** IA-32 ***************************** // + #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 || CRYPTOPP_DOXYGEN_PROCESSING #define CRYPTOPP_CPUID_AVAILABLE 1 @@ -263,6 +265,8 @@ inline int GetCacheLineSize() } #endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 +// ***************************** ARM-32, Aarch32 and Aarch64 ***************************** // + #if CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64 || CRYPTOPP_DOXYGEN_PROCESSING // Hide from Doxygen @@ -386,8 +390,108 @@ inline bool HasSHA2() } #endif // CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64 +// ***************************** PowerPC ***************************** // + +#if CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64 || CRYPTOPP_DOXYGEN_PROCESSING + +// Hide from Doxygen +#ifndef CRYPTOPP_DOXYGEN_PROCESSING +extern bool g_PowerpcDetectionDone; +extern bool g_hasAltivec, g_hasPower7, g_hasPower8, g_hasAES, g_hasSHA1, g_hasSHA2; +extern word32 g_cacheLineSize; +void CRYPTOPP_API DetectPowerpcFeatures(); +#endif // CRYPTOPP_DOXYGEN_PROCESSING + +//! \brief Determine if a PowerPC processor has Altivec available +//! \returns true if the hardware is capable of Altivec at runtime, false otherwise. +//! \details Altivec instructions are available under most modern PowerPCs. +//! \details Runtime support requires compile time support. When compiling with GCC, you may +//! need to compile with -mcpu=power8; while IBM XL C/C++ compilers require +//! -qarch=pwr8 -qaltivec. Also see PowerPC's _ALTIVEC_ preprocessor macro. +//! 
\note This function is only available on PowerPC and PowerPC-64 platforms +inline bool HasAltivec() +{ + if (!g_PowerpcDetectionDone) + DetectPowerpcFeatures(); + return g_hasAltivec; +} + +//! \brief Determine if a PowerPC processor has Power8 available +//! \returns true if the hardware is capable of Power8 at runtime, false otherwise. +//! \details Power8 instructions are available on processors that implement ISA 2.07. +//! \details Runtime support requires compile time support. When compiling with GCC, you may +//! need to compile with -mcpu=power8; while IBM XL C/C++ compilers require +//! -qarch=pwr8 -qaltivec. Also see PowerPC's _ARCH_PWR8 preprocessor macro. +//! \note This function is only available on PowerPC and PowerPC-64 platforms +inline bool HasPower8() +{ + if (!g_PowerpcDetectionDone) + DetectPowerpcFeatures(); + return g_hasPower8; +} + +//! \brief Determine if a PowerPC processor has AES available +//! \returns true if the hardware is capable of AES at runtime, false otherwise. +//! \details AES is part of the in-core crypto extensions on Power8 and Power9. +//! \details Runtime support requires compile time support. When compiling with GCC, you may +//! need to compile with -mcpu=power8; while IBM XL C/C++ compilers require +//! -qarch=pwr8 -qaltivec. Also see PowerPC's __CRYPTO__ preprocessor macro. +//! \note This function is only available on PowerPC and PowerPC-64 platforms +inline bool HasAES() +{ + if (!g_PowerpcDetectionDone) + DetectPowerpcFeatures(); + return g_hasAES; +} + +//! \brief Determine if a PowerPC processor has SHA1 available +//! \returns true if the hardware is capable of SHA1 at runtime, false otherwise. +//! \details SHA1 availability is reported alongside the Power8 in-core crypto extensions. +//! \details Runtime support requires compile time support. When compiling with GCC, you may +//! need to compile with -mcpu=power8; while IBM XL C/C++ compilers require +//! -qarch=pwr8 -qaltivec. Also see PowerPC's __CRYPTO__ preprocessor macro. +//! 
\note This function is only available on PowerPC and PowerPC-64 platforms +inline bool HasSHA1() +{ + if (!g_PowerpcDetectionDone) + DetectPowerpcFeatures(); + return g_hasSHA1; +} + +//! \brief Determine if a PowerPC processor has SHA2 available +//! \returns true if the hardware is capable of SHA2 at runtime, false otherwise. +//! \details SHA2 is part of the in-core crypto extensions on Power8 and Power9. +//! \details Runtime support requires compile time support. When compiling with GCC, you may +//! need to compile with -mcpu=power8; while IBM XL C/C++ compilers require +//! -qarch=pwr8 -qaltivec. Also see PowerPC's __CRYPTO__ preprocessor macro. +//! \note This function is only available on PowerPC and PowerPC-64 platforms +inline bool HasSHA2() +{ + if (!g_PowerpcDetectionDone) + DetectPowerpcFeatures(); + return g_hasSHA2; +} + +//! \brief Provides the cache line size +//! \returns lower bound on the size of a cache line in bytes, if available +//! \details GetCacheLineSize() returns the lower bound on the size of a cache line, if it +//! is available. If the value is not available at runtime, then 32 is returned for a 32-bit +//! processor and 64 is returned for a 64-bit processor. +//! \details x86/x32/x64 uses CPUID to determine the value and it's usually accurate. The ARM +//! processor equivalent is a privileged instruction, so a compile time value is returned. +inline int GetCacheLineSize() +{ + if (!g_PowerpcDetectionDone) + DetectPowerpcFeatures(); + return g_cacheLineSize; +} + +#endif // CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64 + +// ***************************** L1 cache line ***************************** // + // Non-Intel systems -#if !(CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) +#if !(CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) //! \brief Provides the cache line size at runtime //! \returns true if the hardware is capable of CRC32 at runtime, false otherwise. //! 
\details GetCacheLineSize() provides is an estimate using CRYPTOPP_L1_CACHE_LINE_SIZE. @@ -400,6 +504,8 @@ inline int GetCacheLineSize() #endif // CRYPTOPP_GENERATE_X64_MASM +// ***************************** Inline ASM Helper ***************************** // + #ifndef CRYPTOPP_DOXYGEN_PROCESSING #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 diff --git a/ppc-simd.cpp b/ppc-simd.cpp new file mode 100644 index 00000000..876fcda7 --- /dev/null +++ b/ppc-simd.cpp @@ -0,0 +1,320 @@ +// ppc-simd.cpp - written and placed in the public domain by +// Jeffrey Walton, Uri Blumenthal and Marcel Raad. +// +// This source file uses intrinsics to gain access to AltiVec, +// Power8 and in-core crypto instructions. A separate source file +// is needed because additional CXXFLAGS are required to enable the +// appropriate instruction sets in some build configurations. + +#include "pch.h" +#include "config.h" +#include "stdcpp.h" + +// We set CRYPTOPP_ALTIVEC_AVAILABLE and friends based on +// compiler version and preprocessor macros. If the compiler +// feature is not available, then we have to disable it here. +#if !defined(__ALTIVEC__) +# undef CRYPTOPP_ALTIVEC_AVAILABLE +#endif +#if !(defined(__CRYPTO__) || defined(_ARCH_PWR8) || defined(_ARCH_PWR9)) +# undef CRYPTOPP_POWER8_AVAILABLE +# undef CRYPTOPP_POWER8_AES_AVAILABLE +# undef CRYPTOPP_POWER8_SHA_AVAILABLE +# undef CRYPTOPP_POWER8_CRYPTO_AVAILABLE +#endif + +// We can't use bool return type because early Apple systems, +// like G5's, perform '#define bool __bool' in altivec.h.
+#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) +# include "altivec.h" +#endif + +#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) +# if defined(CRYPTOPP_XLC_VERSION) + // #include + typedef vector unsigned char uint8x16_p8; + typedef vector unsigned long long uint64x2_p8; +#elif defined(CRYPTOPP_GCC_VERSION) + typedef vector unsigned char uint8x16_p8; + typedef vector unsigned long uint64x2_p8; + #endif +#endif + +#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY +# include +# include +#endif + +#ifndef EXCEPTION_EXECUTE_HANDLER +# define EXCEPTION_EXECUTE_HANDLER 1 +#endif + +NAMESPACE_BEGIN(CryptoPP) + +#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY +extern "C" { + typedef void (*SigHandler)(int); + + static jmp_buf s_jmpSIGILL; + static void SigIllHandler(int) + { + longjmp(s_jmpSIGILL, 1); + } +}; +#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + +int CPU_ProbeAltivec() +{ +#if (CRYPTOPP_ALTIVEC_AVAILABLE) +# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY) + + // longjmp and clobber warnings. Volatile is required. 
+ // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile int result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpSIGILL)) + result = false; + else + { + CRYPTOPP_ALIGN_DATA(16) + const byte b1[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + CRYPTOPP_ALIGN_DATA(16) + const byte b2[16] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + CRYPTOPP_ALIGN_DATA(16) byte b3[16]; +#if defined(CRYPTOPP_XLC_VERSION) + const uint8x16_p8 v1 = vec_ld(0, b1); + const uint8x16_p8 v2 = vec_ld(0, b2); + const uint8x16_p8 v3 = vec_xor(v1, v2); + vec_st(v3, 0, b3); +#elif defined(CRYPTOPP_GCC_VERSION) + const uint64x2_p8 v1 = (uint64x2_p8)vec_ld(0, b1); + const uint64x2_p8 v2 = (uint64x2_p8)vec_ld(0, b2); + const uint64x2_p8 v3 = (uint64x2_p8)vec_xor(v1, v2); + vec_st((uint8x16_p8)v3, 0, b3); +#endif + result = (0 == std::memcmp(b2, b3, 16)); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_ALTIVEC_AVAILABLE +} + +#if 0 +int CPU_ProbePower7() +{ +#if (CRYPTOPP_POWER7_AVAILABLE) +# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY) + + // longjmp and clobber warnings. Volatile is required. 
+ // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile int result = false; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpSIGILL)) + result = false; + else + { + CRYPTOPP_ALIGN_DATA(16) // Non-const due to XL C/C++ + byte b1[19] = {-1, -1, -1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + CRYPTOPP_ALIGN_DATA(16) byte b2[16]; +#if defined(CRYPTOPP_XLC_VERSION) + const uint8x16_p8 v1 = vec_xl(0, reinterpret_cast(b1)+3); + vec_xst(v1, 0, reinterpret_cast(b2)); +#elif defined(CRYPTOPP_GCC_VERSION) + const uint8x16_p8 v1 = vec_vsx_ld(0, b1+3); + vec_vsx_st(v1, 0, (byte*)b2); +#endif + result = (0 == std::memcmp(b1+3, b2, 16)); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_POWER7_AVAILABLE +} +#endif + +int CPU_ProbePower8() +{ +#if (CRYPTOPP_POWER8_AVAILABLE) +# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY) + + // longjmp and clobber warnings. Volatile is required. 
+ // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile int result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpSIGILL)) + result = false; + else + { + CRYPTOPP_ALIGN_DATA(16) // Non-const due to XL C/C++ + byte b1[19] = {-1, -1, -1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + CRYPTOPP_ALIGN_DATA(16) byte b2[16]; +#if defined(CRYPTOPP_XLC_VERSION) + const uint8x16_p8 v1 = vec_xl(0, reinterpret_cast(b1)+3); + vec_xst(v1, 0, reinterpret_cast(b2)); +#elif defined(CRYPTOPP_GCC_VERSION) + const uint8x16_p8 v1 = vec_vsx_ld(0, b1+3); + vec_vsx_st(v1, 0, b2); +#endif + result = (0 == std::memcmp(b1+3, b2, 16)); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_ALTIVEC_AVAILABLE +} + +int CPU_ProbeAES() +{ +#if (CRYPTOPP_POWER8_AES_AVAILABLE) +# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY) + + // longjmp and clobber warnings. Volatile is required. 
+ // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile int result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpSIGILL)) + result = false; + else + { + CRYPTOPP_ALIGN_DATA(16) // Non-const due to XL C/C++ + byte key[16] = {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1, 0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05}; + CRYPTOPP_ALIGN_DATA(16) // Non-const due to XL C/C++ + byte state[16] = {0x19, 0x3d, 0xe3, 0xb3, 0xa0, 0xf4, 0xe2, 0x2b, 0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08}; + CRYPTOPP_ALIGN_DATA(16) byte r[16] = {-1}, z[16] = {}; +#if defined(CRYPTOPP_XLC_VERSION) + uint8x16_p8 k = vec_xl(0, reinterpret_cast(key)); + uint8x16_p8 s = vec_xl(0, reinterpret_cast(state)); + s = __vncipher(s, k); + s = __vncipherlast(s, k); + vec_xst(s, 0, reinterpret_cast(r)); +#elif defined(CRYPTOPP_GCC_VERSION) + uint64x2_p8 k = (uint64x2_p8)vec_xl(0, key); + uint64x2_p8 s = (uint64x2_p8)vec_xl(0, state); + s = __builtin_crypto_vncipher(s, k); + s = __builtin_crypto_vncipherlast(s, k); + vec_xst((uint8x16_p8)s, 0, r); +#endif + result = (0 != std::memcmp(r, z, 16)); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_ALTIVEC_AVAILABLE +} + +int CPU_ProbeSHA1() +{ +#if (CRYPTOPP_ALTIVEC_AVAILABLE) +# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY) + + // longjmp and clobber warnings. Volatile is required. 
+ // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile int result = false; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpSIGILL)) + result = false; + else + { + + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_ALTIVEC_AVAILABLE +} + +int CPU_ProbeSHA2() +{ +#if (CRYPTOPP_ALTIVEC_AVAILABLE) +# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY) + + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile int result = false; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpSIGILL)) + result = false; + else + { + + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_ALTIVEC_AVAILABLE +} + +NAMESPACE_END diff --git a/validat1.cpp b/validat1.cpp index e9f4ab91..ff626eba 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -342,14 +342,7 @@ bool TestSettings() #endif std::cout << std::endl; -#ifdef CRYPTOPP_CPUID_AVAILABLE - bool hasSSE2 = HasSSE2(); - bool hasSSSE3 = HasSSSE3(); - bool hasSSE41 = HasSSE41(); - bool hasSSE42 = HasSSE42(); - bool isP4 = IsP4(); - int cacheLineSize = GetCacheLineSize(); - + const int cacheLineSize = GetCacheLineSize(); if (cacheLineSize < 16 || cacheLineSize > 256 || !IsPowerOf2(cacheLineSize)) { std::cout << "FAILED: "; @@ -357,10 +350,18 @@ bool TestSettings() } else std::cout << "passed: "; + std::cout << "cacheLineSize == 
" << cacheLineSize << std::endl; + +#ifdef CRYPTOPP_CPUID_AVAILABLE + bool hasSSE2 = HasSSE2(); + bool hasSSSE3 = HasSSSE3(); + bool hasSSE41 = HasSSE41(); + bool hasSSE42 = HasSSE42(); + bool isP4 = IsP4(); std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41 << ", hasSSE4.2 == " << hasSSE42; std::cout << ", hasAESNI == " << HasAESNI() << ", hasCLMUL == " << HasCLMUL() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED(); - std::cout << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4 << ", cacheLineSize == " << cacheLineSize << std::endl; + std::cout << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4; #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) bool hasNEON = HasNEON(); @@ -373,6 +374,18 @@ bool TestSettings() std::cout << "passed: "; std::cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << ", hasPMULL == " << hasPMULL; std::cout << ", hasAES == " << hasAES << ", hasSHA1 == " << hasSHA1 << ", hasSHA2 == " << hasSHA2 << std::endl; + +#elif (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) + bool hasAltivec = HasAltivec(); + bool hasPower8 = HasPower8(); + bool hasAES = HasAES(); + bool hasSHA1 = HasSHA1(); + bool hasSHA2 = HasSHA2(); + + std::cout << "passed: "; + std::cout << "hasAltivec == " << hasAltivec << ", hasPower8 == " << hasPower8; + std::cout << ", hasAES == " << hasAES << ", hasSHA1 == " << hasSHA1 << ", hasSHA2 == " << hasSHA2 << std::endl; + #endif if (!pass)