diff --git a/Filelist.txt b/Filelist.txt index c7f83a90..253e85bc 100644 --- a/Filelist.txt +++ b/Filelist.txt @@ -82,6 +82,8 @@ cryptlib.h cryptlib.vcxproj cryptlib.vcxproj.filters cryptopp.rc +darn.cpp +darn.h datatest.cpp default.cpp default.h @@ -228,8 +230,9 @@ polynomi.cpp polynomi.h ppc_power7.cpp ppc_power8.cpp -ppc_simd.h +ppc_power9.cpp ppc_simd.cpp +ppc_simd.h pssr.cpp pssr.h pubkey.cpp @@ -534,9 +537,13 @@ TestPrograms/test_arm_sha.cxx TestPrograms/test_crypto_v84.cxx TestPrograms/test_cxx.cxx TestPrograms/test_newlib.cxx +TestPrograms/test_ppc_aes.cxx TestPrograms/test_ppc_altivec.cxx TestPrograms/test_ppc_power7.cxx TestPrograms/test_ppc_power8.cxx +TestPrograms/test_ppc_power9.cxx +TestPrograms/test_ppc_sha.cxx +TestPrograms/test_ppc_vmull.cxx TestPrograms/test_pthreads.cxx TestPrograms/test_x86_aes.cxx TestPrograms/test_x86_avx.cxx diff --git a/GNUmakefile b/GNUmakefile index d1842d41..6f8c1155 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -683,7 +683,7 @@ ifeq ($(DETECT_FEATURES),1) ##################################################################### # Looking for a POWER8 option - TPROG = TestPrograms/test_ppc_power8.cxx + TPROG = TestPrograms/test_ppc_power9.cxx TOPT = $(POWER9_FLAG) HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l) ifeq ($(strip $(HAVE_OPT)),0) @@ -691,6 +691,7 @@ ifeq ($(DETECT_FEATURES),1) BLAKE2B_FLAG = $(POWER9_FLAG) BLAKE2S_FLAG = $(POWER9_FLAG) CHACHA_FLAG = $(POWER9_FLAG) + DARN_FLAG = $(POWER9_FLAG) SM4_FLAG = $(POWER9_FLAG) SIMON64_FLAG = $(POWER9_FLAG) SIMON128_FLAG = $(POWER9_FLAG) @@ -801,6 +802,8 @@ ifeq ($(DETECT_FEATURES),1) CXXFLAGS += -DCRYPTOPP_DISABLE_POWER7 else ifeq ($(POWER9_FLAG)$(POWER8_FLAG),) CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8 + else ifeq ($(POWER9_FLAG),) + CXXFLAGS += -DCRYPTOPP_DISABLE_POWER9 endif ##################################################################### @@ -1534,6 +1537,10 @@ chacha_avx.o : chacha_avx.cpp cham_simd.o : 
cham_simd.cpp $(CXX) $(strip $(CXXFLAGS) $(CHAM_FLAG) -c) $< +# Power9 available +darn.o : darn.cpp + $(CXX) $(strip $(CXXFLAGS) $(DARN_FLAG) -c) $< + # SSE2 on i586 sse_simd.o : sse_simd.cpp $(CXX) $(strip $(CXXFLAGS) $(SSE_FLAG) -c) $< @@ -1566,6 +1573,10 @@ ppc_power7.o : ppc_power7.cpp ppc_power8.o : ppc_power8.cpp $(CXX) $(strip $(CXXFLAGS) $(POWER8_FLAG) -c) $< +# Power9 available +ppc_power9.o : ppc_power9.cpp + $(CXX) $(strip $(CXXFLAGS) $(POWER9_FLAG) -c) $< + # AESNI or ARMv7a/ARMv8a available rijndael_simd.o : rijndael_simd.cpp $(CXX) $(strip $(CXXFLAGS) $(AES_FLAG) -c) $< diff --git a/TestPrograms/test_ppc_power9.cxx b/TestPrograms/test_ppc_power9.cxx new file mode 100644 index 00000000..82e1e755 --- /dev/null +++ b/TestPrograms/test_ppc_power9.cxx @@ -0,0 +1,33 @@ +#include +int main(int argc, char* argv[]) +{ + const unsigned char b = (unsigned char)argc; + const unsigned int r = (0xf << 24) | (0x3 << 16) | (0xf << 8) | (0x3 << 0); +#if defined(__clang__) + bool x = __builtin_altivec_byte_in_range(b, r); +#elif defined(__GNUC__) + bool x = __builtin_byte_in_range(b, r); +#else + int XXX[-1]; +#endif + +#if UINTPTR_MAX == 0xffffffffffffffffULL +# if defined(__clang__) + unsigned long long y = __builtin_altivec_darn(); +# elif defined(__GNUC__) + unsigned long long y = __builtin_darn(); +# else + int XXX[-1]; +# endif +#else +# if defined(__clang__) + unsigned int y = __builtin_altivec_darn_32(); +# elif defined(__GNUC__) + unsigned int y = __builtin_darn_32(); +# else + int XXX[-1]; +# endif +#endif + + return 0; +} diff --git a/bench1.cpp b/bench1.cpp index 6e8bc94e..51b4be41 100644 --- a/bench1.cpp +++ b/bench1.cpp @@ -6,13 +6,19 @@ #include "validate.h" #include "cpu.h" -#include "drbg.h" #include "factory.h" #include "algparam.h" #include "argnames.h" #include "smartptr.h" #include "stdcpp.h" +#include "osrng.h" +#include "drbg.h" +#include "darn.h" +#include "mersenne.h" +#include "rdrand.h" +#include "padlkrng.h" + #include #include #include @@ 
-437,6 +443,10 @@ void Benchmark1(double t, double hertz) BenchMarkByNameKeyLess("RDRAND"); if (HasRDSEED()) BenchMarkByNameKeyLess("RDSEED"); +#endif +#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) + if (HasDARN()) + BenchMarkByNameKeyLess("DARN"); #endif BenchMarkByNameKeyLess("AES/OFB RNG"); BenchMarkByNameKeyLess("Hash_DRBG(SHA1)"); diff --git a/config.h b/config.h index b1505a8c..333c0bcd 100644 --- a/config.h +++ b/config.h @@ -779,35 +779,49 @@ NAMESPACE_END # undef CRYPTOPP_DISABLE_ALTIVEC # undef CRYPTOPP_DISABLE_POWER7 # undef CRYPTOPP_DISABLE_POWER8 +# undef CRYPTOPP_DISABLE_POWER9 # define CRYPTOPP_DISABLE_ALTIVEC 1 # define CRYPTOPP_DISABLE_POWER7 1 # define CRYPTOPP_DISABLE_POWER8 1 +# define CRYPTOPP_DISABLE_POWER9 1 #endif // An old Apple G5 with GCC 4.01 has AltiVec, but its only Power4 or so. #if !defined(CRYPTOPP_ALTIVEC_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ALTIVEC) # if defined(_ARCH_PWR4) || defined(__ALTIVEC__) || \ - (CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40001) + (CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40001) || \ + (CRYPTOPP_CLANG_VERSION >= 20900) # define CRYPTOPP_ALTIVEC_AVAILABLE 1 # endif #endif // We need Power7 for unaligned loads and stores #if !defined(CRYPTOPP_POWER7_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER7) && defined(CRYPTOPP_ALTIVEC_AVAILABLE) -# if defined(_ARCH_PWR7) || (CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40100) +# if defined(_ARCH_PWR7) || (CRYPTOPP_XLC_VERSION >= 100000) || \ + (CRYPTOPP_GCC_VERSION >= 40100) || (CRYPTOPP_CLANG_VERSION >= 30100) # define CRYPTOPP_POWER7_AVAILABLE 1 # endif #endif // We need Power8 for in-core crypto and 64-bit vector types #if !defined(CRYPTOPP_POWER8_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER8) && defined(CRYPTOPP_POWER7_AVAILABLE) -# if defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800) +# if defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || \ + 
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_CLANG_VERSION >= 70000) # define CRYPTOPP_POWER8_AVAILABLE 1 # endif #endif +// Power9 for random numbers +#if !defined(CRYPTOPP_POWER9_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER9) && defined(CRYPTOPP_POWER8_AVAILABLE) +# if defined(_ARCH_PWR9) || (CRYPTOPP_XLC_VERSION >= 130200) || \ + (CRYPTOPP_GCC_VERSION >= 70000) || (CRYPTOPP_CLANG_VERSION >= 80000) +# define CRYPTOPP_POWER9_AVAILABLE 1 +# endif +#endif + #if !defined(CRYPTOPP_POWER8_AES_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER8_AES) && defined(CRYPTOPP_POWER8_AVAILABLE) -# if defined(__CRYPTO__) || defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800) +# if defined(__CRYPTO__) || defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || \ + (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_CLANG_VERSION >= 70000) //# define CRYPTOPP_POWER8_CRC_AVAILABLE 1 # define CRYPTOPP_POWER8_AES_AVAILABLE 1 # define CRYPTOPP_POWER8_VMULL_AVAILABLE 1 @@ -815,7 +829,7 @@ NAMESPACE_END # endif #endif -#endif // PPC, PPC64 +#endif // PPC32, PPC64 // ***************** Miscellaneous ******************** diff --git a/cpu.cpp b/cpu.cpp index 92d525da..63289e8d 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -827,20 +827,25 @@ bool CRYPTOPP_SECTION_INIT g_PowerpcDetectionDone = false; bool CRYPTOPP_SECTION_INIT g_hasAltivec = false; bool CRYPTOPP_SECTION_INIT g_hasPower7 = false; bool CRYPTOPP_SECTION_INIT g_hasPower8 = false; +bool CRYPTOPP_SECTION_INIT g_hasPower9 = false; bool CRYPTOPP_SECTION_INIT g_hasAES = false; bool CRYPTOPP_SECTION_INIT g_hasPMULL = false; bool CRYPTOPP_SECTION_INIT g_hasSHA256 = false; bool CRYPTOPP_SECTION_INIT g_hasSHA512 = false; +bool CRYPTOPP_SECTION_INIT g_hasDARN = false; word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; extern bool CPU_ProbeAltivec(); extern bool CPU_ProbePower7(); extern bool CPU_ProbePower8(); +extern bool CPU_ProbePower9(); extern bool CPU_ProbeAES(); extern bool 
CPU_ProbePMULL(); extern bool CPU_ProbeSHA256(); extern bool CPU_ProbeSHA512(); +extern bool CPU_ProbeDARN(); +// Linux defines #ifndef PPC_FEATURE_HAS_ALTIVEC # define PPC_FEATURE_HAS_ALTIVEC 0x10000000 #endif @@ -850,17 +855,39 @@ extern bool CPU_ProbeSHA512(); #ifndef PPC_FEATURE2_ARCH_2_07 # define PPC_FEATURE2_ARCH_2_07 0x80000000 #endif +#ifndef PPC_FEATURE2_ARCH_3_00 +# define PPC_FEATURE2_ARCH_3_00 0x00800000 +#endif #ifndef PPC_FEATURE2_VEC_CRYPTO # define PPC_FEATURE2_VEC_CRYPTO 0x02000000 #endif +// AIX defines. We used to just call __power_7_andup() +// and friends but at Power9, too many compilers were +// missing __power_9_andup(). Instead we switched to +// a pattern similar to OpenSSL caps testing. +#ifndef __power_6_andup +# define __power_6_andup() __power_set(0xffffffffU<<14) +#endif +#ifndef __power_7_andup +# define __power_7_andup() __power_set(0xffffffffU<<15) +#endif +#ifndef __power_8_andup +# define __power_8_andup() __power_set(0xffffffffU<<16) +#endif +#ifndef __power_9_andup +# define __power_9_andup() __power_set(0xffffffffU<<17) +#endif + +// AIX first supported Altivec at Power6, though it +// was available much earlier for other vendors. inline bool CPU_QueryAltivec() { #if defined(__linux__) if ((getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC) != 0) return true; #elif defined(_AIX) - if (__power_vmx() != 0) + if (__power_6_andup() != 0) return true; #elif defined(__APPLE__) && defined(__POWERPC__) unsigned int device, version; @@ -896,6 +923,19 @@ inline bool CPU_QueryPower8() return false; } +inline bool CPU_QueryPower9() +{ + // Power9 and ISA 3.0. +#if defined(__linux__) + if ((getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00) != 0) + return true; +#elif defined(_AIX) + if (__power_9_andup() != 0) + return true; +#endif + return false; +} + inline bool CPU_QueryAES() { // Power8 and ISA 2.07 provide in-core crypto. 
Glibc @@ -951,6 +991,20 @@ inline bool CPU_QuerySHA512() return false; } +// Power9 random number generator +inline bool CPU_QueryDARN() +{ + // Power9 and ISA 3.0 provide DARN. +#if defined(__linux__) + if ((getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00) != 0) + return true; +#elif defined(_AIX) + if (__power_9_andup() != 0) + return true; +#endif + return false; +} + void DetectPowerpcFeatures() { // The CPU_ProbeXXX's return false for OSes which @@ -958,10 +1012,12 @@ void DetectPowerpcFeatures() g_hasAltivec = CPU_QueryAltivec() || CPU_ProbeAltivec(); g_hasPower7 = CPU_QueryPower7() || CPU_ProbePower7(); g_hasPower8 = CPU_QueryPower8() || CPU_ProbePower8(); + g_hasPower9 = CPU_QueryPower9() || CPU_ProbePower9(); g_hasPMULL = CPU_QueryPMULL() || CPU_ProbePMULL(); g_hasAES = CPU_QueryAES() || CPU_ProbeAES(); g_hasSHA256 = CPU_QuerySHA256() || CPU_ProbeSHA256(); g_hasSHA512 = CPU_QuerySHA512() || CPU_ProbeSHA512(); + g_hasDARN = CPU_QueryDARN() || CPU_ProbeDARN(); #if defined(_AIX) && defined(SC_L1C_DLS) // /usr/include/sys/systemcfg.h diff --git a/cpu.h b/cpu.h index 648c859f..220eedfd 100644 --- a/cpu.h +++ b/cpu.h @@ -591,10 +591,12 @@ extern bool g_PowerpcDetectionDone; extern bool g_hasAltivec; extern bool g_hasPower7; extern bool g_hasPower8; +extern bool g_hasPower9; extern bool g_hasAES; extern bool g_hasPMULL; extern bool g_hasSHA256; extern bool g_hasSHA512; +extern bool g_hasDARN; extern word32 g_cacheLineSize; void CRYPTOPP_API DetectPowerpcFeatures(); #endif // CRYPTOPP_DOXYGEN_PROCESSING @@ -608,7 +610,7 @@ void CRYPTOPP_API DetectPowerpcFeatures(); /// \details Runtime support requires compile time support. When compiling with GCC, you may /// need to compile with -mcpu=power4; while IBM XL C/C++ compilers require /// -qarch=pwr6 -qaltivec. Also see PowerPC's _ALTIVEC_ preprocessor macro. -/// \details Atilvec was first available on Power4 platforms. However Crypto++ releies heavily +/// \details Atilvec was first available in the early 2000's. 
However Crypto++ releies heavily /// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7 /// extensions, then the GNUmakefile sets -DCRYPTOPP_DISABLE_POWER7. /// \note This function is only available on PowerPC and PowerPC-64 platforms @@ -619,13 +621,13 @@ inline bool HasAltivec() return g_hasAltivec; } -/// \brief Determine if a PowerPC processor has Power8 available -/// \returns true if the hardware is capable of Power8 at runtime, false otherwise. +/// \brief Determine if a PowerPC processor has Power7 available +/// \returns true if the hardware is capable of Power7 at runtime, false otherwise. /// \details Altivec instructions are available under most modern PowerPCs. /// \details Runtime support requires compile time support. When compiling with GCC, you may -/// need to compile with -mcpu=power8; while IBM XL C/C++ compilers require -/// -qarch=pwr8 -qaltivec. Also see PowerPC's _ALTIVEC_ preprocessor macro. -/// \details Atilvec was first available on Power4 platforms. However Crypto++ releies heavily +/// need to compile with -mcpu=power7; while IBM XL C/C++ compilers require +/// -qarch=pwr7 -qaltivec. Also see PowerPC's _ALTIVEC_ preprocessor macro. +/// \details Atilvec was first available in the early 2000's. However Crypto++ releies heavily /// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7 /// extensions, then the GNUmakefile sets -DCRYPTOPP_DISABLE_POWER7. /// \note This function is only available on PowerPC and PowerPC-64 platforms @@ -642,7 +644,7 @@ inline bool HasPower7() /// \details Runtime support requires compile time support. When compiling with GCC, you may /// need to compile with -mcpu=power8; while IBM XL C/C++ compilers require /// -qarch=pwr8 -qaltivec. Also see PowerPC's _ALTIVEC_ preprocessor macro. -/// \details Atilvec was first available on Power4 platforms. However Crypto++ releies heavily +/// \details Atilvec was first available in the early 2000's. 
However Crypto++ releies heavily /// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7 /// extensions, then the GNUmakefile sets -DCRYPTOPP_DISABLE_POWER7. /// \note This function is only available on PowerPC and PowerPC-64 platforms @@ -653,6 +655,23 @@ inline bool HasPower8() return g_hasPower8; } +/// \brief Determine if a PowerPC processor has Power9 available +/// \returns true if the hardware is capable of Power9 at runtime, false otherwise. +/// \details Altivec instructions are available under most modern PowerPCs. +/// \details Runtime support requires compile time support. When compiling with GCC, you may +/// need to compile with -mcpu=power9; while IBM XL C/C++ compilers require +/// -qarch=pwr9 -qaltivec. Also see PowerPC's _ALTIVEC_ preprocessor macro. +/// \details Atilvec was first available in the early 2000's. However Crypto++ releies heavily +/// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7 +/// extensions, then the GNUmakefile sets -DCRYPTOPP_DISABLE_POWER7. +/// \note This function is only available on PowerPC and PowerPC-64 platforms +inline bool HasPower9() +{ + if (!g_PowerpcDetectionDone) + DetectPowerpcFeatures(); + return g_hasPower9; +} + /// \brief Determine if a PowerPC processor has AES available /// \returns true if the hardware is capable of AES at runtime, false otherwise. /// \details AES is part of the in-crypto extensions on Power8 and Power9. @@ -709,6 +728,23 @@ inline bool HasSHA512() return g_hasSHA512; } +/// \brief Determine if a PowerPC processor has DARN available +/// \returns true if the hardware is capable of DARN at runtime, false otherwise. +/// \details Altivec instructions are available under most modern PowerPCs. +/// \details Runtime support requires compile time support. When compiling with GCC, you may +/// need to compile with -mcpu=power9; while IBM XL C/C++ compilers require +/// -qarch=pwr9 -qaltivec. 
Also see PowerPC's _ALTIVEC_ preprocessor macro. +/// \details Atilvec was first available in the early 2000's. However Crypto++ relies heavily +/// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7 +/// extensions, then the GNUmakefile sets -DCRYPTOPP_DISABLE_POWER7. +/// \note This function is only available on PowerPC and PowerPC-64 platforms +inline bool HasDARN() +{ + if (!g_PowerpcDetectionDone) + DetectPowerpcFeatures(); + return g_hasDARN; +} + /// \brief Provides the cache line size /// \returns lower bound on the size of a cache line in bytes, if available /// \details GetCacheLineSize() returns the lower bound on the size of a cache line, if it diff --git a/darn.cpp b/darn.cpp new file mode 100644 index 00000000..5c40ba78 --- /dev/null +++ b/darn.cpp @@ -0,0 +1,214 @@ +// darn.cpp - written and placed in public domain by Jeffrey Walton + +#include "pch.h" +#include "config.h" +#include "cryptlib.h" +#include "secblock.h" +#include "darn.h" +#include "cpu.h" + +// At the moment only GCC 7.0 (and above) seems to support __builtin_darn() +// and __builtin_darn_32(). Clang 7.0 does not provide them. XLC is unknown, +// but there are no hits when searching IBM's site. To cover more platforms +// we provide GCC inline assembly like we do with RDRAND and RDSEED. +// Platforms that don't support GCC inline assembly or the builtin will fail +// the compile. 
+
+#if defined(__GNUC__) || defined(__IBM_GCC_ASM)
+# define GCC_DARN_ASM_AVAILABLE 1
+#endif
+
+/////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////
+
+NAMESPACE_BEGIN(CryptoPP)
+
+#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
+
+// *************************** 32-bit *************************** //
+
+#if (CRYPTOPP_BOOL_PPC32)
+
+// Fills 4 bytes, buffer must be aligned (the asm path retries on the failure value)
+inline void DARN32(void* output)
+{
+    CRYPTOPP_ASSERT(IsAlignedOn(output, GetAlignmentOf<word32>()));
+    word32* ptr = reinterpret_cast<word32*>(output);
+
+#if defined(GCC_DARN_ASM_AVAILABLE)
+    // This is "darn r3, 0". When L=0 a 32-bit conditioned word
+    // is returned. On failure 0xffffffffffffffff is returned.
+    // The Power manual recommends only checking the low 32-bit
+    // word for this case. See Power ISA 3.0 specification, p. 78.
+    do
+    {
+        __asm__ __volatile__ (
+            #if (CRYPTOPP_BIG_ENDIAN)
+            ".byte 0x7c, 0x60, 0x05, 0xe6 \n\t" // r3 = darn 3, 0
+            "mr %0, 3 \n\t" // val = r3
+            #else
+            ".byte 0xe6, 0x05, 0x60, 0x7c \n\t" // r3 = darn 3, 0
+            "mr %0, 3 \n\t" // val = r3
+            #endif
+            : "=r" (*ptr) : : "r3"
+        );
+    } while (*ptr == 0xFFFFFFFFu);
+#elif defined(_ARCH_PWR9)
+    // This is probably going to break some platforms.
+    // We will deal with them as we encounter them.
+    *ptr = __builtin_darn_32();
+#else
+    int XXX[-1];
+#endif
+}
+#endif // PPC32
+
+// *************************** 64-bit *************************** //
+
+#if (CRYPTOPP_BOOL_PPC64)
+
+// Fills 8 bytes, buffer must be aligned (the asm path retries on the failure value)
+inline void DARN64(void* output)
+{
+    CRYPTOPP_ASSERT(IsAlignedOn(output, GetAlignmentOf<word64>()));
+    word64* ptr = reinterpret_cast<word64*>(output);
+
+#if defined(GCC_DARN_ASM_AVAILABLE)
+    // This is "darn r3, 1". When L=1 a 64-bit conditioned word
+    // is returned. On failure 0xffffffffffffffff is returned.
+    // See Power ISA 3.0 specification, p. 78.
+    do
+    {
+        __asm__ __volatile__ (
+            #if (CRYPTOPP_BIG_ENDIAN)
+            ".byte 0x7c, 0x61, 0x05, 0xe6 \n\t" // r3 = darn 3, 1
+            "mr %0, 3 \n\t" // val = r3
+            #else
+            ".byte 0xe6, 0x05, 0x61, 0x7c \n\t" // r3 = darn 3, 1
+            "mr %0, 3 \n\t" // val = r3
+            #endif
+            : "=r" (*ptr) : : "r3"
+        );
+    } while (*ptr == 0xFFFFFFFFFFFFFFFFull);
+#elif defined(_ARCH_PWR9)
+    // This is probably going to break some platforms.
+    // We will deal with them as we encounter them.
+    *ptr = __builtin_darn();
+#else
+    int XXX[-1];
+#endif
+}
+#endif // PPC64
+
+// ************************ Standard C++ ************************ //
+
+DARN::DARN()
+{
+    if (!HasDARN())
+        throw DARN_Err("HasDARN");
+}
+
+// Fills output with size bytes. An unaligned head and a partial tail are
+// served from a scratch word so DARN32/DARN64 only store to aligned addresses.
+void DARN::GenerateBlock(byte *output, size_t size)
+{
+    CRYPTOPP_ASSERT((output && size) || !(output || size));
+    if (size == 0) return;
+    size_t i = 0;
+
+#if (CRYPTOPP_BOOL_PPC64)
+
+    word64 val;
+    i = (8 - (reinterpret_cast<uintptr_t>(output) & 0x7)) & 0x7; // bytes needed to reach 8-byte alignment
+    i = STDMIN(i, size); // never write more than was requested
+    if (i != 0)
+    {
+        DARN64(&val);
+        std::memcpy(output, &val, i);
+        output += i;
+        size -= i;
+    }
+
+    // Output is aligned
+    for (i = 0; i < size/8; i++)
+        DARN64(output+i*8);
+
+    output += i*8;
+    size -= i*8;
+
+    if (size)
+    {
+        DARN64(&val);
+        std::memcpy(output, &val, size);
+    }
+
+#elif (CRYPTOPP_BOOL_PPC32)
+
+    word32 val;
+    i = (4 - (reinterpret_cast<uintptr_t>(output) & 0x3)) & 0x3; // bytes needed to reach 4-byte alignment
+    i = STDMIN(i, size); // never write more than was requested
+    if (i != 0)
+    {
+        DARN32(&val);
+        std::memcpy(output, &val, i);
+        output += i;
+        size -= i;
+    }
+
+    for (i = 0; i < size/4; i++)
+        DARN32(output+i*4);
+
+    output += i*4;
+    size -= i*4;
+
+    if (size)
+    {
+        DARN32(&val);
+        std::memcpy(output, &val, size);
+    }
+
+#else
+    // No suitable compiler found
+    CRYPTOPP_UNUSED(output);
+    throw NotImplemented("DARN: failed to find a suitable implementation");
+#endif
+}
+
+void DARN::DiscardBytes(size_t n)
+{
+    // RoundUpToMultipleOf is used because a full word is read, and its cheaper
+    // to discard full words. There's no sense in dealing with tail bytes.
+    FixedSizeSecBlock<word64, 16> discard;
+    n = RoundUpToMultipleOf(n, sizeof(word64));
+
+    size_t count = STDMIN(n, discard.SizeInBytes());
+    while (count)
+    {
+        GenerateBlock(discard.BytePtr(), count);
+        n -= count;
+        count = STDMIN(n, discard.SizeInBytes());
+    }
+}
+
+#else // not PPC32 or PPC64
+
+DARN::DARN()
+{
+    throw DARN_Err("HasDARN");
+}
+
+void DARN::GenerateBlock(byte *output, size_t size)
+{
+    // Constructor will throw, should not get here
+    CRYPTOPP_UNUSED(output); CRYPTOPP_UNUSED(size);
+}
+
+void DARN::DiscardBytes(size_t n)
+{
+    // Constructor will throw, should not get here
+    CRYPTOPP_UNUSED(n);
+}
+
+#endif // PPC32 or PPC64
+
+NAMESPACE_END
diff --git a/darn.h b/darn.h
new file mode 100644
index 00000000..66b69de3
--- /dev/null
+++ b/darn.h
@@ -0,0 +1,87 @@
+// darn.h - written and placed in public domain by Jeffrey Walton
+// DARN requires POWER9/ISA 3.0.
+
+// At the moment only GCC 7.0 (and above) seems to support __builtin_darn()
+// and __builtin_darn_32(). Clang 7.0 does not provide them. XLC is unknown,
+// but there are no hits when searching IBM's site. To cover more platforms
+// we provide GCC inline assembly like we do with RDRAND and RDSEED.
+// Platforms that don't support GCC inline assembly or the builtin will fail
+// the compile. Also see
+// https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-ISA-3_002e0.html
+
+/// \file darn.h
+/// \brief Classes for DARN RNG
+/// \since Crypto++ 8.0
+
+#ifndef CRYPTOPP_DARN_H
+#define CRYPTOPP_DARN_H
+
+#include "cryptlib.h"
+
+NAMESPACE_BEGIN(CryptoPP)
+
+/// \brief Exception thrown when a DARN generator encounters
+/// a generator related error.
+/// \sa Power
+/// ISA Version 3.0B
+/// \since Crypto++ 8.0
+class DARN_Err : public Exception
+{
+public:
+    DARN_Err(const std::string &operation)
+        : Exception(OTHER_ERROR, "DARN: " + operation + " operation failed") {}
+};
+
+/// \brief Hardware generated random numbers using DARN instruction
+/// \details DARN() provides access to Power9's random number generator.
+/// \details According to Power ISA 3.0B manual, the random number generator
+/// provided by this instruction is NIST SP800-90B and SP800-90C compliant to
+/// the extent possible given the completeness of the standards at the time
+/// the hardware is designed. The random number generator provides a minimum
+/// of 0.5 bits of entropy per bit.
+/// \par Wraps
+/// darn instruction
+/// \sa Power
+/// ISA Version 3.0B, MaurerRandomnessTest() for random bit generators
+/// \since Crypto++ 8.0
+class DARN : public RandomNumberGenerator
+{
+public:
+    CRYPTOPP_STATIC_CONSTEXPR const char* StaticAlgorithmName() { return "DARN"; }
+
+    virtual ~DARN() {}
+
+    /// \brief Construct a DARN generator
+    /// \throws DARN_Err if the random number generator is not available
+    DARN();
+
+    /// \brief Generate random array of bytes
+    /// \param output the byte buffer
+    /// \param size the length of the buffer, in bytes
+    virtual void GenerateBlock(byte *output, size_t size);
+
+    /// \brief Generate and discard n bytes
+    /// \param n the number of bytes to generate and discard
+    /// \details the DARN generator discards words, not bytes. If n is
+    /// not a multiple of a machine word, then it is rounded up to
+    /// that size.
+    virtual void DiscardBytes(size_t n);
+
+    /// \brief Update RNG state with additional unpredictable values
+    /// \param input unused
+    /// \param length unused
+    /// \details The operation is a nop for this generator.
+    virtual void IncorporateEntropy(const byte *input, size_t length)
+    {
+        // Override to avoid the base class' throw.
+        CRYPTOPP_UNUSED(input); CRYPTOPP_UNUSED(length);
+    }
+
+    std::string AlgorithmProvider() const {
+        return "Power9";
+    }
+};
+
+NAMESPACE_END
+
+#endif // CRYPTOPP_DARN_H
diff --git a/ppc_power9.cpp b/ppc_power9.cpp
new file mode 100644
index 00000000..970cc19b
--- /dev/null
+++ b/ppc_power9.cpp
@@ -0,0 +1,146 @@
+// ppc_power9.cpp - written and placed in the public domain by
+// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
+//
+// This source file uses intrinsics and built-ins to gain access to
+// Power9 instructions. A separate source file is needed because
+// additional CXXFLAGS are required to enable the appropriate
+// instructions sets in some build configurations.
+
+#include "pch.h"
+#include "config.h"
+
+#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
+# include <signal.h>
+# include <setjmp.h>
+#endif
+
+#if defined(_ARCH_PWR9)
+# include "ppc_simd.h"
+#endif
+
+// Squash MS LNK4221 and libtool warnings
+extern const char PPC_POWER9_FNAME[] = __FILE__;
+
+NAMESPACE_BEGIN(CryptoPP)
+
+// ************************* Feature Probes ************************* //
+
+#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
+extern "C" {
+    typedef void (*SigHandler)(int);
+
+    static jmp_buf s_jmpSIGILL;
+    static void SigIllHandler(int)
+    {
+        longjmp(s_jmpSIGILL, 1);
+    }
+}
+#endif // CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
+
+#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
+
+// Probes for Power9 by executing "darn r3, 0" under a temporary SIGILL
+// handler. Returns false when probing is disabled, when no probe can be
+// compiled for this toolchain, or when the instruction traps.
+bool CPU_ProbePower9()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif defined(CRYPTOPP_POWER9_AVAILABLE)
+# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721954
+    volatile int result = true;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+    {
+        // Do not leave our SIGILL handler installed on failure
+        signal(SIGILL, oldHandler);
+        return false;
+    }
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        // This is "darn r3, 0". It provides a conditioned 32-bit
+        // word. It is available on both 32-bit and 64-bit.
+#if CRYPTOPP_BIG_ENDIAN
+        __asm__ __volatile__ (".byte 0x7c, 0x60, 0x05, 0xe6 \n" : : : "r3");
+#else
+        __asm__ __volatile__ (".byte 0xe6, 0x05, 0x60, 0x7c \n" : : : "r3");
+#endif
+        result = true;
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+# else
+    // No GNU-style inline assembly, so the instruction cannot be probed
+    return false;
+# endif
+#else
+    return false;
+#endif // CRYPTOPP_POWER9_AVAILABLE
+}
+
+// The DARN probe is not guarded with a preprocessor macro at the moment. We don't
+// use CRYPTOPP_POWER9_AVAILABLE because old compilers, like GCC 4.8 on CentOS 7,
+// will report NO even though we can produce the random numbers. Other Power9
+// implementations which use builtins will use the preprocessor macro guard. This
+// strategy also gets into a situation where Power9 is not available but DARN is.
+bool CPU_ProbeDARN()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#else
+# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721954
+    volatile int result = true;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+    {
+        // Do not leave our SIGILL handler installed on failure
+        signal(SIGILL, oldHandler);
+        return false;
+    }
+
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        // "darn r3, 1" in big- and little-endian
+#if CRYPTOPP_BIG_ENDIAN
+        __asm__ __volatile__ (".byte 0x7c, 0x61, 0x05, 0xe6 \n" : : : "r3");
+#else
+        __asm__ __volatile__ (".byte 0xe6, 0x05, 0x61, 0x7c \n" : : : "r3");
+#endif
+        // If we got here without SIGILL then success
+        result = true;
+    }
+
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+# else
+    // No GNU-style inline assembly, so the instruction cannot be probed
+    return false;
+# endif
+#endif // CRYPTOPP_NO_CPU_FEATURE_PROBES
+}
+
+#endif // PPC32 or PPC64
+
+NAMESPACE_END
diff --git a/ppc_simd.cpp b/ppc_simd.cpp
index 07d435fd..b8182ad5 100644
--- a/ppc_simd.cpp
+++ b/ppc_simd.cpp
@@ -89,4 +89,5 @@ bool CPU_ProbeAltivec()
 }
 # endif // CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64
 
+
 NAMESPACE_END
diff --git a/rdrand.cpp b/rdrand.cpp
index baa3aa8f..15afcd93 100644
--- a/rdrand.cpp
+++ b/rdrand.cpp
@@ -72,7 +72,7 @@ inline void RDRAND32(void* output)
 #endif
 }
 
-#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32
+#if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32)
 // Fills 8 bytes
 inline void RDRAND64(void* output)
 {
@@ -107,10 +107,10 @@ void RDRAND::GenerateBlock(byte *output, size_t size)
 
 #elif defined(GCC_RDRAND_ASM_AVAILABLE)
 
-# if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32
+# if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32)
     size_t i = 0;
     for (i = 0; i < size/8; i++)
-        RDRAND64(reinterpret_cast<word64*>(output)+i);
+        RDRAND64(output+i*8);
 
     output += i*8;
     size -= i*8;
@@ -124,7 +124,7 @@ void RDRAND::GenerateBlock(byte *output, size_t size)
 # else
     size_t i = 0;
     for (i = 0; i < size/4; i++)
-        RDRAND32(reinterpret_cast<word32*>(output)+i);
+        RDRAND32(output+i*4);
 
     output += i*4;
     size -= i*4;
@@ -178,7 +178,7 @@ inline void 
RDSEED32(void* output) #endif } -#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 +#if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32) // Fills 8 bytes inline void RDSEED64(void* output) { @@ -212,10 +212,10 @@ void RDSEED::GenerateBlock(byte *output, size_t size) MASM_RDSEED_GenerateBlock(output, size); #elif defined(GCC_RDSEED_ASM_AVAILABLE) -# if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 +# if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32) size_t i = 0; for (i = 0; i < size/8; i++) - RDSEED64(reinterpret_cast(output)+i); + RDSEED64(output+i*8); output += i*8; size -= i*8; @@ -229,7 +229,7 @@ void RDSEED::GenerateBlock(byte *output, size_t size) # else size_t i = 0; for (i = 0; i < size/4; i++) - RDSEED32(reinterpret_cast(output)+i); + RDSEED32(output+i*4); output += i*4; size -= i*4; @@ -273,11 +273,13 @@ RDRAND::RDRAND() void RDRAND::GenerateBlock(byte *output, size_t size) { + // Constructor will throw, should not get here CRYPTOPP_UNUSED(output); CRYPTOPP_UNUSED(size); } void RDRAND::DiscardBytes(size_t n) { + // Constructor will throw, should not get here CRYPTOPP_UNUSED(n); } @@ -288,11 +290,13 @@ RDSEED::RDSEED() void RDSEED::GenerateBlock(byte *output, size_t size) { + // Constructor will throw, should not get here CRYPTOPP_UNUSED(output); CRYPTOPP_UNUSED(size); } void RDSEED::DiscardBytes(size_t n) { + // Constructor will throw, should not get here CRYPTOPP_UNUSED(n); } diff --git a/rdrand.h b/rdrand.h index 5884765a..41e4921e 100644 --- a/rdrand.h +++ b/rdrand.h @@ -20,8 +20,8 @@ // GenerateBlock unconditionally retries and always fulfills the request. // Throughput varies wildly depending on processor and manufacturer. A Core i5 or -// Core i7 RDRAND can generate at over 200 MiB/s. Its below the theroetical -// maximum, but it takes about 5 instructions to generate, retry and store a +// Core i7 RDRAND can generate at over 200 MiB/s. It is below the theroetical +// maximum, but it takes about 5 instructions to generate, retry and store a // result. 
A low-end Celeron may perform RDRAND at about 7 MiB/s. RDSEED // performs at about 1/4 to 1/2 the rate of RDRAND. AMD RDRAND performed poorly // during testing with Athlon X4 845. The Bulldozer v4 only performed at 1 MiB/s. diff --git a/regtest1.cpp b/regtest1.cpp index a57ca63b..848ad885 100644 --- a/regtest1.cpp +++ b/regtest1.cpp @@ -27,6 +27,7 @@ #include "osrng.h" #include "drbg.h" +#include "darn.h" #include "mersenne.h" #include "rdrand.h" #include "padlkrng.h" @@ -132,6 +133,10 @@ void RegisterFactories1() RegisterDefaultFactoryFor(); if (HasRDSEED()) RegisterDefaultFactoryFor(); +#endif +#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) + if (HasDARN()) + RegisterDefaultFactoryFor(); #endif RegisterDefaultFactoryFor::Encryption >("AES/OFB RNG"); RegisterDefaultFactoryFor >("Hash_DRBG(SHA1)"); diff --git a/validat3.cpp b/validat3.cpp index 94eb9d37..feecb14f 100644 --- a/validat3.cpp +++ b/validat3.cpp @@ -12,6 +12,7 @@ #include "rng.h" #include "drbg.h" +#include "darn.h" #include "osrng.h" #include "rdrand.h" #include "mersenne.h" @@ -54,6 +55,9 @@ bool ValidateAll(bool thorough) pass=TestRDRAND() && pass; pass=TestRDSEED() && pass; #endif +#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) + pass=TestDARN() && pass; +#endif #if defined(CRYPTOPP_EXTENDED_VALIDATION) // http://github.com/weidai11/cryptopp/issues/92 pass=TestSecBlock() && pass; @@ -383,6 +387,7 @@ bool TestSettings() const bool hasAltivec = HasAltivec(); const bool hasPower7 = HasPower7(); const bool hasPower8 = HasPower8(); + const bool hasPower9 = HasPower9(); const bool hasPMULL = HasPMULL(); const bool hasAES = HasAES(); const bool hasSHA256 = HasSHA256(); @@ -390,9 +395,9 @@ bool TestSettings() std::cout << "passed: "; std::cout << "hasAltivec == " << hasAltivec << ", hasPower7 == " << hasPower7; - std::cout << ", hasPower8 == " << hasPower8 << ", hasPMULL == " << hasPMULL; - std::cout << ", hasAES == " << hasAES << ", hasSHA256 == " << hasSHA256; - std::cout << ", hasSHA512 == " << 
hasSHA512 << "\n"; + std::cout << ", hasPower8 == " << hasPower8 << ", hasPower9 == " << hasPower9; + std::cout << ", hasPMULL == " << hasPMULL << ", hasAES == " << hasAES; + std::cout << ", hasSHA256 == " << hasSHA256 << ", hasSHA512 == " << hasSHA512 << "\n"; #endif @@ -718,7 +723,7 @@ bool TestMersenne() #endif #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) - bool TestPadlockRNG() +bool TestPadlockRNG() { std::cout << "\nTesting Padlock RNG generator...\n\n"; @@ -877,7 +882,50 @@ bool TestRDSEED() return pass; } -#endif +#endif // x86, x32, or x64 + +#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) +bool TestDARN() +{ + std::cout << "\nTesting DARN generator...\n\n"; + + bool pass = true; + member_ptr rng; + + try {rng.reset(new DARN);} + catch (const DARN_Err &) {} + + if (rng.get()) + { + DARN& darn = dynamic_cast(*rng.get()); + pass = Test_RandomNumberGenerator(darn) && pass; + + MaurerRandomnessTest maurer; + const unsigned int SIZE = 1024*10; + RandomNumberSource(darn, SIZE, true, new Redirector(maurer)); + + CRYPTOPP_ASSERT(0 == maurer.BytesNeeded()); + const double mv = maurer.GetTestValue(); + if (mv < 0.98f) + pass = false; + + std::ostringstream oss; + oss.flags(std::ios::fixed); + oss.precision(6); + + if (!pass) + oss << "FAILED:"; + else + oss << "passed:"; + oss << " Maurer Randomness Test returned value " << mv << "\n"; + std::cout << oss.str(); + } + else + std::cout << "DARN generator not available, skipping test.\n"; + + return pass; +} +#endif // PPC32 or PPC64 bool ValidateHashDRBG() { diff --git a/validate.h b/validate.h index 1e2beaf4..f892ca8b 100644 --- a/validate.h +++ b/validate.h @@ -33,6 +33,9 @@ bool TestRDRAND(); bool TestRDSEED(); bool TestPadlockRNG(); #endif +#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) +bool TestDARN(); +#endif bool ValidateBaseCode(); bool ValidateEncoder(); bool ValidateCRC32();