From e2d53c4f1dfd3a95bd62a3bd063a9938601dede0 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 10 Jul 2016 01:56:40 -0400 Subject: [PATCH 1/5] Disable -O5 tests for Sun Studio. The optimizer is producing bad code for BLAKE2b --- cryptest.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/cryptest.sh b/cryptest.sh index dd02a697..e47ae203 100755 --- a/cryptest.sh +++ b/cryptest.sh @@ -343,6 +343,12 @@ else fi fi +# Fixup... SunCC appears to botch the code generation +if [[ ("$SUN_COMPILER" -ne "0" )]];then + HAVE_O5=0 + OPT_O5= +fi + # Hit or miss, mostly hit HAVE_OS=0 OPT_OS= @@ -500,7 +506,7 @@ HAVE_X86_AES=0 HAVE_X86_RDRAND=0 HAVE_X86_RDSEED=0 HAVE_X86_PCLMUL=0 -if [[ (("$IS_X86" -ne "0") || ("$IS_X64" -ne "0")) ]]; then +if [[ (("$IS_X86" -ne "0") || ("$IS_X64" -ne "0")) && ("$SUN_COMPILER" -eq "0") ]]; then rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then @@ -529,8 +535,9 @@ fi # ld-gold linker testing if [[ (-z "$HAVE_LDGOLD") ]]; then HAVE_LDGOLD=0 - LD_GOLD=$(which ld.gold 2>/dev/null | "$GREP" -v "no ld.gold" | head -1) - if [[ (! -z "$LD_GOLD") ]]; then + LD_GOLD=$(which ld.gold 2>&1 | "$GREP" -v "no ld.gold" | head -1) + ELF_FILE=$(which file 2>&1 | "$GREP" -v "no file" | head -1) + if [[ (! -z "$LD_GOLD") && (! 
-z "$ELF_FILE") ]]; then HAVE_LDGOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf") fi fi From e69994620238e887a94eaddeb7c3b6fc9992e6d3 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 10 Jul 2016 15:01:37 -0400 Subject: [PATCH 2/5] Update comment --- cryptest.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cryptest.sh b/cryptest.sh index e47ae203..427c598e 100755 --- a/cryptest.sh +++ b/cryptest.sh @@ -22,6 +22,8 @@ # The fastest results (in running time) will most likely use: # HAVE_VALGRIND=0 WANT_BENCHMARKS=0 ./cryptest.sh +# Using 'fast' is shorthand for it: +# ./cryptest.sh fast ############################################ # Set to suite your taste From b80f725accdd27c854c3575f877ec2a24d6081ec Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 10 Jul 2016 15:02:22 -0400 Subject: [PATCH 3/5] Add header for SunCC 12.4 --- crc.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crc.cpp b/crc.cpp index 738d1238..ad411a85 100644 --- a/crc.cpp +++ b/crc.cpp @@ -13,6 +13,11 @@ NAMESPACE_BEGIN(CryptoPP) # undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE #endif +// SunCC 12.4 and above +#if defined(CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE) && (__SUNPRO_CC >= 0x5130) +# include +#endif + /* Table of CRC-32's of all single byte values (made by makecrc.c) */ const word32 CRC32::m_tab[] = { #ifdef IS_LITTLE_ENDIAN From 36da00cc9293aa7f5297489e7d32d0cb19e2a4a4 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 10 Jul 2016 15:16:20 -0400 Subject: [PATCH 4/5] Add RDRAND and RDSEED for SunCC 12.1 and above --- rdrand.asm | 48 ++++++++++++++++++++++++------------------------ rdrand.cpp | 15 +++++++++++++-- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/rdrand.asm b/rdrand.asm index 73f5a964..9fcf6e0a 100644 --- a/rdrand.asm +++ b/rdrand.asm @@ -108,7 +108,7 @@ Call_RDRAND_EAX: ;; RDRAND is not available prior to VS2012. Just emit ;; the byte codes using DB. This is `rdrand eax`. 
DB 0Fh, 0C7h, 0F0h - + ;; If CF=1, the number returned by RDRAND is valid. ;; If CF=0, a random number was not available. jc RDRAND_succeeded @@ -126,13 +126,13 @@ RDRAND_succeeded: cmp bsize, MWSIZE jb Partial_Machine_Word - + Full_Machine_Word: mov DWORD PTR [buffer], eax add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds - + ;; Continue jmp GenerateBlock_Top @@ -146,9 +146,9 @@ Partial_Machine_Word: mov WORD PTR [buffer], ax shr eax, 16 add buffer, 2 - + Bit_1_Not_Set: - + ;; Test bit 0 to see if size is at least 1 test bsize, 1 jz GenerateBlock_Success @@ -164,14 +164,14 @@ GenerateBlock_PreRet: ;; Test for success (was the request completely fulfilled?) cmp bsize, 0 - je GenerateBlock_Success - + je GenerateBlock_Success + GenerateBlock_Failure: xor eax, eax mov al, RDRAND_FAILURE ret - + GenerateBlock_Success: xor eax, eax @@ -241,7 +241,7 @@ RDRAND_succeeded: cmp bsize, MWSIZE jb Partial_Machine_Word - + Full_Machine_Word: mov QWORD PTR [buffer], rax @@ -284,19 +284,19 @@ Bit_0_Not_Set: ;; We've hit all the bits jmp GenerateBlock_Success - + GenerateBlock_PreRet: ;; Test for success (was the request completely fulfilled?) cmp bsize, 0 je GenerateBlock_Success - + GenerateBlock_Failure: xor rax, rax mov al, RDRAND_FAILURE ret - + GenerateBlock_Success: xor rax, rax @@ -352,7 +352,7 @@ Call_RDSEED_EAX: ;; RDSEED is not available prior to VS2012. Just emit ;; the byte codes using DB. This is `rdseed eax`. DB 0Fh, 0C7h, 0F8h - + ;; If CF=1, the number returned by RDSEED is valid. ;; If CF=0, a random number was not available. 
jc RDSEED_succeeded @@ -370,13 +370,13 @@ RDSEED_succeeded: cmp bsize, MWSIZE jb Partial_Machine_Word - + Full_Machine_Word: mov DWORD PTR [buffer], eax add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds - + ;; Continue jmp GenerateBlock_Top @@ -390,9 +390,9 @@ Partial_Machine_Word: mov WORD PTR [buffer], ax shr eax, 16 add buffer, 2 - + Bit_1_Not_Set: - + ;; Test bit 0 to see if size is at least 1 test bsize, 1 jz GenerateBlock_Success @@ -408,14 +408,14 @@ GenerateBlock_PreRet: ;; Test for success (was the request completely fulfilled?) cmp bsize, 0 - je GenerateBlock_Success - + je GenerateBlock_Success + GenerateBlock_Failure: xor eax, eax mov al, RDSEED_FAILURE ret - + GenerateBlock_Success: xor eax, eax @@ -485,7 +485,7 @@ RDSEED_succeeded: cmp bsize, MWSIZE jb Partial_Machine_Word - + Full_Machine_Word: mov QWORD PTR [buffer], rax @@ -528,19 +528,19 @@ Bit_0_Not_Set: ;; We've hit all the bits jmp GenerateBlock_Success - + GenerateBlock_PreRet: ;; Test for success (was the request completely fulfilled?) 
cmp bsize, 0 je GenerateBlock_Success - + GenerateBlock_Failure: xor rax, rax mov al, RDSEED_FAILURE ret - + GenerateBlock_Success: xor rax, rax diff --git a/rdrand.cpp b/rdrand.cpp index 4edb1bd8..2cbca4fe 100644 --- a/rdrand.cpp +++ b/rdrand.cpp @@ -68,7 +68,7 @@ #if defined(CRYPTOPP_CPUID_AVAILABLE) # define MSC_INTRIN_COMPILER ((CRYPTOPP_MSC_VERSION >= 1700) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210)) -# define GCC_INTRIN_COMPILER ((CRYPTOPP_GCC_VERSION >= 40600) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210)) +# define GCC_INTRIN_COMPILER ((CRYPTOPP_GCC_VERSION >= 40600) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210) || (__SUNPRO_CC >= 0x5130)) #else # define MSC_INTRIN_COMPILER 0 # define GCC_INTRIN_COMPILER 0 @@ -99,6 +99,17 @@ # elif CRYPTOPP_BOOL_RDSEED_ASM # define GCC_RDSEED_ASM_AVAILABLE 1 # endif +#elif defined(CRYPTOPP_CPUID_AVAILABLE) && (__SUNPRO_CC >= 0x5100) +# if GCC_INTRIN_COMPILER && defined(__RDRND__) +# define ALL_RDRAND_INTRIN_AVAILABLE 1 +# elif CRYPTOPP_BOOL_RDRAND_ASM +# define GCC_RDRAND_ASM_AVAILABLE 1 +# endif +# if GCC_INTRIN_COMPILER && defined(__RDSEED__) +# define ALL_RDSEED_INTRIN_AVAILABLE 1 +# elif CRYPTOPP_BOOL_RDSEED_ASM +# define GCC_RDSEED_ASM_AVAILABLE 1 +# endif #endif // Debug diagnostics @@ -131,7 +142,7 @@ ///////////////////////////////////////////////////////////////////// #if (ALL_RDRAND_INTRIN_AVAILABLE || ALL_RDSEED_INTRIN_AVAILABLE) -# include // rdrand, MSC, ICC, and GCC +# include // rdrand, MSC, ICC, GCC, and SunCC # if defined(__GNUC__) && (CRYPTOPP_GCC_VERSION >= 40600) # include // rdseed for some compilers, like GCC # endif From f8200af3ae18c564bfb260ee5ce52c6df74dee78 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 10 Jul 2016 16:21:19 -0400 Subject: [PATCH 5/5] Add SunCC 12.4 support for specialized implementations of CRC, RDRAND and BLAKE2 --- blake2.cpp | 9 +++++++-- crc.cpp | 2 +- rdrand.cpp | 4 ++-- 3 files changed, 
10 insertions(+), 5 deletions(-) diff --git a/blake2.cpp b/blake2.cpp index 0cb858b1..7e86a98f 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -22,9 +22,14 @@ NAMESPACE_BEGIN(CryptoPP) # undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE #endif -// Testing shows Sun CC needs 12.4 for _mm_set_epi64x -#if (__SUNPRO_CC <= 0x5130) +// SunCC needs 12.4 for _mm_set_epi64x, _mm_blend_epi16, _mm_shuffle_epi16, etc +#if defined(__SUNPRO_CC) && (__SUNPRO_CC < 0x5130) # undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +# undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE +#elif (__SUNPRO_CC >= 0x5130) +# include // _mm_set_epi64x +# include // _mm_blend_epi16 +# include // _mm_shuffle_epi16 #endif // Visual Studio needs VS2008 (1500); no dependency on _mm_set_epi64x() diff --git a/crc.cpp b/crc.cpp index ad411a85..3cdcc9a9 100644 --- a/crc.cpp +++ b/crc.cpp @@ -15,7 +15,7 @@ NAMESPACE_BEGIN(CryptoPP) // SunCC 12.4 and above #if defined(CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE) && (__SUNPRO_CC >= 0x5130) -# include +# include #endif /* Table of CRC-32's of all single byte values (made by makecrc.c) */ diff --git a/rdrand.cpp b/rdrand.cpp index 2cbca4fe..fec720c0 100644 --- a/rdrand.cpp +++ b/rdrand.cpp @@ -100,12 +100,12 @@ # define GCC_RDSEED_ASM_AVAILABLE 1 # endif #elif defined(CRYPTOPP_CPUID_AVAILABLE) && (__SUNPRO_CC >= 0x5100) -# if GCC_INTRIN_COMPILER && defined(__RDRND__) +# if GCC_INTRIN_COMPILER && defined(__RDRND__) && (__SUNPRO_CC >= 0x5130) # define ALL_RDRAND_INTRIN_AVAILABLE 1 # elif CRYPTOPP_BOOL_RDRAND_ASM # define GCC_RDRAND_ASM_AVAILABLE 1 # endif -# if GCC_INTRIN_COMPILER && defined(__RDSEED__) +# if GCC_INTRIN_COMPILER && defined(__RDSEED__) && (__SUNPRO_CC >= 0x5150) # define ALL_RDSEED_INTRIN_AVAILABLE 1 # elif CRYPTOPP_BOOL_RDSEED_ASM # define GCC_RDSEED_ASM_AVAILABLE 1