diff --git a/blake2.cpp b/blake2.cpp index 0cb858b1..7e86a98f 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -22,9 +22,14 @@ NAMESPACE_BEGIN(CryptoPP) # undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE #endif -// Testing shows Sun CC needs 12.4 for _mm_set_epi64x -#if (__SUNPRO_CC <= 0x5130) +// SunCC needs 12.4 for _mm_set_epi64x, _mm_blend_epi16, _mm_shuffle_epi16, etc +#if defined(__SUNPRO_CC) && (__SUNPRO_CC < 0x5130) # undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +# undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE +#elif (__SUNPRO_CC >= 0x5130) +# include <emmintrin.h> // _mm_set_epi64x +# include <smmintrin.h> // _mm_blend_epi16 +# include <tmmintrin.h> // _mm_shuffle_epi16 #endif // Visual Studio needs VS2008 (1500); no dependency on _mm_set_epi64x() diff --git a/crc.cpp b/crc.cpp index 738d1238..3cdcc9a9 100644 --- a/crc.cpp +++ b/crc.cpp @@ -13,6 +13,11 @@ NAMESPACE_BEGIN(CryptoPP) # undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE #endif +// SunCC 12.4 and above +#if defined(CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE) && (__SUNPRO_CC >= 0x5130) +# include <nmmintrin.h> +#endif + /* Table of CRC-32's of all single byte values (made by makecrc.c) */ const word32 CRC32::m_tab[] = { #ifdef IS_LITTLE_ENDIAN diff --git a/cryptest.sh b/cryptest.sh index dd02a697..427c598e 100755 --- a/cryptest.sh +++ b/cryptest.sh @@ -22,6 +22,8 @@ # The fastest results (in running time) will most likely use: # HAVE_VALGRIND=0 WANT_BENCHMARKS=0 ./cryptest.sh +# Using 'fast' is shorthand for it: +# ./cryptest.sh fast ############################################ # Set to suite your taste @@ -343,6 +345,12 @@ else fi fi +# Fixup... 
SunCC appears to botch the code generation +if [[ ("$SUN_COMPILER" -ne "0" )]];then + HAVE_O5=0 + OPT_O5= +fi + # Hit or miss, mostly hit HAVE_OS=0 OPT_OS= @@ -500,7 +508,7 @@ HAVE_X86_AES=0 HAVE_X86_RDRAND=0 HAVE_X86_RDSEED=0 HAVE_X86_PCLMUL=0 -if [[ (("$IS_X86" -ne "0") || ("$IS_X64" -ne "0")) ]]; then +if [[ (("$IS_X86" -ne "0") || ("$IS_X64" -ne "0")) && ("$SUN_COMPILER" -eq "0") ]]; then rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then @@ -529,8 +537,9 @@ fi # ld-gold linker testing if [[ (-z "$HAVE_LDGOLD") ]]; then HAVE_LDGOLD=0 - LD_GOLD=$(which ld.gold 2>/dev/null | "$GREP" -v "no ld.gold" | head -1) - if [[ (! -z "$LD_GOLD") ]]; then + LD_GOLD=$(which ld.gold 2>&1 | "$GREP" -v "no ld.gold" | head -1) + ELF_FILE=$(which file 2>&1 | "$GREP" -v "no file" | head -1) + if [[ (! -z "$LD_GOLD") && (! -z "$ELF_FILE") ]]; then HAVE_LDGOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf") fi fi diff --git a/rdrand.asm b/rdrand.asm index 73f5a964..9fcf6e0a 100644 --- a/rdrand.asm +++ b/rdrand.asm @@ -108,7 +108,7 @@ Call_RDRAND_EAX: ;; RDRAND is not available prior to VS2012. Just emit ;; the byte codes using DB. This is `rdrand eax`. DB 0Fh, 0C7h, 0F0h - + ;; If CF=1, the number returned by RDRAND is valid. ;; If CF=0, a random number was not available. 
jc RDRAND_succeeded @@ -126,13 +126,13 @@ RDRAND_succeeded: cmp bsize, MWSIZE jb Partial_Machine_Word - + Full_Machine_Word: mov DWORD PTR [buffer], eax add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds - + ;; Continue jmp GenerateBlock_Top @@ -146,9 +146,9 @@ Partial_Machine_Word: mov WORD PTR [buffer], ax shr eax, 16 add buffer, 2 - + Bit_1_Not_Set: - + ;; Test bit 0 to see if size is at least 1 test bsize, 1 jz GenerateBlock_Success @@ -164,14 +164,14 @@ GenerateBlock_PreRet: ;; Test for success (was the request completely fulfilled?) cmp bsize, 0 - je GenerateBlock_Success - + je GenerateBlock_Success + GenerateBlock_Failure: xor eax, eax mov al, RDRAND_FAILURE ret - + GenerateBlock_Success: xor eax, eax @@ -241,7 +241,7 @@ RDRAND_succeeded: cmp bsize, MWSIZE jb Partial_Machine_Word - + Full_Machine_Word: mov QWORD PTR [buffer], rax @@ -284,19 +284,19 @@ Bit_0_Not_Set: ;; We've hit all the bits jmp GenerateBlock_Success - + GenerateBlock_PreRet: ;; Test for success (was the request completely fulfilled?) cmp bsize, 0 je GenerateBlock_Success - + GenerateBlock_Failure: xor rax, rax mov al, RDRAND_FAILURE ret - + GenerateBlock_Success: xor rax, rax @@ -352,7 +352,7 @@ Call_RDSEED_EAX: ;; RDSEED is not available prior to VS2012. Just emit ;; the byte codes using DB. This is `rdseed eax`. DB 0Fh, 0C7h, 0F8h - + ;; If CF=1, the number returned by RDSEED is valid. ;; If CF=0, a random number was not available. 
jc RDSEED_succeeded @@ -370,13 +370,13 @@ RDSEED_succeeded: cmp bsize, MWSIZE jb Partial_Machine_Word - + Full_Machine_Word: mov DWORD PTR [buffer], eax add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds - + ;; Continue jmp GenerateBlock_Top @@ -390,9 +390,9 @@ Partial_Machine_Word: mov WORD PTR [buffer], ax shr eax, 16 add buffer, 2 - + Bit_1_Not_Set: - + ;; Test bit 0 to see if size is at least 1 test bsize, 1 jz GenerateBlock_Success @@ -408,14 +408,14 @@ GenerateBlock_PreRet: ;; Test for success (was the request completely fulfilled?) cmp bsize, 0 - je GenerateBlock_Success - + je GenerateBlock_Success + GenerateBlock_Failure: xor eax, eax mov al, RDSEED_FAILURE ret - + GenerateBlock_Success: xor eax, eax @@ -485,7 +485,7 @@ RDSEED_succeeded: cmp bsize, MWSIZE jb Partial_Machine_Word - + Full_Machine_Word: mov QWORD PTR [buffer], rax @@ -528,19 +528,19 @@ Bit_0_Not_Set: ;; We've hit all the bits jmp GenerateBlock_Success - + GenerateBlock_PreRet: ;; Test for success (was the request completely fulfilled?) 
cmp bsize, 0 je GenerateBlock_Success - + GenerateBlock_Failure: xor rax, rax mov al, RDSEED_FAILURE ret - + GenerateBlock_Success: xor rax, rax diff --git a/rdrand.cpp b/rdrand.cpp index 4edb1bd8..fec720c0 100644 --- a/rdrand.cpp +++ b/rdrand.cpp @@ -68,7 +68,7 @@ #if defined(CRYPTOPP_CPUID_AVAILABLE) # define MSC_INTRIN_COMPILER ((CRYPTOPP_MSC_VERSION >= 1700) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210)) -# define GCC_INTRIN_COMPILER ((CRYPTOPP_GCC_VERSION >= 40600) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210)) +# define GCC_INTRIN_COMPILER ((CRYPTOPP_GCC_VERSION >= 40600) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210) || (__SUNPRO_CC >= 0x5130)) #else # define MSC_INTRIN_COMPILER 0 # define GCC_INTRIN_COMPILER 0 @@ -99,6 +99,17 @@ # elif CRYPTOPP_BOOL_RDSEED_ASM # define GCC_RDSEED_ASM_AVAILABLE 1 # endif +#elif defined(CRYPTOPP_CPUID_AVAILABLE) && (__SUNPRO_CC >= 0x5100) +# if GCC_INTRIN_COMPILER && defined(__RDRND__) && (__SUNPRO_CC >= 0x5130) +# define ALL_RDRAND_INTRIN_AVAILABLE 1 +# elif CRYPTOPP_BOOL_RDRAND_ASM +# define GCC_RDRAND_ASM_AVAILABLE 1 +# endif +# if GCC_INTRIN_COMPILER && defined(__RDSEED__) && (__SUNPRO_CC >= 0x5150) +# define ALL_RDSEED_INTRIN_AVAILABLE 1 +# elif CRYPTOPP_BOOL_RDSEED_ASM +# define GCC_RDSEED_ASM_AVAILABLE 1 +# endif #endif // Debug diagnostics @@ -131,7 +142,7 @@ ///////////////////////////////////////////////////////////////////// #if (ALL_RDRAND_INTRIN_AVAILABLE || ALL_RDSEED_INTRIN_AVAILABLE) -# include <immintrin.h> // rdrand, MSC, ICC, and GCC +# include <immintrin.h> // rdrand, MSC, ICC, GCC, and SunCC # if defined(__GNUC__) && (CRYPTOPP_GCC_VERSION >= 40600) # include <x86intrin.h> // rdseed for some compilers, like GCC # endif