Merge branch 'master' into hmqv

pull/263/head
Jeffrey Walton 2016-07-10 16:24:05 -04:00
commit 8ceba46e49
5 changed files with 61 additions and 31 deletions

View File

@ -22,9 +22,14 @@ NAMESPACE_BEGIN(CryptoPP)
# undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE # undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
#endif #endif
// Testing shows Sun CC needs 12.4 for _mm_set_epi64x // SunCC needs 12.4 for _mm_set_epi64x, _mm_blend_epi16, _mm_shuffle_epi16, etc
#if (__SUNPRO_CC <= 0x5130) #if defined(__SUNPRO_CC) && (__SUNPRO_CC < 0x5130)
# undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE # undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
# undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#elif (__SUNPRO_CC >= 0x5130)
# include <emmintrin.h> // _mm_set_epi64x
# include <smmintrin.h> // _mm_blend_epi16
# include <tmmintrin.h> // _mm_shuffle_epi16
#endif #endif
// Visual Studio needs VS2008 (1500); no dependency on _mm_set_epi64x() // Visual Studio needs VS2008 (1500); no dependency on _mm_set_epi64x()

View File

@ -13,6 +13,11 @@ NAMESPACE_BEGIN(CryptoPP)
# undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE # undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#endif #endif
// SunCC 12.4 and above
#if defined(CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE) && (__SUNPRO_CC >= 0x5130)
# include <nmmintrin.h>
#endif
/* Table of CRC-32's of all single byte values (made by makecrc.c) */ /* Table of CRC-32's of all single byte values (made by makecrc.c) */
const word32 CRC32::m_tab[] = { const word32 CRC32::m_tab[] = {
#ifdef IS_LITTLE_ENDIAN #ifdef IS_LITTLE_ENDIAN

View File

@ -22,6 +22,8 @@
# The fastest results (in running time) will most likely use: # The fastest results (in running time) will most likely use:
# HAVE_VALGRIND=0 WANT_BENCHMARKS=0 ./cryptest.sh # HAVE_VALGRIND=0 WANT_BENCHMARKS=0 ./cryptest.sh
# Using 'fast' is shorthand for it:
# ./cryptest.sh fast
############################################ ############################################
# Set to suit your taste # Set to suit your taste
@ -343,6 +345,12 @@ else
fi fi
fi fi
# Fixup... SunCC appears to botch the code generation
if [[ ("$SUN_COMPILER" -ne "0" )]];then
HAVE_O5=0
OPT_O5=
fi
# Hit or miss, mostly hit # Hit or miss, mostly hit
HAVE_OS=0 HAVE_OS=0
OPT_OS= OPT_OS=
@ -500,7 +508,7 @@ HAVE_X86_AES=0
HAVE_X86_RDRAND=0 HAVE_X86_RDRAND=0
HAVE_X86_RDSEED=0 HAVE_X86_RDSEED=0
HAVE_X86_PCLMUL=0 HAVE_X86_PCLMUL=0
if [[ (("$IS_X86" -ne "0") || ("$IS_X64" -ne "0")) ]]; then if [[ (("$IS_X86" -ne "0") || ("$IS_X64" -ne "0")) && ("$SUN_COMPILER" -eq "0") ]]; then
rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 rm -f "$TMP/adhoc.exe" > /dev/null 2>&1
"$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then if [[ "$?" -eq "0" ]]; then
@ -529,8 +537,9 @@ fi
# ld-gold linker testing # ld-gold linker testing
if [[ (-z "$HAVE_LDGOLD") ]]; then if [[ (-z "$HAVE_LDGOLD") ]]; then
HAVE_LDGOLD=0 HAVE_LDGOLD=0
LD_GOLD=$(which ld.gold 2>/dev/null | "$GREP" -v "no ld.gold" | head -1) LD_GOLD=$(which ld.gold 2>&1 | "$GREP" -v "no ld.gold" | head -1)
if [[ (! -z "$LD_GOLD") ]]; then ELF_FILE=$(which file 2>&1 | "$GREP" -v "no file" | head -1)
if [[ (! -z "$LD_GOLD") && (! -z "$ELF_FILE") ]]; then
HAVE_LDGOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf") HAVE_LDGOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf")
fi fi
fi fi

View File

@ -108,7 +108,7 @@ Call_RDRAND_EAX:
;; RDRAND is not available prior to VS2012. Just emit ;; RDRAND is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdrand eax`. ;; the byte codes using DB. This is `rdrand eax`.
DB 0Fh, 0C7h, 0F0h DB 0Fh, 0C7h, 0F0h
;; If CF=1, the number returned by RDRAND is valid. ;; If CF=1, the number returned by RDRAND is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc RDRAND_succeeded jc RDRAND_succeeded
@ -126,13 +126,13 @@ RDRAND_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb Partial_Machine_Word jb Partial_Machine_Word
Full_Machine_Word: Full_Machine_Word:
mov DWORD PTR [buffer], eax mov DWORD PTR [buffer], eax
add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like
sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds
;; Continue ;; Continue
jmp GenerateBlock_Top jmp GenerateBlock_Top
@ -146,9 +146,9 @@ Partial_Machine_Word:
mov WORD PTR [buffer], ax mov WORD PTR [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
Bit_1_Not_Set: Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz GenerateBlock_Success jz GenerateBlock_Success
@ -164,14 +164,14 @@ GenerateBlock_PreRet:
;; Test for success (was the request completely fulfilled?) ;; Test for success (was the request completely fulfilled?)
cmp bsize, 0 cmp bsize, 0
je GenerateBlock_Success je GenerateBlock_Success
GenerateBlock_Failure: GenerateBlock_Failure:
xor eax, eax xor eax, eax
mov al, RDRAND_FAILURE mov al, RDRAND_FAILURE
ret ret
GenerateBlock_Success: GenerateBlock_Success:
xor eax, eax xor eax, eax
@ -241,7 +241,7 @@ RDRAND_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb Partial_Machine_Word jb Partial_Machine_Word
Full_Machine_Word: Full_Machine_Word:
mov QWORD PTR [buffer], rax mov QWORD PTR [buffer], rax
@ -284,19 +284,19 @@ Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp GenerateBlock_Success jmp GenerateBlock_Success
GenerateBlock_PreRet: GenerateBlock_PreRet:
;; Test for success (was the request completely fulfilled?) ;; Test for success (was the request completely fulfilled?)
cmp bsize, 0 cmp bsize, 0
je GenerateBlock_Success je GenerateBlock_Success
GenerateBlock_Failure: GenerateBlock_Failure:
xor rax, rax xor rax, rax
mov al, RDRAND_FAILURE mov al, RDRAND_FAILURE
ret ret
GenerateBlock_Success: GenerateBlock_Success:
xor rax, rax xor rax, rax
@ -352,7 +352,7 @@ Call_RDSEED_EAX:
;; RDSEED is not available prior to VS2012. Just emit ;; RDSEED is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdseed eax`. ;; the byte codes using DB. This is `rdseed eax`.
DB 0Fh, 0C7h, 0F8h DB 0Fh, 0C7h, 0F8h
;; If CF=1, the number returned by RDSEED is valid. ;; If CF=1, the number returned by RDSEED is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc RDSEED_succeeded jc RDSEED_succeeded
@ -370,13 +370,13 @@ RDSEED_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb Partial_Machine_Word jb Partial_Machine_Word
Full_Machine_Word: Full_Machine_Word:
mov DWORD PTR [buffer], eax mov DWORD PTR [buffer], eax
add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like
sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds
;; Continue ;; Continue
jmp GenerateBlock_Top jmp GenerateBlock_Top
@ -390,9 +390,9 @@ Partial_Machine_Word:
mov WORD PTR [buffer], ax mov WORD PTR [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
Bit_1_Not_Set: Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz GenerateBlock_Success jz GenerateBlock_Success
@ -408,14 +408,14 @@ GenerateBlock_PreRet:
;; Test for success (was the request completely fulfilled?) ;; Test for success (was the request completely fulfilled?)
cmp bsize, 0 cmp bsize, 0
je GenerateBlock_Success je GenerateBlock_Success
GenerateBlock_Failure: GenerateBlock_Failure:
xor eax, eax xor eax, eax
mov al, RDSEED_FAILURE mov al, RDSEED_FAILURE
ret ret
GenerateBlock_Success: GenerateBlock_Success:
xor eax, eax xor eax, eax
@ -485,7 +485,7 @@ RDSEED_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb Partial_Machine_Word jb Partial_Machine_Word
Full_Machine_Word: Full_Machine_Word:
mov QWORD PTR [buffer], rax mov QWORD PTR [buffer], rax
@ -528,19 +528,19 @@ Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp GenerateBlock_Success jmp GenerateBlock_Success
GenerateBlock_PreRet: GenerateBlock_PreRet:
;; Test for success (was the request completely fulfilled?) ;; Test for success (was the request completely fulfilled?)
cmp bsize, 0 cmp bsize, 0
je GenerateBlock_Success je GenerateBlock_Success
GenerateBlock_Failure: GenerateBlock_Failure:
xor rax, rax xor rax, rax
mov al, RDSEED_FAILURE mov al, RDSEED_FAILURE
ret ret
GenerateBlock_Success: GenerateBlock_Success:
xor rax, rax xor rax, rax

View File

@ -68,7 +68,7 @@
#if defined(CRYPTOPP_CPUID_AVAILABLE) #if defined(CRYPTOPP_CPUID_AVAILABLE)
# define MSC_INTRIN_COMPILER ((CRYPTOPP_MSC_VERSION >= 1700) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210)) # define MSC_INTRIN_COMPILER ((CRYPTOPP_MSC_VERSION >= 1700) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210))
# define GCC_INTRIN_COMPILER ((CRYPTOPP_GCC_VERSION >= 40600) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210)) # define GCC_INTRIN_COMPILER ((CRYPTOPP_GCC_VERSION >= 40600) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30200) || (_INTEL_COMPILER >= 1210)) || (__SUNPRO_CC >= 0x5130)
#else #else
# define MSC_INTRIN_COMPILER 0 # define MSC_INTRIN_COMPILER 0
# define GCC_INTRIN_COMPILER 0 # define GCC_INTRIN_COMPILER 0
@ -99,6 +99,17 @@
# elif CRYPTOPP_BOOL_RDSEED_ASM # elif CRYPTOPP_BOOL_RDSEED_ASM
# define GCC_RDSEED_ASM_AVAILABLE 1 # define GCC_RDSEED_ASM_AVAILABLE 1
# endif # endif
#elif defined(CRYPTOPP_CPUID_AVAILABLE) && (__SUNPRO_CC >= 0x5100)
# if GCC_INTRIN_COMPILER && defined(__RDRND__) && (__SUNPRO_CC >= 0x5130)
# define ALL_RDRAND_INTRIN_AVAILABLE 1
# elif CRYPTOPP_BOOL_RDRAND_ASM
# define GCC_RDRAND_ASM_AVAILABLE 1
# endif
# if GCC_INTRIN_COMPILER && defined(__RDSEED__) && (__SUNPRO_CC >= 0x5150)
# define ALL_RDSEED_INTRIN_AVAILABLE 1
# elif CRYPTOPP_BOOL_RDSEED_ASM
# define GCC_RDSEED_ASM_AVAILABLE 1
# endif
#endif #endif
// Debug diagnostics // Debug diagnostics
@ -131,7 +142,7 @@
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
#if (ALL_RDRAND_INTRIN_AVAILABLE || ALL_RDSEED_INTRIN_AVAILABLE) #if (ALL_RDRAND_INTRIN_AVAILABLE || ALL_RDSEED_INTRIN_AVAILABLE)
# include <immintrin.h> // rdrand, MSC, ICC, and GCC # include <immintrin.h> // rdrand, MSC, ICC, GCC, and SunCC
# if defined(__GNUC__) && (CRYPTOPP_GCC_VERSION >= 40600) # if defined(__GNUC__) && (CRYPTOPP_GCC_VERSION >= 40600)
# include <x86intrin.h> // rdseed for some compilers, like GCC # include <x86intrin.h> // rdseed for some compilers, like GCC
# endif # endif