Improve performance of RDRAND and RDSEED (Issue 387)

pull/354/merge
Jeffrey Walton 2017-03-07 03:57:23 -05:00
parent 1e5d6ee8d4
commit 14d92f9eba
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
5 changed files with 695 additions and 1154 deletions

550
rdrand.S
View File

@ -12,31 +12,10 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Naming convention used in rdrand.{h|cpp|asm|S}
;; MSC = Microsoft Compiler (and compatibles)
;; GCC = GNU Compiler (and compatibles)
;; ALL = MSC and GCC (and compatibles)
;; RRA = RDRAND, Assembly
;; RSA = RDSEED, Assembly
;; RRI = RDRAND, Intrinsic
;; RSA = RDSEED, Intrinsic
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; C/C++ Function prototypes ;; C/C++ Function prototypes
;; X86, X32 and X64: ;; X86, X32 and X64:
;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety); ;; extern "C" void NASM_RDRAND_GenerateBlock(byte* ptr, size_t size);
;; extern "C" void NASM_RDSEED_GenerateBlock(byte* ptr, size_t size);
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Return values
%define RDRAND_SUCCESS 1
%define RDRAND_FAILURE 0
%define RDSEED_SUCCESS 1
%define RDSEED_FAILURE 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -45,275 +24,211 @@
;; Arg1, byte* buffer ;; Arg1, byte* buffer
;; Arg2, size_t bsize ;; Arg2, size_t bsize
;; Arg3, unsigned int safety
;; EAX (out): success (1), failure (0)
global NASM_RRA_GenerateBlock global NASM_RDRAND_GenerateBlock
section .text section .text
%ifdef X86 %ifdef X86
align 8 align 8
cpu 486 cpu 486
%else %else
align 16 align 16
%endif %endif
NASM_RRA_GenerateBlock: NASM_RDRAND_GenerateBlock:
%ifdef X86 %ifdef X86
%define arg1 [ebp+04h] %define arg1 [ebp+04h]
%define arg2 [ebp+08h] %define arg2 [ebp+08h]
%define arg3 [ebp+0ch]
%define MWSIZE 04h ;; machine word size %define MWSIZE 04h ;; machine word size
%else %else
%define MWSIZE 08h ;; machine word size %define MWSIZE 08h ;; machine word size
%endif %endif
%define buffer edi %define buffer edi
%define bsize esi %define bsize esi
%define safety edx
%ifdef X86 %ifdef X86
.Load_Arguments: .Load_Arguments:
mov buffer, arg1 mov buffer, arg1
mov bsize, arg2 mov bsize, arg2
mov safety, arg3
%endif %endif
.Validate_Pointer: ;; Top of While loop
cmp buffer, 0
je .GenerateBlock_PreRet
;; Top of While loop
.GenerateBlock_Top: .GenerateBlock_Top:
;; Check remaining size ;; Check remaining size
cmp bsize, 0 cmp bsize, 0
je .GenerateBlock_Success je .GenerateBlock_Return
.Call_RDRAND:
%ifdef X86 %ifdef X86
.Call_RDRAND_EAX: .Call_RDRAND_EAX:
%else %else
.Call_RDRAND_RAX: .Call_RDRAND_RAX:
DB 48h ;; X32 can use the full register, issue the REX.w prefix DB 48h ;; X32 can use the full register, issue the REX.w prefix
%endif %endif
;; RDRAND is not available prior to VS2012. Just emit ;; RDRAND is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdrand eax`. ;; the byte codes using DB. This is `rdrand eax`.
DB 0Fh, 07h, 0F0h DB 0Fh, 07h, 0F0h
;; If CF=1, the number returned by RDRAND is valid. ;; If CF=1, the number returned by RDRAND is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc .RDRAND_succeeded jnc .Call_RDRAND
.RDRAND_failed:
;; Exit if we've reached the limit
cmp safety, 0
je .GenerateBlock_Failure
dec safety
jmp .GenerateBlock_Top
.RDRAND_succeeded: .RDRAND_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb .Partial_Machine_Word jb .Partial_Machine_Word
.Full_Machine_Word: .Full_Machine_Word:
%ifdef X32 %ifdef X32
mov [buffer+4], eax ;; We can only move 4 at a time mov [buffer+4], eax ;; We can only move 4 at a time
DB 048h ;; Combined, these result in DB 048h ;; Combined, these result in
shr eax, 32 ;; `shr rax, 32` shr eax, 32 ;; `shr rax, 32`
%endif %endif
mov [buffer], eax mov [buffer], eax
add buffer, MWSIZE ;; No need for Intel Core 2 slow word workarounds, add buffer, MWSIZE ;; No need for Intel Core 2 slow word workarounds,
sub bsize, MWSIZE ;; like `lea buffer,[buffer+MWSIZE]` for faster adds sub bsize, MWSIZE ;; like `lea buffer,[buffer+MWSIZE]` for faster adds
;; Continue ;; Continue
jmp .GenerateBlock_Top jmp .GenerateBlock_Top
;; 1,2,3 bytes remain for X86 ;; 1,2,3 bytes remain for X86
;; 1,2,3,4,5,6,7 remain for X32 ;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word: .Partial_Machine_Word:
%ifdef X32 %ifdef X32
;; Test bit 2 to see if size is at least 4 ;; Test bit 2 to see if size is at least 4
test bsize, 4 test bsize, 4
jz .Bit_2_Not_Set jz .Bit_2_Not_Set
mov [buffer], eax mov [buffer], eax
add buffer, 4 add buffer, 4
DB 048h ;; Combined, these result in DB 048h ;; Combined, these result in
shr eax, 32 ;; `shr rax, 32` shr eax, 32 ;; `shr rax, 32`
.Bit_2_Not_Set: .Bit_2_Not_Set:
%endif %endif
;; Test bit 1 to see if size is at least 2 ;; Test bit 1 to see if size is at least 2
test bsize, 2 test bsize, 2
jz .Bit_1_Not_Set jz .Bit_1_Not_Set
mov [buffer], ax mov [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
.Bit_1_Not_Set: .Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz .GenerateBlock_Success jz .Bit_0_Not_Set
mov [buffer], al mov [buffer], al
.Bit_0_Not_Set: .Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp .GenerateBlock_Success
.GenerateBlock_PreRet: .GenerateBlock_Return:
;; Test for success (was the request completely fulfilled?) xor eax, eax
cmp bsize, 0 ret
je .GenerateBlock_Success
.GenerateBlock_Failure: %endif ;; X86 and X32
xor eax, eax
mov al, RDRAND_FAILURE
ret
.GenerateBlock_Success:
xor eax, eax
mov al, RDRAND_SUCCESS
ret
%endif ;; X86 and X32
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X64 ;; Set via the command line %ifdef X64 ;; Set via the command line
global NASM_RRA_GenerateBlock global NASM_RDRAND_GenerateBlock
section .text section .text
align 16 align 16
;; Arg1, byte* buffer ;; Arg1, byte* buffer
;; Arg2, size_t bsize ;; Arg2, size_t bsize
;; Arg3, unsigned int safety
;; RAX (out): success (1), failure (0)
NASM_RRA_GenerateBlock: NASM_RDRAND_GenerateBlock:
%define MWSIZE 08h ;; machine word size %define MWSIZE 08h ;; machine word size
%define buffer rdi %define buffer rdi
%define bsize rsi %define bsize rsi
%define safety edx
;; No need for Load_Arguments due to fastcall ;; No need for Load_Arguments due to fastcall
.Validate_Pointer: ;; Top of While loop
;; Validate pointer
cmp buffer, 0
je .GenerateBlock_PreRet
;; Top of While loop
.GenerateBlock_Top: .GenerateBlock_Top:
;; Check remaining size ;; Check remaining size
cmp bsize, 0 cmp bsize, 0
je .GenerateBlock_Success je .GenerateBlock_Return
.Call_RDRAND_RAX: .Call_RDRAND_RAX:
;; RDRAND is not available prior to VS2012. Just emit ;; RDRAND is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdrand rax`. ;; the byte codes using DB. This is `rdrand rax`.
DB 048h, 0Fh, 0C7h, 0F0h DB 048h, 0Fh, 0C7h, 0F0h
;; If CF=1, the number returned by RDRAND is valid. ;; If CF=1, the number returned by RDRAND is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc .RDRAND_succeeded jnc .Call_RDRAND_RAX
.RDRAND_failed:
;; Exit if we've reached the limit
cmp safety, 0h
je .GenerateBlock_Failure
dec safety
jmp .GenerateBlock_Top
.RDRAND_succeeded: .RDRAND_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb .Partial_Machine_Word jb .Partial_Machine_Word
.Full_Machine_Word: .Full_Machine_Word:
mov [buffer], rax mov [buffer], rax
add buffer, MWSIZE add buffer, MWSIZE
sub bsize, MWSIZE sub bsize, MWSIZE
;; Continue ;; Continue
jmp .GenerateBlock_Top jmp .GenerateBlock_Top
;; 1,2,3,4,5,6,7 bytes remain ;; 1,2,3,4,5,6,7 bytes remain
.Partial_Machine_Word: .Partial_Machine_Word:
;; Test bit 2 to see if size is at least 4 ;; Test bit 2 to see if size is at least 4
test bsize, 4 test bsize, 4
jz .Bit_2_Not_Set jz .Bit_2_Not_Set
mov [buffer], eax mov [buffer], eax
shr rax, 32 shr rax, 32
add buffer, 4 add buffer, 4
.Bit_2_Not_Set: .Bit_2_Not_Set:
;; Test bit 1 to see if size is at least 2 ;; Test bit 1 to see if size is at least 2
test bsize, 2 test bsize, 2
jz .Bit_1_Not_Set jz .Bit_1_Not_Set
mov [buffer], ax mov [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
.Bit_1_Not_Set: .Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz .GenerateBlock_Success jz .Bit_0_Not_Set
mov [buffer], al mov [buffer], al
.Bit_0_Not_Set: .Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp .GenerateBlock_Success
.GenerateBlock_PreRet: .GenerateBlock_Return:
;; Test for success (was the request completely fulfilled?) xor rax, rax
cmp bsize, 0 ret
je .GenerateBlock_Success
.GenerateBlock_Failure:
xor rax, rax
mov al, RDRAND_FAILURE
ret
.GenerateBlock_Success:
xor rax, rax
mov al, RDRAND_SUCCESS
ret
%endif ;; X64 %endif ;; X64
@ -324,273 +239,208 @@ NASM_RRA_GenerateBlock:
;; Arg1, byte* buffer ;; Arg1, byte* buffer
;; Arg2, size_t bsize ;; Arg2, size_t bsize
;; Arg3, unsigned int safety
;; EAX (out): success (1), failure (0)
global NASM_RSA_GenerateBlock global NASM_RDSEED_GenerateBlock
section .text section .text
align 8 align 8
%ifdef X86 %ifdef X86
align 8 align 8
cpu 486 cpu 486
%else %else
align 16 align 16
%endif %endif
NASM_RSA_GenerateBlock: NASM_RDSEED_GenerateBlock:
%ifdef X86 %ifdef X86
%define arg1 [ebp+04h] %define arg1 [ebp+04h]
%define arg2 [ebp+08h] %define arg2 [ebp+08h]
%define arg3 [ebp+0ch]
%define MWSIZE 04h ;; machine word size %define MWSIZE 04h ;; machine word size
%else %else
%define MWSIZE 08h ;; machine word size %define MWSIZE 08h ;; machine word size
%endif %endif
%define buffer edi %define buffer edi
%define bsize esi %define bsize esi
%define safety edx
%ifdef X86 %ifdef X86
.Load_Arguments: .Load_Arguments:
mov buffer, arg1 mov buffer, arg1
mov bsize, arg2 mov bsize, arg2
mov safety, arg3
%endif %endif
.Validate_Pointer: ;; Top of While loop
cmp buffer, 0
je .GenerateBlock_PreRet
;; Top of While loop
.GenerateBlock_Top: .GenerateBlock_Top:
;; Check remaining size ;; Check remaining size
cmp bsize, 0 cmp bsize, 0
je .GenerateBlock_Success je .GenerateBlock_Return
.Call_RDSEED:
%ifdef X86 %ifdef X86
.Call_RDSEED_EAX: .Call_RDSEED_EAX:
%else %else
.Call_RDSEED_RAX: .Call_RDSEED_RAX:
DB 48h ;; X32 can use the full register, issue the REX.w prefix DB 48h ;; X32 can use the full register, issue the REX.w prefix
%endif %endif
;; RDSEED is not available prior to VS2012. Just emit ;; RDSEED is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdseed eax`. ;; the byte codes using DB. This is `rdseed eax`.
DB 0Fh, 0C7h, 0F8h DB 0Fh, 0C7h, 0F8h
;; If CF=1, the number returned by RDSEED is valid. ;; If CF=1, the number returned by RDSEED is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc .RDSEED_succeeded jnc .Call_RDSEED
.RDSEED_failed:
;; Exit if we've reached the limit
cmp safety, 0
je .GenerateBlock_Failure
dec safety
jmp .GenerateBlock_Top
.RDSEED_succeeded: .RDSEED_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb .Partial_Machine_Word jb .Partial_Machine_Word
.Full_Machine_Word: .Full_Machine_Word:
mov [buffer], eax mov [buffer], eax
add buffer, MWSIZE ;; No need for Intel Core 2 slow word workarounds, add buffer, MWSIZE ;; No need for Intel Core 2 slow word workarounds,
sub bsize, MWSIZE ;; like `lea buffer,[buffer+MWSIZE]` for faster adds sub bsize, MWSIZE ;; like `lea buffer,[buffer+MWSIZE]` for faster adds
;; Continue ;; Continue
jmp .GenerateBlock_Top jmp .GenerateBlock_Top
;; 1,2,3 bytes remain for X86 ;; 1,2,3 bytes remain for X86
;; 1,2,3,4,5,6,7 remain for X32 ;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word: .Partial_Machine_Word:
%ifdef X32 %ifdef X32
;; Test bit 2 to see if size is at least 4 ;; Test bit 2 to see if size is at least 4
test bsize, 4 test bsize, 4
jz .Bit_2_Not_Set jz .Bit_2_Not_Set
mov [buffer], eax mov [buffer], eax
add buffer, 4 add buffer, 4
DB 048h ;; Combined, these result in DB 048h ;; Combined, these result in
shr eax, 32 ;; `shr rax, 32` shr eax, 32 ;; `shr rax, 32`
.Bit_2_Not_Set: .Bit_2_Not_Set:
%endif %endif
;; Test bit 1 to see if size is at least 2 ;; Test bit 1 to see if size is at least 2
test bsize, 2 test bsize, 2
jz .Bit_1_Not_Set jz .Bit_1_Not_Set
mov [buffer], ax mov [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
.Bit_1_Not_Set: .Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz .GenerateBlock_Success jz .Bit_0_Not_Set
mov [buffer], al mov [buffer], al
.Bit_0_Not_Set: .Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp .GenerateBlock_Success
.GenerateBlock_PreRet: .GenerateBlock_Return:
;; Test for success (was the request completely fulfilled?) xor eax, eax
cmp bsize, 0 ret
je .GenerateBlock_Success
.GenerateBlock_Failure:
xor eax, eax
mov al, RDSEED_FAILURE
ret
.GenerateBlock_Success:
xor eax, eax
mov al, RDSEED_SUCCESS
ret
%endif ;; X86 and X32 %endif ;; X86 and X32
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X64 ;; Set via the command line %ifdef X64 ;; Set via the command line
global NASM_RSA_GenerateBlock global NASM_RDSEED_GenerateBlock
section .text section .text
align 16 align 16
;; Arg1, byte* buffer ;; Arg1, byte* buffer
;; Arg2, size_t bsize ;; Arg2, size_t bsize
;; Arg3, unsigned int safety
;; RAX (out): success (1), failure (0)
NASM_RSA_GenerateBlock: NASM_RDSEED_GenerateBlock:
%define MWSIZE 08h ;; machine word size %define MWSIZE 08h ;; machine word size
%define buffer rdi %define buffer rdi
%define bsize rsi %define bsize rsi
%define safety edx
;; No need for Load_Arguments due to fastcall ;; No need for Load_Arguments due to fastcall
.Validate_Pointer: ;; Top of While loop
;; Validate pointer
cmp buffer, 0
je .GenerateBlock_PreRet
;; Top of While loop
.GenerateBlock_Top: .GenerateBlock_Top:
;; Check remaining size ;; Check remaining size
cmp bsize, 0 cmp bsize, 0
je .GenerateBlock_Success je .GenerateBlock_Return
.Call_RDSEED_RAX: .Call_RDSEED_RAX:
;; RDSEED is not available prior to VS2012. Just emit ;; RDSEED is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdseed rax`. ;; the byte codes using DB. This is `rdseed rax`.
DB 048h, 0Fh, 0C7h, 0F8h DB 048h, 0Fh, 0C7h, 0F8h
;; If CF=1, the number returned by RDSEED is valid. ;; If CF=1, the number returned by RDSEED is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc .RDSEED_succeeded jnc .Call_RDSEED_RAX
.RDSEED_failed:
;; Exit if we've reached the limit
cmp safety, 0
je .GenerateBlock_Failure
dec safety
jmp .GenerateBlock_Top
.RDSEED_succeeded: .RDSEED_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb .Partial_Machine_Word jb .Partial_Machine_Word
.Full_Machine_Word: .Full_Machine_Word:
mov [buffer], rax mov [buffer], rax
add buffer, MWSIZE add buffer, MWSIZE
sub bsize, MWSIZE sub bsize, MWSIZE
;; Continue ;; Continue
jmp .GenerateBlock_Top jmp .GenerateBlock_Top
;; 1,2,3,4,5,6,7 bytes remain ;; 1,2,3,4,5,6,7 bytes remain
.Partial_Machine_Word: .Partial_Machine_Word:
;; Test bit 2 to see if size is at least 4 ;; Test bit 2 to see if size is at least 4
test bsize, 4 test bsize, 4
jz .Bit_2_Not_Set jz .Bit_2_Not_Set
mov [buffer], eax mov [buffer], eax
shr rax, 32 shr rax, 32
add buffer, 4 add buffer, 4
.Bit_2_Not_Set: .Bit_2_Not_Set:
;; Test bit 1 to see if size is at least 2 ;; Test bit 1 to see if size is at least 2
test bsize, 2 test bsize, 2
jz .Bit_1_Not_Set jz .Bit_1_Not_Set
mov [buffer], ax mov [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
.Bit_1_Not_Set: .Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz .GenerateBlock_Success jz .Bit_0_Not_Set
mov [buffer], al mov [buffer], al
.Bit_0_Not_Set: .Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp .GenerateBlock_Success
.GenerateBlock_PreRet: .GenerateBlock_Return:
;; Test for success (was the request completely fulfilled?) xor rax, rax
cmp bsize, 0 ret
je .GenerateBlock_Success
.GenerateBlock_Failure: %endif ;; _M_X64
xor rax, rax
mov al, RDSEED_FAILURE
ret
.GenerateBlock_Success:
xor rax, rax
mov al, RDSEED_SUCCESS
ret
%endif ;; _M_X64
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@ -13,24 +13,15 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
TITLE MASM_RRA_GenerateBlock and MASM_RSA_GenerateBlock TITLE MASM_RDRAND_GenerateBlock and MASM_RDSEED_GenerateBlock
SUBTITLE Microsoft specific ASM code to utilize RDRAND and RDSEED for down level Microsoft toolchains SUBTITLE Microsoft specific ASM code to utilize RDRAND and RDSEED for down level Microsoft toolchains
PUBLIC MASM_RRA_GenerateBlock PUBLIC MASM_RDRAND_GenerateBlock
PUBLIC MASM_RSA_GenerateBlock PUBLIC MASM_RDSEED_GenerateBlock
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Naming convention used in rdrand.{h|cpp|asm}
;; MSC = Microsoft Compiler (and compatibles)
;; GCC = GNU Compiler (and compatibles)
;; ALL = MSC and GCC (and compatibles)
;; RRA = RDRAND, Assembly
;; RSA = RDSEED, Assembly
;; RRI = RDRAND, Intrinsic
;; RSA = RDSEED, Intrinsic
;; Caller/Callee Saved Registers ;; Caller/Callee Saved Registers
;; https://msdn.microsoft.com/en-us/library/6t169e9c.aspx ;; https://msdn.microsoft.com/en-us/library/6t169e9c.aspx
@ -39,19 +30,9 @@ PUBLIC MASM_RSA_GenerateBlock
;; C/C++ Function prototypes ;; C/C++ Function prototypes
;; X86: ;; X86:
;; extern "C" int MASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety); ;; extern "C" void MASM_RDRAND_GenerateBlock(byte* ptr, size_t size);
;; X64: ;; X64:
;; extern "C" int __fastcall MASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety); ;; extern "C" void __fastcall MASM_RDRAND_GenerateBlock(byte* ptr, size_t size);
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Return values
RDRAND_SUCCESS EQU 1
RDRAND_FAILURE EQU 0
RDSEED_SUCCESS EQU 1
RDSEED_FAILURE EQU 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -77,109 +58,80 @@ OPTION EPILOGUE:NONE
;; Caller pushes on stack following CDECL (right to left) ;; Caller pushes on stack following CDECL (right to left)
;; arg1: byte* buffer ;; arg1: byte* buffer
;; arg2: size_t bsize ;; arg2: size_t bsize
;; arg3: unsigned int safety
;; EAX (out): success (1), failure (0)
MASM_RRA_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD, arg3:DWORD MASM_RDRAND_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD
MWSIZE EQU 04h ;; machine word size MWSIZE EQU 04h ;; machine word size
buffer EQU edi buffer EQU edi
bsize EQU edx bsize EQU edx
safety EQU ecx
Load_Arguments: Load_Arguments:
mov buffer, DWORD PTR [esp+04h] ;; arg1 mov buffer, DWORD PTR [esp+04h] ;; arg1
mov bsize, DWORD PTR [esp+08h] ;; arg2 mov bsize, DWORD PTR [esp+08h] ;; arg2
mov safety, DWORD PTR [esp+0Ch] ;; arg3
Validate_Pointer: ;; Top of While loop
cmp buffer, 0
je GenerateBlock_PreRet
;; Top of While loop
GenerateBlock_Top: GenerateBlock_Top:
;; Check remaining size ;; Check remaining size
cmp bsize, 0 cmp bsize, 0
je GenerateBlock_Success je GenerateBlock_Return
Call_RDRAND_EAX: Call_RDRAND_EAX:
;; RDRAND is not available prior to VS2012. Just emit ;; RDRAND is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdrand eax`. ;; the byte codes using DB. This is `rdrand eax`.
DB 0Fh, 0C7h, 0F0h DB 0Fh, 0C7h, 0F0h
;; If CF=1, the number returned by RDRAND is valid. ;; If CF=1, the number returned by RDRAND is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc RDRAND_succeeded
RDRAND_failed: ;; Retry immediately
jnc Call_RDRAND_EAX
;; Exit if we've reached the limit
cmp safety, 0
je GenerateBlock_Failure
dec safety
jmp GenerateBlock_Top
RDRAND_succeeded: RDRAND_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb Partial_Machine_Word jb Partial_Machine_Word
Full_Machine_Word: Full_Machine_Word:
mov DWORD PTR [buffer], eax mov DWORD PTR [buffer], eax
add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like
sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds
;; Continue ;; Continue
jmp GenerateBlock_Top jmp GenerateBlock_Top
;; 1,2,3 bytes remain ;; 1,2,3 bytes remain
Partial_Machine_Word: Partial_Machine_Word:
;; Test bit 1 to see if size is at least 2 ;; Test bit 1 to see if size is at least 2
test bsize, 2 test bsize, 2
jz Bit_1_Not_Set jz Bit_1_Not_Set
mov WORD PTR [buffer], ax mov WORD PTR [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
Bit_1_Not_Set: Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz GenerateBlock_Success jz Bit_0_Not_Set
mov BYTE PTR [buffer], al mov BYTE PTR [buffer], al
Bit_0_Not_Set: Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp GenerateBlock_Success
GenerateBlock_PreRet: GenerateBlock_Return:
;; Test for success (was the request completely fulfilled?) ;; Clear artifacts
cmp bsize, 0 xor eax, eax
je GenerateBlock_Success ret
GenerateBlock_Failure: MASM_RDRAND_GenerateBlock ENDP
xor eax, eax
mov al, RDRAND_FAILURE
ret
GenerateBlock_Success:
xor eax, eax
mov al, RDRAND_SUCCESS
ret
MASM_RRA_GenerateBlock ENDP
ENDIF ;; _M_X86 ENDIF ;; _M_X86
@ -198,116 +150,87 @@ OPTION EPILOGUE:NONE
;; RCX (in): arg1, byte* buffer ;; RCX (in): arg1, byte* buffer
;; RDX (in): arg2, size_t bsize ;; RDX (in): arg2, size_t bsize
;; R8d (in): arg3, unsigned int safety
;; RAX (out): success (1), failure (0)
MASM_RRA_GenerateBlock PROC MASM_RDRAND_GenerateBlock PROC
MWSIZE EQU 08h ;; machine word size MWSIZE EQU 08h ;; machine word size
buffer EQU rcx buffer EQU rcx
bsize EQU rdx bsize EQU rdx
safety EQU r8d
;; No need for Load_Arguments due to fastcall ;; No need for Load_Arguments due to fastcall
Validate_Pointer: ;; Top of While loop
;; Validate pointer
cmp buffer, 0
je GenerateBlock_PreRet
;; Top of While loop
GenerateBlock_Top: GenerateBlock_Top:
;; Check remaining size ;; Check remaining size
cmp bsize, 0 cmp bsize, 0
je GenerateBlock_Success je GenerateBlock_Return
Call_RDRAND_RAX: Call_RDRAND_RAX:
;; RDRAND is not available prior to VS2012. Just emit ;; RDRAND is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdrand rax`. ;; the byte codes using DB. This is `rdrand rax`.
DB 048h, 0Fh, 0C7h, 0F0h DB 048h, 0Fh, 0C7h, 0F0h
;; If CF=1, the number returned by RDRAND is valid. ;; If CF=1, the number returned by RDRAND is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc RDRAND_succeeded
RDRAND_failed: ;; Retry immediately
jnc Call_RDRAND_RAX
;; Exit if we've reached the limit
cmp safety, 0
je GenerateBlock_Failure
dec safety
jmp GenerateBlock_Top
RDRAND_succeeded: RDRAND_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb Partial_Machine_Word jb Partial_Machine_Word
Full_Machine_Word: Full_Machine_Word:
mov QWORD PTR [buffer], rax mov QWORD PTR [buffer], rax
add buffer, MWSIZE add buffer, MWSIZE
sub bsize, MWSIZE sub bsize, MWSIZE
;; Continue ;; Continue
jmp GenerateBlock_Top jmp GenerateBlock_Top
;; 1,2,3,4,5,6,7 bytes remain ;; 1,2,3,4,5,6,7 bytes remain
Partial_Machine_Word: Partial_Machine_Word:
;; Test bit 2 to see if size is at least 4 ;; Test bit 2 to see if size is at least 4
test bsize, 4 test bsize, 4
jz Bit_2_Not_Set jz Bit_2_Not_Set
mov DWORD PTR [buffer], eax mov DWORD PTR [buffer], eax
shr rax, 32 shr rax, 32
add buffer, 4 add buffer, 4
Bit_2_Not_Set: Bit_2_Not_Set:
;; Test bit 1 to see if size is at least 2 ;; Test bit 1 to see if size is at least 2
test bsize, 2 test bsize, 2
jz Bit_1_Not_Set jz Bit_1_Not_Set
mov WORD PTR [buffer], ax mov WORD PTR [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
Bit_1_Not_Set: Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz GenerateBlock_Success jz Bit_0_Not_Set
mov BYTE PTR [buffer], al mov BYTE PTR [buffer], al
Bit_0_Not_Set: Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp GenerateBlock_Success
GenerateBlock_PreRet: GenerateBlock_Return:
;; Test for success (was the request completely fulfilled?) ;; Clear artifacts
cmp bsize, 0 xor rax, rax
je GenerateBlock_Success
GenerateBlock_Failure:
xor rax, rax
mov al, RDRAND_FAILURE
ret ret
GenerateBlock_Success: MASM_RDRAND_GenerateBlock ENDP
xor rax, rax
mov al, RDRAND_SUCCESS
ret
MASM_RRA_GenerateBlock ENDP
ENDIF ;; _M_X64 ENDIF ;; _M_X64
@ -328,109 +251,80 @@ OPTION EPILOGUE:NONE
;; Caller pushes on stack following CDECL (right to left) ;; Caller pushes on stack following CDECL (right to left)
;; arg1: byte* buffer ;; arg1: byte* buffer
;; arg2: size_t bsize ;; arg2: size_t bsize
;; arg3: unsigned int safety
;; EAX (out): success (1), failure (0)
MASM_RSA_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD, arg3:DWORD MASM_RDSEED_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD
MWSIZE EQU 04h ;; machine word size MWSIZE EQU 04h ;; machine word size
buffer EQU edi buffer EQU edi
bsize EQU edx bsize EQU edx
safety EQU ecx
Load_Arguments: Load_Arguments:
mov buffer, DWORD PTR [esp+04h] ;; arg1 mov buffer, DWORD PTR [esp+04h] ;; arg1
mov bsize, DWORD PTR [esp+08h] ;; arg2 mov bsize, DWORD PTR [esp+08h] ;; arg2
mov safety, DWORD PTR [esp+0Ch] ;; arg3
Validate_Pointer: ;; Top of While loop
cmp buffer, 0
je GenerateBlock_PreRet
;; Top of While loop
GenerateBlock_Top: GenerateBlock_Top:
;; Check remaining size ;; Check remaining size
cmp bsize, 0 cmp bsize, 0
je GenerateBlock_Success je GenerateBlock_Return
Call_RDSEED_EAX: Call_RDSEED_EAX:
;; RDSEED is not available prior to VS2012. Just emit ;; RDSEED is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdseed eax`. ;; the byte codes using DB. This is `rdseed eax`.
DB 0Fh, 0C7h, 0F8h DB 0Fh, 0C7h, 0F8h
;; If CF=1, the number returned by RDSEED is valid. ;; If CF=1, the number returned by RDSEED is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc RDSEED_succeeded
RDSEED_failed: ;; Retry immediately
jnc Call_RDSEED_EAX
;; Exit if we've reached the limit
cmp safety, 0
je GenerateBlock_Failure
dec safety
jmp GenerateBlock_Top
RDSEED_succeeded: RDSEED_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb Partial_Machine_Word jb Partial_Machine_Word
Full_Machine_Word: Full_Machine_Word:
mov DWORD PTR [buffer], eax mov DWORD PTR [buffer], eax
add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like
sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds
;; Continue ;; Continue
jmp GenerateBlock_Top jmp GenerateBlock_Top
;; 1,2,3 bytes remain ;; 1,2,3 bytes remain
Partial_Machine_Word: Partial_Machine_Word:
;; Test bit 1 to see if size is at least 2 ;; Test bit 1 to see if size is at least 2
test bsize, 2 test bsize, 2
jz Bit_1_Not_Set jz Bit_1_Not_Set
mov WORD PTR [buffer], ax mov WORD PTR [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
Bit_1_Not_Set: Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz GenerateBlock_Success jz Bit_0_Not_Set
mov BYTE PTR [buffer], al mov BYTE PTR [buffer], al
Bit_0_Not_Set: Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp GenerateBlock_Success
GenerateBlock_PreRet: GenerateBlock_Return:
;; Test for success (was the request completely fulfilled?) ;; Clear artifacts
cmp bsize, 0 xor eax, eax
je GenerateBlock_Success
GenerateBlock_Failure:
xor eax, eax
mov al, RDSEED_FAILURE
ret ret
GenerateBlock_Success: MASM_RDSEED_GenerateBlock ENDP
xor eax, eax
mov al, RDSEED_SUCCESS
ret
MASM_RSA_GenerateBlock ENDP
ENDIF ;; _M_X86 ENDIF ;; _M_X86
@ -449,116 +343,87 @@ OPTION EPILOGUE:NONE
;; RCX (in): arg1, byte* buffer ;; RCX (in): arg1, byte* buffer
;; RDX (in): arg2, size_t bsize ;; RDX (in): arg2, size_t bsize
;; R8d (in): arg3, unsigned int safety
;; RAX (out): success (1), failure (0)
MASM_RSA_GenerateBlock PROC ;; arg1:QWORD,arg2:QWORD,arg3:DWORD MASM_RDSEED_GenerateBlock PROC ;; arg1:QWORD,arg2:QWORD
MWSIZE EQU 08h ;; machine word size MWSIZE EQU 08h ;; machine word size
buffer EQU rcx buffer EQU rcx
bsize EQU rdx bsize EQU rdx
safety EQU r8d
;; No need for Load_Arguments due to fastcall ;; No need for Load_Arguments due to fastcall
Validate_Pointer: ;; Top of While loop
;; Validate pointer
cmp buffer, 0
je GenerateBlock_PreRet
;; Top of While loop
GenerateBlock_Top: GenerateBlock_Top:
;; Check remaining size ;; Check remaining size
cmp bsize, 0 cmp bsize, 0
je GenerateBlock_Success je GenerateBlock_Return
Call_RDSEED_RAX: Call_RDSEED_RAX:
;; RDSEED is not available prior to VS2012. Just emit ;; RDSEED is not available prior to VS2012. Just emit
;; the byte codes using DB. This is `rdseed rax`. ;; the byte codes using DB. This is `rdseed rax`.
DB 048h, 0Fh, 0C7h, 0F8h DB 048h, 0Fh, 0C7h, 0F8h
;; If CF=1, the number returned by RDSEED is valid. ;; If CF=1, the number returned by RDSEED is valid.
;; If CF=0, a random number was not available. ;; If CF=0, a random number was not available.
jc RDSEED_succeeded
RDSEED_failed: ;; Retry immediately
jnc Call_RDSEED_RAX
;; Exit if we've reached the limit
cmp safety, 0
je GenerateBlock_Failure
dec safety
jmp GenerateBlock_Top
RDSEED_succeeded: RDSEED_succeeded:
cmp bsize, MWSIZE cmp bsize, MWSIZE
jb Partial_Machine_Word jb Partial_Machine_Word
Full_Machine_Word: Full_Machine_Word:
mov QWORD PTR [buffer], rax mov QWORD PTR [buffer], rax
add buffer, MWSIZE add buffer, MWSIZE
sub bsize, MWSIZE sub bsize, MWSIZE
;; Continue ;; Continue
jmp GenerateBlock_Top jmp GenerateBlock_Top
;; 1,2,3,4,5,6,7 bytes remain ;; 1,2,3,4,5,6,7 bytes remain
Partial_Machine_Word: Partial_Machine_Word:
;; Test bit 2 to see if size is at least 4 ;; Test bit 2 to see if size is at least 4
test bsize, 4 test bsize, 4
jz Bit_2_Not_Set jz Bit_2_Not_Set
mov DWORD PTR [buffer], eax mov DWORD PTR [buffer], eax
shr rax, 32 shr rax, 32
add buffer, 4 add buffer, 4
Bit_2_Not_Set: Bit_2_Not_Set:
;; Test bit 1 to see if size is at least 2 ;; Test bit 1 to see if size is at least 2
test bsize, 2 test bsize, 2
jz Bit_1_Not_Set jz Bit_1_Not_Set
mov WORD PTR [buffer], ax mov WORD PTR [buffer], ax
shr eax, 16 shr eax, 16
add buffer, 2 add buffer, 2
Bit_1_Not_Set: Bit_1_Not_Set:
;; Test bit 0 to see if size is at least 1 ;; Test bit 0 to see if size is at least 1
test bsize, 1 test bsize, 1
jz GenerateBlock_Success jz Bit_0_Not_Set
mov BYTE PTR [buffer], al mov BYTE PTR [buffer], al
Bit_0_Not_Set: Bit_0_Not_Set:
;; We've hit all the bits ;; We've hit all the bits
jmp GenerateBlock_Success
GenerateBlock_PreRet: GenerateBlock_Return:
;; Test for success (was the request completely fulfilled?) ;; Clear artifacts
cmp bsize, 0 xor rax, rax
je GenerateBlock_Success ret
GenerateBlock_Failure: MASM_RDSEED_GenerateBlock ENDP
xor rax, rax
mov al, RDSEED_FAILURE
ret
GenerateBlock_Success:
xor rax, rax
mov al, RDSEED_SUCCESS
ret
MASM_RSA_GenerateBlock ENDP
ENDIF ;; _M_X64 ENDIF ;; _M_X64

View File

@ -7,26 +7,21 @@
#include "rdrand.h" #include "rdrand.h"
#include "cpu.h" #include "cpu.h"
#include <iostream>
#if CRYPTOPP_MSC_VERSION #if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4100) # pragma warning(disable: 4100)
#endif #endif
// This file (and friends) provides both RDRAND and RDSEED, but its somewhat // This file (and friends) provides both RDRAND and RDSEED. They were added at
// experimental. They were added at Crypto++ 5.6.3. At compile time, it // Crypto++ 5.6.3. At compile time, it uses CRYPTOPP_BOOL_{X86|X32|X64}
// indirectly uses CRYPTOPP_BOOL_{X86|X32|X64} (via CRYPTOPP_CPUID_AVAILABLE)
// to select an implementation or "throw NotImplemented". At runtime, the // to select an implementation or "throw NotImplemented". At runtime, the
// class uses the result of CPUID to determine if RDRAND or RDSEED are // class uses the result of CPUID to determine if RDRAND or RDSEED are
// available. If not available, a lazy throw strategy is used. I.e., the // available. If not available, then a SIGILL will result.
// throw is deferred until GenerateBlock() is called. // The original classes accepted a retry count. Retries were superflous for
// RDRAND, and RDSEED encountered a failure about 1 in 256 bytes depending
// Here's the naming convention for the functions.... // on the processor. Retries were removed at Crypto++ 6.0 because the
// MSC = Microsoft Compiler (and compatibles) // functions always fulfill the request.
// GCC = GNU Compiler (and compatibles)
// ALL = MSC and GCC (and compatibles)
// RRA = RDRAND, Assembly
// RSA = RDSEED, Assembly
// RRI = RDRAND, Intrinsic
// RSA = RDSEED, Intrinsic
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -44,24 +39,17 @@
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
// In general, the library's ASM code is best on Windows, and Intrinsics is
// the best code under GCC. Clang is missing symbols, so it gets ASM.
// The NASM code is optimized well on Linux, but its not easy to cut-in.
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
# ifndef CRYPTOPP_CPUID_AVAILABLE
# define CRYPTOPP_CPUID_AVAILABLE
# endif
#endif
#if defined(CRYPTOPP_CPUID_AVAILABLE) #if defined(CRYPTOPP_CPUID_AVAILABLE)
# if defined(CRYPTOPP_MSC_VERSION) # if defined(CRYPTOPP_MSC_VERSION)
# if (CRYPTOPP_MSC_VERSION >= 1700) # if (CRYPTOPP_MSC_VERSION >= 1700)
# define ALL_RDRAND_INTRIN_AVAILABLE 1 # define MASM_RDRAND_ASM_AVAILABLE 1
// # define ALL_RDRAND_INTRIN_AVAILABLE 1
# else # else
# define MASM_RDRAND_ASM_AVAILABLE 1 # define MASM_RDRAND_ASM_AVAILABLE 1
# endif # endif
# if (CRYPTOPP_MSC_VERSION >= 1800) # if (CRYPTOPP_MSC_VERSION >= 1800)
# define ALL_RDSEED_INTRIN_AVAILABLE 1 # define MASM_RDSEED_ASM_AVAILABLE 1
// # define ALL_RDSEED_INTRIN_AVAILABLE 1
# else # else
# define MASM_RDSEED_ASM_AVAILABLE 1 # define MASM_RDSEED_ASM_AVAILABLE 1
# endif # endif
@ -80,45 +68,15 @@
# define GCC_RDSEED_ASM_AVAILABLE 1 # define GCC_RDSEED_ASM_AVAILABLE 1
# endif # endif
# elif defined(CRYPTOPP_GCC_VERSION) # elif defined(CRYPTOPP_GCC_VERSION)
# if defined(__RDRND__) && (CRYPTOPP_GCC_VERSION >= 30200) # if defined(__RDRND__) || (CRYPTOPP_GCC_VERSION >= 40600)
# define ALL_RDRAND_INTRIN_AVAILABLE 1
# else
# define GCC_RDRAND_ASM_AVAILABLE 1 # define GCC_RDRAND_ASM_AVAILABLE 1
# endif # endif
# if defined(__RDSEED__) && (CRYPTOPP_GCC_VERSION >= 30200) # if defined(__RDSEED__) || (CRYPTOPP_GCC_VERSION >= 40600)
# define ALL_RDSEED_INTRIN_AVAILABLE 1
# else
# define GCC_RDSEED_ASM_AVAILABLE 1 # define GCC_RDSEED_ASM_AVAILABLE 1
# endif # endif
# endif # endif
#endif #endif
// Debug diagnostics
#if 0
# if MASM_RDRAND_ASM_AVAILABLE
# pragma message ("MASM_RDRAND_ASM_AVAILABLE is 1")
# elif NASM_RDRAND_ASM_AVAILABLE
# pragma message ("NASM_RDRAND_ASM_AVAILABLE is 1")
# elif GCC_RDRAND_ASM_AVAILABLE
# pragma message ("GCC_RDRAND_ASM_AVAILABLE is 1")
# elif ALL_RDRAND_INTRIN_AVAILABLE
# pragma message ("ALL_RDRAND_INTRIN_AVAILABLE is 1")
# else
# pragma message ("RDRAND is not available")
# endif
# if MASM_RDSEED_ASM_AVAILABLE
# pragma message ("MASM_RDSEED_ASM_AVAILABLE is 1")
# elif NASM_RDSEED_ASM_AVAILABLE
# pragma message ("NASM_RDSEED_ASM_AVAILABLE is 1")
# elif GCC_RDSEED_ASM_AVAILABLE
# pragma message ("GCC_RDSEED_ASM_AVAILABLE is 1")
# elif ALL_RDSEED_INTRIN_AVAILABLE
# pragma message ("ALL_RDSEED_INTRIN_AVAILABLE is 1")
# else
# pragma message ("RDSEED is not available")
# endif
#endif
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -129,376 +87,303 @@
# endif # endif
# if defined(__has_include) # if defined(__has_include)
# if __has_include(<x86intrin.h>) # if __has_include(<x86intrin.h>)
# include <x86intrin.h> // rdrand for Clang (immintrin.h); rdseed for Clang (rdseedintrin.h) # include <x86intrin.h>
# endif # endif
# endif # endif
#endif #endif
#if MASM_RDRAND_ASM_AVAILABLE #if MASM_RDRAND_ASM_AVAILABLE
# ifdef _M_X64 # ifdef _M_X64
extern "C" int CRYPTOPP_FASTCALL MASM_RRA_GenerateBlock(byte*, size_t, unsigned int); extern "C" void CRYPTOPP_FASTCALL MASM_RDRAND_GenerateBlock(byte*, size_t);
// # pragma comment(lib, "rdrand-x64.lib") // # pragma comment(lib, "rdrand-x64.lib")
# else # else
extern "C" int MASM_RRA_GenerateBlock(byte*, size_t, unsigned int); extern "C" void MASM_RDRAND_GenerateBlock(byte*, size_t);
// # pragma comment(lib, "rdrand-x86.lib") // # pragma comment(lib, "rdrand-x86.lib")
# endif # endif
#endif #endif
#if MASM_RDSEED_ASM_AVAILABLE #if MASM_RDSEED_ASM_AVAILABLE
# ifdef _M_X64 # ifdef _M_X64
extern "C" int CRYPTOPP_FASTCALL MASM_RSA_GenerateBlock(byte*, size_t, unsigned int); extern "C" void CRYPTOPP_FASTCALL MASM_RDSEED_GenerateBlock(byte*, size_t);
// # pragma comment(lib, "rdrand-x64.lib") // # pragma comment(lib, "rdrand-x64.lib")
# else # else
extern "C" int MASM_RSA_GenerateBlock(byte*, size_t, unsigned int); extern "C" void MASM_RDSEED_GenerateBlock(byte*, size_t);
// # pragma comment(lib, "rdrand-x86.lib") // # pragma comment(lib, "rdrand-x86.lib")
# endif # endif
#endif #endif
#if NASM_RDRAND_ASM_AVAILABLE #if NASM_RDRAND_ASM_AVAILABLE
extern "C" int NASM_RRA_GenerateBlock(byte*, size_t, unsigned int); extern "C" void NASM_RDRAND_GenerateBlock(byte*, size_t);
#endif #endif
#if NASM_RDSEED_ASM_AVAILABLE #if NASM_RDSEED_ASM_AVAILABLE
extern "C" int NASM_RSA_GenerateBlock(byte*, size_t, unsigned int); extern "C" void NASM_RDSEED_GenerateBlock(byte*, size_t);
#endif #endif
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
ANONYMOUS_NAMESPACE_BEGIN
// GCC, MSVC and SunCC has optimized calls to RDRAND away. We experieced it
// under GCC and MSVC. Other have reported it for SunCC. This attempts
// to tame the optimizer even though it abuses the volatile keyword.
static volatile int s_unused;
ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP) NAMESPACE_BEGIN(CryptoPP)
#if ALL_RDRAND_INTRIN_AVAILABLE // Fills 4 bytes
static int ALL_RRI_GenerateBlock(byte *output, size_t size, unsigned int safety) inline void RDRAND32(void* output)
{ {
CRYPTOPP_ASSERT((output && size) || !(output || size)); #if defined(__SUNPRO_CC)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 __asm__ __volatile__
word32 val; (
".byte 0x0f, 0xc7, 0xf0;\n"
".byte 0x73, 0xfb;\n"
: "=a" (*reinterpret_cast<word32*>(output))
: : "cc"
);
#elif defined(GCC_RDRAND_ASM_AVAILABLE)
__asm__ __volatile__
(
INTEL_NOPREFIX
ASL(1)
AS1(rdrand eax)
ASJ(jnc, 1, b)
ATT_NOPREFIX
: "=a" (*reinterpret_cast<word32*>(output))
: : "cc"
);
#elif defined(ALL_RDRAND_INTRIN_AVAILABLE)
while(!_rdrand32_step(reinterpret_cast<word32*>(output))) {}
#else #else
word64 val; // RDRAND not detected at compile time, or no suitable compiler found
throw NotImplemented("RDRAND: failed to find an implementation");
#endif #endif
while (size >= sizeof(val))
{
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
if (_rdrand32_step((word32*)output))
#else
// Cast due to GCC, http://github.com/weidai11/cryptopp/issues/236
if (_rdrand64_step(reinterpret_cast<unsigned long long*>(output)))
#endif
{
output += sizeof(val);
size -= sizeof(val);
}
else
{
if (!safety--)
{
CRYPTOPP_ASSERT(0);
return 0;
}
}
}
if (size)
{
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
if (_rdrand32_step(&val))
#else
// Cast due to GCC, http://github.com/weidai11/cryptopp/issues/236
if (_rdrand64_step(reinterpret_cast<unsigned long long*>(&val)))
#endif
{
memcpy(output, &val, size);
size = 0;
}
else
{
if (!safety--)
{
CRYPTOPP_ASSERT(0);
return 0;
}
}
}
SecureWipeBuffer(&val, 1);
return int(size == 0);
}
#endif // ALL_RDRAND_INTRINSIC_AVAILABLE
#if GCC_RDRAND_ASM_AVAILABLE
static int GCC_RRA_GenerateBlock(byte *output, size_t size, unsigned int safety)
{
CRYPTOPP_ASSERT((output && size) || !(output || size));
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32
word64 val;
#else
word32 val;
#endif
char rc;
while (size)
{
__asm__ volatile(
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32
".byte 0x48, 0x0f, 0xc7, 0xf0;\n" // rdrand rax
#else
".byte 0x0f, 0xc7, 0xf0;\n" // rdrand eax
#endif
"setc %1; "
: "=a" (val), "=qm" (rc)
:
: "cc"
);
if (rc)
{
if (size >= sizeof(val))
{
PutWord(true, LITTLE_ENDIAN_ORDER, output, val, NULLPTR);
output += sizeof(val);
size -= sizeof(val);
}
else
{
memcpy(output, &val, size);
size = 0;
}
}
else
{
if (!safety--)
{
CRYPTOPP_ASSERT(0);
return 0;
}
}
}
SecureWipeBuffer(&val, 1);
return int(size == 0);
} }
#endif // GCC_RDRAND_ASM_AVAILABLE #if CRYPTOPP_BOOL_X64
// Fills 8 bytes
inline void RDRAND64(void* output)
{
#if defined(__SUNPRO_CC) && (__SUNPRO_CC >= 0x5100)
__asm__ __volatile__
(
".byte 0x48, 0x0f, 0xc7, 0xf0;\n"
".byte 0x73, 0xfa;\n"
: "=a" (*reinterpret_cast<word64*>(output))
: : "cc"
);
#elif defined(GCC_RDRAND_ASM_AVAILABLE)
__asm__ __volatile__
(
INTEL_NOPREFIX
ASL(1)
AS1(rdrand rax)
ASJ(jnc, 1, b)
ATT_NOPREFIX
: "=a" (*reinterpret_cast<word64*>(output))
: : "cc"
);
#elif defined(ALL_RDRAND_INTRIN_AVAILABLE)
while(!_rdrand64_step(reinterpret_cast<unsigned long long*>(output))) {}
#else
// RDRAND not detected at compile time, or no suitable compiler found
throw NotImplemented("RDRAND: failed to find an implementation");
#endif
}
#endif // CRYPTOPP_BOOL_X64 and RDRAND64
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
void RDRAND::GenerateBlock(byte *output, size_t size) void RDRAND::GenerateBlock(byte *output, size_t size)
{ {
CRYPTOPP_UNUSED(output), CRYPTOPP_UNUSED(size); CRYPTOPP_ASSERT((output && size) || !(output || size));
CRYPTOPP_ASSERT((output && size) || !(output || size)); if (size == 0) return;
if(!HasRDRAND()) #if defined(NASM_RDRAND_ASM_AVAILABLE)
throw NotImplemented("RDRAND: rdrand is not available on this platform");
int rc; CRYPTOPP_UNUSED(rc); NASM_RDRAND_GenerateBlock(output, size);
#if MASM_RDRAND_ASM_AVAILABLE
rc = MASM_RRA_GenerateBlock(output, size, m_retries); #elif defined(MASM_RDRAND_ASM_AVAILABLE)
if (!rc) { throw RDRAND_Err("MASM_RRA_GenerateBlock"); }
#elif NASM_RDRAND_ASM_AVAILABLE MASM_RDRAND_GenerateBlock(output, size);
rc = NASM_RRA_GenerateBlock(output, size, m_retries);
if (!rc) { throw RDRAND_Err("NASM_RRA_GenerateBlock"); } #elif CRYPTOPP_BOOL_X64
#elif ALL_RDRAND_INTRIN_AVAILABLE size_t i = 0;
rc = ALL_RRI_GenerateBlock(output, size, m_retries); for (i = 0; i < size/8; i++)
if (!rc) { throw RDRAND_Err("ALL_RRI_GenerateBlock"); } RDRAND64(reinterpret_cast<word64*>(output)+i);
#elif GCC_RDRAND_ASM_AVAILABLE
rc = GCC_RRA_GenerateBlock(output, size, m_retries); output += i*8;
if (!rc) { throw RDRAND_Err("GCC_RRA_GenerateBlock"); } size -= i*8;
if (size)
{
word64 val;
RDRAND64(&val);
std::memcpy(output, &val, size);
}
#elif (CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86)
size_t i = 0;
for (i = 0; i < size/4; i++)
RDRAND32(reinterpret_cast<word32*>(output)+i);
output += i*4;
size -= i*4;
if (size)
{
word32 val;
RDRAND32(&val);
std::memcpy(output, &val, size);
}
#else #else
// RDRAND not detected at compile time, and no suitable compiler found // RDRAND not detected at compile time, or no suitable compiler found
throw NotImplemented("RDRAND: failed to find a suitable implementation???"); throw NotImplemented("RDRAND: failed to find a suitable implementation");
#endif // CRYPTOPP_CPUID_AVAILABLE #endif
// Size is not 0
s_unused ^= output[0];
} }
void RDRAND::DiscardBytes(size_t n) void RDRAND::DiscardBytes(size_t n)
{ {
// RoundUpToMultipleOf is used because a full word is read, and its cheaper // RoundUpToMultipleOf is used because a full word is read, and its cheaper
// to discard full words. There's no sense in dealing with tail bytes. // to discard full words. There's no sense in dealing with tail bytes.
CRYPTOPP_ASSERT(HasRDRAND()); FixedSizeSecBlock<word64, 16> discard;
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 n = RoundUpToMultipleOf(n, sizeof(word64));
FixedSizeSecBlock<word64, 16> discard;
n = RoundUpToMultipleOf(n, sizeof(word64));
#else
FixedSizeSecBlock<word32, 16> discard;
n = RoundUpToMultipleOf(n, sizeof(word32));
#endif
size_t count = STDMIN(n, discard.SizeInBytes()); size_t count = STDMIN(n, discard.SizeInBytes());
while (count) while (count)
{ {
GenerateBlock(discard.BytePtr(), count); GenerateBlock(discard.BytePtr(), count);
n -= count; n -= count;
count = STDMIN(n, discard.SizeInBytes()); count = STDMIN(n, discard.SizeInBytes());
} }
} }
#endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
#if ALL_RDSEED_INTRIN_AVAILABLE // Fills 4 bytes
static int ALL_RSI_GenerateBlock(byte *output, size_t size, unsigned int safety) inline void RDSEED32(void* output)
{ {
CRYPTOPP_ASSERT((output && size) || !(output || size)); #if defined(__SUNPRO_CC)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 __asm__ __volatile__
word32 val; (
".byte 0x0f, 0xc7, 0xf8;\n"
".byte 0x73, 0xfb;\n"
: "=a" (*reinterpret_cast<word32*>(output))
: : "cc"
);
#elif defined(GCC_RDSEED_ASM_AVAILABLE)
__asm__ __volatile__
(
INTEL_NOPREFIX
ASL(1)
AS1(rdseed eax)
ASJ(jnc, 1, b)
ATT_NOPREFIX
: "=a" (*reinterpret_cast<word32*>(output))
: : "cc"
);
#elif defined(ALL_RDSEED_INTRIN_AVAILABLE)
while(!_rdseed32_step(reinterpret_cast<unsigned long long*>(output))) {}
#else #else
word64 val; // RDSEED not detected at compile time, or no suitable compiler found
throw NotImplemented("RDSEED: failed to find an implementation");
#endif #endif
while (size >= sizeof(val))
{
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
if (_rdseed32_step((word32*)output))
#else
// Cast due to GCC, http://github.com/weidai11/cryptopp/issues/236
if (_rdseed64_step(reinterpret_cast<unsigned long long*>(output)))
#endif
{
output += sizeof(val);
size -= sizeof(val);
}
else
{
if (!safety--)
{
CRYPTOPP_ASSERT(0);
return 0;
}
}
}
if (size)
{
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
if (_rdseed32_step(&val))
#else
// Cast due to GCC, http://github.com/weidai11/cryptopp/issues/236
if (_rdseed64_step(reinterpret_cast<unsigned long long*>(&val)))
#endif
{
memcpy(output, &val, size);
size = 0;
}
else
{
if (!safety--)
{
CRYPTOPP_ASSERT(0);
return 0;
}
}
}
SecureWipeBuffer(&val, 1);
return int(size == 0);
} }
#endif // ALL_RDSEED_INTRIN_AVAILABLE
#if GCC_RDSEED_ASM_AVAILABLE // Fills 8 bytes
static int GCC_RSA_GenerateBlock(byte *output, size_t size, unsigned int safety) inline void RDSEED64(void* output)
{ {
CRYPTOPP_ASSERT((output && size) || !(output || size)); #if defined(__SUNPRO_CC) && (__SUNPRO_CC >= 0x5100)
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 __asm__ __volatile__
word64 val; (
".byte 0x48, 0x0f, 0xc7, 0xf8;\n"
".byte 0x73, 0xfa;\n"
: "=a" (*reinterpret_cast<word64*>(output))
: : "cc"
);
#elif defined(GCC_RDSEED_ASM_AVAILABLE)
__asm__ __volatile__
(
INTEL_NOPREFIX
ASL(1)
AS1(rdseed rax)
ASJ(jnc, 1, b)
ATT_NOPREFIX
: "=a" (*reinterpret_cast<word64*>(output))
: : "cc"
);
#elif defined(ALL_RDSEED_INTRIN_AVAILABLE)
while(!_rdseed64_step(reinterpret_cast<unsigned long long*>(output))) {}
#else #else
word32 val; // RDSEED not detected at compile time, or no suitable compiler found
throw NotImplemented("RDSEED: failed to find an implementation");
#endif #endif
char rc;
while (size)
{
__asm__ volatile(
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32
".byte 0x48, 0x0f, 0xc7, 0xf8;\n" // rdseed rax
#else
".byte 0x0f, 0xc7, 0xf8;\n" // rdseed eax
#endif
"setc %1; "
: "=a" (val), "=qm" (rc)
:
: "cc"
);
if (rc)
{
if (size >= sizeof(val))
{
PutWord(true, LITTLE_ENDIAN_ORDER, output, val, NULLPTR);
output += sizeof(val);
size -= sizeof(val);
}
else
{
memcpy(output, &val, size);
size = 0;
}
}
else
{
if (!safety--)
{
CRYPTOPP_ASSERT(0);
return 0;
}
}
}
SecureWipeBuffer(&val, 1);
return int(size == 0);
} }
#endif // GCC_RDSEED_ASM_AVAILABLE
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
void RDSEED::GenerateBlock(byte *output, size_t size) void RDSEED::GenerateBlock(byte *output, size_t size)
{ {
CRYPTOPP_UNUSED(output), CRYPTOPP_UNUSED(size); CRYPTOPP_ASSERT((output && size) || !(output || size));
CRYPTOPP_ASSERT((output && size) || !(output || size)); if (size == 0) return;
if(!HasRDSEED()) #if defined(NASM_RDSEED_ASM_AVAILABLE)
throw NotImplemented("RDSEED: rdseed is not available on this platform");
int rc; CRYPTOPP_UNUSED(rc); NASM_RDSEED_GenerateBlock(output, size);
#if MASM_RDSEED_ASM_AVAILABLE
rc = MASM_RSA_GenerateBlock(output, size, m_retries); #elif defined(MASM_RDSEED_ASM_AVAILABLE)
if (!rc) { throw RDSEED_Err("MASM_RSA_GenerateBlock"); }
#elif NASM_RDSEED_ASM_AVAILABLE MASM_RDSEED_GenerateBlock(output, size);
rc = NASM_RSA_GenerateBlock(output, size, m_retries);
if (!rc) { throw RDRAND_Err("NASM_RSA_GenerateBlock"); } #elif CRYPTOPP_BOOL_X64
#elif ALL_RDSEED_INTRIN_AVAILABLE size_t i = 0;
rc = ALL_RSI_GenerateBlock(output, size, m_retries); for (i = 0; i < size/8; i++)
if (!rc) { throw RDSEED_Err("ALL_RSI_GenerateBlock"); } RDSEED64(reinterpret_cast<word64*>(output)+i);
#elif GCC_RDSEED_ASM_AVAILABLE
rc = GCC_RSA_GenerateBlock(output, size, m_retries); output += i*8;
if (!rc) { throw RDSEED_Err("GCC_RSA_GenerateBlock"); } size -= i*8;
#else
// RDSEED not detected at compile time, and no suitable compiler found if (size)
throw NotImplemented("RDSEED: failed to find a suitable implementation???"); {
word64 val;
RDSEED64(&val);
std::memcpy(output, &val, size);
}
#elif (CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86)
size_t i = 0;
for (i = 0; i < size/4; i++)
RDSEED32(reinterpret_cast<word32*>(output)+i);
output += i*4;
size -= i*4;
if (size)
{
word32 val;
RDSEED32(&val);
std::memcpy(output, &val, size);
}
#endif #endif
// Size is not 0
s_unused ^= output[0];
} }
void RDSEED::DiscardBytes(size_t n) void RDSEED::DiscardBytes(size_t n)
{ {
// RoundUpToMultipleOf is used because a full word is read, and its cheaper // RoundUpToMultipleOf is used because a full word is read, and its cheaper
// to discard full words. There's no sense in dealing with tail bytes. // to discard full words. There's no sense in dealing with tail bytes.
CRYPTOPP_ASSERT(HasRDSEED()); FixedSizeSecBlock<word64, 16> discard;
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 n = RoundUpToMultipleOf(n, sizeof(word64));
FixedSizeSecBlock<word64, 16> discard;
n = RoundUpToMultipleOf(n, sizeof(word64));
#else
FixedSizeSecBlock<word32, 16> discard;
n = RoundUpToMultipleOf(n, sizeof(word32));
#endif
size_t count = STDMIN(n, discard.SizeInBytes()); size_t count = STDMIN(n, discard.SizeInBytes());
while (count) while (count)
{ {
GenerateBlock(discard.BytePtr(), count); GenerateBlock(discard.BytePtr(), count);
n -= count; n -= count;
count = STDMIN(n, discard.SizeInBytes()); count = STDMIN(n, discard.SizeInBytes());
} }
} }
#endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
NAMESPACE_END NAMESPACE_END

206
rdrand.h
View File

@ -9,16 +9,24 @@
#include "cryptlib.h" #include "cryptlib.h"
// This file (and friends) provides both RDRAND and RDSEED, but its somewhat // This file (and friends) provides both RDRAND and RDSEED. They were added at
// experimental. They were added at Crypto++ 5.6.3. At compile time, it // Crypto++ 5.6.3. At compile time, it uses CRYPTOPP_BOOL_{X86|X32|X64}
// indirectly uses CRYPTOPP_BOOL_{X86|X32|X64} (via CRYPTOPP_CPUID_AVAILABLE)
// to select an implementation or "throw NotImplemented". At runtime, the // to select an implementation or "throw NotImplemented". At runtime, the
// class uses the result of CPUID to determine if RDRAND or RDSEED are // class uses the result of CPUID to determine if RDRAND or RDSEED are
// available. If not available, a lazy throw strategy is used. I.e., the // available. If not available, then a SIGILL will result.
// throw is deferred until GenerateBlock() is called. // The original classes accepted a retry count. Retries were superflous for
// RDRAND, and RDSEED encountered a failure about 1 in 256 bytes depending
// on the processor. Retries were removed at Crypto++ 6.0 because the
// functions always fulfill the request.
// Microsoft added RDRAND in August 2012, VS2012. GCC added RDRAND in December 2010, GCC 4.6. // Throughput varies wildly depending on processor and manufacturer. A Core i5 or
// Clang added RDRAND in July 2012, Clang 3.2. Intel added RDRAND in September 2011, ICC 12.1. // Core i7 RDRAND can generate at over 200 MiB/s. A low end Celeron may perform
// RDRAND at 7 MiB/s. RDSEED performs at about 1/4 to 1/2 the rate of RDRAND.
// AMD RDRAND performed poorly during testing with Athlon X4 845 (Bulldozer v4).
// Microsoft added RDRAND in August 2012, VS2012; RDSEED in October 2013, VS2013.
// GCC added RDRAND in December 2010, GCC 4.6. LLVM added RDRAND in July 2012, Clang 3.2.
// Intel added RDRAND in September 2011, ICC 12.1.
NAMESPACE_BEGIN(CryptoPP) NAMESPACE_BEGIN(CryptoPP)
@ -28,8 +36,8 @@ NAMESPACE_BEGIN(CryptoPP)
class RDRAND_Err : public Exception class RDRAND_Err : public Exception
{ {
public: public:
RDRAND_Err(const std::string &operation) RDRAND_Err(const std::string &operation)
: Exception(OTHER_ERROR, "RDRAND: " + operation + " operation failed") {} : Exception(OTHER_ERROR, "RDRAND: " + operation + " operation failed") {}
}; };
//! \brief Hardware generated random numbers using RDRAND instruction //! \brief Hardware generated random numbers using RDRAND instruction
@ -38,73 +46,39 @@ public:
class RDRAND : public RandomNumberGenerator class RDRAND : public RandomNumberGenerator
{ {
public: public:
virtual ~RDRAND() {} CRYPTOPP_STATIC_CONSTEXPR const char* StaticAlgorithmName() { return "RDRAND"; }
std::string AlgorithmName() const {return "RDRAND";} virtual ~RDRAND() {}
//! \brief Construct a RDRAND generator //! \brief Construct a RDRAND generator
//! \param retries the number of retries for failed calls to the hardware //! \param retries the number of retries for failed calls to the hardware
//! \details RDRAND() constructs a generator with a maximum number of retires //! \details According to DJ of Intel, the Intel RDRAND circuit does not underflow.
//! for failed generation attempts. //! If it did hypothetically underflow, then it would return 0 for the random value.
//! \details According to DJ of Intel, the Intel RDRAND circuit does not underflow. //! AMD's RDRAND implementation appears to provide the same behavior except the
//! If it did hypothetically underflow, then it would return 0 for the random value. //! values are not generated consistent with FIPS 140.
//! AMD's RDRAND implementation appears to provide the same behavior except the RDRAND() {}
//! values are not generated consistent with FIPS 140.
RDRAND(unsigned int retries = 4) : m_retries(retries) {}
//! \brief Retrieve the number of retries used by the generator //! \brief Generate random array of bytes
//! \returns the number of times GenerateBlock() will attempt to recover from a failed generation //! \param output the byte buffer
unsigned int GetRetries() const //! \param size the length of the buffer, in bytes
{ virtual void GenerateBlock(byte *output, size_t size);
return m_retries;
}
//! \brief Set the number of retries used by the generator //! \brief Generate and discard n bytes
//! \param retries number of times GenerateBlock() will attempt to recover from a failed generation //! \param n the number of bytes to generate and discard
void SetRetries(unsigned int retries) //! \details the RDSEED generator discards words, not bytes. If n is
{ //! not a multiple of a machine word, then it is rounded up to
m_retries = retries; //! that size.
} virtual void DiscardBytes(size_t n);
//! \brief Generate random array of bytes //! \brief Update RNG state with additional unpredictable values
//! \param output the byte buffer //! \param input unused
//! \param size the length of the buffer, in bytes //! \param length unused
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) //! \details The operation is a nop for this generator.
virtual void GenerateBlock(byte *output, size_t size); virtual void IncorporateEntropy(const byte *input, size_t length)
#else {
virtual void GenerateBlock(byte *output, size_t size) { // Override to avoid the base class' throw.
CRYPTOPP_UNUSED(output), CRYPTOPP_UNUSED(size); CRYPTOPP_UNUSED(input); CRYPTOPP_UNUSED(length);
throw NotImplemented("RDRAND: rdrand is not available on this platform"); }
}
#endif
//! \brief Generate and discard n bytes
//! \param n the number of bytes to generate and discard
//! \details the RDSEED generator discards words, not bytes. If n is
//! not a multiple of a machine word, then it is rounded up to
//! that size.
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
virtual void DiscardBytes(size_t n);
#else
virtual void DiscardBytes(size_t n) {
CRYPTOPP_UNUSED(n);
throw NotImplemented("RDRAND: rdrand is not available on this platform");
}
#endif
//! \brief Update RNG state with additional unpredictable values
//! \param input unused
//! \param length unused
//! \details The operation is a nop for this generator.
virtual void IncorporateEntropy(const byte *input, size_t length)
{
// Override to avoid the base class' throw.
CRYPTOPP_UNUSED(input); CRYPTOPP_UNUSED(length);
// CRYPTOPP_ASSERT(0); // warn in debug builds
}
private:
unsigned int m_retries;
}; };
//! \brief Exception thrown when a RDSEED generator encounters //! \brief Exception thrown when a RDSEED generator encounters
@ -113,8 +87,8 @@ private:
class RDSEED_Err : public Exception class RDSEED_Err : public Exception
{ {
public: public:
RDSEED_Err(const std::string &operation) RDSEED_Err(const std::string &operation)
: Exception(OTHER_ERROR, "RDSEED: " + operation + " operation failed") {} : Exception(OTHER_ERROR, "RDSEED: " + operation + " operation failed") {}
}; };
//! \brief Hardware generated random numbers using RDSEED instruction //! \brief Hardware generated random numbers using RDSEED instruction
@ -123,72 +97,40 @@ public:
class RDSEED : public RandomNumberGenerator class RDSEED : public RandomNumberGenerator
{ {
public: public:
virtual ~RDSEED() {} CRYPTOPP_STATIC_CONSTEXPR const char* StaticAlgorithmName() { return "RDSEED"; }
std::string AlgorithmName() const {return "RDSEED";} virtual ~RDSEED() {}
//! \brief Construct a RDSEED generator //! \brief Construct a RDSEED generator
//! \param retries the number of retries for failed calls to the hardware //! \details Empirical testing under a 6th generaton i7 (6200U) shows RDSEED fails
//! \details RDSEED() constructs a generator with a maximum number of retires //! to fulfill requests at about once every for every 256 bytes requested.
//! for failed generation attempts. //! The generator runs about 4 times slower than RDRAND.
//! \details Empirical testing under a 6th generaton i7 (6200U) shows RDSEED fails RDSEED() {}
//! to fulfill requests at about once every for every 256 bytes requested.
//! The default retries reflects the expected ceiling when requesting 10,000 bytes.
RDSEED(unsigned int retries = 64) : m_retries(retries) {}
//! \brief Retrieve the number of retries used by the generator //! \brief Generate random array of bytes
//! \returns the number of times GenerateBlock() will attempt to recover from a failed generation //! \param output the byte buffer
unsigned int GetRetries() const //! \param size the length of the buffer, in bytes
{ virtual void GenerateBlock(byte *output, size_t size);
return m_retries;
}
//! \brief Set the number of retries used by the generator //! \brief Generate and discard n bytes
//! \param retries number of times GenerateBlock() will attempt to recover from a failed generation //! \param n the number of bytes to generate and discard
void SetRetries(unsigned int retries) //! \details the RDSEED generator discards words, not bytes. If n is
{ //! not a multiple of a machine word, then it is rounded up to
m_retries = retries; //! that size.
} virtual void DiscardBytes(size_t n);
//! \brief Generate random array of bytes //! \brief Update RNG state with additional unpredictable values
//! \param output the byte buffer //! \param input unused
//! \param size the length of the buffer, in bytes //! \param length unused
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) //! \details The operation is a nop for this generator.
virtual void GenerateBlock(byte *output, size_t size); virtual void IncorporateEntropy(const byte *input, size_t length)
#else {
virtual void GenerateBlock(byte *output, size_t size) { // Override to avoid the base class' throw.
CRYPTOPP_UNUSED(output), CRYPTOPP_UNUSED(size); CRYPTOPP_UNUSED(input); CRYPTOPP_UNUSED(length);
throw NotImplemented("RDSEED: rdseed is not available on this platform"); }
}
#endif
//! \brief Generate and discard n bytes
//! \param n the number of bytes to generate and discard
//! \details the RDSEED generator discards words, not bytes. If n is
//! not a multiple of a machine word, then it is rounded up to
//! that size.
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
virtual void DiscardBytes(size_t n);
#else
virtual void DiscardBytes(size_t n) {
CRYPTOPP_UNUSED(n);
throw NotImplemented("RDSEED: rdseed is not available on this platform");
}
#endif
//! \brief Update RNG state with additional unpredictable values
//! \param input unused
//! \param length unused
//! \details The operation is a nop for this generator.
virtual void IncorporateEntropy(const byte *input, size_t length)
{
// Override to avoid the base class' throw.
CRYPTOPP_UNUSED(input); CRYPTOPP_UNUSED(length);
// CRYPTOPP_ASSERT(0); // warn in debug builds
}
private: private:
unsigned int m_retries; unsigned int m_retries;
}; };
NAMESPACE_END NAMESPACE_END

View File

@ -679,7 +679,6 @@ bool TestRDRAND()
// Squash code coverage warnings on unused functions // Squash code coverage warnings on unused functions
(void)rdrand.AlgorithmName(); (void)rdrand.AlgorithmName();
(void)rdrand.CanIncorporateEntropy(); (void)rdrand.CanIncorporateEntropy();
rdrand.SetRetries(rdrand.GetRetries());
rdrand.IncorporateEntropy(NULLPTR, 0); rdrand.IncorporateEntropy(NULLPTR, 0);
if (!(entropy && compress && discard)) if (!(entropy && compress && discard))
@ -694,7 +693,7 @@ bool TestRDSEED()
{ {
// Testing on 5th generation i5 shows RDSEED needs about 128 retries for 10K bytes // Testing on 5th generation i5 shows RDSEED needs about 128 retries for 10K bytes
// on 64-bit/amd64 VM, and it needs more for an 32-bit/i686 VM. // on 64-bit/amd64 VM, and it needs more for an 32-bit/i686 VM.
RDSEED rdseed(256); RDSEED rdseed;
bool entropy = true, compress = true, discard = true; bool entropy = true, compress = true, discard = true;
static const unsigned int SIZE = 10000; static const unsigned int SIZE = 10000;
@ -758,7 +757,6 @@ bool TestRDSEED()
// Squash code coverage warnings on unused functions // Squash code coverage warnings on unused functions
(void)rdseed.AlgorithmName(); (void)rdseed.AlgorithmName();
(void)rdseed.CanIncorporateEntropy(); (void)rdseed.CanIncorporateEntropy();
rdseed.SetRetries(rdseed.GetRetries());
rdseed.IncorporateEntropy(NULLPTR, 0); rdseed.IncorporateEntropy(NULLPTR, 0);
if (!(entropy && compress && discard)) if (!(entropy && compress && discard))
@ -1410,8 +1408,9 @@ bool TestModeIV(SymmetricCipher &e, SymmetricCipher &d)
SecByteBlock lastIV, iv(e.IVSize()); SecByteBlock lastIV, iv(e.IVSize());
StreamTransformationFilter filter(e, new StreamTransformationFilter(d)); StreamTransformationFilter filter(e, new StreamTransformationFilter(d));
// vector_ptr<byte> due to Enterprise Analysis finding on the stack based array. // Enterprise Analysis finding on the stack based array
vector_ptr<byte> plaintext(20480); const int BUF_SIZE=20480U;
AlignedSecByteBlock plaintext(BUF_SIZE);
for (unsigned int i=1; i<20480; i*=2) for (unsigned int i=1; i<20480; i*=2)
{ {