From 14d92f9eba81191e3537ac14ff21d84ef7405e9c Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Tue, 7 Mar 2017 03:57:23 -0500 Subject: [PATCH] Improve performance of RDRAND and RDSEED (Issue 387) --- rdrand.S | 550 +++++++++++++++++------------------------------ rdrand.asm | 489 +++++++++++++++--------------------------- rdrand.cpp | 595 +++++++++++++++++++++------------------------------ rdrand.h | 206 +++++++----------- validat1.cpp | 9 +- 5 files changed, 695 insertions(+), 1154 deletions(-) diff --git a/rdrand.S b/rdrand.S index 4a8f387a..ded318af 100644 --- a/rdrand.S +++ b/rdrand.S @@ -12,31 +12,10 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Naming convention used in rdrand.{h|cpp|asm|S} -;; MSC = Microsoft Compiler (and compatibles) -;; GCC = GNU Compiler (and compatibles) -;; ALL = MSC and GCC (and compatibles) -;; RRA = RDRAND, Assembly -;; RSA = RDSEED, Assembly -;; RRI = RDRAND, Intrinsic -;; RSA = RDSEED, Intrinsic - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; C/C++ Function prototypes ;; X86, X32 and X64: -;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety); - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Return values -%define RDRAND_SUCCESS 1 -%define RDRAND_FAILURE 0 - -%define RDSEED_SUCCESS 1 -%define RDSEED_FAILURE 0 +;; extern "C" void NASM_RDRAND_GenerateBlock(byte* ptr, size_t size); +;; extern "C" void NASM_RDSEED_GenerateBlock(byte* ptr, size_t size); ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -45,275 +24,211 @@ ;; Arg1, byte* buffer ;; Arg2, size_t bsize -;; Arg3, unsigned int safety -;; EAX (out): success (1), failure (0) -global NASM_RRA_GenerateBlock -section .text +global NASM_RDRAND_GenerateBlock +section .text %ifdef X86 -align 8 -cpu 486 +align 8 +cpu 486 %else -align 16 +align 16 %endif -NASM_RRA_GenerateBlock: +NASM_RDRAND_GenerateBlock: %ifdef X86 %define arg1 [ebp+04h] %define arg2 [ebp+08h] -%define arg3 [ebp+0ch] %define MWSIZE 04h ;; machine word size %else %define MWSIZE 08h ;; machine word size %endif - %define buffer edi - %define bsize esi - %define safety edx +%define buffer edi +%define bsize esi %ifdef X86 .Load_Arguments: - mov buffer, arg1 - mov bsize, arg2 - mov safety, arg3 + mov buffer, arg1 + mov bsize, arg2 %endif -.Validate_Pointer: - - cmp buffer, 0 - je .GenerateBlock_PreRet - - ;; Top of While loop + ;; Top of While loop .GenerateBlock_Top: - ;; Check remaining size - cmp bsize, 0 - je .GenerateBlock_Success + ;; Check remaining size + cmp bsize, 0 + je .GenerateBlock_Return +.Call_RDRAND: %ifdef X86 .Call_RDRAND_EAX: %else .Call_RDRAND_RAX: - DB 48h ;; X32 can use the full register, issue the REX.w prefix + DB 48h ;; X32 can use the full register, issue the REX.w prefix %endif - ;; RDRAND is not available prior to VS2012. Just emit - ;; the byte codes using DB. This is `rdrand eax`. - DB 0Fh, 07h, 0F0h + ;; RDRAND is not available prior to VS2012. Just emit + ;; the byte codes using DB. This is `rdrand eax`. + DB 0Fh, 07h, 0F0h - ;; If CF=1, the number returned by RDRAND is valid. - ;; If CF=0, a random number was not available. - jc .RDRAND_succeeded - -.RDRAND_failed: - - ;; Exit if we've reached the limit - cmp safety, 0 - je .GenerateBlock_Failure - - dec safety - jmp .GenerateBlock_Top + ;; If CF=1, the number returned by RDRAND is valid. + ;; If CF=0, a random number was not available. + jnc .Call_RDRAND .RDRAND_succeeded: - cmp bsize, MWSIZE - jb .Partial_Machine_Word + cmp bsize, MWSIZE + jb .Partial_Machine_Word .Full_Machine_Word: %ifdef X32 - mov [buffer+4], eax ;; We can only move 4 at a time - DB 048h ;; Combined, these result in - shr eax, 32 ;; `shr rax, 32` + mov [buffer+4], eax ;; We can only move 4 at a time + DB 048h ;; Combined, these result in + shr eax, 32 ;; `shr rax, 32` %endif - mov [buffer], eax - add buffer, MWSIZE ;; No need for Intel Core 2 slow word workarounds, - sub bsize, MWSIZE ;; like `lea buffer,[buffer+MWSIZE]` for faster adds + mov [buffer], eax + add buffer, MWSIZE ;; No need for Intel Core 2 slow word workarounds, + sub bsize, MWSIZE ;; like `lea buffer,[buffer+MWSIZE]` for faster adds - ;; Continue - jmp .GenerateBlock_Top + ;; Continue + jmp .GenerateBlock_Top - ;; 1,2,3 bytes remain for X86 - ;; 1,2,3,4,5,6,7 remain for X32 + ;; 1,2,3 bytes remain for X86 + ;; 1,2,3,4,5,6,7 remain for X32 .Partial_Machine_Word: %ifdef X32 - ;; Test bit 2 to see if size is at least 4 - test bsize, 4 - jz .Bit_2_Not_Set + ;; Test bit 2 to see if size is at least 4 + test bsize, 4 + jz .Bit_2_Not_Set - mov [buffer], eax - add buffer, 4 + mov [buffer], eax + add buffer, 4 - DB 048h ;; Combined, these result in - shr eax, 32 ;; `shr rax, 32` + DB 048h ;; Combined, these result in + shr eax, 32 ;; `shr rax, 32` .Bit_2_Not_Set: %endif - ;; Test bit 1 to see if size is at least 2 - test bsize, 2 - jz .Bit_1_Not_Set + ;; Test bit 1 to see if size is at least 2 + test bsize, 2 + jz .Bit_1_Not_Set - mov [buffer], ax - shr eax, 16 - add buffer, 2 + mov [buffer], ax + shr eax, 16 + add buffer, 2 .Bit_1_Not_Set: - ;; Test bit 0 to see if size is at least 1 - test bsize, 1 - jz .GenerateBlock_Success + ;; Test bit 0 to see if size is at least 1 + test bsize, 1 + jz .Bit_0_Not_Set - mov [buffer], al + mov [buffer], al .Bit_0_Not_Set: - ;; We've hit all the bits - jmp .GenerateBlock_Success + ;; We've hit all the bits -.GenerateBlock_PreRet: +.GenerateBlock_Return: - ;; Test for success (was the request completely fulfilled?) - cmp bsize, 0 - je .GenerateBlock_Success + xor eax, eax + ret -.GenerateBlock_Failure: - - xor eax, eax - mov al, RDRAND_FAILURE - ret - -.GenerateBlock_Success: - - xor eax, eax - mov al, RDRAND_SUCCESS - ret - -%endif ;; X86 and X32 +%endif ;; X86 and X32 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -%ifdef X64 ;; Set via the command line +%ifdef X64 ;; Set via the command line -global NASM_RRA_GenerateBlock -section .text -align 16 +global NASM_RDRAND_GenerateBlock +section .text +align 16 ;; Arg1, byte* buffer ;; Arg2, size_t bsize -;; Arg3, unsigned int safety -;; RAX (out): success (1), failure (0) -NASM_RRA_GenerateBlock: +NASM_RDRAND_GenerateBlock: %define MWSIZE 08h ;; machine word size %define buffer rdi %define bsize rsi -%define safety edx - ;; No need for Load_Arguments due to fastcall + ;; No need for Load_Arguments due to fastcall -.Validate_Pointer: - - ;; Validate pointer - cmp buffer, 0 - je .GenerateBlock_PreRet - - ;; Top of While loop + ;; Top of While loop .GenerateBlock_Top: - ;; Check remaining size - cmp bsize, 0 - je .GenerateBlock_Success + ;; Check remaining size + cmp bsize, 0 + je .GenerateBlock_Return .Call_RDRAND_RAX: - ;; RDRAND is not available prior to VS2012. Just emit - ;; the byte codes using DB. This is `rdrand rax`. - DB 048h, 0Fh, 0C7h, 0F0h + ;; RDRAND is not available prior to VS2012. Just emit + ;; the byte codes using DB. This is `rdrand rax`. + DB 048h, 0Fh, 0C7h, 0F0h - ;; If CF=1, the number returned by RDRAND is valid. - ;; If CF=0, a random number was not available. - jc .RDRAND_succeeded - -.RDRAND_failed: - - ;; Exit if we've reached the limit - cmp safety, 0h - je .GenerateBlock_Failure - - dec safety - jmp .GenerateBlock_Top + ;; If CF=1, the number returned by RDRAND is valid. + ;; If CF=0, a random number was not available. + jnc .Call_RDRAND_RAX .RDRAND_succeeded: - cmp bsize, MWSIZE - jb .Partial_Machine_Word + cmp bsize, MWSIZE + jb .Partial_Machine_Word .Full_Machine_Word: - mov [buffer], rax - add buffer, MWSIZE - sub bsize, MWSIZE + mov [buffer], rax + add buffer, MWSIZE + sub bsize, MWSIZE - ;; Continue - jmp .GenerateBlock_Top + ;; Continue + jmp .GenerateBlock_Top - ;; 1,2,3,4,5,6,7 bytes remain + ;; 1,2,3,4,5,6,7 bytes remain .Partial_Machine_Word: - ;; Test bit 2 to see if size is at least 4 - test bsize, 4 - jz .Bit_2_Not_Set + ;; Test bit 2 to see if size is at least 4 + test bsize, 4 + jz .Bit_2_Not_Set - mov [buffer], eax - shr rax, 32 - add buffer, 4 + mov [buffer], eax + shr rax, 32 + add buffer, 4 .Bit_2_Not_Set: - ;; Test bit 1 to see if size is at least 2 - test bsize, 2 - jz .Bit_1_Not_Set + ;; Test bit 1 to see if size is at least 2 + test bsize, 2 + jz .Bit_1_Not_Set - mov [buffer], ax - shr eax, 16 - add buffer, 2 + mov [buffer], ax + shr eax, 16 + add buffer, 2 .Bit_1_Not_Set: - ;; Test bit 0 to see if size is at least 1 - test bsize, 1 - jz .GenerateBlock_Success + ;; Test bit 0 to see if size is at least 1 + test bsize, 1 + jz .Bit_0_Not_Set - mov [buffer], al + mov [buffer], al .Bit_0_Not_Set: - ;; We've hit all the bits - jmp .GenerateBlock_Success + ;; We've hit all the bits -.GenerateBlock_PreRet: +.GenerateBlock_Return: - ;; Test for success (was the request completely fulfilled?) - cmp bsize, 0 - je .GenerateBlock_Success - -.GenerateBlock_Failure: - - xor rax, rax - mov al, RDRAND_FAILURE - ret - -.GenerateBlock_Success: - - xor rax, rax - mov al, RDRAND_SUCCESS - ret + xor rax, rax + ret %endif ;; X64 @@ -324,273 +239,208 @@ NASM_RRA_GenerateBlock: ;; Arg1, byte* buffer ;; Arg2, size_t bsize -;; Arg3, unsigned int safety -;; EAX (out): success (1), failure (0) -global NASM_RSA_GenerateBlock -section .text -align 8 +global NASM_RDSEED_GenerateBlock +section .text +align 8 %ifdef X86 -align 8 -cpu 486 +align 8 +cpu 486 %else -align 16 +align 16 %endif -NASM_RSA_GenerateBlock: +NASM_RDSEED_GenerateBlock: %ifdef X86 %define arg1 [ebp+04h] %define arg2 [ebp+08h] -%define arg3 [ebp+0ch] %define MWSIZE 04h ;; machine word size %else %define MWSIZE 08h ;; machine word size %endif - %define buffer edi - %define bsize esi - %define safety edx +%define buffer edi +%define bsize esi %ifdef X86 .Load_Arguments: - mov buffer, arg1 - mov bsize, arg2 - mov safety, arg3 + mov buffer, arg1 + mov bsize, arg2 %endif -.Validate_Pointer: - - cmp buffer, 0 - je .GenerateBlock_PreRet - - ;; Top of While loop + ;; Top of While loop .GenerateBlock_Top: - ;; Check remaining size - cmp bsize, 0 - je .GenerateBlock_Success + ;; Check remaining size + cmp bsize, 0 + je .GenerateBlock_Return +.Call_RDSEED: %ifdef X86 .Call_RDSEED_EAX: %else .Call_RDSEED_RAX: - DB 48h ;; X32 can use the full register, issue the REX.w prefix + DB 48h ;; X32 can use the full register, issue the REX.w prefix %endif - ;; RDSEED is not available prior to VS2012. Just emit - ;; the byte codes using DB. This is `rdseed eax`. - DB 0Fh, 0C7h, 0F8h + ;; RDSEED is not available prior to VS2012. Just emit + ;; the byte codes using DB. This is `rdseed eax`. + DB 0Fh, 0C7h, 0F8h - ;; If CF=1, the number returned by RDSEED is valid. - ;; If CF=0, a random number was not available. - jc .RDSEED_succeeded - -.RDSEED_failed: - - ;; Exit if we've reached the limit - cmp safety, 0 - je .GenerateBlock_Failure - - dec safety - jmp .GenerateBlock_Top + ;; If CF=1, the number returned by RDSEED is valid. + ;; If CF=0, a random number was not available. + jnc .Call_RDSEED .RDSEED_succeeded: - cmp bsize, MWSIZE - jb .Partial_Machine_Word + cmp bsize, MWSIZE + jb .Partial_Machine_Word .Full_Machine_Word: - mov [buffer], eax - add buffer, MWSIZE ;; No need for Intel Core 2 slow word workarounds, - sub bsize, MWSIZE ;; like `lea buffer,[buffer+MWSIZE]` for faster adds + mov [buffer], eax + add buffer, MWSIZE ;; No need for Intel Core 2 slow word workarounds, + sub bsize, MWSIZE ;; like `lea buffer,[buffer+MWSIZE]` for faster adds - ;; Continue - jmp .GenerateBlock_Top + ;; Continue + jmp .GenerateBlock_Top - ;; 1,2,3 bytes remain for X86 - ;; 1,2,3,4,5,6,7 remain for X32 + ;; 1,2,3 bytes remain for X86 + ;; 1,2,3,4,5,6,7 remain for X32 .Partial_Machine_Word: %ifdef X32 - ;; Test bit 2 to see if size is at least 4 - test bsize, 4 - jz .Bit_2_Not_Set + ;; Test bit 2 to see if size is at least 4 + test bsize, 4 + jz .Bit_2_Not_Set - mov [buffer], eax - add buffer, 4 + mov [buffer], eax + add buffer, 4 - DB 048h ;; Combined, these result in - shr eax, 32 ;; `shr rax, 32` + DB 048h ;; Combined, these result in + shr eax, 32 ;; `shr rax, 32` .Bit_2_Not_Set: %endif - ;; Test bit 1 to see if size is at least 2 - test bsize, 2 - jz .Bit_1_Not_Set + ;; Test bit 1 to see if size is at least 2 + test bsize, 2 + jz .Bit_1_Not_Set - mov [buffer], ax - shr eax, 16 - add buffer, 2 + mov [buffer], ax + shr eax, 16 + add buffer, 2 .Bit_1_Not_Set: - ;; Test bit 0 to see if size is at least 1 - test bsize, 1 - jz .GenerateBlock_Success + ;; Test bit 0 to see if size is at least 1 + test bsize, 1 + jz .Bit_0_Not_Set - mov [buffer], al + mov [buffer], al .Bit_0_Not_Set: - ;; We've hit all the bits - jmp .GenerateBlock_Success + ;; We've hit all the bits -.GenerateBlock_PreRet: +.GenerateBlock_Return: - ;; Test for success (was the request completely fulfilled?) - cmp bsize, 0 - je .GenerateBlock_Success - -.GenerateBlock_Failure: - - xor eax, eax - mov al, RDSEED_FAILURE - ret - -.GenerateBlock_Success: - - xor eax, eax - mov al, RDSEED_SUCCESS - ret + xor eax, eax + ret %endif ;; X86 and X32 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -%ifdef X64 ;; Set via the command line +%ifdef X64 ;; Set via the command line -global NASM_RSA_GenerateBlock -section .text -align 16 +global NASM_RDSEED_GenerateBlock +section .text +align 16 ;; Arg1, byte* buffer ;; Arg2, size_t bsize -;; Arg3, unsigned int safety -;; RAX (out): success (1), failure (0) -NASM_RSA_GenerateBlock: +NASM_RDSEED_GenerateBlock: %define MWSIZE 08h ;; machine word size %define buffer rdi %define bsize rsi -%define safety edx - ;; No need for Load_Arguments due to fastcall + ;; No need for Load_Arguments due to fastcall -.Validate_Pointer: - - ;; Validate pointer - cmp buffer, 0 - je .GenerateBlock_PreRet - - ;; Top of While loop + ;; Top of While loop .GenerateBlock_Top: - ;; Check remaining size - cmp bsize, 0 - je .GenerateBlock_Success + ;; Check remaining size + cmp bsize, 0 + je .GenerateBlock_Return .Call_RDSEED_RAX: - ;; RDSEED is not available prior to VS2012. Just emit - ;; the byte codes using DB. This is `rdseed rax`. - DB 048h, 0Fh, 0C7h, 0F8h + ;; RDSEED is not available prior to VS2012. Just emit + ;; the byte codes using DB. This is `rdseed rax`. + DB 048h, 0Fh, 0C7h, 0F8h - ;; If CF=1, the number returned by RDSEED is valid. - ;; If CF=0, a random number was not available. - jc .RDSEED_succeeded - -.RDSEED_failed: - - ;; Exit if we've reached the limit - cmp safety, 0 - je .GenerateBlock_Failure - - dec safety - jmp .GenerateBlock_Top + ;; If CF=1, the number returned by RDSEED is valid. + ;; If CF=0, a random number was not available. + jnc .Call_RDSEED_RAX .RDSEED_succeeded: - cmp bsize, MWSIZE - jb .Partial_Machine_Word + cmp bsize, MWSIZE + jb .Partial_Machine_Word .Full_Machine_Word: - mov [buffer], rax - add buffer, MWSIZE - sub bsize, MWSIZE + mov [buffer], rax + add buffer, MWSIZE + sub bsize, MWSIZE - ;; Continue - jmp .GenerateBlock_Top + ;; Continue + jmp .GenerateBlock_Top - ;; 1,2,3,4,5,6,7 bytes remain + ;; 1,2,3,4,5,6,7 bytes remain .Partial_Machine_Word: - ;; Test bit 2 to see if size is at least 4 - test bsize, 4 - jz .Bit_2_Not_Set + ;; Test bit 2 to see if size is at least 4 + test bsize, 4 + jz .Bit_2_Not_Set - mov [buffer], eax - shr rax, 32 - add buffer, 4 + mov [buffer], eax + shr rax, 32 + add buffer, 4 .Bit_2_Not_Set: - ;; Test bit 1 to see if size is at least 2 - test bsize, 2 - jz .Bit_1_Not_Set + ;; Test bit 1 to see if size is at least 2 + test bsize, 2 + jz .Bit_1_Not_Set - mov [buffer], ax - shr eax, 16 - add buffer, 2 + mov [buffer], ax + shr eax, 16 + add buffer, 2 .Bit_1_Not_Set: - ;; Test bit 0 to see if size is at least 1 - test bsize, 1 - jz .GenerateBlock_Success + ;; Test bit 0 to see if size is at least 1 + test bsize, 1 + jz .Bit_0_Not_Set - mov [buffer], al + mov [buffer], al .Bit_0_Not_Set: - ;; We've hit all the bits - jmp .GenerateBlock_Success + ;; We've hit all the bits -.GenerateBlock_PreRet: +.GenerateBlock_Return: - ;; Test for success (was the request completely fulfilled?) - cmp bsize, 0 - je .GenerateBlock_Success + xor rax, rax + ret -.GenerateBlock_Failure: - - xor rax, rax - mov al, RDSEED_FAILURE - ret - -.GenerateBlock_Success: - - xor rax, rax - mov al, RDSEED_SUCCESS - ret - -%endif ;; _M_X64 +%endif ;; _M_X64 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - diff --git a/rdrand.asm b/rdrand.asm index c181a4dc..362e1011 100644 --- a/rdrand.asm +++ b/rdrand.asm @@ -13,24 +13,15 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -TITLE MASM_RRA_GenerateBlock and MASM_RSA_GenerateBlock +TITLE MASM_RDRAND_GenerateBlock and MASM_RDSEED_GenerateBlock SUBTITLE Microsoft specific ASM code to utilize RDRAND and RDSEED for down level Microsoft toolchains -PUBLIC MASM_RRA_GenerateBlock -PUBLIC MASM_RSA_GenerateBlock +PUBLIC MASM_RDRAND_GenerateBlock +PUBLIC MASM_RDSEED_GenerateBlock ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Naming convention used in rdrand.{h|cpp|asm} -;; MSC = Microsoft Compiler (and compatibles) -;; GCC = GNU Compiler (and compatibles) -;; ALL = MSC and GCC (and compatibles) -;; RRA = RDRAND, Assembly -;; RSA = RDSEED, Assembly -;; RRI = RDRAND, Intrinsic -;; RSA = RDSEED, Intrinsic - ;; Caller/Callee Saved Registers ;; https://msdn.microsoft.com/en-us/library/6t169e9c.aspx @@ -39,19 +30,9 @@ PUBLIC MASM_RSA_GenerateBlock ;; C/C++ Function prototypes ;; X86: -;; extern "C" int MASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety); +;; extern "C" void MASM_RDRAND_GenerateBlock(byte* ptr, size_t size); ;; X64: -;; extern "C" int __fastcall MASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety); - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Return values -RDRAND_SUCCESS EQU 1 -RDRAND_FAILURE EQU 0 - -RDSEED_SUCCESS EQU 1 -RDSEED_FAILURE EQU 0 +;; extern "C" void __fastcall MASM_RDRAND_GenerateBlock(byte* ptr, size_t size); ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -77,109 +58,80 @@ OPTION EPILOGUE:NONE ;; Caller pushes on stack following CDECL (right to left) ;; arg1: byte* buffer ;; arg2: size_t bsize -;; arg3: unsigned int safety -;; EAX (out): success (1), failure (0) -MASM_RRA_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD, arg3:DWORD +MASM_RDRAND_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD - MWSIZE EQU 04h ;; machine word size - buffer EQU edi - bsize EQU edx - safety EQU ecx + MWSIZE EQU 04h ;; machine word size + buffer EQU edi + bsize EQU edx Load_Arguments: - mov buffer, DWORD PTR [esp+04h] ;; arg1 - mov bsize, DWORD PTR [esp+08h] ;; arg2 - mov safety, DWORD PTR [esp+0Ch] ;; arg3 + mov buffer, DWORD PTR [esp+04h] ;; arg1 + mov bsize, DWORD PTR [esp+08h] ;; arg2 -Validate_Pointer: - - cmp buffer, 0 - je GenerateBlock_PreRet - - ;; Top of While loop + ;; Top of While loop GenerateBlock_Top: - ;; Check remaining size - cmp bsize, 0 - je GenerateBlock_Success + ;; Check remaining size + cmp bsize, 0 + je GenerateBlock_Return Call_RDRAND_EAX: - ;; RDRAND is not available prior to VS2012. Just emit - ;; the byte codes using DB. This is `rdrand eax`. - DB 0Fh, 0C7h, 0F0h + ;; RDRAND is not available prior to VS2012. Just emit + ;; the byte codes using DB. This is `rdrand eax`. + DB 0Fh, 0C7h, 0F0h - ;; If CF=1, the number returned by RDRAND is valid. - ;; If CF=0, a random number was not available. - jc RDRAND_succeeded + ;; If CF=1, the number returned by RDRAND is valid. + ;; If CF=0, a random number was not available. -RDRAND_failed: - - ;; Exit if we've reached the limit - cmp safety, 0 - je GenerateBlock_Failure - - dec safety - jmp GenerateBlock_Top + ;; Retry immediately + jnc Call_RDRAND_EAX RDRAND_succeeded: - cmp bsize, MWSIZE - jb Partial_Machine_Word + cmp bsize, MWSIZE + jb Partial_Machine_Word Full_Machine_Word: - mov DWORD PTR [buffer], eax - add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like - sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds + mov DWORD PTR [buffer], eax + add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like + sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds - ;; Continue - jmp GenerateBlock_Top + ;; Continue + jmp GenerateBlock_Top - ;; 1,2,3 bytes remain + ;; 1,2,3 bytes remain Partial_Machine_Word: - ;; Test bit 1 to see if size is at least 2 - test bsize, 2 - jz Bit_1_Not_Set + ;; Test bit 1 to see if size is at least 2 + test bsize, 2 + jz Bit_1_Not_Set - mov WORD PTR [buffer], ax - shr eax, 16 - add buffer, 2 + mov WORD PTR [buffer], ax + shr eax, 16 + add buffer, 2 Bit_1_Not_Set: - ;; Test bit 0 to see if size is at least 1 - test bsize, 1 - jz GenerateBlock_Success + ;; Test bit 0 to see if size is at least 1 + test bsize, 1 + jz Bit_0_Not_Set - mov BYTE PTR [buffer], al + mov BYTE PTR [buffer], al Bit_0_Not_Set: - ;; We've hit all the bits - jmp GenerateBlock_Success + ;; We've hit all the bits -GenerateBlock_PreRet: +GenerateBlock_Return: - ;; Test for success (was the request completely fulfilled?) - cmp bsize, 0 - je GenerateBlock_Success + ;; Clear artifacts + xor eax, eax + ret -GenerateBlock_Failure: - - xor eax, eax - mov al, RDRAND_FAILURE - ret - -GenerateBlock_Success: - - xor eax, eax - mov al, RDRAND_SUCCESS - ret - -MASM_RRA_GenerateBlock ENDP +MASM_RDRAND_GenerateBlock ENDP ENDIF ;; _M_X86 @@ -198,116 +150,87 @@ OPTION EPILOGUE:NONE ;; RCX (in): arg1, byte* buffer ;; RDX (in): arg2, size_t bsize -;; R8d (in): arg3, unsigned int safety -;; RAX (out): success (1), failure (0) -MASM_RRA_GenerateBlock PROC +MASM_RDRAND_GenerateBlock PROC - MWSIZE EQU 08h ;; machine word size - buffer EQU rcx - bsize EQU rdx - safety EQU r8d + MWSIZE EQU 08h ;; machine word size + buffer EQU rcx + bsize EQU rdx - ;; No need for Load_Arguments due to fastcall + ;; No need for Load_Arguments due to fastcall -Validate_Pointer: - - ;; Validate pointer - cmp buffer, 0 - je GenerateBlock_PreRet - - ;; Top of While loop + ;; Top of While loop GenerateBlock_Top: - ;; Check remaining size - cmp bsize, 0 - je GenerateBlock_Success + ;; Check remaining size + cmp bsize, 0 + je GenerateBlock_Return Call_RDRAND_RAX: - ;; RDRAND is not available prior to VS2012. Just emit - ;; the byte codes using DB. This is `rdrand rax`. - DB 048h, 0Fh, 0C7h, 0F0h + ;; RDRAND is not available prior to VS2012. Just emit + ;; the byte codes using DB. This is `rdrand rax`. + DB 048h, 0Fh, 0C7h, 0F0h - ;; If CF=1, the number returned by RDRAND is valid. - ;; If CF=0, a random number was not available. - jc RDRAND_succeeded + ;; If CF=1, the number returned by RDRAND is valid. + ;; If CF=0, a random number was not available. -RDRAND_failed: - - ;; Exit if we've reached the limit - cmp safety, 0 - je GenerateBlock_Failure - - dec safety - jmp GenerateBlock_Top + ;; Retry immediately + jnc Call_RDRAND_RAX RDRAND_succeeded: - cmp bsize, MWSIZE - jb Partial_Machine_Word + cmp bsize, MWSIZE + jb Partial_Machine_Word Full_Machine_Word: - mov QWORD PTR [buffer], rax - add buffer, MWSIZE - sub bsize, MWSIZE + mov QWORD PTR [buffer], rax + add buffer, MWSIZE + sub bsize, MWSIZE - ;; Continue - jmp GenerateBlock_Top + ;; Continue + jmp GenerateBlock_Top - ;; 1,2,3,4,5,6,7 bytes remain + ;; 1,2,3,4,5,6,7 bytes remain Partial_Machine_Word: - ;; Test bit 2 to see if size is at least 4 - test bsize, 4 - jz Bit_2_Not_Set + ;; Test bit 2 to see if size is at least 4 + test bsize, 4 + jz Bit_2_Not_Set - mov DWORD PTR [buffer], eax - shr rax, 32 - add buffer, 4 + mov DWORD PTR [buffer], eax + shr rax, 32 + add buffer, 4 Bit_2_Not_Set: - ;; Test bit 1 to see if size is at least 2 - test bsize, 2 - jz Bit_1_Not_Set + ;; Test bit 1 to see if size is at least 2 + test bsize, 2 + jz Bit_1_Not_Set - mov WORD PTR [buffer], ax - shr eax, 16 - add buffer, 2 + mov WORD PTR [buffer], ax + shr eax, 16 + add buffer, 2 Bit_1_Not_Set: - ;; Test bit 0 to see if size is at least 1 - test bsize, 1 - jz GenerateBlock_Success + ;; Test bit 0 to see if size is at least 1 + test bsize, 1 + jz Bit_0_Not_Set - mov BYTE PTR [buffer], al + mov BYTE PTR [buffer], al Bit_0_Not_Set: - ;; We've hit all the bits - jmp GenerateBlock_Success + ;; We've hit all the bits -GenerateBlock_PreRet: +GenerateBlock_Return: - ;; Test for success (was the request completely fulfilled?) - cmp bsize, 0 - je GenerateBlock_Success - -GenerateBlock_Failure: - - xor rax, rax - mov al, RDRAND_FAILURE + ;; Clear artifacts + xor rax, rax ret -GenerateBlock_Success: - - xor rax, rax - mov al, RDRAND_SUCCESS - ret - -MASM_RRA_GenerateBlock ENDP +MASM_RDRAND_GenerateBlock ENDP ENDIF ;; _M_X64 @@ -328,109 +251,80 @@ OPTION EPILOGUE:NONE ;; Caller pushes on stack following CDECL (right to left) ;; arg1: byte* buffer ;; arg2: size_t bsize -;; arg3: unsigned int safety -;; EAX (out): success (1), failure (0) -MASM_RSA_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD, arg3:DWORD +MASM_RDSEED_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD - MWSIZE EQU 04h ;; machine word size - buffer EQU edi - bsize EQU edx - safety EQU ecx + MWSIZE EQU 04h ;; machine word size + buffer EQU edi + bsize EQU edx Load_Arguments: - mov buffer, DWORD PTR [esp+04h] ;; arg1 - mov bsize, DWORD PTR [esp+08h] ;; arg2 - mov safety, DWORD PTR [esp+0Ch] ;; arg3 + mov buffer, DWORD PTR [esp+04h] ;; arg1 + mov bsize, DWORD PTR [esp+08h] ;; arg2 -Validate_Pointer: - - cmp buffer, 0 - je GenerateBlock_PreRet - - ;; Top of While loop + ;; Top of While loop GenerateBlock_Top: - ;; Check remaining size - cmp bsize, 0 - je GenerateBlock_Success + ;; Check remaining size + cmp bsize, 0 + je GenerateBlock_Return Call_RDSEED_EAX: - ;; RDSEED is not available prior to VS2012. Just emit - ;; the byte codes using DB. This is `rdseed eax`. - DB 0Fh, 0C7h, 0F8h + ;; RDSEED is not available prior to VS2012. Just emit + ;; the byte codes using DB. This is `rdseed eax`. + DB 0Fh, 0C7h, 0F8h - ;; If CF=1, the number returned by RDSEED is valid. - ;; If CF=0, a random number was not available. - jc RDSEED_succeeded + ;; If CF=1, the number returned by RDSEED is valid. + ;; If CF=0, a random number was not available. -RDSEED_failed: - - ;; Exit if we've reached the limit - cmp safety, 0 - je GenerateBlock_Failure - - dec safety - jmp GenerateBlock_Top + ;; Retry immediately + jnc Call_RDSEED_EAX RDSEED_succeeded: - cmp bsize, MWSIZE - jb Partial_Machine_Word + cmp bsize, MWSIZE + jb Partial_Machine_Word Full_Machine_Word: - mov DWORD PTR [buffer], eax - add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like - sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds + mov DWORD PTR [buffer], eax + add buffer, MWSIZE ;; No need for Intel Core 2 slow workarounds, like + sub bsize, MWSIZE ;; `lea buffer,[buffer+MWSIZE]` for faster adds - ;; Continue - jmp GenerateBlock_Top + ;; Continue + jmp GenerateBlock_Top - ;; 1,2,3 bytes remain + ;; 1,2,3 bytes remain Partial_Machine_Word: - ;; Test bit 1 to see if size is at least 2 - test bsize, 2 - jz Bit_1_Not_Set + ;; Test bit 1 to see if size is at least 2 + test bsize, 2 + jz Bit_1_Not_Set - mov WORD PTR [buffer], ax - shr eax, 16 - add buffer, 2 + mov WORD PTR [buffer], ax + shr eax, 16 + add buffer, 2 Bit_1_Not_Set: - ;; Test bit 0 to see if size is at least 1 - test bsize, 1 - jz GenerateBlock_Success + ;; Test bit 0 to see if size is at least 1 + test bsize, 1 + jz Bit_0_Not_Set - mov BYTE PTR [buffer], al + mov BYTE PTR [buffer], al Bit_0_Not_Set: - ;; We've hit all the bits - jmp GenerateBlock_Success + ;; We've hit all the bits -GenerateBlock_PreRet: +GenerateBlock_Return: - ;; Test for success (was the request completely fulfilled?) - cmp bsize, 0 - je GenerateBlock_Success - -GenerateBlock_Failure: - - xor eax, eax - mov al, RDSEED_FAILURE + ;; Clear artifacts + xor eax, eax ret -GenerateBlock_Success: - - xor eax, eax - mov al, RDSEED_SUCCESS - ret - -MASM_RSA_GenerateBlock ENDP +MASM_RDSEED_GenerateBlock ENDP ENDIF ;; _M_X86 @@ -449,116 +343,87 @@ OPTION EPILOGUE:NONE ;; RCX (in): arg1, byte* buffer ;; RDX (in): arg2, size_t bsize -;; R8d (in): arg3, unsigned int safety -;; RAX (out): success (1), failure (0) -MASM_RSA_GenerateBlock PROC ;; arg1:QWORD,arg2:QWORD,arg3:DWORD +MASM_RDSEED_GenerateBlock PROC ;; arg1:QWORD,arg2:QWORD - MWSIZE EQU 08h ;; machine word size - buffer EQU rcx - bsize EQU rdx - safety EQU r8d + MWSIZE EQU 08h ;; machine word size + buffer EQU rcx + bsize EQU rdx - ;; No need for Load_Arguments due to fastcall + ;; No need for Load_Arguments due to fastcall -Validate_Pointer: - - ;; Validate pointer - cmp buffer, 0 - je GenerateBlock_PreRet - - ;; Top of While loop + ;; Top of While loop GenerateBlock_Top: - ;; Check remaining size - cmp bsize, 0 - je GenerateBlock_Success + ;; Check remaining size + cmp bsize, 0 + je GenerateBlock_Return Call_RDSEED_RAX: - ;; RDSEED is not available prior to VS2012. Just emit - ;; the byte codes using DB. This is `rdseed rax`. - DB 048h, 0Fh, 0C7h, 0F8h + ;; RDSEED is not available prior to VS2012. Just emit + ;; the byte codes using DB. This is `rdseed rax`. + DB 048h, 0Fh, 0C7h, 0F8h - ;; If CF=1, the number returned by RDSEED is valid. - ;; If CF=0, a random number was not available. - jc RDSEED_succeeded + ;; If CF=1, the number returned by RDSEED is valid. + ;; If CF=0, a random number was not available. -RDSEED_failed: - - ;; Exit if we've reached the limit - cmp safety, 0 - je GenerateBlock_Failure - - dec safety - jmp GenerateBlock_Top + ;; Retry immediately + jnc Call_RDSEED_RAX RDSEED_succeeded: - cmp bsize, MWSIZE - jb Partial_Machine_Word + cmp bsize, MWSIZE + jb Partial_Machine_Word Full_Machine_Word: - mov QWORD PTR [buffer], rax - add buffer, MWSIZE - sub bsize, MWSIZE + mov QWORD PTR [buffer], rax + add buffer, MWSIZE + sub bsize, MWSIZE - ;; Continue - jmp GenerateBlock_Top + ;; Continue + jmp GenerateBlock_Top - ;; 1,2,3,4,5,6,7 bytes remain + ;; 1,2,3,4,5,6,7 bytes remain Partial_Machine_Word: - ;; Test bit 2 to see if size is at least 4 - test bsize, 4 - jz Bit_2_Not_Set + ;; Test bit 2 to see if size is at least 4 + test bsize, 4 + jz Bit_2_Not_Set - mov DWORD PTR [buffer], eax - shr rax, 32 - add buffer, 4 + mov DWORD PTR [buffer], eax + shr rax, 32 + add buffer, 4 Bit_2_Not_Set: - ;; Test bit 1 to see if size is at least 2 - test bsize, 2 - jz Bit_1_Not_Set + ;; Test bit 1 to see if size is at least 2 + test bsize, 2 + jz Bit_1_Not_Set - mov WORD PTR [buffer], ax - shr eax, 16 - add buffer, 2 + mov WORD PTR [buffer], ax + shr eax, 16 + add buffer, 2 Bit_1_Not_Set: - ;; Test bit 0 to see if size is at least 1 - test bsize, 1 - jz GenerateBlock_Success + ;; Test bit 0 to see if size is at least 1 + test bsize, 1 + jz Bit_0_Not_Set - mov BYTE PTR [buffer], al + mov BYTE PTR [buffer], al Bit_0_Not_Set: - ;; We've hit all the bits - jmp GenerateBlock_Success + ;; We've hit all the bits -GenerateBlock_PreRet: +GenerateBlock_Return: - ;; Test for success (was the request completely fulfilled?) - cmp bsize, 0 - je GenerateBlock_Success + ;; Clear artifacts + xor rax, rax + ret -GenerateBlock_Failure: - - xor rax, rax - mov al, RDSEED_FAILURE - ret - -GenerateBlock_Success: - - xor rax, rax - mov al, RDSEED_SUCCESS - ret - -MASM_RSA_GenerateBlock ENDP +MASM_RDSEED_GenerateBlock ENDP ENDIF ;; _M_X64 diff --git a/rdrand.cpp b/rdrand.cpp index db1f336e..cba095ad 100644 --- a/rdrand.cpp +++ b/rdrand.cpp @@ -7,26 +7,21 @@ #include "rdrand.h" #include "cpu.h" +#include + #if CRYPTOPP_MSC_VERSION # pragma warning(disable: 4100) #endif -// This file (and friends) provides both RDRAND and RDSEED, but its somewhat -// experimental. They were added at Crypto++ 5.6.3. At compile time, it -// indirectly uses CRYPTOPP_BOOL_{X86|X32|X64} (via CRYPTOPP_CPUID_AVAILABLE) +// This file (and friends) provides both RDRAND and RDSEED. They were added at +// Crypto++ 5.6.3. At compile time, it uses CRYPTOPP_BOOL_{X86|X32|X64} // to select an implementation or "throw NotImplemented". At runtime, the // class uses the result of CPUID to determine if RDRAND or RDSEED are -// available. If not available, a lazy throw strategy is used. I.e., the -// throw is deferred until GenerateBlock() is called. - -// Here's the naming convention for the functions.... -// MSC = Microsoft Compiler (and compatibles) -// GCC = GNU Compiler (and compatibles) -// ALL = MSC and GCC (and compatibles) -// RRA = RDRAND, Assembly -// RSA = RDSEED, Assembly -// RRI = RDRAND, Intrinsic -// RSA = RDSEED, Intrinsic +// available. If not available, then a SIGILL will result. +// The original classes accepted a retry count. Retries were superflous for +// RDRAND, and RDSEED encountered a failure about 1 in 256 bytes depending +// on the processor. Retries were removed at Crypto++ 6.0 because the +// functions always fulfill the request. ///////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////// @@ -44,24 +39,17 @@ ///////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////// -// In general, the library's ASM code is best on Windows, and Intrinsics is -// the best code under GCC. Clang is missing symbols, so it gets ASM. -// The NASM code is optimized well on Linux, but its not easy to cut-in. -#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) -# ifndef CRYPTOPP_CPUID_AVAILABLE -# define CRYPTOPP_CPUID_AVAILABLE -# endif -#endif - #if defined(CRYPTOPP_CPUID_AVAILABLE) # if defined(CRYPTOPP_MSC_VERSION) # if (CRYPTOPP_MSC_VERSION >= 1700) -# define ALL_RDRAND_INTRIN_AVAILABLE 1 +# define MASM_RDRAND_ASM_AVAILABLE 1 +// # define ALL_RDRAND_INTRIN_AVAILABLE 1 # else # define MASM_RDRAND_ASM_AVAILABLE 1 # endif # if (CRYPTOPP_MSC_VERSION >= 1800) -# define ALL_RDSEED_INTRIN_AVAILABLE 1 +# define MASM_RDSEED_ASM_AVAILABLE 1 +// # define ALL_RDSEED_INTRIN_AVAILABLE 1 # else # define MASM_RDSEED_ASM_AVAILABLE 1 # endif @@ -80,45 +68,15 @@ # define GCC_RDSEED_ASM_AVAILABLE 1 # endif # elif defined(CRYPTOPP_GCC_VERSION) -# if defined(__RDRND__) && (CRYPTOPP_GCC_VERSION >= 30200) -# define ALL_RDRAND_INTRIN_AVAILABLE 1 -# else +# if defined(__RDRND__) || (CRYPTOPP_GCC_VERSION >= 40600) # define GCC_RDRAND_ASM_AVAILABLE 1 # endif -# if defined(__RDSEED__) && (CRYPTOPP_GCC_VERSION >= 30200) -# define ALL_RDSEED_INTRIN_AVAILABLE 1 -# else +# if defined(__RDSEED__) || (CRYPTOPP_GCC_VERSION >= 40600) # define GCC_RDSEED_ASM_AVAILABLE 1 # endif # endif #endif -// Debug diagnostics -#if 0 -# if MASM_RDRAND_ASM_AVAILABLE -# pragma message ("MASM_RDRAND_ASM_AVAILABLE is 1") -# elif NASM_RDRAND_ASM_AVAILABLE -# pragma message ("NASM_RDRAND_ASM_AVAILABLE is 1") -# elif GCC_RDRAND_ASM_AVAILABLE -# pragma message ("GCC_RDRAND_ASM_AVAILABLE is 1") -# elif ALL_RDRAND_INTRIN_AVAILABLE -# pragma message ("ALL_RDRAND_INTRIN_AVAILABLE is 1") -# else -# pragma message ("RDRAND is not available") -# endif -# if MASM_RDSEED_ASM_AVAILABLE -# pragma message ("MASM_RDSEED_ASM_AVAILABLE is 1") -# elif NASM_RDSEED_ASM_AVAILABLE -# pragma message ("NASM_RDSEED_ASM_AVAILABLE is 1") -# elif GCC_RDSEED_ASM_AVAILABLE -# pragma message ("GCC_RDSEED_ASM_AVAILABLE is 1") -# elif ALL_RDSEED_INTRIN_AVAILABLE -# pragma message ("ALL_RDSEED_INTRIN_AVAILABLE is 1") -# else -# pragma message ("RDSEED is not available") -# endif -#endif - ///////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////// @@ -129,376 +87,303 @@ # endif # if defined(__has_include) # if __has_include() -# include // rdrand for Clang (immintrin.h); rdseed for Clang (rdseedintrin.h) +# include # endif # endif #endif #if MASM_RDRAND_ASM_AVAILABLE # ifdef _M_X64 -extern "C" int CRYPTOPP_FASTCALL MASM_RRA_GenerateBlock(byte*, size_t, unsigned int); +extern "C" void CRYPTOPP_FASTCALL MASM_RDRAND_GenerateBlock(byte*, size_t); // # pragma comment(lib, "rdrand-x64.lib") # else -extern "C" int MASM_RRA_GenerateBlock(byte*, size_t, unsigned int); +extern "C" void MASM_RDRAND_GenerateBlock(byte*, size_t); // # pragma comment(lib, "rdrand-x86.lib") # endif #endif #if MASM_RDSEED_ASM_AVAILABLE # ifdef _M_X64 -extern "C" int CRYPTOPP_FASTCALL MASM_RSA_GenerateBlock(byte*, size_t, unsigned int); +extern "C" void CRYPTOPP_FASTCALL MASM_RDSEED_GenerateBlock(byte*, size_t); // # pragma comment(lib, "rdrand-x64.lib") # else -extern "C" int MASM_RSA_GenerateBlock(byte*, size_t, unsigned int); +extern "C" void MASM_RDSEED_GenerateBlock(byte*, size_t); // # pragma comment(lib, "rdrand-x86.lib") # endif #endif #if NASM_RDRAND_ASM_AVAILABLE -extern "C" int NASM_RRA_GenerateBlock(byte*, size_t, unsigned int); +extern "C" void NASM_RDRAND_GenerateBlock(byte*, size_t); #endif #if NASM_RDSEED_ASM_AVAILABLE -extern "C" int NASM_RSA_GenerateBlock(byte*, size_t, unsigned int); +extern "C" void NASM_RDSEED_GenerateBlock(byte*, size_t); #endif ///////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////// +ANONYMOUS_NAMESPACE_BEGIN +// GCC, MSVC and SunCC has optimized calls to RDRAND away. We experieced it +// under GCC and MSVC. Other have reported it for SunCC. This attempts +// to tame the optimizer even though it abuses the volatile keyword. +static volatile int s_unused; +ANONYMOUS_NAMESPACE_END + NAMESPACE_BEGIN(CryptoPP) -#if ALL_RDRAND_INTRIN_AVAILABLE -static int ALL_RRI_GenerateBlock(byte *output, size_t size, unsigned int safety) +// Fills 4 bytes +inline void RDRAND32(void* output) { - CRYPTOPP_ASSERT((output && size) || !(output || size)); -#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 - word32 val; +#if defined(__SUNPRO_CC) + __asm__ __volatile__ + ( + ".byte 0x0f, 0xc7, 0xf0;\n" + ".byte 0x73, 0xfb;\n" + : "=a" (*reinterpret_cast(output)) + : : "cc" + ); +#elif defined(GCC_RDRAND_ASM_AVAILABLE) + __asm__ __volatile__ + ( + INTEL_NOPREFIX + ASL(1) + AS1(rdrand eax) + ASJ(jnc, 1, b) + ATT_NOPREFIX + : "=a" (*reinterpret_cast(output)) + : : "cc" + ); +#elif defined(ALL_RDRAND_INTRIN_AVAILABLE) + while(!_rdrand32_step(reinterpret_cast(output))) {} #else - word64 val; + // RDRAND not detected at compile time, or no suitable compiler found + throw NotImplemented("RDRAND: failed to find an implementation"); #endif - - while (size >= sizeof(val)) - { -#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 - if (_rdrand32_step((word32*)output)) -#else - // Cast due to GCC, http://github.com/weidai11/cryptopp/issues/236 - if (_rdrand64_step(reinterpret_cast(output))) -#endif - { - output += sizeof(val); - size -= sizeof(val); - } - else - { - if (!safety--) - { - CRYPTOPP_ASSERT(0); - return 0; - } - } - } - - if (size) - { -#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 - if (_rdrand32_step(&val)) -#else - // Cast due to GCC, http://github.com/weidai11/cryptopp/issues/236 - if (_rdrand64_step(reinterpret_cast(&val))) -#endif - { - memcpy(output, &val, size); - size = 0; - } - else - { - if (!safety--) - { - CRYPTOPP_ASSERT(0); - return 0; - } - } - } - - SecureWipeBuffer(&val, 1); - - return int(size == 0); -} -#endif // ALL_RDRAND_INTRINSIC_AVAILABLE - -#if GCC_RDRAND_ASM_AVAILABLE -static int GCC_RRA_GenerateBlock(byte *output, size_t size, unsigned int safety) -{ - CRYPTOPP_ASSERT((output && size) || !(output || size)); -#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 - word64 val; -#else - word32 val; -#endif - char rc; - while (size) - { - __asm__ volatile( -#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 - ".byte 0x48, 0x0f, 0xc7, 0xf0;\n" // rdrand rax -#else - ".byte 0x0f, 0xc7, 0xf0;\n" // rdrand eax -#endif - "setc %1; " - : "=a" (val), "=qm" (rc) - : - : "cc" - ); - - if (rc) - { - if (size >= sizeof(val)) - { - PutWord(true, LITTLE_ENDIAN_ORDER, output, val, NULLPTR); - output += sizeof(val); - size -= sizeof(val); - } - else - { - memcpy(output, &val, size); - size = 0; - } - } - else - { - if (!safety--) - { - CRYPTOPP_ASSERT(0); - return 0; - } - } - } - - SecureWipeBuffer(&val, 1); - - return int(size == 0); } -#endif // GCC_RDRAND_ASM_AVAILABLE +#if CRYPTOPP_BOOL_X64 +// Fills 8 bytes +inline void RDRAND64(void* output) +{ +#if defined(__SUNPRO_CC) && (__SUNPRO_CC >= 0x5100) + __asm__ __volatile__ + ( + ".byte 0x48, 0x0f, 0xc7, 0xf0;\n" + ".byte 0x73, 0xfa;\n" + : "=a" (*reinterpret_cast(output)) + : : "cc" + ); +#elif defined(GCC_RDRAND_ASM_AVAILABLE) + __asm__ __volatile__ + ( + INTEL_NOPREFIX + ASL(1) + AS1(rdrand rax) + ASJ(jnc, 1, b) + ATT_NOPREFIX + : "=a" (*reinterpret_cast(output)) + : : "cc" + ); +#elif defined(ALL_RDRAND_INTRIN_AVAILABLE) + while(!_rdrand64_step(reinterpret_cast(output))) {} +#else + // RDRAND not detected at compile time, or no suitable compiler found + throw NotImplemented("RDRAND: failed to find an implementation"); +#endif +} +#endif // CRYPTOPP_BOOL_X64 and RDRAND64 -#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) void RDRAND::GenerateBlock(byte *output, size_t size) { - CRYPTOPP_UNUSED(output), CRYPTOPP_UNUSED(size); - CRYPTOPP_ASSERT((output && size) || !(output || size)); + CRYPTOPP_ASSERT((output && size) || !(output || size)); + if (size == 0) return; - if(!HasRDRAND()) - throw NotImplemented("RDRAND: rdrand is not available on this platform"); +#if defined(NASM_RDRAND_ASM_AVAILABLE) - int rc; CRYPTOPP_UNUSED(rc); -#if MASM_RDRAND_ASM_AVAILABLE - rc = MASM_RRA_GenerateBlock(output, size, m_retries); - if (!rc) { throw RDRAND_Err("MASM_RRA_GenerateBlock"); } -#elif NASM_RDRAND_ASM_AVAILABLE - rc = NASM_RRA_GenerateBlock(output, size, m_retries); - if (!rc) { throw RDRAND_Err("NASM_RRA_GenerateBlock"); } -#elif ALL_RDRAND_INTRIN_AVAILABLE - rc = ALL_RRI_GenerateBlock(output, size, m_retries); - if (!rc) { throw RDRAND_Err("ALL_RRI_GenerateBlock"); } -#elif GCC_RDRAND_ASM_AVAILABLE - rc = GCC_RRA_GenerateBlock(output, size, m_retries); - if (!rc) { throw RDRAND_Err("GCC_RRA_GenerateBlock"); } + NASM_RDRAND_GenerateBlock(output, size); + +#elif defined(MASM_RDRAND_ASM_AVAILABLE) + + MASM_RDRAND_GenerateBlock(output, size); + +#elif CRYPTOPP_BOOL_X64 + size_t i = 0; + for (i = 0; i < size/8; i++) + RDRAND64(reinterpret_cast(output)+i); + + output += i*8; + size -= i*8; + + if (size) + { + word64 val; + RDRAND64(&val); + std::memcpy(output, &val, size); + } +#elif (CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86) + size_t i = 0; + for (i = 0; i < size/4; i++) + RDRAND32(reinterpret_cast(output)+i); + + output += i*4; + size -= i*4; + + if (size) + { + word32 val; + RDRAND32(&val); + std::memcpy(output, &val, size); + } #else - // RDRAND not detected at compile time, and no suitable compiler found - throw NotImplemented("RDRAND: failed to find a suitable implementation???"); -#endif // CRYPTOPP_CPUID_AVAILABLE + // RDRAND not detected at compile time, or no suitable compiler found + throw NotImplemented("RDRAND: failed to find a suitable implementation"); +#endif + + // Size is not 0 + s_unused ^= output[0]; } void RDRAND::DiscardBytes(size_t n) { - // RoundUpToMultipleOf is used because a full word is read, and its cheaper - // to discard full words. There's no sense in dealing with tail bytes. - CRYPTOPP_ASSERT(HasRDRAND()); -#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 - FixedSizeSecBlock discard; - n = RoundUpToMultipleOf(n, sizeof(word64)); -#else - FixedSizeSecBlock discard; - n = RoundUpToMultipleOf(n, sizeof(word32)); -#endif + // RoundUpToMultipleOf is used because a full word is read, and its cheaper + // to discard full words. There's no sense in dealing with tail bytes. + FixedSizeSecBlock discard; + n = RoundUpToMultipleOf(n, sizeof(word64)); - size_t count = STDMIN(n, discard.SizeInBytes()); - while (count) - { - GenerateBlock(discard.BytePtr(), count); - n -= count; - count = STDMIN(n, discard.SizeInBytes()); - } + size_t count = STDMIN(n, discard.SizeInBytes()); + while (count) + { + GenerateBlock(discard.BytePtr(), count); + n -= count; + count = STDMIN(n, discard.SizeInBytes()); + } } -#endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 ///////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////// -#if ALL_RDSEED_INTRIN_AVAILABLE -static int ALL_RSI_GenerateBlock(byte *output, size_t size, unsigned int safety) +// Fills 4 bytes +inline void RDSEED32(void* output) { - CRYPTOPP_ASSERT((output && size) || !(output || size)); -#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 - word32 val; +#if defined(__SUNPRO_CC) + __asm__ __volatile__ + ( + ".byte 0x0f, 0xc7, 0xf8;\n" + ".byte 0x73, 0xfb;\n" + : "=a" (*reinterpret_cast(output)) + : : "cc" + ); +#elif defined(GCC_RDSEED_ASM_AVAILABLE) + __asm__ __volatile__ + ( + INTEL_NOPREFIX + ASL(1) + AS1(rdseed eax) + ASJ(jnc, 1, b) + ATT_NOPREFIX + : "=a" (*reinterpret_cast(output)) + : : "cc" + ); +#elif defined(ALL_RDSEED_INTRIN_AVAILABLE) + while(!_rdseed32_step(reinterpret_cast(output))) {} #else - word64 val; + // RDSEED not detected at compile time, or no suitable compiler found + throw NotImplemented("RDSEED: failed to find an implementation"); #endif - - while (size >= sizeof(val)) - { -#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 - if (_rdseed32_step((word32*)output)) -#else - // Cast due to GCC, http://github.com/weidai11/cryptopp/issues/236 - if (_rdseed64_step(reinterpret_cast(output))) -#endif - { - output += sizeof(val); - size -= sizeof(val); - } - else - { - if (!safety--) - { - CRYPTOPP_ASSERT(0); - return 0; - } - } - } - - if (size) - { -#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 - if (_rdseed32_step(&val)) -#else - // Cast due to GCC, http://github.com/weidai11/cryptopp/issues/236 - if (_rdseed64_step(reinterpret_cast(&val))) -#endif - { - memcpy(output, &val, size); - size = 0; - } - else - { - if (!safety--) - { - CRYPTOPP_ASSERT(0); - return 0; - } - } - } - - SecureWipeBuffer(&val, 1); - - return int(size == 0); } -#endif // ALL_RDSEED_INTRIN_AVAILABLE -#if GCC_RDSEED_ASM_AVAILABLE -static int GCC_RSA_GenerateBlock(byte *output, size_t size, unsigned int safety) +// Fills 8 bytes +inline void RDSEED64(void* output) { - CRYPTOPP_ASSERT((output && size) || !(output || size)); -#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 - word64 val; +#if defined(__SUNPRO_CC) && (__SUNPRO_CC >= 0x5100) + __asm__ __volatile__ + ( + ".byte 0x48, 0x0f, 0xc7, 0xf8;\n" + ".byte 0x73, 0xfa;\n" + : "=a" (*reinterpret_cast(output)) + : : "cc" + ); +#elif defined(GCC_RDSEED_ASM_AVAILABLE) + __asm__ __volatile__ + ( + INTEL_NOPREFIX + ASL(1) + AS1(rdseed rax) + ASJ(jnc, 1, b) + ATT_NOPREFIX + : "=a" (*reinterpret_cast(output)) + : : "cc" + ); +#elif defined(ALL_RDSEED_INTRIN_AVAILABLE) + while(!_rdseed64_step(reinterpret_cast(output))) {} #else - word32 val; + // RDSEED not detected at compile time, or no suitable compiler found + throw NotImplemented("RDSEED: failed to find an implementation"); #endif - char rc; - while (size) - { - __asm__ volatile( -#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 - ".byte 0x48, 0x0f, 0xc7, 0xf8;\n" // rdseed rax -#else - ".byte 0x0f, 0xc7, 0xf8;\n" // rdseed eax -#endif - "setc %1; " - : "=a" (val), "=qm" (rc) - : - : "cc" - ); - - if (rc) - { - if (size >= sizeof(val)) - { - PutWord(true, LITTLE_ENDIAN_ORDER, output, val, NULLPTR); - output += sizeof(val); - size -= sizeof(val); - } - else - { - memcpy(output, &val, size); - size = 0; - } - } - else - { - if (!safety--) - { - CRYPTOPP_ASSERT(0); - return 0; - } - } - } - - SecureWipeBuffer(&val, 1); - - return int(size == 0); } -#endif // GCC_RDSEED_ASM_AVAILABLE -#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) void RDSEED::GenerateBlock(byte *output, size_t size) { - CRYPTOPP_UNUSED(output), CRYPTOPP_UNUSED(size); - CRYPTOPP_ASSERT((output && size) || !(output || size)); + CRYPTOPP_ASSERT((output && size) || !(output || size)); + if (size == 0) return; - if(!HasRDSEED()) - throw NotImplemented("RDSEED: rdseed is not available on this platform"); +#if defined(NASM_RDSEED_ASM_AVAILABLE) - int rc; CRYPTOPP_UNUSED(rc); -#if MASM_RDSEED_ASM_AVAILABLE - rc = MASM_RSA_GenerateBlock(output, size, m_retries); - if (!rc) { throw RDSEED_Err("MASM_RSA_GenerateBlock"); } -#elif NASM_RDSEED_ASM_AVAILABLE - rc = NASM_RSA_GenerateBlock(output, size, m_retries); - if (!rc) { throw RDRAND_Err("NASM_RSA_GenerateBlock"); } -#elif ALL_RDSEED_INTRIN_AVAILABLE - rc = ALL_RSI_GenerateBlock(output, size, m_retries); - if (!rc) { throw RDSEED_Err("ALL_RSI_GenerateBlock"); } -#elif GCC_RDSEED_ASM_AVAILABLE - rc = GCC_RSA_GenerateBlock(output, size, m_retries); - if (!rc) { throw RDSEED_Err("GCC_RSA_GenerateBlock"); } -#else - // RDSEED not detected at compile time, and no suitable compiler found - throw NotImplemented("RDSEED: failed to find a suitable implementation???"); + NASM_RDSEED_GenerateBlock(output, size); + +#elif defined(MASM_RDSEED_ASM_AVAILABLE) + + MASM_RDSEED_GenerateBlock(output, size); + +#elif CRYPTOPP_BOOL_X64 + size_t i = 0; + for (i = 0; i < size/8; i++) + RDSEED64(reinterpret_cast(output)+i); + + output += i*8; + size -= i*8; + + if (size) + { + word64 val; + RDSEED64(&val); + std::memcpy(output, &val, size); + } +#elif (CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86) + size_t i = 0; + for (i = 0; i < size/4; i++) + RDSEED32(reinterpret_cast(output)+i); + + output += i*4; + size -= i*4; + + if (size) + { + word32 val; + RDSEED32(&val); + std::memcpy(output, &val, size); + } #endif + + // Size is not 0 + s_unused ^= output[0]; } void RDSEED::DiscardBytes(size_t n) { - // RoundUpToMultipleOf is used because a full word is read, and its cheaper - // to discard full words. There's no sense in dealing with tail bytes. - CRYPTOPP_ASSERT(HasRDSEED()); -#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 - FixedSizeSecBlock discard; - n = RoundUpToMultipleOf(n, sizeof(word64)); -#else - FixedSizeSecBlock discard; - n = RoundUpToMultipleOf(n, sizeof(word32)); -#endif + // RoundUpToMultipleOf is used because a full word is read, and its cheaper + // to discard full words. There's no sense in dealing with tail bytes. + FixedSizeSecBlock discard; + n = RoundUpToMultipleOf(n, sizeof(word64)); - size_t count = STDMIN(n, discard.SizeInBytes()); - while (count) - { - GenerateBlock(discard.BytePtr(), count); - n -= count; - count = STDMIN(n, discard.SizeInBytes()); - } + size_t count = STDMIN(n, discard.SizeInBytes()); + while (count) + { + GenerateBlock(discard.BytePtr(), count); + n -= count; + count = STDMIN(n, discard.SizeInBytes()); + } } -#endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 NAMESPACE_END diff --git a/rdrand.h b/rdrand.h index 5e602697..10369b1c 100644 --- a/rdrand.h +++ b/rdrand.h @@ -9,16 +9,24 @@ #include "cryptlib.h" -// This file (and friends) provides both RDRAND and RDSEED, but its somewhat -// experimental. They were added at Crypto++ 5.6.3. At compile time, it -// indirectly uses CRYPTOPP_BOOL_{X86|X32|X64} (via CRYPTOPP_CPUID_AVAILABLE) +// This file (and friends) provides both RDRAND and RDSEED. They were added at +// Crypto++ 5.6.3. At compile time, it uses CRYPTOPP_BOOL_{X86|X32|X64} // to select an implementation or "throw NotImplemented". At runtime, the // class uses the result of CPUID to determine if RDRAND or RDSEED are -// available. If not available, a lazy throw strategy is used. I.e., the -// throw is deferred until GenerateBlock() is called. +// available. If not available, then a SIGILL will result. +// The original classes accepted a retry count. Retries were superflous for +// RDRAND, and RDSEED encountered a failure about 1 in 256 bytes depending +// on the processor. Retries were removed at Crypto++ 6.0 because the +// functions always fulfill the request. -// Microsoft added RDRAND in August 2012, VS2012. GCC added RDRAND in December 2010, GCC 4.6. -// Clang added RDRAND in July 2012, Clang 3.2. Intel added RDRAND in September 2011, ICC 12.1. +// Throughput varies wildly depending on processor and manufacturer. A Core i5 or +// Core i7 RDRAND can generate at over 200 MiB/s. A low end Celeron may perform +// RDRAND at 7 MiB/s. RDSEED performs at about 1/4 to 1/2 the rate of RDRAND. +// AMD RDRAND performed poorly during testing with Athlon X4 845 (Bulldozer v4). + +// Microsoft added RDRAND in August 2012, VS2012; RDSEED in October 2013, VS2013. +// GCC added RDRAND in December 2010, GCC 4.6. LLVM added RDRAND in July 2012, Clang 3.2. +// Intel added RDRAND in September 2011, ICC 12.1. NAMESPACE_BEGIN(CryptoPP) @@ -28,8 +36,8 @@ NAMESPACE_BEGIN(CryptoPP) class RDRAND_Err : public Exception { public: - RDRAND_Err(const std::string &operation) - : Exception(OTHER_ERROR, "RDRAND: " + operation + " operation failed") {} + RDRAND_Err(const std::string &operation) + : Exception(OTHER_ERROR, "RDRAND: " + operation + " operation failed") {} }; //! \brief Hardware generated random numbers using RDRAND instruction @@ -38,73 +46,39 @@ public: class RDRAND : public RandomNumberGenerator { public: - virtual ~RDRAND() {} + CRYPTOPP_STATIC_CONSTEXPR const char* StaticAlgorithmName() { return "RDRAND"; } - std::string AlgorithmName() const {return "RDRAND";} + virtual ~RDRAND() {} - //! \brief Construct a RDRAND generator - //! \param retries the number of retries for failed calls to the hardware - //! \details RDRAND() constructs a generator with a maximum number of retires - //! for failed generation attempts. - //! \details According to DJ of Intel, the Intel RDRAND circuit does not underflow. - //! If it did hypothetically underflow, then it would return 0 for the random value. - //! AMD's RDRAND implementation appears to provide the same behavior except the - //! values are not generated consistent with FIPS 140. - RDRAND(unsigned int retries = 4) : m_retries(retries) {} + //! \brief Construct a RDRAND generator + //! \param retries the number of retries for failed calls to the hardware + //! \details According to DJ of Intel, the Intel RDRAND circuit does not underflow. + //! If it did hypothetically underflow, then it would return 0 for the random value. + //! AMD's RDRAND implementation appears to provide the same behavior except the + //! values are not generated consistent with FIPS 140. + RDRAND() {} - //! \brief Retrieve the number of retries used by the generator - //! \returns the number of times GenerateBlock() will attempt to recover from a failed generation - unsigned int GetRetries() const - { - return m_retries; - } + //! \brief Generate random array of bytes + //! \param output the byte buffer + //! \param size the length of the buffer, in bytes + virtual void GenerateBlock(byte *output, size_t size); - //! \brief Set the number of retries used by the generator - //! \param retries number of times GenerateBlock() will attempt to recover from a failed generation - void SetRetries(unsigned int retries) - { - m_retries = retries; - } + //! \brief Generate and discard n bytes + //! \param n the number of bytes to generate and discard + //! \details the RDSEED generator discards words, not bytes. If n is + //! not a multiple of a machine word, then it is rounded up to + //! that size. + virtual void DiscardBytes(size_t n); - //! \brief Generate random array of bytes - //! \param output the byte buffer - //! \param size the length of the buffer, in bytes -#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) - virtual void GenerateBlock(byte *output, size_t size); -#else - virtual void GenerateBlock(byte *output, size_t size) { - CRYPTOPP_UNUSED(output), CRYPTOPP_UNUSED(size); - throw NotImplemented("RDRAND: rdrand is not available on this platform"); - } -#endif - - //! \brief Generate and discard n bytes - //! \param n the number of bytes to generate and discard - //! \details the RDSEED generator discards words, not bytes. If n is - //! not a multiple of a machine word, then it is rounded up to - //! that size. -#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) - virtual void DiscardBytes(size_t n); -#else - virtual void DiscardBytes(size_t n) { - CRYPTOPP_UNUSED(n); - throw NotImplemented("RDRAND: rdrand is not available on this platform"); - } -#endif - - //! \brief Update RNG state with additional unpredictable values - //! \param input unused - //! \param length unused - //! \details The operation is a nop for this generator. - virtual void IncorporateEntropy(const byte *input, size_t length) - { - // Override to avoid the base class' throw. - CRYPTOPP_UNUSED(input); CRYPTOPP_UNUSED(length); - // CRYPTOPP_ASSERT(0); // warn in debug builds - } - -private: - unsigned int m_retries; + //! \brief Update RNG state with additional unpredictable values + //! \param input unused + //! \param length unused + //! \details The operation is a nop for this generator. + virtual void IncorporateEntropy(const byte *input, size_t length) + { + // Override to avoid the base class' throw. + CRYPTOPP_UNUSED(input); CRYPTOPP_UNUSED(length); + } }; //! \brief Exception thrown when a RDSEED generator encounters @@ -113,8 +87,8 @@ private: class RDSEED_Err : public Exception { public: - RDSEED_Err(const std::string &operation) - : Exception(OTHER_ERROR, "RDSEED: " + operation + " operation failed") {} + RDSEED_Err(const std::string &operation) + : Exception(OTHER_ERROR, "RDSEED: " + operation + " operation failed") {} }; //! \brief Hardware generated random numbers using RDSEED instruction @@ -123,72 +97,40 @@ public: class RDSEED : public RandomNumberGenerator { public: - virtual ~RDSEED() {} + CRYPTOPP_STATIC_CONSTEXPR const char* StaticAlgorithmName() { return "RDSEED"; } - std::string AlgorithmName() const {return "RDSEED";} + virtual ~RDSEED() {} - //! \brief Construct a RDSEED generator - //! \param retries the number of retries for failed calls to the hardware - //! \details RDSEED() constructs a generator with a maximum number of retires - //! for failed generation attempts. - //! \details Empirical testing under a 6th generaton i7 (6200U) shows RDSEED fails - //! to fulfill requests at about once every for every 256 bytes requested. - //! The default retries reflects the expected ceiling when requesting 10,000 bytes. - RDSEED(unsigned int retries = 64) : m_retries(retries) {} + //! \brief Construct a RDSEED generator + //! \details Empirical testing under a 6th generaton i7 (6200U) shows RDSEED fails + //! to fulfill requests at about once every for every 256 bytes requested. + //! The generator runs about 4 times slower than RDRAND. + RDSEED() {} - //! \brief Retrieve the number of retries used by the generator - //! \returns the number of times GenerateBlock() will attempt to recover from a failed generation - unsigned int GetRetries() const - { - return m_retries; - } + //! \brief Generate random array of bytes + //! \param output the byte buffer + //! \param size the length of the buffer, in bytes + virtual void GenerateBlock(byte *output, size_t size); - //! \brief Set the number of retries used by the generator - //! \param retries number of times GenerateBlock() will attempt to recover from a failed generation - void SetRetries(unsigned int retries) - { - m_retries = retries; - } + //! \brief Generate and discard n bytes + //! \param n the number of bytes to generate and discard + //! \details the RDSEED generator discards words, not bytes. If n is + //! not a multiple of a machine word, then it is rounded up to + //! that size. + virtual void DiscardBytes(size_t n); - //! \brief Generate random array of bytes - //! \param output the byte buffer - //! \param size the length of the buffer, in bytes -#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) - virtual void GenerateBlock(byte *output, size_t size); -#else - virtual void GenerateBlock(byte *output, size_t size) { - CRYPTOPP_UNUSED(output), CRYPTOPP_UNUSED(size); - throw NotImplemented("RDSEED: rdseed is not available on this platform"); - } -#endif - - //! \brief Generate and discard n bytes - //! \param n the number of bytes to generate and discard - //! \details the RDSEED generator discards words, not bytes. If n is - //! not a multiple of a machine word, then it is rounded up to - //! that size. -#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) - virtual void DiscardBytes(size_t n); -#else - virtual void DiscardBytes(size_t n) { - CRYPTOPP_UNUSED(n); - throw NotImplemented("RDSEED: rdseed is not available on this platform"); - } -#endif - - //! \brief Update RNG state with additional unpredictable values - //! \param input unused - //! \param length unused - //! \details The operation is a nop for this generator. - virtual void IncorporateEntropy(const byte *input, size_t length) - { - // Override to avoid the base class' throw. - CRYPTOPP_UNUSED(input); CRYPTOPP_UNUSED(length); - // CRYPTOPP_ASSERT(0); // warn in debug builds - } + //! \brief Update RNG state with additional unpredictable values + //! \param input unused + //! \param length unused + //! \details The operation is a nop for this generator. + virtual void IncorporateEntropy(const byte *input, size_t length) + { + // Override to avoid the base class' throw. + CRYPTOPP_UNUSED(input); CRYPTOPP_UNUSED(length); + } private: - unsigned int m_retries; + unsigned int m_retries; }; NAMESPACE_END diff --git a/validat1.cpp b/validat1.cpp index 8a220879..e4e7c456 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -679,7 +679,6 @@ bool TestRDRAND() // Squash code coverage warnings on unused functions (void)rdrand.AlgorithmName(); (void)rdrand.CanIncorporateEntropy(); - rdrand.SetRetries(rdrand.GetRetries()); rdrand.IncorporateEntropy(NULLPTR, 0); if (!(entropy && compress && discard)) @@ -694,7 +693,7 @@ bool TestRDSEED() { // Testing on 5th generation i5 shows RDSEED needs about 128 retries for 10K bytes // on 64-bit/amd64 VM, and it needs more for an 32-bit/i686 VM. - RDSEED rdseed(256); + RDSEED rdseed; bool entropy = true, compress = true, discard = true; static const unsigned int SIZE = 10000; @@ -758,7 +757,6 @@ bool TestRDSEED() // Squash code coverage warnings on unused functions (void)rdseed.AlgorithmName(); (void)rdseed.CanIncorporateEntropy(); - rdseed.SetRetries(rdseed.GetRetries()); rdseed.IncorporateEntropy(NULLPTR, 0); if (!(entropy && compress && discard)) @@ -1410,8 +1408,9 @@ bool TestModeIV(SymmetricCipher &e, SymmetricCipher &d) SecByteBlock lastIV, iv(e.IVSize()); StreamTransformationFilter filter(e, new StreamTransformationFilter(d)); - // vector_ptr due to Enterprise Analysis finding on the stack based array. - vector_ptr plaintext(20480); + // Enterprise Analysis finding on the stack based array + const int BUF_SIZE=20480U; + AlignedSecByteBlock plaintext(BUF_SIZE); for (unsigned int i=1; i<20480; i*=2) {