From d3bcbc71da27e2157b16418a3d0f15b20db577f0 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Tue, 7 Mar 2017 12:57:47 -0500 Subject: [PATCH] Cutover to fastcall for MASM and X86 (Issue 387) Fix "target specific option mismatch" under GCC 4.6 --- rdrand.asm | 51 ++++++++++++++++++++------------------------------- rdrand.cpp | 26 ++++++++++++++++---------- 2 files changed, 36 insertions(+), 41 deletions(-) diff --git a/rdrand.asm b/rdrand.asm index 362e1011..5919307f 100644 --- a/rdrand.asm +++ b/rdrand.asm @@ -28,9 +28,9 @@ PUBLIC MASM_RDSEED_GenerateBlock ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; C/C++ Function prototypes +;; C/C++ Function prototypes (both are fastcall) ;; X86: -;; extern "C" void MASM_RDRAND_GenerateBlock(byte* ptr, size_t size); +;; extern "C" void __fastcall MASM_RDRAND_GenerateBlock(byte* ptr, size_t size); ;; X64: ;; extern "C" void __fastcall MASM_RDRAND_GenerateBlock(byte* ptr, size_t size); @@ -42,6 +42,9 @@ IFDEF _M_X86 ;; Set via the command line .486 .MODEL FLAT +ALIAS <@MASM_RDRAND_GenerateBlock@8> = +ALIAS <@MASM_RDSEED_GenerateBlock@8> = + ENDIF ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -51,25 +54,19 @@ IFDEF _M_X86 ;; Set via the command line .CODE ALIGN 8 -OPTION LANGUAGE:C OPTION PROLOGUE:NONE OPTION EPILOGUE:NONE -;; Caller pushes on stack following CDECL (right to left) -;; arg1: byte* buffer -;; arg2: size_t bsize +;; No need for Load_Arguments due to fastcall +;; ECX (in): arg1, byte* buffer +;; EDX (in): arg2, size_t bsize MASM_RDRAND_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD MWSIZE EQU 04h ;; machine word size - buffer EQU edi + buffer EQU ecx bsize EQU edx -Load_Arguments: - - mov buffer, DWORD PTR [esp+04h] ;; arg1 - mov bsize, DWORD PTR [esp+08h] ;; arg2 - ;; Top of While loop GenerateBlock_Top: @@ -148,8 +145,9 @@ ALIGN 16 OPTION PROLOGUE:NONE OPTION EPILOGUE:NONE -;; RCX (in): 
arg1, byte* buffer -;; RDX (in): arg2, size_t bsize +;; No need for Load_Arguments due to fastcall +;; RCX (in): arg1, byte* buffer +;; RDX (in): arg2, size_t bsize MASM_RDRAND_GenerateBlock PROC @@ -157,8 +155,6 @@ MASM_RDRAND_GenerateBlock PROC buffer EQU rcx bsize EQU rdx - ;; No need for Load_Arguments due to fastcall - ;; Top of While loop GenerateBlock_Top: @@ -244,25 +240,19 @@ IFDEF _M_X86 ;; Set via the command line .CODE ALIGN 8 -OPTION LANGUAGE:C OPTION PROLOGUE:NONE OPTION EPILOGUE:NONE -;; Caller pushes on stack following CDECL (right to left) -;; arg1: byte* buffer -;; arg2: size_t bsize +;; No need for Load_Arguments due to fastcall +;; ECX (in): arg1, byte* buffer +;; EDX (in): arg2, size_t bsize MASM_RDSEED_GenerateBlock PROC ;; arg1:DWORD, arg2:DWORD MWSIZE EQU 04h ;; machine word size - buffer EQU edi + buffer EQU ecx bsize EQU edx -Load_Arguments: - - mov buffer, DWORD PTR [esp+04h] ;; arg1 - mov bsize, DWORD PTR [esp+08h] ;; arg2 - ;; Top of While loop GenerateBlock_Top: @@ -341,17 +331,16 @@ ALIGN 16 OPTION PROLOGUE:NONE OPTION EPILOGUE:NONE -;; RCX (in): arg1, byte* buffer -;; RDX (in): arg2, size_t bsize +;; No need for Load_Arguments due to fastcall +;; RCX (in): arg1, byte* buffer +;; RDX (in): arg2, size_t bsize -MASM_RDSEED_GenerateBlock PROC ;; arg1:QWORD,arg2:QWORD +MASM_RDSEED_GenerateBlock PROC ;; arg1:QWORD, arg2:QWORD MWSIZE EQU 08h ;; machine word size buffer EQU rcx bsize EQU rdx - ;; No need for Load_Arguments due to fastcall - ;; Top of While loop GenerateBlock_Top: diff --git a/rdrand.cpp b/rdrand.cpp index ed823ee2..11d38d91 100644 --- a/rdrand.cpp +++ b/rdrand.cpp @@ -43,20 +43,26 @@ #if defined(CRYPTOPP_CPUID_AVAILABLE) # if defined(CRYPTOPP_MSC_VERSION) # if (CRYPTOPP_MSC_VERSION >= 1700) -// # define MASM_RDRAND_ASM_AVAILABLE 1 # define ALL_RDRAND_INTRIN_AVAILABLE 1 # else # define MASM_RDRAND_ASM_AVAILABLE 1 # endif # if (CRYPTOPP_MSC_VERSION >= 1800) -// # define MASM_RDSEED_ASM_AVAILABLE 1 # define 
ALL_RDSEED_INTRIN_AVAILABLE 1 # else # define MASM_RDSEED_ASM_AVAILABLE 1 # endif # elif defined(CRYPTOPP_LLVM_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) -# define GCC_RDRAND_ASM_AVAILABLE 1 -# define GCC_RDSEED_ASM_AVAILABLE 1 +# if defined(__RDRND__) +# define ALL_RDRAND_INTRIN_AVAILABLE 1 +# else +# define GCC_RDRAND_ASM_AVAILABLE 1 +# endif +# if defined(__RDSEED__) +# define ALL_RDSEED_INTRIN_AVAILABLE 1 +# else +# define GCC_RDSEED_ASM_AVAILABLE 1 +# endif # elif defined(__SUNPRO_CC) # if defined(__RDRND__) && (__SUNPRO_CC >= 0x5130) # define ALL_RDRAND_INTRIN_AVAILABLE 1 @@ -69,12 +75,12 @@ # define GCC_RDSEED_ASM_AVAILABLE 1 # endif # elif defined(CRYPTOPP_GCC_VERSION) -# if defined(__RDRND__) || (CRYPTOPP_GCC_VERSION >= 40600) +# if defined(__RDRND__) && (CRYPTOPP_GCC_VERSION >= 40600) # define ALL_RDRAND_INTRIN_AVAILABLE 1 # else # define GCC_RDRAND_ASM_AVAILABLE 1 # endif -# if defined(__RDSEED__) || (CRYPTOPP_GCC_VERSION >= 40600) +# if defined(__RDSEED__) && (CRYPTOPP_GCC_VERSION >= 40600) # define ALL_RDSEED_INTRIN_AVAILABLE 1 # else # define GCC_RDSEED_ASM_AVAILABLE 1 @@ -102,7 +108,7 @@ extern "C" void CRYPTOPP_FASTCALL MASM_RDRAND_GenerateBlock(byte*, size_t); // # pragma comment(lib, "rdrand-x64.lib") # else -extern "C" void MASM_RDRAND_GenerateBlock(byte*, size_t); +extern "C" void CRYPTOPP_FASTCALL MASM_RDRAND_GenerateBlock(byte*, size_t); // # pragma comment(lib, "rdrand-x86.lib") # endif #endif @@ -112,7 +118,7 @@ extern "C" void MASM_RDRAND_GenerateBlock(byte*, size_t); extern "C" void CRYPTOPP_FASTCALL MASM_RDSEED_GenerateBlock(byte*, size_t); // # pragma comment(lib, "rdrand-x64.lib") # else -extern "C" void MASM_RDSEED_GenerateBlock(byte*, size_t); +extern "C" void CRYPTOPP_FASTCALL MASM_RDSEED_GenerateBlock(byte*, size_t); // # pragma comment(lib, "rdrand-x86.lib") # endif #endif @@ -129,8 +135,8 @@ extern "C" void NASM_RDSEED_GenerateBlock(byte*, size_t); ///////////////////////////////////////////////////////////////////// 
ANONYMOUS_NAMESPACE_BEGIN -// GCC, MSVC and SunCC has optimized calls to RDRAND away. We experieced it -// under GCC and MSVC. Other have reported it for SunCC. This attempts +// GCC, MSVC and SunCC have optimized calls to RDRAND away. We experienced +// it under GCC and MSVC. Others have reported it for SunCC. This attempts // to tame the optimizer even though it abuses the volatile keyword. static volatile int s_unused; ANONYMOUS_NAMESPACE_END