diff --git a/cpu.cpp b/cpu.cpp index 25b50c71..0ba2e954 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -61,6 +61,16 @@ unsigned long int getauxval(unsigned long int) { return 0; } extern "C" unsigned long long __fastcall XGETBV64(unsigned int); #endif +#if CRYPTOGAMS_ARM_SHA1 || CRYPTOGAMS_ARM_SHA256 || CRYPTOGAMS_ARM_SHA512 +// The Cryptogams code uses a global variable named CRYPTOGAMS_armcap_P +// for capabilities like ARMv7 and NEON. We allocate storage for +// CRYPTOGAMS_armcap_P, and the Cryptogams modules use our symbol. +// The Cryptogams code defines ARMV7_NEON as 1<<0, so we need to +// set the bits accordingly in CRYPTOGAMS_armcap_P. +extern "C" unsigned int CRYPTOGAMS_armcap_P; +unsigned int CRYPTOGAMS_armcap_P = 0; +#endif + ANONYMOUS_NAMESPACE_BEGIN #if defined(__APPLE__) @@ -850,6 +860,10 @@ void DetectArmFeatures() g_hasSM3 = CPU_QuerySM3(); // || CPU_ProbeSM3(); g_hasSM4 = CPU_QuerySM4(); // || CPU_ProbeSM4(); +# if CRYPTOGAMS_ARM_SHA1 || CRYPTOGAMS_ARM_SHA256 || CRYPTOGAMS_ARM_SHA512 + CRYPTOGAMS_armcap_P = g_hasNEON ? (1<<0) : 0; +# endif + #if defined(_SC_LEVEL1_DCACHE_LINESIZE) // Glibc does not implement on some platforms. The runtime returns 0 instead of error.
// https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/posix/sysconf.c diff --git a/sha.cpp b/sha.cpp index aaf4e123..eef0538e 100644 --- a/sha.cpp +++ b/sha.cpp @@ -59,10 +59,6 @@ # undef CRYPTOPP_SSE2_ASM_AVAILABLE #endif -#if CRYPTOGAMS_ARM_SHA1 || CRYPTOGAMS_ARM_SHA256 || CRYPTOGAMS_ARM_SHA512 -unsigned int CRYPTOGAMS_armcaps = 0; -#endif - NAMESPACE_BEGIN(CryptoPP) #if CRYPTOPP_SHANI_AVAILABLE @@ -71,7 +67,7 @@ extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, s #endif #if CRYPTOGAMS_ARM_SHA1 -extern "C" void sha1_block_data_order(word32* state, const word32 *data, size_t blocks, unsigned int caps); +extern "C" void sha1_block_data_order(word32* state, const word32 *data, size_t blocks); #endif #if CRYPTOPP_ARM_SHA1_AVAILABLE @@ -83,7 +79,7 @@ extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, s #endif #if CRYPTOGAMS_ARM_SHA256 -extern "C" void sha256_block_data_order(word32* state, const word32 *data, size_t blocks, unsigned int caps); +extern "C" void sha256_block_data_order(word32* state, const word32 *data, size_t blocks); #endif #if CRYPTOPP_ARM_SHA512_AVAILABLE @@ -96,7 +92,7 @@ extern void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, #endif #if CRYPTOGAMS_ARM_SHA512 -extern "C" void sha512_block_data_order(word64* state, const word64 *data, size_t blocks, unsigned int caps); +extern "C" void sha512_block_data_order(word64* state, const word64 *data, size_t blocks); #endif // We add extern to export table to sha_simd.cpp, but it @@ -169,22 +165,6 @@ const word32 SHA256_K[64] = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; -ANONYMOUS_NAMESPACE_BEGIN - -#if CRYPTOGAMS_ARM_SHA1 || CRYPTOGAMS_ARM_SHA256 || CRYPTOGAMS_ARM_SHA512 -inline unsigned int CryptogamsArmCaps() -{ - // The Cryptogams code uses a global variable named CRYPTOGAMS_armcaps - // for capabilities like ARMv7 and NEON. 
We allocate storage for - // CRYPTOGAMS_armcaps, and the Cryptogams modules use our symbol. - // The Cryptogams code defines ARMV7_NEON as 1<<0, so we need to - // set the bits accordingly in CRYPTOGAMS_armcaps. - return CryptoPP::HasNEON() ? (1<<0) : 0; -} -#endif - -ANONYMOUS_NAMESPACE_END - //////////////////////////////// // start of Steve Reid's code // //////////////////////////////// @@ -314,9 +294,9 @@ void SHA1::Transform(word32 *state, const word32 *data) # if defined(CRYPTOPP_LITTLE_ENDIAN) word32 dataBuf[16]; ByteReverse(dataBuf, data, SHA1::BLOCKSIZE); - sha1_block_data_order(state, data, 1, CryptogamsArmCaps()); + sha1_block_data_order(state, dataBuf, 1); # else - sha1_block_data_order(state, data, 1, CryptogamsArmCaps()); + sha1_block_data_order(state, data, 1); # endif return; } @@ -347,7 +327,7 @@ size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length) #if CRYPTOGAMS_ARM_SHA1 if (HasARMv7()) { - sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE, CryptogamsArmCaps()); + sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE); return length & (SHA1::BLOCKSIZE - 1); } #endif @@ -859,9 +839,9 @@ void SHA256::Transform(word32 *state, const word32 *data) # if defined(CRYPTOPP_LITTLE_ENDIAN) word32 dataBuf[16]; ByteReverse(dataBuf, data, SHA256::BLOCKSIZE); - sha256_block_data_order(state, data, 1, CryptogamsArmCaps()); + sha256_block_data_order(state, dataBuf, 1); # else - sha256_block_data_order(state, data, 1, CryptogamsArmCaps()); + sha256_block_data_order(state, data, 1); # endif return; } @@ -907,7 +887,7 @@ size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length) #if CRYPTOGAMS_ARM_SHA256 if (HasARMv7()) { - sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE, CryptogamsArmCaps()); + sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE); return length & (SHA256::BLOCKSIZE - 1); } #endif @@ -970,7 +950,7 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length) #if
CRYPTOGAMS_ARM_SHA256 if (HasARMv7()) { - sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE, CryptogamsArmCaps());; + sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE); return length & (SHA256::BLOCKSIZE - 1); } #endif @@ -1333,7 +1313,13 @@ void SHA512::Transform(word64 *state, const word64 *data) #if CRYPTOGAMS_ARM_SHA512 && 0 if (HasARMv7()) { - sha512_block_data_order(state, data, 1, CryptogamsArmCaps()); +# if (CRYPTOPP_LITTLE_ENDIAN) + word64 dataBuf[16]; + ByteReverse(dataBuf, data, SHA512::BLOCKSIZE); + sha512_block_data_order(state, dataBuf, 1); +# else + sha512_block_data_order(state, data, 1); +# endif return; } #endif diff --git a/sha1_armv4.S b/sha1_armv4.S index 1bcb83d4..f07faf57 100644 --- a/sha1_armv4.S +++ b/sha1_armv4.S @@ -1,4 +1,4 @@ -@ Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. +@ Copyright 2007-2019 The OpenSSL Project Authors. All Rights Reserved. @ @ ==================================================================== @ Written by Andy Polyakov for the OpenSSL @@ -76,6 +76,8 @@ .code 32 #endif +.extern CRYPTOGAMS_armcap_P + .text .globl sha1_block_data_order @@ -84,7 +86,17 @@ .align 5 sha1_block_data_order: #if __ARM_MAX_ARCH__>=7 - mov r12,r3 +.Lsha1_block: + @ldr r12,.LCRYPTOGAMS_armcap + ldr r12,=CRYPTOGAMS_armcap_P +# if !defined(_WIN32) + adr r3,.Lsha1_block + @ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P + ldr r12,[r12] +# endif +# if defined(__APPLE__) || defined(_WIN32) + ldr r12,[r12] +# endif tst r12,#ARMV7_NEON bne .LNEON #endif @@ -540,6 +552,9 @@ sha1_block_data_order: #endif .size sha1_block_data_order,.-sha1_block_data_order +@ CRYPTOGAMS_armcap_P +.ltorg + .align 5 .LK_00_19:.word 0x5a827999 .LK_20_39:.word 0x6ed9eba1 diff --git a/sha1_armv4.h b/sha1_armv4.h index 63ec6394..9e836f06 100644 --- a/sha1_armv4.h +++ b/sha1_armv4.h @@ -11,7 +11,7 @@ extern "C" { /* Crypto++ modifed sha1_block_data_order to pass caps as a parameter. 
*/ /* Also see https://github.com/weidai11/cryptopp/issues/846. */ -void sha1_block_data_order(void *state, const void *data, size_t blocks, unsigned int caps); +void sha1_block_data_order(void *state, const void *data, size_t blocks); /* Cryptogams arm caps */ #define ARMV7_NEON (1<<0) diff --git a/sha256_armv4.S b/sha256_armv4.S index 80d347d7..74c208f7 100644 --- a/sha256_armv4.S +++ b/sha256_armv4.S @@ -1,4 +1,4 @@ -@ Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. +@ Copyright 2007-2019 The OpenSSL Project Authors. All Rights Reserved. @ @ ==================================================================== @ Written by Andy Polyakov for the OpenSSL @@ -76,6 +76,8 @@ .code 32 #endif +.extern CRYPTOGAMS_armcap_P + .text .type K256,%object @@ -101,18 +103,26 @@ K256: .word 0 @ terminator .align 5 + .globl sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: .Lsha256_block_data_order: #if __ARM_ARCH__<7 && !defined(__thumb2__) - mov r12,r3 sub r3,pc,#8 @ sha256_block_data_order #else - mov r12,r3 adr r3,.Lsha256_block_data_order #endif #if __ARM_MAX_ARCH__>=7 + @ldr r12,.LCRYPTOGAMS_armcap + ldr r12,=CRYPTOGAMS_armcap_P +# if !defined(_WIN32) + @ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P + ldr r12,[r12] @ CRYPTOGAMS_armcap_P +# endif +# if defined(__APPLE__) || defined(_WIN32) + ldr r12,[r12] +# endif tst r12,#ARMV7_NEON bne .LNEON #endif @@ -1882,6 +1892,10 @@ sha256_block_data_order: .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif .size sha256_block_data_order,.-sha256_block_data_order + +@ CRYPTOGAMS_armcap_P +.ltorg + #if __ARM_MAX_ARCH__>=7 .arch armv7-a .fpu neon diff --git a/sha256_armv4.h b/sha256_armv4.h index 8488b086..484a0911 100644 --- a/sha256_armv4.h +++ b/sha256_armv4.h @@ -11,7 +11,7 @@ extern "C" { /* Crypto++ modifed sha256_block_data_order to pass caps as a parameter. */ /* Also see https://github.com/weidai11/cryptopp/issues/846. 
*/ -void sha256_block_data_order(void *state, const void *data, size_t blocks, unsigned int caps); +void sha256_block_data_order(void *state, const void *data, size_t blocks); /* Cryptogams arm caps */ #define ARMV7_NEON (1<<0) diff --git a/sha512_armv4.S b/sha512_armv4.S index 99e58682..4c80608e 100644 --- a/sha512_armv4.S +++ b/sha512_armv4.S @@ -1,4 +1,4 @@ -@ Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. +@ Copyright 2007-2019 The OpenSSL Project Authors. All Rights Reserved. @ @ ==================================================================== @ Written by Andy Polyakov for the OpenSSL @@ -87,6 +87,8 @@ .code 32 #endif +.extern CRYPTOGAMS_armcap_P + .text .type K512,%object @@ -133,6 +135,7 @@ K512: WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 + .skip 32 .globl sha512_block_data_order @@ -140,13 +143,20 @@ K512: sha512_block_data_order: .Lsha512_block_data_order: #if __ARM_ARCH__<7 && !defined(__thumb2__) - mov r12,r3 sub r3,pc,#8 @ sha512_block_data_order #else - mov r12,r3 adr r3,.Lsha512_block_data_order #endif #if __ARM_MAX_ARCH__>=7 + @ldr r12,.LCRYPTOGAMS_armcap + ldr r12,=CRYPTOGAMS_armcap_P +# if !defined(_WIN32) + @ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P + ldr r12,[r12] @ CRYPTOGAMS_armcap_P +# endif +# if defined(__APPLE__) || defined(_WIN32) + ldr r12,[r12] +# endif tst r12,#ARMV7_NEON bne .LNEON #endif @@ -528,6 +538,10 @@ sha512_block_data_order: .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif .size sha512_block_data_order,.-sha512_block_data_order + +@ CRYPTOGAMS_armcap_P +.ltorg + #if __ARM_MAX_ARCH__>=7 .arch armv7-a .fpu neon @@ -540,6 +554,7 @@ sha512_block_data_order_neon: dmb @ errata #451034 on early Cortex A8 add r2,r1,r2,lsl#7 @ len to point at the end of inp adr r3,K512 + @VFP_ABI_PUSH vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context .Loop_neon: vshr.u64 d24,d20,#14 @ 0 @@ -1853,6 +1868,7 @@ 
sha512_block_data_order_neon: teq r1,r2 sub r3,#640 @ rewind K512 bne .Loop_neon + @VFP_ABI_POP bx lr @ .word 0xe12fff1e .size sha512_block_data_order_neon,.-sha512_block_data_order_neon #endif diff --git a/sha512_armv4.h b/sha512_armv4.h index 52cb81f8..8e9f34cf 100644 --- a/sha512_armv4.h +++ b/sha512_armv4.h @@ -9,9 +9,9 @@ extern "C" { #endif -/* Crypto++ modifed sha256_block_data_order to pass caps as a parameter. */ +/* Crypto++ modified sha512_block_data_order to read caps from the global CRYPTOGAMS_armcap_P. */ /* Also see https://github.com/weidai11/cryptopp/issues/846. */ -void sha512_block_data_order(void *state, const void *data, size_t blocks, unsigned int caps); +void sha512_block_data_order(void *state, const void *data, size_t blocks); /* Cryptogams arm caps */ #define ARMV7_NEON (1<<0)