diff --git a/sha.cpp b/sha.cpp index eef0538e..ce742b2a 100644 --- a/sha.cpp +++ b/sha.cpp @@ -998,7 +998,7 @@ std::string SHA512_AlgorithmProvider() if (HasSSE2()) return "SSE2"; #endif -#if CRYPTOGAMS_ARM_SHA512 && 0 +#if CRYPTOGAMS_ARM_SHA512 if (HasNEON()) return "NEON"; if (HasARMv7()) @@ -1310,7 +1310,7 @@ void SHA512::Transform(word64 *state, const word64 *data) return; } #endif -#if CRYPTOGAMS_ARM_SHA512 && 0 +#if CRYPTOGAMS_ARM_SHA512 if (HasARMv7()) { # if (CRYPTOPP_LITTLE_ENDIAN) diff --git a/sha1_armv4.S b/sha1_armv4.S index f07faf57..189d7e21 100644 --- a/sha1_armv4.S +++ b/sha1_armv4.S @@ -85,21 +85,21 @@ .align 5 sha1_block_data_order: +.Lsha1_block_data_order: +#if __ARM_ARCH__<7 && !defined(__thumb2__) + sub r3,pc,#8 @ sha1_block_data_order +#else + adr r3,.Lsha1_block_data_order +#endif + #if __ARM_MAX_ARCH__>=7 -.Lsha1_block: - @ldr r12,.LCRYPTOGAMS_armcap ldr r12,=CRYPTOGAMS_armcap_P -# if !defined(_WIN32) - adr r3,.Lsha1_block - @ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P - ldr r12,[r12] -# endif -# if defined(__APPLE__) || defined(_WIN32) - ldr r12,[r12] -# endif + ldr r12,[r12] @ CRYPTOGAMS_armcap_P + tst r12,#ARMV7_NEON bne .LNEON #endif + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 ldmia r0,{r3,r4,r5,r6,r7} diff --git a/sha256_armv4.S b/sha256_armv4.S index 74c208f7..ef1f4854 100644 --- a/sha256_armv4.S +++ b/sha256_armv4.S @@ -113,19 +113,15 @@ sha256_block_data_order: #else adr r3,.Lsha256_block_data_order #endif + #if __ARM_MAX_ARCH__>=7 - @ldr r12,.LCRYPTOGAMS_armcap ldr r12,=CRYPTOGAMS_armcap_P -# if !defined(_WIN32) - @ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P ldr r12,[r12] @ CRYPTOGAMS_armcap_P -# endif -# if defined(__APPLE__) || defined(_WIN32) - ldr r12,[r12] -# endif + tst r12,#ARMV7_NEON bne .LNEON #endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp stmdb sp!,{r0,r1,r2,r4-r11,lr} ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} diff --git a/sha512_armv4.S b/sha512_armv4.S index 4c80608e..a1e8cd44 100644 --- a/sha512_armv4.S +++ b/sha512_armv4.S @@ -79,6 +79,9 @@ # define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 #endif +#define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +#define VFP_ABI_POP vldmia sp!,{d8-d15} + #if defined(__thumb2__) .syntax unified .thumb @@ -147,19 +150,15 @@ sha512_block_data_order: #else adr r3,.Lsha512_block_data_order #endif + #if __ARM_MAX_ARCH__>=7 - @ldr r12,.LCRYPTOGAMS_armcap ldr r12,=CRYPTOGAMS_armcap_P -# if !defined(_WIN32) - @ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P ldr r12,[r12] @ CRYPTOGAMS_armcap_P -# endif -# if defined(__APPLE__) || defined(_WIN32) - ldr r12,[r12] -# endif + tst r12,#ARMV7_NEON bne .LNEON #endif + add r2,r1,r2,lsl#7 @ len to point at the end of inp stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} sub r14,r3,#672 @ K512 @@ -554,7 +553,7 @@ sha512_block_data_order_neon: dmb @ errata #451034 on early Cortex A8 add r2,r1,r2,lsl#7 @ len to point at the end of inp adr r3,K512 - @VFP_ABI_PUSH + VFP_ABI_PUSH vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context .Loop_neon: vshr.u64 d24,d20,#14 @ 0 @@ -1868,7 +1867,7 @@ sha512_block_data_order_neon: teq r1,r2 sub r3,#640 @ rewind K512 bne .Loop_neon - @VFP_ABI_POP + VFP_ABI_POP bx lr @ .word 0xe12fff1e .size sha512_block_data_order_neon,.-sha512_block_data_order_neon #endif