Fix SHA512 on ARM benchmarks

This was a mistake made when porting from Cryptogams to Crypto++. The macros VFP_ABI_PUSH and VFP_ABI_POP needed to be defined because they save and restore SIMD register state (d8-d15). Their definitions were originally missing from the port, and their uses in sha512_block_data_order_neon were commented out. The benchmarks would hang because the doubles we used for benchmarking were clobbered in sha512_block_data_order_neon.
pull/853/head
Jeffrey Walton 2019-05-25 06:23:19 -04:00
parent 92df2a685f
commit fc10a7f1ea
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
4 changed files with 23 additions and 28 deletions

View File

@ -998,7 +998,7 @@ std::string SHA512_AlgorithmProvider()
if (HasSSE2())
return "SSE2";
#endif
#if CRYPTOGAMS_ARM_SHA512 && 0
#if CRYPTOGAMS_ARM_SHA512
if (HasNEON())
return "NEON";
if (HasARMv7())
@ -1310,7 +1310,7 @@ void SHA512::Transform(word64 *state, const word64 *data)
return;
}
#endif
#if CRYPTOGAMS_ARM_SHA512 && 0
#if CRYPTOGAMS_ARM_SHA512
if (HasARMv7())
{
# if (CRYPTOPP_LITTLE_ENDIAN)

View File

@ -85,21 +85,21 @@
.align 5
sha1_block_data_order:
.Lsha1_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha1_block_data_order
#else
adr r3,.Lsha1_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7
.Lsha1_block:
@ldr r12,.LCRYPTOGAMS_armcap
ldr r12,=CRYPTOGAMS_armcap_P
# if !defined(_WIN32)
adr r3,.Lsha1_block
@ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P
ldr r12,[r12]
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
# endif
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
tst r12,#ARMV7_NEON
bne .LNEON
#endif
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
ldmia r0,{r3,r4,r5,r6,r7}

View File

@ -113,19 +113,15 @@ sha256_block_data_order:
#else
adr r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7
@ldr r12,.LCRYPTOGAMS_armcap
ldr r12,=CRYPTOGAMS_armcap_P
# if !defined(_WIN32)
@ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
# endif
tst r12,#ARMV7_NEON
bne .LNEON
#endif
add r2,r1,r2,lsl#6 @ len to point at the end of inp
stmdb sp!,{r0,r1,r2,r4-r11,lr}
ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}

View File

@ -79,6 +79,9 @@
# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
#endif
#define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
#define VFP_ABI_POP vldmia sp!,{d8-d15}
#if defined(__thumb2__)
.syntax unified
.thumb
@ -147,19 +150,15 @@ sha512_block_data_order:
#else
adr r3,.Lsha512_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7
@ldr r12,.LCRYPTOGAMS_armcap
ldr r12,=CRYPTOGAMS_armcap_P
# if !defined(_WIN32)
@ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
# endif
tst r12,#ARMV7_NEON
bne .LNEON
#endif
add r2,r1,r2,lsl#7 @ len to point at the end of inp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
sub r14,r3,#672 @ K512
@ -554,7 +553,7 @@ sha512_block_data_order_neon:
dmb @ errata #451034 on early Cortex A8
add r2,r1,r2,lsl#7 @ len to point at the end of inp
adr r3,K512
@VFP_ABI_PUSH
VFP_ABI_PUSH
vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context
.Loop_neon:
vshr.u64 d24,d20,#14 @ 0
@ -1868,7 +1867,7 @@ sha512_block_data_order_neon:
teq r1,r2
sub r3,#640 @ rewind K512
bne .Loop_neon
@VFP_ABI_POP
VFP_ABI_POP
bx lr @ .word 0xe12fff1e
.size sha512_block_data_order_neon,.-sha512_block_data_order_neon
#endif