Fix SHA512 on ARM benchmarks
This was a mistake made when porting from Cryptogams to Crypto++. The macros VFP_ABI_PUSH and VFP_ABI_POP needed to be defined because they save and restore SIMD register state (registers d8-d15). They were originally missing from the port. The benchmarks would hang because the doubles we used for benchmarking were clobbered in sha512_block_data_order_neon.
pull/853/head
parent
92df2a685f
commit
fc10a7f1ea
4
sha.cpp
4
sha.cpp
|
|
@ -998,7 +998,7 @@ std::string SHA512_AlgorithmProvider()
|
|||
if (HasSSE2())
|
||||
return "SSE2";
|
||||
#endif
|
||||
#if CRYPTOGAMS_ARM_SHA512 && 0
|
||||
#if CRYPTOGAMS_ARM_SHA512
|
||||
if (HasNEON())
|
||||
return "NEON";
|
||||
if (HasARMv7())
|
||||
|
|
@ -1310,7 +1310,7 @@ void SHA512::Transform(word64 *state, const word64 *data)
|
|||
return;
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOGAMS_ARM_SHA512 && 0
|
||||
#if CRYPTOGAMS_ARM_SHA512
|
||||
if (HasARMv7())
|
||||
{
|
||||
# if (CRYPTOPP_LITTLE_ENDIAN)
|
||||
|
|
|
|||
20
sha1_armv4.S
20
sha1_armv4.S
|
|
@ -85,21 +85,21 @@
|
|||
|
||||
.align 5
|
||||
sha1_block_data_order:
|
||||
.Lsha1_block_data_order:
|
||||
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
||||
sub r3,pc,#8 @ sha1_block_data_order
|
||||
#else
|
||||
adr r3,.Lsha1_block_data_order
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.Lsha1_block:
|
||||
@ldr r12,.LCRYPTOGAMS_armcap
|
||||
ldr r12,=CRYPTOGAMS_armcap_P
|
||||
# if !defined(_WIN32)
|
||||
adr r3,.Lsha1_block
|
||||
@ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P
|
||||
ldr r12,[r12]
|
||||
# endif
|
||||
# if defined(__APPLE__) || defined(_WIN32)
|
||||
ldr r12,[r12]
|
||||
# endif
|
||||
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
|
||||
|
||||
tst r12,#ARMV7_NEON
|
||||
bne .LNEON
|
||||
#endif
|
||||
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
||||
ldmia r0,{r3,r4,r5,r6,r7}
|
||||
|
|
|
|||
|
|
@ -113,19 +113,15 @@ sha256_block_data_order:
|
|||
#else
|
||||
adr r3,.Lsha256_block_data_order
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
@ldr r12,.LCRYPTOGAMS_armcap
|
||||
ldr r12,=CRYPTOGAMS_armcap_P
|
||||
# if !defined(_WIN32)
|
||||
@ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P
|
||||
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
|
||||
# endif
|
||||
# if defined(__APPLE__) || defined(_WIN32)
|
||||
ldr r12,[r12]
|
||||
# endif
|
||||
|
||||
tst r12,#ARMV7_NEON
|
||||
bne .LNEON
|
||||
#endif
|
||||
|
||||
add r2,r1,r2,lsl#6 @ len to point at the end of inp
|
||||
stmdb sp!,{r0,r1,r2,r4-r11,lr}
|
||||
ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
|
|
|
|||
|
|
@ -79,6 +79,9 @@
|
|||
# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
|
||||
#endif
|
||||
|
||||
#define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
|
||||
#define VFP_ABI_POP vldmia sp!,{d8-d15}
|
||||
|
||||
#if defined(__thumb2__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
|
|
@ -147,19 +150,15 @@ sha512_block_data_order:
|
|||
#else
|
||||
adr r3,.Lsha512_block_data_order
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
@ldr r12,.LCRYPTOGAMS_armcap
|
||||
ldr r12,=CRYPTOGAMS_armcap_P
|
||||
# if !defined(_WIN32)
|
||||
@ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P
|
||||
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
|
||||
# endif
|
||||
# if defined(__APPLE__) || defined(_WIN32)
|
||||
ldr r12,[r12]
|
||||
# endif
|
||||
|
||||
tst r12,#ARMV7_NEON
|
||||
bne .LNEON
|
||||
#endif
|
||||
|
||||
add r2,r1,r2,lsl#7 @ len to point at the end of inp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||
sub r14,r3,#672 @ K512
|
||||
|
|
@ -554,7 +553,7 @@ sha512_block_data_order_neon:
|
|||
dmb @ errata #451034 on early Cortex A8
|
||||
add r2,r1,r2,lsl#7 @ len to point at the end of inp
|
||||
adr r3,K512
|
||||
@VFP_ABI_PUSH
|
||||
VFP_ABI_PUSH
|
||||
vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context
|
||||
.Loop_neon:
|
||||
vshr.u64 d24,d20,#14 @ 0
|
||||
|
|
@ -1868,7 +1867,7 @@ sha512_block_data_order_neon:
|
|||
teq r1,r2
|
||||
sub r3,#640 @ rewind K512
|
||||
bne .Loop_neon
|
||||
@VFP_ABI_POP
|
||||
VFP_ABI_POP
|
||||
bx lr @ .word 0xe12fff1e
|
||||
.size sha512_block_data_order_neon,.-sha512_block_data_order_neon
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue