Breakout sha_block_data_order and sha_block_data_order_neon (GH #847)
parent
d4b533a60f
commit
81da61fe7b
14
cpu.cpp
14
cpu.cpp
|
|
@ -61,16 +61,6 @@ unsigned long int getauxval(unsigned long int) { return 0; }
|
||||||
extern "C" unsigned long long __fastcall XGETBV64(unsigned int);
|
extern "C" unsigned long long __fastcall XGETBV64(unsigned int);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CRYPTOGAMS_ARM_SHA1 || CRYPTOGAMS_ARM_SHA256 || CRYPTOGAMS_ARM_SHA512
|
|
||||||
// The Cryptogams code uses a global variable named CRYPTOGAMS_armcap_P
|
|
||||||
// for capabilities like ARMv7 and NEON. We allocate storage for
|
|
||||||
// CRYPTOGAMS_armcap_P, and the Cryptogams object files use our symbol.
|
|
||||||
// The Cryptogams code defines ARMV7_NEON as 1<<0, so we need to set
|
|
||||||
// the bits accordingly in CRYPTOGAMS_armcap_P.
|
|
||||||
extern "C" unsigned int CRYPTOGAMS_armcap_P;
|
|
||||||
unsigned int CRYPTOGAMS_armcap_P = 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ANONYMOUS_NAMESPACE_BEGIN
|
ANONYMOUS_NAMESPACE_BEGIN
|
||||||
|
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
|
|
@ -860,10 +850,6 @@ void DetectArmFeatures()
|
||||||
g_hasSM3 = CPU_QuerySM3(); // || CPU_ProbeSM3();
|
g_hasSM3 = CPU_QuerySM3(); // || CPU_ProbeSM3();
|
||||||
g_hasSM4 = CPU_QuerySM4(); // || CPU_ProbeSM4();
|
g_hasSM4 = CPU_QuerySM4(); // || CPU_ProbeSM4();
|
||||||
|
|
||||||
# if CRYPTOGAMS_ARM_SHA1 || CRYPTOGAMS_ARM_SHA256 || CRYPTOGAMS_ARM_SHA512
|
|
||||||
CRYPTOGAMS_armcap_P = g_hasNEON ? (1<<0) : 0;
|
|
||||||
# endif
|
|
||||||
|
|
||||||
#if defined(_SC_LEVEL1_DCACHE_LINESIZE)
|
#if defined(_SC_LEVEL1_DCACHE_LINESIZE)
|
||||||
// Glibc does not implement on some platforms. The runtime returns 0 instead of error.
|
// Glibc does not implement on some platforms. The runtime returns 0 instead of error.
|
||||||
// https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/posix/sysconf.c
|
// https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/posix/sysconf.c
|
||||||
|
|
|
||||||
51
sha.cpp
51
sha.cpp
|
|
@ -68,6 +68,7 @@ extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, s
|
||||||
|
|
||||||
#if CRYPTOGAMS_ARM_SHA1
|
#if CRYPTOGAMS_ARM_SHA1
|
||||||
extern "C" void sha1_block_data_order(word32* state, const word32 *data, size_t blocks);
|
extern "C" void sha1_block_data_order(word32* state, const word32 *data, size_t blocks);
|
||||||
|
extern "C" void sha1_block_data_order_neon(word32* state, const word32 *data, size_t blocks);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CRYPTOPP_ARM_SHA1_AVAILABLE
|
#if CRYPTOPP_ARM_SHA1_AVAILABLE
|
||||||
|
|
@ -80,6 +81,7 @@ extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, s
|
||||||
|
|
||||||
#if CRYPTOGAMS_ARM_SHA256
|
#if CRYPTOGAMS_ARM_SHA256
|
||||||
extern "C" void sha256_block_data_order(word32* state, const word32 *data, size_t blocks);
|
extern "C" void sha256_block_data_order(word32* state, const word32 *data, size_t blocks);
|
||||||
|
extern "C" void sha256_block_data_order_neon(word32* state, const word32 *data, size_t blocks);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CRYPTOPP_ARM_SHA512_AVAILABLE
|
#if CRYPTOPP_ARM_SHA512_AVAILABLE
|
||||||
|
|
@ -93,6 +95,7 @@ extern void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data,
|
||||||
|
|
||||||
#if CRYPTOGAMS_ARM_SHA512
|
#if CRYPTOGAMS_ARM_SHA512
|
||||||
extern "C" void sha512_block_data_order(word64* state, const word64 *data, size_t blocks);
|
extern "C" void sha512_block_data_order(word64* state, const word64 *data, size_t blocks);
|
||||||
|
extern "C" void sha512_block_data_order_neon(word64* state, const word64 *data, size_t blocks);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// We add extern to export table to sha_simd.cpp, but it
|
// We add extern to export table to sha_simd.cpp, but it
|
||||||
|
|
@ -289,6 +292,17 @@ void SHA1::Transform(word32 *state, const word32 *data)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if CRYPTOGAMS_ARM_SHA1 && 0
|
#if CRYPTOGAMS_ARM_SHA1 && 0
|
||||||
|
if (HasNEON())
|
||||||
|
{
|
||||||
|
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
|
word32 dataBuf[16];
|
||||||
|
ByteReverse(dataBuf, data, SHA1::BLOCKSIZE);
|
||||||
|
sha1_block_data_order_neon(state, data, 1);
|
||||||
|
# else
|
||||||
|
sha1_block_data_order_neon(state, data, 1);
|
||||||
|
# endif
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (HasARMv7())
|
if (HasARMv7())
|
||||||
{
|
{
|
||||||
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
|
|
@ -325,6 +339,11 @@ size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if CRYPTOGAMS_ARM_SHA1
|
#if CRYPTOGAMS_ARM_SHA1
|
||||||
|
if (HasNEON())
|
||||||
|
{
|
||||||
|
sha1_block_data_order_neon(m_state, input, length / SHA1::BLOCKSIZE);
|
||||||
|
return length & (SHA1::BLOCKSIZE - 1);
|
||||||
|
}
|
||||||
if (HasARMv7())
|
if (HasARMv7())
|
||||||
{
|
{
|
||||||
sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE);
|
sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE);
|
||||||
|
|
@ -834,6 +853,17 @@ void SHA256::Transform(word32 *state, const word32 *data)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if CRYPTOGAMS_ARM_SHA256 && 0
|
#if CRYPTOGAMS_ARM_SHA256 && 0
|
||||||
|
if (HasNEON())
|
||||||
|
{
|
||||||
|
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
|
word32 dataBuf[16];
|
||||||
|
ByteReverse(dataBuf, data, SHA256::BLOCKSIZE);
|
||||||
|
sha256_block_data_order_neon(state, data, 1);
|
||||||
|
# else
|
||||||
|
sha256_block_data_order_neon(state, data, 1);
|
||||||
|
# endif
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (HasARMv7())
|
if (HasARMv7())
|
||||||
{
|
{
|
||||||
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
|
|
@ -885,6 +915,11 @@ size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if CRYPTOGAMS_ARM_SHA256
|
#if CRYPTOGAMS_ARM_SHA256
|
||||||
|
if (HasNEON())
|
||||||
|
{
|
||||||
|
sha256_block_data_order_neon(m_state, input, length / SHA256::BLOCKSIZE);
|
||||||
|
return length & (SHA256::BLOCKSIZE - 1);
|
||||||
|
}
|
||||||
if (HasARMv7())
|
if (HasARMv7())
|
||||||
{
|
{
|
||||||
sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
|
sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
|
||||||
|
|
@ -948,6 +983,11 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if CRYPTOGAMS_ARM_SHA256
|
#if CRYPTOGAMS_ARM_SHA256
|
||||||
|
if (HasNEON())
|
||||||
|
{
|
||||||
|
sha256_block_data_order_neon(m_state, input, length / SHA256::BLOCKSIZE);
|
||||||
|
return length & (SHA256::BLOCKSIZE - 1);
|
||||||
|
}
|
||||||
if (HasARMv7())
|
if (HasARMv7())
|
||||||
{
|
{
|
||||||
sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
|
sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
|
||||||
|
|
@ -1311,6 +1351,17 @@ void SHA512::Transform(word64 *state, const word64 *data)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if CRYPTOGAMS_ARM_SHA512
|
#if CRYPTOGAMS_ARM_SHA512
|
||||||
|
if (HasNEON())
|
||||||
|
{
|
||||||
|
# if (CRYPTOPP_LITTLE_ENDIAN)
|
||||||
|
word64 dataBuf[16];
|
||||||
|
ByteReverse(dataBuf, data, SHA512::BLOCKSIZE);
|
||||||
|
sha512_block_data_order_neon(state, dataBuf, 1);
|
||||||
|
# else
|
||||||
|
sha512_block_data_order_neon(state, data, 1);
|
||||||
|
# endif
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (HasARMv7())
|
if (HasARMv7())
|
||||||
{
|
{
|
||||||
# if (CRYPTOPP_LITTLE_ENDIAN)
|
# if (CRYPTOPP_LITTLE_ENDIAN)
|
||||||
|
|
|
||||||
21
sha1_armv4.S
21
sha1_armv4.S
|
|
@ -76,33 +76,25 @@
|
||||||
.code 32
|
.code 32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.extern CRYPTOGAMS_armcap_P
|
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
|
.align 5
|
||||||
.globl sha1_block_data_order
|
.globl sha1_block_data_order
|
||||||
.type sha1_block_data_order,%function
|
.type sha1_block_data_order,%function
|
||||||
|
|
||||||
.align 5
|
|
||||||
sha1_block_data_order:
|
sha1_block_data_order:
|
||||||
.Lsha1_block_data_order:
|
.Lsha1_block_data_order:
|
||||||
|
|
||||||
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
||||||
sub r3,pc,#8 @ sha1_block_data_order
|
sub r3,pc,#8 @ sha1_block_data_order
|
||||||
#else
|
#else
|
||||||
adr r3,.Lsha1_block_data_order
|
adr r3,.Lsha1_block_data_order
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if __ARM_MAX_ARCH__>=7
|
|
||||||
ldr r12,=CRYPTOGAMS_armcap_P
|
|
||||||
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
|
|
||||||
|
|
||||||
tst r12,#ARMV7_NEON
|
|
||||||
bne .LNEON
|
|
||||||
#endif
|
|
||||||
|
|
||||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||||
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
||||||
ldmia r0,{r3,r4,r5,r6,r7}
|
ldmia r0,{r3,r4,r5,r6,r7}
|
||||||
|
|
||||||
.Lloop:
|
.Lloop:
|
||||||
ldr r8,.LK_00_19
|
ldr r8,.LK_00_19
|
||||||
mov r14,sp
|
mov r14,sp
|
||||||
|
|
@ -552,9 +544,6 @@ sha1_block_data_order:
|
||||||
#endif
|
#endif
|
||||||
.size sha1_block_data_order,.-sha1_block_data_order
|
.size sha1_block_data_order,.-sha1_block_data_order
|
||||||
|
|
||||||
@ CRYPTOGAMS_armcap_P
|
|
||||||
.ltorg
|
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
.LK_00_19:.word 0x5a827999
|
.LK_00_19:.word 0x5a827999
|
||||||
.LK_20_39:.word 0x6ed9eba1
|
.LK_20_39:.word 0x6ed9eba1
|
||||||
|
|
@ -566,10 +555,12 @@ sha1_block_data_order:
|
||||||
.arch armv7-a
|
.arch armv7-a
|
||||||
.fpu neon
|
.fpu neon
|
||||||
|
|
||||||
|
.globl sha1_block_data_order_neon
|
||||||
.type sha1_block_data_order_neon,%function
|
.type sha1_block_data_order_neon,%function
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
sha1_block_data_order_neon:
|
sha1_block_data_order_neon:
|
||||||
.LNEON:
|
|
||||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||||
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
||||||
@ dmb @ errata #451034 on early Cortex A8
|
@ dmb @ errata #451034 on early Cortex A8
|
||||||
|
|
|
||||||
|
|
@ -76,8 +76,6 @@
|
||||||
.code 32
|
.code 32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.extern CRYPTOGAMS_armcap_P
|
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.type K256,%object
|
.type K256,%object
|
||||||
|
|
@ -103,30 +101,24 @@ K256:
|
||||||
.word 0 @ terminator
|
.word 0 @ terminator
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
.globl sha256_block_data_order
|
.globl sha256_block_data_order
|
||||||
.type sha256_block_data_order,%function
|
.type sha256_block_data_order,%function
|
||||||
|
|
||||||
sha256_block_data_order:
|
sha256_block_data_order:
|
||||||
.Lsha256_block_data_order:
|
.Lsha256_block_data_order:
|
||||||
|
|
||||||
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
||||||
sub r3,pc,#8 @ sha256_block_data_order
|
sub r3,pc,#8 @ sha256_block_data_order
|
||||||
#else
|
#else
|
||||||
adr r3,.Lsha256_block_data_order
|
adr r3,.Lsha256_block_data_order
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if __ARM_MAX_ARCH__>=7
|
|
||||||
ldr r12,=CRYPTOGAMS_armcap_P
|
|
||||||
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
|
|
||||||
|
|
||||||
tst r12,#ARMV7_NEON
|
|
||||||
bne .LNEON
|
|
||||||
#endif
|
|
||||||
|
|
||||||
add r2,r1,r2,lsl#6 @ len to point at the end of inp
|
add r2,r1,r2,lsl#6 @ len to point at the end of inp
|
||||||
stmdb sp!,{r0,r1,r2,r4-r11,lr}
|
stmdb sp!,{r0,r1,r2,r4-r11,lr}
|
||||||
ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
|
ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||||
sub r14,r3,#256+32 @ K256
|
sub r14,r3,#256+32 @ K256
|
||||||
sub sp,sp,#16*4 @ alloca(X[16])
|
sub sp,sp,#16*4 @ alloca(X[16])
|
||||||
|
|
||||||
.Loop:
|
.Loop:
|
||||||
# if __ARM_ARCH__>=7
|
# if __ARM_ARCH__>=7
|
||||||
ldr r2,[r1],#4
|
ldr r2,[r1],#4
|
||||||
|
|
@ -1889,9 +1881,6 @@ sha256_block_data_order:
|
||||||
#endif
|
#endif
|
||||||
.size sha256_block_data_order,.-sha256_block_data_order
|
.size sha256_block_data_order,.-sha256_block_data_order
|
||||||
|
|
||||||
@ CRYPTOGAMS_armcap_P
|
|
||||||
.ltorg
|
|
||||||
|
|
||||||
#if __ARM_MAX_ARCH__>=7
|
#if __ARM_MAX_ARCH__>=7
|
||||||
.arch armv7-a
|
.arch armv7-a
|
||||||
.fpu neon
|
.fpu neon
|
||||||
|
|
@ -1901,7 +1890,7 @@ sha256_block_data_order:
|
||||||
.align 5
|
.align 5
|
||||||
.skip 16
|
.skip 16
|
||||||
sha256_block_data_order_neon:
|
sha256_block_data_order_neon:
|
||||||
.LNEON:
|
|
||||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||||
|
|
||||||
sub r11,sp,#16*4+16
|
sub r11,sp,#16*4+16
|
||||||
|
|
|
||||||
|
|
@ -87,8 +87,6 @@
|
||||||
.code 32
|
.code 32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.extern CRYPTOGAMS_armcap_P
|
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.type K512,%object
|
.type K512,%object
|
||||||
|
|
@ -138,24 +136,19 @@ K512:
|
||||||
|
|
||||||
.skip 32
|
.skip 32
|
||||||
|
|
||||||
|
.align 5
|
||||||
.globl sha512_block_data_order
|
.globl sha512_block_data_order
|
||||||
.type sha512_block_data_order,%function
|
.type sha512_block_data_order,%function
|
||||||
|
|
||||||
sha512_block_data_order:
|
sha512_block_data_order:
|
||||||
.Lsha512_block_data_order:
|
.Lsha512_block_data_order:
|
||||||
|
|
||||||
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
#if __ARM_ARCH__<7 && !defined(__thumb2__)
|
||||||
sub r3,pc,#8 @ sha512_block_data_order
|
sub r3,pc,#8 @ sha512_block_data_order
|
||||||
#else
|
#else
|
||||||
adr r3,.Lsha512_block_data_order
|
adr r3,.Lsha512_block_data_order
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if __ARM_MAX_ARCH__>=7
|
|
||||||
ldr r12,=CRYPTOGAMS_armcap_P
|
|
||||||
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
|
|
||||||
|
|
||||||
tst r12,#ARMV7_NEON
|
|
||||||
bne .LNEON
|
|
||||||
#endif
|
|
||||||
|
|
||||||
add r2,r1,r2,lsl#7 @ len to point at the end of inp
|
add r2,r1,r2,lsl#7 @ len to point at the end of inp
|
||||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
|
||||||
sub r14,r3,#672 @ K512
|
sub r14,r3,#672 @ K512
|
||||||
|
|
@ -535,18 +528,16 @@ sha512_block_data_order:
|
||||||
#endif
|
#endif
|
||||||
.size sha512_block_data_order,.-sha512_block_data_order
|
.size sha512_block_data_order,.-sha512_block_data_order
|
||||||
|
|
||||||
@ CRYPTOGAMS_armcap_P
|
|
||||||
.ltorg
|
|
||||||
|
|
||||||
#if __ARM_MAX_ARCH__>=7
|
#if __ARM_MAX_ARCH__>=7
|
||||||
.arch armv7-a
|
.arch armv7-a
|
||||||
.fpu neon
|
.fpu neon
|
||||||
|
|
||||||
|
.align 4
|
||||||
.globl sha512_block_data_order_neon
|
.globl sha512_block_data_order_neon
|
||||||
.type sha512_block_data_order_neon,%function
|
.type sha512_block_data_order_neon,%function
|
||||||
.align 4
|
|
||||||
sha512_block_data_order_neon:
|
sha512_block_data_order_neon:
|
||||||
.LNEON:
|
|
||||||
dmb @ errata #451034 on early Cortex A8
|
dmb @ errata #451034 on early Cortex A8
|
||||||
add r2,r1,r2,lsl#7 @ len to point at the end of inp
|
add r2,r1,r2,lsl#7 @ len to point at the end of inp
|
||||||
adr r3,K512
|
adr r3,K512
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue