Add ARM SHA1 asm implementation from Cryptogams. Cryptogams is Andy Polyakov's project used to create high speed crypto algorithms and share them with other developers. Cryptogams has a dual license. First is the OpenSSL license because Andy contributes to OpenSSL. Second is a BSD license for those who want a more permissive license. Andy's implementation runs about 30% faster than C/C++ code. Testing on a 1 GHz Cortex-A7 shows Cryptogams at 16 cpb, and C++ at 23 cpb. The integration instructions are documented at [Cryptogams SHA](https://wiki.openssl.org/index.php/Cryptogams_SHA) on the OpenSSL wiki.
pull/840/head
parent
4a21619bff
commit
1a63112faf
|
|
@ -314,6 +314,8 @@ serpentp.h
|
|||
sha.cpp
|
||||
sha_simd.cpp
|
||||
sha.h
|
||||
sha1_armv4.h
|
||||
sha1_armv4.S
|
||||
sha3.cpp
|
||||
sha3.h
|
||||
shacal2.cpp
|
||||
|
|
|
|||
12
GNUmakefile
12
GNUmakefile
|
|
@ -1057,9 +1057,9 @@ endif
|
|||
# Cryptogams AES for ARMv4 and above. We couple to ARMv7.
|
||||
# Avoid iOS. It cannot consume the assembly.
|
||||
ifeq ($(IS_ARM32),1)
|
||||
CRYPTOGAMS_AES_FLAG = -march=armv7-a
|
||||
CRYPTOGAMS_AES_FLAG += -Wa,--noexecstack
|
||||
SRCS += aes_armv4.S
|
||||
CRYPTOGAMS_ARCH_FLAG = -march=armv7-a
|
||||
CRYPTOGAMS_ARCH_FLAG += -Wa,--noexecstack
|
||||
SRCS += aes_armv4.S sha1_armv4.S
|
||||
endif
|
||||
|
||||
# List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems.
|
||||
|
|
@ -1423,7 +1423,7 @@ endif # Dependencies
|
|||
|
||||
# Cryptogams ARM asm implementation.
|
||||
aes_armv4.o : aes_armv4.S
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -c) $<
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||
|
||||
# SSSE3 or NEON available
|
||||
aria_simd.o : aria_simd.cpp
|
||||
|
|
@ -1505,6 +1505,10 @@ rijndael_simd.o : rijndael_simd.cpp
|
|||
sha_simd.o : sha_simd.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
||||
|
||||
# Cryptogams ARM asm implementation.
|
||||
sha1_armv4.o : sha1_armv4.S
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||
|
||||
sha3_simd.o : sha3_simd.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(SHA3_FLAG) -c) $<
|
||||
|
||||
|
|
|
|||
|
|
@ -592,9 +592,9 @@ INCL := $(filter-out resource.h,$(sort $(wildcard *.h)))
|
|||
# Avoid iOS. It cannot consume the assembly.
|
||||
ifeq ($(IS_ARM32),1)
|
||||
ifneq ($(IS_IOS),1)
|
||||
CRYPTOGAMS_AES_FLAG = -march=armv7-a
|
||||
CRYPTOGAMS_AES_FLAG += -Wa,--noexecstack
|
||||
SRCS += aes_armv4.S
|
||||
CRYPTOGAMS_ARCH_FLAG = -march=armv7-a
|
||||
CRYPTOGAMS_ARCH_FLAG += -Wa,--noexecstack
|
||||
SRCS += aes_armv4.S sha1_armv4.S
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
@ -800,7 +800,7 @@ endif # Dependencies
|
|||
|
||||
# Cryptogams ARM asm implementation.
|
||||
aes_armv4.o : aes_armv4.S
|
||||
$(CXX) $(strip $(CXXFLAGS) -fpermissive $(CRYPTOGAMS_AES_FLAG) -c) $<
|
||||
$(CXX) $(strip $(CXXFLAGS) -fpermissive $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||
|
||||
cpu-features.o: cpu-features.h cpu-features.c
|
||||
$(CXX) $(strip $(CXXFLAGS) -fpermissive -c) cpu-features.c
|
||||
|
|
|
|||
|
|
@ -29,8 +29,8 @@ Daniel J. Bernstein, Jack Lloyd - chacha.cpp, chacha_simd.cpp, chacha_avx.cpp
|
|||
Andrew Moon - ed25519, x25519, donna_32.cpp, donna_64.cpp, donna_sse.cpp
|
||||
|
||||
The Crypto++ Library uses portions of Andy Polyakov's CRYPTOGAMS for Poly1305
|
||||
scalar multiplication and aes_armv4.S. CRYPTOGAMS is dual licensed with a
|
||||
permissive BSD-style license. The CRYPTOGAMS license is reproduced below.
|
||||
scalar multiplication, aes_armv4.S and sha1_armv4.S. CRYPTOGAMS is dual licensed
|
||||
with a permissive BSD-style license. The CRYPTOGAMS license is reproduced below.
|
||||
|
||||
The Crypto++ Library uses portions of Jack Lloyd's Botan for ChaCha SSE2 and
|
||||
AVX. Botan placed the code in public domain for Crypto++ to use.
|
||||
|
|
|
|||
15
config_asm.h
15
config_asm.h
|
|
@ -354,15 +354,18 @@
|
|||
# undef CRYPTOPP_ARM_SHA2_AVAILABLE
|
||||
#endif
|
||||
|
||||
// Cryptogams offers an ARM asm AES implementation. Crypto++ does
|
||||
// not provide an asm implementation. The Cryptogams implementation
|
||||
// is about 2x faster than C/C++. Define this to use the Cryptogams
|
||||
// AES implementation on GNU Linux systems. When defined, Crypto++
|
||||
// will use aes_armv4.S. LLVM miscompiles aes_armv4.S so disable
|
||||
// under Clang. See https://bugs.llvm.org/show_bug.cgi?id=38133.
|
||||
// Cryptogams offers ARM asm implementations for AES and SHA. Crypto++ does
|
||||
// not provide an asm implementation. The Cryptogams AES implementation is
|
||||
// about 50% faster than C/C++, and SHA implementation is about 30% faster
|
||||
// than C/C++. Define this to use the Cryptogams AES and SHA implementations
|
||||
// on GNU Linux systems. When defined, Crypto++ will use aes_armv4.S and
|
||||
// sha1_armv4.S. LLVM miscompiles aes_armv4.S so disable under Clang. Also see
|
||||
// https://bugs.llvm.org/show_bug.cgi?id=38133.
|
||||
#if !defined(CRYPTOPP_DISABLE_ASM) && defined(__arm__)
|
||||
# if defined(__GNUC__) && !defined(__clang__)
|
||||
# define CRYPTOGAMS_ARM_AES 1
|
||||
# define CRYPTOGAMS_ARM_SHA1 1
|
||||
# define CRYPTOGAMS_ARM_SHA256 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
|
|
|||
32
sha.cpp
32
sha.cpp
|
|
@ -56,6 +56,11 @@ extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, siz
|
|||
extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
|
||||
#endif
|
||||
|
||||
#if (CRYPTOGAMS_ARM_SHA1)
|
||||
extern "C" unsigned int CRYPTOGAMS_armcaps;
|
||||
extern "C" int sha1_block_data_order(word32* state, const word32 *data, size_t blocks);
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_ARM_SHA1_AVAILABLE
|
||||
extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
|
||||
#endif
|
||||
|
|
@ -232,6 +237,12 @@ std::string SHA1::AlgorithmProvider() const
|
|||
if (HasSSE2())
|
||||
return "SSE2";
|
||||
#endif
|
||||
#if CRYPTOGAMS_ARM_SHA1
|
||||
if (HasNEON())
|
||||
return "NEON";
|
||||
if (HasARMv7())
|
||||
return "ARMv7";
|
||||
#endif
|
||||
#if CRYPTOPP_ARM_SHA1_AVAILABLE
|
||||
if (HasSHA1())
|
||||
return "ARMv8";
|
||||
|
|
@ -260,6 +271,14 @@ void SHA1::Transform(word32 *state, const word32 *data)
|
|||
return;
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOGAMS_ARM_SHA1 && 0
|
||||
// TODO: convert LE to BE and use Cryptogams code
|
||||
if (HasARMv7())
|
||||
{
|
||||
sha1_block_data_order(state, data, 1);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOPP_ARM_SHA1_AVAILABLE
|
||||
if (HasSHA1())
|
||||
{
|
||||
|
|
@ -283,6 +302,19 @@ size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
|
|||
return length & (SHA1::BLOCKSIZE - 1);
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOGAMS_ARM_SHA1
|
||||
if (HasARMv7())
|
||||
{
|
||||
// The Cryptogams code uses a global variable named CRYPTOGAMS_armcaps
|
||||
// for capabilities like ARMv7 and NEON. Storage is allocated in the
|
||||
// module. We still need to set CRYPTOGAMS_armcaps accordingly.
|
||||
// The Cryptogams code defines NEON as 1<<0; see ARMV7_NEON.
|
||||
static unsigned int unused = CRYPTOGAMS_armcaps = HasNEON() ? (1<<0) : 0;
|
||||
|
||||
sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE);
|
||||
return length & (SHA1::BLOCKSIZE - 1);
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOPP_ARM_SHA1_AVAILABLE
|
||||
if (HasSHA1())
|
||||
{
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,21 @@
|
|||
/* Header file for use with Cryptogams' ARMv4 SHA1. */
|
||||
/* Also see http://www.openssl.org/~appro/cryptogams/ */
|
||||
/* https://wiki.openssl.org/index.php/Cryptogams_SHA. */
|
||||
|
||||
#ifndef CRYPTOGAMS_SHA1_ARMV4_H
|
||||
#define CRYPTOGAMS_SHA1_ARMV4_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void sha1_block_data_order(void *state, const void *data, size_t blocks);
|
||||
|
||||
/* Cryptogams arm caps */
|
||||
#define ARMV7_NEON (1<<0)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* CRYPTOGAMS_SHA1_ARMV4_H */
|
||||
Loading…
Reference in New Issue