Cryptogams is Andy Polyakov's project used to create high-speed crypto algorithms and share them with other developers. Cryptogams has a dual license. First is the OpenSSL license because Andy contributes to OpenSSL. Second is a BSD license for those who want a more permissive license. Andy's implementation runs about 45% faster than C/C++ code. Testing on a 1 GHz Cortex-A7 shows Cryptogams at 17 cpb, and C++ at 30 cpb. The integration instructions are documented at [Cryptogams SHA](https://wiki.openssl.org/index.php/Cryptogams_SHA) on the OpenSSL wiki.
pull/843/head
parent
8c99b1cd75
commit
4c9ca6b723
|
|
@ -316,6 +316,8 @@ sha_simd.cpp
|
||||||
sha.h
|
sha.h
|
||||||
sha1_armv4.h
|
sha1_armv4.h
|
||||||
sha1_armv4.S
|
sha1_armv4.S
|
||||||
|
sha256_armv4.h
|
||||||
|
sha256_armv4.S
|
||||||
sha3.cpp
|
sha3.cpp
|
||||||
sha3.h
|
sha3.h
|
||||||
shacal2.cpp
|
shacal2.cpp
|
||||||
|
|
|
||||||
|
|
@ -1059,7 +1059,7 @@ endif
|
||||||
ifeq ($(IS_ARM32),1)
|
ifeq ($(IS_ARM32),1)
|
||||||
CRYPTOGAMS_ARCH_FLAG = -march=armv7-a
|
CRYPTOGAMS_ARCH_FLAG = -march=armv7-a
|
||||||
CRYPTOGAMS_ARCH_FLAG += -Wa,--noexecstack
|
CRYPTOGAMS_ARCH_FLAG += -Wa,--noexecstack
|
||||||
SRCS += aes_armv4.S sha1_armv4.S
|
SRCS += aes_armv4.S sha1_armv4.S sha256_armv4.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems.
|
# List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems.
|
||||||
|
|
@ -1509,6 +1509,10 @@ sha_simd.o : sha_simd.cpp
|
||||||
sha1_armv4.o : sha1_armv4.S
|
sha1_armv4.o : sha1_armv4.S
|
||||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||||
|
|
||||||
|
# Cryptogams ARM asm implementation.
|
||||||
|
sha256_armv4.o : sha256_armv4.S
|
||||||
|
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||||
|
|
||||||
sha3_simd.o : sha3_simd.cpp
|
sha3_simd.o : sha3_simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(SHA3_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(SHA3_FLAG) -c) $<
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -594,7 +594,7 @@ ifeq ($(IS_ARM32),1)
|
||||||
ifneq ($(IS_IOS),1)
|
ifneq ($(IS_IOS),1)
|
||||||
CRYPTOGAMS_ARCH_FLAG = -march=armv7-a
|
CRYPTOGAMS_ARCH_FLAG = -march=armv7-a
|
||||||
CRYPTOGAMS_ARCH_FLAG += -Wa,--noexecstack
|
CRYPTOGAMS_ARCH_FLAG += -Wa,--noexecstack
|
||||||
SRCS += aes_armv4.S sha1_armv4.S
|
SRCS += aes_armv4.S sha1_armv4.S sha256_armv4.S
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
@ -865,6 +865,14 @@ rijndael_simd.o : rijndael_simd.cpp
|
||||||
sha_simd.o : sha_simd.cpp
|
sha_simd.o : sha_simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
||||||
|
|
||||||
|
# Cryptogams ARM asm implementation.
|
||||||
|
sha1_armv4.o : sha1_armv4.S
|
||||||
|
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||||
|
|
||||||
|
# Cryptogams ARM asm implementation.
|
||||||
|
sha256_armv4.o : sha256_armv4.S
|
||||||
|
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||||
|
|
||||||
# SSE4.2/SHA-NI or ARMv8a available
|
# SSE4.2/SHA-NI or ARMv8a available
|
||||||
shacal2_simd.o : shacal2_simd.cpp
|
shacal2_simd.o : shacal2_simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
||||||
|
|
|
||||||
|
|
@ -29,8 +29,9 @@ Daniel J. Bernstein, Jack Lloyd - chacha.cpp, chacha_simd.cpp, chacha_avx.cpp
|
||||||
Andrew Moon - ed25519, x25519, donna_32.cpp, donna_64.cpp, donna_sse.cpp
|
Andrew Moon - ed25519, x25519, donna_32.cpp, donna_64.cpp, donna_sse.cpp
|
||||||
|
|
||||||
The Crypto++ Library uses portions of Andy Polyakov's CRYPTOGAMS for Poly1305
|
The Crypto++ Library uses portions of Andy Polyakov's CRYPTOGAMS for Poly1305
|
||||||
scalar multiplication, aes_armv4.S and sha1_armv4.S. CRYPTOGAMS is dual licensed
|
scalar multiplication, aes_armv4.S, sha1_armv4.S and sha256_armv4.S. CRYPTOGAMS
|
||||||
with a permissive BSD-style license. The CRYPTOGAMS license is reproduced below.
|
is dual licensed with a permissive BSD-style license. The CRYPTOGAMS license is
|
||||||
|
reproduced below.
|
||||||
|
|
||||||
The Crypto++ Library uses portions of Jack Lloyd's Botan for ChaCha SSE2 and
|
The Crypto++ Library uses portions of Jack Lloyd's Botan for ChaCha SSE2 and
|
||||||
AVX. Botan placed the code in public domain for Crypto++ to use.
|
AVX. Botan placed the code in public domain for Crypto++ to use.
|
||||||
|
|
|
||||||
|
|
@ -1213,6 +1213,3 @@ _armv4_AES_decrypt:
|
||||||
sub r10,r10,#1024
|
sub r10,r10,#1024
|
||||||
ldr pc,[sp],#4 @ pop and return
|
ldr pc,[sp],#4 @ pop and return
|
||||||
.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
|
.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
|
||||||
.byte 65,69,83,32,102,111,114,32,65,82,77,118,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
||||||
.align 2
|
|
||||||
.align 2
|
|
||||||
|
|
|
||||||
|
|
@ -358,9 +358,9 @@
|
||||||
// not provide an asm implementation. The Cryptogams AES implementation is
|
// not provide an asm implementation. The Cryptogams AES implementation is
|
||||||
// about 50% faster than C/C++, and SHA implementation is about 30% faster
|
// about 50% faster than C/C++, and SHA implementation is about 30% faster
|
||||||
// than C/C++. Define this to use the Cryptogams AES and SHA implementations
|
// than C/C++. Define this to use the Cryptogams AES and SHA implementations
|
||||||
// on GNU Linux systems. When defined, Crypto++ will use aes_armv4.S and
|
// on GNU Linux systems. When defined, Crypto++ will use aes_armv4.S,
|
||||||
// sha1_armv4.S. LLVM miscompiles aes_armv4.S so disable under Clang. Also see
|
// sha1_armv4.S and sha256_armv4.S. LLVM miscompiles aes_armv4.S so disable
|
||||||
// https://bugs.llvm.org/show_bug.cgi?id=38133.
|
// under Clang. Also see https://bugs.llvm.org/show_bug.cgi?id=38133.
|
||||||
#if !defined(CRYPTOPP_DISABLE_ASM) && defined(__arm__)
|
#if !defined(CRYPTOPP_DISABLE_ASM) && defined(__arm__)
|
||||||
# if defined(__GNUC__) && !defined(__clang__)
|
# if defined(__GNUC__) && !defined(__clang__)
|
||||||
# define CRYPTOGAMS_ARM_AES 1
|
# define CRYPTOGAMS_ARM_AES 1
|
||||||
|
|
|
||||||
50
sha.cpp
50
sha.cpp
|
|
@ -69,6 +69,11 @@ extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, siz
|
||||||
extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
|
extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if (CRYPTOGAMS_ARM_SHA256)
|
||||||
|
extern "C" unsigned int CRYPTOGAMS_armcaps;
|
||||||
|
extern "C" int sha256_block_data_order(word32* state, const word32 *data, size_t blocks);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if CRYPTOPP_ARM_SHA512_AVAILABLE
|
#if CRYPTOPP_ARM_SHA512_AVAILABLE
|
||||||
extern void SHA512_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
|
extern void SHA512_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -309,7 +314,8 @@ size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
|
||||||
// for capabilities like ARMv7 and NEON. Storage is allocated in the
|
// for capabilities like ARMv7 and NEON. Storage is allocated in the
|
||||||
// module. We still need to set CRYPTOGAMS_armcaps accordingly.
|
// module. We still need to set CRYPTOGAMS_armcaps accordingly.
|
||||||
// The Cryptogams code defines NEON as 1<<0; see ARMV7_NEON.
|
// The Cryptogams code defines NEON as 1<<0; see ARMV7_NEON.
|
||||||
static unsigned int unused = CRYPTOGAMS_armcaps = HasNEON() ? (1<<0) : 0;
|
static const unsigned int unused = CRYPTOGAMS_armcaps = HasNEON() ? (1<<0) : 0;
|
||||||
|
CRYPTOPP_UNUSED(unused);
|
||||||
|
|
||||||
sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE);
|
sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE);
|
||||||
return length & (SHA1::BLOCKSIZE - 1);
|
return length & (SHA1::BLOCKSIZE - 1);
|
||||||
|
|
@ -428,6 +434,12 @@ std::string SHA256_AlgorithmProvider()
|
||||||
if (HasSSE2())
|
if (HasSSE2())
|
||||||
return "SSE2";
|
return "SSE2";
|
||||||
#endif
|
#endif
|
||||||
|
#if CRYPTOGAMS_ARM_SHA256
|
||||||
|
if (HasNEON())
|
||||||
|
return "NEON";
|
||||||
|
if (HasARMv7())
|
||||||
|
return "ARMv7";
|
||||||
|
#endif
|
||||||
#if CRYPTOPP_ARM_SHA2_AVAILABLE
|
#if CRYPTOPP_ARM_SHA2_AVAILABLE
|
||||||
if (HasSHA2())
|
if (HasSHA2())
|
||||||
return "ARMv8";
|
return "ARMv8";
|
||||||
|
|
@ -811,6 +823,14 @@ void SHA256::Transform(word32 *state, const word32 *data)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if CRYPTOGAMS_ARM_SHA256 && 0
|
||||||
|
// TODO: convert LE to BE and use Cryptogams code
|
||||||
|
if (HasARMv7())
|
||||||
|
{
|
||||||
|
sha256_block_data_order(state, data, 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if CRYPTOPP_ARM_SHA2_AVAILABLE
|
#if CRYPTOPP_ARM_SHA2_AVAILABLE
|
||||||
if (HasSHA2())
|
if (HasSHA2())
|
||||||
{
|
{
|
||||||
|
|
@ -849,6 +869,20 @@ size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if CRYPTOGAMS_ARM_SHA256
|
||||||
|
if (HasARMv7())
|
||||||
|
{
|
||||||
|
// The Cryptogams code uses a global variable named CRYPTOGAMS_armcaps
|
||||||
|
// for capabilities like ARMv7 and NEON. Storage is allocated in the
|
||||||
|
// module. We still need to set CRYPTOGAMS_armcaps accordingly.
|
||||||
|
// The Cryptogams code defines NEON as 1<<0; see ARMV7_NEON.
|
||||||
|
static const unsigned int unused = CRYPTOGAMS_armcaps = HasNEON() ? (1<<0) : 0;
|
||||||
|
CRYPTOPP_UNUSED(unused);
|
||||||
|
|
||||||
|
sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
|
||||||
|
return length & (SHA256::BLOCKSIZE - 1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if CRYPTOPP_ARM_SHA2_AVAILABLE
|
#if CRYPTOPP_ARM_SHA2_AVAILABLE
|
||||||
if (HasSHA2())
|
if (HasSHA2())
|
||||||
{
|
{
|
||||||
|
|
@ -905,6 +939,20 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if CRYPTOGAMS_ARM_SHA256
|
||||||
|
if (HasARMv7())
|
||||||
|
{
|
||||||
|
// The Cryptogams code uses a global variable named CRYPTOGAMS_armcaps
|
||||||
|
// for capabilities like ARMv7 and NEON. Storage is allocated in the
|
||||||
|
// module. We still need to set CRYPTOGAMS_armcaps accordingly.
|
||||||
|
// The Cryptogams code defines NEON as 1<<0; see ARMV7_NEON.
|
||||||
|
static const unsigned int unused = CRYPTOGAMS_armcaps = HasNEON() ? (1<<0) : 0;
|
||||||
|
CRYPTOPP_UNUSED(unused);
|
||||||
|
|
||||||
|
sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
|
||||||
|
return length & (SHA256::BLOCKSIZE - 1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if CRYPTOPP_ARM_SHA2_AVAILABLE
|
#if CRYPTOPP_ARM_SHA2_AVAILABLE
|
||||||
if (HasSHA2())
|
if (HasSHA2())
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -565,8 +565,7 @@ sha1_block_data_order:
|
||||||
.word CRYPTOGAMS_armcaps-.Lsha1_block
|
.word CRYPTOGAMS_armcaps-.Lsha1_block
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
||||||
.align 2
|
|
||||||
.align 5
|
.align 5
|
||||||
#if __ARM_MAX_ARCH__>=7
|
#if __ARM_MAX_ARCH__>=7
|
||||||
.arch armv7-a
|
.arch armv7-a
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,21 @@
|
||||||
|
/* Header file for use with Cryptogams' ARMv4 SHA-256. */
|
||||||
|
/* Also see http://www.openssl.org/~appro/cryptogams/ */
|
||||||
|
/* https://wiki.openssl.org/index.php/Cryptogams_SHA. */
|
||||||
|
|
||||||
|
#ifndef CRYPTOGAMS_SHA256_ARMV4_H
|
||||||
|
#define CRYPTOGAMS_SHA256_ARMV4_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void sha256_block_data_order(void *state, const void *data, size_t blocks);
|
||||||
|
|
||||||
|
/* Cryptogams arm caps */
|
||||||
|
#define ARMV7_NEON (1<<0)
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* CRYPTOGAMS_SHA256_ARMV4_H */
|
||||||
Loading…
Reference in New Issue