Add ARM AES asm implementation from Cryptogams (GH #683)

pull/687/head
Jeffrey Walton 2018-07-11 06:59:44 -04:00
parent bdac2de36e
commit 3ff7d7f028
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
9 changed files with 1412 additions and 18 deletions

View File

@ -5,6 +5,8 @@ adv-simd.h
adler32.cpp adler32.cpp
adler32.h adler32.h
aes.h aes.h
aes-armv4.h
aes-armv4.S
algebra.cpp algebra.cpp
algebra.h algebra.h
algparam.cpp algparam.cpp

View File

@ -129,6 +129,13 @@ else
CXXFLAGS ?= -DNDEBUG -g2 -O3 CXXFLAGS ?= -DNDEBUG -g2 -O3
endif endif
# On ARM we may compile aes-armv4.S through the CC compiler. Pick a C
# compiler that matches the detected C++ compiler, but only when CC still
# holds make's built-in default (or is undefined, e.g. under 'make -R').
# An unconditional assignment here would replace a CC exported in the
# environment, for example a cross-compiler.
ifneq ($(filter default undefined,$(origin CC)),)
ifeq ($(GCC_COMPILER),1)
CC=gcc
else ifeq ($(CLANG_COMPILER),1)
CC=clang
endif
endif
# Default prefix for make install # Default prefix for make install
ifeq ($(PREFIX),) ifeq ($(PREFIX),)
PREFIX = /usr/local PREFIX = /usr/local
@ -723,6 +730,13 @@ SRCS += winpipes.cpp
INCL += resource.h INCL += resource.h
endif endif
# Cryptogams AES for ARMv4 and above. We couple to ARMv7.
# Disable Thumb via -marm due to unaligned byte buffers.
# CRYPTOGAMS_AES_ARCH is consumed by the aes-armv4.o rule. Adding
# aes-armv4.S to SRCS places aes-armv4.o in OBJS through the .S=.o
# substitution applied to the source list.
ifeq ($(IS_ARM32),1)
CRYPTOGAMS_AES_ARCH = -march=armv7-a -marm
SRCS += aes-armv4.S
endif
# List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems. # List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems.
OBJS := $(SRCS:.cpp=.o) OBJS := $(SRCS:.cpp=.o)
OBJS := $(OBJS:.S=.o) OBJS := $(OBJS:.S=.o)
@ -1060,6 +1074,10 @@ ifeq ($(wildcard GNUmakefile.deps),GNUmakefile.deps)
-include GNUmakefile.deps -include GNUmakefile.deps
endif # Dependencies endif # Dependencies
# Cryptogams ARM asm implementation. CRYPTOGAMS_AES_ARCH includes -marm.
# The .S source is built with $(CC) rather than $(CXX). It still receives
# CXXFLAGS, plus the float ABI named by FP_ABI.
aes-armv4.o : aes-armv4.S
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_ARCH) -mfloat-abi=$(FP_ABI) -c) $<
# SSSE3 or NEON available # SSSE3 or NEON available
aria-simd.o : aria-simd.cpp aria-simd.o : aria-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(ARIA_FLAG) -c) $< $(CXX) $(strip $(CXXFLAGS) $(ARIA_FLAG) -c) $<

1237
aes-armv4.S Normal file

File diff suppressed because it is too large Load Diff

31
aes-armv4.h Normal file
View File

@ -0,0 +1,31 @@
/* Header file for use with the Cryptogams ARMv4 AES (aes-armv4.S). */
/* Also see http://www.openssl.org/~appro/cryptogams/ and */
/* https://wiki.openssl.org/index.php?title=Cryptogams_AES */
#ifndef CRYPTOGAMS_AES_ARMV4_H
#define CRYPTOGAMS_AES_ARMV4_H
#ifdef __cplusplus
extern "C" {
#endif
// Sizes the round key array in the reference layout shown below.
#define AES_MAXNR 14
// Reference key layout. It is kept as a comment only and is not compiled.
//typedef struct AES_KEY_st {
// unsigned int rd_key[4 * (AES_MAXNR + 1)];
// int rounds;
//} AES_KEY;
// Instead of AES_KEY we use a flat array such as 'word32 rkey[4*15+4]'.
// It has space for the 4*(AES_MAXNR+1) round key words and for the round
// count in the tail.
// NOTE(review): presumably the round count sits where 'rounds' follows
// 'rd_key' in the layout above; confirm against aes-armv4.S.

// Key setup. 'bits' is the key length in bits and 'rkey' receives the key
// schedule. NOTE(review): the meaning of the int result is defined by
// aes-armv4.S and is not visible from this header; confirm there.
int AES_set_encrypt_key(const unsigned char *userKey, const int bits, unsigned int *rkey);
int AES_set_decrypt_key(const unsigned char *userKey, const int bits, unsigned int *rkey);
// Single block transforms. 'in' and 'out' are 16-byte blocks, and 'rkey'
// is a schedule prepared by the matching set-key function.
void AES_encrypt(const unsigned char in[16], unsigned char out[16], const unsigned int *rkey);
void AES_decrypt(const unsigned char in[16], unsigned char out[16], const unsigned int *rkey);
#ifdef __cplusplus
}
#endif
#endif /* CRYPTOGAMS_AES_ARMV4_H */

View File

@ -651,6 +651,15 @@ NAMESPACE_END
# undef CRYPTOPP_ARM_ACLE_AVAILABLE # undef CRYPTOPP_ARM_ACLE_AVAILABLE
#endif #endif
// Cryptogams offers an ARM asm AES implementation. Crypto++ does
// not provide an ARM implementation. The Cryptogams implementation
// is about 2x faster than C/C++. CRYPTOGAMS_ARM_AES is defined here
// when the compiler defines both __GNUC__ and __arm__, unless
// CRYPTOPP_DISABLE_ASM is defined. When defined, Crypto++
// will use aes-armv4.S.
#if !defined(CRYPTOPP_DISABLE_ASM) && defined(__GNUC__) && defined(__arm__)
# define CRYPTOGAMS_ARM_AES 1
#endif
#endif // ARM32, ARM64 #endif // ARM32, ARM64
// ***************** AltiVec and Power8 ******************** // ***************** AltiVec and Power8 ********************

View File

@ -5,7 +5,16 @@
// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM rijndael.cpp" to generate MASM code // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM rijndael.cpp" to generate MASM code
/* /*
July 2017: Added support for ARM AES instructions via compiler intrinsics. July 2018: Added support for ARMv7 AES instructions via Cryptogams ASM.
See the head notes in aes-armv4.S for copyright and license.
*/
/*
September 2017: Added support for Power8 AES instructions via compiler intrinsics.
*/
/*
July 2017: Added support for ARMv8 AES instructions via compiler intrinsics.
*/ */
/* /*
@ -240,6 +249,24 @@ ANONYMOUS_NAMESPACE_END
#define fd(x) (f8(x) ^ f4(x) ^ x) #define fd(x) (f8(x) ^ f4(x) ^ x)
#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) #define fe(x) (f8(x) ^ f4(x) ^ f2(x))
unsigned int Rijndael::Base::OptimalDataAlignment() const
{
	// Report byte alignment (1) whenever one of the accelerated code paths
	// is usable at runtime. CFB mode performs an extra memcpy if the
	// buffer is not aligned to the value returned here.
	bool acceptsAnyAlignment = false;
#if (CRYPTOPP_ARM_AES_AVAILABLE)
	acceptsAnyAlignment = acceptsAnyAlignment || HasAES();
#endif
#if (CRYPTOGAMS_ARM_AES)
	acceptsAnyAlignment = acceptsAnyAlignment || HasARMv7();
#endif
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
	acceptsAnyAlignment = acceptsAnyAlignment || HasAES();
#endif
	if (acceptsAnyAlignment)
		return 1;

	// No accelerated path applies, so use the base class answer.
	return BlockTransformation::OptimalDataAlignment();
}
void Rijndael::Base::FillEncTable() void Rijndael::Base::FillEncTable()
{ {
for (int i=0; i<256; i++) for (int i=0; i<256; i++)
@ -300,6 +327,13 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif #endif
#if (CRYPTOGAMS_ARM_AES)
// C-linkage entry points of the Cryptogams AES code (aes-armv4.S).
// They mirror the declarations in aes-armv4.h, except that the key
// schedule pointer is spelled word32* here and unsigned int* there.
// bitLen is the key length in bits. in and out are 16-byte blocks.
extern "C" int AES_set_encrypt_key(const unsigned char *userKey, const int bitLen, word32 *rkey);
extern "C" int AES_set_decrypt_key(const unsigned char *userKey, const int bitLen, word32 *rkey);
extern "C" void AES_encrypt(const unsigned char in[16], unsigned char out[16], const word32 *rkey);
extern "C" void AES_decrypt(const unsigned char in[16], unsigned char out[16], const word32 *rkey);
#endif
#if (CRYPTOPP_POWER8_AES_AVAILABLE) #if (CRYPTOPP_POWER8_AES_AVAILABLE)
extern void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, extern void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen,
word32* rk, const byte* Se); word32* rk, const byte* Se);
@ -310,6 +344,33 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *su
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif #endif
#if (CRYPTOGAMS_ARM_AES)
int CRYPTOPP_NOINLINE
CRYPTOGAMS_set_encrypt_key(const byte *userKey, const int bitLen, word32 *rkey)
{
	// Non-inlined forwarder to the C-linkage AES_set_encrypt_key.
	// The callee's status is handed back unchanged.
	const int status = AES_set_encrypt_key(userKey, bitLen, rkey);
	return status;
}
int CRYPTOPP_NOINLINE
CRYPTOGAMS_set_decrypt_key(const byte *userKey, const int bitLen, word32 *rkey)
{
	// Non-inlined forwarder to the C-linkage AES_set_decrypt_key.
	// The callee's status is handed back unchanged.
	const int status = AES_set_decrypt_key(userKey, bitLen, rkey);
	return status;
}
void CRYPTOPP_NOINLINE
CRYPTOGAMS_encrypt(const byte *inBlock, const byte *xorBlock, byte *outBlock, const word32 *rkey)
{
	// Encrypt the 16-byte block from inBlock into outBlock first.
	AES_encrypt(inBlock, outBlock, rkey);

	// A null xorBlock means there is no mask to apply.
	if (!xorBlock)
		return;

	// Otherwise XOR the 16-byte mask into the freshly written output.
	xorbuf(outBlock, xorBlock, 16);
}
void CRYPTOPP_NOINLINE
CRYPTOGAMS_decrypt(const byte *inBlock, const byte *xorBlock, byte *outBlock, const word32 *rkey)
{
	// Decrypt the 16-byte block from inBlock into outBlock first.
	AES_decrypt(inBlock, outBlock, rkey);

	// A null xorBlock means there is no mask to apply.
	if (!xorBlock)
		return;

	// Otherwise XOR the 16-byte mask into the freshly written output.
	xorbuf(outBlock, xorBlock, 16);
}
#endif
std::string Rijndael::Base::AlgorithmProvider() const std::string Rijndael::Base::AlgorithmProvider() const
{ {
#if (CRYPTOPP_AESNI_AVAILABLE) #if (CRYPTOPP_AESNI_AVAILABLE)
@ -324,6 +385,10 @@ std::string Rijndael::Base::AlgorithmProvider() const
if (HasAES()) if (HasAES())
return "ARMv8"; return "ARMv8";
#endif #endif
#if (CRYPTOGAMS_ARM_AES)
if (HasARMv7())
return "ARMv7";
#endif
#if (CRYPTOPP_POWER8_AES_AVAILABLE) #if (CRYPTOPP_POWER8_AES_AVAILABLE)
if (HasAES()) if (HasAES())
return "Power8"; return "Power8";
@ -335,6 +400,20 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c
{ {
AssertValidKeyLength(keyLen); AssertValidKeyLength(keyLen);
#if (CRYPTOGAMS_ARM_AES)
if (HasARMv7())
{
m_rounds = keyLen/4 + 6;
m_key.New(4*(15+1)+4);
if (IsForwardTransformation())
CRYPTOGAMS_set_encrypt_key(userKey, keyLen*8, m_key.begin());
else
CRYPTOGAMS_set_decrypt_key(userKey, keyLen*8, m_key.begin());
return;
}
#endif
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86 #if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
m_aliasBlock.New(s_sizeToAllocate); m_aliasBlock.New(s_sizeToAllocate);
// The alias block is only used on IA-32 when unaligned data access is in effect. // The alias block is only used on IA-32 when unaligned data access is in effect.
@ -474,6 +553,14 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
} }
#endif #endif
#if (CRYPTOGAMS_ARM_AES)
if (HasARMv7())
{
CRYPTOGAMS_encrypt(inBlock, xorBlock, outBlock, m_key.begin());
return;
}
#endif
#if (CRYPTOPP_POWER8_AES_AVAILABLE) #if (CRYPTOPP_POWER8_AES_AVAILABLE)
if (HasAES()) if (HasAES())
{ {
@ -519,8 +606,8 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
QUARTER_ROUND_FE(s0, t1, t2, t3, t0) QUARTER_ROUND_FE(s0, t1, t2, t3, t0)
// Nr - 2 full rounds: // Nr - 2 full rounds:
unsigned int r = m_rounds/2 - 1; unsigned int r = m_rounds/2 - 1;
do do
{ {
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3]; s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
@ -536,8 +623,8 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
QUARTER_ROUND_E(s1, t2, t3, t0, t1) QUARTER_ROUND_E(s1, t2, t3, t0, t1)
QUARTER_ROUND_E(s0, t1, t2, t3, t0) QUARTER_ROUND_E(s0, t1, t2, t3, t0)
rk += 8; rk += 8;
} while (--r); } while (--r);
word32 tbw[4]; word32 tbw[4];
byte *const tempBlock = (byte *)tbw; byte *const tempBlock = (byte *)tbw;
@ -568,6 +655,14 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
} }
#endif #endif
#if (CRYPTOGAMS_ARM_AES)
if (HasARMv7())
{
CRYPTOGAMS_decrypt(inBlock, xorBlock, outBlock, m_key.begin());
return;
}
#endif
#if (CRYPTOPP_POWER8_AES_AVAILABLE) #if (CRYPTOPP_POWER8_AES_AVAILABLE)
if (HasAES()) if (HasAES())
{ {
@ -613,8 +708,8 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
QUARTER_ROUND_FD(s0, t3, t2, t1, t0) QUARTER_ROUND_FD(s0, t3, t2, t1, t0)
// Nr - 2 full rounds: // Nr - 2 full rounds:
unsigned int r = m_rounds/2 - 1; unsigned int r = m_rounds/2 - 1;
do do
{ {
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3]; s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
@ -630,8 +725,8 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
QUARTER_ROUND_D(s1, t0, t3, t2, t1) QUARTER_ROUND_D(s1, t0, t3, t2, t1)
QUARTER_ROUND_D(s0, t3, t2, t1, t0) QUARTER_ROUND_D(s0, t3, t2, t1, t0)
rk += 8; rk += 8;
} while (--r); } while (--r);
#if !(defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)) #if !(defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS))
// timing attack countermeasure. see comments at top for more details // timing attack countermeasure. see comments at top for more details

View File

@ -45,6 +45,7 @@ class CRYPTOPP_DLL Rijndael : public Rijndael_Info, public BlockCipherDocumentat
public: public:
void UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs &params); void UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs &params);
std::string AlgorithmProvider() const; std::string AlgorithmProvider() const;
unsigned int OptimalDataAlignment() const;
protected: protected:
static void FillEncTable(); static void FillEncTable();
@ -57,7 +58,7 @@ class CRYPTOPP_DLL Rijndael : public Rijndael_Info, public BlockCipherDocumentat
static const word32 rcon[]; static const word32 rcon[];
unsigned int m_rounds; unsigned int m_rounds;
FixedSizeAlignedSecBlock<word32, 4*15> m_key; SecBlock<word32, AllocatorWithCleanup<word32, true> > m_key;
mutable SecByteBlock m_aliasBlock; mutable SecByteBlock m_aliasBlock;
}; };

View File

@ -202,6 +202,7 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
policy.Iterate(outString, inString, cipherDir, length / bytesPerIteration); policy.Iterate(outString, inString, cipherDir, length / bytesPerIteration);
else else
{ {
// GCC and Clang do not like this on ARM.
memcpy(outString, inString, length); memcpy(outString, inString, length);
policy.Iterate(outString, outString, cipherDir, length / bytesPerIteration); policy.Iterate(outString, outString, cipherDir, length / bytesPerIteration);
} }

View File

@ -162,11 +162,11 @@ int scoped_main(int argc, char *argv[])
s_globalSeed.resize(16, ' '); s_globalSeed.resize(16, ' ');
#if (CRYPTOPP_USE_AES_GENERATOR) #if (CRYPTOPP_USE_AES_GENERATOR)
// Fetch the OFB_Mode<AES> interface, not the RandomNumberGenerator // Fetch the SymmetricCipher interface, not the RandomNumberGenerator
// interface, to key the underlying cipher. If CRYPTOPP_USE_AES_GENERATOR is 1 // interface, to key the underlying cipher. If CRYPTOPP_USE_AES_GENERATOR is 1
// then AES/OFB based is used. Otherwise the OS random number generator is used. // then AES/OFB based is used. Otherwise the OS random number generator is used.
OFB_Mode<AES>::Encryption& aesg = dynamic_cast<OFB_Mode<AES>::Encryption&>(GlobalRNG()); SymmetricCipher& cipher = dynamic_cast<SymmetricCipher&>(GlobalRNG());
aesg.SetKeyWithIV((byte *)s_globalSeed.data(), 16, (byte *)s_globalSeed.data()); cipher.SetKeyWithIV((byte *)s_globalSeed.data(), 16, (byte *)s_globalSeed.data());
#endif #endif
std::string command, executableName, macFilename; std::string command, executableName, macFilename;
@ -880,8 +880,8 @@ bool Validate(int alg, bool thorough, const char *seedInput)
// Fetch the OFB_Mode<AES> interface, not the RandomNumberGenerator // Fetch the OFB_Mode<AES> interface, not the RandomNumberGenerator
// interface, to key the underlying cipher. If CRYPTOPP_USE_AES_GENERATOR is 1 // interface, to key the underlying cipher. If CRYPTOPP_USE_AES_GENERATOR is 1
// then AES/OFB based is used. Otherwise the OS random number generator is used. // then AES/OFB based is used. Otherwise the OS random number generator is used.
OFB_Mode<AES>::Encryption& aesg = dynamic_cast<OFB_Mode<AES>::Encryption&>(GlobalRNG()); SymmetricCipher& cipher = dynamic_cast<SymmetricCipher&>(GlobalRNG());
aesg.SetKeyWithIV((byte *)s_globalSeed.data(), 16, (byte *)s_globalSeed.data()); cipher.SetKeyWithIV((byte *)s_globalSeed.data(), 16, (byte *)s_globalSeed.data());
#endif #endif
g_testBegin = ::time(NULLPTR); g_testBegin = ::time(NULLPTR);
@ -1000,9 +1000,9 @@ bool Validate(int alg, bool thorough, const char *seedInput)
g_testEnd = ::time(NULLPTR); g_testEnd = ::time(NULLPTR);
std::cout << "\nSeed used was " << "'" << s_globalSeed << "'" << std::endl; std::cout << "\nSeed used was " << s_globalSeed;
std::cout << "Test started at " << TimeToString(g_testBegin) << std::endl; std::cout << "\nTest started at " << TimeToString(g_testBegin);
std::cout << "Test ended at " << TimeToString(g_testEnd) << std::endl; std::cout << "\nTest ended at " << TimeToString(g_testEnd) << std::endl;
return result; return result;
} }