Add ARM AES asm implementation from Cryptogams (GH #683)
parent
bdac2de36e
commit
3ff7d7f028
|
|
@ -5,6 +5,8 @@ adv-simd.h
|
|||
adler32.cpp
|
||||
adler32.h
|
||||
aes.h
|
||||
aes-armv4.h
|
||||
aes-armv4.S
|
||||
algebra.cpp
|
||||
algebra.h
|
||||
algparam.cpp
|
||||
|
|
|
|||
18
GNUmakefile
18
GNUmakefile
|
|
@ -129,6 +129,13 @@ else
|
|||
CXXFLAGS ?= -DNDEBUG -g2 -O3
|
||||
endif
|
||||
|
||||
# On ARM we may compile aes-armv4.S through the CC compiler
|
||||
ifeq ($(GCC_COMPILER),1)
|
||||
CC=gcc
|
||||
else ifeq ($(CLANG_COMPILER),1)
|
||||
CC=clang
|
||||
endif
|
||||
|
||||
# Default prefix for make install
|
||||
ifeq ($(PREFIX),)
|
||||
PREFIX = /usr/local
|
||||
|
|
@ -723,6 +730,13 @@ SRCS += winpipes.cpp
|
|||
INCL += resource.h
|
||||
endif
|
||||
|
||||
# Cryptogams AES for ARMv4 and above. We couple to ARMv7.
|
||||
# Disable Thumb via -marm due to unaligned byte buffers.
|
||||
ifeq ($(IS_ARM32),1)
|
||||
CRYPTOGAMS_AES_ARCH = -march=armv7-a -marm
|
||||
SRCS += aes-armv4.S
|
||||
endif
|
||||
|
||||
# List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems.
|
||||
OBJS := $(SRCS:.cpp=.o)
|
||||
OBJS := $(OBJS:.S=.o)
|
||||
|
|
@ -1060,6 +1074,10 @@ ifeq ($(wildcard GNUmakefile.deps),GNUmakefile.deps)
|
|||
-include GNUmakefile.deps
|
||||
endif # Dependencies
|
||||
|
||||
# Cryptogams ARM asm implementation. CRYPTOGAMS_AES_ARCH includes -marm.
|
||||
aes-armv4.o : aes-armv4.S
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_ARCH) -mfloat-abi=$(FP_ABI) -c) $<
|
||||
|
||||
# SSSE3 or NEON available
|
||||
aria-simd.o : aria-simd.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(ARIA_FLAG) -c) $<
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,31 @@
|
|||
/* Header file for use with Cryptogams' ARMv4 AES.            */
|
||||
/* Also see http://www.openssl.org/~appro/cryptogams/ and */
|
||||
/* https://wiki.openssl.org/index.php?title=Cryptogams_AES */
|
||||
|
||||
#ifndef CRYPTOGAMS_AES_ARMV4_H
|
||||
#define CRYPTOGAMS_AES_ARMV4_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define AES_MAXNR 14
|
||||
|
||||
//typedef struct AES_KEY_st {
|
||||
// unsigned int rd_key[4 * (AES_MAXNR + 1)];
|
||||
// int rounds;
|
||||
//} AES_KEY;
|
||||
|
||||
// Instead of AES_KEY we use a 'word32 rkey[4*15+4]'. It has space for
|
||||
// both the AES_MAXNR+1 round keys and the round count in the tail.
|
||||
|
||||
int AES_set_encrypt_key(const unsigned char *userKey, const int bits, unsigned int *rkey);
|
||||
int AES_set_decrypt_key(const unsigned char *userKey, const int bits, unsigned int *rkey);
|
||||
void AES_encrypt(const unsigned char in[16], unsigned char out[16], const unsigned int *rkey);
|
||||
void AES_decrypt(const unsigned char in[16], unsigned char out[16], const unsigned int *rkey);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* CRYPTOGAMS_AES_ARMV4_H */
|
||||
9
config.h
9
config.h
|
|
@ -651,6 +651,15 @@ NAMESPACE_END
|
|||
# undef CRYPTOPP_ARM_ACLE_AVAILABLE
|
||||
#endif
|
||||
|
||||
// Cryptogams offers an ARM asm AES implementation. Crypto++ does
|
||||
// not provide an ARM implementation. The Cryptogams implementation
|
||||
// is about 2x faster than C/C++. Define this to use the Cryptogams
|
||||
// AES implementation on GNU Linux systems. When defined, Crypto++
|
||||
// will use aes-armv4.S.
|
||||
#if !defined(CRYPTOPP_DISABLE_ASM) && defined(__GNUC__) && defined(__arm__)
|
||||
# define CRYPTOGAMS_ARM_AES 1
|
||||
#endif
|
||||
|
||||
#endif // ARM32, ARM64
|
||||
|
||||
// ***************** AltiVec and Power8 ********************
|
||||
|
|
|
|||
113
rijndael.cpp
113
rijndael.cpp
|
|
@ -5,7 +5,16 @@
|
|||
// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM rijndael.cpp" to generate MASM code
|
||||
|
||||
/*
|
||||
July 2017: Added support for ARM AES instructions via compiler intrinsics.
|
||||
July 2018: Added support for AES on ARMv7 via Cryptogams ASM.
|
||||
See the head notes in aes-armv4.S for copyright and license.
|
||||
*/
|
||||
|
||||
/*
|
||||
September 2017: Added support for Power8 AES instructions via compiler intrinsics.
|
||||
*/
|
||||
|
||||
/*
|
||||
July 2017: Added support for ARMv8 AES instructions via compiler intrinsics.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
|
@ -240,6 +249,24 @@ ANONYMOUS_NAMESPACE_END
|
|||
#define fd(x) (f8(x) ^ f4(x) ^ x)
|
||||
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
|
||||
|
||||
// Reports the data alignment this cipher prefers for its input/output buffers.
// Returns 1 when one of the accelerated AES paths below is compiled in and its
// runtime check succeeds; otherwise returns the BlockTransformation default.
unsigned int Rijndael::Base::OptimalDataAlignment() const
|
||||
{
|
||||
// CFB mode performs an extra memcpy if buffer is not aligned.
// Returning 1 from the branches below reports byte alignment as sufficient.
|
||||
#if (CRYPTOPP_ARM_AES_AVAILABLE)
|
||||
if (HasAES())
|
||||
return 1;
|
||||
#endif
|
||||
#if (CRYPTOGAMS_ARM_AES)
|
||||
if (HasARMv7())
|
||||
return 1;
|
||||
#endif
|
||||
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
|
||||
if (HasAES())
|
||||
return 1;
|
||||
#endif
|
||||
// No accelerated path selected: defer to the base class default.
return BlockTransformation::OptimalDataAlignment();
|
||||
}
|
||||
|
||||
void Rijndael::Base::FillEncTable()
|
||||
{
|
||||
for (int i=0; i<256; i++)
|
||||
|
|
@ -300,6 +327,13 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si
|
|||
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
|
||||
#endif
|
||||
|
||||
#if (CRYPTOGAMS_ARM_AES)
|
||||
extern "C" int AES_set_encrypt_key(const unsigned char *userKey, const int bitLen, word32 *rkey);
|
||||
extern "C" int AES_set_decrypt_key(const unsigned char *userKey, const int bitLen, word32 *rkey);
|
||||
extern "C" void AES_encrypt(const unsigned char in[16], unsigned char out[16], const word32 *rkey);
|
||||
extern "C" void AES_decrypt(const unsigned char in[16], unsigned char out[16], const word32 *rkey);
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
|
||||
extern void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen,
|
||||
word32* rk, const byte* Se);
|
||||
|
|
@ -310,6 +344,33 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *su
|
|||
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
|
||||
#endif
|
||||
|
||||
#if (CRYPTOGAMS_ARM_AES)
|
||||
// Non-inlined wrapper that forwards to the extern "C" AES_set_encrypt_key.
//   userKey - the user key bytes
//   bitLen  - key size in bits (UncheckedSetKey passes keyLen*8)
//   rkey    - destination buffer for the key schedule (UncheckedSetKey passes m_key.begin())
// Returns the value returned by AES_set_encrypt_key unchanged.
int CRYPTOPP_NOINLINE
|
||||
CRYPTOGAMS_set_encrypt_key(const byte *userKey, const int bitLen, word32 *rkey)
|
||||
{
|
||||
return AES_set_encrypt_key(userKey, bitLen, rkey);
|
||||
}
|
||||
// Non-inlined wrapper that forwards to the extern "C" AES_set_decrypt_key.
//   userKey - the user key bytes
//   bitLen  - key size in bits (UncheckedSetKey passes keyLen*8)
//   rkey    - destination buffer for the key schedule (UncheckedSetKey passes m_key.begin())
// Returns the value returned by AES_set_decrypt_key unchanged.
int CRYPTOPP_NOINLINE
|
||||
CRYPTOGAMS_set_decrypt_key(const byte *userKey, const int bitLen, word32 *rkey)
|
||||
{
|
||||
return AES_set_decrypt_key(userKey, bitLen, rkey);
|
||||
}
|
||||
// Non-inlined wrapper: processes one block with the extern "C" AES_encrypt
// (declared with 16-byte in/out arrays), then optionally applies xorBlock.
//   inBlock  - input block
//   xorBlock - optional block; may be null, in which case no xorbuf call is made
//   outBlock - output block
//   rkey     - key schedule (callers in this file pass m_key.begin())
void CRYPTOPP_NOINLINE
|
||||
CRYPTOGAMS_encrypt(const byte *inBlock, const byte *xorBlock, byte *outBlock, const word32 *rkey)
|
||||
{
|
||||
AES_encrypt(inBlock, outBlock, rkey);
|
||||
if (xorBlock)
|
||||
// presumably XORs the 16 bytes of xorBlock into outBlock - confirm against xorbuf's definition
xorbuf (outBlock, xorBlock, 16);
|
||||
}
|
||||
// Non-inlined wrapper: processes one block with the extern "C" AES_decrypt
// (declared with 16-byte in/out arrays), then optionally applies xorBlock.
//   inBlock  - input block
//   xorBlock - optional block; may be null, in which case no xorbuf call is made
//   outBlock - output block
//   rkey     - key schedule (callers in this file pass m_key.begin())
void CRYPTOPP_NOINLINE
|
||||
CRYPTOGAMS_decrypt(const byte *inBlock, const byte *xorBlock, byte *outBlock, const word32 *rkey)
|
||||
{
|
||||
AES_decrypt(inBlock, outBlock, rkey);
|
||||
if (xorBlock)
|
||||
// presumably XORs the 16 bytes of xorBlock into outBlock - confirm against xorbuf's definition
xorbuf (outBlock, xorBlock, 16);
|
||||
}
|
||||
#endif
|
||||
|
||||
std::string Rijndael::Base::AlgorithmProvider() const
|
||||
{
|
||||
#if (CRYPTOPP_AESNI_AVAILABLE)
|
||||
|
|
@ -324,6 +385,10 @@ std::string Rijndael::Base::AlgorithmProvider() const
|
|||
if (HasAES())
|
||||
return "ARMv8";
|
||||
#endif
|
||||
#if (CRYPTOGAMS_ARM_AES)
|
||||
if (HasARMv7())
|
||||
return "ARMv7";
|
||||
#endif
|
||||
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
|
||||
if (HasAES())
|
||||
return "Power8";
|
||||
|
|
@ -335,6 +400,20 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c
|
|||
{
|
||||
AssertValidKeyLength(keyLen);
|
||||
|
||||
#if (CRYPTOGAMS_ARM_AES)
|
||||
if (HasARMv7())
|
||||
{
|
||||
m_rounds = keyLen/4 + 6;
|
||||
m_key.New(4*(15+1)+4);
|
||||
|
||||
if (IsForwardTransformation())
|
||||
CRYPTOGAMS_set_encrypt_key(userKey, keyLen*8, m_key.begin());
|
||||
else
|
||||
CRYPTOGAMS_set_decrypt_key(userKey, keyLen*8, m_key.begin());
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
|
||||
m_aliasBlock.New(s_sizeToAllocate);
|
||||
// The alias block is only used on IA-32 when unaligned data access is in effect.
|
||||
|
|
@ -474,6 +553,14 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
}
|
||||
#endif
|
||||
|
||||
#if (CRYPTOGAMS_ARM_AES)
|
||||
if (HasARMv7())
|
||||
{
|
||||
CRYPTOGAMS_encrypt(inBlock, xorBlock, outBlock, m_key.begin());
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
|
||||
if (HasAES())
|
||||
{
|
||||
|
|
@ -519,8 +606,8 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
QUARTER_ROUND_FE(s0, t1, t2, t3, t0)
|
||||
|
||||
// Nr - 2 full rounds:
|
||||
unsigned int r = m_rounds/2 - 1;
|
||||
do
|
||||
unsigned int r = m_rounds/2 - 1;
|
||||
do
|
||||
{
|
||||
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
|
||||
|
||||
|
|
@ -536,8 +623,8 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
QUARTER_ROUND_E(s1, t2, t3, t0, t1)
|
||||
QUARTER_ROUND_E(s0, t1, t2, t3, t0)
|
||||
|
||||
rk += 8;
|
||||
} while (--r);
|
||||
rk += 8;
|
||||
} while (--r);
|
||||
|
||||
word32 tbw[4];
|
||||
byte *const tempBlock = (byte *)tbw;
|
||||
|
|
@ -568,6 +655,14 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
}
|
||||
#endif
|
||||
|
||||
#if (CRYPTOGAMS_ARM_AES)
|
||||
if (HasARMv7())
|
||||
{
|
||||
CRYPTOGAMS_decrypt(inBlock, xorBlock, outBlock, m_key.begin());
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
|
||||
if (HasAES())
|
||||
{
|
||||
|
|
@ -613,8 +708,8 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
QUARTER_ROUND_FD(s0, t3, t2, t1, t0)
|
||||
|
||||
// Nr - 2 full rounds:
|
||||
unsigned int r = m_rounds/2 - 1;
|
||||
do
|
||||
unsigned int r = m_rounds/2 - 1;
|
||||
do
|
||||
{
|
||||
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
|
||||
|
||||
|
|
@ -630,8 +725,8 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
QUARTER_ROUND_D(s1, t0, t3, t2, t1)
|
||||
QUARTER_ROUND_D(s0, t3, t2, t1, t0)
|
||||
|
||||
rk += 8;
|
||||
} while (--r);
|
||||
rk += 8;
|
||||
} while (--r);
|
||||
|
||||
#if !(defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS))
|
||||
// timing attack countermeasure. see comments at top for more details
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ class CRYPTOPP_DLL Rijndael : public Rijndael_Info, public BlockCipherDocumentat
|
|||
public:
|
||||
void UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs ¶ms);
|
||||
std::string AlgorithmProvider() const;
|
||||
unsigned int OptimalDataAlignment() const;
|
||||
|
||||
protected:
|
||||
static void FillEncTable();
|
||||
|
|
@ -57,7 +58,7 @@ class CRYPTOPP_DLL Rijndael : public Rijndael_Info, public BlockCipherDocumentat
|
|||
static const word32 rcon[];
|
||||
|
||||
unsigned int m_rounds;
|
||||
FixedSizeAlignedSecBlock<word32, 4*15> m_key;
|
||||
SecBlock<word32, AllocatorWithCleanup<word32, true> > m_key;
|
||||
mutable SecByteBlock m_aliasBlock;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -202,6 +202,7 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
|
|||
policy.Iterate(outString, inString, cipherDir, length / bytesPerIteration);
|
||||
else
|
||||
{
|
||||
// GCC and Clang do not like this on ARM.
|
||||
memcpy(outString, inString, length);
|
||||
policy.Iterate(outString, outString, cipherDir, length / bytesPerIteration);
|
||||
}
|
||||
|
|
|
|||
16
test.cpp
16
test.cpp
|
|
@ -162,11 +162,11 @@ int scoped_main(int argc, char *argv[])
|
|||
s_globalSeed.resize(16, ' ');
|
||||
|
||||
#if (CRYPTOPP_USE_AES_GENERATOR)
|
||||
// Fetch the OFB_Mode<AES> interface, not the RandomNumberGenerator
|
||||
// Fetch the SymmetricCipher interface, not the RandomNumberGenerator
|
||||
// interface, to key the underlying cipher. If CRYPTOPP_USE_AES_GENERATOR is 1
|
||||
// then AES/OFB based is used. Otherwise the OS random number generator is used.
|
||||
OFB_Mode<AES>::Encryption& aesg = dynamic_cast<OFB_Mode<AES>::Encryption&>(GlobalRNG());
|
||||
aesg.SetKeyWithIV((byte *)s_globalSeed.data(), 16, (byte *)s_globalSeed.data());
|
||||
SymmetricCipher& cipher = dynamic_cast<SymmetricCipher&>(GlobalRNG());
|
||||
cipher.SetKeyWithIV((byte *)s_globalSeed.data(), 16, (byte *)s_globalSeed.data());
|
||||
#endif
|
||||
|
||||
std::string command, executableName, macFilename;
|
||||
|
|
@ -880,8 +880,8 @@ bool Validate(int alg, bool thorough, const char *seedInput)
|
|||
// Fetch the SymmetricCipher interface, not the RandomNumberGenerator
|
||||
// interface, to key the underlying cipher. If CRYPTOPP_USE_AES_GENERATOR is 1
|
||||
// then AES/OFB based is used. Otherwise the OS random number generator is used.
|
||||
OFB_Mode<AES>::Encryption& aesg = dynamic_cast<OFB_Mode<AES>::Encryption&>(GlobalRNG());
|
||||
aesg.SetKeyWithIV((byte *)s_globalSeed.data(), 16, (byte *)s_globalSeed.data());
|
||||
SymmetricCipher& cipher = dynamic_cast<SymmetricCipher&>(GlobalRNG());
|
||||
cipher.SetKeyWithIV((byte *)s_globalSeed.data(), 16, (byte *)s_globalSeed.data());
|
||||
#endif
|
||||
|
||||
g_testBegin = ::time(NULLPTR);
|
||||
|
|
@ -1000,9 +1000,9 @@ bool Validate(int alg, bool thorough, const char *seedInput)
|
|||
|
||||
g_testEnd = ::time(NULLPTR);
|
||||
|
||||
std::cout << "\nSeed used was " << "'" << s_globalSeed << "'" << std::endl;
|
||||
std::cout << "Test started at " << TimeToString(g_testBegin) << std::endl;
|
||||
std::cout << "Test ended at " << TimeToString(g_testEnd) << std::endl;
|
||||
std::cout << "\nSeed used was " << s_globalSeed;
|
||||
std::cout << "\nTest started at " << TimeToString(g_testBegin);
|
||||
std::cout << "\nTest ended at " << TimeToString(g_testEnd) << std::endl;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue