Add AES for ARMv8 (Issue 458)

pull/461/head
Jeffrey Walton 2017-08-11 07:31:09 -04:00
parent e5b1be035e
commit 173dd0b530
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
1 changed files with 122 additions and 0 deletions

View File

@ -81,6 +81,13 @@ NAMESPACE_BEGIN(CryptoPP)
# define CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS 1
#endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
static void Rijndael_Enc_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorBlock, byte *outBlock,
const word32 *subKeys, unsigned int rounds);
static void Rijndael_Dec_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorBlock, byte *outBlock,
const word32 *subKeys, unsigned int rounds);
#endif
// Hack for SunCC, http://github.com/weidai11/cryptopp/issues/224
#if (__SUNPRO_CC >= 0x5130)
# define MAYBE_CONST
@ -383,6 +390,10 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
if (HasAESNI())
ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16);
#endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
if (HasAES())
ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16);
#endif
}
void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
@ -397,6 +408,13 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
return (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
}
#endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
if (HasAES())
{
Rijndael_Enc_ProcessAndXorBlock_ARMV8(inBlock, xorBlock, outBlock, m_key.begin(), m_rounds);
return;
}
#endif
typedef BlockGetAndPut<word32, NativeByteOrder> Block;
@ -475,6 +493,13 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
return;
}
#endif
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
if (HasAES())
{
Rijndael_Dec_ProcessAndXorBlock_ARMV8(inBlock, xorBlock, outBlock, m_key.begin(), m_rounds);
return;
}
#endif
typedef BlockGetAndPut<word32, NativeByteOrder> Block;
@ -1356,6 +1381,103 @@ size_t Rijndael::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
}
#endif // CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
void Rijndael_Enc_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorBlock, byte *outBlock,
const word32 *subKeys, unsigned int rounds)
{
uint8x16_t data = vld1q_u8(inBlock);
const byte *keys = reinterpret_cast<const byte*>(subKeys);
// Unroll the loop, profit 0.3 to 0.5 cpb.
data = vaeseq_u8(data, vld1q_u8(keys+0));
data = vaesmcq_u8(data);
data = vaeseq_u8(data, vld1q_u8(keys+16));
data = vaesmcq_u8(data);
data = vaeseq_u8(data, vld1q_u8(keys+32));
data = vaesmcq_u8(data);
data = vaeseq_u8(data, vld1q_u8(keys+48));
data = vaesmcq_u8(data);
data = vaeseq_u8(data, vld1q_u8(keys+64));
data = vaesmcq_u8(data);
data = vaeseq_u8(data, vld1q_u8(keys+80));
data = vaesmcq_u8(data);
data = vaeseq_u8(data, vld1q_u8(keys+96));
data = vaesmcq_u8(data);
data = vaeseq_u8(data, vld1q_u8(keys+112));
data = vaesmcq_u8(data);
data = vaeseq_u8(data, vld1q_u8(keys+128));
data = vaesmcq_u8(data);
unsigned int i=9;
for ( ; i<rounds-1; ++i)
{
// AES single round encryption
data = vaeseq_u8(data, vld1q_u8(keys+i*16));
// AES mix columns
data = vaesmcq_u8(data);
}
// AES single round encryption
data = vaeseq_u8(data, vld1q_u8(keys+i*16));
// Final Add (bitwise Xor)
data = veorq_u8(data, vld1q_u8(keys+(i+1)*16));
if (xorBlock)
vst1q_u8(outBlock, veorq_u8(data, vld1q_u8(xorBlock)));
else
vst1q_u8(outBlock, data);
}
void Rijndael_Dec_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorBlock, byte *outBlock,
const word32 *subKeys, unsigned int rounds)
{
uint8x16_t data = vld1q_u8(inBlock);
const byte *keys = reinterpret_cast<const byte*>(subKeys);
// Unroll the loop, profit 0.3 to 0.5 cpb.
data = vaesdq_u8(data, vld1q_u8(keys+0));
data = vaesimcq_u8(data);
data = vaesdq_u8(data, vld1q_u8(keys+16));
data = vaesimcq_u8(data);
data = vaesdq_u8(data, vld1q_u8(keys+32));
data = vaesimcq_u8(data);
data = vaesdq_u8(data, vld1q_u8(keys+48));
data = vaesimcq_u8(data);
data = vaesdq_u8(data, vld1q_u8(keys+64));
data = vaesimcq_u8(data);
data = vaesdq_u8(data, vld1q_u8(keys+80));
data = vaesimcq_u8(data);
data = vaesdq_u8(data, vld1q_u8(keys+96));
data = vaesimcq_u8(data);
data = vaesdq_u8(data, vld1q_u8(keys+112));
data = vaesimcq_u8(data);
data = vaesdq_u8(data, vld1q_u8(keys+128));
data = vaesimcq_u8(data);
unsigned int i=9;
for ( ; i<rounds-1; ++i)
{
// AES single round decryption
data = vaesdq_u8(data, vld1q_u8(keys+i*16));
// AES inverse mix columns
data = vaesimcq_u8(data);
}
// AES single round decryption
data = vaesdq_u8(data, vld1q_u8(keys+i*16));
// Final Add (bitwise Xor)
data = veorq_u8(data, vld1q_u8(keys+(i+1)*16));
if (xorBlock)
vst1q_u8(outBlock, veorq_u8(data, vld1q_u8(xorBlock)));
else
vst1q_u8(outBlock, data);
}
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
NAMESPACE_END
#endif