Fix ARMv8 AES Decryption

AES ECB and CTR mode encryption now runs at about 2.8 to 3.0 cycles per byte (cpb).
Hallelujah, it's a wrap.
pull/461/head
Jeffrey Walton 2017-08-06 22:57:02 -04:00
parent 701ec3aa1f
commit 1459042324
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 12 additions and 49 deletions

View File

@ -10,10 +10,6 @@
#include "config.h"
#include "misc.h"
// TODO: Remove after debugging
#include <stdio.h>
#include <stdlib.h>
// Clang and GCC hoops...
#if !(defined(__ARM_FEATURE_CRYPTO) || defined(_MSC_VER))
# undef CRYPTOPP_ARM_AES_AVAILABLE
@ -82,6 +78,8 @@ bool CPU_TryAES_ARMV8()
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key);
r1 = vaesmcq_u8(r1);
r2 = vaesimcq_u8(r2);
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
}
@ -110,6 +108,8 @@ bool CPU_TryAES_ARMV8()
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
uint8x16_t r1 = vaeseq_u8(data, key);
uint8x16_t r2 = vaesdq_u8(data, key);
r1 = vaesmcq_u8(r1);
r2 = vaesimcq_u8(r2);
// Hack... GCC optimizes away the code and returns true
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
@ -127,40 +127,9 @@ bool CPU_TryAES_ARMV8()
#if (CRYPTOPP_ARM_AES_AVAILABLE)
void PrintMessage(const byte *inBlock)
{
printf("M: ");
for (unsigned int j=0; j<16; ++j)
printf("%02X", inBlock[j]);
printf("\n");
}
// Debug helper: writes "C: " followed by the 16 bytes at outBlock as
// two-digit uppercase hex values, then a newline, to stdout.
void PrintCipher(const byte *outBlock)
{
    unsigned int idx = 0;

    printf("C: ");
    while (idx < 16)
    {
        printf("%02X", outBlock[idx]);
        ++idx;
    }
    printf("\n");
}
void PrintSubKeys(const word32 *keys, unsigned int rounds)
{
const byte* k = (const byte*)keys;
for (unsigned int i=0; i<rounds+1; ++i)
{
printf("R%d: ", i);
for (unsigned int j=0; j<16; ++j)
printf("%02X", *(k+(i*16)+j));
printf("\n");
}
}
void Rijndael_Enc_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorBlock, byte *outBlock,
const word32 *subKeys, unsigned int rounds)
{
//PrintMessage(inBlock);
//PrintSubKeys(subKeys, rounds);
uint8x16_t data = vld1q_u8(inBlock);
const byte *keys = reinterpret_cast<const byte*>(subKeys);
@ -175,6 +144,7 @@ void Rijndael_Enc_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorB
// One round of encryption: AES, no Mix
data = vaeseq_u8(data, vld1q_u8(keys+i*16));
// Final Add (bitwise Xor)
data = veorq_u8(data, vld1q_u8(keys+(i+1)*16));
@ -182,40 +152,33 @@ void Rijndael_Enc_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorB
vst1q_u8(outBlock, veorq_u8(data, vld1q_u8(xorBlock)));
else
vst1q_u8(outBlock, data);
//PrintCipher(outBlock);
}
// Processes one 16-byte block with the ARMv8 AES decryption intrinsics and
// stores the 16-byte result to outBlock.
//
// inBlock  - 16 bytes are loaded from here as the initial state.
// xorBlock - optional; when non-null, 16 bytes are loaded from it and XORed
//            into the result just before the store.
// outBlock - receives 16 bytes.
// subKeys  - key schedule; read as bytes in 16-byte strides.
// rounds   - controls the loop bound (rounds-1 iterations).
//
// NOTE(review): this listing appears to interleave removed and added lines of
// a diff (duplicated commented-out PrintSubKeys call, both vaesmcq_u8 and
// vaesimcq_u8 inside the loop, two consecutive final XOR statements). Confirm
// the real function body against the committed file before relying on it.
void Rijndael_Dec_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byte *xorBlock, byte *outBlock,
const word32 *subKeys, unsigned int rounds)
{
//PrintSubKeys(subKeys, rounds);
//PrintSubKeys(subKeys, rounds);
// Load the 16-byte input block into a NEON register.
uint8x16_t data = vld1q_u8(inBlock);
// Byte view of the key schedule so round keys can be addressed at i*16.
const byte *keys = reinterpret_cast<const byte*>(subKeys);
// AES single round decryption
data = vaesdq_u8(data, vld1q_u8(keys));
// i is declared outside the loop because it is used again after the loop.
unsigned int i;
for (i=0; i<rounds-1; ++i)
{
// AES mix columns
data = vaesmcq_u8(data);
// AES single round decryption
data = vaesdq_u8(data, vld1q_u8(keys+i*16));
// AES inverse mix columns
data = vaesimcq_u8(data);
}
// AES single round decryption
data = vaesdq_u8(data, vld1q_u8(keys+i*16));
// Final Add (bitwise Xor)
// NOTE(review): two XORs in a row (keys at i and at i+1) look like the old
// and new versions of the same statement — verify which one is current.
data = veorq_u8(data, vld1q_u8(keys+i*16));
data = veorq_u8(data, vld1q_u8(keys+(i+1)*16));
// Optionally fold in xorBlock, then write the 16-byte result.
if (xorBlock)
vst1q_u8(outBlock, veorq_u8(data, vld1q_u8(xorBlock)));
else
vst1q_u8(outBlock, data);
//PrintCipher(outBlock);
}
#endif // CRYPTOPP_ARM_AES_AVAILABLE

View File

@ -434,7 +434,7 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
}
#endif
#if (CRYPTOPP_ARM_AES_AVAILABLE) && 0
#if (CRYPTOPP_ARM_AES_AVAILABLE)
if (HasAES())
{
Rijndael_Dec_ProcessAndXorBlock_ARMV8(inBlock, xorBlock, outBlock, m_key.begin(), m_rounds);