Avoid extra loads of workspace variables

pull/402/head
Jeffrey Walton 2017-04-16 13:00:45 -04:00
parent ddc0f3a899
commit 1d1a150737
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 12 additions and 14 deletions

View File

@ -248,21 +248,16 @@ inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], byte RK[16])
#if CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
// ARIA_GSRK_NEON<N>: writes one 16-byte value to RK, computed as
//   X ^ ( shr(ext(Y, Q1), R) ^ shl(ext(Y, Q2), 32-R) )
// where ext() is vextq_u32 applied to Y with itself (a rearrangement of the four
// 32-bit lanes), shr/shl are per-lane immediate shifts, Q1 = (4-(N/32)) % 4,
// Q2 = (3-(N/32)) % 4 and R = N % 32.
// Presumably this is the ARIA key-schedule step "X xor (Y rotated right by N bits
// as a 128-bit value)" -- TODO confirm against the scalar ARIA_GSRK.
// NOTE(review): this listing appears to interleave the removed and the added lines
// of the change (two signatures, two argument lists for the outer veorq_u32). The
// pointer-taking signature and the a/t/b/c locals look like the old version; the
// uint32x4_t signature with the inlined vextq_u32 calls looks like the new one.
// Confirm against the actual file before editing.
// NOTE(review): the immediate-shift intrinsics restrict their shift counts, so R is
// presumably required to be non-zero (N not a multiple of 32) -- verify the callers.
template <unsigned int N>
inline void ARIA_GSRK_NEON(const word32 X[4], const word32 Y[4], byte RK[16])
inline void ARIA_GSRK_NEON(const uint32x4_t X, const uint32x4_t Y, byte RK[16])
{
// Lane offsets for the two vextq_u32 rearrangements and the per-lane bit shift.
static const unsigned int Q1 = (4-(N/32)) % 4;
static const unsigned int Q2 = (3-(N/32)) % 4;
static const unsigned int R = N % 32;
// Pointer-argument variant: load X and Y from memory, then rearrange Y's lanes twice.
const uint32x4_t a = vld1q_u32((const uint32_t*)X);
const uint32x4_t t = vld1q_u32((const uint32_t*)Y);
const uint32x4_t b = vextq_u32(t, t, Q1);
const uint32x4_t c = vextq_u32(t, t, Q2);
// Store the four 32-bit result lanes to RK.
vst1q_u32(reinterpret_cast<uint32_t*>(RK),
veorq_u32(a, veorq_u32(
vshrq_n_u32(b, R),
vshlq_n_u32(c, 32-R))));
// Vector-argument variant: same expression, using X and Y directly with no loads.
veorq_u32(X, veorq_u32(
vshrq_n_u32(vextq_u32(Y, Y, Q1), R),
vshlq_n_u32(vextq_u32(Y, Y, Q2), 32-R))));
}
#endif
@ -401,6 +396,11 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
#if CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
if (HasNEON())
{
const uint32x4_t w0 = vld1q_u32((const uint32_t*)(m_w.data()+0));
const uint32x4_t w1 = vld1q_u32((const uint32_t*)(m_w.data()+8));
const uint32x4_t w2 = vld1q_u32((const uint32_t*)(m_w.data()+12));
const uint32x4_t w3 = vld1q_u32((const uint32_t*)(m_w.data()+16));
ARIA_GSRK_NEON<19>(w0, w1, rk + 0);
ARIA_GSRK_NEON<19>(w1, w2, rk + 16);
ARIA_GSRK_NEON<19>(w2, w3, rk + 32);
@ -585,7 +585,6 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
_mm_loadu_si128((const __m128i*)(outBlock)),
_mm_loadu_si128((const __m128i*)(xorBlock))));
}
return;
}
else
@ -641,7 +640,6 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
vld1q_u32((const uint32_t*)outBlock),
vld1q_u32((const uint32_t*)xorBlock)));
}
return;
}
else
#endif // CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS

2
aria.h
View File

@ -48,7 +48,7 @@ public:
void ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const;
private:
// Reference implementation allocates a table of 17 sub-keys.
// Reference implementation allocates a table of 17 round keys.
FixedSizeAlignedSecBlock<byte, 16*17> m_rk; // round keys
FixedSizeAlignedSecBlock<word32, 4*7> m_w; // w0, w1, w2, w3, t and u
unsigned int m_rounds;