Avoid extra loads of workspace variables
parent
ddc0f3a899
commit
1d1a150737
24
aria.cpp
24
aria.cpp
|
|
@ -248,21 +248,16 @@ inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], byte RK[16])
|
||||||
|
|
||||||
#if CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
|
#if CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
|
||||||
template <unsigned int N>
|
template <unsigned int N>
|
||||||
inline void ARIA_GSRK_NEON(const word32 X[4], const word32 Y[4], byte RK[16])
|
inline void ARIA_GSRK_NEON(const uint32x4_t X, const uint32x4_t Y, byte RK[16])
|
||||||
{
|
{
|
||||||
static const unsigned int Q1 = (4-(N/32)) % 4;
|
static const unsigned int Q1 = (4-(N/32)) % 4;
|
||||||
static const unsigned int Q2 = (3-(N/32)) % 4;
|
static const unsigned int Q2 = (3-(N/32)) % 4;
|
||||||
static const unsigned int R = N % 32;
|
static const unsigned int R = N % 32;
|
||||||
|
|
||||||
const uint32x4_t a = vld1q_u32((const uint32_t*)X);
|
|
||||||
const uint32x4_t t = vld1q_u32((const uint32_t*)Y);
|
|
||||||
const uint32x4_t b = vextq_u32(t, t, Q1);
|
|
||||||
const uint32x4_t c = vextq_u32(t, t, Q2);
|
|
||||||
|
|
||||||
vst1q_u32(reinterpret_cast<uint32_t*>(RK),
|
vst1q_u32(reinterpret_cast<uint32_t*>(RK),
|
||||||
veorq_u32(a, veorq_u32(
|
veorq_u32(X, veorq_u32(
|
||||||
vshrq_n_u32(b, R),
|
vshrq_n_u32(vextq_u32(Y, Y, Q1), R),
|
||||||
vshlq_n_u32(c, 32-R))));
|
vshlq_n_u32(vextq_u32(Y, Y, Q2), 32-R))));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -401,6 +396,11 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
|
||||||
#if CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
|
#if CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
|
||||||
if (HasNEON())
|
if (HasNEON())
|
||||||
{
|
{
|
||||||
|
const uint32x4_t w0 = vld1q_u32((const uint32_t*)(m_w.data()+0));
|
||||||
|
const uint32x4_t w1 = vld1q_u32((const uint32_t*)(m_w.data()+8));
|
||||||
|
const uint32x4_t w2 = vld1q_u32((const uint32_t*)(m_w.data()+12));
|
||||||
|
const uint32x4_t w3 = vld1q_u32((const uint32_t*)(m_w.data()+16));
|
||||||
|
|
||||||
ARIA_GSRK_NEON<19>(w0, w1, rk + 0);
|
ARIA_GSRK_NEON<19>(w0, w1, rk + 0);
|
||||||
ARIA_GSRK_NEON<19>(w1, w2, rk + 16);
|
ARIA_GSRK_NEON<19>(w1, w2, rk + 16);
|
||||||
ARIA_GSRK_NEON<19>(w2, w3, rk + 32);
|
ARIA_GSRK_NEON<19>(w2, w3, rk + 32);
|
||||||
|
|
@ -467,7 +467,7 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
|
||||||
#if CRYPTOPP_ENABLE_ARIA_SSE2_INTRINSICS
|
#if CRYPTOPP_ENABLE_ARIA_SSE2_INTRINSICS
|
||||||
if (HasSSE2())
|
if (HasSSE2())
|
||||||
{
|
{
|
||||||
a=reinterpret_cast<word32*>(rk); s=m_w.data()+24; z=a+r*4;
|
a=reinterpret_cast<word32*>(rk); s=m_w.data()+24; z=a+r*4;
|
||||||
_mm_store_si128((__m128i*)t, _mm_load_si128((const __m128i*)a));
|
_mm_store_si128((__m128i*)t, _mm_load_si128((const __m128i*)a));
|
||||||
_mm_store_si128((__m128i*)a, _mm_load_si128((const __m128i*)z));
|
_mm_store_si128((__m128i*)a, _mm_load_si128((const __m128i*)z));
|
||||||
_mm_store_si128((__m128i*)z, _mm_load_si128((const __m128i*)t));
|
_mm_store_si128((__m128i*)z, _mm_load_si128((const __m128i*)t));
|
||||||
|
|
@ -492,7 +492,7 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
a=reinterpret_cast<word32*>(rk); s=m_w.data()+24; z=a+r*4;
|
a=reinterpret_cast<word32*>(rk); s=m_w.data()+24; z=a+r*4;
|
||||||
::memcpy(t, a, 16); ::memcpy(a, z, 16); ::memcpy(z, t, 16);
|
::memcpy(t, a, 16); ::memcpy(a, z, 16); ::memcpy(z, t, 16);
|
||||||
|
|
||||||
a+=4; z-=4;
|
a+=4; z-=4;
|
||||||
|
|
@ -585,7 +585,6 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
|
||||||
_mm_loadu_si128((const __m128i*)(outBlock)),
|
_mm_loadu_si128((const __m128i*)(outBlock)),
|
||||||
_mm_loadu_si128((const __m128i*)(xorBlock))));
|
_mm_loadu_si128((const __m128i*)(xorBlock))));
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
@ -641,7 +640,6 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
|
||||||
vld1q_u32((const uint32_t*)outBlock),
|
vld1q_u32((const uint32_t*)outBlock),
|
||||||
vld1q_u32((const uint32_t*)xorBlock)));
|
vld1q_u32((const uint32_t*)xorBlock)));
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif // CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
|
#endif // CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
|
||||||
|
|
|
||||||
2
aria.h
2
aria.h
|
|
@ -48,7 +48,7 @@ public:
|
||||||
void ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const;
|
void ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Reference implementation allocates a table of 17 sub-keys.
|
// Reference implementation allocates a table of 17 round keys.
|
||||||
FixedSizeAlignedSecBlock<byte, 16*17> m_rk; // round keys
|
FixedSizeAlignedSecBlock<byte, 16*17> m_rk; // round keys
|
||||||
FixedSizeAlignedSecBlock<word32, 4*7> m_w; // w0, w1, w2, w3, t and u
|
FixedSizeAlignedSecBlock<word32, 4*7> m_w; // w0, w1, w2, w3, t and u
|
||||||
unsigned int m_rounds;
|
unsigned int m_rounds;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue