diff --git a/aria.cpp b/aria.cpp index fabc7ae6..a2629320 100644 --- a/aria.cpp +++ b/aria.cpp @@ -171,9 +171,7 @@ ANONYMOUS_NAMESPACE_END NAMESPACE_BEGIN(CryptoPP) -#define ARIA_WO(X,Y) (((word32 *)(X))[Y]) - -inline byte ARIA_BRF(const word32 x, int y) { +inline byte ARIA_BRF(const word32 x, const int y) { return GETBYTE(x, y); } @@ -181,13 +179,28 @@ inline word32 ReverseWord(const word32 w) { return ByteReverse(w); } -inline word32 LoadWord(const word32 x) { - return ConditionalByteReverse(BIG_ENDIAN_ORDER, x); +// Retrieve the i-th word, optionally in Big Endian +template <bool big_endian> +inline word32 LoadWord(const word32 x[4], const unsigned int i) { + if (big_endian) + return ConditionalByteReverse(BIG_ENDIAN_ORDER, x[i]); + else + return x[i]; +} + +// Reinterpret x as a word32[], and retrieve the i-th word, optionally in Big Endian +template <bool big_endian> +inline word32 LoadWord(const byte x[16], const unsigned int i) { + if (big_endian) + return ConditionalByteReverse(BIG_ENDIAN_ORDER, reinterpret_cast<const word32*>(x)[i]); + else + return reinterpret_cast<const word32*>(x)[i]; } // Key XOR Layer #define ARIA_KXL { \ - t[0]^=ARIA_WO(rk,0); t[1]^=ARIA_WO(rk,1); t[2]^=ARIA_WO(rk,2); t[3]^=ARIA_WO(rk,3); \ + t[0]^=LoadWord<false>(rk,0); t[1]^=LoadWord<false>(rk,1); \ + t[2]^=LoadWord<false>(rk,2); t[3]^=LoadWord<false>(rk,3); \ } // S-Box Layer 1 + M @@ -217,19 +230,6 @@ inline word32 LoadWord(const word32 x) { (T3) = ReverseWord((T3)); \ } -#define ARIA_FO {SBL1_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3])} -#define ARIA_FE {SBL2_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[2],t[3],t[0],t[1]) ARIA_MM(t[0],t[1],t[2],t[3])} - -// n-bit right shift of Y XORed to X -#define ARIA_GSRK(RK, X, Y, n) { \ - q = 4-((n)/32); \ - r = (n) % 32; \ - ARIA_WO((RK),0) = ((X)[0]) ^ (((Y)[(q )%4])>>r) ^ (((Y)[(q+3)%4])<<(32-r)); \ - ARIA_WO((RK),1) = ((X)[1]) ^ (((Y)[(q+1)%4])>>r) ^ (((Y)[(q )%4])<<(32-r)); \ - ARIA_WO((RK),2) = ((X)[2]) ^ (((Y)[(q+2)%4])>>r) ^ 
(((Y)[(q+1)%4])<<(32-r)); \ - ARIA_WO((RK),3) = ((X)[3]) ^ (((Y)[(q+3)%4])>>r) ^ (((Y)[(q+2)%4])<<(32-r)); \ - } - #if defined(_MSC_VER) #define ARIA_M1(X,Y) { \ w=rotrFixed((X), 8); \ @@ -241,6 +241,22 @@ inline word32 LoadWord(const word32 x) { } #endif +#define ARIA_FO {SBL1_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3])} +#define ARIA_FE {SBL2_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[2],t[3],t[0],t[1]) ARIA_MM(t[0],t[1],t[2],t[3])} + +// n-bit right shift of Y XORed to X +template <unsigned int N> +inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], byte RK[16]) +{ + // MSVC is not generating a "rotate immediate". Unroll and constify to help it along. + static const unsigned int Q = 4-(N/32); + static const unsigned int R = N % 32; + reinterpret_cast<word32*>(RK)[0] = (X[0]) ^ ((Y[(Q )%4])>>R) ^ ((Y[(Q+3)%4])<<(32-R)); + reinterpret_cast<word32*>(RK)[1] = (X[1]) ^ ((Y[(Q+1)%4])>>R) ^ ((Y[(Q )%4])<<(32-R)); + reinterpret_cast<word32*>(RK)[2] = (X[2]) ^ ((Y[(Q+2)%4])>>R) ^ ((Y[(Q+1)%4])<<(32-R)); + reinterpret_cast<word32*>(RK)[3] = (X[3]) ^ ((Y[(Q+3)%4])>>R) ^ ((Y[(Q+2)%4])<<(32-R)); + } + void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const NameValuePairs &params) { CRYPTOPP_ASSERT(key && keylen); @@ -275,9 +291,8 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam // w0 has room for 32 bytes. w1-w3 each has room for 16 bytes. t is a 16 byte temp area. 
word32 *w0 = m_w.data(), *w1 = m_w.data()+8, *w2 = m_w.data()+12, *w3 = m_w.data()+16, *t = m_w.data()+20; - w0[0] = LoadWord(ARIA_WO(mk,0)); w0[1] = LoadWord(ARIA_WO(mk,1)); - w0[0] = LoadWord(ARIA_WO(mk,0)); w0[1] = LoadWord(ARIA_WO(mk,1)); - w0[2] = LoadWord(ARIA_WO(mk,2)); w0[3] = LoadWord(ARIA_WO(mk,3)); + w0[0] = LoadWord<true>(mk,0); w0[1] = LoadWord<true>(mk,1); + w0[2] = LoadWord<true>(mk,2); w0[3] = LoadWord<true>(mk,3); t[0]=w0[0]^KRK[q][0]; t[1]=w0[1]^KRK[q][1]; t[2]=w0[2]^KRK[q][2]; t[3]=w0[3]^KRK[q][3]; @@ -285,13 +300,13 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam if (keyBits > 128) { - w1[0] = LoadWord(ARIA_WO(mk,4)); - w1[1] = LoadWord(ARIA_WO(mk,5)); + w1[0] = LoadWord<true>(mk,4); + w1[1] = LoadWord<true>(mk,5); if (keyBits > 192) { - w1[2] = LoadWord(ARIA_WO(mk,6)); - w1[3] = LoadWord(ARIA_WO(mk,7)); + w1[2] = LoadWord<true>(mk,6); + w1[3] = LoadWord<true>(mk,7); } else { @@ -317,29 +332,29 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam ARIA_FO; w3[0]=t[0]^w1[0]; w3[1]=t[1]^w1[1]; w3[2]=t[2]^w1[2]; w3[3]=t[3]^w1[3]; - ARIA_GSRK(rk + 0, w0, w1, 19); - ARIA_GSRK(rk + 16, w1, w2, 19); - ARIA_GSRK(rk + 32, w2, w3, 19); - ARIA_GSRK(rk + 48, w3, w0, 19); - ARIA_GSRK(rk + 64, w0, w1, 31); - ARIA_GSRK(rk + 80, w1, w2, 31); - ARIA_GSRK(rk + 96, w2, w3, 31); - ARIA_GSRK(rk + 112, w3, w0, 31); - ARIA_GSRK(rk + 128, w0, w1, 67); - ARIA_GSRK(rk + 144, w1, w2, 67); - ARIA_GSRK(rk + 160, w2, w3, 67); - ARIA_GSRK(rk + 176, w3, w0, 67); - ARIA_GSRK(rk + 192, w0, w1, 97); + ARIA_GSRK<19>(w0, w1, rk + 0); + ARIA_GSRK<19>(w1, w2, rk + 16); + ARIA_GSRK<19>(w2, w3, rk + 32); + ARIA_GSRK<19>(w3, w0, rk + 48); + ARIA_GSRK<31>(w0, w1, rk + 64); + ARIA_GSRK<31>(w1, w2, rk + 80); + ARIA_GSRK<31>(w2, w3, rk + 96); + ARIA_GSRK<31>(w3, w0, rk + 112); + ARIA_GSRK<67>(w0, w1, rk + 128); + ARIA_GSRK<67>(w1, w2, rk + 144); + ARIA_GSRK<67>(w2, w3, rk + 160); + ARIA_GSRK<67>(w3, w0, rk + 176); + ARIA_GSRK<97>(w0, w1, rk + 192); if (keyBits > 128) { - 
ARIA_GSRK(rk + 208, w1, w2, 97); - ARIA_GSRK(rk + 224, w2, w3, 97); + ARIA_GSRK<97>(w1, w2, rk + 208); + ARIA_GSRK<97>(w2, w3, rk + 224); if (keyBits > 192) { - ARIA_GSRK(rk + 240, w3, w0, 97); - ARIA_GSRK(rk + 256, w0, w1, 109); + ARIA_GSRK< 97>(w3, w0, rk + 240); + ARIA_GSRK<109>(w0, w1, rk + 256); } } @@ -383,8 +398,8 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b const byte *rk = reinterpret_cast<const byte*>(m_rk.data()); word32 *t = const_cast<word32*>(m_w.data()+20); - t[0] = LoadWord(ARIA_WO(i,0)); t[1] = LoadWord(ARIA_WO(i,1)); - t[2] = LoadWord(ARIA_WO(i,2)); t[3] = LoadWord(ARIA_WO(i,3)); + t[0] = LoadWord<true>(i,0); t[1] = LoadWord<true>(i,1); + t[2] = LoadWord<true>(i,2); t[3] = LoadWord<true>(i,3); if (m_rounds > 12) { ARIA_KXL rk+= 16; ARIA_FO @@ -421,6 +436,7 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b o[14] = (byte)(S1[ARIA_BRF(t[3],1)] ) ^ rk[13]; o[15] = (byte)(S2[ARIA_BRF(t[3],0)] ) ^ rk[12]; #else + #define ARIA_WORD(X,Y) (((word32 *)(X))[Y]) o[ 0] = (byte)(X1[ARIA_BRF(t[0],3)] ); o[ 1] = (byte)(X2[ARIA_BRF(t[0],2)]>>8); o[ 2] = (byte)(S1[ARIA_BRF(t[0],1)] ); @@ -437,12 +453,12 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b o[13] = (byte)(X2[ARIA_BRF(t[3],2)]>>8); o[14] = (byte)(S1[ARIA_BRF(t[3],1)] ); o[15] = (byte)(S2[ARIA_BRF(t[3],0)] ); - ARIA_WO(o,0)^=ARIA_WO(rk,0); ARIA_WO(o,1)^=ARIA_WO(rk,1); - ARIA_WO(o,2)^=ARIA_WO(rk,2); ARIA_WO(o,3)^=ARIA_WO(rk,3); + ARIA_WORD(o,0)^=LoadWord<false>(rk,0); ARIA_WORD(o,1)^=LoadWord<false>(rk,1); + ARIA_WORD(o,2)^=LoadWord<false>(rk,2); ARIA_WORD(o,3)^=LoadWord<false>(rk,3); #endif if (x) - for (size_t n=0; n<16; ++n) + for (unsigned int n=0; n<16; ++n) o[n] ^= x[n]; }