From 6145d52b22de2228ed4679e0ae221f6cf06526b5 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 31 Jul 2017 04:16:39 -0400 Subject: [PATCH] Add GCM_SetKeyWithoutResync_PMULL --- gcm-simd.cpp | 80 +++++++++++++++++++++++++++++++++++++++++++++++++--- gcm.cpp | 63 ++--------------------------------------- 2 files changed, 78 insertions(+), 65 deletions(-) diff --git a/gcm-simd.cpp b/gcm-simd.cpp index 12118128..ffa909a8 100644 --- a/gcm-simd.cpp +++ b/gcm-simd.cpp @@ -279,6 +279,31 @@ uint64x2_t GCM_Multiply_PMULL(const uint64x2_t &x, const uint64x2_t &h, const ui return GCM_Reduce_PMULL(c0, c1, c2, r); } +void GCM_SetKeyWithoutResync_PMULL(byte *mulTable, byte *hashKey, unsigned int tableSize) +{ + const uint64x2_t r = s_clmulConstants[0]; + const uint64x2_t t = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(hashKey))); + const uint64x2_t h0 = vextq_u64(t, t, 1); + + uint64x2_t h = h0; + unsigned int i; + for (i=0; i(mtable); @@ -366,6 +391,46 @@ const unsigned int s_cltableSizeInBlocks = 8; ANONYMOUS_NAMESPACE_END +#if 0 +// preserved for testing +void gcm_gf_mult(const unsigned char *a, const unsigned char *b, unsigned char *c) +{ + word64 Z0=0, Z1=0, V0, V1; + + typedef BlockGetAndPut Block; + Block::Get(a)(V0)(V1); + + for (int i=0; i<16; i++) + { + for (int j=0x80; j!=0; j>>=1) + { + int x = b[i] & j; + Z0 ^= x ? V0 : 0; + Z1 ^= x ? V1 : 0; + x = (int)V1 & 1; + V1 = (V1>>1) | (V0<<63); + V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0); + } + } + Block::Put(NULLPTR, c)(Z0)(Z1); +} + +__m128i _mm_clmulepi64_si128(const __m128i &a, const __m128i &b, int i) +{ + word64 A[1] = {ByteReverse(((word64*)&a)[i&1])}; + word64 B[1] = {ByteReverse(((word64*)&b)[i>>4])}; + + PolynomialMod2 pa((byte *)A, 8); + PolynomialMod2 pb((byte *)B, 8); + PolynomialMod2 c = pa*pb; + + __m128i output; + for (int i=0; i<16; i++) + ((byte *)&output)[i] = c.GetByte(i); + return output; +} +#endif + __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128i &r) { /* @@ -410,18 +475,25 @@ __m128i GCM_Multiply_CLMUL(const __m128i &x, const __m128i &h, const __m128i &r) void GCM_SetKeyWithoutResync_CLMUL(byte *mulTable, byte *hashKey, unsigned int tableSize) { const __m128i r = s_clmulConstants[0]; - __m128i h0 = _mm_shuffle_epi8(_mm_load_si128((__m128i *)(void *)hashKey), s_clmulConstants[1]); - __m128i h = h0; + const __m128i h0 = _mm_shuffle_epi8(_mm_load_si128((__m128i *)(void *)hashKey), s_clmulConstants[1]); - for (unsigned int i=0; i Block; - Block::Get(a)(V0)(V1); - - for (int i=0; i<16; i++) - { - for (int j=0x80; j!=0; j>>=1) - { - int x = b[i] & j; - Z0 ^= x ? V0 : 0; - Z1 ^= x ? V1 : 0; - x = (int)V1 & 1; - V1 = (V1>>1) | (V0<<63); - V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0); - } - } - Block::Put(NULLPTR, c)(Z0)(Z1); -} - -__m128i _mm_clmulepi64_si128(const __m128i &a, const __m128i &b, int i) -{ - word64 A[1] = {ByteReverse(((word64*)&a)[i&1])}; - word64 B[1] = {ByteReverse(((word64*)&b)[i>>4])}; - - PolynomialMod2 pa((byte *)A, 8); - PolynomialMod2 pb((byte *)B, 8); - PolynomialMod2 c = pa*pb; - - __m128i output; - for (int i=0; i<16; i++) - ((byte *)&output)[i] = c.GetByte(i); - return output; -} -#endif - inline static void Xor16(byte *a, const byte *b, const byte *c) { CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf())); @@ -151,6 +111,7 @@ const unsigned int s_cltableSizeInBlocks = 8; extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer); extern uint64x2_t GCM_Multiply_PMULL(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r); +extern void GCM_SetKeyWithoutResync_PMULL(byte *mulTable, byte *hashKey, unsigned int tableSize); CRYPTOPP_ALIGN_DATA(16) const word64 s_clmulConstants64[] = { @@ -217,27 +178,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const #elif CRYPTOPP_ARM_PMULL_AVAILABLE if (HasPMULL()) { - const uint64x2_t r = s_clmulConstants[0]; - const uint64x2_t t = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(hashKey))); - const uint64x2_t h0 = vextq_u64(t, t, 1); - - uint64x2_t h = h0; - for (i=0; i