diff --git a/donna_32.cpp b/donna_32.cpp index 45289319..0879fc4a 100644 --- a/donna_32.cpp +++ b/donna_32.cpp @@ -42,6 +42,7 @@ typedef word32 bignum25519align16[12]; #define mul32x32_64(a,b) (((word64)(a))*(b)) +const byte basePoint[32] = {9}; const word32 reduce_mask_25 = (1 << 25) - 1; const word32 reduce_mask_26 = (1 << 26) - 1; @@ -354,7 +355,7 @@ curve25519_square_times(bignum25519 out, const bignum25519 in, int count) { /* Take a little-endian, 32-byte number and expand it into polynomial form */ void -curve25519_expand(bignum25519 out, const unsigned char in[32]) { +curve25519_expand(bignum25519 out, const byte in[32]) { word32 x0,x1,x2,x3,x4,x5,x6,x7; GetBlock block(in); @@ -376,7 +377,7 @@ curve25519_expand(bignum25519 out, const unsigned char in[32]) { * little-endian, 32-byte array */ void -curve25519_contract(unsigned char out[32], const bignum25519 in) { +curve25519_contract(byte out[32], const bignum25519 in) { bignum25519 f; curve25519_copy(f, in); @@ -436,10 +437,10 @@ curve25519_contract(unsigned char out[32], const bignum25519 in) { f[9] <<= 6; #define F(i, s) \ - out[s+0] |= (unsigned char )(f[i] & 0xff); \ - out[s+1] = (unsigned char )((f[i] >> 8) & 0xff); \ - out[s+2] = (unsigned char )((f[i] >> 16) & 0xff); \ - out[s+3] = (unsigned char )((f[i] >> 24) & 0xff); + out[s+0] |= (byte)(f[i] & 0xff); \ + out[s+1] = (byte)((f[i] >> 8) & 0xff); \ + out[s+2] = (byte)((f[i] >> 16) & 0xff); \ + out[s+3] = (byte)((f[i] >> 24) & 0xff); out[0] = 0; out[16] = 0; @@ -475,8 +476,6 @@ int curve25519_CXX(byte sharedKey[32], const byte secretKey[32], const byte othe int curve25519(byte publicKey[32], const byte secretKey[32]) { - const byte basePoint[32] = {9}; - #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) if (HasSSE2()) return curve25519_SSE2(publicKey, secretKey, basePoint); diff --git a/donna_64.cpp b/donna_64.cpp index 15037b02..c1c78322 100644 --- a/donna_64.cpp +++ b/donna_64.cpp @@ -49,6 +49,7 @@ typedef word64 bignum25519[5]; #define shr128(out,in,shift) out = (word64)(in >> (shift)); #define shl128(out,in,shift) out = (word64)((in << shift) >> 64); +const byte basePoint[32] = {9}; const word64 reduce_mask_40 = ((word64)1 << 40) - 1; const word64 reduce_mask_51 = ((word64)1 << 51) - 1; const word64 reduce_mask_56 = ((word64)1 << 56) - 1; @@ -183,7 +184,6 @@ curve25519_mul(bignum25519 out, const bignum25519 in2, const bignum25519 in) { mul64x64_128(mul, r4, s4) add128(t[3], mul) #endif - r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51); add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51); add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51); @@ -292,7 +292,7 @@ curve25519_square(bignum25519 out, const bignum25519 in) { /* Take a little-endian, 32-byte number and expand it into polynomial form */ inline void -curve25519_expand(bignum25519 out, const unsigned char *in) { +curve25519_expand(bignum25519 out, const byte *in) { word64 x0,x1,x2,x3; GetBlock block(in); @@ -309,7 +309,7 @@ curve25519_expand(bignum25519 out, const unsigned char *in) { * little-endian, 32-byte array */ inline void -curve25519_contract(unsigned char *out, const bignum25519 input) { +curve25519_contract(byte *out, const bignum25519 input) { word64 t[5]; word64 f, i; @@ -348,7 +348,7 @@ curve25519_contract(unsigned char *out, const bignum25519 input) { #define write51full(n,shift) \ f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \ - for (i = 0; i < 8; i++, f >>= 8) *out++ = (unsigned char)f; + for (i = 0; i < 8; i++, f >>= 8) *out++ = (byte)f; #define write51(n) write51full(n,13*n) write51(0) write51(1) @@ -375,8 +375,6 @@ int curve25519_CXX(byte sharedKey[32], const byte secretKey[32], const byte othe int curve25519(byte publicKey[32], const byte secretKey[32]) { - const byte basePoint[32] = {9}; - #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) if (HasSSE2()) return curve25519_SSE2(publicKey, secretKey, basePoint); diff --git a/donna_sse.cpp b/donna_sse.cpp index b35d5d99..a3b0d1a0 100644 --- a/donna_sse.cpp +++ b/donna_sse.cpp @@ -33,8 +33,11 @@ using CryptoPP::byte; using CryptoPP::word32; using CryptoPP::word64; +using CryptoPP::GetBlock; +using CryptoPP::LittleEndian; + typedef union packedelem8_t { - unsigned char u[16]; + byte u[16]; xmmi v; } packedelem8; @@ -945,28 +948,22 @@ curve25519_square_packed64(packedelem64 *out, const packedelem64 *r) { /* Take a little-endian, 32-byte number and expand it into polynomial form */ void -curve25519_expand(bignum25519 out, const unsigned char in[32]) { +curve25519_expand(bignum25519 out, const byte in[32]) { word32 x0,x1,x2,x3,x4,x5,x6,x7; - x0 = *(word32 *)(in + 0); - x1 = *(word32 *)(in + 4); - x2 = *(word32 *)(in + 8); - x3 = *(word32 *)(in + 12); - x4 = *(word32 *)(in + 16); - x5 = *(word32 *)(in + 20); - x6 = *(word32 *)(in + 24); - x7 = *(word32 *)(in + 28); + GetBlock block(in); + block(x0)(x1)(x2)(x3)(x4)(x5)(x6)(x7); - out[0] = ( x0 ) & 0x3ffffff; + out[0] = ( x0 ) & 0x3ffffff; out[1] = ((((word64)x1 << 32) | x0) >> 26) & 0x1ffffff; out[2] = ((((word64)x2 << 32) | x1) >> 19) & 0x3ffffff; out[3] = ((((word64)x3 << 32) | x2) >> 13) & 0x1ffffff; - out[4] = (( x3) >> 6) & 0x3ffffff; - out[5] = ( x4 ) & 0x1ffffff; + out[4] = (( x3) >> 6) & 0x3ffffff; + out[5] = ( x4 ) & 0x1ffffff; out[6] = ((((word64)x5 << 32) | x4) >> 25) & 0x3ffffff; out[7] = ((((word64)x6 << 32) | x5) >> 19) & 0x1ffffff; out[8] = ((((word64)x7 << 32) | x6) >> 12) & 0x3ffffff; - out[9] = (( x7) >> 6) & 0x1ffffff; + out[9] = (( x7) >> 6) & 0x1ffffff; out[10] = 0; out[11] = 0; } @@ -975,7 +972,7 @@ curve25519_expand(bignum25519 out, const unsigned char in[32]) { * little-endian, 32-byte array */ void -curve25519_contract(unsigned char out[32], const bignum25519 in) { +curve25519_contract(byte out[32], const bignum25519 in) { ALIGN(16) bignum25519 f; curve25519_copy(f, in); @@ -1035,10 +1032,10 @@ curve25519_contract(unsigned char out[32], const bignum25519 in) { f[9] <<= 6; #define F(i, s) \ - out[s+0] |= (unsigned char )(f[i] & 0xff); \ - out[s+1] = (unsigned char )((f[i] >> 8) & 0xff); \ - out[s+2] = (unsigned char )((f[i] >> 16) & 0xff); \ - out[s+3] = (unsigned char )((f[i] >> 24) & 0xff); + out[s+0] |= (byte)(f[i] & 0xff); \ + out[s+1] = (byte)((f[i] >> 8) & 0xff); \ + out[s+2] = (byte)((f[i] >> 16) & 0xff); \ + out[s+3] = (byte)((f[i] >> 24) & 0xff); out[0] = 0; out[16] = 0; @@ -1055,85 +1052,6 @@ curve25519_contract(unsigned char out[32], const bignum25519 in) { #undef F } -/* if (iswap) swap(a, b) */ -inline void -curve25519_swap_conditional(bignum25519 a, bignum25519 b, word32 iswap) { - const word32 swap = (word32)(-(int32_t)iswap); - xmmi a0,a1,a2,b0,b1,b2,x0,x1,x2; - xmmi mask = _mm_cvtsi32_si128(swap); - mask = _mm_shuffle_epi32(mask, 0); - a0 = _mm_load_si128((xmmi *)a + 0); - a1 = _mm_load_si128((xmmi *)a + 1); - b0 = _mm_load_si128((xmmi *)b + 0); - b1 = _mm_load_si128((xmmi *)b + 1); - b0 = _mm_xor_si128(a0, b0); - b1 = _mm_xor_si128(a1, b1); - x0 = _mm_and_si128(b0, mask); - x1 = _mm_and_si128(b1, mask); - x0 = _mm_xor_si128(x0, a0); - x1 = _mm_xor_si128(x1, a1); - a0 = _mm_xor_si128(x0, b0); - a1 = _mm_xor_si128(x1, b1); - _mm_store_si128((xmmi *)a + 0, x0); - _mm_store_si128((xmmi *)a + 1, x1); - _mm_store_si128((xmmi *)b + 0, a0); - _mm_store_si128((xmmi *)b + 1, a1); - - a2 = _mm_load_si128((xmmi *)a + 2); - b2 = _mm_load_si128((xmmi *)b + 2); - b2 = _mm_xor_si128(a2, b2); - x2 = _mm_and_si128(b2, mask); - x2 = _mm_xor_si128(x2, a2); - a2 = _mm_xor_si128(x2, b2); - _mm_store_si128((xmmi *)b + 2, a2); - _mm_store_si128((xmmi *)a + 2, x2); -} - -/* out = (flag) ? out : in */ -inline void -curve25519_move_conditional_bytes(byte out[96], const byte in[96], word32 flag) { - xmmi a0,a1,a2,a3,a4,a5,b0,b1,b2,b3,b4,b5; - const word32 nb = flag - 1; - xmmi masknb = _mm_shuffle_epi32(_mm_cvtsi32_si128(nb),0); - a0 = _mm_load_si128((xmmi *)in + 0); - a1 = _mm_load_si128((xmmi *)in + 1); - a2 = _mm_load_si128((xmmi *)in + 2); - b0 = _mm_load_si128((xmmi *)out + 0); - b1 = _mm_load_si128((xmmi *)out + 1); - b2 = _mm_load_si128((xmmi *)out + 2); - a0 = _mm_andnot_si128(masknb, a0); - a1 = _mm_andnot_si128(masknb, a1); - a2 = _mm_andnot_si128(masknb, a2); - b0 = _mm_and_si128(masknb, b0); - b1 = _mm_and_si128(masknb, b1); - b2 = _mm_and_si128(masknb, b2); - a0 = _mm_or_si128(a0, b0); - a1 = _mm_or_si128(a1, b1); - a2 = _mm_or_si128(a2, b2); - _mm_store_si128((xmmi*)out + 0, a0); - _mm_store_si128((xmmi*)out + 1, a1); - _mm_store_si128((xmmi*)out + 2, a2); - - a3 = _mm_load_si128((xmmi *)in + 3); - a4 = _mm_load_si128((xmmi *)in + 4); - a5 = _mm_load_si128((xmmi *)in + 5); - b3 = _mm_load_si128((xmmi *)out + 3); - b4 = _mm_load_si128((xmmi *)out + 4); - b5 = _mm_load_si128((xmmi *)out + 5); - a3 = _mm_andnot_si128(masknb, a3); - a4 = _mm_andnot_si128(masknb, a4); - a5 = _mm_andnot_si128(masknb, a5); - b3 = _mm_and_si128(masknb, b3); - b4 = _mm_and_si128(masknb, b4); - b5 = _mm_and_si128(masknb, b5); - a3 = _mm_or_si128(a3, b3); - a4 = _mm_or_si128(a4, b4); - a5 = _mm_or_si128(a5, b5); - _mm_store_si128((xmmi*)out + 3, a3); - _mm_store_si128((xmmi*)out + 4, a4); - _mm_store_si128((xmmi*)out + 5, a5); -} - ANONYMOUS_NAMESPACE_END NAMESPACE_BEGIN(CryptoPP)