From 57109b3120b8286cdd7b4d143854f2d27873368c Mon Sep 17 00:00:00 2001 From: weidai Date: Fri, 25 Jul 2003 00:15:52 +0000 Subject: [PATCH] fix bugs in 64-bit CPU support --- blumshub.cpp | 4 +- blumshub.h | 2 +- config.h | 86 +++--- cryptlib.cpp | 2 + cryptlib.dsp | 16 +- cryptlib.h | 2 +- gf2n.cpp | 2 +- integer.cpp | 747 ++++++++++++++++++++++++++++++++------------------- integer.h | 3 + misc.h | 7 +- modes.cpp | 2 +- modes.h | 2 +- nbtheory.cpp | 40 +-- nbtheory.h | 2 +- seal.cpp | 2 +- seal.h | 2 +- strciphr.cpp | 2 +- strciphr.h | 4 +- validat1.cpp | 14 +- 19 files changed, 554 insertions(+), 387 deletions(-) diff --git a/blumshub.cpp b/blumshub.cpp index 40c654af..6e1854d8 100644 --- a/blumshub.cpp +++ b/blumshub.cpp @@ -39,9 +39,9 @@ BlumBlumShub::BlumBlumShub(const Integer &p, const Integer &q, const Integer &se { } -void BlumBlumShub::Seek(dword index) +void BlumBlumShub::Seek(lword index) { - Integer i(Integer::POSITIVE, HIGH_WORD(index), word(index)); + Integer i(Integer::POSITIVE, index); i *= 8; Integer e = a_exp_b_mod_c (2, i / maxBits + 1, (p-1)*(q-1)); current = modn.Exponentiate(x0, e); diff --git a/blumshub.h b/blumshub.h index dbbb8be4..6583e886 100644 --- a/blumshub.h +++ b/blumshub.h @@ -46,7 +46,7 @@ public: BlumBlumShub(const Integer &p, const Integer &q, const Integer &seed); bool IsRandomAccess() const {return true;} - void Seek(dword index); + void Seek(lword index); protected: const Integer p, q; diff --git a/config.h b/config.h index d4f5cb83..09774e49 100644 --- a/config.h +++ b/config.h @@ -91,77 +91,63 @@ # define __USE_W32_SOCKETS #endif -typedef unsigned char byte; // moved outside namespace for Borland C++Builder 5 +typedef unsigned char byte; // put in global namespace to avoid ambiguity with other byte typedefs NAMESPACE_BEGIN(CryptoPP) typedef unsigned short word16; - typedef unsigned int word32; +typedef unsigned int word32; #if defined(__GNUC__) || defined(__MWERKS__) -# define WORD64_AVAILABLE + #define WORD64_AVAILABLE typedef unsigned long long word64; -# define W64LIT(x) x##LL + #define W64LIT(x) x##LL #elif defined(_MSC_VER) || defined(__BCPLUSPLUS__) -# define WORD64_AVAILABLE + #define WORD64_AVAILABLE typedef unsigned __int64 word64; -# define W64LIT(x) x##ui64 + #define W64LIT(x) x##ui64 #endif -#if defined(__alpha__) || defined(__ia64__) || defined(_ARCH_PPC64) || defined(__x86_64__) || defined(__mips64) || defined(__sparc_v9__) || defined(__sparcv9) || defined(__sparc_v8__) || defined(__sparcv8) -# define CRYPTOPP_64BIT_CPU -#endif - -// defined this if your CPU is not 64-bit to use alternative code that avoids word64 -#if defined(WORD64_AVAILABLE) && !defined(CRYPTOPP_64BIT_CPU) -# define CRYPTOPP_SLOW_WORD64 -#endif - -// word should have the same size as your CPU registers -// dword should be twice as big as word - -#if (defined(__GNUC__) && !defined(__alpha)) || defined(__MWERKS__) - typedef unsigned long word; - typedef unsigned long long dword; -#elif defined(_MSC_VER) || defined(__BCPLUSPLUS__) - typedef unsigned __int32 word; - typedef unsigned __int64 dword; +// define largest word type +#ifdef WORD64_AVAILABLE + typedef word64 lword; #else - typedef unsigned int word; - typedef unsigned long dword; + typedef word32 lword; +#endif + +#if defined(__alpha__) || defined(__ia64__) || defined(_ARCH_PPC64) || defined(__x86_64__) || defined(__mips64) + // These platforms have 64-bit CPU registers. Unfortunately most C++ compilers doesn't + // allow any way to access the 64-bit by 64-bit multiply instruction without using + // assembly, so in order to use word64 as word, the assembly instruction must be defined + // in Dword::Multiply(). + typedef word32 hword; + typedef word64 word; +#else + #define CRYPTOPP_NATIVE_DWORD_AVAILABLE + #ifdef WORD64_AVAILABLE + #define CRYPTOPP_SLOW_WORD64 // defined this if your CPU is not 64-bit to use alternative code that avoids word64 + typedef word16 hword; + typedef word32 word; + typedef word64 dword; + #else + typedef word8 hword; + typedef word16 word; + typedef word32 dword; + #endif #endif const unsigned int WORD_SIZE = sizeof(word); const unsigned int WORD_BITS = WORD_SIZE * 8; -#define LOW_WORD(x) (word)(x) - -union dword_union -{ - dword_union (const dword &dw) : dw(dw) {} - dword dw; - word w[2]; -}; - -#ifdef IS_LITTLE_ENDIAN -# define HIGH_WORD(x) (dword_union(x).w[1]) -#else -# define HIGH_WORD(x) (dword_union(x).w[0]) -#endif - -// if the above HIGH_WORD macro doesn't work (if you are not sure, compile it -// and run the validation tests), try this: -// #define HIGH_WORD(x) (word)((x)>>WORD_BITS) - #if defined(_MSC_VER) || defined(__BCPLUSPLUS__) -# define INTEL_INTRINSICS -# define FAST_ROTATE + #define INTEL_INTRINSICS + #define FAST_ROTATE #elif defined(__MWERKS__) && TARGET_CPU_PPC -# define PPC_INTRINSICS -# define FAST_ROTATE + #define PPC_INTRINSICS + #define FAST_ROTATE #elif defined(__GNUC__) && defined(__i386__) // GCC does peephole optimizations which should result in using rotate instructions -# define FAST_ROTATE + #define FAST_ROTATE #endif NAMESPACE_END diff --git a/cryptlib.cpp b/cryptlib.cpp index cafaaa71..a4f972e7 100644 --- a/cryptlib.cpp +++ b/cryptlib.cpp @@ -22,7 +22,9 @@ CRYPTOPP_COMPILE_ASSERT(sizeof(word32) == 4); #ifdef WORD64_AVAILABLE CRYPTOPP_COMPILE_ASSERT(sizeof(word64) == 8); #endif +#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE CRYPTOPP_COMPILE_ASSERT(sizeof(dword) == 2*sizeof(word)); +#endif const std::string BufferedTransformation::NULL_CHANNEL; const NullNameValuePairs g_nullNameValuePairs; diff --git a/cryptlib.dsp b/cryptlib.dsp index 1205ba2b..1935989a 100644 --- a/cryptlib.dsp +++ b/cryptlib.dsp @@ -282,14 +282,6 @@ SOURCE=.\dh2.cpp # End Source File # Begin Source File -SOURCE=.\diamond.cpp -# End Source File -# Begin Source File - -SOURCE=.\diamondt.cpp -# End Source File -# Begin Source File - SOURCE=.\dll.cpp # SUBTRACT CPP /YX /Yc /Yu # End Source File @@ -748,10 +740,6 @@ SOURCE=.\dh2.h # End Source File # Begin Source File -SOURCE=.\diamond.h -# End Source File -# Begin Source File - SOURCE=.\dmac.h # End Source File # Begin Source File @@ -760,6 +748,10 @@ SOURCE=.\dsa.h # End Source File # Begin Source File +SOURCE=.\dword.h +# End Source File +# Begin Source File + SOURCE=.\ec2n.h # End Source File # Begin Source File diff --git a/cryptlib.h b/cryptlib.h index 4c0e24aa..35231047 100644 --- a/cryptlib.h +++ b/cryptlib.h @@ -496,7 +496,7 @@ public: //! returns whether this cipher supports random access virtual bool IsRandomAccess() const =0; //! for random access ciphers, seek to an absolute position - virtual void Seek(dword n) + virtual void Seek(lword n) { assert(!IsRandomAccess()); throw NotImplemented("StreamTransformation: this object doesn't support random access"); diff --git a/gf2n.cpp b/gf2n.cpp index b7b4bf9c..93d5edeb 100644 --- a/gf2n.cpp +++ b/gf2n.cpp @@ -143,7 +143,7 @@ void PolynomialMod2::Decode(BufferedTransformation &bt, unsigned int inputLen) { byte b; bt.Get(b); - reg[(i-1)/WORD_SIZE] |= b << ((i-1)%WORD_SIZE)*8; + reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8; } } diff --git a/integer.cpp b/integer.cpp index deb60f95..52042dd5 100644 --- a/integer.cpp +++ b/integer.cpp @@ -60,8 +60,6 @@ void AlignedAllocator::deallocate(void *p, size_type n) } #endif -#define MAKE_DWORD(lowWord, highWord) ((dword(highWord)< m_halfs.low); + #endif + return r; + } + + DWord operator-(word a) + { + DWord r; + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + r.m_whole = m_whole - a; + #else + r.m_halfs.low = m_halfs.low - a; + r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low); + #endif + return r; + } + + // returns quotient, which must fit in a word + word operator/(word divisor); + + word operator%(word a); + + bool operator!() const + { + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + return !m_whole; + #else + return !m_halfs.high && !m_halfs.low; + #endif + } + + word GetLowHalf() const {return m_halfs.low;} + word GetHighHalf() const {return m_halfs.high;} + word GetHighHalfAsBorrow() const {return 0-m_halfs.high;} + +private: + union + { + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + dword m_whole; + #endif + struct + { + #ifdef IS_LITTLE_ENDIAN + word low; + word high; + #else + word high; + word low; + #endif + } m_halfs; + }; +}; + +class Word +{ +public: + Word() {} + + Word(word value) + { + m_whole = value; + } + + Word(hword low, hword high) + { + m_whole = low | (word(high) << (WORD_BITS/2)); + } + + static Word Multiply(hword a, hword b) + { + Word r; + r.m_whole = (word)a * b; + return r; + } + + Word operator-(Word a) + { + Word r; + r.m_whole = m_whole - a.m_whole; + return r; + } + + Word operator-(hword a) + { + Word r; + r.m_whole = m_whole - a; + return r; + } + + // returns quotient, which must fit in a word + hword operator/(hword divisor) + { + return hword(m_whole / divisor); + } + + bool operator!() const + { + return !m_whole; + } + + word GetWhole() const {return m_whole;} + hword GetLowHalf() const {return hword(m_whole);} + hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));} + hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));} + +private: + word m_whole; +}; + +// do a 3 word by 2 word divide, returns quotient and leaves remainder in A +template +S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL) +{ + // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S + assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); + + // estimate the quotient: do a 2 S by 1 S divide + S Q; + if (S(B1+1) == 0) + Q = A[2]; + else + Q = D(A[1], A[2]) / S(B1+1); + + // now subtract Q*B from A + D p = D::Multiply(B0, Q); + D u = (D) A[0] - p.GetLowHalf(); + A[0] = u.GetLowHalf(); + u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q); + A[1] = u.GetLowHalf(); + A[2] += u.GetHighHalf(); + + // Q <= actual quotient, so fix it + while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) + { + u = (D) A[0] - B0; + A[0] = u.GetLowHalf(); + u = (D) A[1] - B1 - u.GetHighHalfAsBorrow(); + A[1] = u.GetLowHalf(); + A[2] += u.GetHighHalf(); + Q++; + assert(Q); // shouldn't overflow + } + + return Q; +} + +// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 +template +inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B) +{ + if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) + return D(Ah.GetLowHalf(), Ah.GetHighHalf()); + else + { + S Q[2]; + T[0] = Al.GetLowHalf(); + T[1] = Al.GetHighHalf(); + T[2] = Ah.GetLowHalf(); + T[3] = Ah.GetHighHalf(); + Q[1] = DivideThreeWordsByTwo(T+1, B.GetLowHalf(), B.GetHighHalf()); + Q[0] = DivideThreeWordsByTwo(T, B.GetLowHalf(), B.GetHighHalf()); + return D(Q[0], Q[1]); + } +} + +// returns quotient, which must fit in a word +inline word DWord::operator/(word a) +{ + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + return word(m_whole / a); + #else + hword r[4]; + return DivideFourWordsByTwo(r, m_halfs.low, m_halfs.high, a).GetWhole(); + #endif +} + +inline word DWord::operator%(word a) +{ + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + return word(m_whole % a); + #else + if (a < (word(1) << (WORD_BITS/2))) + { + hword h = hword(a); + word r = m_halfs.high % h; + r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h; + return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h); + } + else + { + hword r[4]; + DivideFourWordsByTwo(r, m_halfs.low, m_halfs.high, a); + return Word(r[0], r[1]).GetWhole(); + } + #endif } // ******************************************************** @@ -162,69 +432,30 @@ word Portable::Add(word *C, const word *A, const word *B, unsigned int N) { assert (N%2 == 0); -#ifdef IS_LITTLE_ENDIAN - if (sizeof(dword) == sizeof(size_t)) // dword is only register size + DWord u(0, 0); + for (unsigned int i = 0; i < N; i+=2) { - dword carry = 0; - N >>= 1; - for (unsigned int i = 0; i < N; i++) - { - dword a = ((const dword *)A)[i] + carry; - dword c = a + ((const dword *)B)[i]; - ((dword *)C)[i] = c; - carry = (a < carry) | (c < a); - } - return (word)carry; - } - else -#endif - { - word carry = 0; - for (unsigned int i = 0; i < N; i+=2) - { - dword u = (dword) carry + A[i] + B[i]; - C[i] = LOW_WORD(u); - u = (dword) HIGH_WORD(u) + A[i+1] + B[i+1]; - C[i+1] = LOW_WORD(u); - carry = HIGH_WORD(u); - } - return carry; + u = DWord(A[i]) + B[i] + u.GetHighHalf(); + C[i] = u.GetLowHalf(); + u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf(); + C[i+1] = u.GetLowHalf(); } + return u.GetHighHalf(); } word Portable::Subtract(word *C, const word *A, const word *B, unsigned int N) { assert (N%2 == 0); -#ifdef IS_LITTLE_ENDIAN - if (sizeof(dword) == sizeof(size_t)) // dword is only register size + DWord u(0, 0); + for (unsigned int i = 0; i < N; i+=2) { - dword borrow = 0; - N >>= 1; - for (unsigned int i = 0; i < N; i++) - { - dword a = ((const dword *)A)[i]; - dword b = a - borrow; - dword c = b - ((const dword *)B)[i]; - ((dword *)C)[i] = c; - borrow = (b > a) | (c > b); - } - return (word)borrow; - } - else -#endif - { - word borrow=0; - for (unsigned i = 0; i < N; i+=2) - { - dword u = (dword) A[i] - B[i] - borrow; - C[i] = LOW_WORD(u); - u = (dword) A[i+1] - B[i+1] - (word)(0-HIGH_WORD(u)); - C[i+1] = LOW_WORD(u); - borrow = 0-HIGH_WORD(u); - } - return borrow; + u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow(); + C[i] = u.GetLowHalf(); + u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow(); + C[i+1] = u.GetLowHalf(); } + return 0-u.GetHighHalf(); } void Portable::Multiply2(word *C, const word *A, const word *B) @@ -261,38 +492,28 @@ void Portable::Multiply2(word *C, const word *A, const word *B) unsigned int ai = A[1] < A[0]; unsigned int bi = B[0] < B[1]; unsigned int di = ai & bi; - dword d = (dword)D[di]*D[di+2]; + DWord d = DWord::Multiply(D[di], D[di+2]); D[1] = D[3] = 0; unsigned int si = ai + !bi; word s = D[si]; - dword A0B0 = (dword)A[0]*B[0]; - C[0] = LOW_WORD(A0B0); + DWord A0B0 = DWord::Multiply(A[0], B[0]); + C[0] = A0B0.GetLowHalf(); - dword A1B1 = (dword)A[1]*B[1]; - dword t = (dword) HIGH_WORD(A0B0) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1); - C[1] = LOW_WORD(t); + DWord A1B1 = DWord::Multiply(A[1], B[1]); + DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf(); + C[1] = t.GetLowHalf(); - t = A1B1 + HIGH_WORD(t) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s; - C[2] = LOW_WORD(t); - C[3] = HIGH_WORD(t); + t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s; + C[2] = t.GetLowHalf(); + C[3] = t.GetHighHalf(); } inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B) { -#ifdef IS_LITTLE_ENDIAN - if (sizeof(dword) == sizeof(size_t)) - { - dword a = *(const dword *)A, b = *(const dword *)B; - ((dword *)C)[0] = a*b; - } - else -#endif - { - dword t = (dword)A[0]*B[0]; - C[0] = LOW_WORD(t); - C[1] = HIGH_WORD(t) + A[0]*B[1] + A[1]*B[0]; - } + DWord t = DWord::Multiply(A[0], B[0]); + C[0] = t.GetLowHalf(); + C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0]; } word Portable::Multiply2Add(word *C, const word *A, const word *B) @@ -301,77 +522,77 @@ word Portable::Multiply2Add(word *C, const word *A, const word *B) unsigned int ai = A[1] < A[0]; unsigned int bi = B[0] < B[1]; unsigned int di = ai & bi; - dword d = (dword)D[di]*D[di+2]; + DWord d = DWord::Multiply(D[di], D[di+2]); D[1] = D[3] = 0; unsigned int si = ai + !bi; word s = D[si]; - dword A0B0 = (dword)A[0]*B[0]; - dword t = A0B0 + C[0]; - C[0] = LOW_WORD(t); + DWord A0B0 = DWord::Multiply(A[0], B[0]); + DWord t = A0B0 + C[0]; + C[0] = t.GetLowHalf(); - dword A1B1 = (dword)A[1]*B[1]; - t = (dword) HIGH_WORD(t) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1) + C[1]; - C[1] = LOW_WORD(t); + DWord A1B1 = DWord::Multiply(A[1], B[1]); + t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1]; + C[1] = t.GetLowHalf(); - t = (dword) HIGH_WORD(t) + LOW_WORD(A1B1) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s + C[2]; - C[2] = LOW_WORD(t); + t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2]; + C[2] = t.GetLowHalf(); - t = (dword) HIGH_WORD(t) + HIGH_WORD(A1B1) + C[3]; - C[3] = LOW_WORD(t); - return HIGH_WORD(t); + t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3]; + C[3] = t.GetLowHalf(); + return t.GetHighHalf(); } #define MulAcc(x, y) \ - p = (dword)A[x] * B[y] + c; \ - c = LOW_WORD(p); \ - p = (dword)d + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e += HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[x], B[y], c); \ + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); #define SaveMulAcc(s, x, y) \ R[s] = c; \ - p = (dword)A[x] * B[y] + d; \ - c = LOW_WORD(p); \ - p = (dword)e + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e = HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[x], B[y], d); \ + c = p.GetLowHalf(); \ + p = (DWord) e + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e = p.GetHighHalf(); #define SquAcc(x, y) \ - q = (dword)A[x] * A[y]; \ + q = DWord::Multiply(A[x], A[y]); \ p = q + c; \ - c = LOW_WORD(p); \ - p = (dword)d + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e += HIGH_WORD(p); \ + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); \ p = q + c; \ - c = LOW_WORD(p); \ - p = (dword)d + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e += HIGH_WORD(p); + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); #define SaveSquAcc(s, x, y) \ R[s] = c; \ - q = (dword)A[x] * A[y]; \ + q = DWord::Multiply(A[x], A[y]); \ p = q + d; \ - c = LOW_WORD(p); \ - p = (dword)e + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e = HIGH_WORD(p); \ + c = p.GetLowHalf(); \ + p = (DWord) e + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e = p.GetHighHalf(); \ p = q + c; \ - c = LOW_WORD(p); \ - p = (dword)d + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e += HIGH_WORD(p); + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); void Portable::Multiply4(word *R, const word *A, const word *B) { - dword p; + DWord p; word c, d, e; - p = (dword)A[0] * B[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; MulAcc(0, 1); @@ -394,38 +615,38 @@ void Portable::Multiply4(word *R, const word *A, const word *B) MulAcc(3, 2); R[5] = c; - p = (dword)A[3] * B[3] + d; - R[6] = LOW_WORD(p); - R[7] = e + HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[3], B[3], d); + R[6] = p.GetLowHalf(); + R[7] = e + p.GetHighHalf(); } void Portable::Square2(word *R, const word *A) { - dword p, q; + DWord p, q; word c, d, e; - p = (dword)A[0] * A[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], A[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; SquAcc(0, 1); R[1] = c; - p = (dword)A[1] * A[1] + d; - R[2] = LOW_WORD(p); - R[3] = e + HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[1], A[1], d); + R[2] = p.GetLowHalf(); + R[3] = e + p.GetHighHalf(); } void Portable::Square4(word *R, const word *A) { const word *B = A; - dword p, q; + DWord p, q; word c, d, e; - p = (dword)A[0] * A[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], A[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; SquAcc(0, 1); @@ -442,19 +663,19 @@ void Portable::Square4(word *R, const word *A) SaveSquAcc(4, 2, 3); R[5] = c; - p = (dword)A[3] * A[3] + d; - R[6] = LOW_WORD(p); - R[7] = e + HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[3], A[3], d); + R[6] = p.GetLowHalf(); + R[7] = e + p.GetHighHalf(); } void Portable::Multiply8(word *R, const word *A, const word *B) { - dword p; + DWord p; word c, d, e; - p = (dword)A[0] * B[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; MulAcc(0, 1); @@ -533,19 +754,19 @@ void Portable::Multiply8(word *R, const word *A, const word *B) MulAcc(7, 6); R[13] = c; - p = (dword)A[7] * B[7] + d; - R[14] = LOW_WORD(p); - R[15] = e + HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[7], B[7], d); + R[14] = p.GetLowHalf(); + R[15] = e + p.GetHighHalf(); } void Portable::Multiply4Bottom(word *R, const word *A, const word *B) { - dword p; + DWord p; word c, d, e; - p = (dword)A[0] * B[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; MulAcc(0, 1); @@ -561,12 +782,12 @@ void Portable::Multiply4Bottom(word *R, const word *A, const word *B) void Portable::Multiply8Bottom(word *R, const word *A, const word *B) { - dword p; + DWord p; word c, d, e; - p = (dword)A[0] * B[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; MulAcc(0, 1); @@ -620,6 +841,7 @@ class PentiumOptimized : public Portable public: static word __fastcall Add(word *C, const word *A, const word *B, unsigned int N); static word __fastcall Subtract(word *C, const word *A, const word *B, unsigned int N); +// TODO test this with .NET #if _MSC_VER < 1300 static inline void Square4(word *R, const word *A) { // VC60 workaround: MSVC 6.0 has an optimization bug that makes @@ -628,6 +850,7 @@ public: // bug is fixed. Multiply4(R, A, A); } +//#endif }; typedef PentiumOptimized LowLevel; @@ -1703,88 +1926,7 @@ void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y) ); } -#elif defined(__GNUC__) && defined(CRYPTOPP_64BIT_CPU) - -#ifdef __alpha__ -#define MUL64x64(a, b, c, d) c = a*b; __asm__("umulh %1,%2,%0" : "=r" (d) : "r" (a), "r" (b)) -#elif defined(__ia64__) -#define MUL64x64(a, b, c, d) c = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (d) : "f" (a), "f" (b)) -#elif defined(_ARCH_PPC64) -#define MUL64x64(a, b, c, d) c = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (d) : "r" (a), "r" (b) : "cc") -#elif defined(__x86_64__) -#define MUL64x64(a, b, c, d) __asm__("mulq %3" : "=d" (d), "=a" (c) : "a" (a), "rm" (b) : "cc") -#elif defined(__mips64) -#define MUL64x64(a, b, c, d) __asm__("dmultu %2,%3" : "=h" (d), "=l" (c) : "r" (a), "r" (b)) -#elif defined(__sparc_v9__) || defined(__sparcv9) || defined(__sparc_v8__) || defined(__sparcv8) -#define MUL64x64(a, b, c, d) __asm__("umul %2,%3,%1;rd %%y,%0" : "=r" (d), "=r" (c) : "r" (a), "r" (b)) -#endif - -class OptimizedFor64BitCPU : public Portable -{ -public: - static inline void Multiply2(word *C, const word *A, const word *B); - static inline word Multiply2Add(word *C, const word *A, const word *B); - static inline void Multiply4(word *C, const word *A, const word *B); - static inline unsigned int MultiplyRecursionLimit() {return 4;} - - static inline void Multiply4Bottom(word *C, const word *A, const word *B); - static inline unsigned int MultiplyBottomRecursionLimit() {return 4;} - - static inline void Square4(word *R, const word *A) - { - Multiply4(R, A, A); - } -}; - -typedef OptimizedFor64BitCPU LowLevel; - -inline void OptimizedFor64BitCPU::Multiply2(word *C, const word *A, const word *B) -{ - register dword c, d, a = *(const dword *)A, b = *(const dword *)B; - MUL64x64(a, b, c, d); - ((dword *)C)[0] = c; - ((dword *)C)[1] = d; -} - -inline word OptimizedFor64BitCPU::Multiply2Add(word *C, const word *A, const word *B) -{ - register dword c, d, e, a = *(const dword *)A, b = *(const dword *)B; - c = ((dword *)C)[0]; - MUL64x64(a, b, d, e); - d += c; - ((dword *)C)[0] = d; - d = (d < c); - c = ((dword *)C)[1] + d; - d = (c < d); - c += e; - ((dword *)C)[1] = c; - d |= (c < e); - return d; -} - -inline void OptimizedFor64BitCPU::Multiply4(word *R, const word *A, const word *B) -{ - Multiply2(R, A, B); - Multiply2(R+4, A+2, B+2); - word carry = Multiply2Add(R+2, A+0, B+2); - carry += Multiply2Add(R+2, A+2, B+0); - Increment(R+6, 2, carry); -} - -static inline void Multiply2BottomAdd(word *C, const word *A, const word *B) -{ - register dword a = *(const dword *)A, b = *(const dword *)B; - ((dword *)C)[0] = a*b + ((dword *)C)[0]; -} - -inline void OptimizedFor64BitCPU::Multiply4Bottom(word *R, const word *A, const word *B) -{ - Multiply2(R, A, B); - Multiply2BottomAdd(R+2, A+0, B+2); - Multiply2BottomAdd(R+2, A+2, B+0); -} - -#else // no processor specific code available +#else // no processor specific code at this layer typedef Portable LowLevel; @@ -1970,13 +2112,12 @@ void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const if (N==4) { P::Multiply4(T, A, B); - ((dword *)R)[0] = ((dword *)T)[2]; - ((dword *)R)[1] = ((dword *)T)[3]; + memcpy(R, T+4, 4*WORD_SIZE); } else if (N==2) { P::Multiply2(T, A, B); - ((dword *)R)[0] = ((dword *)T)[1]; + memcpy(R, T+2, 2*WORD_SIZE); } else { @@ -2088,6 +2229,18 @@ inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const wo RecursiveMultiplyTop(R, T, L, A, B, N); } +static word LinearMultiply(word *C, const word *A, word B, unsigned int N) +{ + word carry=0; + for(unsigned i=0; i B1 || (A[1]==B1 && A[0]>=B0)) { - u = (dword) A[0] - B0; - A[0] = LOW_WORD(u); - u = (dword) A[1] - B1 - (word)(0-HIGH_WORD(u)); - A[1] = LOW_WORD(u); - A[2] += HIGH_WORD(u); + u = (DWord) A[0] - B0; + A[0] = u.GetLowHalf(); + u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow(); + A[1] = u.GetLowHalf(); + A[2] += u.GetHighHalf(); Q++; assert(Q); // shouldn't overflow } @@ -2318,6 +2477,27 @@ static inline void AtomicDivide(word *Q, const word *A, const word *B) #endif } } +*/ + +static inline void AtomicDivide(word *Q, const word *A, const word *B) +{ + word T[4]; + DWord q = DivideFourWordsByTwo(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1])); + Q[0] = q.GetLowHalf(); + Q[1] = q.GetHighHalf(); + +#ifndef NDEBUG + if (B[0] || B[1]) + { + // multiply quotient and divisor and add remainder, make sure it equals dividend + assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0](value)); +} + Integer::Integer(signed long value) : reg(2) { @@ -2581,7 +2768,7 @@ Integer::Integer(signed long value) value = -value; } reg[0] = word(value); - reg[1] = word(SafeRightShift(value)); + reg[1] = word(SafeRightShift((unsigned long)value)); } Integer::Integer(Sign s, word high, word low) @@ -2877,13 +3064,13 @@ void Integer::Decode(BufferedTransformation &bt, unsigned int inputLen, Signedne for (unsigned int i=inputLen; i > 0; i--) { bt.Get(b); - reg[(i-1)/WORD_SIZE] |= b << ((i-1)%WORD_SIZE)*8; + reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8; } if (sign == NEGATIVE) { for (unsigned i=inputLen; i diff --git a/modes.cpp b/modes.cpp index 0d163cb2..d0fc7d2a 100644 --- a/modes.cpp +++ b/modes.cpp @@ -38,7 +38,7 @@ void CipherModeBase::GetNextIV(byte *IV) memcpy(IV, m_register, BlockSize()); } -void CTR_ModePolicy::SeekToIteration(dword iterationCount) +void CTR_ModePolicy::SeekToIteration(lword iterationCount) { int carry=0; for (int i=BlockSize()-1; i>=0; i--) diff --git a/modes.h b/modes.h index 34dd21ab..8cdea99a 100644 --- a/modes.h +++ b/modes.h @@ -157,7 +157,7 @@ private: bool CanOperateKeystream() const {return true;} void OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, unsigned int iterationCount); void CipherResynchronize(byte *keystreamBuffer, const byte *iv); - void SeekToIteration(dword iterationCount); + void SeekToIteration(lword iterationCount); inline void ProcessMultipleBlocks(byte *output, const byte *input, unsigned int n); diff --git a/nbtheory.cpp b/nbtheory.cpp index 8c2e0423..013e4419 100644 --- a/nbtheory.cpp +++ b/nbtheory.cpp @@ -15,12 +15,12 @@ NAMESPACE_BEGIN(CryptoPP) const word s_lastSmallPrime = 32719; -std::vector * NewPrimeTable() +std::vector * NewPrimeTable() { const unsigned int maxPrimeTableSize = 3511; - std::auto_ptr > pPrimeTable(new std::vector); - std::vector &primeTable = *pPrimeTable; + std::auto_ptr > pPrimeTable(new std::vector); + std::vector &primeTable = *pPrimeTable; primeTable.reserve(maxPrimeTableSize); primeTable.push_back(2); @@ -42,9 +42,9 @@ std::vector * NewPrimeTable() return pPrimeTable.release(); } -const word * GetPrimeTable(unsigned int &size) +const word16 * GetPrimeTable(unsigned int &size) { - std::vector &primeTable = StaticObject >(&NewPrimeTable); + std::vector &primeTable = StaticObject >(&NewPrimeTable); size = primeTable.size(); return &primeTable[0]; } @@ -52,10 +52,10 @@ const word * GetPrimeTable(unsigned int &size) bool IsSmallPrime(const Integer &p) { unsigned int primeTableSize; - const word * primeTable = GetPrimeTable(primeTableSize); + const word16 * primeTable = GetPrimeTable(primeTableSize); if (p.IsPositive() && p <= primeTable[primeTableSize-1]) - return std::binary_search(primeTable, primeTable+primeTableSize, (word)p.ConvertToLong()); + return std::binary_search(primeTable, primeTable+primeTableSize, (word16)p.ConvertToLong()); else return false; } @@ -63,7 +63,7 @@ bool IsSmallPrime(const Integer &p) bool TrialDivision(const Integer &p, unsigned bound) { unsigned int primeTableSize; - const word * primeTable = GetPrimeTable(primeTableSize); + const word16 * primeTable = GetPrimeTable(primeTableSize); assert(primeTable[primeTableSize-1] >= bound); @@ -81,7 +81,7 @@ bool TrialDivision(const Integer &p, unsigned bound) bool SmallDivisorsTest(const Integer &p) { unsigned int primeTableSize; - const word * primeTable = GetPrimeTable(primeTableSize); + const word16 * primeTable = GetPrimeTable(primeTableSize); return !TrialDivision(p, primeTable[primeTableSize-1]); } @@ -278,7 +278,7 @@ public: bool NextCandidate(Integer &c); void DoSieve(); - static void SieveSingle(std::vector &sieve, word p, const Integer &first, const Integer &step, word stepInv); + static void SieveSingle(std::vector &sieve, word16 p, const Integer &first, const Integer &step, word16 stepInv); Integer m_first, m_last, m_step; signed int m_delta; @@ -315,12 +315,12 @@ bool PrimeSieve::NextCandidate(Integer &c) } } -void PrimeSieve::SieveSingle(std::vector &sieve, word p, const Integer &first, const Integer &step, word stepInv) +void PrimeSieve::SieveSingle(std::vector &sieve, word16 p, const Integer &first, const Integer &step, word16 stepInv) { if (stepInv) { unsigned int sieveSize = sieve.size(); - word j = word((dword(p-(first%p))*stepInv) % p); + word j = word((word32(p-(first%p))*stepInv) % p); // if the first multiple of p is p, skip it if (first.WordCount() <= 1 && first + step*j == p) j += p; @@ -332,7 +332,7 @@ void PrimeSieve::SieveSingle(std::vector &sieve, word p, const Integer &fi void PrimeSieve::DoSieve() { unsigned int primeTableSize; - const word * primeTable = GetPrimeTable(primeTableSize); + const word16 * primeTable = GetPrimeTable(primeTableSize); const unsigned int maxSieveSize = 32768; unsigned int sieveSize = STDMIN(Integer(maxSieveSize), (m_last-m_first)/m_step+1).ConvertToLong(); @@ -352,11 +352,11 @@ void PrimeSieve::DoSieve() Integer halfStep = m_step >> 1; for (unsigned int i = 0; i < primeTableSize; ++i) { - word p = primeTable[i]; - word stepInv = m_step.InverseMod(p); + word16 p = primeTable[i]; + word16 stepInv = m_step.InverseMod(p); SieveSingle(m_sieve, p, m_first, m_step, stepInv); - word halfStepInv = 2*stepInv < p ? 2*stepInv : 2*stepInv-p; + word16 halfStepInv = 2*stepInv < p ? 2*stepInv : 2*stepInv-p; SieveSingle(m_sieve, p, qFirst, halfStep, halfStepInv); } } @@ -380,11 +380,11 @@ bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Inte } unsigned int primeTableSize; - const word * primeTable = GetPrimeTable(primeTableSize); + const word16 * primeTable = GetPrimeTable(primeTableSize); if (p <= primeTable[primeTableSize-1]) { - const word *pItr; + const word16 *pItr; --p; if (p.IsPositive()) @@ -441,7 +441,7 @@ static bool ProvePrime(const Integer &p, const Integer &q) return false; unsigned int primeTableSize; - const word * primeTable = GetPrimeTable(primeTableSize); + const word16 * primeTable = GetPrimeTable(primeTableSize); assert(primeTableSize >= 50); for (int i=0; i<50; i++) @@ -499,7 +499,7 @@ Integer MaurerProvablePrime(RandomNumberGenerator &rng, unsigned int bits) Integer p; unsigned int primeTableSize; - const word * primeTable = GetPrimeTable(primeTableSize); + const word16 * primeTable = GetPrimeTable(primeTableSize); if (bits < smallPrimeBound) { diff --git a/nbtheory.h b/nbtheory.h index cb953f24..c731c508 100644 --- a/nbtheory.h +++ b/nbtheory.h @@ -9,7 +9,7 @@ NAMESPACE_BEGIN(CryptoPP) // obtain pointer to small prime table and get its size -CRYPTOPP_DLL const word * GetPrimeTable(unsigned int &size); +CRYPTOPP_DLL const word16 * GetPrimeTable(unsigned int &size); // ************ primality testing **************** diff --git a/seal.cpp b/seal.cpp index a478f9a5..0962c67f 100644 --- a/seal.cpp +++ b/seal.cpp @@ -75,7 +75,7 @@ void SEAL_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV) } template -void SEAL_Policy::SeekToIteration(dword iterationCount) +void SEAL_Policy::SeekToIteration(lword iterationCount) { m_outsideCounter = m_startCount + (unsigned int)(iterationCount / m_iterationsPerCount); m_insideCounter = (unsigned int)(iterationCount % m_iterationsPerCount); diff --git a/seal.h b/seal.h index 2e190026..9157720a 100644 --- a/seal.h +++ b/seal.h @@ -23,7 +23,7 @@ protected: void OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, unsigned int iterationCount); void CipherResynchronize(byte *keystreamBuffer, const byte *IV); bool IsRandomAccess() const {return true;} - void SeekToIteration(dword iterationCount); + void SeekToIteration(lword iterationCount); private: FixedSizeSecBlock m_T; diff --git a/strciphr.cpp b/strciphr.cpp index 1f03d4c5..3394b204 100644 --- a/strciphr.cpp +++ b/strciphr.cpp @@ -91,7 +91,7 @@ void AdditiveCipherTemplate::Resynchronize(const byte *iv) } template -void AdditiveCipherTemplate::Seek(dword position) +void AdditiveCipherTemplate::Seek(lword position) { PolicyInterface &policy = AccessPolicy(); unsigned int bytesPerIteration = policy.GetBytesPerIteration(); diff --git a/strciphr.h b/strciphr.h index 3e0a739a..eb1d22fe 100644 --- a/strciphr.h +++ b/strciphr.h @@ -66,7 +66,7 @@ struct CRYPTOPP_DLL CRYPTOPP_NO_VTABLE AdditiveCipherAbstractPolicy virtual void CipherSetKey(const NameValuePairs ¶ms, const byte *key, unsigned int length) =0; virtual void CipherResynchronize(byte *keystreamBuffer, const byte *iv) {throw NotImplemented("StreamTransformation: this object doesn't support resynchronization");} virtual bool IsRandomAccess() const =0; - virtual void SeekToIteration(dword iterationCount) {assert(!IsRandomAccess()); throw NotImplemented("StreamTransformation: this object doesn't support random access");} + virtual void SeekToIteration(lword iterationCount) {assert(!IsRandomAccess()); throw NotImplemented("StreamTransformation: this object doesn't support random access");} }; template @@ -130,7 +130,7 @@ public: bool IsSelfInverting() const {return true;} bool IsForwardTransformation() const {return true;} bool IsRandomAccess() const {return GetPolicy().IsRandomAccess();} - void Seek(dword position); + void Seek(lword position); typedef typename BASE::PolicyInterface PolicyInterface; diff --git a/validat1.cpp b/validat1.cpp index 1342de53..6c0aee3f 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -188,7 +188,7 @@ bool TestSettings() pass = false; } cout << "sizeof(word64) == " << sizeof(word64) << endl; -#else +#elif CRYPTOPP_NATIVE_DWORD_AVAILABLE if (sizeof(dword) >= 8) { cout << "FAILED: sizeof(dword) >= 8, but WORD64_AVAILABLE not defined" << endl; @@ -198,6 +198,7 @@ bool TestSettings() cout << "passed: word64 not available" << endl; #endif +#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE if (sizeof(dword) == 2*sizeof(word)) cout << "passed: "; else @@ -206,16 +207,7 @@ bool TestSettings() pass = false; } cout << "sizeof(word) == " << sizeof(word) << ", sizeof(dword) == " << sizeof(dword) << endl; - - dword test = (dword(1)<