optimizations

pull/2/head
weidai 2007-04-16 00:21:07 +00:00
parent 199c796021
commit 5cad605559
6 changed files with 209 additions and 58 deletions

View File

@ -22,7 +22,7 @@ GF2_32::Element GF2_32::Multiply(Element a, Element b) const
table[3] = m_modulus ^ (a<<1); table[3] = m_modulus ^ (a<<1);
} }
#ifdef FAST_ROTATE #if CRYPTOPP_FAST_ROTATE(32)
b = rotrFixed(b, 30U); b = rotrFixed(b, 30U);
word32 result = table[b&2]; word32 result = table[b&2];

244
misc.h
View File

@ -4,14 +4,39 @@
#include "cryptlib.h" #include "cryptlib.h"
#include "smartptr.h" #include "smartptr.h"
// Compiler-specific setup for rotate support.
// CRYPTOPP_FAST_ROTATE(x) evaluates to nonzero (and is usable inside #if) when a
// rotate of an x-bit word is treated as fast on this compiler/target, else to 0.
#ifdef _MSC_VER
	#include <stdlib.h>
	#if _MSC_VER >= 1400
		// VC2005 workaround: disable declarations that conflict with winnt.h
		#define _interlockedbittestandset CRYPTOPP_DISABLED_INTRINSIC_1
		#define _interlockedbittestandreset CRYPTOPP_DISABLED_INTRINSIC_2
		#include <intrin.h>
		#undef _interlockedbittestandset
		#undef _interlockedbittestandreset
		#define CRYPTOPP_FAST_ROTATE(x) 1
	#elif _MSC_VER >= 1300
		// logical (not bitwise) OR: the result is a truth value
		#define CRYPTOPP_FAST_ROTATE(x) ((x) == 32 || (x) == 64)
	#else
		#define CRYPTOPP_FAST_ROTATE(x) ((x) == 32)
	#endif
#elif (defined(__MWERKS__) && TARGET_CPU_PPC) || \
	(defined(__GNUC__) && (defined(_ARCH_PWR2) || defined(_ARCH_PWR) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || defined(_ARCH_COM)))
	#define CRYPTOPP_FAST_ROTATE(x) ((x) == 32)
#elif defined(__GNUC__) && (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86)	// depend on GCC's peephole optimization to generate rotate instructions
	#define CRYPTOPP_FAST_ROTATE(x) 1
#else
	// Fallback for every other compiler/target. This must be "#else": a bare
	// "#elif" has no controlling expression and is rejected when reached.
	#define CRYPTOPP_FAST_ROTATE(x) 0
#endif
#ifdef __BORLANDC__ #ifdef __BORLANDC__
#include <mem.h> #include <mem.h>
#endif #endif
#if defined(__GNUC__) && !defined(__sun__) && !defined(__MINGW32__)
#define CRYPTOPP_BYTESWAP_AVAILABLE
#include <byteswap.h>
#endif
NAMESPACE_BEGIN(CryptoPP) NAMESPACE_BEGIN(CryptoPP)
// ************** compile-time assertion *************** // ************** compile-time assertion ***************
@ -289,8 +314,13 @@ inline T1 RoundUpToMultipleOf(const T1 &n, const T2 &m)
} }
template <class T> template <class T>
inline unsigned int GetAlignment(T *dummy=NULL) // VC60 workaround inline unsigned int GetAlignmentOf(T *dummy=NULL) // VC60 workaround
{ {
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86
if (sizeof(T) < 16)
return 1; // alignment not needed on x86 and x64
#endif
#if (_MSC_VER >= 1300) #if (_MSC_VER >= 1300)
return __alignof(T); return __alignof(T);
#elif defined(__GNUC__) #elif defined(__GNUC__)
@ -304,13 +334,13 @@ inline unsigned int GetAlignment(T *dummy=NULL) // VC60 workaround
inline bool IsAlignedOn(const void *p, unsigned int alignment) inline bool IsAlignedOn(const void *p, unsigned int alignment)
{ {
return IsPowerOf2(alignment) ? ModPowerOf2((size_t)p, alignment) == 0 : (size_t)p % alignment == 0; return alignment==1 || (IsPowerOf2(alignment) ? ModPowerOf2((size_t)p, alignment) == 0 : (size_t)p % alignment == 0);
} }
template <class T> template <class T>
inline bool IsAligned(const void *p, T *dummy=NULL) // VC60 workaround inline bool IsAligned(const void *p, T *dummy=NULL) // VC60 workaround
{ {
return IsAlignedOn(p, GetAlignment<T>()); return IsAlignedOn(p, GetAlignmentOf<T>());
} }
#ifdef IS_LITTLE_ENDIAN #ifdef IS_LITTLE_ENDIAN
@ -418,31 +448,29 @@ template <class T> inline T rotrMod(T x, unsigned int y)
return T((x>>y) | (x<<(sizeof(T)*8-y))); return T((x>>y) | (x<<(sizeof(T)*8-y)));
} }
#ifdef INTEL_INTRINSICS #ifdef _MSC_VER
#pragma intrinsic(_lrotl, _lrotr)
// word32 rotate specializations built on _lrotl/_lrotr.
// All four require y to be smaller than the bit width of x.

// Rotate left; a zero count returns x without calling the intrinsic.
template<> inline word32 rotlFixed<word32>(word32 x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	if (y == 0)
		return x;
	return _lrotl(x, y);
}

// Rotate right; a zero count returns x without calling the intrinsic.
template<> inline word32 rotrFixed<word32>(word32 x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	if (y == 0)
		return x;
	return _lrotr(x, y);
}

// Rotate left by a run-time count.
template<> inline word32 rotlVariable<word32>(word32 x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	const word32 rotated = _lrotl(x, y);
	return rotated;
}

// Rotate right by a run-time count.
template<> inline word32 rotrVariable<word32>(word32 x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	const word32 rotated = _lrotr(x, y);
	return rotated;
}
@ -456,9 +484,119 @@ template<> inline word32 rotrMod<word32>(word32 x, unsigned int y)
return _lrotr(x, y); return _lrotr(x, y);
} }
#endif // #ifdef INTEL_INTRINSICS #if _MSC_VER >= 1300
#ifdef PPC_INTRINSICS template<> inline word64 rotlFixed<word64>(word64 x, unsigned int y)
{
assert(y < 8*sizeof(x));
return y ? _rotl64(x, y) : x;
}
template<> inline word64 rotrFixed<word64>(word64 x, unsigned int y)
{
assert(y < 8*sizeof(x));
return y ? _rotr64(x, y) : x;
}
template<> inline word64 rotlVariable<word64>(word64 x, unsigned int y)
{
assert(y < 8*sizeof(x));
return _rotl64(x, y);
}
template<> inline word64 rotrVariable<word64>(word64 x, unsigned int y)
{
assert(y < 8*sizeof(x));
return _rotr64(x, y);
}
template<> inline word64 rotlMod<word64>(word64 x, unsigned int y)
{
return _rotl64(x, y);
}
template<> inline word64 rotrMod<word64>(word64 x, unsigned int y)
{
return _rotr64(x, y);
}
#endif // #if _MSC_VER >= 1300
#if _MSC_VER >= 1400 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000)
// word16 rotate specializations built on _rotl16/_rotr16.
// The Fixed and Variable forms require y to be smaller than the bit width of x;
// the Mod forms pass y straight to the intrinsic without checking it.

// Rotate left; a zero count returns x without calling the intrinsic.
template<> inline word16 rotlFixed<word16>(word16 x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	if (y == 0)
		return x;
	return _rotl16(x, y);
}

// Rotate right; a zero count returns x without calling the intrinsic.
template<> inline word16 rotrFixed<word16>(word16 x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	if (y == 0)
		return x;
	return _rotr16(x, y);
}

// Rotate left by a run-time count.
template<> inline word16 rotlVariable<word16>(word16 x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	const word16 rotated = _rotl16(x, y);
	return rotated;
}

// Rotate right by a run-time count.
template<> inline word16 rotrVariable<word16>(word16 x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	const word16 rotated = _rotr16(x, y);
	return rotated;
}

// Rotate left; y is not range-checked here.
template<> inline word16 rotlMod<word16>(word16 x, unsigned int y)
{
	const word16 rotated = _rotl16(x, y);
	return rotated;
}

// Rotate right; y is not range-checked here.
template<> inline word16 rotrMod<word16>(word16 x, unsigned int y)
{
	const word16 rotated = _rotr16(x, y);
	return rotated;
}
// byte rotate specializations built on _rotl8/_rotr8.
// The Fixed and Variable forms require y to be smaller than the bit width of x;
// the Mod forms pass y straight to the intrinsic without checking it.

// Rotate left; a zero count returns x without calling the intrinsic.
template<> inline byte rotlFixed<byte>(byte x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	if (y == 0)
		return x;
	return _rotl8(x, y);
}

// Rotate right; a zero count returns x without calling the intrinsic.
template<> inline byte rotrFixed<byte>(byte x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	if (y == 0)
		return x;
	return _rotr8(x, y);
}

// Rotate left by a run-time count.
template<> inline byte rotlVariable<byte>(byte x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	const byte rotated = _rotl8(x, y);
	return rotated;
}

// Rotate right by a run-time count.
template<> inline byte rotrVariable<byte>(byte x, unsigned int y)
{
	assert(y < 8*sizeof(x));
	const byte rotated = _rotr8(x, y);
	return rotated;
}

// Rotate left; y is not range-checked here.
template<> inline byte rotlMod<byte>(byte x, unsigned int y)
{
	const byte rotated = _rotl8(x, y);
	return rotated;
}

// Rotate right; y is not range-checked here.
template<> inline byte rotrMod<byte>(byte x, unsigned int y)
{
	const byte rotated = _rotr8(x, y);
	return rotated;
}
#endif // #if _MSC_VER >= 1400
#endif // #ifdef _MSC_VER
#if (defined(__MWERKS__) && TARGET_CPU_PPC)
template<> inline word32 rotlFixed<word32>(word32 x, unsigned int y) template<> inline word32 rotlFixed<word32>(word32 x, unsigned int y)
{ {
@ -494,7 +632,7 @@ template<> inline word32 rotrMod<word32>(word32 x, unsigned int y)
return (__rlwnm(x,32-y,0,31)); return (__rlwnm(x,32-y,0,31));
} }
#endif // #ifdef PPC_INTRINSICS #endif // #if (defined(__MWERKS__) && TARGET_CPU_PPC)
// ************** endian reversal *************** // ************** endian reversal ***************
@ -514,15 +652,27 @@ inline byte ByteReverse(byte value)
// Swaps the two bytes of a 16-bit word.
// Uses bswap_16 when CRYPTOPP_BYTESWAP_AVAILABLE is defined, _byteswap_ushort on
// MSVC 1300 and later, and otherwise an 8-bit rotate.
inline word16 ByteReverse(word16 value)
{
#if defined(CRYPTOPP_BYTESWAP_AVAILABLE)
	return bswap_16(value);
#elif defined(_MSC_VER) && _MSC_VER >= 1300
	return _byteswap_ushort(value);
#else
	// rotating a 16-bit word by 8 bits exchanges its two bytes
	return rotlFixed(value, 8U);
#endif
}
inline word32 ByteReverse(word32 value) inline word32 ByteReverse(word32 value)
{ {
#ifdef PPC_INTRINSICS #if defined(__GNUC__) && defined(CRYPTOPP_X86_ASM_AVAILABLE)
// PPC: load reverse indexed instruction __asm__ ("bswap %0" : "=r" (value) : "0" (value));
return value;
#elif defined(CRYPTOPP_BYTESWAP_AVAILABLE)
return bswap_32(value);
#elif defined(__MWERKS__) && TARGET_CPU_PPC
return (word32)__lwbrx(&value,0); return (word32)__lwbrx(&value,0);
#elif defined(FAST_ROTATE) #elif defined(_MSC_VER) && _MSC_VER >= 1300
return _byteswap_ulong(value);
#elif CRYPTOPP_FAST_ROTATE(32)
// 5 instructions with rotate instruction, 9 without // 5 instructions with rotate instruction, 9 without
return (rotrFixed(value, 8U) & 0xff00ff00) | (rotlFixed(value, 8U) & 0x00ff00ff); return (rotrFixed(value, 8U) & 0xff00ff00) | (rotlFixed(value, 8U) & 0x00ff00ff);
#else #else
@ -535,7 +685,14 @@ inline word32 ByteReverse(word32 value)
#ifdef WORD64_AVAILABLE #ifdef WORD64_AVAILABLE
inline word64 ByteReverse(word64 value) inline word64 ByteReverse(word64 value)
{ {
#ifdef CRYPTOPP_SLOW_WORD64 #if defined(__GNUC__) && defined(CRYPTOPP_X86_ASM_AVAILABLE) && defined(__x86_64__)
__asm__ ("bswap %0" : "=r" (value) : "0" (value));
return value;
#elif defined(CRYPTOPP_BYTESWAP_AVAILABLE)
return bswap_64(value);
#elif defined(_MSC_VER) && _MSC_VER >= 1300
return _byteswap_uint64(value);
#elif defined(CRYPTOPP_SLOW_WORD64)
return (word64(ByteReverse(word32(value))) << 32) | ByteReverse(word32(value>>32)); return (word64(ByteReverse(word32(value))) << 32) | ByteReverse(word32(value>>32));
#else #else
value = ((value & W64LIT(0xFF00FF00FF00FF00)) >> 8) | ((value & W64LIT(0x00FF00FF00FF00FF)) << 8); value = ((value & W64LIT(0xFF00FF00FF00FF00)) >> 8) | ((value & W64LIT(0x00FF00FF00FF00FF)) << 8);
@ -637,6 +794,7 @@ inline void GetUserKey(ByteOrder order, T *out, size_t outlen, const byte *in, s
ConditionalByteReverse(order, out, out, RoundUpToMultipleOf(inlen, U)); ConditionalByteReverse(order, out, out, RoundUpToMultipleOf(inlen, U));
} }
#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
inline byte UnalignedGetWordNonTemplate(ByteOrder order, const byte *block, byte*) inline byte UnalignedGetWordNonTemplate(ByteOrder order, const byte *block, byte*)
{ {
return block[0]; return block[0];
@ -681,18 +839,12 @@ inline word64 UnalignedGetWordNonTemplate(ByteOrder order, const byte *block, wo
} }
#endif #endif
// Single-byte overload: stores value at block[0], first XORing it with
// xorBlock[0] when xorBlock is non-null. The order argument is not used here.
inline void UnalignedPutWordNonTemplate(ByteOrder order, byte *block, byte value, const byte *xorBlock)
{
	if (xorBlock)
		value ^= xorBlock[0];
	block[0] = value;
}
inline void UnalignedPutWord(ByteOrder order, byte *block, word16 value, const byte *xorBlock = NULL) inline void UnalignedPutWordNonTemplate(ByteOrder order, byte *block, word16 value, const byte *xorBlock)
{ {
if (order == BIG_ENDIAN_ORDER) if (order == BIG_ENDIAN_ORDER)
{ {
@ -712,7 +864,7 @@ inline void UnalignedPutWord(ByteOrder order, byte *block, word16 value, const b
} }
} }
inline void UnalignedPutWord(ByteOrder order, byte *block, word32 value, const byte *xorBlock = NULL) inline void UnalignedPutWordNonTemplate(ByteOrder order, byte *block, word32 value, const byte *xorBlock)
{ {
if (order == BIG_ENDIAN_ORDER) if (order == BIG_ENDIAN_ORDER)
{ {
@ -739,7 +891,7 @@ inline void UnalignedPutWord(ByteOrder order, byte *block, word32 value, const b
} }
#ifdef WORD64_AVAILABLE #ifdef WORD64_AVAILABLE
inline void UnalignedPutWord(ByteOrder order, byte *block, word64 value, const byte *xorBlock = NULL) inline void UnalignedPutWordNonTemplate(ByteOrder order, byte *block, word64 value, const byte *xorBlock)
{ {
if (order == BIG_ENDIAN_ORDER) if (order == BIG_ENDIAN_ORDER)
{ {
@ -777,17 +929,17 @@ inline void UnalignedPutWord(ByteOrder order, byte *block, word64 value, const b
} }
} }
#endif #endif
#endif // #ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
template <class T> template <class T>
inline T GetWord(bool assumeAligned, ByteOrder order, const byte *block) inline T GetWord(bool assumeAligned, ByteOrder order, const byte *block)
{ {
if (assumeAligned) #ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
{ if (!assumeAligned)
assert(IsAligned<T>(block)); return UnalignedGetWordNonTemplate(order, block);
return ConditionalByteReverse(order, *reinterpret_cast<const T *>(block)); assert(IsAligned<T>(block));
} #endif
else return ConditionalByteReverse(order, *reinterpret_cast<const T *>(block));
return UnalignedGetWord<T>(order, block);
} }
template <class T> template <class T>
@ -799,17 +951,13 @@ inline void GetWord(bool assumeAligned, ByteOrder order, T &result, const byte *
template <class T> template <class T>
inline void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock = NULL) inline void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock = NULL)
{ {
if (assumeAligned) #ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
{ if (!assumeAligned)
assert(IsAligned<T>(block)); return UnalignedGetWordNonTemplate(order, block, value, xorBlock);
assert(IsAligned<T>(xorBlock)); assert(IsAligned<T>(block));
if (xorBlock) assert(IsAligned<T>(xorBlock));
*reinterpret_cast<T *>(block) = ConditionalByteReverse(order, value) ^ *reinterpret_cast<const T *>(xorBlock); #endif
else *reinterpret_cast<T *>(block) = ConditionalByteReverse(order, value) ^ (xorBlock ? *reinterpret_cast<const T *>(xorBlock) : 0);
*reinterpret_cast<T *>(block) = ConditionalByteReverse(order, value);
}
else
UnalignedPutWord(order, block, value, xorBlock);
} }
template <class T, class B, bool A=true> template <class T, class B, bool A=true>
@ -927,4 +1075,4 @@ inline T SafeLeftShift(T value)
NAMESPACE_END NAMESPACE_END
#endif // MISC_H #endif

View File

@ -71,7 +71,7 @@ void SEAL_Policy<B>::CipherSetKey(const NameValuePairs &params, const byte *key,
template <class B> template <class B>
void SEAL_Policy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *IV) void SEAL_Policy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *IV)
{ {
m_outsideCounter = IV ? UnalignedGetWord<word32>(BIG_ENDIAN_ORDER, IV) : 0; m_outsideCounter = IV ? GetWord<word32>(false, BIG_ENDIAN_ORDER, IV) : 0;
m_startCount = m_outsideCounter; m_startCount = m_outsideCounter;
m_insideCounter = 0; m_insideCounter = 0;
} }
@ -86,7 +86,6 @@ void SEAL_Policy<B>::SeekToIteration(lword iterationCount)
template <class B> template <class B>
void SEAL_Policy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount) void SEAL_Policy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
{ {
KeystreamOutput<B> keystreamOutput(operation, output, input);
word32 a, b, c, d, n1, n2, n3, n4; word32 a, b, c, d, n1, n2, n3, n4;
unsigned int p, q; unsigned int p, q;
@ -175,10 +174,13 @@ void SEAL_Policy<B>::OperateKeystream(KeystreamOperation operation, byte *output
d = rotrFixed(d, 9U); d = rotrFixed(d, 9U);
a += Ttab(q); a += Ttab(q);
keystreamOutput (b + m_S[4*i+0]) #define SEAL_OUTPUT(x) \
(c ^ m_S[4*i+1]) CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 0, b + m_S[4*i+0]);\
(d + m_S[4*i+2]) CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 1, c ^ m_S[4*i+1]);\
(a ^ m_S[4*i+3]); CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 2, d + m_S[4*i+2]);\
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 3, a ^ m_S[4*i+3]);
CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SEAL_OUTPUT, 4*4);
if (i & 1) if (i & 1)
{ {

2
seal.h
View File

@ -18,7 +18,7 @@ class CRYPTOPP_NO_VTABLE SEAL_Policy : public AdditiveCipherConcretePolicy<word3
protected: protected:
void CipherSetKey(const NameValuePairs &params, const byte *key, size_t length); void CipherSetKey(const NameValuePairs &params, const byte *key, size_t length);
void OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount); void OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount);
void CipherGetNextIV(byte *IV) {UnalignedPutWord(BIG_ENDIAN_ORDER, IV, m_outsideCounter+1);} void CipherGetNextIV(byte *IV) {PutWord(false, BIG_ENDIAN_ORDER, IV, m_outsideCounter+1);}
void CipherResynchronize(byte *keystreamBuffer, const byte *IV); void CipherResynchronize(byte *keystreamBuffer, const byte *IV);
bool IsRandomAccess() const {return true;} bool IsRandomAccess() const {return true;}
void SeekToIteration(lword iterationCount); void SeekToIteration(lword iterationCount);

View File

@ -26,6 +26,8 @@ public:
TruncatedHashTemplate(size_t digestSize) TruncatedHashTemplate(size_t digestSize)
: m_digestSize(digestSize) {} : m_digestSize(digestSize) {}
// Forwards to m_hm.Restart().
void Restart()
{m_hm.Restart();}
void Update(const byte *input, size_t length) void Update(const byte *input, size_t length)
{m_hm.Update(input, length);} {m_hm.Update(input, length);}
unsigned int DigestSize() const {return m_digestSize;} unsigned int DigestSize() const {return m_digestSize;}

View File

@ -20,8 +20,7 @@ inline void SetWords(word *r, word a, size_t n)
// Copies n words from a to r.
// memmove (declared in the same header as memcpy) is used because memcpy is
// undefined when the ranges overlap, including r == a, which the element-wise
// loop this replaced handled correctly for r <= a.
inline void CopyWords(word *r, const word *a, size_t n)
{
	memmove(r, a, n*WORD_SIZE);
}
inline void XorWords(word *r, const word *a, const word *b, size_t n) inline void XorWords(word *r, const word *a, const word *b, size_t n)