MMX/SSE2 optimizations
parent
3b89824be3
commit
643b302227
3305
integer.cpp
3305
integer.cpp
File diff suppressed because it is too large
Load Diff
37
integer.h
37
integer.h
|
|
@ -11,44 +11,13 @@
|
|||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
#if defined(SSE2_INTRINSICS_AVAILABLE)
|
||||
template <class T>
|
||||
class AlignedAllocator : public AllocatorBase<T>
|
||||
{
|
||||
public:
|
||||
CRYPTOPP_INHERIT_ALLOCATOR_TYPES
|
||||
|
||||
pointer allocate(size_type n, const void *);
|
||||
void deallocate(void *p, size_type n);
|
||||
pointer reallocate(T *p, size_type oldSize, size_type newSize, bool preserve)
|
||||
{
|
||||
return StandardReallocate(*this, p, oldSize, newSize, preserve);
|
||||
}
|
||||
|
||||
#if !(defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16) || defined(CRYPTOPP_MEMALIGN_AVAILABLE) || defined(CRYPTOPP_MM_MALLOC_AVAILABLE))
|
||||
#define CRYPTOPP_NO_ALIGNED_ALLOC
|
||||
AlignedAllocator() : m_pBlock(NULL) {}
|
||||
protected:
|
||||
void *m_pBlock;
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CRYPTOPP_IMPORTS
|
||||
CRYPTOPP_DLL_TEMPLATE_CLASS AlignedAllocator<word>;
|
||||
#endif
|
||||
|
||||
typedef SecBlock<word, AlignedAllocator<word> > SecAlignedWordBlock;
|
||||
#else
|
||||
typedef SecWordBlock SecAlignedWordBlock;
|
||||
#endif
|
||||
|
||||
void CRYPTOPP_DLL CRYPTOPP_API DisableSSE2();
|
||||
|
||||
// Helper type that declares only a default constructor (defined elsewhere).
// NOTE(review): presumably an instance is constructed so that the constructor
// performs Integer's one-time static setup -- confirm against integer.cpp.
struct InitializeInteger // used to initialize static variables
|
||||
{
|
||||
InitializeInteger();
|
||||
};
|
||||
|
||||
typedef SecBlock<word, AllocatorWithCleanup<word, CRYPTOPP_BOOL_X86> > IntegerSecBlock;
|
||||
|
||||
//! multiple precision integer and basic arithmetics
|
||||
/*! This class can represent positive and negative integers
|
||||
with absolute value less than (256**sizeof(word)) ** (256**sizeof(int)).
|
||||
|
|
@ -406,7 +375,7 @@ private:
|
|||
friend void PositiveMultiply(Integer &product, const Integer &a, const Integer &b);
|
||||
friend void PositiveDivide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor);
|
||||
|
||||
SecAlignedWordBlock reg;
|
||||
IntegerSecBlock reg;
|
||||
Sign sign;
|
||||
};
|
||||
|
||||
|
|
|
|||
570
rijndael.cpp
570
rijndael.cpp
|
|
@ -51,10 +51,7 @@ being unloaded from L1 cache, until that round is finished.
|
|||
|
||||
#include "rijndael.h"
|
||||
#include "misc.h"
|
||||
|
||||
#ifdef CRYPTOPP_L1_CACHE_ALIGN_NOT_AVAILABLE
|
||||
#pragma message("Don't know how to align data on L1 cache boundary. Defense against AES timing attack may be affected.")
|
||||
#endif
|
||||
#include "cpu.h"
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
|
|
@ -122,25 +119,25 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
|
|||
for (i = 1; i < m_rounds; i++) {
|
||||
rk += 4;
|
||||
rk[0] =
|
||||
Td0[Se[GETBYTE(rk[0], 3)]] ^
|
||||
Td1[Se[GETBYTE(rk[0], 2)]] ^
|
||||
Td2[Se[GETBYTE(rk[0], 1)]] ^
|
||||
Td3[Se[GETBYTE(rk[0], 0)]];
|
||||
Td[0*256+Se[GETBYTE(rk[0], 3)]] ^
|
||||
Td[1*256+Se[GETBYTE(rk[0], 2)]] ^
|
||||
Td[2*256+Se[GETBYTE(rk[0], 1)]] ^
|
||||
Td[3*256+Se[GETBYTE(rk[0], 0)]];
|
||||
rk[1] =
|
||||
Td0[Se[GETBYTE(rk[1], 3)]] ^
|
||||
Td1[Se[GETBYTE(rk[1], 2)]] ^
|
||||
Td2[Se[GETBYTE(rk[1], 1)]] ^
|
||||
Td3[Se[GETBYTE(rk[1], 0)]];
|
||||
Td[0*256+Se[GETBYTE(rk[1], 3)]] ^
|
||||
Td[1*256+Se[GETBYTE(rk[1], 2)]] ^
|
||||
Td[2*256+Se[GETBYTE(rk[1], 1)]] ^
|
||||
Td[3*256+Se[GETBYTE(rk[1], 0)]];
|
||||
rk[2] =
|
||||
Td0[Se[GETBYTE(rk[2], 3)]] ^
|
||||
Td1[Se[GETBYTE(rk[2], 2)]] ^
|
||||
Td2[Se[GETBYTE(rk[2], 1)]] ^
|
||||
Td3[Se[GETBYTE(rk[2], 0)]];
|
||||
Td[0*256+Se[GETBYTE(rk[2], 3)]] ^
|
||||
Td[1*256+Se[GETBYTE(rk[2], 2)]] ^
|
||||
Td[2*256+Se[GETBYTE(rk[2], 1)]] ^
|
||||
Td[3*256+Se[GETBYTE(rk[2], 0)]];
|
||||
rk[3] =
|
||||
Td0[Se[GETBYTE(rk[3], 3)]] ^
|
||||
Td1[Se[GETBYTE(rk[3], 2)]] ^
|
||||
Td2[Se[GETBYTE(rk[3], 1)]] ^
|
||||
Td3[Se[GETBYTE(rk[3], 0)]];
|
||||
Td[0*256+Se[GETBYTE(rk[3], 3)]] ^
|
||||
Td[1*256+Se[GETBYTE(rk[3], 2)]] ^
|
||||
Td[2*256+Se[GETBYTE(rk[3], 1)]] ^
|
||||
Td[3*256+Se[GETBYTE(rk[3], 0)]];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -148,14 +145,244 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
|
|||
ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
|
||||
}
|
||||
|
||||
const static unsigned int s_lineSizeDiv4 = CRYPTOPP_L1_CACHE_LINE_SIZE/4;
|
||||
#ifdef IS_BIG_ENDIAN
|
||||
const static unsigned int s_i3=3, s_i2=2, s_i1=1, s_i0=0;
|
||||
#else
|
||||
const static unsigned int s_i3=0, s_i2=1, s_i1=2, s_i0=3;
|
||||
#endif
|
||||
#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
|
||||
|
||||
void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
|
||||
{
|
||||
#ifdef CRYPTOPP_X86_ASM_AVAILABLE
|
||||
if (HasMMX())
|
||||
{
|
||||
const word32 *k = m_key;
|
||||
const word32 *kLoopEnd = k + m_rounds*4;
|
||||
#ifdef __GNUC__
|
||||
word32 t0, t1, t2, t3;
|
||||
__asm__ __volatile__
|
||||
(
|
||||
".intel_syntax noprefix;"
|
||||
AS1( push ebx)
|
||||
AS1( push ebp)
|
||||
AS2( mov ebp, eax)
|
||||
AS2( movd mm5, ecx)
|
||||
#else
|
||||
AS2( mov edx, g_cacheLineSize)
|
||||
AS2( mov edi, inBlock)
|
||||
AS2( mov esi, k)
|
||||
AS2( movd mm5, kLoopEnd)
|
||||
AS1( push ebp)
|
||||
AS2( lea ebp, Te)
|
||||
#endif
|
||||
AS2( mov eax, [esi+0*4]) // s0
|
||||
AS2( xor eax, [edi+0*4])
|
||||
AS2( movd mm0, eax)
|
||||
AS2( mov ebx, [esi+1*4])
|
||||
AS2( xor ebx, [edi+1*4])
|
||||
AS2( movd mm1, ebx)
|
||||
AS2( and ebx, eax)
|
||||
AS2( mov eax, [esi+2*4])
|
||||
AS2( xor eax, [edi+2*4])
|
||||
AS2( movd mm2, eax)
|
||||
AS2( and ebx, eax)
|
||||
AS2( mov ecx, [esi+3*4])
|
||||
AS2( xor ecx, [edi+3*4])
|
||||
AS2( and ebx, ecx)
|
||||
|
||||
// read Te0 into L1 cache. this code could be simplified by using lfence, but that is an SSE2 instruction
|
||||
AS2( and ebx, 0)
|
||||
AS2( mov edi, ebx) // make index depend on previous loads to simulate lfence
|
||||
ASL(2)
|
||||
AS2( and ebx, [ebp+edi])
|
||||
AS2( add edi, edx)
|
||||
AS2( and ebx, [ebp+edi])
|
||||
AS2( add edi, edx)
|
||||
AS2( and ebx, [ebp+edi])
|
||||
AS2( add edi, edx)
|
||||
AS2( and ebx, [ebp+edi])
|
||||
AS2( add edi, edx)
|
||||
AS2( cmp edi, 1024)
|
||||
ASJ( jl, 2, b)
|
||||
AS2( and ebx, [ebp+1020])
|
||||
AS2( movd mm6, ebx)
|
||||
AS2( pxor mm2, mm6)
|
||||
AS2( pxor mm1, mm6)
|
||||
AS2( pxor mm0, mm6)
|
||||
AS2( xor ecx, ebx)
|
||||
|
||||
AS2( mov edi, [esi+4*4]) // t0
|
||||
AS2( mov eax, [esi+5*4])
|
||||
AS2( mov ebx, [esi+6*4])
|
||||
AS2( mov edx, [esi+7*4])
|
||||
AS2( add esi, 8*4)
|
||||
AS2( movd mm4, esi)
|
||||
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
AS2(movzx esi, t##l)\
|
||||
AS2(d, [ebp+0*1024+4*esi])\
|
||||
AS2(movzx esi, t##h)\
|
||||
AS2(c, [ebp+1*1024+4*esi])\
|
||||
AS2(shr e##t##x, 16)\
|
||||
AS2(movzx esi, t##l)\
|
||||
AS2(b, [ebp+2*1024+4*esi])\
|
||||
AS2(movzx esi, t##h)\
|
||||
AS2(a, [ebp+3*1024+4*esi])
|
||||
|
||||
#define s0 xor edi
|
||||
#define s1 xor eax
|
||||
#define s2 xor ebx
|
||||
#define s3 xor ecx
|
||||
#define t0 xor edi
|
||||
#define t1 xor eax
|
||||
#define t2 xor ebx
|
||||
#define t3 xor edx
|
||||
|
||||
QUARTER_ROUND(c, t0, t1, t2, t3)
|
||||
AS2( movd ecx, mm2)
|
||||
QUARTER_ROUND(c, t3, t0, t1, t2)
|
||||
AS2( movd ecx, mm1)
|
||||
QUARTER_ROUND(c, t2, t3, t0, t1)
|
||||
AS2( movd ecx, mm0)
|
||||
QUARTER_ROUND(c, t1, t2, t3, t0)
|
||||
AS2( movd mm2, ebx)
|
||||
AS2( movd mm1, eax)
|
||||
AS2( movd mm0, edi)
|
||||
#undef QUARTER_ROUND
|
||||
|
||||
AS2( movd esi, mm4)
|
||||
|
||||
ASL(0)
|
||||
AS2( mov edi, [esi+0*4])
|
||||
AS2( mov eax, [esi+1*4])
|
||||
AS2( mov ebx, [esi+2*4])
|
||||
AS2( mov ecx, [esi+3*4])
|
||||
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
AS2(movzx esi, t##l)\
|
||||
AS2(a, [ebp+3*1024+4*esi])\
|
||||
AS2(movzx esi, t##h)\
|
||||
AS2(b, [ebp+2*1024+4*esi])\
|
||||
AS2(shr e##t##x, 16)\
|
||||
AS2(movzx esi, t##l)\
|
||||
AS2(c, [ebp+1*1024+4*esi])\
|
||||
AS2(movzx esi, t##h)\
|
||||
AS2(d, [ebp+0*1024+4*esi])
|
||||
|
||||
QUARTER_ROUND(d, s0, s1, s2, s3)
|
||||
AS2( movd edx, mm2)
|
||||
QUARTER_ROUND(d, s3, s0, s1, s2)
|
||||
AS2( movd edx, mm1)
|
||||
QUARTER_ROUND(d, s2, s3, s0, s1)
|
||||
AS2( movd edx, mm0)
|
||||
QUARTER_ROUND(d, s1, s2, s3, s0)
|
||||
AS2( movd esi, mm4)
|
||||
AS2( movd mm2, ebx)
|
||||
AS2( movd mm1, eax)
|
||||
AS2( movd mm0, edi)
|
||||
|
||||
AS2( mov edi, [esi+4*4])
|
||||
AS2( mov eax, [esi+5*4])
|
||||
AS2( mov ebx, [esi+6*4])
|
||||
AS2( mov edx, [esi+7*4])
|
||||
|
||||
QUARTER_ROUND(c, t0, t1, t2, t3)
|
||||
AS2( movd ecx, mm2)
|
||||
QUARTER_ROUND(c, t3, t0, t1, t2)
|
||||
AS2( movd ecx, mm1)
|
||||
QUARTER_ROUND(c, t2, t3, t0, t1)
|
||||
AS2( movd ecx, mm0)
|
||||
QUARTER_ROUND(c, t1, t2, t3, t0)
|
||||
AS2( movd mm2, ebx)
|
||||
AS2( movd mm1, eax)
|
||||
AS2( movd mm0, edi)
|
||||
|
||||
AS2( movd esi, mm4)
|
||||
AS2( movd edi, mm5)
|
||||
AS2( add esi, 8*4)
|
||||
AS2( movd mm4, esi)
|
||||
AS2( cmp edi, esi)
|
||||
ASJ( jne, 0, b)
|
||||
|
||||
#undef QUARTER_ROUND
|
||||
#undef s0
|
||||
#undef s1
|
||||
#undef s2
|
||||
#undef s3
|
||||
#undef t0
|
||||
#undef t1
|
||||
#undef t2
|
||||
#undef t3
|
||||
|
||||
AS2( mov eax, [edi+0*4])
|
||||
AS2( mov ecx, [edi+1*4])
|
||||
AS2( mov esi, [edi+2*4])
|
||||
AS2( mov edi, [edi+3*4])
|
||||
|
||||
#define QUARTER_ROUND(a, b, c, d) \
|
||||
AS2( movzx ebx, dl)\
|
||||
AS2( movzx ebx, BYTE PTR [ebp+1+4*ebx])\
|
||||
AS2( shl ebx, 3*8)\
|
||||
AS2( xor a, ebx)\
|
||||
AS2( movzx ebx, dh)\
|
||||
AS2( movzx ebx, BYTE PTR [ebp+1+4*ebx])\
|
||||
AS2( shl ebx, 2*8)\
|
||||
AS2( xor b, ebx)\
|
||||
AS2( shr edx, 16)\
|
||||
AS2( movzx ebx, dl)\
|
||||
AS2( shr edx, 8)\
|
||||
AS2( movzx ebx, BYTE PTR [ebp+1+4*ebx])\
|
||||
AS2( shl ebx, 1*8)\
|
||||
AS2( xor c, ebx)\
|
||||
AS2( movzx ebx, BYTE PTR [ebp+1+4*edx])\
|
||||
AS2( xor d, ebx)
|
||||
|
||||
QUARTER_ROUND(eax, ecx, esi, edi)
|
||||
AS2( movd edx, mm2)
|
||||
QUARTER_ROUND(edi, eax, ecx, esi)
|
||||
AS2( movd edx, mm1)
|
||||
QUARTER_ROUND(esi, edi, eax, ecx)
|
||||
AS2( movd edx, mm0)
|
||||
QUARTER_ROUND(ecx, esi, edi, eax)
|
||||
|
||||
#undef QUARTER_ROUND
|
||||
|
||||
AS1( pop ebp)
|
||||
AS1( emms)
|
||||
|
||||
#ifdef __GNUC__
|
||||
AS1( pop ebx)
|
||||
".att_syntax prefix;"
|
||||
: "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)
|
||||
: "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)
|
||||
: "memory", "cc"
|
||||
);
|
||||
|
||||
if (xorBlock)
|
||||
{
|
||||
t0 ^= ((const word32 *)xorBlock)[0];
|
||||
t1 ^= ((const word32 *)xorBlock)[1];
|
||||
t2 ^= ((const word32 *)xorBlock)[2];
|
||||
t3 ^= ((const word32 *)xorBlock)[3];
|
||||
}
|
||||
((word32 *)outBlock)[0] = t0;
|
||||
((word32 *)outBlock)[1] = t1;
|
||||
((word32 *)outBlock)[2] = t2;
|
||||
((word32 *)outBlock)[3] = t3;
|
||||
#else
|
||||
AS2( mov ebx, xorBlock)
|
||||
AS2( test ebx, ebx)
|
||||
ASJ( jz, 1, f)
|
||||
AS2( xor eax, [ebx+0*4])
|
||||
AS2( xor ecx, [ebx+1*4])
|
||||
AS2( xor esi, [ebx+2*4])
|
||||
AS2( xor edi, [ebx+3*4])
|
||||
ASL(1)
|
||||
AS2( mov ebx, outBlock)
|
||||
AS2( mov [ebx+0*4], eax)
|
||||
AS2( mov [ebx+1*4], ecx)
|
||||
AS2( mov [ebx+2*4], esi)
|
||||
AS2( mov [ebx+3*4], edi)
|
||||
#endif
|
||||
}
|
||||
else
|
||||
#endif // #ifdef CRYPTOPP_X86_ASM_AVAILABLE
|
||||
{
|
||||
word32 s0, s1, s2, s3, t0, t1, t2, t3;
|
||||
const word32 *rk = m_key;
|
||||
|
|
@ -171,95 +398,68 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
rk += 8;
|
||||
|
||||
// timing attack countermeasure. see comments at top for more details
|
||||
const int cacheLineSize = GetCacheLineSize();
|
||||
unsigned int i;
|
||||
word32 u = 0;
|
||||
for (i=0; i<sizeof(Te0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
|
||||
u &= (Te0[i+0*s_lineSizeDiv4] & Te0[i+2*s_lineSizeDiv4]) & (Te0[i+1*s_lineSizeDiv4] & Te0[i+3*s_lineSizeDiv4]);
|
||||
for (i=0; i<1024; i+=cacheLineSize)
|
||||
u &= *(const word32 *)(((const byte *)Te)+i);
|
||||
u &= Te[255];
|
||||
s0 |= u; s1 |= u; s2 |= u; s3 |= u;
|
||||
|
||||
// first round
|
||||
t0 ^=
|
||||
Te0[GETBYTE(s0, s_i3)] ^
|
||||
rotrFixed(Te0[GETBYTE(s1, s_i2)], 8) ^
|
||||
rotrFixed(Te0[GETBYTE(s2, s_i1)], 16) ^
|
||||
rotrFixed(Te0[GETBYTE(s3, s_i0)], 24);
|
||||
t1 ^=
|
||||
Te0[GETBYTE(s1, s_i3)] ^
|
||||
rotrFixed(Te0[GETBYTE(s2, s_i2)], 8) ^
|
||||
rotrFixed(Te0[GETBYTE(s3, s_i1)], 16) ^
|
||||
rotrFixed(Te0[GETBYTE(s0, s_i0)], 24);
|
||||
t2 ^=
|
||||
Te0[GETBYTE(s2, s_i3)] ^
|
||||
rotrFixed(Te0[GETBYTE(s3, s_i2)], 8) ^
|
||||
rotrFixed(Te0[GETBYTE(s0, s_i1)], 16) ^
|
||||
rotrFixed(Te0[GETBYTE(s1, s_i0)], 24);
|
||||
t3 ^=
|
||||
Te0[GETBYTE(s3, s_i3)] ^
|
||||
rotrFixed(Te0[GETBYTE(s0, s_i2)], 8) ^
|
||||
rotrFixed(Te0[GETBYTE(s1, s_i1)], 16) ^
|
||||
rotrFixed(Te0[GETBYTE(s2, s_i0)], 24);
|
||||
#ifdef IS_BIG_ENDIAN
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
a ^= rotrFixed(Te[byte(t)], 24); t >>= 8;\
|
||||
b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
|
||||
c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
|
||||
d ^= Te[t];
|
||||
#else
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
d ^= Te[byte(t)]; t >>= 8;\
|
||||
c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
|
||||
b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
|
||||
a ^= rotrFixed(Te[t], 24);
|
||||
#endif
|
||||
|
||||
QUARTER_ROUND(s3, t0, t1, t2, t3)
|
||||
QUARTER_ROUND(s2, t3, t0, t1, t2)
|
||||
QUARTER_ROUND(s1, t2, t3, t0, t1)
|
||||
QUARTER_ROUND(s0, t1, t2, t3, t0)
|
||||
#undef QUARTER_ROUND
|
||||
|
||||
// Nr - 2 full rounds:
|
||||
unsigned int r = m_rounds/2 - 1;
|
||||
do
|
||||
{
|
||||
s0 =
|
||||
Te0[GETBYTE(t0, 3)] ^
|
||||
Te1[GETBYTE(t1, 2)] ^
|
||||
Te2[GETBYTE(t2, 1)] ^
|
||||
Te3[GETBYTE(t3, 0)] ^
|
||||
rk[0];
|
||||
s1 =
|
||||
Te0[GETBYTE(t1, 3)] ^
|
||||
Te1[GETBYTE(t2, 2)] ^
|
||||
Te2[GETBYTE(t3, 1)] ^
|
||||
Te3[GETBYTE(t0, 0)] ^
|
||||
rk[1];
|
||||
s2 =
|
||||
Te0[GETBYTE(t2, 3)] ^
|
||||
Te1[GETBYTE(t3, 2)] ^
|
||||
Te2[GETBYTE(t0, 1)] ^
|
||||
Te3[GETBYTE(t1, 0)] ^
|
||||
rk[2];
|
||||
s3 =
|
||||
Te0[GETBYTE(t3, 3)] ^
|
||||
Te1[GETBYTE(t0, 2)] ^
|
||||
Te2[GETBYTE(t1, 1)] ^
|
||||
Te3[GETBYTE(t2, 0)] ^
|
||||
rk[3];
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
a ^= Te[3*256+byte(t)]; t >>= 8;\
|
||||
b ^= Te[2*256+byte(t)]; t >>= 8;\
|
||||
c ^= Te[1*256+byte(t)]; t >>= 8;\
|
||||
d ^= Te[t];
|
||||
|
||||
t0 =
|
||||
Te0[GETBYTE(s0, 3)] ^
|
||||
Te1[GETBYTE(s1, 2)] ^
|
||||
Te2[GETBYTE(s2, 1)] ^
|
||||
Te3[GETBYTE(s3, 0)] ^
|
||||
rk[4];
|
||||
t1 =
|
||||
Te0[GETBYTE(s1, 3)] ^
|
||||
Te1[GETBYTE(s2, 2)] ^
|
||||
Te2[GETBYTE(s3, 1)] ^
|
||||
Te3[GETBYTE(s0, 0)] ^
|
||||
rk[5];
|
||||
t2 =
|
||||
Te0[GETBYTE(s2, 3)] ^
|
||||
Te1[GETBYTE(s3, 2)] ^
|
||||
Te2[GETBYTE(s0, 1)] ^
|
||||
Te3[GETBYTE(s1, 0)] ^
|
||||
rk[6];
|
||||
t3 =
|
||||
Te0[GETBYTE(s3, 3)] ^
|
||||
Te1[GETBYTE(s0, 2)] ^
|
||||
Te2[GETBYTE(s1, 1)] ^
|
||||
Te3[GETBYTE(s2, 0)] ^
|
||||
rk[7];
|
||||
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
|
||||
|
||||
QUARTER_ROUND(t3, s0, s1, s2, s3)
|
||||
QUARTER_ROUND(t2, s3, s0, s1, s2)
|
||||
QUARTER_ROUND(t1, s2, s3, s0, s1)
|
||||
QUARTER_ROUND(t0, s1, s2, s3, s0)
|
||||
|
||||
t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
|
||||
|
||||
QUARTER_ROUND(s3, t0, t1, t2, t3)
|
||||
QUARTER_ROUND(s2, t3, t0, t1, t2)
|
||||
QUARTER_ROUND(s1, t2, t3, t0, t1)
|
||||
QUARTER_ROUND(s0, t1, t2, t3, t0)
|
||||
#undef QUARTER_ROUND
|
||||
|
||||
rk += 8;
|
||||
} while (--r);
|
||||
|
||||
// timing attack countermeasure. see comments at top for more details
|
||||
u = 0;
|
||||
for (i=0; i<sizeof(Se)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
|
||||
u &= (((word32*)Se)[i+0*s_lineSizeDiv4] & ((word32*)Se)[i+2*s_lineSizeDiv4]) & (((word32*)Se)[i+1*s_lineSizeDiv4] & ((word32*)Se)[i+3*s_lineSizeDiv4]);
|
||||
for (i=0; i<256; i+=cacheLineSize)
|
||||
u &= *(const word32 *)(Se+i);
|
||||
u &= *(const word32 *)(Se+252);
|
||||
t0 |= u; t1 |= u; t2 |= u; t3 |= u;
|
||||
|
||||
word32 tbw[4];
|
||||
|
|
@ -267,23 +467,17 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
word32 *const obw = (word32 *)outBlock;
|
||||
const word32 *const xbw = (const word32 *)xorBlock;
|
||||
|
||||
// last round
|
||||
tempBlock[0] = Se[GETBYTE(t0, 3)];
|
||||
tempBlock[1] = Se[GETBYTE(t1, 2)];
|
||||
tempBlock[2] = Se[GETBYTE(t2, 1)];
|
||||
tempBlock[3] = Se[GETBYTE(t3, 0)];
|
||||
tempBlock[4] = Se[GETBYTE(t1, 3)];
|
||||
tempBlock[5] = Se[GETBYTE(t2, 2)];
|
||||
tempBlock[6] = Se[GETBYTE(t3, 1)];
|
||||
tempBlock[7] = Se[GETBYTE(t0, 0)];
|
||||
tempBlock[8] = Se[GETBYTE(t2, 3)];
|
||||
tempBlock[9] = Se[GETBYTE(t3, 2)];
|
||||
tempBlock[10] = Se[GETBYTE(t0, 1)];
|
||||
tempBlock[11] = Se[GETBYTE(t1, 0)];
|
||||
tempBlock[12] = Se[GETBYTE(t3, 3)];
|
||||
tempBlock[13] = Se[GETBYTE(t0, 2)];
|
||||
tempBlock[14] = Se[GETBYTE(t1, 1)];
|
||||
tempBlock[15] = Se[GETBYTE(t2, 0)];
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
tempBlock[a] = Se[byte(t)]; t >>= 8;\
|
||||
tempBlock[b] = Se[byte(t)]; t >>= 8;\
|
||||
tempBlock[c] = Se[byte(t)]; t >>= 8;\
|
||||
tempBlock[d] = Se[t];
|
||||
|
||||
QUARTER_ROUND(t2, 15, 2, 5, 8)
|
||||
QUARTER_ROUND(t1, 11, 14, 1, 4)
|
||||
QUARTER_ROUND(t0, 7, 10, 13, 0)
|
||||
QUARTER_ROUND(t3, 3, 6, 9, 12)
|
||||
#undef QUARTER_ROUND
|
||||
|
||||
if (xbw)
|
||||
{
|
||||
|
|
@ -300,6 +494,7 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
obw[3] = tbw[3] ^ rk[3];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
|
||||
{
|
||||
|
|
@ -317,95 +512,68 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
rk += 8;
|
||||
|
||||
// timing attack countermeasure. see comments at top for more details
|
||||
const int cacheLineSize = GetCacheLineSize();
|
||||
unsigned int i;
|
||||
word32 u = 0;
|
||||
for (i=0; i<sizeof(Td0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
|
||||
u &= (Td0[i+0*s_lineSizeDiv4] & Td0[i+2*s_lineSizeDiv4]) & (Td0[i+1*s_lineSizeDiv4] & Td0[i+3*s_lineSizeDiv4]);
|
||||
for (i=0; i<1024; i+=cacheLineSize)
|
||||
u &= *(const word32 *)(((const byte *)Td)+i);
|
||||
u &= Td[255];
|
||||
s0 |= u; s1 |= u; s2 |= u; s3 |= u;
|
||||
|
||||
// first round
|
||||
t0 ^=
|
||||
Td0[GETBYTE(s0, s_i3)] ^
|
||||
rotrFixed(Td0[GETBYTE(s3, s_i2)], 8) ^
|
||||
rotrFixed(Td0[GETBYTE(s2, s_i1)], 16) ^
|
||||
rotrFixed(Td0[GETBYTE(s1, s_i0)], 24);
|
||||
t1 ^=
|
||||
Td0[GETBYTE(s1, s_i3)] ^
|
||||
rotrFixed(Td0[GETBYTE(s0, s_i2)], 8) ^
|
||||
rotrFixed(Td0[GETBYTE(s3, s_i1)], 16) ^
|
||||
rotrFixed(Td0[GETBYTE(s2, s_i0)], 24);
|
||||
t2 ^=
|
||||
Td0[GETBYTE(s2, s_i3)] ^
|
||||
rotrFixed(Td0[GETBYTE(s1, s_i2)], 8) ^
|
||||
rotrFixed(Td0[GETBYTE(s0, s_i1)], 16) ^
|
||||
rotrFixed(Td0[GETBYTE(s3, s_i0)], 24);
|
||||
t3 ^=
|
||||
Td0[GETBYTE(s3, s_i3)] ^
|
||||
rotrFixed(Td0[GETBYTE(s2, s_i2)], 8) ^
|
||||
rotrFixed(Td0[GETBYTE(s1, s_i1)], 16) ^
|
||||
rotrFixed(Td0[GETBYTE(s0, s_i0)], 24);
|
||||
#ifdef IS_BIG_ENDIAN
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
a ^= rotrFixed(Td[byte(t)], 24); t >>= 8;\
|
||||
b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
|
||||
c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
|
||||
d ^= Td[t];
|
||||
#else
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
d ^= Td[byte(t)]; t >>= 8;\
|
||||
c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
|
||||
b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
|
||||
a ^= rotrFixed(Td[t], 24);
|
||||
#endif
|
||||
|
||||
QUARTER_ROUND(s3, t2, t1, t0, t3)
|
||||
QUARTER_ROUND(s2, t1, t0, t3, t2)
|
||||
QUARTER_ROUND(s1, t0, t3, t2, t1)
|
||||
QUARTER_ROUND(s0, t3, t2, t1, t0)
|
||||
#undef QUARTER_ROUND
|
||||
|
||||
// Nr - 2 full rounds:
|
||||
unsigned int r = m_rounds/2 - 1;
|
||||
do
|
||||
{
|
||||
s0 =
|
||||
Td0[GETBYTE(t0, 3)] ^
|
||||
Td1[GETBYTE(t3, 2)] ^
|
||||
Td2[GETBYTE(t2, 1)] ^
|
||||
Td3[GETBYTE(t1, 0)] ^
|
||||
rk[0];
|
||||
s1 =
|
||||
Td0[GETBYTE(t1, 3)] ^
|
||||
Td1[GETBYTE(t0, 2)] ^
|
||||
Td2[GETBYTE(t3, 1)] ^
|
||||
Td3[GETBYTE(t2, 0)] ^
|
||||
rk[1];
|
||||
s2 =
|
||||
Td0[GETBYTE(t2, 3)] ^
|
||||
Td1[GETBYTE(t1, 2)] ^
|
||||
Td2[GETBYTE(t0, 1)] ^
|
||||
Td3[GETBYTE(t3, 0)] ^
|
||||
rk[2];
|
||||
s3 =
|
||||
Td0[GETBYTE(t3, 3)] ^
|
||||
Td1[GETBYTE(t2, 2)] ^
|
||||
Td2[GETBYTE(t1, 1)] ^
|
||||
Td3[GETBYTE(t0, 0)] ^
|
||||
rk[3];
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
a ^= Td[3*256+byte(t)]; t >>= 8;\
|
||||
b ^= Td[2*256+byte(t)]; t >>= 8;\
|
||||
c ^= Td[1*256+byte(t)]; t >>= 8;\
|
||||
d ^= Td[t];
|
||||
|
||||
t0 =
|
||||
Td0[GETBYTE(s0, 3)] ^
|
||||
Td1[GETBYTE(s3, 2)] ^
|
||||
Td2[GETBYTE(s2, 1)] ^
|
||||
Td3[GETBYTE(s1, 0)] ^
|
||||
rk[4];
|
||||
t1 =
|
||||
Td0[GETBYTE(s1, 3)] ^
|
||||
Td1[GETBYTE(s0, 2)] ^
|
||||
Td2[GETBYTE(s3, 1)] ^
|
||||
Td3[GETBYTE(s2, 0)] ^
|
||||
rk[5];
|
||||
t2 =
|
||||
Td0[GETBYTE(s2, 3)] ^
|
||||
Td1[GETBYTE(s1, 2)] ^
|
||||
Td2[GETBYTE(s0, 1)] ^
|
||||
Td3[GETBYTE(s3, 0)] ^
|
||||
rk[6];
|
||||
t3 =
|
||||
Td0[GETBYTE(s3, 3)] ^
|
||||
Td1[GETBYTE(s2, 2)] ^
|
||||
Td2[GETBYTE(s1, 1)] ^
|
||||
Td3[GETBYTE(s0, 0)] ^
|
||||
rk[7];
|
||||
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
|
||||
|
||||
QUARTER_ROUND(t3, s2, s1, s0, s3)
|
||||
QUARTER_ROUND(t2, s1, s0, s3, s2)
|
||||
QUARTER_ROUND(t1, s0, s3, s2, s1)
|
||||
QUARTER_ROUND(t0, s3, s2, s1, s0)
|
||||
|
||||
t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
|
||||
|
||||
QUARTER_ROUND(s3, t2, t1, t0, t3)
|
||||
QUARTER_ROUND(s2, t1, t0, t3, t2)
|
||||
QUARTER_ROUND(s1, t0, t3, t2, t1)
|
||||
QUARTER_ROUND(s0, t3, t2, t1, t0)
|
||||
#undef QUARTER_ROUND
|
||||
|
||||
rk += 8;
|
||||
} while (--r);
|
||||
|
||||
// timing attack countermeasure. see comments at top for more details
|
||||
u = 0;
|
||||
for (i=0; i<sizeof(Sd)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
|
||||
u &= (((word32*)Sd)[i+0*s_lineSizeDiv4] & ((word32*)Sd)[i+2*s_lineSizeDiv4]) & (((word32*)Sd)[i+1*s_lineSizeDiv4] & ((word32*)Sd)[i+3*s_lineSizeDiv4]);
|
||||
for (i=0; i<256; i+=cacheLineSize)
|
||||
u &= *(const word32 *)(Sd+i);
|
||||
u &= *(const word32 *)(Sd+252);
|
||||
t0 |= u; t1 |= u; t2 |= u; t3 |= u;
|
||||
|
||||
word32 tbw[4];
|
||||
|
|
@ -413,23 +581,17 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
|||
word32 *const obw = (word32 *)outBlock;
|
||||
const word32 *const xbw = (const word32 *)xorBlock;
|
||||
|
||||
// last round
|
||||
tempBlock[0] = Sd[GETBYTE(t0, 3)];
|
||||
tempBlock[1] = Sd[GETBYTE(t3, 2)];
|
||||
tempBlock[2] = Sd[GETBYTE(t2, 1)];
|
||||
tempBlock[3] = Sd[GETBYTE(t1, 0)];
|
||||
tempBlock[4] = Sd[GETBYTE(t1, 3)];
|
||||
tempBlock[5] = Sd[GETBYTE(t0, 2)];
|
||||
tempBlock[6] = Sd[GETBYTE(t3, 1)];
|
||||
tempBlock[7] = Sd[GETBYTE(t2, 0)];
|
||||
tempBlock[8] = Sd[GETBYTE(t2, 3)];
|
||||
tempBlock[9] = Sd[GETBYTE(t1, 2)];
|
||||
tempBlock[10] = Sd[GETBYTE(t0, 1)];
|
||||
tempBlock[11] = Sd[GETBYTE(t3, 0)];
|
||||
tempBlock[12] = Sd[GETBYTE(t3, 3)];
|
||||
tempBlock[13] = Sd[GETBYTE(t2, 2)];
|
||||
tempBlock[14] = Sd[GETBYTE(t1, 1)];
|
||||
tempBlock[15] = Sd[GETBYTE(t0, 0)];
|
||||
#define QUARTER_ROUND(t, a, b, c, d) \
|
||||
tempBlock[a] = Sd[byte(t)]; t >>= 8;\
|
||||
tempBlock[b] = Sd[byte(t)]; t >>= 8;\
|
||||
tempBlock[c] = Sd[byte(t)]; t >>= 8;\
|
||||
tempBlock[d] = Sd[t];
|
||||
|
||||
QUARTER_ROUND(t2, 7, 2, 13, 8)
|
||||
QUARTER_ROUND(t1, 3, 14, 9, 4)
|
||||
QUARTER_ROUND(t0, 15, 10, 5, 0)
|
||||
QUARTER_ROUND(t3, 11, 6, 1, 12)
|
||||
#undef QUARTER_ROUND
|
||||
|
||||
if (xbw)
|
||||
{
|
||||
|
|
|
|||
15
rijndael.h
15
rijndael.h
|
|
@ -25,16 +25,10 @@ class CRYPTOPP_DLL Rijndael : public Rijndael_Info, public BlockCipherDocumentat
|
|||
|
||||
protected:
|
||||
// VS2005 workaround: have to put these on separate lines, or error C2487 is triggered in DLL build
|
||||
CRYPTOPP_L1_CACHE_ALIGN(static const byte Se[256]);
|
||||
CRYPTOPP_L1_CACHE_ALIGN(static const byte Sd[256]);
|
||||
CRYPTOPP_L1_CACHE_ALIGN(static const word32 Te0[256]);
|
||||
static const word32 Te1[256];
|
||||
static const word32 Te2[256];
|
||||
static const word32 Te3[256];
|
||||
CRYPTOPP_L1_CACHE_ALIGN(static const word32 Td0[256]);
|
||||
static const word32 Td1[256];
|
||||
static const word32 Td2[256];
|
||||
static const word32 Td3[256];
|
||||
static const byte Se[256];
|
||||
static const byte Sd[256];
|
||||
static const word32 Te[4*256];
|
||||
static const word32 Td[4*256];
|
||||
|
||||
static const word32 rcon[];
|
||||
|
||||
|
|
@ -52,6 +46,7 @@ class CRYPTOPP_DLL Rijndael : public Rijndael_Info, public BlockCipherDocumentat
|
|||
{
|
||||
public:
|
||||
void ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const;
|
||||
void ProcessAndXorBlock_Old(const byte *inBlock, const byte *xorBlock, byte *outBlock) const;
|
||||
};
|
||||
|
||||
public:
|
||||
|
|
|
|||
444
sha.cpp
444
sha.cpp
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "sha.h"
|
||||
#include "misc.h"
|
||||
#include "cpu.h"
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
|
|
@ -74,27 +75,43 @@ void SHA1::Transform(word32 *state, const word32 *data)
|
|||
state[2] += c;
|
||||
state[3] += d;
|
||||
state[4] += e;
|
||||
/* Wipe variables */
|
||||
a = b = c = d = e = 0;
|
||||
memset(W, 0, sizeof(W));
|
||||
}
|
||||
|
||||
// end of Steve Reid's code
|
||||
|
||||
// *************************************************************
|
||||
|
||||
// Sets the starting hash state for SHA-224.
// state: destination buffer; receives sizeof(s) bytes, i.e. eight 32-bit words.
// NOTE(review): the memcpy assumes HashWordType has the same size as word32 --
// confirm against the class declaration in sha.h.
void SHA224::InitState(HashWordType *state)
|
||||
{
|
||||
// The eight initial words are kept in a function-local constant table.
static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
|
||||
// Copy the whole table into the caller's buffer in one call.
memcpy(state, s, sizeof(s));
|
||||
}
|
||||
|
||||
void SHA256::InitState(HashWordType *state)
|
||||
{
|
||||
state[0] = 0x6a09e667;
|
||||
state[1] = 0xbb67ae85;
|
||||
state[2] = 0x3c6ef372;
|
||||
state[3] = 0xa54ff53a;
|
||||
state[4] = 0x510e527f;
|
||||
state[5] = 0x9b05688c;
|
||||
state[6] = 0x1f83d9ab;
|
||||
state[7] = 0x5be0cd19;
|
||||
static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
|
||||
memcpy(state, s, sizeof(s));
|
||||
}
|
||||
|
||||
// Table of 64 32-bit constants, declared static so it is private to this
// translation unit. The R(i) round macro in this file reads it as SHA256_K[i+j].
static const word32 SHA256_K[64] = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
};
|
||||
|
||||
#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
|
||||
|
||||
#define Ch(x,y,z) (z^(x&(y^z)))
|
||||
|
|
@ -109,7 +126,7 @@ void SHA256::InitState(HashWordType *state)
|
|||
#define g(i) T[(6-i)&7]
|
||||
#define h(i) T[(7-i)&7]
|
||||
|
||||
#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\
|
||||
#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
|
||||
d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
|
||||
|
||||
// for SHA256
|
||||
|
|
@ -141,98 +158,114 @@ void SHA256::Transform(word32 *state, const word32 *data)
|
|||
state[5] += f(0);
|
||||
state[6] += g(0);
|
||||
state[7] += h(0);
|
||||
/* Wipe variables */
|
||||
memset(W, 0, sizeof(W));
|
||||
memset(T, 0, sizeof(T));
|
||||
}
|
||||
|
||||
/*
|
||||
// smaller but slower
|
||||
void SHA256_Transform(word32 *state, const word32 *data)
|
||||
{
|
||||
word32 T[20];
|
||||
word32 W[32];
|
||||
unsigned int i = 0, j = 0;
|
||||
word32 *t = T+8;
|
||||
|
||||
memcpy(t, state, 8*4);
|
||||
word32 e = t[4], a = t[0];
|
||||
|
||||
do
|
||||
{
|
||||
word32 w = data[j];
|
||||
W[j] = w;
|
||||
w += K[j];
|
||||
w += t[7];
|
||||
w += S1(e);
|
||||
w += Ch(e, t[5], t[6]);
|
||||
e = t[3] + w;
|
||||
t[3] = t[3+8] = e;
|
||||
w += S0(t[0]);
|
||||
a = w + Maj(a, t[1], t[2]);
|
||||
t[-1] = t[7] = a;
|
||||
--t;
|
||||
++j;
|
||||
if (j%8 == 0)
|
||||
t += 8;
|
||||
} while (j<16);
|
||||
|
||||
do
|
||||
{
|
||||
i = j&0xf;
|
||||
word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
|
||||
W[i+16] = W[i] = w;
|
||||
w += K[j];
|
||||
w += t[7];
|
||||
w += S1(e);
|
||||
w += Ch(e, t[5], t[6]);
|
||||
e = t[3] + w;
|
||||
t[3] = t[3+8] = e;
|
||||
w += S0(t[0]);
|
||||
a = w + Maj(a, t[1], t[2]);
|
||||
t[-1] = t[7] = a;
|
||||
|
||||
w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
|
||||
W[(i+1)+16] = W[(i+1)] = w;
|
||||
w += K[j+1];
|
||||
w += (t-1)[7];
|
||||
w += S1(e);
|
||||
w += Ch(e, (t-1)[5], (t-1)[6]);
|
||||
e = (t-1)[3] + w;
|
||||
(t-1)[3] = (t-1)[3+8] = e;
|
||||
w += S0((t-1)[0]);
|
||||
a = w + Maj(a, (t-1)[1], (t-1)[2]);
|
||||
(t-1)[-1] = (t-1)[7] = a;
|
||||
|
||||
t-=2;
|
||||
j+=2;
|
||||
if (j%8 == 0)
|
||||
t += 8;
|
||||
} while (j<64);
|
||||
|
||||
state[0] += a;
|
||||
state[1] += t[1];
|
||||
state[2] += t[2];
|
||||
state[3] += t[3];
|
||||
state[4] += e;
|
||||
state[5] += t[5];
|
||||
state[6] += t[6];
|
||||
state[7] += t[7];
|
||||
}
|
||||
*/
|
||||
|
||||
#undef S0
|
||||
#undef S1
|
||||
#undef s0
|
||||
#undef s1
|
||||
|
||||
const word32 SHA256::K[64] = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
};
|
||||
|
||||
void SHA224::InitState(HashWordType *state)
|
||||
{
|
||||
state[0] = 0xc1059ed8;
|
||||
state[1] = 0x367cd507;
|
||||
state[2] = 0x3070dd17;
|
||||
state[3] = 0xf70e5939;
|
||||
state[4] = 0xffc00b31;
|
||||
state[5] = 0x68581511;
|
||||
state[6] = 0x64f98fa7;
|
||||
state[7] = 0xbefa4fa4;
|
||||
}
|
||||
#undef R
|
||||
|
||||
// *************************************************************
|
||||
|
||||
#ifdef WORD64_AVAILABLE
|
||||
|
||||
// Sets the starting hash state for SHA-384.
// state: destination buffer; receives sizeof(s) bytes, i.e. eight 64-bit words.
// NOTE(review): the memcpy assumes HashWordType has the same size as word64 --
// confirm against the class declaration in sha.h.
void SHA384::InitState(HashWordType *state)
|
||||
{
|
||||
// The eight initial words are kept in a function-local constant table;
// W64LIT wraps each 64-bit literal.
static const word64 s[8] = {
|
||||
W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
|
||||
W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
|
||||
W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
|
||||
W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
|
||||
// Copy the whole table into the caller's buffer in one call.
memcpy(state, s, sizeof(s));
|
||||
}
|
||||
|
||||
// Seeds the eight 64-bit words of state with the SHA-512 initial hash value.
// The values live in a static table and are copied in a single memcpy of
// sizeof(s) bytes, so state must point to at least eight words.
void SHA512::InitState(HashWordType *state)
{
	static const word64 s[8] = {
		W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
		W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
		W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
		W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
	memcpy(state, s, sizeof(s));
}
|
||||
|
||||
// for SHA512
|
||||
#define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
|
||||
#define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
|
||||
#define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
|
||||
#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))
|
||||
|
||||
void SHA512::Transform(word64 *state, const word64 *data)
|
||||
{
|
||||
word64 W[16];
|
||||
word64 T[8];
|
||||
/* Copy context->state[] to working vars */
|
||||
memcpy(T, state, sizeof(T));
|
||||
/* 80 operations, partially loop unrolled */
|
||||
for (unsigned int j=0; j<80; j+=16)
|
||||
{
|
||||
R( 0); R( 1); R( 2); R( 3);
|
||||
R( 4); R( 5); R( 6); R( 7);
|
||||
R( 8); R( 9); R(10); R(11);
|
||||
R(12); R(13); R(14); R(15);
|
||||
}
|
||||
/* Add the working vars back into context.state[] */
|
||||
state[0] += a(0);
|
||||
state[1] += b(0);
|
||||
state[2] += c(0);
|
||||
state[3] += d(0);
|
||||
state[4] += e(0);
|
||||
state[5] += f(0);
|
||||
state[6] += g(0);
|
||||
state[7] += h(0);
|
||||
/* Wipe variables */
|
||||
memset(W, 0, sizeof(W));
|
||||
memset(T, 0, sizeof(T));
|
||||
}
|
||||
|
||||
const word64 SHA512::K[80] = {
|
||||
CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
|
||||
W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
|
||||
W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
|
||||
W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
|
||||
|
|
@ -275,16 +308,231 @@ const word64 SHA512::K[80] = {
|
|||
W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
|
||||
};
|
||||
|
||||
void SHA384::InitState(HashWordType *state)
|
||||
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||
// put assembly version in separate function, otherwise MSVC 2005 SP1 doesn't generate correct code for the non-assembly version
|
||||
static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
|
||||
{
|
||||
state[0] = W64LIT(0xcbbb9d5dc1059ed8);
|
||||
state[1] = W64LIT(0x629a292a367cd507);
|
||||
state[2] = W64LIT(0x9159015a3070dd17);
|
||||
state[3] = W64LIT(0x152fecd8f70e5939);
|
||||
state[4] = W64LIT(0x67332667ffc00b31);
|
||||
state[5] = W64LIT(0x8eb44a8768581511);
|
||||
state[6] = W64LIT(0xdb0c2e0d64f98fa7);
|
||||
state[7] = W64LIT(0x47b5481dbefa4fa4);
|
||||
#ifdef __GNUC__
|
||||
__asm__ __volatile__
|
||||
(
|
||||
".intel_syntax noprefix;"
|
||||
AS1( push ebx)
|
||||
AS2( mov ebx, eax)
|
||||
#else
|
||||
AS2( lea ebx, SHA512_K)
|
||||
#endif
|
||||
|
||||
AS2( mov eax, esp)
|
||||
AS2( and esp, 0xfffffff0)
|
||||
AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
|
||||
AS1( push eax)
|
||||
AS2( xor eax, eax)
|
||||
AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
|
||||
AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
|
||||
|
||||
AS2( movq mm4, [ecx+0*8])
|
||||
AS2( movq [edi+0*8], mm4)
|
||||
AS2( movq mm0, [ecx+1*8])
|
||||
AS2( movq [edi+1*8], mm0)
|
||||
AS2( movq mm0, [ecx+2*8])
|
||||
AS2( movq [edi+2*8], mm0)
|
||||
AS2( movq mm0, [ecx+3*8])
|
||||
AS2( movq [edi+3*8], mm0)
|
||||
AS2( movq mm5, [ecx+4*8])
|
||||
AS2( movq [edi+4*8], mm5)
|
||||
AS2( movq mm0, [ecx+5*8])
|
||||
AS2( movq [edi+5*8], mm0)
|
||||
AS2( movq mm0, [ecx+6*8])
|
||||
AS2( movq [edi+6*8], mm0)
|
||||
AS2( movq mm0, [ecx+7*8])
|
||||
AS2( movq [edi+7*8], mm0)
|
||||
ASJ( jmp, 0, f)
|
||||
|
||||
#define SSE2_S0_S1(r, a, b, c) \
|
||||
AS2( movq mm6, r)\
|
||||
AS2( psrlq r, a)\
|
||||
AS2( movq mm7, r)\
|
||||
AS2( psllq mm6, 64-c)\
|
||||
AS2( pxor mm7, mm6)\
|
||||
AS2( psrlq r, b-a)\
|
||||
AS2( pxor mm7, r)\
|
||||
AS2( psllq mm6, c-b)\
|
||||
AS2( pxor mm7, mm6)\
|
||||
AS2( psrlq r, c-b)\
|
||||
AS2( pxor r, mm7)\
|
||||
AS2( psllq mm6, b-a)\
|
||||
AS2( pxor r, mm6)
|
||||
|
||||
#define SSE2_s0(r, a, b, c) \
|
||||
AS2( movdqa xmm6, r)\
|
||||
AS2( psrlq r, a)\
|
||||
AS2( movdqa xmm7, r)\
|
||||
AS2( psllq xmm6, 64-c)\
|
||||
AS2( pxor xmm7, xmm6)\
|
||||
AS2( psrlq r, b-a)\
|
||||
AS2( pxor xmm7, r)\
|
||||
AS2( psrlq r, c-b)\
|
||||
AS2( pxor r, xmm7)\
|
||||
AS2( psllq xmm6, c-a)\
|
||||
AS2( pxor r, xmm6)
|
||||
|
||||
#define SSE2_s1(r, a, b, c) \
|
||||
AS2( movdqa xmm6, r)\
|
||||
AS2( psrlq r, a)\
|
||||
AS2( movdqa xmm7, r)\
|
||||
AS2( psllq xmm6, 64-c)\
|
||||
AS2( pxor xmm7, xmm6)\
|
||||
AS2( psrlq r, b-a)\
|
||||
AS2( pxor xmm7, r)\
|
||||
AS2( psllq xmm6, c-b)\
|
||||
AS2( pxor xmm7, xmm6)\
|
||||
AS2( psrlq r, c-b)\
|
||||
AS2( pxor r, xmm7)
|
||||
|
||||
ASL(SHA512_Round)
|
||||
// k + w is in mm0, a is in mm4, e is in mm5
|
||||
AS2( paddq mm0, [edi+7*8]) // h
|
||||
AS2( movq mm2, [edi+5*8]) // f
|
||||
AS2( movq mm3, [edi+6*8]) // g
|
||||
AS2( pxor mm2, mm3)
|
||||
AS2( pand mm2, mm5)
|
||||
SSE2_S0_S1(mm5,14,18,41)
|
||||
AS2( pxor mm2, mm3)
|
||||
AS2( paddq mm0, mm2) // h += Ch(e,f,g)
|
||||
AS2( paddq mm5, mm0) // h += S1(e)
|
||||
AS2( movq mm2, [edi+1*8]) // b
|
||||
AS2( movq mm1, mm2)
|
||||
AS2( por mm2, mm4)
|
||||
AS2( pand mm2, [edi+2*8]) // c
|
||||
AS2( pand mm1, mm4)
|
||||
AS2( por mm1, mm2)
|
||||
AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
|
||||
AS2( paddq mm5, [edi+3*8]) // e = d + h
|
||||
AS2( movq [edi+3*8], mm5)
|
||||
AS2( movq [edi+11*8], mm5)
|
||||
SSE2_S0_S1(mm4,28,34,39) // S0(a)
|
||||
AS2( paddq mm4, mm1) // a = temp + S0(a)
|
||||
AS2( movq [edi-8], mm4)
|
||||
AS2( movq [edi+7*8], mm4)
|
||||
AS1( ret)
|
||||
|
||||
// first 16 rounds
|
||||
ASL(0)
|
||||
AS2( movq mm0, [edx+eax*8])
|
||||
AS2( movq [esi+eax*8], mm0)
|
||||
AS2( movq [esi+eax*8+16*8], mm0)
|
||||
AS2( paddq mm0, [ebx+eax*8])
|
||||
ASC( call, SHA512_Round)
|
||||
AS1( inc eax)
|
||||
AS2( sub edi, 8)
|
||||
AS2( test eax, 7)
|
||||
ASJ( jnz, 0, b)
|
||||
AS2( add edi, 8*8)
|
||||
AS2( cmp eax, 16)
|
||||
ASJ( jne, 0, b)
|
||||
|
||||
// rest of the rounds
|
||||
AS2( movdqu xmm0, [esi+(16-2)*8])
|
||||
ASL(1)
|
||||
// data expansion, W[i-2] already in xmm0
|
||||
AS2( movdqu xmm3, [esi])
|
||||
AS2( paddq xmm3, [esi+(16-7)*8])
|
||||
AS2( movdqa xmm2, [esi+(16-15)*8])
|
||||
SSE2_s1(xmm0, 6, 19, 61)
|
||||
AS2( paddq xmm0, xmm3)
|
||||
SSE2_s0(xmm2, 1, 7, 8)
|
||||
AS2( paddq xmm0, xmm2)
|
||||
AS2( movdq2q mm0, xmm0)
|
||||
AS2( movhlps xmm1, xmm0)
|
||||
AS2( paddq mm0, [ebx+eax*8])
|
||||
AS2( movlps [esi], xmm0)
|
||||
AS2( movlps [esi+8], xmm1)
|
||||
AS2( movlps [esi+8*16], xmm0)
|
||||
AS2( movlps [esi+8*17], xmm1)
|
||||
// 2 rounds
|
||||
ASC( call, SHA512_Round)
|
||||
AS2( sub edi, 8)
|
||||
AS2( movdq2q mm0, xmm1)
|
||||
AS2( paddq mm0, [ebx+eax*8+8])
|
||||
ASC( call, SHA512_Round)
|
||||
// update indices and loop
|
||||
AS2( add esi, 16)
|
||||
AS2( add eax, 2)
|
||||
AS2( sub edi, 8)
|
||||
AS2( test eax, 7)
|
||||
ASJ( jnz, 1, b)
|
||||
// do housekeeping every 8 rounds
|
||||
AS2( mov esi, 0xf)
|
||||
AS2( and esi, eax)
|
||||
AS2( lea esi, [esp+4+20*8+8+esi*8])
|
||||
AS2( add edi, 8*8)
|
||||
AS2( cmp eax, 80)
|
||||
ASJ( jne, 1, b)
|
||||
|
||||
#define SSE2_CombineState(i) \
|
||||
AS2( movq mm0, [edi+i*8])\
|
||||
AS2( paddq mm0, [ecx+i*8])\
|
||||
AS2( movq [ecx+i*8], mm0)
|
||||
|
||||
SSE2_CombineState(0)
|
||||
SSE2_CombineState(1)
|
||||
SSE2_CombineState(2)
|
||||
SSE2_CombineState(3)
|
||||
SSE2_CombineState(4)
|
||||
SSE2_CombineState(5)
|
||||
SSE2_CombineState(6)
|
||||
SSE2_CombineState(7)
|
||||
|
||||
AS1( pop esp)
|
||||
AS1( emms)
|
||||
|
||||
#ifdef __GNUC__
|
||||
AS1( pop ebx)
|
||||
".att_syntax prefix;"
|
||||
:
|
||||
: "a" (SHA512_K), "c" (state), "d" (data)
|
||||
: "%esi", "%edi", "memory", "cc"
|
||||
);
|
||||
#endif
|
||||
}
|
||||
#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||
|
||||
// Applies the SHA-512 compression function: updates the eight words of
// state in place using the message block pointed to by data.
void SHA512::Transform(word64 *state, const word64 *data)
|
||||
{
|
||||
// When the SSE2 assembly routine was compiled in and HasSSE2() reports
// support at run time, hand the whole block to it and return; otherwise
// fall through to the portable implementation below.
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||
if (HasSSE2())
|
||||
return SHA512_SSE2_Transform(state, data);
|
||||
#endif
|
||||
|
||||
// SHA-512 "big sigma" functions (rotation amounts 28/34/39 and 14/18/41).
#define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
|
||||
#define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
|
||||
// SHA-512 "small sigma" functions: two rotations and one plain right shift.
#define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
|
||||
#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))
|
||||
|
||||
// R(i) performs round i+j: h += S1(e) + Ch(e,f,g) + SHA512_K[i+j] + message word,
// then d += h, then h += S0(a) + Maj(a,b,c). The message word comes from
// blk0(i) while j == 0 (first 16 rounds) and from blk2(i) afterwards.
// NOTE(review): a(i)..h(i), blk0, blk2, Ch and Maj are defined outside this
// block; presumably they index into T, W and data - confirm against their definitions.
#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\
|
||||
d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
|
||||
|
||||
// W: 16-word scratch buffer (presumably the message schedule used by blk0/blk2 - TODO confirm).
word64 W[16];
|
||||
// T: working copy of the eight state words.
word64 T[8];
|
||||
/* Copy context->state[] to working vars */
|
||||
memcpy(T, state, sizeof(T));
|
||||
/* 80 operations, partially loop unrolled */
|
||||
// Five iterations (j = 0, 16, 32, 48, 64), each running 16 unrolled rounds.
for (unsigned int j=0; j<80; j+=16)
|
||||
{
|
||||
R( 0); R( 1); R( 2); R( 3);
|
||||
R( 4); R( 5); R( 6); R( 7);
|
||||
R( 8); R( 9); R(10); R(11);
|
||||
R(12); R(13); R(14); R(15);
|
||||
}
|
||||
/* Add the working vars back into context.state[] */
|
||||
state[0] += a(0);
|
||||
state[1] += b(0);
|
||||
state[2] += c(0);
|
||||
state[3] += d(0);
|
||||
state[4] += e(0);
|
||||
state[5] += f(0);
|
||||
state[6] += g(0);
|
||||
state[7] += h(0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
6
sha.h
6
sha.h
|
|
@ -23,9 +23,6 @@ public:
|
|||
static void CRYPTOPP_API InitState(HashWordType *state);
|
||||
static void CRYPTOPP_API Transform(word32 *digest, const word32 *data);
|
||||
static const char * CRYPTOPP_API StaticAlgorithmName() {return "SHA-256";}
|
||||
|
||||
protected:
|
||||
static const word32 K[64];
|
||||
};
|
||||
|
||||
//! implements the SHA-224 standard
|
||||
|
|
@ -46,9 +43,6 @@ public:
|
|||
static void CRYPTOPP_API InitState(HashWordType *state);
|
||||
static void CRYPTOPP_API Transform(word64 *digest, const word64 *data);
|
||||
static const char * CRYPTOPP_API StaticAlgorithmName() {return "SHA-512";}
|
||||
|
||||
protected:
|
||||
static const word64 K[80];
|
||||
};
|
||||
|
||||
//! implements the SHA-384 standard
|
||||
|
|
|
|||
211
tiger.cpp
211
tiger.cpp
|
|
@ -3,6 +3,7 @@
|
|||
#include "pch.h"
|
||||
#include "tiger.h"
|
||||
#include "misc.h"
|
||||
#include "cpu.h"
|
||||
|
||||
#ifdef WORD64_AVAILABLE
|
||||
|
||||
|
|
@ -24,13 +25,187 @@ void Tiger::TruncatedFinal(byte *hash, size_t size)
|
|||
|
||||
m_data[7] = GetBitCountLo();
|
||||
|
||||
Transform(m_digest, m_data);
|
||||
CorrectEndianess(m_digest, m_digest, DigestSize());
|
||||
memcpy(hash, m_digest, size);
|
||||
Transform(m_state, m_data);
|
||||
CorrectEndianess(m_state, m_state, DigestSize());
|
||||
memcpy(hash, m_state, size);
|
||||
|
||||
Restart(); // reinit for next use
|
||||
}
|
||||
|
||||
void Tiger::Transform (word64 *digest, const word64 *X)
|
||||
{
|
||||
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||
if (HasSSE2())
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
__asm__ __volatile__
|
||||
(
|
||||
".intel_syntax noprefix;"
|
||||
AS1( push ebx)
|
||||
#else
|
||||
AS2( mov eax, digest)
|
||||
AS2( mov esi, X)
|
||||
AS2( lea edx, [table])
|
||||
#endif
|
||||
AS2( movq mm0, [eax])
|
||||
AS2( movq mm1, [eax+1*8])
|
||||
AS2( movq mm5, mm1)
|
||||
AS2( movq mm2, [eax+2*8])
|
||||
AS2( movq mm7, [edx+4*2048+0*8])
|
||||
AS2( movq mm6, [edx+4*2048+1*8])
|
||||
AS2( mov ecx, esp)
|
||||
AS2( and esp, 0xfffffff0)
|
||||
AS2( sub esp, 8*8)
|
||||
AS1( push ecx)
|
||||
|
||||
#define SSE2_round(a,b,c,x,mul) \
|
||||
AS2( pxor c, [x])\
|
||||
AS2( movd ecx, c)\
|
||||
AS2( movzx edi, cl)\
|
||||
AS2( movq mm3, [edx+0*2048+edi*8])\
|
||||
AS2( movzx edi, ch)\
|
||||
AS2( movq mm4, [edx+3*2048+edi*8])\
|
||||
AS2( shr ecx, 16)\
|
||||
AS2( movzx edi, cl)\
|
||||
AS2( pxor mm3, [edx+1*2048+edi*8])\
|
||||
AS2( movzx edi, ch)\
|
||||
AS2( pxor mm4, [edx+2*2048+edi*8])\
|
||||
AS3( pextrw ecx, c, 2)\
|
||||
AS2( movzx edi, cl)\
|
||||
AS2( pxor mm3, [edx+2*2048+edi*8])\
|
||||
AS2( movzx edi, ch)\
|
||||
AS2( pxor mm4, [edx+1*2048+edi*8])\
|
||||
AS3( pextrw ecx, c, 3)\
|
||||
AS2( movzx edi, cl)\
|
||||
AS2( pxor mm3, [edx+3*2048+edi*8])\
|
||||
AS2( psubq a, mm3)\
|
||||
AS2( movzx edi, ch)\
|
||||
AS2( pxor mm4, [edx+0*2048+edi*8])\
|
||||
AS2( paddq b, mm4)\
|
||||
SSE2_mul_##mul(b)
|
||||
|
||||
#define SSE2_mul_5(b) \
|
||||
AS2( movq mm3, b)\
|
||||
AS2( psllq b, 2)\
|
||||
AS2( paddq b, mm3)
|
||||
|
||||
#define SSE2_mul_7(b) \
|
||||
AS2( movq mm3, b)\
|
||||
AS2( psllq b, 3)\
|
||||
AS2( psubq b, mm3)
|
||||
|
||||
#define SSE2_mul_9(b) \
|
||||
AS2( movq mm3, b)\
|
||||
AS2( psllq b, 3)\
|
||||
AS2( paddq b, mm3)
|
||||
|
||||
#define label2_5 1
|
||||
#define label2_7 2
|
||||
#define label2_9 3
|
||||
|
||||
#define SSE2_pass(A,B,C,mul,X) \
|
||||
AS2( xor ebx, ebx)\
|
||||
ASL(mul)\
|
||||
SSE2_round(A,B,C,X+0*8+ebx,mul)\
|
||||
SSE2_round(B,C,A,X+1*8+ebx,mul)\
|
||||
AS2( cmp ebx, 6*8)\
|
||||
ASJ( je, label2_##mul, f)\
|
||||
SSE2_round(C,A,B,X+2*8+ebx,mul)\
|
||||
AS2( add ebx, 3*8)\
|
||||
ASJ( jmp, mul, b)\
|
||||
ASL(label2_##mul)
|
||||
|
||||
#define SSE2_key_schedule(Y,X) \
|
||||
AS2( movq mm3, [X+7*8])\
|
||||
AS2( pxor mm3, mm6)\
|
||||
AS2( movq mm4, [X+0*8])\
|
||||
AS2( psubq mm4, mm3)\
|
||||
AS2( movq [Y+0*8], mm4)\
|
||||
AS2( pxor mm4, [X+1*8])\
|
||||
AS2( movq mm3, mm4)\
|
||||
AS2( movq [Y+1*8], mm4)\
|
||||
AS2( paddq mm4, [X+2*8])\
|
||||
AS2( pxor mm3, mm7)\
|
||||
AS2( psllq mm3, 19)\
|
||||
AS2( movq [Y+2*8], mm4)\
|
||||
AS2( pxor mm3, mm4)\
|
||||
AS2( movq mm4, [X+3*8])\
|
||||
AS2( psubq mm4, mm3)\
|
||||
AS2( movq [Y+3*8], mm4)\
|
||||
AS2( pxor mm4, [X+4*8])\
|
||||
AS2( movq mm3, mm4)\
|
||||
AS2( movq [Y+4*8], mm4)\
|
||||
AS2( paddq mm4, [X+5*8])\
|
||||
AS2( pxor mm3, mm7)\
|
||||
AS2( psrlq mm3, 23)\
|
||||
AS2( movq [Y+5*8], mm4)\
|
||||
AS2( pxor mm3, mm4)\
|
||||
AS2( movq mm4, [X+6*8])\
|
||||
AS2( psubq mm4, mm3)\
|
||||
AS2( movq [Y+6*8], mm4)\
|
||||
AS2( pxor mm4, [X+7*8])\
|
||||
AS2( movq mm3, mm4)\
|
||||
AS2( movq [Y+7*8], mm4)\
|
||||
AS2( paddq mm4, [Y+0*8])\
|
||||
AS2( pxor mm3, mm7)\
|
||||
AS2( psllq mm3, 19)\
|
||||
AS2( movq [Y+0*8], mm4)\
|
||||
AS2( pxor mm3, mm4)\
|
||||
AS2( movq mm4, [Y+1*8])\
|
||||
AS2( psubq mm4, mm3)\
|
||||
AS2( movq [Y+1*8], mm4)\
|
||||
AS2( pxor mm4, [Y+2*8])\
|
||||
AS2( movq mm3, mm4)\
|
||||
AS2( movq [Y+2*8], mm4)\
|
||||
AS2( paddq mm4, [Y+3*8])\
|
||||
AS2( pxor mm3, mm7)\
|
||||
AS2( psrlq mm3, 23)\
|
||||
AS2( movq [Y+3*8], mm4)\
|
||||
AS2( pxor mm3, mm4)\
|
||||
AS2( movq mm4, [Y+4*8])\
|
||||
AS2( psubq mm4, mm3)\
|
||||
AS2( movq [Y+4*8], mm4)\
|
||||
AS2( pxor mm4, [Y+5*8])\
|
||||
AS2( movq [Y+5*8], mm4)\
|
||||
AS2( paddq mm4, [Y+6*8])\
|
||||
AS2( movq [Y+6*8], mm4)\
|
||||
AS2( pxor mm4, [edx+4*2048+2*8])\
|
||||
AS2( movq mm3, [Y+7*8])\
|
||||
AS2( psubq mm3, mm4)\
|
||||
AS2( movq [Y+7*8], mm3)
|
||||
|
||||
SSE2_pass(mm0, mm1, mm2, 5, esi)
|
||||
SSE2_key_schedule(esp+4, esi)
|
||||
SSE2_pass(mm2, mm0, mm1, 7, esp+4)
|
||||
SSE2_key_schedule(esp+4, esp+4)
|
||||
SSE2_pass(mm1, mm2, mm0, 9, esp+4)
|
||||
|
||||
AS2( pxor mm0, [eax+0*8])
|
||||
AS2( movq [eax+0*8], mm0)
|
||||
AS2( psubq mm1, mm5)
|
||||
AS2( movq [eax+1*8], mm1)
|
||||
AS2( paddq mm2, [eax+2*8])
|
||||
AS2( movq [eax+2*8], mm2)
|
||||
|
||||
AS1( pop esp)
|
||||
AS1( emms)
|
||||
#ifdef __GNUC__
|
||||
AS1( pop ebx)
|
||||
".att_syntax prefix;"
|
||||
:
|
||||
: "a" (digest), "S" (X), "d" (table)
|
||||
: "%ecx", "%edi", "memory", "cc"
|
||||
);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
word64 a = digest[0];
|
||||
word64 b = digest[1];
|
||||
word64 c = digest[2];
|
||||
word64 Y[8];
|
||||
|
||||
#define t1 (table)
|
||||
#define t2 (table+256)
|
||||
#define t3 (table+256*2)
|
||||
|
|
@ -42,15 +217,17 @@ void Tiger::TruncatedFinal(byte *hash, size_t size)
|
|||
b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \
|
||||
b *= mul
|
||||
|
||||
// pass(a,b,c,mul,X): runs the eight rounds of one pass over X[0..7].
// The register roles rotate every round: (a,b,c), (b,c,a), (c,a,b), ...
// The loop body issues three rounds per iteration (i = 0, 3, 6); on the last
// iteration only X[6] and X[7] remain, so it breaks out after the second round.
// The round sequence is therefore
//   round(a,b,c,X[0]) round(b,c,a,X[1]) round(c,a,b,X[2])
//   round(a,b,c,X[3]) round(b,c,a,X[4]) round(c,a,b,X[5])
//   round(a,b,c,X[6]) round(b,c,a,X[7])
// Defined exactly once: a second #define with a different replacement list
// would be an ill-formed macro redefinition.
// Relies on the round(a,b,c,x,mul) macro being defined before pass is used.
#define pass(a,b,c,mul,X) {\
	int i=0;\
	while (true)\
	{\
		round(a,b,c,X[i+0],mul); \
		round(b,c,a,X[i+1],mul); \
		if (i==6)\
			break;\
		round(c,a,b,X[i+2],mul); \
		i+=3;\
	}}
|
||||
|
||||
#define key_schedule(Y,X) \
|
||||
Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \
|
||||
|
|
@ -70,13 +247,6 @@ void Tiger::TruncatedFinal(byte *hash, size_t size)
|
|||
Y[6] += Y[5]; \
|
||||
Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF)
|
||||
|
||||
void Tiger::Transform (word64 *digest, const word64 *X)
|
||||
{
|
||||
word64 a = digest[0];
|
||||
word64 b = digest[1];
|
||||
word64 c = digest[2];
|
||||
word64 Y[8];
|
||||
|
||||
pass(a,b,c,5,X);
|
||||
key_schedule(Y,X);
|
||||
pass(c,a,b,7,Y);
|
||||
|
|
@ -86,8 +256,7 @@ void Tiger::Transform (word64 *digest, const word64 *X)
|
|||
digest[0] = a ^ digest[0];
|
||||
digest[1] = b - digest[1];
|
||||
digest[2] = c + digest[2];
|
||||
|
||||
memset(Y, 0, sizeof(Y));
|
||||
}
|
||||
}
|
||||
|
||||
NAMESPACE_END
|
||||
|
|
|
|||
4
tiger.h
4
tiger.h
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
/// <a href="http://www.weidai.com/scan-mirror/md.html#Tiger">Tiger</a>
|
||||
/// <a href="http://www.cryptolounge.org/wiki/Tiger">Tiger</a>
|
||||
class Tiger : public IteratedHashWithStaticTransform<word64, LittleEndian, 64, 24, Tiger>
|
||||
{
|
||||
public:
|
||||
|
|
@ -19,7 +19,7 @@ public:
|
|||
static const char * StaticAlgorithmName() {return "Tiger";}
|
||||
|
||||
protected:
|
||||
static const word64 table[4*256];
|
||||
static const word64 table[4*256+3];
|
||||
};
|
||||
|
||||
NAMESPACE_END
|
||||
|
|
|
|||
328
whrlpool.cpp
328
whrlpool.cpp
|
|
@ -1,7 +1,7 @@
|
|||
// Whrlpool.cpp - modified by Kevin Springle from
|
||||
// whrlpool.cpp - originally modified by Kevin Springle from
|
||||
// Paulo Barreto and Vincent Rijmen's public domain code, whirlpool.c.
|
||||
// Updated to Whirlpool version 3.0, optimized and MMX version added by Wei Dai
|
||||
// Any modifications are placed in the public domain
|
||||
// Updated to Whirlpool version 3.0 by Wei Dai
|
||||
|
||||
// This is the original introductory comment:
|
||||
|
||||
|
|
@ -69,6 +69,7 @@
|
|||
|
||||
#include "whrlpool.h"
|
||||
#include "misc.h"
|
||||
#include "cpu.h"
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
|
|
@ -94,9 +95,9 @@ void Whirlpool::TruncatedFinal(byte *hash, size_t size)
|
|||
m_data[m_data.size()-2] = GetBitCountHi();
|
||||
m_data[m_data.size()-1] = GetBitCountLo();
|
||||
|
||||
Transform(m_digest, m_data);
|
||||
CorrectEndianess(m_digest, m_digest, DigestSize());
|
||||
memcpy(hash, m_digest, size);
|
||||
Transform(m_state, m_data);
|
||||
CorrectEndianess(m_state, m_state, DigestSize());
|
||||
memcpy(hash, m_state, size);
|
||||
|
||||
Restart(); // reinit for next use
|
||||
}
|
||||
|
|
@ -113,7 +114,7 @@ void Whirlpool::TruncatedFinal(byte *hash, size_t size)
|
|||
* employed).
|
||||
*/
|
||||
|
||||
static const word64 C0[256] = {
|
||||
CRYPTOPP_ALIGN_DATA(16) static const word64 Whirlpool_C[4*256+R] CRYPTOPP_SECTION_ALIGN16 = {
|
||||
W64LIT(0x18186018c07830d8), W64LIT(0x23238c2305af4626), W64LIT(0xc6c63fc67ef991b8), W64LIT(0xe8e887e8136fcdfb),
|
||||
W64LIT(0x878726874ca113cb), W64LIT(0xb8b8dab8a9626d11), W64LIT(0x0101040108050209), W64LIT(0x4f4f214f426e9e0d),
|
||||
W64LIT(0x3636d836adee6c9b), W64LIT(0xa6a6a2a6590451ff), W64LIT(0xd2d26fd2debdb90c), W64LIT(0xf5f5f3f5fb06f70e),
|
||||
|
|
@ -178,9 +179,7 @@ static const word64 C0[256] = {
|
|||
W64LIT(0x7070dd70a7ade0d7), W64LIT(0xb6b6e2b6d954716f), W64LIT(0xd0d067d0ceb7bd1e), W64LIT(0xeded93ed3b7ec7d6),
|
||||
W64LIT(0xcccc17cc2edb85e2), W64LIT(0x424215422a578468), W64LIT(0x98985a98b4c22d2c), W64LIT(0xa4a4aaa4490e55ed),
|
||||
W64LIT(0x2828a0285d885075), W64LIT(0x5c5c6d5cda31b886), W64LIT(0xf8f8c7f8933fed6b), W64LIT(0x8686228644a411c2),
|
||||
};
|
||||
|
||||
static const word64 C1[256] = {
|
||||
W64LIT(0xd818186018c07830), W64LIT(0x2623238c2305af46), W64LIT(0xb8c6c63fc67ef991), W64LIT(0xfbe8e887e8136fcd),
|
||||
W64LIT(0xcb878726874ca113), W64LIT(0x11b8b8dab8a9626d), W64LIT(0x0901010401080502), W64LIT(0x0d4f4f214f426e9e),
|
||||
W64LIT(0x9b3636d836adee6c), W64LIT(0xffa6a6a2a6590451), W64LIT(0x0cd2d26fd2debdb9), W64LIT(0x0ef5f5f3f5fb06f7),
|
||||
|
|
@ -245,9 +244,7 @@ static const word64 C1[256] = {
|
|||
W64LIT(0xd77070dd70a7ade0), W64LIT(0x6fb6b6e2b6d95471), W64LIT(0x1ed0d067d0ceb7bd), W64LIT(0xd6eded93ed3b7ec7),
|
||||
W64LIT(0xe2cccc17cc2edb85), W64LIT(0x68424215422a5784), W64LIT(0x2c98985a98b4c22d), W64LIT(0xeda4a4aaa4490e55),
|
||||
W64LIT(0x752828a0285d8850), W64LIT(0x865c5c6d5cda31b8), W64LIT(0x6bf8f8c7f8933fed), W64LIT(0xc28686228644a411),
|
||||
};
|
||||
|
||||
static const word64 C2[256] = {
|
||||
W64LIT(0x30d818186018c078), W64LIT(0x462623238c2305af), W64LIT(0x91b8c6c63fc67ef9), W64LIT(0xcdfbe8e887e8136f),
|
||||
W64LIT(0x13cb878726874ca1), W64LIT(0x6d11b8b8dab8a962), W64LIT(0x0209010104010805), W64LIT(0x9e0d4f4f214f426e),
|
||||
W64LIT(0x6c9b3636d836adee), W64LIT(0x51ffa6a6a2a65904), W64LIT(0xb90cd2d26fd2debd), W64LIT(0xf70ef5f5f3f5fb06),
|
||||
|
|
@ -312,9 +309,7 @@ static const word64 C2[256] = {
|
|||
W64LIT(0xe0d77070dd70a7ad), W64LIT(0x716fb6b6e2b6d954), W64LIT(0xbd1ed0d067d0ceb7), W64LIT(0xc7d6eded93ed3b7e),
|
||||
W64LIT(0x85e2cccc17cc2edb), W64LIT(0x8468424215422a57), W64LIT(0x2d2c98985a98b4c2), W64LIT(0x55eda4a4aaa4490e),
|
||||
W64LIT(0x50752828a0285d88), W64LIT(0xb8865c5c6d5cda31), W64LIT(0xed6bf8f8c7f8933f), W64LIT(0x11c28686228644a4),
|
||||
};
|
||||
|
||||
static const word64 C3[256] = {
|
||||
W64LIT(0x7830d818186018c0), W64LIT(0xaf462623238c2305), W64LIT(0xf991b8c6c63fc67e), W64LIT(0x6fcdfbe8e887e813),
|
||||
W64LIT(0xa113cb878726874c), W64LIT(0x626d11b8b8dab8a9), W64LIT(0x0502090101040108), W64LIT(0x6e9e0d4f4f214f42),
|
||||
W64LIT(0xee6c9b3636d836ad), W64LIT(0x0451ffa6a6a2a659), W64LIT(0xbdb90cd2d26fd2de), W64LIT(0x06f70ef5f5f3f5fb),
|
||||
|
|
@ -379,9 +374,7 @@ static const word64 C3[256] = {
|
|||
W64LIT(0xade0d77070dd70a7), W64LIT(0x54716fb6b6e2b6d9), W64LIT(0xb7bd1ed0d067d0ce), W64LIT(0x7ec7d6eded93ed3b),
|
||||
W64LIT(0xdb85e2cccc17cc2e), W64LIT(0x578468424215422a), W64LIT(0xc22d2c98985a98b4), W64LIT(0x0e55eda4a4aaa449),
|
||||
W64LIT(0x8850752828a0285d), W64LIT(0x31b8865c5c6d5cda), W64LIT(0x3fed6bf8f8c7f893), W64LIT(0xa411c28686228644),
|
||||
};
|
||||
|
||||
static const word64 rc[R] = {
|
||||
W64LIT(0x1823c6e887b8014f),
|
||||
W64LIT(0x36a6d2f5796f9152),
|
||||
W64LIT(0x60bc9b8ea30c7b35),
|
||||
|
|
@ -396,56 +389,293 @@ static const word64 rc[R] = {
|
|||
|
||||
// Whirlpool basic transformation. Transforms state based on block.
|
||||
void Whirlpool::Transform(word64 *digest, const word64 *block)
|
||||
{
|
||||
#ifdef CRYPTOPP_X86_ASM_AVAILABLE
|
||||
if (HasMMX())
|
||||
{
|
||||
// MMX version has the same structure as C version below
|
||||
#ifdef __GNUC__
|
||||
__asm__ __volatile__
|
||||
(
|
||||
".intel_syntax noprefix;"
|
||||
AS1( push ebx)
|
||||
AS2( mov ebx, eax)
|
||||
#else
|
||||
AS2( lea ebx, [Whirlpool_C])
|
||||
AS2( mov ecx, digest)
|
||||
AS2( mov edx, block)
|
||||
#endif
|
||||
AS2( mov eax, esp)
|
||||
AS2( and esp, 0xfffffff0)
|
||||
AS2( sub esp, 16*8)
|
||||
AS1( push eax)
|
||||
AS2( xor esi, esi)
|
||||
ASL(0)
|
||||
AS2( movq mm0, [ecx+8*esi])
|
||||
AS2( movq [esp+4+8*esi], mm0) // k
|
||||
AS2( pxor mm0, [edx+8*esi])
|
||||
AS2( movq [esp+4+64+8*esi], mm0) // s
|
||||
AS2( movq [ecx+8*esi], mm0)
|
||||
AS1( inc esi)
|
||||
AS2( cmp esi, 8)
|
||||
ASJ( jne, 0, b)
|
||||
|
||||
AS2( xor esi, esi)
|
||||
ASL(1)
|
||||
|
||||
#define KSL0(a, b) AS2(movq mm##a, b)
|
||||
#define KSL1(a, b) AS2(pxor mm##a, b)
|
||||
|
||||
#define KSL(op, i, a, b, c, d) \
|
||||
AS2(mov eax, [esp+4+8*i])\
|
||||
AS2(movzx edi, al)\
|
||||
KSL##op(a, [ebx+3*2048+8*edi])\
|
||||
AS2(movzx edi, ah)\
|
||||
KSL##op(b, [ebx+2*2048+8*edi])\
|
||||
AS2(shr eax, 16)\
|
||||
AS2(movzx edi, al)\
|
||||
AS2(shr eax, 8)\
|
||||
KSL##op(c, [ebx+1*2048+8*edi])\
|
||||
KSL##op(d, [ebx+0*2048+8*eax])
|
||||
|
||||
#define KSH0(a, b) \
|
||||
ASS(pshufw mm##a, mm##a, 1, 0, 3, 2)\
|
||||
AS2(pxor mm##a, b)
|
||||
#define KSH1(a, b) \
|
||||
AS2(pxor mm##a, b)
|
||||
#define KSH2(a, b) \
|
||||
AS2(pxor mm##a, b)\
|
||||
AS2(movq [esp+4+8*a], mm##a)
|
||||
|
||||
#define KSH(op, i, a, b, c, d) \
|
||||
AS2(mov eax, [esp+4+8*((i+4)-8*((i+4)/8))+4])\
|
||||
AS2(movzx edi, al)\
|
||||
KSH##op(a, [ebx+3*2048+8*edi])\
|
||||
AS2(movzx edi, ah)\
|
||||
KSH##op(b, [ebx+2*2048+8*edi])\
|
||||
AS2(shr eax, 16)\
|
||||
AS2(movzx edi, al)\
|
||||
AS2(shr eax, 8)\
|
||||
KSH##op(c, [ebx+1*2048+8*edi])\
|
||||
KSH##op(d, [ebx+0*2048+8*eax])
|
||||
|
||||
#define TSL(op, i, a, b, c, d) \
|
||||
AS2(mov eax, [esp+4+64+8*i])\
|
||||
AS2(movzx edi, al)\
|
||||
KSL##op(a, [ebx+3*2048+8*edi])\
|
||||
AS2(movzx edi, ah)\
|
||||
KSL##op(b, [ebx+2*2048+8*edi])\
|
||||
AS2(shr eax, 16)\
|
||||
AS2(movzx edi, al)\
|
||||
AS2(shr eax, 8)\
|
||||
KSL##op(c, [ebx+1*2048+8*edi])\
|
||||
KSL##op(d, [ebx+0*2048+8*eax])
|
||||
|
||||
#define TSH0(a, b) \
|
||||
ASS(pshufw mm##a, mm##a, 1, 0, 3, 2)\
|
||||
AS2(pxor mm##a, [esp+4+8*a])\
|
||||
AS2(pxor mm##a, b)
|
||||
#define TSH1(a, b) \
|
||||
AS2(pxor mm##a, b)
|
||||
#define TSH2(a, b) \
|
||||
AS2(pxor mm##a, b)\
|
||||
AS2(movq [esp+4+64+8*a], mm##a)
|
||||
#define TSH3(a, b) \
|
||||
AS2(pxor mm##a, b)\
|
||||
AS2(pxor mm##a, [ecx+8*a])\
|
||||
AS2(movq [ecx+8*a], mm##a)
|
||||
|
||||
#define TSH(op, i, a, b, c, d) \
|
||||
AS2(mov eax, [esp+4+64+8*((i+4)-8*((i+4)/8))+4])\
|
||||
AS2(movzx edi, al)\
|
||||
TSH##op(a, [ebx+3*2048+8*edi])\
|
||||
AS2(movzx edi, ah)\
|
||||
TSH##op(b, [ebx+2*2048+8*edi])\
|
||||
AS2(shr eax, 16)\
|
||||
AS2(movzx edi, al)\
|
||||
AS2(shr eax, 8)\
|
||||
TSH##op(c, [ebx+1*2048+8*edi])\
|
||||
TSH##op(d, [ebx+0*2048+8*eax])
|
||||
|
||||
KSL(0, 4, 3, 2, 1, 0)
|
||||
KSL(0, 0, 7, 6, 5, 4)
|
||||
KSL(1, 1, 0, 7, 6, 5)
|
||||
KSL(1, 2, 1, 0, 7, 6)
|
||||
KSL(1, 3, 2, 1, 0, 7)
|
||||
KSL(1, 5, 4, 3, 2, 1)
|
||||
KSL(1, 6, 5, 4, 3, 2)
|
||||
KSL(1, 7, 6, 5, 4, 3)
|
||||
KSH(0, 0, 7, 6, 5, 4)
|
||||
KSH(0, 4, 3, 2, 1, 0)
|
||||
KSH(1, 1, 0, 7, 6, 5)
|
||||
KSH(1, 2, 1, 0, 7, 6)
|
||||
KSH(1, 5, 4, 3, 2, 1)
|
||||
KSH(1, 6, 5, 4, 3, 2)
|
||||
KSH(2, 3, 2, 1, 0, 7)
|
||||
KSH(2, 7, 6, 5, 4, 3)
|
||||
|
||||
AS2( pxor mm0, [ebx + 8*1024 + esi*8])
|
||||
AS2( movq [esp+4], mm0)
|
||||
|
||||
TSL(0, 4, 3, 2, 1, 0)
|
||||
TSL(0, 0, 7, 6, 5, 4)
|
||||
TSL(1, 1, 0, 7, 6, 5)
|
||||
TSL(1, 2, 1, 0, 7, 6)
|
||||
TSL(1, 3, 2, 1, 0, 7)
|
||||
TSL(1, 5, 4, 3, 2, 1)
|
||||
TSL(1, 6, 5, 4, 3, 2)
|
||||
TSL(1, 7, 6, 5, 4, 3)
|
||||
TSH(0, 0, 7, 6, 5, 4)
|
||||
TSH(0, 4, 3, 2, 1, 0)
|
||||
TSH(1, 1, 0, 7, 6, 5)
|
||||
TSH(1, 2, 1, 0, 7, 6)
|
||||
TSH(1, 5, 4, 3, 2, 1)
|
||||
TSH(1, 6, 5, 4, 3, 2)
|
||||
|
||||
AS1( inc esi)
|
||||
AS2( cmp esi, 10)
|
||||
ASJ( je, 2, f)
|
||||
|
||||
TSH(2, 3, 2, 1, 0, 7)
|
||||
TSH(2, 7, 6, 5, 4, 3)
|
||||
|
||||
ASJ( jmp, 1, b)
|
||||
ASL(2)
|
||||
|
||||
TSH(3, 3, 2, 1, 0, 7)
|
||||
TSH(3, 7, 6, 5, 4, 3)
|
||||
|
||||
#undef KSL
|
||||
#undef KSH
|
||||
#undef TSL
|
||||
#undef TSH
|
||||
|
||||
AS1( emms)
|
||||
AS1( pop esp)
|
||||
|
||||
#ifdef __GNUC__
|
||||
AS1( pop ebx)
|
||||
".att_syntax prefix;"
|
||||
:
|
||||
: "a" (Whirlpool_C), "c" (digest), "d" (block)
|
||||
: "%esi", "%edi", "memory", "cc"
|
||||
);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
#endif // #ifdef CRYPTOPP_X86_ASM_AVAILABLE
|
||||
{
|
||||
word64 s[8]; // the cipher state
|
||||
word64 k[8]; // the round key
|
||||
|
||||
// Compute and apply K^0 to the cipher state
|
||||
// Also apply part of the Miyaguchi-Preneel compression function
|
||||
digest[0] = s[0] = block[0] ^ (k[0] = digest[0]);
|
||||
digest[1] = s[1] = block[1] ^ (k[1] = digest[1]);
|
||||
digest[2] = s[2] = block[2] ^ (k[2] = digest[2]);
|
||||
digest[3] = s[3] = block[3] ^ (k[3] = digest[3]);
|
||||
digest[4] = s[4] = block[4] ^ (k[4] = digest[4]);
|
||||
digest[5] = s[5] = block[5] ^ (k[5] = digest[5]);
|
||||
digest[6] = s[6] = block[6] ^ (k[6] = digest[6]);
|
||||
digest[7] = s[7] = block[7] ^ (k[7] = digest[7]);
|
||||
for (int i=0; i<8; i++)
|
||||
digest[i] = s[i] = block[i] ^ (k[i] = digest[i]);
|
||||
|
||||
#define KSL(op, i, a, b, c, d) \
|
||||
t = (word32)k[i];\
|
||||
w##a = Whirlpool_C[3*256 + (byte)t] ^ (op ? w##a : 0);\
|
||||
t >>= 8;\
|
||||
w##b = Whirlpool_C[2*256 + (byte)t] ^ (op ? w##b : 0);\
|
||||
t >>= 8;\
|
||||
w##c = Whirlpool_C[1*256 + (byte)t] ^ (op ? w##c : 0);\
|
||||
t >>= 8;\
|
||||
w##d = Whirlpool_C[0*256 + t] ^ (op ? w##d : 0);
|
||||
|
||||
#define KSH(op, i, a, b, c, d) \
|
||||
t = (word32)(k[(i+4)%8]>>32);\
|
||||
w##a = Whirlpool_C[3*256 + (byte)t] ^ (op ? w##a : rotrFixed(w##a, 32));\
|
||||
if (op==2) k[a] = w##a;\
|
||||
t >>= 8;\
|
||||
w##b = Whirlpool_C[2*256 + (byte)t] ^ (op ? w##b : rotrFixed(w##b, 32));\
|
||||
if (op==2) k[b] = w##b;\
|
||||
t >>= 8;\
|
||||
w##c = Whirlpool_C[1*256 + (byte)t] ^ (op ? w##c : rotrFixed(w##c, 32));\
|
||||
if (op==2) k[c] = w##c;\
|
||||
t >>= 8;\
|
||||
w##d = Whirlpool_C[0*256 + t] ^ (op ? w##d : rotrFixed(w##d, 32));\
|
||||
if (op==2) k[d] = w##d;\
|
||||
|
||||
#define TSL(op, i, a, b, c, d) \
|
||||
t = (word32)s[i];\
|
||||
w##a = Whirlpool_C[3*256 + (byte)t] ^ (op ? w##a : 0);\
|
||||
t >>= 8;\
|
||||
w##b = Whirlpool_C[2*256 + (byte)t] ^ (op ? w##b : 0);\
|
||||
t >>= 8;\
|
||||
w##c = Whirlpool_C[1*256 + (byte)t] ^ (op ? w##c : 0);\
|
||||
t >>= 8;\
|
||||
w##d = Whirlpool_C[0*256 + t] ^ (op ? w##d : 0);
|
||||
|
||||
#define TSH_OP(op, a, b) \
|
||||
w##a = Whirlpool_C[b*256 + (byte)t] ^ (op ? w##a : rotrFixed(w##a, 32) ^ k[a]);\
|
||||
if (op==2) s[a] = w##a;\
|
||||
if (op==3) digest[a] ^= w##a;\
|
||||
|
||||
#define TSH(op, i, a, b, c, d) \
|
||||
t = (word32)(s[(i+4)%8]>>32);\
|
||||
TSH_OP(op, a, 3);\
|
||||
t >>= 8;\
|
||||
TSH_OP(op, b, 2);\
|
||||
t >>= 8;\
|
||||
TSH_OP(op, c, 1);\
|
||||
t >>= 8;\
|
||||
TSH_OP(op, d, 0);\
|
||||
|
||||
// Iterate over all rounds:
|
||||
for (int r = 0; r < R; r++)
|
||||
int r=0;
|
||||
while (true)
|
||||
{
|
||||
word64 w0, w1, w2, w3, w4, w5, w6, w7; // temporary storage
|
||||
word64 t;
|
||||
word32 t;
|
||||
|
||||
// Compute K^r from K^{r-1}:
|
||||
#define K(i,j) GETBYTE(k[(i+j+1)%8], j)
|
||||
#define KS(i) \
|
||||
t = C0[K(i,3)] ^ C1[K(i,2)] ^ C2[K(i,1)] ^ C3[K(i,0)]; \
|
||||
w##i = rotrFixed(t, 32) ^ C0[K(i,7)] ^ C1[K(i,6)] ^ C2[K(i,5)] ^ C3[K(i,4)];
|
||||
KSL(0, 4, 3, 2, 1, 0)
|
||||
KSL(0, 0, 7, 6, 5, 4)
|
||||
KSL(1, 1, 0, 7, 6, 5)
|
||||
KSL(1, 2, 1, 0, 7, 6)
|
||||
KSL(1, 3, 2, 1, 0, 7)
|
||||
KSL(1, 5, 4, 3, 2, 1)
|
||||
KSL(1, 6, 5, 4, 3, 2)
|
||||
KSL(1, 7, 6, 5, 4, 3)
|
||||
KSH(0, 0, 7, 6, 5, 4)
|
||||
KSH(0, 4, 3, 2, 1, 0)
|
||||
KSH(1, 1, 0, 7, 6, 5)
|
||||
KSH(1, 2, 1, 0, 7, 6)
|
||||
KSH(1, 5, 4, 3, 2, 1)
|
||||
KSH(1, 6, 5, 4, 3, 2)
|
||||
KSH(2, 3, 2, 1, 0, 7)
|
||||
KSH(2, 7, 6, 5, 4, 3)
|
||||
|
||||
KS(0); KS(1); KS(2); KS(3); KS(4); KS(5); KS(6); KS(7);
|
||||
k[0] = w0 ^ rc[r];
|
||||
k[1] = w1; k[2] = w2; k[3] = w3; k[4] = w4; k[5] = w5; k[6] = w6; k[7] = w7;
|
||||
k[0] ^= Whirlpool_C[1024+r];
|
||||
|
||||
// Apply the r-th round transformation:
|
||||
#define S(i,j) GETBYTE(s[(i+j+1)%8], j)
|
||||
#define TS(i) \
|
||||
t = C0[S(i,3)] ^ C1[S(i,2)] ^ C2[S(i,1)] ^ C3[S(i,0)]; \
|
||||
w##i = rotrFixed(t, 32) ^ C0[S(i,7)] ^ C1[S(i,6)] ^ C2[S(i,5)] ^ C3[S(i,4)] ^ k[i];
|
||||
TSL(0, 4, 3, 2, 1, 0)
|
||||
TSL(0, 0, 7, 6, 5, 4)
|
||||
TSL(1, 1, 0, 7, 6, 5)
|
||||
TSL(1, 2, 1, 0, 7, 6)
|
||||
TSL(1, 3, 2, 1, 0, 7)
|
||||
TSL(1, 5, 4, 3, 2, 1)
|
||||
TSL(1, 6, 5, 4, 3, 2)
|
||||
TSL(1, 7, 6, 5, 4, 3)
|
||||
TSH(0, 0, 7, 6, 5, 4)
|
||||
TSH(0, 4, 3, 2, 1, 0)
|
||||
TSH(1, 1, 0, 7, 6, 5)
|
||||
TSH(1, 2, 1, 0, 7, 6)
|
||||
TSH(1, 5, 4, 3, 2, 1)
|
||||
TSH(1, 6, 5, 4, 3, 2)
|
||||
|
||||
TS(0); TS(1); TS(2); TS(3); TS(4); TS(5); TS(6); TS(7);
|
||||
s[0] = w0; s[1] = w1; s[2] = w2; s[3] = w3; s[4] = w4; s[5] = w5; s[6] = w6; s[7] = w7;
|
||||
if (++r < R)
|
||||
{
|
||||
TSH(2, 3, 2, 1, 0, 7)
|
||||
TSH(2, 7, 6, 5, 4, 3)
|
||||
}
|
||||
else
|
||||
{
|
||||
TSH(3, 3, 2, 1, 0, 7)
|
||||
TSH(3, 7, 6, 5, 4, 3)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply the rest of the Miyaguchi-Preneel compression function:
|
||||
digest[0] ^= s[0];
|
||||
digest[1] ^= s[1];
|
||||
digest[2] ^= s[2];
|
||||
digest[3] ^= s[3];
|
||||
digest[4] ^= s[4];
|
||||
digest[5] ^= s[5];
|
||||
digest[6] ^= s[6];
|
||||
digest[7] ^= s[7];
|
||||
}
|
||||
|
||||
NAMESPACE_END
|
||||
|
|
|
|||
|
|
@ -9,8 +9,7 @@
|
|||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
//! <a href="http://www.weidai.com/scan-mirror/md.html#Whirlpool">Whirlpool</a>
|
||||
/*! 512 Bit Hash */
|
||||
//! <a href="http://www.cryptolounge.org/wiki/Whirlpool">Whirlpool</a>
|
||||
class Whirlpool : public IteratedHashWithStaticTransform<word64, BigEndian, 64, 64, Whirlpool>
|
||||
{
|
||||
public:
|
||||
|
|
|
|||
Loading…
Reference in New Issue