diff --git a/config.h b/config.h index 67664b2f..2d4f970f 100644 --- a/config.h +++ b/config.h @@ -346,13 +346,10 @@ NAMESPACE_END // CRYPTOPP_ALIGN_DATA may not be reliable on AIX. #ifndef CRYPTOPP_ALIGN_DATA #if defined(_MSC_VER) - #define CRYPTOPP_ALIGN_ATTRIBUTE 1 #define CRYPTOPP_ALIGN_DATA(x) __declspec(align(x)) #elif defined(__GNUC__) || (__SUNPRO_CC >= 0x5100) - #define CRYPTOPP_ALIGN_ATTRIBUTE 1 #define CRYPTOPP_ALIGN_DATA(x) __attribute__((aligned(x))) #elif defined(__xlc__) || defined(__xlC__) - #define CRYPTOPP_ALIGN_ATTRIBUTE 1 #define CRYPTOPP_ALIGN_DATA(x) __attribute__((aligned(x))) #else #define CRYPTOPP_ALIGN_DATA(x) diff --git a/misc.h b/misc.h index 36101e7d..12d300e7 100644 --- a/misc.h +++ b/misc.h @@ -2072,8 +2072,8 @@ void ByteReverse(T *out, const T *in, size_t byteCount) { // Alignment check due to Issues 690 CRYPTOPP_ASSERT(byteCount % sizeof(T) == 0); - //CRYPTOPP_ASSERT(IsAligned(in)); - //CRYPTOPP_ASSERT(IsAligned(out)); + CRYPTOPP_ASSERT(IsAligned(in)); + CRYPTOPP_ASSERT(IsAligned(out)); size_t count = byteCount/sizeof(T); for (size_t i=0; i= 8) ? 1 : (Q >= 4) ? 2 : (Q >= 2) ? 4 : 8 }; + CRYPTOPP_ALIGN_DATA(8) T m_array[S+PAD]; + #else + T* GetAlignedArray() {return m_array;} T m_array[S]; #endif @@ -576,10 +601,8 @@ public: { if (ptr == GetAlignedArray()) { - // If the m_allocated assert fires then the bit twiddling for - // GetAlignedArray() is probably incorrect for the platform. - // Be sure to check CRYPTOPP_ALIGN_DATA(8). The platform may - // not have a way to declaritively align data to 8. + // If the m_allocated assert fires then + // something overwrote the flag. CRYPTOPP_ASSERT(size <= S); CRYPTOPP_ASSERT(m_allocated); m_allocated = false; diff --git a/sha.cpp b/sha.cpp index 73f1797f..b3ec31a7 100644 --- a/sha.cpp +++ b/sha.cpp @@ -1119,8 +1119,7 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL SHA512_HashBlock_SSE2(word64 *state, const #define SSE2_CombineState(i) \ AS2( movdqu xmm0, [edi+i*16])\ - AS2( movdqu xmm1, [ecx+i*16])\ - AS2( paddq xmm0, xmm1)\ + AS2( paddq xmm0, [ecx+i*16])\ AS2( movdqu [ecx+i*16], xmm0) SSE2_CombineState(0) @@ -1148,7 +1147,7 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL SHA512_HashBlock_SSE2(word64 *state, const // ANONYMOUS_NAMESPACE_END -#endif // CRYPTOPP_SSE2_ASM_AVAILABLE +#endif // CRYPTOPP_SSE2_ASM_AVAILABLE ANONYMOUS_NAMESPACE_BEGIN diff --git a/sosemanuk.cpp b/sosemanuk.cpp index ba049c3b..82cf8c92 100644 --- a/sosemanuk.cpp +++ b/sosemanuk.cpp @@ -412,10 +412,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)]) AS2( lea WORD_REG(si), [4*WORD_REG(cx)]) AS2( mov SSE2_wordsLeft, WORD_REG(si)) - AS2( movdqu xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register - AS2( movdqu [SSE2_stateCopy+0*16], xmm0) - AS2( movdqu xmm0, [WORD_REG(ax)+1*16]) - AS2( movdqu [SSE2_stateCopy+1*16], xmm0) + AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register + AS2( movdqa [SSE2_stateCopy+0*16], xmm0) + AS2( movdqa xmm0, [WORD_REG(ax)+1*16]) + AS2( movdqa [SSE2_stateCopy+1*16], xmm0) AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16]) AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0) AS2( psrlq xmm0, 32) @@ -507,10 +507,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu AS2( mov WORD_REG(si), SSE2_wordsLeft2) ASL(1) // second inner loop, 16 words each, 5 iterations - AS2( movdqu xmm0, [WORD_REG(di)+0*20*4]) - AS2( movdqu xmm2, [WORD_REG(di)+2*20*4]) - AS2( movdqu xmm3, [WORD_REG(di)+3*20*4]) - AS2( movdqu xmm1, [WORD_REG(di)+1*20*4]) + AS2( movdqa xmm0, [WORD_REG(di)+0*20*4]) + AS2( movdqa xmm2, [WORD_REG(di)+2*20*4]) + AS2( movdqa xmm3, [WORD_REG(di)+3*20*4]) + AS2( movdqa xmm1, [WORD_REG(di)+1*20*4]) // S2 AS2( movdqa xmm4, xmm0) AS2( pand xmm0, xmm2) @@ -596,10 +596,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu ASL(6) // save state AS2( mov AS_REG_6, SSE2_state) - AS2( movdqu xmm0, [SSE2_stateCopy+0*16]) - AS2( movdqu [AS_REG_6+0*16], xmm0) - AS2( movdqu xmm0, [SSE2_stateCopy+1*16]) - AS2( movdqu [AS_REG_6+1*16], xmm0) + AS2( movdqa xmm0, [SSE2_stateCopy+0*16]) + AS2( movdqa [AS_REG_6+0*16], xmm0) + AS2( movdqa xmm0, [SSE2_stateCopy+1*16]) + AS2( movdqa [AS_REG_6+1*16], xmm0) AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16]) AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0) AS2( mov [AS_REG_6+10*4], ecx) diff --git a/tiger.cpp b/tiger.cpp index 03d2ddb0..9b35a8c7 100644 --- a/tiger.cpp +++ b/tiger.cpp @@ -51,7 +51,7 @@ void Tiger::TruncatedFinal(byte *hash, size_t size) Restart(); // reinit for next use } -void Tiger::Transform (word64 *digest, const word64 *X) +void Tiger::Transform (word64 *state, const word64 *data) { #if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 if (HasSSE2()) @@ -63,8 +63,8 @@ void Tiger::Transform (word64 *digest, const word64 *X) AS_PUSH_IF86(bx) #else AS2( lea edx, [table]) - AS2( mov eax, digest) - AS2( mov esi, X) + AS2( mov eax, state) + AS2( mov esi, data) #endif AS2( movq mm0, [eax]) AS2( movq mm1, [eax+1*8]) @@ -213,7 +213,7 @@ void Tiger::Transform (word64 *digest, const word64 *X) AS_POP_IF86(bx) ATT_PREFIX : - : "a" (digest), "S" (X), "d" (table) + : "a" (state), "S" (data), "d" (table) : "%ecx", "%edi", "memory", "cc" ); #endif @@ -221,9 +221,9 @@ void Tiger::Transform (word64 *digest, const word64 *X) else #endif { - word64 a = digest[0]; - word64 b = digest[1]; - word64 c = digest[2]; + word64 a = state[0]; + word64 b = state[1]; + word64 c = state[2]; word64 Y[8]; #define t1 (table) @@ -267,15 +267,15 @@ void Tiger::Transform (word64 *digest, const word64 *X) Y[6] += Y[5]; \ Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF) - pass(a,b,c,5,X); - key_schedule(Y,X); + pass(a,b,c,5,data); + key_schedule(Y,data); pass(c,a,b,7,Y); key_schedule(Y,Y); pass(b,c,a,9,Y); - digest[0] = a ^ digest[0]; - digest[1] = b - digest[1]; - digest[2] = c + digest[2]; + state[0] = a ^ state[0]; + state[1] = b - state[1]; + state[2] = c + state[2]; } }