From de1270656c275074dfc6c013484c157e9d62277b Mon Sep 17 00:00:00 2001
From: Jeffrey Walton
Date: Fri, 26 May 2017 01:51:44 -0400
Subject: [PATCH] Avoid extra ByteReverse when using Intel SHA extensions

This gains about 0.6 cpb. SHA-1 is down to 1.7 to 1.9 cpb. SHA-256 is not affected
---
 iterhash.cpp |  3 ---
 seal.cpp     |  8 +++++++-
 sha.cpp      | 35 +++++++++++++++++++++++++++++++++--
 sha.h        |  3 +++
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/iterhash.cpp b/iterhash.cpp
index bdc99ec5..eedd5172 100644
--- a/iterhash.cpp
+++ b/iterhash.cpp
@@ -83,9 +83,6 @@ template byte * IteratedHashBase::CreateUpdateSpa
 
 template size_t IteratedHashBase::HashMultipleBlocks(const T *input, size_t length)
 {
-	// Hardware based SHA1 and SHA256 correct blocks themselves due to hardware requirements.
-	// For Intel, SHA1 will effectively call ByteReverse(). SHA256 formats data to Intel
-	// requirements, which means eight words ABCD EFGH are transformed to ABEF CDGH.
 	unsigned int blockSize = this->BlockSize();
 	bool noReverse = NativeByteOrderIs(this->GetByteOrder());
 	T* dataBuf = this->DataBuf();
diff --git a/seal.cpp b/seal.cpp
index acc4e4e5..4108feb5 100644
--- a/seal.cpp
+++ b/seal.cpp
@@ -4,6 +4,7 @@
 #include "pch.h"
 
 #include "seal.h"
+#include "cpu.h"
 #include "sha.h"
 #include "misc.h"
 #include "secblock.h"
@@ -37,11 +38,16 @@ word32 SEAL_Gamma::Apply(word32 i)
 	word32 shaIndex = i/5;
 	if (shaIndex != lastIndex)
 	{
-		memcpy(Z, H, 20);
+#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
+		D[0] = ConditionalByteReverse(HasSHA() ? BIG_ENDIAN_ORDER : LITTLE_ENDIAN_ORDER, shaIndex);
+#else
 		D[0] = shaIndex;
+#endif
+		memcpy(Z, H, 20);
 		SHA1::Transform(Z, D);
 		lastIndex = shaIndex;
 	}
+
 	return Z[i%5];
 }
 
diff --git a/sha.cpp b/sha.cpp
index f64ff33d..9c691eb6 100644
--- a/sha.cpp
+++ b/sha.cpp
@@ -112,7 +112,7 @@ static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
 	ABCD = _mm_loadu_si128((__m128i*) state);
 	E0 = _mm_set_epi32(state[4], 0, 0, 0);
 	ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
-	MASK = _mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12);
+	MASK = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
 
 	// Save current hash
 	ABCD_SAVE = ABCD;
@@ -498,6 +498,37 @@ void SHA1::Transform(word32 *state, const word32 *data)
 	s_pfn(state, data);
 }
 
+#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
+// Hashes whole blocks from input and returns the count of trailing bytes
+// (fewer than one block) left unprocessed. length must cover at least one
+// block: the do/while consumes a block before testing the remainder.
+size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
+{
+	// Skip the software byte swap when SHA extensions are in use (the hardware
+	// transform is expected to reorder the bytes itself) or when the host byte
+	// order already matches. Evaluated per call rather than cached in a
+	// function-local static, which would put an init guard on this path.
+	const bool noReverse = HasSHA() || NativeByteOrderIs(this->GetByteOrder());
+	const unsigned int blockSize = this->BlockSize();
+	word32* dataBuf = this->DataBuf();
+	do
+	{
+		if (noReverse)
+			this->HashEndianCorrectedBlock(input);
+		else
+		{
+			ByteReverse(dataBuf, input, blockSize);
+			this->HashEndianCorrectedBlock(dataBuf);
+		}
+
+		input += blockSize/sizeof(word32);
+		length -= blockSize;
+	}
+	while (length >= blockSize);
+	return length;
+}
+#endif
+
 // *************************************************************
 
 void SHA224::InitState(HashWordType *state)
@@ -1641,7 +1672,7 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state
 
 	// first 16 rounds
 	ASL(0)
-	AS2( movq mm0, [edx+eax*8])
+	AS2( movq mm0, [edx+eax*8])
 	AS2( movq [esi+eax*8], mm0)
 	AS2( movq [esi+eax*8+16*8], mm0)
 	AS2( paddq mm0, [ebx+eax*8])
diff --git a/sha.h b/sha.h
index 806317d4..7660909e 100644
--- a/sha.h
+++ b/sha.h
@@ -25,6 +25,9 @@ NAMESPACE_BEGIN(CryptoPP)
 class CRYPTOPP_DLL SHA1 : public IteratedHashWithStaticTransform
 {
 public:
+#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
+	size_t HashMultipleBlocks(const word32 *input, size_t length);
+#endif
 	static void CRYPTOPP_API InitState(HashWordType *state);
 	static void CRYPTOPP_API Transform(word32 *digest, const word32 *data);
 	CRYPTOPP_STATIC_CONSTEXPR const char* CRYPTOPP_API StaticAlgorithmName() {return "SHA-1";}