Avoid extra ByteReverse when using Intel SHA extensions
This gains about 0.6 cpb (cycles per byte). SHA-1 is down to between 1.7 and 1.9 cpb. SHA-256 is not affected.
pull/242/merge
parent
bd7aa155a6
commit
de1270656c
|
|
@ -83,9 +83,6 @@ template <class T, class BASE> byte * IteratedHashBase<T, BASE>::CreateUpdateSpa
|
|||
|
||||
template <class T, class BASE> size_t IteratedHashBase<T, BASE>::HashMultipleBlocks(const T *input, size_t length)
|
||||
{
|
||||
// Hardware based SHA1 and SHA256 correct blocks themselves due to hardware requirements.
|
||||
// For Intel, SHA1 will effectively call ByteReverse(). SHA256 formats data to Intel
|
||||
// requirements, which means eight words ABCD EFGH are transformed to ABEF CDGH.
|
||||
unsigned int blockSize = this->BlockSize();
|
||||
bool noReverse = NativeByteOrderIs(this->GetByteOrder());
|
||||
T* dataBuf = this->DataBuf();
|
||||
|
|
|
|||
8
seal.cpp
8
seal.cpp
|
|
@ -4,6 +4,7 @@
|
|||
#include "pch.h"
|
||||
|
||||
#include "seal.h"
|
||||
#include "cpu.h"
|
||||
#include "sha.h"
|
||||
#include "misc.h"
|
||||
#include "secblock.h"
|
||||
|
|
@ -37,11 +38,16 @@ word32 SEAL_Gamma::Apply(word32 i)
|
|||
word32 shaIndex = i/5;
|
||||
if (shaIndex != lastIndex)
|
||||
{
|
||||
memcpy(Z, H, 20);
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
D[0] = ConditionalByteReverse(HasSHA() ? BIG_ENDIAN_ORDER : LITTLE_ENDIAN_ORDER, shaIndex);
|
||||
#else
|
||||
D[0] = shaIndex;
|
||||
#endif
|
||||
memcpy(Z, H, 20);
|
||||
SHA1::Transform(Z, D);
|
||||
lastIndex = shaIndex;
|
||||
}
|
||||
|
||||
return Z[i%5];
|
||||
}
|
||||
|
||||
|
|
|
|||
28
sha.cpp
28
sha.cpp
|
|
@ -112,7 +112,7 @@ static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
|
|||
ABCD = _mm_loadu_si128((__m128i*) state);
|
||||
E0 = _mm_set_epi32(state[4], 0, 0, 0);
|
||||
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
|
||||
MASK = _mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12);
|
||||
MASK = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
|
||||
|
||||
// Save current hash
|
||||
ABCD_SAVE = ABCD;
|
||||
|
|
@ -498,6 +498,30 @@ void SHA1::Transform(word32 *state, const word32 *data)
|
|||
s_pfn(state, data);
|
||||
}
|
||||
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
// Hashes consecutive whole blocks read from `input`, where `length` is the
// number of bytes available, and returns the number of bytes left over once no
// further whole block remains.
//
// A block is passed straight to HashEndianCorrectedBlock() when HasSHA()
// reports true or when the hash's byte order is already the native one.
// Otherwise the block is first byte-reversed into the DataBuf() scratch buffer
// and that copy is hashed instead.
//
// NOTE(review): the reversal is presumably skipped under HasSHA() because the
// SHA-extension transform reorders the bytes itself - confirm against
// SHA1_SSE_SHA_Transform.
// NOTE(review): the loop body always runs once, so this looks like it expects
// length >= BlockSize() on entry; a shorter length would wrap around in the
// unsigned subtraction below - confirm against callers.
size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
{
	// Deliberately not a function-local static: the value is derived from this
	// object (GetByteOrder()), and a static would be fixed by whichever object
	// called first and, before C++11, initialised without any thread-safety
	// guarantee.
	const bool noReverse = HasSHA() || NativeByteOrderIs(this->GetByteOrder());
	const unsigned int blockSize = this->BlockSize();
	word32* dataBuf = this->DataBuf();

	do
	{
		if (noReverse)
		{
			this->HashEndianCorrectedBlock(input);
		}
		else
		{
			// Reuse the hoisted block size rather than querying it again.
			ByteReverse(dataBuf, input, blockSize);
			this->HashEndianCorrectedBlock(dataBuf);
		}

		// blockSize is in bytes while input advances in word32 units.
		input += blockSize/sizeof(word32);
		length -= blockSize;
	}
	while (length >= blockSize);

	return length;
}
|
||||
#endif
|
||||
|
||||
// *************************************************************
|
||||
|
||||
void SHA224::InitState(HashWordType *state)
|
||||
|
|
@ -1641,7 +1665,7 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state
|
|||
|
||||
// first 16 rounds
|
||||
ASL(0)
|
||||
AS2( movq mm0, [edx+eax*8])
|
||||
AS2( movq mm0, [edx+eax*8])
|
||||
AS2( movq [esi+eax*8], mm0)
|
||||
AS2( movq [esi+eax*8+16*8], mm0)
|
||||
AS2( paddq mm0, [ebx+eax*8])
|
||||
|
|
|
|||
3
sha.h
3
sha.h
|
|
@ -25,6 +25,9 @@ NAMESPACE_BEGIN(CryptoPP)
|
|||
class CRYPTOPP_DLL SHA1 : public IteratedHashWithStaticTransform<word32, BigEndian, 64, 20, SHA1>
|
||||
{
|
||||
public:
|
||||
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
||||
size_t HashMultipleBlocks(const word32 *input, size_t length);
|
||||
#endif
|
||||
static void CRYPTOPP_API InitState(HashWordType *state);
|
||||
static void CRYPTOPP_API Transform(word32 *digest, const word32 *data);
|
||||
CRYPTOPP_STATIC_CONSTEXPR const char* CRYPTOPP_API StaticAlgorithmName() {return "SHA-1";}
|
||||
|
|
|
|||
Loading…
Reference in New Issue