Fix sources after sync with upstream
parent
c44f32d683
commit
a25e63fcb7
4
gcm.cpp
4
gcm.cpp
|
|
@ -134,7 +134,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
||||||
// Avoid "parameter not used" error and suppress Coverity finding
|
// Avoid "parameter not used" error and suppress Coverity finding
|
||||||
(void)params.GetIntValue(Name::TableSize(), tableSize);
|
(void)params.GetIntValue(Name::TableSize(), tableSize);
|
||||||
tableSize = s_cltableSizeInBlocks * blockSize;
|
tableSize = s_cltableSizeInBlocks * blockSize;
|
||||||
CRYPTOPP_ASSERT(tableSize > blockSize);
|
CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
|
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
|
||||||
|
|
@ -143,7 +143,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
||||||
// Avoid "parameter not used" error and suppress Coverity finding
|
// Avoid "parameter not used" error and suppress Coverity finding
|
||||||
(void)params.GetIntValue(Name::TableSize(), tableSize);
|
(void)params.GetIntValue(Name::TableSize(), tableSize);
|
||||||
tableSize = s_cltableSizeInBlocks * blockSize;
|
tableSize = s_cltableSizeInBlocks * blockSize;
|
||||||
CRYPTOPP_ASSERT(tableSize > blockSize);
|
CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
116
sha-simd.cpp
116
sha-simd.cpp
|
|
@ -8,6 +8,7 @@
|
||||||
|
|
||||||
#include "pch.h"
|
#include "pch.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#include "sha.h"
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
|
|
||||||
// Clang and GCC hoops...
|
// Clang and GCC hoops...
|
||||||
|
|
@ -56,6 +57,10 @@
|
||||||
# define EXCEPTION_EXECUTE_HANDLER 1
|
# define EXCEPTION_EXECUTE_HANDLER 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Clang __m128i casts
|
||||||
|
#define M128_CAST(x) ((__m128i *)(void *)(x))
|
||||||
|
#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
|
||||||
|
|
||||||
NAMESPACE_BEGIN(CryptoPP)
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||||
|
|
@ -220,30 +225,42 @@ extern const word32 SHA256_K[64];
|
||||||
|
|
||||||
#if CRYPTOPP_SHANI_AVAILABLE
|
#if CRYPTOPP_SHANI_AVAILABLE
|
||||||
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
|
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
|
||||||
void SHA1_Transform_SHANI(word32 *state, const word32 *data)
|
void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order)
|
||||||
{
|
{
|
||||||
|
CRYPTOPP_ASSERT(state);
|
||||||
|
CRYPTOPP_ASSERT(data);
|
||||||
|
CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
|
||||||
|
|
||||||
__m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
|
__m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
|
||||||
__m128i MASK, MSG0, MSG1, MSG2, MSG3;
|
__m128i MASK, MSG0, MSG1, MSG2, MSG3;
|
||||||
|
|
||||||
// Load initial values
|
// Load initial values
|
||||||
ABCD = _mm_loadu_si128((__m128i*) state);
|
ABCD = _mm_loadu_si128(CONST_M128_CAST(state));
|
||||||
E0 = _mm_set_epi32(state[4], 0, 0, 0);
|
E0 = _mm_set_epi32(state[4], 0, 0, 0);
|
||||||
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
|
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
|
||||||
MASK = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
|
|
||||||
|
|
||||||
|
// IA-32 SHA is little endian, SHA::Transform is big endian,
|
||||||
|
// and SHA::HashMultipleBlocks can be either. ByteOrder
|
||||||
|
// allows us to avoid extra endian reversals. It saves 1.0 cpb.
|
||||||
|
MASK = order == BIG_ENDIAN_ORDER ? // Data arrangement
|
||||||
|
_mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15) :
|
||||||
|
_mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12) ;
|
||||||
|
|
||||||
|
while (length >= SHA1::BLOCKSIZE)
|
||||||
|
{
|
||||||
// Save current hash
|
// Save current hash
|
||||||
ABCD_SAVE = ABCD;
|
ABCD_SAVE = ABCD;
|
||||||
E0_SAVE = E0;
|
E0_SAVE = E0;
|
||||||
|
|
||||||
// Rounds 0-3
|
// Rounds 0-3
|
||||||
MSG0 = _mm_loadu_si128((__m128i*) data+0);
|
MSG0 = _mm_loadu_si128(CONST_M128_CAST(data+0));
|
||||||
MSG0 = _mm_shuffle_epi8(MSG0, MASK);
|
MSG0 = _mm_shuffle_epi8(MSG0, MASK);
|
||||||
E0 = _mm_add_epi32(E0, MSG0);
|
E0 = _mm_add_epi32(E0, MSG0);
|
||||||
E1 = ABCD;
|
E1 = ABCD;
|
||||||
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
|
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
|
||||||
|
|
||||||
// Rounds 4-7
|
// Rounds 4-7
|
||||||
MSG1 = _mm_loadu_si128((__m128i*) (data+4));
|
MSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4));
|
||||||
MSG1 = _mm_shuffle_epi8(MSG1, MASK);
|
MSG1 = _mm_shuffle_epi8(MSG1, MASK);
|
||||||
E1 = _mm_sha1nexte_epu32(E1, MSG1);
|
E1 = _mm_sha1nexte_epu32(E1, MSG1);
|
||||||
E0 = ABCD;
|
E0 = ABCD;
|
||||||
|
|
@ -251,7 +268,7 @@ void SHA1_Transform_SHANI(word32 *state, const word32 *data)
|
||||||
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
|
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
|
||||||
|
|
||||||
// Rounds 8-11
|
// Rounds 8-11
|
||||||
MSG2 = _mm_loadu_si128((__m128i*) (data+8));
|
MSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8));
|
||||||
MSG2 = _mm_shuffle_epi8(MSG2, MASK);
|
MSG2 = _mm_shuffle_epi8(MSG2, MASK);
|
||||||
E0 = _mm_sha1nexte_epu32(E0, MSG2);
|
E0 = _mm_sha1nexte_epu32(E0, MSG2);
|
||||||
E1 = ABCD;
|
E1 = ABCD;
|
||||||
|
|
@ -260,7 +277,7 @@ void SHA1_Transform_SHANI(word32 *state, const word32 *data)
|
||||||
MSG0 = _mm_xor_si128(MSG0, MSG2);
|
MSG0 = _mm_xor_si128(MSG0, MSG2);
|
||||||
|
|
||||||
// Rounds 12-15
|
// Rounds 12-15
|
||||||
MSG3 = _mm_loadu_si128((__m128i*) (data+12));
|
MSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12));
|
||||||
MSG3 = _mm_shuffle_epi8(MSG3, MASK);
|
MSG3 = _mm_shuffle_epi8(MSG3, MASK);
|
||||||
E1 = _mm_sha1nexte_epu32(E1, MSG3);
|
E1 = _mm_sha1nexte_epu32(E1, MSG3);
|
||||||
E0 = ABCD;
|
E0 = ABCD;
|
||||||
|
|
@ -395,17 +412,22 @@ void SHA1_Transform_SHANI(word32 *state, const word32 *data)
|
||||||
E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
|
E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
|
||||||
ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);
|
ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);
|
||||||
|
|
||||||
|
data += SHA1::BLOCKSIZE/sizeof(word32);
|
||||||
|
length -= SHA1::BLOCKSIZE;
|
||||||
|
}
|
||||||
|
|
||||||
// Save state
|
// Save state
|
||||||
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
|
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
|
||||||
_mm_storeu_si128((__m128i*) state, ABCD);
|
_mm_storeu_si128(M128_CAST(state), ABCD);
|
||||||
state[4] = _mm_extract_epi32(E0, 3);
|
state[4] = _mm_extract_epi32(E0, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
|
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
|
||||||
void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data, size_t length)
|
void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order)
|
||||||
{
|
{
|
||||||
CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data);
|
CRYPTOPP_ASSERT(state);
|
||||||
CRYPTOPP_ASSERT(length % 64 == 0);
|
CRYPTOPP_ASSERT(data);
|
||||||
|
CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
|
||||||
|
|
||||||
__m128i STATE0, STATE1;
|
__m128i STATE0, STATE1;
|
||||||
__m128i MSG, TMP, MASK;
|
__m128i MSG, TMP, MASK;
|
||||||
|
|
@ -413,24 +435,29 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
|
||||||
__m128i ABEF_SAVE, CDGH_SAVE;
|
__m128i ABEF_SAVE, CDGH_SAVE;
|
||||||
|
|
||||||
// Load initial values
|
// Load initial values
|
||||||
TMP = _mm_loadu_si128((__m128i*) &state[0]);
|
TMP = _mm_loadu_si128(M128_CAST(&state[0]));
|
||||||
STATE1 = _mm_loadu_si128((__m128i*) &state[4]);
|
STATE1 = _mm_loadu_si128(M128_CAST(&state[4]));
|
||||||
MASK = _mm_set_epi64x(W64LIT(0x0c0d0e0f08090a0b), W64LIT(0x0405060700010203));
|
|
||||||
|
// IA-32 SHA is little endian, SHA::Transform is big endian,
|
||||||
|
// and SHA::HashMultipleBlocks can be either. ByteOrder
|
||||||
|
// allows us to avoid extra endian reversals. It saves 1.0 cpb.
|
||||||
|
MASK = order == BIG_ENDIAN_ORDER ? // Data arrangement
|
||||||
|
_mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3) :
|
||||||
|
_mm_set_epi8(15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0) ;
|
||||||
|
|
||||||
TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB
|
TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB
|
||||||
STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH
|
STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH
|
||||||
STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF
|
STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF
|
||||||
STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); // CDGH
|
STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); // CDGH
|
||||||
|
|
||||||
const size_t BLOCKSIZE = 64;
|
while (length >= SHA256::BLOCKSIZE)
|
||||||
while (length >= BLOCKSIZE)
|
|
||||||
{
|
{
|
||||||
// Save current hash
|
// Save current hash
|
||||||
ABEF_SAVE = STATE0;
|
ABEF_SAVE = STATE0;
|
||||||
CDGH_SAVE = STATE1;
|
CDGH_SAVE = STATE1;
|
||||||
|
|
||||||
// Rounds 0-3
|
// Rounds 0-3
|
||||||
MSG = _mm_loadu_si128((__m128i*) data+0);
|
MSG = _mm_loadu_si128(CONST_M128_CAST(data+0));
|
||||||
TMSG0 = _mm_shuffle_epi8(MSG, MASK);
|
TMSG0 = _mm_shuffle_epi8(MSG, MASK);
|
||||||
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98)));
|
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98)));
|
||||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||||
|
|
@ -438,7 +465,7 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
|
||||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||||
|
|
||||||
// Rounds 4-7
|
// Rounds 4-7
|
||||||
TMSG1 = _mm_loadu_si128((__m128i*) (data+4));
|
TMSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4));
|
||||||
TMSG1 = _mm_shuffle_epi8(TMSG1, MASK);
|
TMSG1 = _mm_shuffle_epi8(TMSG1, MASK);
|
||||||
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B)));
|
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B)));
|
||||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||||
|
|
@ -447,7 +474,7 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
|
||||||
TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
|
TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
|
||||||
|
|
||||||
// Rounds 8-11
|
// Rounds 8-11
|
||||||
TMSG2 = _mm_loadu_si128((__m128i*) (data+8));
|
TMSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8));
|
||||||
TMSG2 = _mm_shuffle_epi8(TMSG2, MASK);
|
TMSG2 = _mm_shuffle_epi8(TMSG2, MASK);
|
||||||
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98)));
|
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98)));
|
||||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||||
|
|
@ -456,7 +483,7 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
|
||||||
TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
|
TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
|
||||||
|
|
||||||
// Rounds 12-15
|
// Rounds 12-15
|
||||||
TMSG3 = _mm_loadu_si128((__m128i*) (data+12));
|
TMSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12));
|
||||||
TMSG3 = _mm_shuffle_epi8(TMSG3, MASK);
|
TMSG3 = _mm_shuffle_epi8(TMSG3, MASK);
|
||||||
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74)));
|
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74)));
|
||||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||||
|
|
@ -585,8 +612,8 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
|
||||||
STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
|
STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
|
||||||
STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);
|
STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);
|
||||||
|
|
||||||
data += BLOCKSIZE/sizeof(word32);
|
data += SHA256::BLOCKSIZE/sizeof(word32);
|
||||||
length -= BLOCKSIZE;
|
length -= SHA256::BLOCKSIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA
|
TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA
|
||||||
|
|
@ -595,8 +622,8 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
|
||||||
STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF
|
STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF
|
||||||
|
|
||||||
// Save state
|
// Save state
|
||||||
_mm_storeu_si128((__m128i*) &state[0], STATE0);
|
_mm_storeu_si128(M128_CAST(&state[0]), STATE0);
|
||||||
_mm_storeu_si128((__m128i*) &state[4], STATE1);
|
_mm_storeu_si128(M128_CAST(&state[4]), STATE1);
|
||||||
}
|
}
|
||||||
#endif // CRYPTOPP_SHANI_AVAILABLE
|
#endif // CRYPTOPP_SHANI_AVAILABLE
|
||||||
|
|
||||||
|
|
@ -609,8 +636,12 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#if CRYPTOPP_ARM_SHA_AVAILABLE
|
#if CRYPTOPP_ARM_SHA_AVAILABLE
|
||||||
void SHA1_Transform_ARMV8(word32 *state, const word32 *data)
|
void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order)
|
||||||
{
|
{
|
||||||
|
CRYPTOPP_ASSERT(state);
|
||||||
|
CRYPTOPP_ASSERT(data);
|
||||||
|
CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
|
||||||
|
|
||||||
uint32x4_t C0, C1, C2, C3;
|
uint32x4_t C0, C1, C2, C3;
|
||||||
uint32x4_t ABCD, ABCD_SAVED;
|
uint32x4_t ABCD, ABCD_SAVED;
|
||||||
uint32x4_t MSG0, MSG1, MSG2, MSG3;
|
uint32x4_t MSG0, MSG1, MSG2, MSG3;
|
||||||
|
|
@ -626,6 +657,8 @@ void SHA1_Transform_ARMV8(word32 *state, const word32 *data)
|
||||||
ABCD = vld1q_u32(&state[0]);
|
ABCD = vld1q_u32(&state[0]);
|
||||||
E0 = state[4];
|
E0 = state[4];
|
||||||
|
|
||||||
|
while (length >= SHA1::BLOCKSIZE)
|
||||||
|
{
|
||||||
// Save current hash
|
// Save current hash
|
||||||
ABCD_SAVED = ABCD;
|
ABCD_SAVED = ABCD;
|
||||||
E0_SAVED = E0;
|
E0_SAVED = E0;
|
||||||
|
|
@ -635,6 +668,14 @@ void SHA1_Transform_ARMV8(word32 *state, const word32 *data)
|
||||||
MSG2 = vld1q_u32(data + 8);
|
MSG2 = vld1q_u32(data + 8);
|
||||||
MSG3 = vld1q_u32(data + 12);
|
MSG3 = vld1q_u32(data + 12);
|
||||||
|
|
||||||
|
if (order == BIG_ENDIAN_ORDER) // Data arrangement
|
||||||
|
{
|
||||||
|
MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0)));
|
||||||
|
MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1)));
|
||||||
|
MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2)));
|
||||||
|
MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3)));
|
||||||
|
}
|
||||||
|
|
||||||
TMP0 = vaddq_u32(MSG0, C0);
|
TMP0 = vaddq_u32(MSG0, C0);
|
||||||
TMP1 = vaddq_u32(MSG1, C0);
|
TMP1 = vaddq_u32(MSG1, C0);
|
||||||
|
|
||||||
|
|
@ -773,13 +814,21 @@ void SHA1_Transform_ARMV8(word32 *state, const word32 *data)
|
||||||
E0 += E0_SAVED;
|
E0 += E0_SAVED;
|
||||||
ABCD = vaddq_u32(ABCD_SAVED, ABCD);
|
ABCD = vaddq_u32(ABCD_SAVED, ABCD);
|
||||||
|
|
||||||
|
data += SHA1::BLOCKSIZE/sizeof(word32);
|
||||||
|
length -= SHA1::BLOCKSIZE;
|
||||||
|
}
|
||||||
|
|
||||||
// Save state
|
// Save state
|
||||||
vst1q_u32(&state[0], ABCD);
|
vst1q_u32(&state[0], ABCD);
|
||||||
state[4] = E0;
|
state[4] = E0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8(word32 *state, const word32 *data, size_t length)
|
void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order)
|
||||||
{
|
{
|
||||||
|
CRYPTOPP_ASSERT(state);
|
||||||
|
CRYPTOPP_ASSERT(data);
|
||||||
|
CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
|
||||||
|
|
||||||
uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
|
uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
|
||||||
uint32x4_t MSG0, MSG1, MSG2, MSG3;
|
uint32x4_t MSG0, MSG1, MSG2, MSG3;
|
||||||
uint32x4_t TMP0, TMP1, TMP2;
|
uint32x4_t TMP0, TMP1, TMP2;
|
||||||
|
|
@ -788,8 +837,7 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8(word32 *state, const word32 *data
|
||||||
STATE0 = vld1q_u32(&state[0]);
|
STATE0 = vld1q_u32(&state[0]);
|
||||||
STATE1 = vld1q_u32(&state[4]);
|
STATE1 = vld1q_u32(&state[4]);
|
||||||
|
|
||||||
const size_t BLOCKSIZE = 64;
|
while (length >= SHA256::BLOCKSIZE)
|
||||||
while (length >= BLOCKSIZE)
|
|
||||||
{
|
{
|
||||||
// Save current hash
|
// Save current hash
|
||||||
ABEF_SAVE = STATE0;
|
ABEF_SAVE = STATE0;
|
||||||
|
|
@ -801,6 +849,14 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8(word32 *state, const word32 *data
|
||||||
MSG2 = vld1q_u32(data + 8);
|
MSG2 = vld1q_u32(data + 8);
|
||||||
MSG3 = vld1q_u32(data + 12);
|
MSG3 = vld1q_u32(data + 12);
|
||||||
|
|
||||||
|
if (order == BIG_ENDIAN_ORDER) // Data arrangement
|
||||||
|
{
|
||||||
|
MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0)));
|
||||||
|
MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1)));
|
||||||
|
MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2)));
|
||||||
|
MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3)));
|
||||||
|
}
|
||||||
|
|
||||||
TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x00]));
|
TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x00]));
|
||||||
|
|
||||||
// Rounds 0-3
|
// Rounds 0-3
|
||||||
|
|
@ -926,8 +982,8 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8(word32 *state, const word32 *data
|
||||||
STATE0 = vaddq_u32(STATE0, ABEF_SAVE);
|
STATE0 = vaddq_u32(STATE0, ABEF_SAVE);
|
||||||
STATE1 = vaddq_u32(STATE1, CDGH_SAVE);
|
STATE1 = vaddq_u32(STATE1, CDGH_SAVE);
|
||||||
|
|
||||||
data += BLOCKSIZE/sizeof(word32);
|
data += SHA256::BLOCKSIZE/sizeof(word32);
|
||||||
length -= BLOCKSIZE;
|
length -= SHA256::BLOCKSIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save state
|
// Save state
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue