Fix sources after sync with upstream

pull/461/head
Jeffrey Walton 2017-08-14 03:19:20 -04:00
parent c44f32d683
commit a25e63fcb7
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
3 changed files with 495 additions and 1254 deletions

View File

@ -134,7 +134,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
// Avoid "parameter not used" error and suppress Coverity finding // Avoid "parameter not used" error and suppress Coverity finding
(void)params.GetIntValue(Name::TableSize(), tableSize); (void)params.GetIntValue(Name::TableSize(), tableSize);
tableSize = s_cltableSizeInBlocks * blockSize; tableSize = s_cltableSizeInBlocks * blockSize;
CRYPTOPP_ASSERT(tableSize > blockSize); CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
} }
else else
#elif CRYPTOPP_ARM_PMULL_AVAILABLE #elif CRYPTOPP_ARM_PMULL_AVAILABLE
@ -143,7 +143,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
// Avoid "parameter not used" error and suppress Coverity finding // Avoid "parameter not used" error and suppress Coverity finding
(void)params.GetIntValue(Name::TableSize(), tableSize); (void)params.GetIntValue(Name::TableSize(), tableSize);
tableSize = s_cltableSizeInBlocks * blockSize; tableSize = s_cltableSizeInBlocks * blockSize;
CRYPTOPP_ASSERT(tableSize > blockSize); CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
} }
else else
#endif #endif

View File

@ -8,6 +8,7 @@
#include "pch.h" #include "pch.h"
#include "config.h" #include "config.h"
#include "sha.h"
#include "misc.h" #include "misc.h"
// Clang and GCC hoops... // Clang and GCC hoops...
@ -56,6 +57,10 @@
# define EXCEPTION_EXECUTE_HANDLER 1 # define EXCEPTION_EXECUTE_HANDLER 1
#endif #endif
// Clang __m128i casts
#define M128_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
NAMESPACE_BEGIN(CryptoPP) NAMESPACE_BEGIN(CryptoPP)
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
@ -220,30 +225,42 @@ extern const word32 SHA256_K[64];
#if CRYPTOPP_SHANI_AVAILABLE #if CRYPTOPP_SHANI_AVAILABLE
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley. // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
void SHA1_Transform_SHANI(word32 *state, const word32 *data) void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order)
{ {
CRYPTOPP_ASSERT(state);
CRYPTOPP_ASSERT(data);
CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
__m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1; __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
__m128i MASK, MSG0, MSG1, MSG2, MSG3; __m128i MASK, MSG0, MSG1, MSG2, MSG3;
// Load initial values // Load initial values
ABCD = _mm_loadu_si128((__m128i*) state); ABCD = _mm_loadu_si128(CONST_M128_CAST(state));
E0 = _mm_set_epi32(state[4], 0, 0, 0); E0 = _mm_set_epi32(state[4], 0, 0, 0);
ABCD = _mm_shuffle_epi32(ABCD, 0x1B); ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
MASK = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
// IA-32 SHA is little endian, SHA::Transform is big endian,
// and SHA::HashMultipleBlocks can be either. ByteOrder
// allows us to avoid extra endian reversals. It saves 1.0 cpb.
MASK = order == BIG_ENDIAN_ORDER ? // Data arrangement
_mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15) :
_mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12) ;
while (length >= SHA1::BLOCKSIZE)
{
// Save current hash // Save current hash
ABCD_SAVE = ABCD; ABCD_SAVE = ABCD;
E0_SAVE = E0; E0_SAVE = E0;
// Rounds 0-3 // Rounds 0-3
MSG0 = _mm_loadu_si128((__m128i*) data+0); MSG0 = _mm_loadu_si128(CONST_M128_CAST(data+0));
MSG0 = _mm_shuffle_epi8(MSG0, MASK); MSG0 = _mm_shuffle_epi8(MSG0, MASK);
E0 = _mm_add_epi32(E0, MSG0); E0 = _mm_add_epi32(E0, MSG0);
E1 = ABCD; E1 = ABCD;
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
// Rounds 4-7 // Rounds 4-7
MSG1 = _mm_loadu_si128((__m128i*) (data+4)); MSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4));
MSG1 = _mm_shuffle_epi8(MSG1, MASK); MSG1 = _mm_shuffle_epi8(MSG1, MASK);
E1 = _mm_sha1nexte_epu32(E1, MSG1); E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD; E0 = ABCD;
@ -251,7 +268,7 @@ void SHA1_Transform_SHANI(word32 *state, const word32 *data)
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
// Rounds 8-11 // Rounds 8-11
MSG2 = _mm_loadu_si128((__m128i*) (data+8)); MSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8));
MSG2 = _mm_shuffle_epi8(MSG2, MASK); MSG2 = _mm_shuffle_epi8(MSG2, MASK);
E0 = _mm_sha1nexte_epu32(E0, MSG2); E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD; E1 = ABCD;
@ -260,7 +277,7 @@ void SHA1_Transform_SHANI(word32 *state, const word32 *data)
MSG0 = _mm_xor_si128(MSG0, MSG2); MSG0 = _mm_xor_si128(MSG0, MSG2);
// Rounds 12-15 // Rounds 12-15
MSG3 = _mm_loadu_si128((__m128i*) (data+12)); MSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12));
MSG3 = _mm_shuffle_epi8(MSG3, MASK); MSG3 = _mm_shuffle_epi8(MSG3, MASK);
E1 = _mm_sha1nexte_epu32(E1, MSG3); E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD; E0 = ABCD;
@ -395,17 +412,22 @@ void SHA1_Transform_SHANI(word32 *state, const word32 *data)
E0 = _mm_sha1nexte_epu32(E0, E0_SAVE); E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
ABCD = _mm_add_epi32(ABCD, ABCD_SAVE); ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);
data += SHA1::BLOCKSIZE/sizeof(word32);
length -= SHA1::BLOCKSIZE;
}
// Save state // Save state
ABCD = _mm_shuffle_epi32(ABCD, 0x1B); ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
_mm_storeu_si128((__m128i*) state, ABCD); _mm_storeu_si128(M128_CAST(state), ABCD);
state[4] = _mm_extract_epi32(E0, 3); state[4] = _mm_extract_epi32(E0, 3);
} }
// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley. // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data, size_t length) void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order)
{ {
CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data); CRYPTOPP_ASSERT(state);
CRYPTOPP_ASSERT(length % 64 == 0); CRYPTOPP_ASSERT(data);
CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
__m128i STATE0, STATE1; __m128i STATE0, STATE1;
__m128i MSG, TMP, MASK; __m128i MSG, TMP, MASK;
@ -413,24 +435,29 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
__m128i ABEF_SAVE, CDGH_SAVE; __m128i ABEF_SAVE, CDGH_SAVE;
// Load initial values // Load initial values
TMP = _mm_loadu_si128((__m128i*) &state[0]); TMP = _mm_loadu_si128(M128_CAST(&state[0]));
STATE1 = _mm_loadu_si128((__m128i*) &state[4]); STATE1 = _mm_loadu_si128(M128_CAST(&state[4]));
MASK = _mm_set_epi64x(W64LIT(0x0c0d0e0f08090a0b), W64LIT(0x0405060700010203));
// IA-32 SHA is little endian, SHA::Transform is big endian,
// and SHA::HashMultipleBlocks can be either. ByteOrder
// allows us to avoid extra endian reversals. It saves 1.0 cpb.
MASK = order == BIG_ENDIAN_ORDER ? // Data arrangement
_mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3) :
_mm_set_epi8(15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0) ;
TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB
STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH
STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF
STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); // CDGH STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); // CDGH
const size_t BLOCKSIZE = 64; while (length >= SHA256::BLOCKSIZE)
while (length >= BLOCKSIZE)
{ {
// Save current hash // Save current hash
ABEF_SAVE = STATE0; ABEF_SAVE = STATE0;
CDGH_SAVE = STATE1; CDGH_SAVE = STATE1;
// Rounds 0-3 // Rounds 0-3
MSG = _mm_loadu_si128((__m128i*) data+0); MSG = _mm_loadu_si128(CONST_M128_CAST(data+0));
TMSG0 = _mm_shuffle_epi8(MSG, MASK); TMSG0 = _mm_shuffle_epi8(MSG, MASK);
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98))); MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98)));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
@ -438,7 +465,7 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
// Rounds 4-7 // Rounds 4-7
TMSG1 = _mm_loadu_si128((__m128i*) (data+4)); TMSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4));
TMSG1 = _mm_shuffle_epi8(TMSG1, MASK); TMSG1 = _mm_shuffle_epi8(TMSG1, MASK);
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B))); MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B)));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
@ -447,7 +474,7 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1); TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
// Rounds 8-11 // Rounds 8-11
TMSG2 = _mm_loadu_si128((__m128i*) (data+8)); TMSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8));
TMSG2 = _mm_shuffle_epi8(TMSG2, MASK); TMSG2 = _mm_shuffle_epi8(TMSG2, MASK);
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98))); MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98)));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
@ -456,7 +483,7 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2); TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
// Rounds 12-15 // Rounds 12-15
TMSG3 = _mm_loadu_si128((__m128i*) (data+12)); TMSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12));
TMSG3 = _mm_shuffle_epi8(TMSG3, MASK); TMSG3 = _mm_shuffle_epi8(TMSG3, MASK);
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74))); MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74)));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
@ -585,8 +612,8 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE); STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE); STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);
data += BLOCKSIZE/sizeof(word32); data += SHA256::BLOCKSIZE/sizeof(word32);
length -= BLOCKSIZE; length -= SHA256::BLOCKSIZE;
} }
TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA
@ -595,8 +622,8 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF
// Save state // Save state
_mm_storeu_si128((__m128i*) &state[0], STATE0); _mm_storeu_si128(M128_CAST(&state[0]), STATE0);
_mm_storeu_si128((__m128i*) &state[4], STATE1); _mm_storeu_si128(M128_CAST(&state[4]), STATE1);
} }
#endif // CRYPTOPP_SHANI_AVAILABLE #endif // CRYPTOPP_SHANI_AVAILABLE
@ -609,8 +636,12 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_SHANI(word32 *state, const word32 *data
///////////////////////////////////////////////////////// /////////////////////////////////////////////////////////
#if CRYPTOPP_ARM_SHA_AVAILABLE #if CRYPTOPP_ARM_SHA_AVAILABLE
void SHA1_Transform_ARMV8(word32 *state, const word32 *data) void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order)
{ {
CRYPTOPP_ASSERT(state);
CRYPTOPP_ASSERT(data);
CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
uint32x4_t C0, C1, C2, C3; uint32x4_t C0, C1, C2, C3;
uint32x4_t ABCD, ABCD_SAVED; uint32x4_t ABCD, ABCD_SAVED;
uint32x4_t MSG0, MSG1, MSG2, MSG3; uint32x4_t MSG0, MSG1, MSG2, MSG3;
@ -626,6 +657,8 @@ void SHA1_Transform_ARMV8(word32 *state, const word32 *data)
ABCD = vld1q_u32(&state[0]); ABCD = vld1q_u32(&state[0]);
E0 = state[4]; E0 = state[4];
while (length >= SHA1::BLOCKSIZE)
{
// Save current hash // Save current hash
ABCD_SAVED = ABCD; ABCD_SAVED = ABCD;
E0_SAVED = E0; E0_SAVED = E0;
@ -635,6 +668,14 @@ void SHA1_Transform_ARMV8(word32 *state, const word32 *data)
MSG2 = vld1q_u32(data + 8); MSG2 = vld1q_u32(data + 8);
MSG3 = vld1q_u32(data + 12); MSG3 = vld1q_u32(data + 12);
if (order == BIG_ENDIAN_ORDER) // Data arrangement
{
MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0)));
MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1)));
MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2)));
MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3)));
}
TMP0 = vaddq_u32(MSG0, C0); TMP0 = vaddq_u32(MSG0, C0);
TMP1 = vaddq_u32(MSG1, C0); TMP1 = vaddq_u32(MSG1, C0);
@ -773,13 +814,21 @@ void SHA1_Transform_ARMV8(word32 *state, const word32 *data)
E0 += E0_SAVED; E0 += E0_SAVED;
ABCD = vaddq_u32(ABCD_SAVED, ABCD); ABCD = vaddq_u32(ABCD_SAVED, ABCD);
data += SHA1::BLOCKSIZE/sizeof(word32);
length -= SHA1::BLOCKSIZE;
}
// Save state // Save state
vst1q_u32(&state[0], ABCD); vst1q_u32(&state[0], ABCD);
state[4] = E0; state[4] = E0;
} }
void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8(word32 *state, const word32 *data, size_t length) void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order)
{ {
CRYPTOPP_ASSERT(state);
CRYPTOPP_ASSERT(data);
CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE; uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
uint32x4_t MSG0, MSG1, MSG2, MSG3; uint32x4_t MSG0, MSG1, MSG2, MSG3;
uint32x4_t TMP0, TMP1, TMP2; uint32x4_t TMP0, TMP1, TMP2;
@ -788,8 +837,7 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8(word32 *state, const word32 *data
STATE0 = vld1q_u32(&state[0]); STATE0 = vld1q_u32(&state[0]);
STATE1 = vld1q_u32(&state[4]); STATE1 = vld1q_u32(&state[4]);
const size_t BLOCKSIZE = 64; while (length >= SHA256::BLOCKSIZE)
while (length >= BLOCKSIZE)
{ {
// Save current hash // Save current hash
ABEF_SAVE = STATE0; ABEF_SAVE = STATE0;
@ -801,6 +849,14 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8(word32 *state, const word32 *data
MSG2 = vld1q_u32(data + 8); MSG2 = vld1q_u32(data + 8);
MSG3 = vld1q_u32(data + 12); MSG3 = vld1q_u32(data + 12);
if (order == BIG_ENDIAN_ORDER) // Data arrangement
{
MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0)));
MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1)));
MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2)));
MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3)));
}
TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x00])); TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x00]));
// Rounds 0-3 // Rounds 0-3
@ -926,8 +982,8 @@ void CRYPTOPP_FASTCALL SHA256_HashBlocks_ARMV8(word32 *state, const word32 *data
STATE0 = vaddq_u32(STATE0, ABEF_SAVE); STATE0 = vaddq_u32(STATE0, ABEF_SAVE);
STATE1 = vaddq_u32(STATE1, CDGH_SAVE); STATE1 = vaddq_u32(STATE1, CDGH_SAVE);
data += BLOCKSIZE/sizeof(word32); data += SHA256::BLOCKSIZE/sizeof(word32);
length -= BLOCKSIZE; length -= SHA256::BLOCKSIZE;
} }
// Save state // Save state

915
sha.cpp

File diff suppressed because it is too large Load Diff