From 2aff92ddb6e679fca04432f01a1a16a035e33008 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 13 Aug 2017 16:05:39 -0400 Subject: [PATCH 1/4] Fix bad SHA::Transform calculation (Issue 455) Reworked SHA class internals to align all the implementations. Formerly all hashes were software based, IterHashBase handled endian conversions, IterHashBase repeatedly called the single block SHA{N}::Transform. The rework added SHA{N}::HashMultipleBlocks, and the SHA classes attempt to always use it. Now SHA{N}::Transform calls into SHA{N}_HashMultipleBlocks, which is a free standing function. An added wrinkle is hardware wants little endian data and software presents big endian data, so HashMultipleBlocks accepts a ByteOrder for the incoming data. Hardware based SHA{N}_HashMultipleBlocks can often perform the endian swap much easier by setting an EPI mask so it was profitable to defer to hardware when available. The rework also removed the hacked-in pointers to implementations. The class now looks more like AES, GCM, etc. --- rijndael.cpp | 2 +- seal.cpp | 6 +- sha.cpp | 1093 ++++++++++++++++++++++++++------------------------ sha.h | 45 +-- x64dll.asm | 4 +- 5 files changed, 589 insertions(+), 561 deletions(-) diff --git a/rijndael.cpp b/rijndael.cpp index fce3d737..ef36acf1 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -95,7 +95,7 @@ static void Rijndael_Dec_ProcessAndXorBlock_ARMV8(const byte *inBlock, const byt # define MAYBE_CONST const #endif -// Clang casts +// Clang __m128i casts #define M128I_CAST(x) ((__m128i *)(void *)(x)) #define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x)) diff --git a/seal.cpp b/seal.cpp index 4108feb5..fef2656c 100644 --- a/seal.cpp +++ b/seal.cpp @@ -38,12 +38,8 @@ word32 SEAL_Gamma::Apply(word32 i) word32 shaIndex = i/5; if (shaIndex != lastIndex) { -#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE - D[0] = ConditionalByteReverse(HasSHA() ? 
BIG_ENDIAN_ORDER : LITTLE_ENDIAN_ORDER, shaIndex); -#else - D[0] = shaIndex; -#endif memcpy(Z, H, 20); + D[0] = shaIndex; SHA1::Transform(Z, D); lastIndex = shaIndex; } diff --git a/sha.cpp b/sha.cpp index 935adc38..72a82e61 100644 --- a/sha.cpp +++ b/sha.cpp @@ -6,6 +6,16 @@ // code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke. // All code is in the public domain. +// In August 2017 Walton reworked the internals to align all the implementations. +// Formerly all hashes were software based, IterHashBase handled endian conversions, +// IterHashBase repeatedly called the single block SHA{N}::Transform. The rework +// added SHA{N}::HashMultipleBlocks, and the SHA classes attempt to always use it. +// Now SHA{N}::Transform calls into SHA{N}::HashMultipleBlocks. An added wrinkle is +// hardware is little endian and software is big endian, so HashMultipleBlocks +// accepts a ByteOrder for the incoming data. Hardware based SHA{N}::HashMultipleBlocks +// can often perform the endian swap much easier by setting an EPI mask. The rework +// also removed the hacked-in pointers to implementations. 
+ // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code #include "pch.h" @@ -30,11 +40,11 @@ # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE #endif -NAMESPACE_BEGIN(CryptoPP) +// Clang __m128i casts +#define M128_CAST(x) ((__m128i *)(void *)(x)) +#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x)) -// Function pointer for specific SHA1 or SHA256 Transform function -typedef void (*pfnSHATransform)(word32 *state, const word32 *data); -typedef void (CRYPTOPP_FASTCALL *pfnSHAHashBlocks)(word32 *state, const word32 *data, size_t length); +NAMESPACE_BEGIN(CryptoPP) //////////////////////////////// // start of Steve Reid's code // @@ -55,8 +65,11 @@ typedef void (CRYPTOPP_FASTCALL *pfnSHAHashBlocks)(word32 *state, const word32 * #define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30); #define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30); -static void SHA1_CXX_Transform(word32 *state, const word32 *data) +static void SHA1_CXX_HashBlock(word32 *state, const word32 *data) { + CRYPTOPP_ASSERT(state); + CRYPTOPP_ASSERT(data); + word32 W[16]; /* Copy context->state[] to working vars */ word32 a = state[0]; @@ -103,184 +116,200 @@ static void SHA1_CXX_Transform(word32 *state, const word32 *data) #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley. 
-static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data) +static void SHA1_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) { + CRYPTOPP_ASSERT(state); + CRYPTOPP_ASSERT(data); + CRYPTOPP_ASSERT(length >= 64); + __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1; __m128i MASK, MSG0, MSG1, MSG2, MSG3; // Load initial values - ABCD = _mm_loadu_si128((__m128i*) state); + ABCD = _mm_loadu_si128(CONST_M128_CAST(state)); E0 = _mm_set_epi32(state[4], 0, 0, 0); ABCD = _mm_shuffle_epi32(ABCD, 0x1B); - MASK = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15); - // Save current hash - ABCD_SAVE = ABCD; - E0_SAVE = E0; + // IA-32 SHA is little endian, SHA::Transform is big endian, + // and SHA::HashMultipleBlocks can be either. ByteOrder + // allows us to avoid extra endian reversals. It saves 1.0 cpb. + MASK = order == BIG_ENDIAN_ORDER ? // Data arrangement + _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15) : + _mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12) ; - // Rounds 0-3 - MSG0 = _mm_loadu_si128((__m128i*) data+0); - MSG0 = _mm_shuffle_epi8(MSG0, MASK); - E0 = _mm_add_epi32(E0, MSG0); - E1 = ABCD; - ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); + while (length >= 64) + { + // Save current hash + ABCD_SAVE = ABCD; + E0_SAVE = E0; - // Rounds 4-7 - MSG1 = _mm_loadu_si128((__m128i*) (data+4)); - MSG1 = _mm_shuffle_epi8(MSG1, MASK); - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); - MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); + // Rounds 0-3 + MSG0 = _mm_loadu_si128(CONST_M128_CAST(data+0)); + MSG0 = _mm_shuffle_epi8(MSG0, MASK); + E0 = _mm_add_epi32(E0, MSG0); + E1 = ABCD; + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); - // Rounds 8-11 - MSG2 = _mm_loadu_si128((__m128i*) (data+8)); - MSG2 = _mm_shuffle_epi8(MSG2, MASK); - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; - ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); - MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); - MSG0 = 
_mm_xor_si128(MSG0, MSG2); + // Rounds 4-7 + MSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4)); + MSG1 = _mm_shuffle_epi8(MSG1, MASK); + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); + MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); - // Rounds 12-15 - MSG3 = _mm_loadu_si128((__m128i*) (data+12)); - MSG3 = _mm_shuffle_epi8(MSG3, MASK); - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; - MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); - MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); - MSG1 = _mm_xor_si128(MSG1, MSG3); + // Rounds 8-11 + MSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8)); + MSG2 = _mm_shuffle_epi8(MSG2, MASK); + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); + MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); + MSG0 = _mm_xor_si128(MSG0, MSG2); - // Rounds 16-19 - E0 = _mm_sha1nexte_epu32(E0, MSG0); - E1 = ABCD; - MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); - ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); - MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); - MSG2 = _mm_xor_si128(MSG2, MSG0); + // Rounds 12-15 + MSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12)); + MSG3 = _mm_shuffle_epi8(MSG3, MASK); + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); + MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); + MSG1 = _mm_xor_si128(MSG1, MSG3); - // Rounds 20-23 - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; - MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); - MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); - MSG3 = _mm_xor_si128(MSG3, MSG1); + // Rounds 16-19 + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; + MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); + MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); + MSG2 = _mm_xor_si128(MSG2, MSG0); - // Rounds 24-27 - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; - MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); - ABCD = 
_mm_sha1rnds4_epu32(ABCD, E0, 1); - MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); - MSG0 = _mm_xor_si128(MSG0, MSG2); + // Rounds 20-23 + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; + MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); + MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); + MSG3 = _mm_xor_si128(MSG3, MSG1); - // Rounds 28-31 - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; - MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); - MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); - MSG1 = _mm_xor_si128(MSG1, MSG3); + // Rounds 24-27 + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1); + MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); + MSG0 = _mm_xor_si128(MSG0, MSG2); - // Rounds 32-35 - E0 = _mm_sha1nexte_epu32(E0, MSG0); - E1 = ABCD; - MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); - ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1); - MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); - MSG2 = _mm_xor_si128(MSG2, MSG0); + // Rounds 28-31 + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); + MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); + MSG1 = _mm_xor_si128(MSG1, MSG3); - // Rounds 36-39 - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; - MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); - MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); - MSG3 = _mm_xor_si128(MSG3, MSG1); + // Rounds 32-35 + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; + MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1); + MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); + MSG2 = _mm_xor_si128(MSG2, MSG0); - // Rounds 40-43 - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; - MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); - ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); - MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); - MSG0 = _mm_xor_si128(MSG0, MSG2); + // Rounds 36-39 + E1 = _mm_sha1nexte_epu32(E1, 
MSG1); + E0 = ABCD; + MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); + MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); + MSG3 = _mm_xor_si128(MSG3, MSG1); - // Rounds 44-47 - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; - MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); - MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); - MSG1 = _mm_xor_si128(MSG1, MSG3); + // Rounds 40-43 + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); + MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); + MSG0 = _mm_xor_si128(MSG0, MSG2); - // Rounds 48-51 - E0 = _mm_sha1nexte_epu32(E0, MSG0); - E1 = ABCD; - MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); - ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); - MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); - MSG2 = _mm_xor_si128(MSG2, MSG0); + // Rounds 44-47 + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); + MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); + MSG1 = _mm_xor_si128(MSG1, MSG3); - // Rounds 52-55 - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; - MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); - MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); - MSG3 = _mm_xor_si128(MSG3, MSG1); + // Rounds 48-51 + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; + MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); + MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); + MSG2 = _mm_xor_si128(MSG2, MSG0); - // Rounds 56-59 - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; - MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); - ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); - MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); - MSG0 = _mm_xor_si128(MSG0, MSG2); + // Rounds 52-55 + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; + MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); + MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); + MSG3 = 
_mm_xor_si128(MSG3, MSG1); - // Rounds 60-63 - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; - MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); - MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); - MSG1 = _mm_xor_si128(MSG1, MSG3); + // Rounds 56-59 + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); + MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); + MSG0 = _mm_xor_si128(MSG0, MSG2); - // Rounds 64-67 - E0 = _mm_sha1nexte_epu32(E0, MSG0); - E1 = ABCD; - MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); - ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); - MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); - MSG2 = _mm_xor_si128(MSG2, MSG0); + // Rounds 60-63 + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); + MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); + MSG1 = _mm_xor_si128(MSG1, MSG3); - // Rounds 68-71 - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; - MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); - MSG3 = _mm_xor_si128(MSG3, MSG1); + // Rounds 64-67 + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; + MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); + MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); + MSG2 = _mm_xor_si128(MSG2, MSG0); - // Rounds 72-75 - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; - MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); - ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); + // Rounds 68-71 + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; + MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); + MSG3 = _mm_xor_si128(MSG3, MSG1); - // Rounds 76-79 - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; - ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); + // Rounds 72-75 + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); - // Add values back to 
state - E0 = _mm_sha1nexte_epu32(E0, E0_SAVE); - ABCD = _mm_add_epi32(ABCD, ABCD_SAVE); + // Rounds 76-79 + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); + + // Add values back to state + E0 = _mm_sha1nexte_epu32(E0, E0_SAVE); + ABCD = _mm_add_epi32(ABCD, ABCD_SAVE); + + data += 16; + length -= 64; + } // Save state ABCD = _mm_shuffle_epi32(ABCD, 0x1B); - _mm_storeu_si128((__m128i*) state, ABCD); + _mm_storeu_si128(M128_CAST(state), ABCD); state[4] = _mm_extract_epi32(E0, 3); } #endif @@ -294,8 +323,12 @@ static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data) ////////////////////////////////////////////////////////////// #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data) +static void SHA1_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) { + CRYPTOPP_ASSERT(state); + CRYPTOPP_ASSERT(data); + CRYPTOPP_ASSERT(length >= 64); + uint32x4_t C0, C1, C2, C3; uint32x4_t ABCD, ABCD_SAVED; uint32x4_t MSG0, MSG1, MSG2, MSG3; @@ -311,152 +344,166 @@ static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data) ABCD = vld1q_u32(&state[0]); E0 = state[4]; - // Save current hash - ABCD_SAVED = ABCD; - E0_SAVED = E0; + while (length >= 64) + { + // Save current hash + ABCD_SAVED = ABCD; + E0_SAVED = E0; - MSG0 = vld1q_u32(data + 0); - MSG1 = vld1q_u32(data + 4); - MSG2 = vld1q_u32(data + 8); - MSG3 = vld1q_u32(data + 12); + MSG0 = vld1q_u32(data + 0); + MSG1 = vld1q_u32(data + 4); + MSG2 = vld1q_u32(data + 8); + MSG3 = vld1q_u32(data + 12); - TMP0 = vaddq_u32(MSG0, C0); - TMP1 = vaddq_u32(MSG1, C0); + if (order == BIG_ENDIAN_ORDER) // Data arrangement + { + MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0))); + MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1))); + MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2))); + MSG3 = 
vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3))); + } - // Rounds 0-3 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1cq_u32(ABCD, E0, TMP0); - TMP0 = vaddq_u32(MSG2, C0); - MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); + TMP0 = vaddq_u32(MSG0, C0); + TMP1 = vaddq_u32(MSG1, C0); - // Rounds 4-7 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1cq_u32(ABCD, E1, TMP1); - TMP1 = vaddq_u32(MSG3, C0); - MSG0 = vsha1su1q_u32(MSG0, MSG3); - MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); + // Rounds 0-3 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1cq_u32(ABCD, E0, TMP0); + TMP0 = vaddq_u32(MSG2, C0); + MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); - // Rounds 8-11 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1cq_u32(ABCD, E0, TMP0); - TMP0 = vaddq_u32(MSG0, C0); - MSG1 = vsha1su1q_u32(MSG1, MSG0); - MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); + // Rounds 4-7 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1cq_u32(ABCD, E1, TMP1); + TMP1 = vaddq_u32(MSG3, C0); + MSG0 = vsha1su1q_u32(MSG0, MSG3); + MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); - // Rounds 12-15 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1cq_u32(ABCD, E1, TMP1); - TMP1 = vaddq_u32(MSG1, C1); - MSG2 = vsha1su1q_u32(MSG2, MSG1); - MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); + // Rounds 8-11 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1cq_u32(ABCD, E0, TMP0); + TMP0 = vaddq_u32(MSG0, C0); + MSG1 = vsha1su1q_u32(MSG1, MSG0); + MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); - // Rounds 16-19 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1cq_u32(ABCD, E0, TMP0); - TMP0 = vaddq_u32(MSG2, C1); - MSG3 = vsha1su1q_u32(MSG3, MSG2); - MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); + // Rounds 12-15 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1cq_u32(ABCD, E1, TMP1); + TMP1 = vaddq_u32(MSG1, C1); + MSG2 = vsha1su1q_u32(MSG2, MSG1); + MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); - // Rounds 20-23 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = 
vsha1pq_u32(ABCD, E1, TMP1); - TMP1 = vaddq_u32(MSG3, C1); - MSG0 = vsha1su1q_u32(MSG0, MSG3); - MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); + // Rounds 16-19 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1cq_u32(ABCD, E0, TMP0); + TMP0 = vaddq_u32(MSG2, C1); + MSG3 = vsha1su1q_u32(MSG3, MSG2); + MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); - // Rounds 24-27 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1pq_u32(ABCD, E0, TMP0); - TMP0 = vaddq_u32(MSG0, C1); - MSG1 = vsha1su1q_u32(MSG1, MSG0); - MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); + // Rounds 20-23 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E1, TMP1); + TMP1 = vaddq_u32(MSG3, C1); + MSG0 = vsha1su1q_u32(MSG0, MSG3); + MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); - // Rounds 28-31 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1pq_u32(ABCD, E1, TMP1); - TMP1 = vaddq_u32(MSG1, C1); - MSG2 = vsha1su1q_u32(MSG2, MSG1); - MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); + // Rounds 24-27 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E0, TMP0); + TMP0 = vaddq_u32(MSG0, C1); + MSG1 = vsha1su1q_u32(MSG1, MSG0); + MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); - // Rounds 32-35 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1pq_u32(ABCD, E0, TMP0); - TMP0 = vaddq_u32(MSG2, C2); - MSG3 = vsha1su1q_u32(MSG3, MSG2); - MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); + // Rounds 28-31 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E1, TMP1); + TMP1 = vaddq_u32(MSG1, C1); + MSG2 = vsha1su1q_u32(MSG2, MSG1); + MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); - // Rounds 36-39 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1pq_u32(ABCD, E1, TMP1); - TMP1 = vaddq_u32(MSG3, C2); - MSG0 = vsha1su1q_u32(MSG0, MSG3); - MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); + // Rounds 32-35 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E0, TMP0); + TMP0 = vaddq_u32(MSG2, C2); + MSG3 = vsha1su1q_u32(MSG3, MSG2); + MSG0 = vsha1su0q_u32(MSG0, 
MSG1, MSG2); - // Rounds 40-43 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1mq_u32(ABCD, E0, TMP0); - TMP0 = vaddq_u32(MSG0, C2); - MSG1 = vsha1su1q_u32(MSG1, MSG0); - MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); + // Rounds 36-39 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E1, TMP1); + TMP1 = vaddq_u32(MSG3, C2); + MSG0 = vsha1su1q_u32(MSG0, MSG3); + MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); - // Rounds 44-47 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1mq_u32(ABCD, E1, TMP1); - TMP1 = vaddq_u32(MSG1, C2); - MSG2 = vsha1su1q_u32(MSG2, MSG1); - MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); + // Rounds 40-43 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1mq_u32(ABCD, E0, TMP0); + TMP0 = vaddq_u32(MSG0, C2); + MSG1 = vsha1su1q_u32(MSG1, MSG0); + MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); - // Rounds 48-51 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1mq_u32(ABCD, E0, TMP0); - TMP0 = vaddq_u32(MSG2, C2); - MSG3 = vsha1su1q_u32(MSG3, MSG2); - MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); + // Rounds 44-47 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1mq_u32(ABCD, E1, TMP1); + TMP1 = vaddq_u32(MSG1, C2); + MSG2 = vsha1su1q_u32(MSG2, MSG1); + MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); - // Rounds 52-55 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1mq_u32(ABCD, E1, TMP1); - TMP1 = vaddq_u32(MSG3, C3); - MSG0 = vsha1su1q_u32(MSG0, MSG3); - MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); + // Rounds 48-51 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1mq_u32(ABCD, E0, TMP0); + TMP0 = vaddq_u32(MSG2, C2); + MSG3 = vsha1su1q_u32(MSG3, MSG2); + MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); - // Rounds 56-59 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1mq_u32(ABCD, E0, TMP0); - TMP0 = vaddq_u32(MSG0, C3); - MSG1 = vsha1su1q_u32(MSG1, MSG0); - MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); + // Rounds 52-55 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1mq_u32(ABCD, E1, TMP1); + TMP1 = 
vaddq_u32(MSG3, C3); + MSG0 = vsha1su1q_u32(MSG0, MSG3); + MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); - // Rounds 60-63 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1pq_u32(ABCD, E1, TMP1); - TMP1 = vaddq_u32(MSG1, C3); - MSG2 = vsha1su1q_u32(MSG2, MSG1); - MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); + // Rounds 56-59 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1mq_u32(ABCD, E0, TMP0); + TMP0 = vaddq_u32(MSG0, C3); + MSG1 = vsha1su1q_u32(MSG1, MSG0); + MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); - // Rounds 64-67 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1pq_u32(ABCD, E0, TMP0); - TMP0 = vaddq_u32(MSG2, C3); - MSG3 = vsha1su1q_u32(MSG3, MSG2); - MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); + // Rounds 60-63 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E1, TMP1); + TMP1 = vaddq_u32(MSG1, C3); + MSG2 = vsha1su1q_u32(MSG2, MSG1); + MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); - // Rounds 68-71 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1pq_u32(ABCD, E1, TMP1); - TMP1 = vaddq_u32(MSG3, C3); - MSG0 = vsha1su1q_u32(MSG0, MSG3); + // Rounds 64-67 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E0, TMP0); + TMP0 = vaddq_u32(MSG2, C3); + MSG3 = vsha1su1q_u32(MSG3, MSG2); + MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); - // Rounds 72-75 - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1pq_u32(ABCD, E0, TMP0); + // Rounds 68-71 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E1, TMP1); + TMP1 = vaddq_u32(MSG3, C3); + MSG0 = vsha1su1q_u32(MSG0, MSG3); - // Rounds 76-79 - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); - ABCD = vsha1pq_u32(ABCD, E1, TMP1); + // Rounds 72-75 + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E0, TMP0); - E0 += E0_SAVED; - ABCD = vaddq_u32(ABCD_SAVED, ABCD); + // Rounds 76-79 + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + ABCD = vsha1pq_u32(ABCD, E1, TMP1); + + E0 += E0_SAVED; + ABCD = vaddq_u32(ABCD_SAVED, ABCD); + + data += 
16; + length -= 64; + } // Save state vst1q_u32(&state[0], ABCD); @@ -468,21 +515,6 @@ static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data) // end of Walton/Schneiders/O'Rourke/Hovsmith's code // /////////////////////////////////////////////////////// -pfnSHATransform InitializeSHA1Transform() -{ -#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE - if (HasSHA()) - return &SHA1_SSE_SHA_Transform; - else -#endif -#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE - if (HasSHA1()) - return &SHA1_ARM_SHA_Transform; - else -#endif - return &SHA1_CXX_Transform; -} - void SHA1::InitState(HashWordType *state) { state[0] = 0x67452301L; @@ -494,53 +526,75 @@ void SHA1::InitState(HashWordType *state) void SHA1::Transform(word32 *state, const word32 *data) { - static const pfnSHATransform s_pfn = InitializeSHA1Transform(); - s_pfn(state, data); -} + CRYPTOPP_ASSERT(state); + CRYPTOPP_ASSERT(data); #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE + if (HasSHA()) + { + SHA1_SHANI_HashMultipleBlocks(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER); + return; + } +#endif +#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE + if (HasSHA1()) + { + SHA1_ARM_SHA_HashMultipleBlocks(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER); + return; + } +#endif + + SHA1_CXX_HashBlock(state, data); +} + size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length) { - static const bool noReverse = HasSHA() || NativeByteOrderIs(this->GetByteOrder()); - const unsigned int blockSize = this->BlockSize(); - word32* dataBuf = this->DataBuf(); + CRYPTOPP_ASSERT(input); + CRYPTOPP_ASSERT(length >= 64); + +#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE + if (HasSHA()) + { + SHA1_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA1::BLOCKSIZE - 1); + } +#endif +#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE + if (HasSHA1()) + { + SHA1_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA1::BLOCKSIZE - 1); + 
} +#endif + + const bool noReverse = NativeByteOrderIs(this->GetByteOrder()); + word32 *dataBuf = this->DataBuf(); do { if (noReverse) - this->HashEndianCorrectedBlock(input); + { + // this->HashEndianCorrectedBlock(input); + SHA1_CXX_HashBlock(m_state, input); + } else { - ByteReverse(dataBuf, input, this->BlockSize()); - this->HashEndianCorrectedBlock(dataBuf); + ByteReverse(dataBuf, input, 64); + // this->HashEndianCorrectedBlock(dataBuf); + SHA1_CXX_HashBlock(m_state, dataBuf); } - input += blockSize/sizeof(word32); - length -= blockSize; + input += 16; + length -= 64; } - while (length >= blockSize); + while (length >= 64); return length; } -#endif // ************************************************************* -void SHA224::InitState(HashWordType *state) -{ - static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4}; - memcpy(state, s, sizeof(s)); -} +CRYPTOPP_ALIGN_DATA(16) +extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = { -void SHA256::InitState(HashWordType *state) -{ - static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; - memcpy(state, s, sizeof(s)); -} - -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = { -#else -extern const word32 SHA256_K[64] = { -#endif 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -559,11 +613,75 @@ extern const word32 SHA256_K[64] = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; +#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) + +#define Ch(x,y,z) (z^(x&(y^z))) +#define Maj(x,y,z) (y^((x^y)&(y^z))) + +#define a(i) T[(0-i)&7] +#define b(i) T[(1-i)&7] +#define c(i) T[(2-i)&7] +#define d(i) T[(3-i)&7] +#define e(i) T[(4-i)&7] +#define 
f(i) T[(5-i)&7] +#define g(i) T[(6-i)&7] +#define h(i) T[(7-i)&7] + +#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\ + d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)) + +// for SHA256 +#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22)) +#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25)) +#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3)) +#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10)) + +static void SHA256_CXX_HashBlock(word32 *state, const word32 *data) +{ + word32 W[16], T[8]; + /* Copy context->state[] to working vars */ + memcpy(T, state, sizeof(T)); + /* 64 operations, partially loop unrolled */ + for (unsigned int j=0; j<64; j+=16) + { + R( 0); R( 1); R( 2); R( 3); + R( 4); R( 5); R( 6); R( 7); + R( 8); R( 9); R(10); R(11); + R(12); R(13); R(14); R(15); + } + /* Add the working vars back into context.state[] */ + state[0] += a(0); + state[1] += b(0); + state[2] += c(0); + state[3] += d(0); + state[4] += e(0); + state[5] += f(0); + state[6] += g(0); + state[7] += h(0); +} + +#undef S0 +#undef S1 +#undef s0 +#undef s1 +#undef R + +void SHA224::InitState(HashWordType *state) +{ + static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4}; + memcpy(state, s, sizeof(s)); +} + +void SHA256::InitState(HashWordType *state) +{ + static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; + memcpy(state, s, sizeof(s)); +} #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)) -static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len) +static void CRYPTOPP_FASTCALL SHA256_SSE_HashMultipleBlocks(word32 *state, const word32 *data, size_t len) { #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ #define H(i) 
[BASE+ASM_MOD(1024+7-(i),8)*4] @@ -685,7 +803,7 @@ static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 INTEL_NOPREFIX #elif defined(CRYPTOPP_GENERATE_X64_MASM) ALIGN 8 - X86_SHA256_HashBlocks PROC FRAME + SHA256_SSE_HashMultipleBlocks PROC FRAME rex_push_reg rsi push_reg rdi push_reg rbx @@ -864,7 +982,7 @@ INTEL_NOPREFIX pop rdi pop rsi ret - X86_SHA256_HashBlocks ENDP + SHA256_SSE_HashMultipleBlocks ENDP #endif #ifdef __GNUC__ @@ -888,200 +1006,109 @@ INTEL_NOPREFIX #ifdef CRYPTOPP_X64_MASM_AVAILABLE extern "C" { -void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len); +void CRYPTOPP_FASTCALL SHA256_SSE_HashMultipleBlocks(word32 *state, const word32 *data, size_t len); } #endif #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE -static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length); +static void SHA256_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order); #elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length); +static void SHA256_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order); #endif -#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM) - -pfnSHAHashBlocks InitializeSHA256HashBlocks() -{ -#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE - if (HasSHA()) - return &SHA256_SSE_SHA_HashBlocks; - else -#endif -#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE - if (HasSHA2()) - return &SHA256_ARM_SHA_HashBlocks; - else -#endif - - return &X86_SHA256_HashBlocks; -} - size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length) { - static const pfnSHAHashBlocks s_pfn = InitializeSHA256HashBlocks(); - s_pfn(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - 
!HasSSE2()); - return length % BLOCKSIZE; + CRYPTOPP_ASSERT(input); + CRYPTOPP_ASSERT(length >= 64); + +#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE + if (HasSHA()) + { + SHA256_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA256::BLOCKSIZE - 1); + } +#endif +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + if (HasSSE2()) + { + const size_t res = length & (SHA256::BLOCKSIZE - 1); + SHA256_SSE_HashMultipleBlocks(m_state, input, length-res); + return res; + } +#endif +#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE + if (HasSHA2()) + { + SHA256_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA256::BLOCKSIZE - 1); + } +#endif + + const bool noReverse = NativeByteOrderIs(this->GetByteOrder()); + word32 *dataBuf = this->DataBuf(); + do + { + if (noReverse) + { + // this->HashEndianCorrectedBlock(input); + SHA256_CXX_HashBlock(m_state, input); + } + else + { + ByteReverse(dataBuf, input, SHA256::BLOCKSIZE); + // this->HashEndianCorrectedBlock(dataBuf); + SHA256_CXX_HashBlock(m_state, dataBuf); + } + + input += SHA256::BLOCKSIZE/sizeof(word32); + length -= SHA256::BLOCKSIZE; + } + while (length >= SHA256::BLOCKSIZE); + return length; } size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length) { - static const pfnSHAHashBlocks s_pfn = InitializeSHA256HashBlocks(); - s_pfn(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2()); - return length % BLOCKSIZE; -} -#endif - -#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) - -#define Ch(x,y,z) (z^(x&(y^z))) -#define Maj(x,y,z) (y^((x^y)&(y^z))) - -#define a(i) T[(0-i)&7] -#define b(i) T[(1-i)&7] -#define c(i) T[(2-i)&7] -#define d(i) T[(3-i)&7] -#define e(i) T[(4-i)&7] -#define f(i) T[(5-i)&7] -#define g(i) T[(6-i)&7] -#define h(i) T[(7-i)&7] - -#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\ - d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)) - -// for SHA256 -#define S0(x) 
(rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22)) -#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25)) -#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3)) -#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10)) - -#if defined(__OPTIMIZE_SIZE__) -// Smaller but slower -void SHA256_CXX_Transform(word32 *state, const word32 *data) -{ - word32 W[32], T[20]; - unsigned int i = 0, j = 0; - word32 *t = T+8; - - memcpy(t, state, 8*4); - word32 e = t[4], a = t[0]; - - do - { - word32 w = data[j]; - W[j] = w; - w += SHA256_K[j]; - w += t[7]; - w += S1(e); - w += Ch(e, t[5], t[6]); - e = t[3] + w; - t[3] = t[3+8] = e; - w += S0(t[0]); - a = w + Maj(a, t[1], t[2]); - t[-1] = t[7] = a; - --t; - ++j; - if (j%8 == 0) - t += 8; - } while (j<16); - - do - { - i = j&0xf; - word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7]; - W[i+16] = W[i] = w; - w += SHA256_K[j]; - w += t[7]; - w += S1(e); - w += Ch(e, t[5], t[6]); - e = t[3] + w; - t[3] = t[3+8] = e; - w += S0(t[0]); - a = w + Maj(a, t[1], t[2]); - t[-1] = t[7] = a; - - w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7]; - W[(i+1)+16] = W[(i+1)] = w; - w += SHA256_K[j+1]; - w += (t-1)[7]; - w += S1(e); - w += Ch(e, (t-1)[5], (t-1)[6]); - e = (t-1)[3] + w; - (t-1)[3] = (t-1)[3+8] = e; - w += S0((t-1)[0]); - a = w + Maj(a, (t-1)[1], (t-1)[2]); - (t-1)[-1] = (t-1)[7] = a; - - t-=2; - j+=2; - if (j%8 == 0) - t += 8; - } while (j<64); - - state[0] += a; - state[1] += t[1]; - state[2] += t[2]; - state[3] += t[3]; - state[4] += e; - state[5] += t[5]; - state[6] += t[6]; - state[7] += t[7]; -} -#else -// Bigger but faster -void SHA256_CXX_Transform(word32 *state, const word32 *data) -{ - word32 W[16], T[8]; - /* Copy context->state[] to working vars */ - memcpy(T, state, sizeof(T)); - /* 64 operations, partially loop unrolled */ - for (unsigned int j=0; j<64; j+=16) - { - R( 0); R( 1); R( 2); R( 3); - R( 4); R( 5); R( 6); R( 7); - R( 8); R( 9); R(10); R(11); - R(12); R(13); R(14); R(15); - } - 
/* Add the working vars back into context.state[] */ - state[0] += a(0); - state[1] += b(0); - state[2] += c(0); - state[3] += d(0); - state[4] += e(0); - state[5] += f(0); - state[6] += g(0); - state[7] += h(0); -} -#endif // __OPTIMIZE_SIZE__ - -#undef S0 -#undef S1 -#undef s0 -#undef s1 -#undef R - -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE -static void SHA256_SSE2_Transform(word32 *state, const word32 *data) -{ - // this byte reverse is a waste of time, but this function is only called by MDC - word32 W[16]; - ByteReverse(W, data, SHA256::BLOCKSIZE); - X86_SHA256_HashBlocks(state, W, SHA256::BLOCKSIZE - !HasSSE2()); -} -#endif // CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + CRYPTOPP_ASSERT(input); + CRYPTOPP_ASSERT(length >= 64); #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE -static void SHA256_SSE_SHA_Transform(word32 *state, const word32 *data) -{ - return SHA256_SSE_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE); -} -#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE - + if (HasSHA()) + { + SHA256_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA256::BLOCKSIZE - 1); + } +#endif #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data) -{ - return SHA256_ARM_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE); + if (HasSHA2()) + { + SHA256_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA256::BLOCKSIZE - 1); + } +#endif + + const bool noReverse = NativeByteOrderIs(this->GetByteOrder()); + word32 *dataBuf = this->DataBuf(); + do + { + if (noReverse) + { + // this->HashEndianCorrectedBlock(input); + SHA256_CXX_HashBlock(m_state, input); + } + else + { + ByteReverse(dataBuf, input, SHA256::BLOCKSIZE); + // this->HashEndianCorrectedBlock(dataBuf); + SHA256_CXX_HashBlock(m_state, dataBuf); + } + + input += SHA256::BLOCKSIZE/sizeof(word32); + length -= SHA256::BLOCKSIZE; + } + while (length >= SHA256::BLOCKSIZE); + return length; } 
-#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE /////////////////////////////////// // start of Walton/Gulley's code // @@ -1089,10 +1116,11 @@ static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data) #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley. -static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length) +static void SHA256_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) { - CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data); - CRYPTOPP_ASSERT(length % SHA256::BLOCKSIZE == 0); + CRYPTOPP_ASSERT(state); + CRYPTOPP_ASSERT(data); + CRYPTOPP_ASSERT(length >= 64); __m128i STATE0, STATE1; __m128i MSG, TMP, MASK; @@ -1100,9 +1128,15 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor __m128i ABEF_SAVE, CDGH_SAVE; // Load initial values - TMP = _mm_loadu_si128((__m128i*) &state[0]); - STATE1 = _mm_loadu_si128((__m128i*) &state[4]); - MASK = _mm_set_epi64x(W64LIT(0x0c0d0e0f08090a0b), W64LIT(0x0405060700010203)); + TMP = _mm_loadu_si128(M128_CAST(&state[0])); + STATE1 = _mm_loadu_si128(M128_CAST(&state[4])); + + // IA-32 SHA is little endian, SHA::Transform is big endian, + // and SHA::HashMultipleBlocks can be either. ByteOrder + // allows us to avoid extra endian reversals. It saves 1.0 cpb. + MASK = order == BIG_ENDIAN_ORDER ? 
// Data arrangement + _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3) : + _mm_set_epi8(15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0) ; TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH @@ -1116,7 +1150,7 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor CDGH_SAVE = STATE1; // Rounds 0-3 - MSG = _mm_loadu_si128((__m128i*) data+0); + MSG = _mm_loadu_si128(CONST_M128_CAST(data+0)); TMSG0 = _mm_shuffle_epi8(MSG, MASK); MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98))); STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); @@ -1124,7 +1158,7 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); // Rounds 4-7 - TMSG1 = _mm_loadu_si128((__m128i*) (data+4)); + TMSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4)); TMSG1 = _mm_shuffle_epi8(TMSG1, MASK); MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B))); STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); @@ -1133,7 +1167,7 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1); // Rounds 8-11 - TMSG2 = _mm_loadu_si128((__m128i*) (data+8)); + TMSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8)); TMSG2 = _mm_shuffle_epi8(TMSG2, MASK); MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98))); STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); @@ -1142,7 +1176,7 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2); // Rounds 12-15 - TMSG3 = _mm_loadu_si128((__m128i*) (data+12)); + TMSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12)); TMSG3 = _mm_shuffle_epi8(TMSG3, MASK); MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74))); STATE1 = 
_mm_sha256rnds2_epu32(STATE1, STATE0, MSG); @@ -1281,8 +1315,8 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF // Save state - _mm_storeu_si128((__m128i*) &state[0], STATE0); - _mm_storeu_si128((__m128i*) &state[4], STATE1); + _mm_storeu_si128(M128_CAST(&state[0]), STATE0); + _mm_storeu_si128(M128_CAST(&state[4]), STATE1); } #endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE @@ -1295,7 +1329,7 @@ static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const wor ///////////////////////////////////////////////////////// #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length) +static void SHA256_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) { uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE; uint32x4_t MSG0, MSG1, MSG2, MSG3; @@ -1317,6 +1351,14 @@ static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const wor MSG2 = vld1q_u32(data + 8); MSG3 = vld1q_u32(data + 12); + if (order == BIG_ENDIAN_ORDER) // Data arrangement + { + MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0))); + MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1))); + MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2))); + MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3))); + } + TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x00])); // Rounds 0-3 @@ -1456,31 +1498,24 @@ static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const wor // end of Walton/Schneiders/O'Rourke/Hovsmith's code // /////////////////////////////////////////////////////// -pfnSHATransform InitializeSHA256Transform() +void SHA256::Transform(word32 *state, const word32 *data) { #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE if (HasSHA()) - return &SHA256_SSE_SHA_Transform; - else -#endif -#if 
CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - if (HasSSE2()) - return &SHA256_SSE2_Transform; - else + { + SHA256_SHANI_HashMultipleBlocks(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER); + return; + } #endif #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE if (HasSHA2()) - return &SHA256_ARM_SHA_Transform; - else + { + SHA256_ARM_SHA_HashMultipleBlocks(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER); + return; + } #endif - return &SHA256_CXX_Transform; -} - -void SHA256::Transform(word32 *state, const word32 *data) -{ - static const pfnSHATransform s_pfn = InitializeSHA256Transform(); - s_pfn(state, data); + SHA256_CXX_HashBlock(state, data); } // ************************************************************* diff --git a/sha.h b/sha.h index 6be24415..30a859ac 100644 --- a/sha.h +++ b/sha.h @@ -38,21 +38,20 @@ public: //! \param digest the state of the hash //! \param data the data to be digested //! \details Transform operates the hash on data. When the call is invoked - //! digest holds initial state. Upon return digest holds the hash or - //! updated state. + //! digest holds initial state. Upon return digest holds the hash + //! or updated state. //! \details Hashes which derive from IteratedHashWithStaticTransform provide static //! member functions InitState and Transform. External classes, like SEAL and MDC, //! can initialize state with a user provided key and operate the hash on the data //! with the user supplied state. //! \note On Intel platforms the state array and data must be 16-byte aligned for SSE2. - static void CRYPTOPP_API Transform(word32 *digest, const word32 *data); + static void CRYPTOPP_API Transform(HashWordType *digest, const HashWordType *data); //! \brief The algorithm name //! 
\returns C-style string "SHA-1" CRYPTOPP_STATIC_CONSTEXPR const char* CRYPTOPP_API StaticAlgorithmName() {return "SHA-1";} -#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE - size_t HashMultipleBlocks(const word32 *input, size_t length); -#endif +protected: + size_t HashMultipleBlocks(const HashWordType *input, size_t length); }; //! \class SHA256 @@ -75,21 +74,20 @@ public: //! \param digest the state of the hash //! \param data the data to be digested //! \details Transform operates the hash on data. When the call is invoked - //! digest holds initial state. Upon return digest holds the hash or - //! updated state. + //! digest holds initial state. Upon return digest holds the hash + //! or updated state. //! \details Hashes which derive from IteratedHashWithStaticTransform provide static //! member functions InitState and Transform. External classes, like SEAL and MDC, //! can initialize state with a user provided key and operate the hash on the data //! with the user supplied state. //! \note On Intel platforms the state array and data must be 16-byte aligned for SSE2. - static void CRYPTOPP_API Transform(word32 *digest, const word32 *data); + static void CRYPTOPP_API Transform(HashWordType *digest, const HashWordType *data); //! \brief The algorithm name //! \returns C-style string "SHA-256" CRYPTOPP_STATIC_CONSTEXPR const char* CRYPTOPP_API StaticAlgorithmName() {return "SHA-256";} -#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM) - size_t HashMultipleBlocks(const word32 *input, size_t length); -#endif +protected: + size_t HashMultipleBlocks(const HashWordType *input, size_t length); }; //! \class SHA224 @@ -112,21 +110,20 @@ public: //! \param digest the state of the hash //! \param data the data to be digested //! \details Transform operates the hash on data. When the call is invoked - //! digest holds initial state. 
Upon return digest holds the hash or - //! updated state. + //! digest holds initial state. Upon return digest holds the hash + //! or updated state. //! \details Hashes which derive from IteratedHashWithStaticTransform provide static //! member functions InitState and Transform. External classes, like SEAL and MDC, //! can initialize state with a user provided key and operate the hash on the data //! with the user supplied state. //! \note On Intel platforms the state array and data must be 16-byte aligned for SSE2. - static void CRYPTOPP_API Transform(word32 *digest, const word32 *data) {SHA256::Transform(digest, data);} + static void CRYPTOPP_API Transform(HashWordType *digest, const HashWordType *data) {SHA256::Transform(digest, data);} //! \brief The algorithm name //! \returns C-style string "SHA-224" CRYPTOPP_STATIC_CONSTEXPR const char* CRYPTOPP_API StaticAlgorithmName() {return "SHA-224";} -#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM) - size_t HashMultipleBlocks(const word32 *input, size_t length); -#endif +protected: + size_t HashMultipleBlocks(const HashWordType *input, size_t length); }; //! \class SHA512 @@ -149,14 +146,14 @@ public: //! \param digest the state of the hash //! \param data the data to be digested //! \details Transform operates the hash on data. When the call is invoked - //! digest holds initial state. Upon return digest holds the hash or - //! updated state. + //! digest holds initial state. Upon return digest holds the hash + //! or updated state. //! \details Hashes which derive from IteratedHashWithStaticTransform provide static //! member functions InitState and Transform. External classes, like SEAL and MDC, //! can initialize state with a user provided key and operate the hash on the data //! with the user supplied state. //! \note On Intel platforms the state array and data must be 16-byte aligned for SSE2. 
- static void CRYPTOPP_API Transform(word64 *digest, const word64 *data); + static void CRYPTOPP_API Transform(HashWordType *digest, const HashWordType *data); //! \brief The algorithm name //! \returns C-style string "SHA-512" CRYPTOPP_STATIC_CONSTEXPR const char* CRYPTOPP_API StaticAlgorithmName() {return "SHA-512";} @@ -182,14 +179,14 @@ public: //! \param digest the state of the hash //! \param data the data to be digested //! \details Transform operates the hash on data. When the call is invoked - //! digest holds initial state. Upon return digest holds the hash or - //! updated state. + //! digest holds initial state. Upon return digest holds the hash + //! or updated state. //! \details Hashes which derive from IteratedHashWithStaticTransform provide static //! member functions InitState and Transform. External classes, like SEAL and MDC, //! can initialize state with a user provided key and operate the hash on the data //! with the user supplied state. //! \note On Intel platforms the state array and data must be 16-byte aligned for SSE2. - static void CRYPTOPP_API Transform(word64 *digest, const word64 *data) {SHA512::Transform(digest, data);} + static void CRYPTOPP_API Transform(HashWordType *digest, const HashWordType *data) {SHA512::Transform(digest, data);} //! \brief The algorithm name //! 
\returns C-style string "SHA-384" CRYPTOPP_STATIC_CONSTEXPR const char* CRYPTOPP_API StaticAlgorithmName() {return "SHA-384";} diff --git a/x64dll.asm b/x64dll.asm index 386f7511..97c9aba3 100644 --- a/x64dll.asm +++ b/x64dll.asm @@ -676,7 +676,7 @@ ret GCM_AuthenticateBlocks_64K ENDP ALIGN 8 -X86_SHA256_HashBlocks PROC FRAME +SHA256_SSE_HashMultipleBlocks PROC FRAME rex_push_reg rsi push_reg rdi push_reg rbx @@ -1962,7 +1962,7 @@ pop rbx pop rdi pop rsi ret -X86_SHA256_HashBlocks ENDP +SHA256_SSE_HashMultipleBlocks ENDP _TEXT ENDS END From 2ee8e3b26d48b70dc86bbd000b75fbe2f0bb7c6b Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 14 Aug 2017 00:08:55 -0400 Subject: [PATCH 2/4] Move free standing function into anonymous namespace Update comments and use class constants when available --- sha.cpp | 396 ++++++++++++++++++++++++++++++++------------------------ sha.h | 3 +- 2 files changed, 232 insertions(+), 167 deletions(-) diff --git a/sha.cpp b/sha.cpp index 72a82e61..cd0cdd5d 100644 --- a/sha.cpp +++ b/sha.cpp @@ -8,13 +8,21 @@ // In August 2017 Walton reworked the internals to align all the implementations. // Formerly all hashes were software based, IterHashBase handled endian conversions, -// IterHashBase repeatedly called the single block SHA{N}::Transform. The rework -// added SHA{N}::HashMultipleBlocks, and the SHA classes attempt to always use it. -// Now SHA{N}::Transform calls into SHA{N}::HashMultipleBlocks. An added wrinkle is -// hardware is little endian and software is big endian, so HashMultipleBlocks -// accepts a ByteOrder for the incoming data. Hardware based SHA{N}::HashMultipleBlocks -// can often perform the endian swap much easier by setting an EPI mask. The rework -// also removed the hacked-in pointers to implementations. +// and IterHashBase dispatched a single block to SHA{N}::Transform. SHA{N}::Transform +// then performed the single block hashing. It was repeated for multiple blocks. 
+// +// The rework added SHA{N}::HashMultipleBlocks (class) and SHA{N}_HashMultipleBlocks +// (free standing). There are also hardware accelerated variations. Callers enter +// SHA{N}::HashMultipleBlocks (class), and the function calls SHA{N}_HashMultipleBlocks +// (free standing) or SHA{N}_HashBlock (free standing) as a fallback. +// +// An added wrinkle is hardware is little endian, C++ is big endian, and callers use big endian, +// so SHA{N}_HashMultipleBlocks accepts a ByteOrder for the incoming data arrangement. Hardware +// based SHA{N}_HashMultipleBlocks can often perform the endian swap much easier by setting +// an EPI mask. Endian swap incurs no penalty on Intel SHA, and 4-instruction penalty on ARM SHA. +// Under C++ the full software based swap penalty is incurred due to use of ByteReverse(). +// +// The rework also removed the hacked-in pointers to implementations. // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code @@ -44,12 +52,17 @@ #define M128_CAST(x) ((__m128i *)(void *)(x)) #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x)) +// C++ makes const internal linkage +#define EXPORT_TABLE extern + NAMESPACE_BEGIN(CryptoPP) //////////////////////////////// // start of Steve Reid's code // //////////////////////////////// +ANONYMOUS_NAMESPACE_BEGIN + #define blk0(i) (W[i] = data[i]) #define blk1(i) (W[i&15] = rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1)) @@ -65,7 +78,7 @@ NAMESPACE_BEGIN(CryptoPP) #define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30); #define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30); -static void SHA1_CXX_HashBlock(word32 *state, const word32 *data) +void SHA1_CXX_HashBlock(word32 *state, const word32 *data) { CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data); @@ -106,6 +119,8 @@ static void SHA1_CXX_HashBlock(word32 *state, const word32 *data) state[4] += e; } +ANONYMOUS_NAMESPACE_END + ////////////////////////////// 
// end of Steve Reid's code // ////////////////////////////// @@ -115,12 +130,15 @@ static void SHA1_CXX_HashBlock(word32 *state, const word32 *data) /////////////////////////////////// #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE + +ANONYMOUS_NAMESPACE_BEGIN + // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley. -static void SHA1_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) +void SHA1_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) { CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data); - CRYPTOPP_ASSERT(length >= 64); + CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE); __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1; __m128i MASK, MSG0, MSG1, MSG2, MSG3; @@ -137,7 +155,7 @@ static void SHA1_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, siz _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15) : _mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12) ; - while (length >= 64) + while (length >= SHA1::BLOCKSIZE) { // Save current hash ABCD_SAVE = ABCD; @@ -303,8 +321,8 @@ static void SHA1_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, siz E0 = _mm_sha1nexte_epu32(E0, E0_SAVE); ABCD = _mm_add_epi32(ABCD, ABCD_SAVE); - data += 16; - length -= 64; + data += SHA1::BLOCKSIZE/sizeof(word32); + length -= SHA1::BLOCKSIZE; } // Save state @@ -312,7 +330,10 @@ static void SHA1_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, siz _mm_storeu_si128(M128_CAST(state), ABCD); state[4] = _mm_extract_epi32(E0, 3); } -#endif + +ANONYMOUS_NAMESPACE_END + +#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE ///////////////////////////////// // end of Walton/Gulley's code // @@ -323,11 +344,14 @@ static void SHA1_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, siz ////////////////////////////////////////////////////////////// #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -static void 
SHA1_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) + +ANONYMOUS_NAMESPACE_BEGIN + +void SHA1_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) { CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data); - CRYPTOPP_ASSERT(length >= 64); + CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE); uint32x4_t C0, C1, C2, C3; uint32x4_t ABCD, ABCD_SAVED; @@ -344,7 +368,7 @@ static void SHA1_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, s ABCD = vld1q_u32(&state[0]); E0 = state[4]; - while (length >= 64) + while (length >= SHA1::BLOCKSIZE) { // Save current hash ABCD_SAVED = ABCD; @@ -501,14 +525,17 @@ static void SHA1_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, s E0 += E0_SAVED; ABCD = vaddq_u32(ABCD_SAVED, ABCD); - data += 16; - length -= 64; + data += SHA1::BLOCKSIZE/sizeof(word32); + length -= SHA1::BLOCKSIZE; } // Save state vst1q_u32(&state[0], ABCD); state[4] = E0; } + +ANONYMOUS_NAMESPACE_END + #endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE /////////////////////////////////////////////////////// @@ -550,7 +577,7 @@ void SHA1::Transform(word32 *state, const word32 *data) size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length) { CRYPTOPP_ASSERT(input); - CRYPTOPP_ASSERT(length >= 64); + CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE); #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE if (HasSHA()) @@ -573,27 +600,25 @@ size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length) { if (noReverse) { - // this->HashEndianCorrectedBlock(input); SHA1_CXX_HashBlock(m_state, input); } else { - ByteReverse(dataBuf, input, 64); - // this->HashEndianCorrectedBlock(dataBuf); + ByteReverse(dataBuf, input, SHA1::BLOCKSIZE); SHA1_CXX_HashBlock(m_state, dataBuf); } - input += 16; - length -= 64; + input += SHA1::BLOCKSIZE/sizeof(word32); + length -= SHA1::BLOCKSIZE; } - while (length >= 64); + while (length >= SHA1::BLOCKSIZE); return length; } // 
************************************************************* -CRYPTOPP_ALIGN_DATA(16) -extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = { +CRYPTOPP_ALIGN_DATA(16) EXPORT_TABLE +const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, @@ -613,6 +638,8 @@ extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; +ANONYMOUS_NAMESPACE_BEGIN + #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) #define Ch(x,y,z) (z^(x&(y^z))) @@ -636,7 +663,7 @@ extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = { #define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3)) #define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10)) -static void SHA256_CXX_HashBlock(word32 *state, const word32 *data) +void SHA256_CXX_HashBlock(word32 *state, const word32 *data) { word32 W[16], T[8]; /* Copy context->state[] to working vars */ @@ -666,6 +693,8 @@ static void SHA256_CXX_HashBlock(word32 *state, const word32 *data) #undef s1 #undef R +ANONYMOUS_NAMESPACE_END + void SHA224::InitState(HashWordType *state) { static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4}; @@ -677,11 +706,13 @@ void SHA256::InitState(HashWordType *state) static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; memcpy(state, s, sizeof(s)); } -#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM +#endif // Not CRYPTOPP_GENERATE_X64_MASM #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)) -static void CRYPTOPP_FASTCALL SHA256_SSE_HashMultipleBlocks(word32 *state, const word32 *data, size_t len) +ANONYMOUS_NAMESPACE_BEGIN + +void CRYPTOPP_FASTCALL SHA256_SSE_HashMultipleBlocks(word32 *state, const word32 *data, size_t len) { #define LOCALS_SIZE 8*4 + 16*4 
+ 4*WORD_SZ #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4] @@ -1000,127 +1031,32 @@ INTEL_NOPREFIX #endif } -#endif // (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)) +ANONYMOUS_NAMESPACE_END + +#endif // CRYPTOPP_X86_ASM_AVAILABLE or CRYPTOPP_GENERATE_X64_MASM #ifndef CRYPTOPP_GENERATE_X64_MASM #ifdef CRYPTOPP_X64_MASM_AVAILABLE -extern "C" { +EXPORT_TABLE "C" { void CRYPTOPP_FASTCALL SHA256_SSE_HashMultipleBlocks(word32 *state, const word32 *data, size_t len); } #endif -#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE -static void SHA256_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order); -#elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -static void SHA256_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order); -#endif - -size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length) -{ - CRYPTOPP_ASSERT(input); - CRYPTOPP_ASSERT(length >= 64); - -#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE - if (HasSHA()) - { - SHA256_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); - return length & (SHA256::BLOCKSIZE - 1); - } -#endif -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - if (HasSSE2()) - { - const size_t res = length & (SHA256::BLOCKSIZE - 1); - SHA256_SSE_HashMultipleBlocks(m_state, input, length-res); - return res; - } -#endif -#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE - if (HasSHA2()) - { - SHA256_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); - return length & (SHA256::BLOCKSIZE - 1); - } -#endif - - const bool noReverse = NativeByteOrderIs(this->GetByteOrder()); - word32 *dataBuf = this->DataBuf(); - do - { - if (noReverse) - { - // this->HashEndianCorrectedBlock(input); - SHA256_CXX_HashBlock(m_state, input); - } - else - { - ByteReverse(dataBuf, input, SHA256::BLOCKSIZE); - // this->HashEndianCorrectedBlock(dataBuf); - SHA256_CXX_HashBlock(m_state, dataBuf); - } - - input += 
SHA256::BLOCKSIZE/sizeof(word32); - length -= SHA256::BLOCKSIZE; - } - while (length >= SHA256::BLOCKSIZE); - return length; -} - -size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length) -{ - CRYPTOPP_ASSERT(input); - CRYPTOPP_ASSERT(length >= 64); - -#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE - if (HasSHA()) - { - SHA256_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); - return length & (SHA256::BLOCKSIZE - 1); - } -#endif -#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE - if (HasSHA2()) - { - SHA256_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); - return length & (SHA256::BLOCKSIZE - 1); - } -#endif - - const bool noReverse = NativeByteOrderIs(this->GetByteOrder()); - word32 *dataBuf = this->DataBuf(); - do - { - if (noReverse) - { - // this->HashEndianCorrectedBlock(input); - SHA256_CXX_HashBlock(m_state, input); - } - else - { - ByteReverse(dataBuf, input, SHA256::BLOCKSIZE); - // this->HashEndianCorrectedBlock(dataBuf); - SHA256_CXX_HashBlock(m_state, dataBuf); - } - - input += SHA256::BLOCKSIZE/sizeof(word32); - length -= SHA256::BLOCKSIZE; - } - while (length >= SHA256::BLOCKSIZE); - return length; -} - /////////////////////////////////// // start of Walton/Gulley's code // /////////////////////////////////// #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE + +ANONYMOUS_NAMESPACE_BEGIN + // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley. 
-static void SHA256_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) +void SHA256_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) { CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data); - CRYPTOPP_ASSERT(length >= 64); + CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE); __m128i STATE0, STATE1; __m128i MSG, TMP, MASK; @@ -1138,9 +1074,9 @@ static void SHA256_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, s _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3) : _mm_set_epi8(15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0) ; - TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB - STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH - STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF + TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB + STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH + STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); // CDGH while (length >= SHA256::BLOCKSIZE) @@ -1309,15 +1245,18 @@ static void SHA256_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, s length -= SHA256::BLOCKSIZE; } - TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA - STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); // DCHG + TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA + STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); // DCHG STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); // DCBA - STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF + STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF // Save state _mm_storeu_si128(M128_CAST(&state[0]), STATE0); _mm_storeu_si128(M128_CAST(&state[4]), STATE1); } + +ANONYMOUS_NAMESPACE_END + #endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE ///////////////////////////////// @@ -1329,8 +1268,15 @@ static void SHA256_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, s ///////////////////////////////////////////////////////// #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE -static void SHA256_ARM_SHA_HashMultipleBlocks(word32 *state, 
const word32 *data, size_t length, ByteOrder order) + +ANONYMOUS_NAMESPACE_BEGIN + +void SHA256_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order) { + CRYPTOPP_ASSERT(state); + CRYPTOPP_ASSERT(data); + CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE); + uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE; uint32x4_t MSG0, MSG1, MSG2, MSG3; uint32x4_t TMP0, TMP1, TMP2; @@ -1492,7 +1438,10 @@ static void SHA256_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, vst1q_u32(&state[0], STATE0); vst1q_u32(&state[4], STATE1); } -#endif + +ANONYMOUS_NAMESPACE_END + +#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE /////////////////////////////////////////////////////// // end of Walton/Schneiders/O'Rourke/Hovsmith's code // @@ -1500,6 +1449,9 @@ static void SHA256_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, void SHA256::Transform(word32 *state, const word32 *data) { + CRYPTOPP_ASSERT(state); + CRYPTOPP_ASSERT(data); + #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE if (HasSHA()) { @@ -1518,6 +1470,104 @@ void SHA256::Transform(word32 *state, const word32 *data) SHA256_CXX_HashBlock(state, data); } +size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length) +{ + CRYPTOPP_ASSERT(input); + CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE); + +#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE + if (HasSHA()) + { + SHA256_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA256::BLOCKSIZE - 1); + } +#endif +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + if (HasSSE2()) + { + const size_t res = length & (SHA256::BLOCKSIZE - 1); + SHA256_SSE_HashMultipleBlocks(m_state, input, length-res); + return res; + } +#endif +#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE + if (HasSHA2()) + { + SHA256_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA256::BLOCKSIZE - 1); + } +#endif + + const bool noReverse = 
NativeByteOrderIs(this->GetByteOrder()); + word32 *dataBuf = this->DataBuf(); + do + { + if (noReverse) + { + SHA256_CXX_HashBlock(m_state, input); + } + else + { + ByteReverse(dataBuf, input, SHA256::BLOCKSIZE); + SHA256_CXX_HashBlock(m_state, dataBuf); + } + + input += SHA256::BLOCKSIZE/sizeof(word32); + length -= SHA256::BLOCKSIZE; + } + while (length >= SHA256::BLOCKSIZE); + return length; +} + +size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length) +{ + CRYPTOPP_ASSERT(input); + CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE); + +#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE + if (HasSHA()) + { + SHA256_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA256::BLOCKSIZE - 1); + } +#endif +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + if (HasSSE2()) + { + const size_t res = length & (SHA256::BLOCKSIZE - 1); + SHA256_SSE_HashMultipleBlocks(m_state, input, length-res); + return res; + } +#endif +#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE + if (HasSHA2()) + { + SHA256_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER); + return length & (SHA256::BLOCKSIZE - 1); + } +#endif + + const bool noReverse = NativeByteOrderIs(this->GetByteOrder()); + word32 *dataBuf = this->DataBuf(); + do + { + if (noReverse) + { + SHA256_CXX_HashBlock(m_state, input); + } + else + { + ByteReverse(dataBuf, input, SHA256::BLOCKSIZE); + SHA256_CXX_HashBlock(m_state, dataBuf); + } + + input += SHA256::BLOCKSIZE/sizeof(word32); + length -= SHA256::BLOCKSIZE; + } + while (length >= SHA256::BLOCKSIZE); + return length; +} + // ************************************************************* void SHA384::InitState(HashWordType *state) @@ -1540,11 +1590,8 @@ void SHA512::InitState(HashWordType *state) memcpy(state, s, sizeof(s)); } -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32) -CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = { -#else -CRYPTOPP_ALIGN_DATA(16) 
static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = { -#endif +CRYPTOPP_ALIGN_DATA(16) +static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = { W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd), W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc), W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019), @@ -1588,8 +1635,10 @@ CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN1 }; #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32) -// put assembly version in separate function, otherwise MSVC 2005 SP1 doesn't generate correct code for the non-assembly version -CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data) + +ANONYMOUS_NAMESPACE_BEGIN + +CRYPTOPP_NAKED void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data) { #ifdef __GNUC__ __asm__ __volatile__ @@ -1782,28 +1831,25 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state AS1( ret) #endif } -#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE -void SHA512::Transform(word64 *state, const word64 *data) -{ - CRYPTOPP_ASSERT(IsAlignedOn(state, GetAlignmentOf())); - CRYPTOPP_ASSERT(IsAlignedOn(data, GetAlignmentOf())); +ANONYMOUS_NAMESPACE_END -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32) - if (HasSSE2()) - { - SHA512_SSE2_Transform(state, data); - return; - } -#endif +#endif // CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + +ANONYMOUS_NAMESPACE_BEGIN #define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39)) #define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41)) #define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7)) #define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6)) -#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\ - d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)) +#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+\ + 
(j?blk2(i):blk0(i));d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)) + +void SHA512_CXX_HashBlock(word64 *state, const word64 *data) +{ + CRYPTOPP_ASSERT(state); + CRYPTOPP_ASSERT(data); word64 W[16]; word64 T[8]; @@ -1828,7 +1874,25 @@ void SHA512::Transform(word64 *state, const word64 *data) state[7] += h(0); } +ANONYMOUS_NAMESPACE_END + +void SHA512::Transform(word64 *state, const word64 *data) +{ + CRYPTOPP_ASSERT(state); + CRYPTOPP_ASSERT(data); + +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32) + if (HasSSE2()) + { + SHA512_SSE2_Transform(state, data); + return; + } +#endif + + SHA512_CXX_HashBlock(state, data); +} + NAMESPACE_END -#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM -#endif // #ifndef CRYPTOPP_IMPORTS +#endif // Not CRYPTOPP_GENERATE_X64_MASM +#endif // Not CRYPTOPP_IMPORTS diff --git a/sha.h b/sha.h index 30a859ac..0b2db7f3 100644 --- a/sha.h +++ b/sha.h @@ -2,7 +2,8 @@ //! \file sha.h //! \brief Classes for SHA-1 and SHA-2 family of message digests -//! \since SHA1 since Crypto++ 1.0, SHA2 since Crypto++ 4.0, Intel SHA extensions since Crypto++ 6.0 +//! \since SHA1 since Crypto++ 1.0, SHA2 since Crypto++ 4.0, +//! ARM SHA since Crypto++ 6.0, Intel SHA since Crypto++ 6.0 #ifndef CRYPTOPP_SHA_H #define CRYPTOPP_SHA_H From 6a29d8c56f3e63cdebe1d0e4d1735e248f4b7c02 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 14 Aug 2017 01:05:03 -0400 Subject: [PATCH 3/4] Use -O1 for Asan and UBsan --- GNUmakefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/GNUmakefile b/GNUmakefile index 0cf7d4ec..ddc99dc5 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -373,6 +373,9 @@ endif # No ASM # Undefined Behavior Sanitizer (UBsan) testing. Issue 'make ubsan'. 
ifeq ($(findstring ubsan,$(MAKECMDGOALS)),ubsan) +CXXFLAGS := $(CXXFLAGS:-g%=-g3) +CXXFLAGS := $(CXXFLAGS:-O%=-O1) +CXXFLAGS := $(CXXFLAGS:-xO%=-xO1) ifeq ($(findstring -fsanitize=undefined,$(CXXFLAGS)),) CXXFLAGS += -fsanitize=undefined endif # CXXFLAGS @@ -383,6 +386,9 @@ endif # UBsan # Address Sanitizer (Asan) testing. Issue 'make asan'. ifeq ($(findstring asan,$(MAKECMDGOALS)),asan) +CXXFLAGS := $(CXXFLAGS:-g%=-g3) +CXXFLAGS := $(CXXFLAGS:-O%=-O1) +CXXFLAGS := $(CXXFLAGS:-xO%=-xO1) ifeq ($(findstring -fsanitize=address,$(CXXFLAGS)),) CXXFLAGS += -fsanitize=address endif # CXXFLAGS From effa446a61e8e8ec4b9126ca41fc887ab91c6327 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 14 Aug 2017 01:48:11 -0400 Subject: [PATCH 4/4] Fix test script when running on machine with RO mount of / --- TestScripts/cryptest.sh | 204 ++++++++++++++++++++-------------------- cryptest.sh | 204 ++++++++++++++++++++-------------------- 2 files changed, 208 insertions(+), 200 deletions(-) diff --git a/TestScripts/cryptest.sh b/TestScripts/cryptest.sh index e2a058d6..dc70b07b 100755 --- a/TestScripts/cryptest.sh +++ b/TestScripts/cryptest.sh @@ -250,91 +250,95 @@ if [[ ("$SUNCC_510_OR_ABOVE" -ne "0") ]]; then HAVE_OFAST=0 fi -if [[ (-z "$TMP") ]]; then - if [[ (-d "/tmp") ]]; then - TMP=/tmp +# GCC compile farm is mounted RO; use /tmp only if a file can be created there +if [[ (-z "$TMPDIR") ]]; then + if [[ (-d "/tmp") ]] && touch "/tmp/ok-to-delete" &>/dev/null; then + TMPDIR=/tmp elif [[ (-d "/temp") ]]; then - TMP=/temp + TMPDIR=/temp elif [[ (-d "$HOME/tmp") ]]; then - TMP="$HOME/tmp" + TMPDIR="$HOME/tmp" else - echo "Please set TMP to a valid directory" + echo "Please set TMPDIR to a valid directory" [[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1 fi fi +# Make temp if it does not exist +mkdir -p "$TMPDIR" &>/dev/null + # Sun Studio does not allow '-x c++'. Copy it here... 
rm -f adhoc.cpp > /dev/null 2>&1 cp adhoc.cpp.proto adhoc.cpp -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX17") ]]; then HAVE_CXX17=0 - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX17=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU17") ]]; then HAVE_GNU17=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU17=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX14") ]]; then HAVE_CXX14=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX14=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU14") ]]; then HAVE_GNU14=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU14=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX11") ]]; then HAVE_CXX11=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then HAVE_CXX11=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU11") ]]; then HAVE_GNU11=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU11=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX03") ]]; then HAVE_CXX03=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX03=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU03") ]]; then HAVE_GNU03=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU03=1 fi @@ -342,13 +346,13 @@ fi # Use a fallback strategy so OPT_O0 can be used with DEBUG_CXXFLAGS OPT_O0= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O0=-O0 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" 
-eq "0") ]]; then OPT_O0=-xO0 fi @@ -356,13 +360,13 @@ fi # Use a fallback strategy so OPT_O1 can be used with VALGRIND_CXXFLAGS OPT_O1= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O1=-O1 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O1=-xO1 fi @@ -370,13 +374,13 @@ fi # Use a fallback strategy so OPT_O2 can be used with RELEASE_CXXFLAGS OPT_O2= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O2=-O2 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O2=-xO2 fi @@ -385,14 +389,14 @@ fi if [[ (-z "$HAVE_O3") ]]; then HAVE_O3=0 OPT_O3= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" 
-eq "0") ]]; then HAVE_O3=1 OPT_O3=-O3 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O3=1 OPT_O3=-xO3 @@ -404,14 +408,14 @@ fi if [[ ( (-z "$HAVE_O5") && ("$CLANG_COMPILER" -eq "0") ) ]]; then HAVE_O5=0 OPT_O5= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O5=1 OPT_O5=-O5 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O5=1 OPT_O5=-xO5 @@ -423,8 +427,8 @@ fi if [[ (-z "$HAVE_OS") ]]; then HAVE_OS=0 OPT_OS= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_OS=1 OPT_OS=-Os @@ -435,8 +439,8 @@ fi if [[ (-z "$HAVE_OFAST") ]]; then HAVE_OFAST=0 OPT_OFAST= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" 
-eq "0") ]]; then HAVE_OFAST=1 OPT_OFAST=-Ofast @@ -445,13 +449,13 @@ fi # Use a fallback strategy so OPT_G2 can be used with RELEASE_CXXFLAGS OPT_G2= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G2=-g2 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G2=-g fi @@ -459,13 +463,13 @@ fi # Use a fallback strategy so OPT_G3 can be used with DEBUG_CXXFLAGS OPT_G3= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G3=-g3 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" 
-eq "0") ]]; then OPT_G3=-g fi @@ -473,10 +477,10 @@ fi # Cygwin and noisy compiles OPT_PIC= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_PIC") ]]; then HAVE_PIC=0 - PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMP/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)') + PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMPDIR/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)') if [[ "$PIC_PROBLEMS" -eq "0" ]]; then HAVE_PIC=1 OPT_PIC=-fPIC @@ -484,12 +488,12 @@ if [[ (-z "$HAVE_PIC") ]]; then fi # GCC 4.8; Clang 3.4 -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_UBSAN") ]]; then HAVE_UBSAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_UBSAN=1 fi @@ -497,12 +501,12 @@ if [[ (-z "$HAVE_UBSAN") ]]; then fi # GCC 4.8; Clang 3.4 -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_ASAN") ]]; then HAVE_ASAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" 
-eq "0") ]]; then HAVE_ASAN=1 fi @@ -510,41 +514,41 @@ if [[ (-z "$HAVE_ASAN") ]]; then fi # GCC 6.0; maybe Clang -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_BSAN") ]]; then HAVE_BSAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_BSAN=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_OMP") ]]; then HAVE_OMP=0 if [[ "$GCC_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-fopenmp -O3) fi elif [[ "$INTEL_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-openmp -O3) fi elif [[ "$CLANG_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-fopenmp=libomp -O3) fi elif [[ "$SUN_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-xopenmp=parallel -xO3) @@ -552,33 +556,33 @@ if [[ (-z "$HAVE_OMP") ]]; then fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_INTEL_MULTIARCH") ]]; then HAVE_INTEL_MULTIARCH=0 if [[ ("$IS_DARWIN" -ne "0") && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_INTEL_MULTIARCH=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_PPC_MULTIARCH") ]]; then HAVE_PPC_MULTIARCH=0 if [[ ("$IS_DARWIN" -ne "0") && ("$IS_PPC" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_PPC_MULTIARCH=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_X32") ]]; then HAVE_X32=0 if [[ "$IS_X32" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_X32=1 fi @@ -588,8 +592,8 @@ fi # Hit or miss, mostly hit if [[ (-z "$HAVE_NATIVE_ARCH") ]]; then HAVE_NATIVE_ARCH=0 - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_NATIVE_ARCH=1 fi @@ -603,7 +607,7 @@ if [[ (-z "$HAVE_LDGOLD") ]]; then if [[ (! -z "$LD_GOLD") && (! 
-z "$ELF_FILE") ]]; then LD_GOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf") if [[ ("$LD_GOLD" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_LDGOLD=1 fi @@ -688,10 +692,10 @@ fi # Used to disassemble object modules so we can verify some aspects of code generation if [[ (-z "$HAVE_DISASS") ]]; then - echo "int main(int argc, char* argv[]) {return 0;}" > "$TMP/test.cc" - "$CXX" "$TMP/test.cc" -o "$TMP/test.exe" > /dev/null 2>&1 + echo "int main(int argc, char* argv[]) {return 0;}" > "$TMPDIR/test.cc" + "$CXX" "$TMPDIR/test.cc" -o "$TMPDIR/test.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then - "$DISASS" "${DISASSARGS[@]}" "$TMP/test.exe" > /dev/null 2>&1 + "$DISASS" "${DISASSARGS[@]}" "$TMPDIR/test.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_DISASS=1 else @@ -1201,7 +1205,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test AES-NI code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_AESNI=1 fi @@ -1263,7 +1267,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 carryless multiply code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then X86_PCLMUL=1 fi @@ -1301,11 +1305,11 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test RDRAND and RDSEED code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_RDRAND=1 fi - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_RDSEED=1 fi @@ -1347,7 +1351,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 CRC32 code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_CRC32=1 fi @@ -1385,7 +1389,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 SHA code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_SHA=1 fi @@ -5004,7 +5008,7 @@ fi if [[ ("$CLANG_COMPILER" -eq "0") ]]; then CLANG_CXX=$(which clang++ 2>&1 | "$GREP" -v "no clang++" | head -1) - "$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then ############################################ @@ -5039,7 +5043,7 @@ fi if [[ ("$GCC_COMPILER" -eq "0") ]]; then GCC_CXX=$(which g++ 2>&1 | "$GREP" -v "no g++" | head -1) - "$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5077,7 +5081,7 @@ if [[ ("$INTEL_COMPILER" -eq "0") ]]; then if [[ (-z "$INTEL_CXX") ]]; then INTEL_CXX=$(find /opt/intel -name icpc 2>/dev/null | "$GREP" -iv composer | head -1) fi - "$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5113,7 +5117,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-4*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5146,7 +5150,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-5*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then ############################################ @@ -5179,7 +5183,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-6*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5212,7 +5216,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-7*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5245,7 +5249,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.7*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5277,7 +5281,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.8*' 2>/dev/null | head -1) if [[ (! 
-z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5309,7 +5313,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.9*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5341,7 +5345,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-4*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then ############################################ diff --git a/cryptest.sh b/cryptest.sh index e2a058d6..dc70b07b 100755 --- a/cryptest.sh +++ b/cryptest.sh @@ -250,91 +250,95 @@ if [[ ("$SUNCC_510_OR_ABOVE" -ne "0") ]]; then HAVE_OFAST=0 fi -if [[ (-z "$TMP") ]]; then - if [[ (-d "/tmp") ]]; then - TMP=/tmp +# GCC compile farm is mounted RO +if [[ (-z "$TMPDIR") ]]; then + if [[ (-d "/tmp") ]] && [[ `touch "/tmp/ok-to-delete" &>/dev/null` ]]; then + TMPDIR=/tmp elif [[ (-d "/temp") ]]; then - TMP=/temp + TMPDIR=/temp elif [[ (-d "$HOME/tmp") ]]; then - TMP="$HOME/tmp" + TMPDIR="$HOME/tmp" else - echo "Please set TMP to a valid directory" + echo "Please set TMPDIR to a valid directory" [[ "$0" = "$BASH_SOURCE" ]] && exit 1 || return 1 fi fi +# Make temp if it does not exist +mkdir -p "$TMPDIR" &>/dev/null + # Sun Studio does not allow '-x c++'. Copy it here... rm -f adhoc.cpp > /dev/null 2>&1 cp adhoc.cpp.proto adhoc.cpp -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX17") ]]; then HAVE_CXX17=0 - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX17=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU17") ]]; then HAVE_GNU17=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++17 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then HAVE_GNU17=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX14") ]]; then HAVE_CXX14=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX14=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU14") ]]; then HAVE_GNU14=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++14 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU14=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX11") ]]; then HAVE_CXX11=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_CXX11=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU11") ]]; then HAVE_GNU11=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++11 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU11=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_CXX03") ]]; then HAVE_CXX03=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=c++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then HAVE_CXX03=1 fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_GNU03") ]]; then HAVE_GNU03=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -std=gnu++03 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_GNU03=1 fi @@ -342,13 +346,13 @@ fi # Use a fallback strategy so OPT_O0 can be used with DEBUG_CXXFLAGS OPT_O0= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O0=-O0 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO0 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O0=-xO0 fi @@ -356,13 +360,13 @@ fi # Use a fallback strategy so OPT_O1 can be used with VALGRIND_CXXFLAGS OPT_O1= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O1=-O1 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO1 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" 
-eq "0") ]]; then OPT_O1=-xO1 fi @@ -370,13 +374,13 @@ fi # Use a fallback strategy so OPT_O2 can be used with RELEASE_CXXFLAGS OPT_O2= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -O2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O2=-O2 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_O2=-xO2 fi @@ -385,14 +389,14 @@ fi if [[ (-z "$HAVE_O3") ]]; then HAVE_O3=0 OPT_O3= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O3=1 OPT_O3=-O3 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O3=1 OPT_O3=-xO3 @@ -404,14 +408,14 @@ fi if [[ ( (-z "$HAVE_O5") && ("$CLANG_COMPILER" -eq "0") ) ]]; then HAVE_O5=0 OPT_O5= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -O5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" 
-eq "0") ]]; then HAVE_O5=1 OPT_O5=-O5 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xO5 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_O5=1 OPT_O5=-xO5 @@ -423,8 +427,8 @@ fi if [[ (-z "$HAVE_OS") ]]; then HAVE_OS=0 OPT_OS= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -Os adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_OS=1 OPT_OS=-Os @@ -435,8 +439,8 @@ fi if [[ (-z "$HAVE_OFAST") ]]; then HAVE_OFAST=0 OPT_OFAST= - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -Ofast adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_OFAST=1 OPT_OFAST=-Ofast @@ -445,13 +449,13 @@ fi # Use a fallback strategy so OPT_G2 can be used with RELEASE_CXXFLAGS OPT_G2= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -g2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G2=-g2 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" 
-eq "0") ]]; then OPT_G2=-g fi @@ -459,13 +463,13 @@ fi # Use a fallback strategy so OPT_G3 can be used with DEBUG_CXXFLAGS OPT_G3= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 -"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 +"$CXX" -DCRYPTOPP_ADHOC_MAIN -g3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G3=-g3 else - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -g adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then OPT_G3=-g fi @@ -473,10 +477,10 @@ fi # Cygwin and noisy compiles OPT_PIC= -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_PIC") ]]; then HAVE_PIC=0 - PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMP/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)') + PIC_PROBLEMS=$("$CXX" -DCRYPTOPP_ADHOC_MAIN -fPIC adhoc.cpp -o "$TMPDIR/adhoc.exe" 2>&1 | "$EGREP" -ic '(warning|error)') if [[ "$PIC_PROBLEMS" -eq "0" ]]; then HAVE_PIC=1 OPT_PIC=-fPIC @@ -484,12 +488,12 @@ if [[ (-z "$HAVE_PIC") ]]; then fi # GCC 4.8; Clang 3.4 -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_UBSAN") ]]; then HAVE_UBSAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=undefined adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" 
-eq "0") ]]; then HAVE_UBSAN=1 fi @@ -497,12 +501,12 @@ if [[ (-z "$HAVE_UBSAN") ]]; then fi # GCC 4.8; Clang 3.4 -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_ASAN") ]]; then HAVE_ASAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=address adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_ASAN=1 fi @@ -510,41 +514,41 @@ if [[ (-z "$HAVE_ASAN") ]]; then fi # GCC 6.0; maybe Clang -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_BSAN") ]]; then HAVE_BSAN=0 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fsanitize=bounds-strict adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then - "$TMP/adhoc.exe" > /dev/null 2>&1 + "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_BSAN=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_OMP") ]]; then HAVE_OMP=0 if [[ "$GCC_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-fopenmp -O3) fi elif [[ "$INTEL_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -openmp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-openmp -O3) fi elif [[ "$CLANG_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fopenmp=libomp -O3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-fopenmp=libomp -O3) fi elif [[ "$SUN_COMPILER" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -xopenmp=parallel -xO3 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_OMP=1 OMP_FLAGS=(-xopenmp=parallel -xO3) @@ -552,33 +556,33 @@ if [[ (-z "$HAVE_OMP") ]]; then fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_INTEL_MULTIARCH") ]]; then HAVE_INTEL_MULTIARCH=0 if [[ ("$IS_DARWIN" -ne "0") && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch i386 -arch x86_64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_INTEL_MULTIARCH=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_PPC_MULTIARCH") ]]; then HAVE_PPC_MULTIARCH=0 if [[ ("$IS_DARWIN" -ne "0") && ("$IS_PPC" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -arch ppc -arch ppc64 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then HAVE_PPC_MULTIARCH=1 fi fi fi -rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 +rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ (-z "$HAVE_X32") ]]; then HAVE_X32=0 if [[ "$IS_X32" -ne "0" ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mx32 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_X32=1 fi @@ -588,8 +592,8 @@ fi # Hit or miss, mostly hit if [[ (-z "$HAVE_NATIVE_ARCH") ]]; then HAVE_NATIVE_ARCH=0 - rm -f "$TMP/adhoc.exe" > /dev/null 2>&1 - "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + rm -f "$TMPDIR/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -march=native adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ ("$?" -eq "0") ]]; then HAVE_NATIVE_ARCH=1 fi @@ -603,7 +607,7 @@ if [[ (-z "$HAVE_LDGOLD") ]]; then if [[ (! -z "$LD_GOLD") && (! -z "$ELF_FILE") ]]; then LD_GOLD=$(file "$LD_GOLD" | cut -d":" -f 2 | "$EGREP" -i -c "elf") if [[ ("$LD_GOLD" -ne "0") ]]; then - "$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -fuse-ld=gold adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then HAVE_LDGOLD=1 fi @@ -688,10 +692,10 @@ fi # Used to disassemble object modules so we can verify some aspects of code generation if [[ (-z "$HAVE_DISASS") ]]; then - echo "int main(int argc, char* argv[]) {return 0;}" > "$TMP/test.cc" - "$CXX" "$TMP/test.cc" -o "$TMP/test.exe" > /dev/null 2>&1 + echo "int main(int argc, char* argv[]) {return 0;}" > "$TMPDIR/test.cc" + "$CXX" "$TMPDIR/test.cc" -o "$TMPDIR/test.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then - "$DISASS" "${DISASSARGS[@]}" "$TMP/test.exe" > /dev/null 2>&1 + "$DISASS" "${DISASSARGS[@]}" "$TMPDIR/test.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then HAVE_DISASS=1 else @@ -1201,7 +1205,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test AES-NI code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -maes adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_AESNI=1 fi @@ -1263,7 +1267,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 carryless multiply code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mpclmul adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_PCLMUL=1 fi @@ -1301,11 +1305,11 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # Test RDRAND and RDSEED code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdrnd adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_RDRAND=1 fi - "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -mrdseed adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_RDSEED=1 fi @@ -1347,7 +1351,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 CRC32 code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then X86_CRC32=1 fi @@ -1385,7 +1389,7 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t ############################################ # X86 SHA code generation - "$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CXX" -DCRYPTOPP_ADHOC_MAIN -msha adhoc.cpp -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then X86_SHA=1 fi @@ -5004,7 +5008,7 @@ fi if [[ ("$CLANG_COMPILER" -eq "0") ]]; then CLANG_CXX=$(which clang++ 2>&1 | "$GREP" -v "no clang++" | head -1) - "$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$CLANG_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5039,7 +5043,7 @@ fi if [[ ("$GCC_COMPILER" -eq "0") ]]; then GCC_CXX=$(which g++ 2>&1 | "$GREP" -v "no g++" | head -1) - "$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$GCC_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5077,7 +5081,7 @@ if [[ ("$INTEL_COMPILER" -eq "0") ]]; then if [[ (-z "$INTEL_CXX") ]]; then INTEL_CXX=$(find /opt/intel -name icpc 2>/dev/null | "$GREP" -iv composer | head -1) fi - "$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$INTEL_CXX" -x c++ -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5113,7 +5117,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-4*' 2>/dev/null | head -1) if [[ (! 
-z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5146,7 +5150,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-5*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5179,7 +5183,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-6*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5212,7 +5216,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'g++-mp-7*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" 
-eq "0" ]]; then ############################################ @@ -5245,7 +5249,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.7*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5277,7 +5281,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.8*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5309,7 +5313,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-3.9*' 2>/dev/null | head -1) if [[ (! -z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################ @@ -5341,7 +5345,7 @@ if [[ ("$IS_DARWIN" -ne "0" && "$MACPORTS_COMPILER" -eq "0") ]]; then MACPORTS_CXX=$(find /opt/local/bin -name 'clang++-mp-4*' 2>/dev/null | head -1) if [[ (! 
-z "$MACPORTS_CXX") ]]; then - "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMP/adhoc.exe" > /dev/null 2>&1 + "$MACPORTS_CXX" -x c++ -std=c++11 -DCRYPTOPP_ADHOC_MAIN adhoc.cpp.proto -o "$TMPDIR/adhoc.exe" > /dev/null 2>&1 if [[ "$?" -eq "0" ]]; then ############################################