From c1f025343a1cfd4d2acb7e1eeb83eea935a7a4cf Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Tue, 14 Jun 2016 19:14:09 -0400 Subject: [PATCH] Add C++11 alignas support. Deleting 'alignas' branch --- blake2.cpp | 4 ++++ camellia.cpp | 1 + config.h | 6 ++++++ config.recommend | 6 ++++++ gcm.cpp | 3 ++- rijndael.cpp | 39 ++++++++++++++++++++------------------- 6 files changed, 39 insertions(+), 20 deletions(-) diff --git a/blake2.cpp b/blake2.cpp index 0d110c0d..d580647a 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -69,6 +69,7 @@ struct CRYPTOPP_NO_VTABLE BLAKE2_IV CRYPTOPP_ALIGN_DATA(16) static const word32 iv[8]; }; +CRYPTOPP_ALIGN_DATA(16) const word32 BLAKE2_IV::iv[8] = { 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL @@ -84,6 +85,7 @@ struct CRYPTOPP_NO_VTABLE BLAKE2_IV CRYPTOPP_ALIGN_DATA(16) static const word64 iv[8]; }; +CRYPTOPP_ALIGN_DATA(16) const word64 BLAKE2_IV::iv[8] = { W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b), W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1), @@ -105,6 +107,7 @@ struct CRYPTOPP_NO_VTABLE BLAKE2_Sigma CRYPTOPP_ALIGN_DATA(16) static const byte sigma[10][16]; }; +CRYPTOPP_ALIGN_DATA(16) const byte BLAKE2_Sigma::sigma[10][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, @@ -126,6 +129,7 @@ struct CRYPTOPP_NO_VTABLE BLAKE2_Sigma CRYPTOPP_ALIGN_DATA(16) static const byte sigma[12][16]; }; +CRYPTOPP_ALIGN_DATA(16) const byte BLAKE2_Sigma::sigma[12][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, diff --git a/camellia.cpp b/camellia.cpp index 3bb9b323..5afe8df5 100644 --- a/camellia.cpp +++ b/camellia.cpp @@ -250,6 +250,7 @@ void Camellia::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBloc // The Camellia s-boxes +CRYPTOPP_ALIGN_DATA(4) const byte Camellia::Base::s1[256] = { 112,130,44,236,179,39,192,229,228,133,87,53,234,12,174,65, diff --git a/config.h b/config.h index 486534d6..d502437d 100644 --- a/config.h +++ b/config.h @@ -829,6 +829,12 @@ NAMESPACE_END # define CRYPTOPP_NO_THROW #endif // CRYPTOPP_CXX11_NOEXCEPT +// Hack... CRYPTOPP_ALIGN_DATA is defined earlier, before C++11 alignas availability is determined +#if defined(CRYPTOPP_CXX11_ALIGNAS) +# undef CRYPTOPP_ALIGN_DATA +# define CRYPTOPP_ALIGN_DATA(x) alignas(x) +#endif // CRYPTOPP_CXX11_ALIGNAS + // OK to comment the following out, but please report it so we can fix it. #if (defined(__cplusplus) && (__cplusplus >= 199711L)) && !defined(CRYPTOPP_UNCAUGHT_EXCEPTION_AVAILABLE) # error "std::uncaught_exception is not available. This is likely a configuration error." diff --git a/config.recommend b/config.recommend index 66421a73..be30f54d 100644 --- a/config.recommend +++ b/config.recommend @@ -827,6 +827,12 @@ NAMESPACE_END # define CRYPTOPP_NO_THROW #endif // CRYPTOPP_CXX11_NOEXCEPT +// Hack... CRYPTOPP_ALIGN_DATA is defined earlier, before C++11 alignas availability is determined +#if defined(CRYPTOPP_CXX11_ALIGNAS) +# undef CRYPTOPP_ALIGN_DATA +# define CRYPTOPP_ALIGN_DATA(x) alignas(x) +#endif // CRYPTOPP_CXX11_ALIGNAS + // OK to comment the following out, but please report it so we can fix it. #if (defined(__cplusplus) && (__cplusplus >= 199711L)) && !defined(CRYPTOPP_UNCAUGHT_EXCEPTION_AVAILABLE) # error "std::uncaught_exception is not available. This is likely a configuration error." diff --git a/gcm.cpp b/gcm.cpp index b5299832..ecbfb0c9 100644 --- a/gcm.cpp +++ b/gcm.cpp @@ -100,7 +100,8 @@ inline static void Xor16(byte *a, const byte *b, const byte *c) } #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE -static CRYPTOPP_ALIGN_DATA(16) const word64 s_clmulConstants64[] = { +CRYPTOPP_ALIGN_DATA(16) +static const word64 s_clmulConstants64[] = { W64LIT(0xe100000000000000), W64LIT(0xc200000000000000), W64LIT(0x08090a0b0c0d0e0f), W64LIT(0x0001020304050607), W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f)}; diff --git a/rijndael.cpp b/rijndael.cpp index 46396205..27bb34e1 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -9,15 +9,15 @@ July 2010: Added support for AES-NI instructions via compiler intrinsics. */ /* -Feb 2009: The x86/x64 assembly code was rewritten in by Wei Dai to do counter mode -caching, which was invented by Hongjun Wu and popularized by Daniel J. Bernstein -and Peter Schwabe in their paper "New AES software speed records". The round -function was also modified to include a trick similar to one in Brian Gladman's -x86 assembly code, doing an 8-bit register move to minimize the number of -register spills. Also switched to compressed tables and copying round keys to +Feb 2009: The x86/x64 assembly code was rewritten in by Wei Dai to do counter mode +caching, which was invented by Hongjun Wu and popularized by Daniel J. Bernstein +and Peter Schwabe in their paper "New AES software speed records". The round +function was also modified to include a trick similar to one in Brian Gladman's +x86 assembly code, doing an 8-bit register move to minimize the number of +register spills. Also switched to compressed tables and copying round keys to the stack. -The C++ implementation now uses compressed tables if +The C++ implementation now uses compressed tables if CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is defined. */ @@ -25,15 +25,15 @@ CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is defined. July 2006: Defense against timing attacks was added in by Wei Dai. The code now uses smaller tables in the first and last rounds, -and preloads them into L1 cache before usage (by loading at least -one element in each cache line). +and preloads them into L1 cache before usage (by loading at least +one element in each cache line). -We try to delay subsequent accesses to each table (used in the first +We try to delay subsequent accesses to each table (used in the first and last rounds) until all of the table has been preloaded. Hopefully the compiler isn't smart enough to optimize that code away. After preloading the table, we also try not to access any memory location -other than the table and the stack, in order to prevent table entries from +other than the table and the stack, in order to prevent table entries from being unloaded from L1 cache, until that round is finished. (Some popular CPUs have 2-way associative caches.) */ @@ -95,8 +95,8 @@ static word64 Td[256]; // Unused; avoids linker error on Microsoft X64 non-AESNI platforms namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];} # endif -static CRYPTOPP_ALIGN_DATA(16) word32 Te[256*4]; -static CRYPTOPP_ALIGN_DATA(16) word32 Td[256*4]; +CRYPTOPP_ALIGN_DATA(16) static word32 Te[256*4]; +CRYPTOPP_ALIGN_DATA(16) static word32 Td[256*4]; #endif // CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS static volatile bool s_TeFilled = false, s_TdFilled = false; @@ -509,7 +509,7 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock #if !(defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)) // timing attack countermeasure. see comments at top for more details - // If CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is defined, + // If CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is defined, // QUARTER_ROUND_LD will use Td, which is already preloaded. u = _u; for (i=0; i<256; i+=cacheLineSize) @@ -1001,7 +1001,7 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l #endif #ifdef __GNUC__ ATT_PREFIX - : + : : "c" (locals), "d" (k), "S" (Te), "D" (g_cacheLineSize) : "memory", "cc", "%eax" #if CRYPTOPP_BOOL_X64 @@ -1103,7 +1103,8 @@ inline void AESNI_Dec_4_Blocks(__m128i &block0, __m128i &block1, __m128i &block2 block3 = _mm_aesdeclast_si128(block3, rk); } -static CRYPTOPP_ALIGN_DATA(16) const word32 s_one[] = {0, 0, 0, 1<<24}; +CRYPTOPP_ALIGN_DATA(16) +static const word32 s_one[] = {0, 0, 0, 1<<24}; template inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, const __m128i *subkeys, unsigned int rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) @@ -1201,7 +1202,7 @@ inline size_t AESNI_AdvancedProcessBlocks(F1 func1, F4 func4, const __m128i *sub if (xorBlocks && !(flags & BlockTransformation::BT_XorInput)) block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)(const void *)xorBlocks)); - + _mm_storeu_si128((__m128i *)(void *)outBlocks, block); inBlocks += inIncrement; @@ -1220,7 +1221,7 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo if (HasAESNI()) return AESNI_AdvancedProcessBlocks(AESNI_Enc_Block, AESNI_Enc_4_Blocks, (const __m128i *)(const void *)m_key.begin(), m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); #endif - + #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM) if (HasSSE2()) { @@ -1298,7 +1299,7 @@ size_t Rijndael::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo { if (HasAESNI()) return AESNI_AdvancedProcessBlocks(AESNI_Dec_Block, AESNI_Dec_4_Blocks, (const __m128i *)(const void *)m_key.begin(), m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); - + return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags); }