From 32cc92e048ecdad5b5c1d901a14c5d4f2ab66444 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 4 Sep 2017 11:03:10 -0400 Subject: [PATCH] Fixup under-aligned buffers for stream ciphers on AltiVec and Power8 This commit prepares for the upcoming AltiVec and Power8 processor support for stream ciphers. This commit affects GlobalRNG() most because it's an AES-based generator. The commit favors AlignedSecByteBlock over SecByteBlock in places where messages are handled on the AltiVec and Power8 processor data paths. The data paths include all block cipher modes of operation, and some filters like FilterWithBufferedInput. Intel and ARM processors are tolerant of under-aligned buffers when using crypto instructions. AltiVec and Power8 are less tolerant, and they simply ignore the three low-order bits to ensure an address is aligned. AltiVec and Power8 have caused a fair number of wild writes on the stack and in the heap. Testing on a 64-bit Intel Skylake shows a marked improvement in performance. We suspect GCC is generating better code since it knows the alignment of the pointers, and does not have to emit fixup code for under-aligned and mis-aligned data. Testing on a mid-2000s 32-bit VIA C7-D with SSE2+SSSE3 showed no improvement, and no performance was lost. 
--- strciphr.cpp | 33 +++++++++++++++++++++------------ strciphr.h | 7 +++---- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/strciphr.cpp b/strciphr.cpp index 6aae6318..82dd55ea 100644 --- a/strciphr.cpp +++ b/strciphr.cpp @@ -39,15 +39,27 @@ void AdditiveCipherTemplate::GenerateBlock(byte *outString, size_t length) if (!length) return; } - CRYPTOPP_ASSERT(m_leftOver == 0); PolicyInterface &policy = this->AccessPolicy(); - unsigned int bytesPerIteration = policy.GetBytesPerIteration(); + const unsigned int bytesPerIteration = policy.GetBytesPerIteration(); + const unsigned int alignment = policy.GetAlignment(); if (length >= bytesPerIteration) { - size_t iterations = length / bytesPerIteration; - policy.WriteKeystream(outString, iterations); + const size_t iterations = length / bytesPerIteration; + + // Intel and ARM SIMD units can handle unaligned loads of + // byte buffers, but AltiVec and Power8 cannot. + if (!IsAlignedOn(outString, alignment)) + { + AlignedSecByteBlock temp(iterations * bytesPerIteration); + policy.WriteKeystream(temp, iterations); + memcpy(outString, temp, iterations * bytesPerIteration); + } + else + { + policy.WriteKeystream(outString, iterations); + } outString += iterations * bytesPerIteration; length -= iterations * bytesPerIteration; } @@ -78,15 +90,14 @@ void AdditiveCipherTemplate::ProcessData(byte *outString, const byte *inStrin if (!length) return; } - CRYPTOPP_ASSERT(m_leftOver == 0); PolicyInterface &policy = this->AccessPolicy(); - unsigned int bytesPerIteration = policy.GetBytesPerIteration(); + const unsigned int bytesPerIteration = policy.GetBytesPerIteration(); + const unsigned int alignment = policy.GetAlignment(); if (policy.CanOperateKeystream() && length >= bytesPerIteration) { - size_t iterations = length / bytesPerIteration; - unsigned int alignment = policy.GetAlignment(); + const size_t iterations = length / bytesPerIteration; KeystreamOperation operation = KeystreamOperation((IsAlignedOn(inString, 
alignment) * 2) | (int)IsAlignedOn(outString, alignment)); policy.OperateKeystream(operation, outString, inString, iterations); @@ -179,8 +190,8 @@ void CFB_CipherTemplate::ProcessData(byte *outString, const byte *inString CRYPTOPP_ASSERT(length % this->MandatoryBlockSize() == 0); PolicyInterface &policy = this->AccessPolicy(); - unsigned int bytesPerIteration = policy.GetBytesPerIteration(); - unsigned int alignment = policy.GetAlignment(); + const unsigned int bytesPerIteration = policy.GetBytesPerIteration(); + const unsigned int alignment = policy.GetAlignment(); byte *reg = policy.GetRegisterBegin(); if (m_leftOver) @@ -196,8 +207,6 @@ void CFB_CipherTemplate::ProcessData(byte *outString, const byte *inString if (!length) return; - CRYPTOPP_ASSERT(m_leftOver == 0); - if (policy.CanIterate() && length >= bytesPerIteration && IsAlignedOn(outString, alignment)) { if (IsAlignedOn(inString, alignment)) diff --git a/strciphr.h b/strciphr.h index 10082907..bc105a27 100644 --- a/strciphr.h +++ b/strciphr.h @@ -187,13 +187,12 @@ struct CRYPTOPP_NO_VTABLE AdditiveCipherConcretePolicy : public BASE typedef WT WordType; CRYPTOPP_CONSTANT(BYTES_PER_ITERATION = sizeof(WordType) * W) -#if !(CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64) //! \brief Provides data alignment requirements //! \returns data alignment requirements, in bytes //! \details Internally, the default implementation returns 1. If the stream cipher is implemented - //! using an SSE2 ASM or intrinsics, then the value returned is usually 16. + //! using an SSE2 ASM or intrinsics, then the value returned is usually 16. If the cipher is + //! AES on AltiVec or Power 8 then 16 is returned. unsigned int GetAlignment() const {return GetAlignmentOf();} -#endif //! \brief Provides number of bytes operated upon during an iteration //! 
\returns bytes operated upon during an iteration, in bytes @@ -340,7 +339,7 @@ protected: inline byte * KeystreamBufferBegin() {return this->m_buffer.data();} inline byte * KeystreamBufferEnd() {return (this->m_buffer.data() + this->m_buffer.size());} - SecByteBlock m_buffer; + AlignedSecByteBlock m_buffer; size_t m_leftOver; };