Fixup under-aligned buffers for AltiVec and Power8

This commit prepares for the upcoming AltiVec and Power8 processor support. The commit favors AlignedSecByteBlock over SecByteBlock in places where messages are handled on the AltiVec and Power8 processor data paths. The data paths include all block cipher modes of operation, and some filters, such as those built on FilterPutSpaceHelper.

Intel and ARM processors are tolerant of under-aligned buffers when using crypto instructions. AltiVec and Power8 are less tolerant, and they simply ignore the three low-order bits to ensure an address is aligned. On AltiVec and Power8, this behavior has caused a fair number of wild writes on the stack and in the heap.

Testing on a 64-bit Intel Skylake shows a marked improvement in performance. We suspect GCC is generating better code since it knows the alignment of the pointers, and does not have to emit fixup code for under-aligned and mis-aligned data. Here are some data points:

  SecByteBlock
    - Poly1305: 3.4 cpb
    - Blake2s: 6.7 cpb
    - Blake2b: 4.5 cpb
    - SipHash-2-4: 3.1 cpb
    - SipHash-4-8: 3.5 cpb
    - ChaCha20: 7.4 cpb
    - ChaCha12: 4.6 cpb
    - ChaCha8: 3.5 cpb

  AlignedSecByteBlock
    - Poly1305: 2.9 cpb
    - Blake2s: 5.5 cpb
    - Blake2b: 3.9 cpb
    - SipHash-2-4: 1.9 cpb
    - SipHash-4-8: 3.3 cpb
    - ChaCha20: 6.0 cpb
    - ChaCha12: 4.0 cpb
    - ChaCha8: 2.9 cpb

Testing on a mid-2000s 32-bit VIA C7-D with SSE2+SSSE3 showed no improvement, and no performance was lost.
pull/489/head
Jeffrey Walton 2017-09-04 10:24:24 -04:00
parent 8578383e2c
commit 9c2a1c74fe
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
3 changed files with 129 additions and 48 deletions

View File

@ -210,7 +210,7 @@ struct CRYPTOPP_DLL FilterPutSpaceHelper
{return HelpCreatePutSpace(target, channel, minSize, minSize, bufferSize);} {return HelpCreatePutSpace(target, channel, minSize, minSize, bufferSize);}
//! \brief Temporay working space //! \brief Temporay working space
SecByteBlock m_tempSpace; AlignedSecByteBlock m_tempSpace;
}; };
//! \class MeterFilter //! \class MeterFilter
@ -414,7 +414,7 @@ protected:
size_t MaxSize() const {return m_buffer.size();} size_t MaxSize() const {return m_buffer.size();}
private: private:
SecByteBlock m_buffer; AlignedSecByteBlock m_buffer;
size_t m_blockSize, m_maxBlocks, m_size; size_t m_blockSize, m_maxBlocks, m_size;
byte *m_begin; byte *m_begin;
}; };

133
modes.cpp
View File

@ -43,15 +43,15 @@ void CFB_ModePolicy::Iterate(byte *output, const byte *input, CipherDir dir, siz
m_cipher->ProcessAndXorBlock(m_register, input, output); m_cipher->ProcessAndXorBlock(m_register, input, output);
if (iterationCount > 1) if (iterationCount > 1)
m_cipher->AdvancedProcessBlocks(output, input+s, output+s, (iterationCount-1)*s, 0); m_cipher->AdvancedProcessBlocks(output, input+s, output+s, (iterationCount-1)*s, 0);
memcpy(m_register, output+(iterationCount-1)*s, s); std::memcpy(m_register, output+(iterationCount-1)*s, s);
} }
else else
{ {
memcpy(m_temp, input+(iterationCount-1)*s, s); // make copy first in case of in-place decryption std::memcpy(m_temp, input+(iterationCount-1)*s, s); // make copy first in case of in-place decryption
if (iterationCount > 1) if (iterationCount > 1)
m_cipher->AdvancedProcessBlocks(input, input+s, output+s, (iterationCount-1)*s, BlockTransformation::BT_ReverseDirection); m_cipher->AdvancedProcessBlocks(input, input+s, output+s, (iterationCount-1)*s, BlockTransformation::BT_ReverseDirection);
m_cipher->ProcessAndXorBlock(m_register, input, output); m_cipher->ProcessAndXorBlock(m_register, input, output);
memcpy(m_register, m_temp, s); std::memcpy(m_register, m_temp, s);
} }
} }
@ -87,11 +87,11 @@ void CFB_ModePolicy::ResizeBuffers()
void OFB_ModePolicy::WriteKeystream(byte *keystreamBuffer, size_t iterationCount) void OFB_ModePolicy::WriteKeystream(byte *keystreamBuffer, size_t iterationCount)
{ {
CRYPTOPP_ASSERT(m_cipher->IsForwardTransformation()); // OFB mode needs the "encrypt" direction of the underlying block cipher, even to decrypt CRYPTOPP_ASSERT(m_cipher->IsForwardTransformation()); // OFB mode needs the "encrypt" direction of the underlying block cipher, even to decrypt
unsigned int s = BlockSize(); const unsigned int s = BlockSize();
m_cipher->ProcessBlock(m_register, keystreamBuffer); m_cipher->ProcessBlock(m_register, keystreamBuffer);
if (iterationCount > 1) if (iterationCount > 1)
m_cipher->AdvancedProcessBlocks(keystreamBuffer, NULLPTR, keystreamBuffer+s, s*(iterationCount-1), 0); m_cipher->AdvancedProcessBlocks(keystreamBuffer, NULLPTR, keystreamBuffer+s, s*(iterationCount-1), 0);
memcpy(m_register, keystreamBuffer+s*(iterationCount-1), s); std::memcpy(m_register, keystreamBuffer+s*(iterationCount-1), s);
} }
void OFB_ModePolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length) void OFB_ModePolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
@ -121,15 +121,28 @@ void CTR_ModePolicy::IncrementCounterBy256()
void CTR_ModePolicy::OperateKeystream(KeystreamOperation /*operation*/, byte *output, const byte *input, size_t iterationCount) void CTR_ModePolicy::OperateKeystream(KeystreamOperation /*operation*/, byte *output, const byte *input, size_t iterationCount)
{ {
CRYPTOPP_ASSERT(m_cipher->IsForwardTransformation()); // CTR mode needs the "encrypt" direction of the underlying block cipher, even to decrypt // CTR mode needs the "encrypt" direction of the underlying block cipher, even to decrypt
unsigned int s = BlockSize(); CRYPTOPP_ASSERT(m_cipher->IsForwardTransformation());
unsigned int inputIncrement = input ? s : 0; const unsigned int s = BlockSize();
const unsigned int inputIncrement = input ? s : 0;
const unsigned int alignment = m_cipher->OptimalDataAlignment();
while (iterationCount) while (iterationCount)
{ {
byte lsb = m_counterArray[s-1]; byte lsb = m_counterArray[s-1];
size_t blocks = UnsignedMin(iterationCount, 256U-lsb); const size_t blocks = UnsignedMin(iterationCount, 256U-lsb);
const bool align = !IsAlignedOn(input, alignment) || !IsAlignedOn(output, alignment);
if (align)
{
AlignedSecByteBlock i(input, blocks*s), o(blocks*s);
m_cipher->AdvancedProcessBlocks(m_counterArray, i, o, blocks*s, BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_AllowParallel);
std::memcpy(output, o, blocks*s);
}
else
{
m_cipher->AdvancedProcessBlocks(m_counterArray, input, output, blocks*s, BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_AllowParallel); m_cipher->AdvancedProcessBlocks(m_counterArray, input, output, blocks*s, BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_AllowParallel);
}
if ((m_counterArray[s-1] = lsb + (byte)blocks) == 0) if ((m_counterArray[s-1] = lsb + (byte)blocks) == 0)
IncrementCounterBy256(); IncrementCounterBy256();
@ -168,32 +181,77 @@ void BlockOrientedCipherModeBase::ResizeBuffers()
void ECB_OneWay::ProcessData(byte *outString, const byte *inString, size_t length) void ECB_OneWay::ProcessData(byte *outString, const byte *inString, size_t length)
{ {
// If this fires you should align your buffers. There's a non-trival penalty for some processors
CRYPTOPP_ASSERT(IsAlignedOn(inString, m_cipher->OptimalDataAlignment()));
CRYPTOPP_ASSERT(IsAlignedOn(outString, m_cipher->OptimalDataAlignment()));
CRYPTOPP_ASSERT(length%BlockSize()==0); CRYPTOPP_ASSERT(length%BlockSize()==0);
const unsigned int blockSize = BlockSize();
const unsigned int alignment = m_cipher->OptimalDataAlignment();
bool align = !IsAlignedOn(inString, alignment) || !IsAlignedOn(outString, alignment);
if (align)
{
AlignedSecByteBlock i(length), o(length);
std::memcpy(i, inString, length);
std::memcpy(o, outString+length-blockSize, blockSize); // copy tail
m_cipher->AdvancedProcessBlocks(i, NULLPTR, o, length, BlockTransformation::BT_AllowParallel);
std::memcpy(outString, o, length);
}
else
{
m_cipher->AdvancedProcessBlocks(inString, NULLPTR, outString, length, BlockTransformation::BT_AllowParallel); m_cipher->AdvancedProcessBlocks(inString, NULLPTR, outString, length, BlockTransformation::BT_AllowParallel);
} }
}
void CBC_Encryption::ProcessData(byte *outString, const byte *inString, size_t length) void CBC_Encryption::ProcessData(byte *outString, const byte *inString, size_t length)
{ {
if (!length) // If this fires you should align your buffers. There's a non-trival penalty for some processors
return; // CRYPTOPP_ASSERT(IsAlignedOn(inString, m_cipher->OptimalDataAlignment()));
CRYPTOPP_ASSERT(IsAlignedOn(outString, m_cipher->OptimalDataAlignment()));
CRYPTOPP_ASSERT(length%BlockSize()==0); CRYPTOPP_ASSERT(length%BlockSize()==0);
unsigned int blockSize = BlockSize(); if (!length)
return;
const unsigned int blockSize = BlockSize();
const unsigned int alignment = m_cipher->OptimalDataAlignment();
bool align = !IsAlignedOn(inString, alignment) || !IsAlignedOn(outString, alignment);
if (align)
{
AlignedSecByteBlock i(length), o(length);
std::memcpy(i, inString, length);
std::memcpy(o, outString+length-blockSize, blockSize); // copy tail
m_cipher->AdvancedProcessBlocks(i, m_register, o, blockSize, BlockTransformation::BT_XorInput);
if (length > blockSize)
m_cipher->AdvancedProcessBlocks(i+blockSize, o, o+blockSize, length-blockSize, BlockTransformation::BT_XorInput);
std::memcpy(m_register, o + length - blockSize, blockSize);
std::memcpy(outString, o, length);
}
else
{
m_cipher->AdvancedProcessBlocks(inString, m_register, outString, blockSize, BlockTransformation::BT_XorInput); m_cipher->AdvancedProcessBlocks(inString, m_register, outString, blockSize, BlockTransformation::BT_XorInput);
if (length > blockSize) if (length > blockSize)
m_cipher->AdvancedProcessBlocks(inString+blockSize, outString, outString+blockSize, length-blockSize, BlockTransformation::BT_XorInput); m_cipher->AdvancedProcessBlocks(inString+blockSize, outString, outString+blockSize, length-blockSize, BlockTransformation::BT_XorInput);
memcpy(m_register, outString + length - blockSize, blockSize); std::memcpy(m_register, outString + length - blockSize, blockSize);
}
} }
void CBC_CTS_Encryption::ProcessLastBlock(byte *outString, const byte *inString, size_t length) void CBC_CTS_Encryption::ProcessLastBlock(byte *outString, const byte *inString, size_t length)
{ {
// If this fires you should align your buffers. There's a non-trival penalty for some processors
CRYPTOPP_ASSERT(IsAlignedOn(inString, m_cipher->OptimalDataAlignment()));
CRYPTOPP_ASSERT(IsAlignedOn(outString, m_cipher->OptimalDataAlignment()));
if (length <= BlockSize()) if (length <= BlockSize())
{ {
if (!m_stolenIV) if (!m_stolenIV)
throw InvalidArgument("CBC_Encryption: message is too short for ciphertext stealing"); throw InvalidArgument("CBC_Encryption: message is too short for ciphertext stealing");
// steal from IV // steal from IV
memcpy(outString, m_register, length); std::memcpy(outString, m_register, length);
outString = m_stolenIV; outString = m_stolenIV;
} }
else else
@ -203,13 +261,13 @@ void CBC_CTS_Encryption::ProcessLastBlock(byte *outString, const byte *inString,
m_cipher->ProcessBlock(m_register); m_cipher->ProcessBlock(m_register);
inString += BlockSize(); inString += BlockSize();
length -= BlockSize(); length -= BlockSize();
memcpy(outString+BlockSize(), m_register, length); std::memcpy(outString+BlockSize(), m_register, length);
} }
// output last full ciphertext block // output last full ciphertext block
xorbuf(m_register, inString, length); xorbuf(m_register, inString, length);
m_cipher->ProcessBlock(m_register); m_cipher->ProcessBlock(m_register);
memcpy(outString, m_register, BlockSize()); std::memcpy(outString, m_register, BlockSize());
} }
void CBC_Decryption::ResizeBuffers() void CBC_Decryption::ResizeBuffers()
@ -220,20 +278,47 @@ void CBC_Decryption::ResizeBuffers()
void CBC_Decryption::ProcessData(byte *outString, const byte *inString, size_t length) void CBC_Decryption::ProcessData(byte *outString, const byte *inString, size_t length)
{ {
if (!length) // If this fires you should align your buffers. There's a non-trival penalty for some processors
return; CRYPTOPP_ASSERT(IsAlignedOn(inString, m_cipher->OptimalDataAlignment()));
CRYPTOPP_ASSERT(IsAlignedOn(outString, m_cipher->OptimalDataAlignment()));
CRYPTOPP_ASSERT(length%BlockSize()==0); CRYPTOPP_ASSERT(length%BlockSize()==0);
unsigned int blockSize = BlockSize(); if (!length)
memcpy(m_temp, inString+length-blockSize, blockSize); // save copy now in case of in-place decryption return;
const unsigned int blockSize = BlockSize();
const unsigned int alignment = m_cipher->OptimalDataAlignment();
bool align = !IsAlignedOn(inString, alignment) || !IsAlignedOn(outString, alignment);
if (align)
{
AlignedSecByteBlock i(length), o(length);
std::memcpy(i, inString, length);
std::memcpy(o, outString+length-blockSize, blockSize); // copy tail
std::memcpy(m_temp, i+length-blockSize, blockSize); // save copy now in case of in-place decryption
if (length > blockSize)
m_cipher->AdvancedProcessBlocks(i+blockSize, i, o+blockSize, length-blockSize, BlockTransformation::BT_ReverseDirection|BlockTransformation::BT_AllowParallel);
m_cipher->ProcessAndXorBlock(i, m_register, o);
m_register.swap(m_temp);
std::memcpy(outString, o, length);
}
else
{
std::memcpy(m_temp, inString+length-blockSize, blockSize); // save copy now in case of in-place decryption
if (length > blockSize) if (length > blockSize)
m_cipher->AdvancedProcessBlocks(inString+blockSize, inString, outString+blockSize, length-blockSize, BlockTransformation::BT_ReverseDirection|BlockTransformation::BT_AllowParallel); m_cipher->AdvancedProcessBlocks(inString+blockSize, inString, outString+blockSize, length-blockSize, BlockTransformation::BT_ReverseDirection|BlockTransformation::BT_AllowParallel);
m_cipher->ProcessAndXorBlock(inString, m_register, outString); m_cipher->ProcessAndXorBlock(inString, m_register, outString);
m_register.swap(m_temp); m_register.swap(m_temp);
} }
}
void CBC_CTS_Decryption::ProcessLastBlock(byte *outString, const byte *inString, size_t length) void CBC_CTS_Decryption::ProcessLastBlock(byte *outString, const byte *inString, size_t length)
{ {
// If this fires you should align your buffers. There's a non-trival penalty for some processors
CRYPTOPP_ASSERT(IsAlignedOn(inString, m_cipher->OptimalDataAlignment()));
CRYPTOPP_ASSERT(IsAlignedOn(outString, m_cipher->OptimalDataAlignment()));
const byte *pn, *pn1; const byte *pn, *pn1;
bool stealIV = length <= BlockSize(); bool stealIV = length <= BlockSize();
@ -250,17 +335,17 @@ void CBC_CTS_Decryption::ProcessLastBlock(byte *outString, const byte *inString,
} }
// decrypt last partial plaintext block // decrypt last partial plaintext block
memcpy(m_temp, pn1, BlockSize()); std::memcpy(m_temp, pn1, BlockSize());
m_cipher->ProcessBlock(m_temp); m_cipher->ProcessBlock(m_temp);
xorbuf(m_temp, pn, length); xorbuf(m_temp, pn, length);
if (stealIV) if (stealIV)
memcpy(outString, m_temp, length); std::memcpy(outString, m_temp, length);
else else
{ {
memcpy(outString+BlockSize(), m_temp, length); std::memcpy(outString+BlockSize(), m_temp, length);
// decrypt next to last plaintext block // decrypt next to last plaintext block
memcpy(m_temp, pn, length); std::memcpy(m_temp, pn, length);
m_cipher->ProcessBlock(m_temp); m_cipher->ProcessBlock(m_temp);
xorbuf(outString, m_temp, m_register, BlockSize()); xorbuf(outString, m_temp, m_register, BlockSize());
} }

22
modes.h
View File

@ -81,7 +81,7 @@ public:
} }
protected: protected:
CipherModeBase() : m_cipher(NULLPTR) {} CipherModeBase() : m_cipher(NULLPTR), m_register(0) {}
inline unsigned int BlockSize() const {CRYPTOPP_ASSERT(m_register.size() > 0); return (unsigned int)m_register.size();} inline unsigned int BlockSize() const {CRYPTOPP_ASSERT(m_register.size() > 0); return (unsigned int)m_register.size();}
virtual void SetFeedbackSize(unsigned int feedbackSize) virtual void SetFeedbackSize(unsigned int feedbackSize)
{ {
@ -134,7 +134,7 @@ protected:
void SetFeedbackSize(unsigned int feedbackSize); void SetFeedbackSize(unsigned int feedbackSize);
void ResizeBuffers(); void ResizeBuffers();
SecByteBlock m_temp; AlignedSecByteBlock m_temp;
unsigned int m_feedbackSize; unsigned int m_feedbackSize;
}; };
@ -209,7 +209,7 @@ protected:
bool RequireAlignedInput() const {return true;} bool RequireAlignedInput() const {return true;}
virtual void ResizeBuffers(); virtual void ResizeBuffers();
SecByteBlock m_buffer; AlignedSecByteBlock m_buffer;
}; };
//! \class ECB_OneWay //! \class ECB_OneWay
@ -436,7 +436,8 @@ struct ECB_Mode_ExternalCipher : public CipherModeDocumentation
typedef Encryption Decryption; typedef Encryption Decryption;
}; };
//! CBC mode //! \class CBC_Mode
//! \brief CBC block cipher mode of operation.
template <class CIPHER> template <class CIPHER>
struct CBC_Mode : public CipherModeDocumentation struct CBC_Mode : public CipherModeDocumentation
{ {
@ -447,14 +448,16 @@ struct CBC_Mode : public CipherModeDocumentation
CRYPTOPP_DLL_TEMPLATE_CLASS CipherModeFinalTemplate_ExternalCipher<CBC_Encryption>; CRYPTOPP_DLL_TEMPLATE_CLASS CipherModeFinalTemplate_ExternalCipher<CBC_Encryption>;
CRYPTOPP_DLL_TEMPLATE_CLASS CipherModeFinalTemplate_ExternalCipher<CBC_Decryption>; CRYPTOPP_DLL_TEMPLATE_CLASS CipherModeFinalTemplate_ExternalCipher<CBC_Decryption>;
//! CBC mode, external cipher //! \class CBC_Mode_ExternalCipher
//! \brief CBC mode, external cipher.
struct CBC_Mode_ExternalCipher : public CipherModeDocumentation struct CBC_Mode_ExternalCipher : public CipherModeDocumentation
{ {
typedef CipherModeFinalTemplate_ExternalCipher<CBC_Encryption> Encryption; typedef CipherModeFinalTemplate_ExternalCipher<CBC_Encryption> Encryption;
typedef CipherModeFinalTemplate_ExternalCipher<CBC_Decryption> Decryption; typedef CipherModeFinalTemplate_ExternalCipher<CBC_Decryption> Decryption;
}; };
//! CBC mode with ciphertext stealing //! \class CBC_CTS_Mode
//! \brief CTS block cipher mode of operation.
template <class CIPHER> template <class CIPHER>
struct CBC_CTS_Mode : public CipherModeDocumentation struct CBC_CTS_Mode : public CipherModeDocumentation
{ {
@ -473,13 +476,6 @@ struct CBC_CTS_Mode_ExternalCipher : public CipherModeDocumentation
typedef CipherModeFinalTemplate_ExternalCipher<CBC_CTS_Decryption> Decryption; typedef CipherModeFinalTemplate_ExternalCipher<CBC_CTS_Decryption> Decryption;
}; };
//#ifdef CRYPTOPP_MAINTAIN_BACKWARDS_COMPATIBILITY
//typedef CFB_Mode_ExternalCipher::Encryption CFBEncryption;
//typedef CFB_Mode_ExternalCipher::Decryption CFBDecryption;
//typedef OFB_Mode_ExternalCipher::Encryption OFB;
//typedef CTR_Mode_ExternalCipher::Encryption CounterMode;
//#endif
NAMESPACE_END NAMESPACE_END
// Issue 340 // Issue 340