Enable SIMD implementation for ChaChaTLS (GH #265)
parent
caaaaa7691
commit
d25ba0c59a
116
chacha.cpp
116
chacha.cpp
|
|
@ -337,9 +337,6 @@ std::string ChaChaTLS_Policy::AlgorithmName() const
|
||||||
|
|
||||||
std::string ChaChaTLS_Policy::AlgorithmProvider() const
|
std::string ChaChaTLS_Policy::AlgorithmProvider() const
|
||||||
{
|
{
|
||||||
// Disable SIMD until we obtain large block test vectors and handle wrap.
|
|
||||||
// https://mailarchive.ietf.org/arch/msg/saag/S0_YjVkzEx2s2bHd8KIzjK1CwZ4
|
|
||||||
#if 0
|
|
||||||
#if (CRYPTOPP_AVX2_AVAILABLE)
|
#if (CRYPTOPP_AVX2_AVAILABLE)
|
||||||
if (HasAVX2())
|
if (HasAVX2())
|
||||||
return "AVX2";
|
return "AVX2";
|
||||||
|
|
@ -363,7 +360,6 @@ std::string ChaChaTLS_Policy::AlgorithmProvider() const
|
||||||
if (HasAltivec())
|
if (HasAltivec())
|
||||||
return "Altivec";
|
return "Altivec";
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
return "C++";
|
return "C++";
|
||||||
}
|
}
|
||||||
|
|
@ -418,9 +414,6 @@ void ChaChaTLS_Policy::SeekToIteration(lword iterationCount)
|
||||||
|
|
||||||
unsigned int ChaChaTLS_Policy::GetAlignment() const
|
unsigned int ChaChaTLS_Policy::GetAlignment() const
|
||||||
{
|
{
|
||||||
// Disable SIMD until we obtain large block test vectors and handle wrap.
|
|
||||||
// https://mailarchive.ietf.org/arch/msg/saag/S0_YjVkzEx2s2bHd8KIzjK1CwZ4
|
|
||||||
#if 0
|
|
||||||
#if (CRYPTOPP_AVX2_AVAILABLE)
|
#if (CRYPTOPP_AVX2_AVAILABLE)
|
||||||
if (HasAVX2())
|
if (HasAVX2())
|
||||||
return 16;
|
return 16;
|
||||||
|
|
@ -435,16 +428,12 @@ unsigned int ChaChaTLS_Policy::GetAlignment() const
|
||||||
if (HasAltivec())
|
if (HasAltivec())
|
||||||
return 16;
|
return 16;
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
return GetAlignmentOf<word32>();
|
return GetAlignmentOf<word32>();
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int ChaChaTLS_Policy::GetOptimalBlockSize() const
|
unsigned int ChaChaTLS_Policy::GetOptimalBlockSize() const
|
||||||
{
|
{
|
||||||
// Disable SIMD until we obtain large block test vectors and handle wrap.
|
|
||||||
// https://mailarchive.ietf.org/arch/msg/saag/S0_YjVkzEx2s2bHd8KIzjK1CwZ4
|
|
||||||
#if 0
|
|
||||||
#if (CRYPTOPP_AVX2_AVAILABLE)
|
#if (CRYPTOPP_AVX2_AVAILABLE)
|
||||||
if (HasAVX2())
|
if (HasAVX2())
|
||||||
return 8 * BYTES_PER_ITERATION;
|
return 8 * BYTES_PER_ITERATION;
|
||||||
|
|
@ -464,32 +453,33 @@ unsigned int ChaChaTLS_Policy::GetOptimalBlockSize() const
|
||||||
if (HasAltivec())
|
if (HasAltivec())
|
||||||
return 4*BYTES_PER_ITERATION;
|
return 4*BYTES_PER_ITERATION;
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
return BYTES_PER_ITERATION;
|
return BYTES_PER_ITERATION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ChaChaTLS_Policy::MultiBlockSafe(unsigned int blocks) const
|
||||||
|
{
|
||||||
|
return 0xffffffff - m_state[12] > blocks;
|
||||||
|
}
|
||||||
|
|
||||||
// OperateKeystream always produces a key stream. The key stream is written
|
// OperateKeystream always produces a key stream. The key stream is written
|
||||||
// to output. Optionally a message may be supplied to xor with the key stream.
|
// to output. Optionally a message may be supplied to xor with the key stream.
|
||||||
// The message is input, and output = output ^ input.
|
// The message is input, and output = output ^ input.
|
||||||
void ChaChaTLS_Policy::OperateKeystream(KeystreamOperation operation,
|
void ChaChaTLS_Policy::OperateKeystream(KeystreamOperation operation,
|
||||||
byte *output, const byte *input, size_t iterationCount)
|
byte *output, const byte *input, size_t iterationCount)
|
||||||
{
|
{
|
||||||
// Disable SIMD until we obtain large block test vectors and handle wrap.
|
do
|
||||||
// https://mailarchive.ietf.org/arch/msg/saag/S0_YjVkzEx2s2bHd8KIzjK1CwZ4
|
{
|
||||||
#if 0
|
|
||||||
#if (CRYPTOPP_AVX2_AVAILABLE)
|
#if (CRYPTOPP_AVX2_AVAILABLE)
|
||||||
if (HasAVX2())
|
if (HasAVX2())
|
||||||
{
|
{
|
||||||
while (iterationCount >= 8)
|
while (iterationCount >= 8 && MultiBlockSafe(8))
|
||||||
{
|
{
|
||||||
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
||||||
ChaCha_OperateKeystream_AVX2(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
ChaCha_OperateKeystream_AVX2(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
||||||
|
|
||||||
// MultiBlockSafe avoids overflow on the counter words
|
// MultiBlockSafe avoids overflow on the counter word
|
||||||
m_state[12] += 8;
|
m_state[12] += 8;
|
||||||
//if (m_state[12] < 8)
|
|
||||||
// m_state[13]++;
|
|
||||||
|
|
||||||
input += (!!xorInput) * 8 * BYTES_PER_ITERATION;
|
input += (!!xorInput) * 8 * BYTES_PER_ITERATION;
|
||||||
output += 8 * BYTES_PER_ITERATION;
|
output += 8 * BYTES_PER_ITERATION;
|
||||||
|
|
@ -501,15 +491,13 @@ void ChaChaTLS_Policy::OperateKeystream(KeystreamOperation operation,
|
||||||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||||
if (HasSSE2())
|
if (HasSSE2())
|
||||||
{
|
{
|
||||||
while (iterationCount >= 4)
|
while (iterationCount >= 4 && MultiBlockSafe(4))
|
||||||
{
|
{
|
||||||
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
||||||
ChaCha_OperateKeystream_SSE2(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
ChaCha_OperateKeystream_SSE2(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
||||||
|
|
||||||
// MultiBlockSafe avoids overflow on the counter words
|
// MultiBlockSafe avoids overflow on the counter word
|
||||||
m_state[12] += 4;
|
m_state[12] += 4;
|
||||||
//if (m_state[12] < 4)
|
|
||||||
// m_state[13]++;
|
|
||||||
|
|
||||||
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
||||||
output += 4*BYTES_PER_ITERATION;
|
output += 4*BYTES_PER_ITERATION;
|
||||||
|
|
@ -521,15 +509,13 @@ void ChaChaTLS_Policy::OperateKeystream(KeystreamOperation operation,
|
||||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||||
if (HasNEON())
|
if (HasNEON())
|
||||||
{
|
{
|
||||||
while (iterationCount >= 4)
|
while (iterationCount >= 4 && MultiBlockSafe(4))
|
||||||
{
|
{
|
||||||
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
||||||
ChaCha_OperateKeystream_NEON(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
ChaCha_OperateKeystream_NEON(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
||||||
|
|
||||||
// MultiBlockSafe avoids overflow on the counter words
|
// MultiBlockSafe avoids overflow on the counter word
|
||||||
m_state[12] += 4;
|
m_state[12] += 4;
|
||||||
//if (m_state[12] < 4)
|
|
||||||
// m_state[13]++;
|
|
||||||
|
|
||||||
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
||||||
output += 4*BYTES_PER_ITERATION;
|
output += 4*BYTES_PER_ITERATION;
|
||||||
|
|
@ -541,15 +527,13 @@ void ChaChaTLS_Policy::OperateKeystream(KeystreamOperation operation,
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
#if (CRYPTOPP_POWER7_AVAILABLE)
|
||||||
if (HasPower7())
|
if (HasPower7())
|
||||||
{
|
{
|
||||||
while (iterationCount >= 4)
|
while (iterationCount >= 4 && MultiBlockSafe(4))
|
||||||
{
|
{
|
||||||
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
||||||
ChaCha_OperateKeystream_POWER7(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
ChaCha_OperateKeystream_POWER7(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
||||||
|
|
||||||
// MultiBlockSafe avoids overflow on the counter words
|
// MultiBlockSafe avoids overflow on the counter word
|
||||||
m_state[12] += 4;
|
m_state[12] += 4;
|
||||||
//if (m_state[12] < 4)
|
|
||||||
// m_state[13]++;
|
|
||||||
|
|
||||||
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
||||||
output += 4*BYTES_PER_ITERATION;
|
output += 4*BYTES_PER_ITERATION;
|
||||||
|
|
@ -559,58 +543,58 @@ void ChaChaTLS_Policy::OperateKeystream(KeystreamOperation operation,
|
||||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
if (HasAltivec())
|
if (HasAltivec())
|
||||||
{
|
{
|
||||||
while (iterationCount >= 4)
|
while (iterationCount >= 4 && MultiBlockSafe(4))
|
||||||
{
|
{
|
||||||
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
||||||
ChaCha_OperateKeystream_ALTIVEC(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
ChaCha_OperateKeystream_ALTIVEC(m_state, xorInput ? input : NULLPTR, output, m_rounds);
|
||||||
|
|
||||||
// MultiBlockSafe avoids overflow on the counter words
|
// MultiBlockSafe avoids overflow on the counter word
|
||||||
m_state[12] += 4;
|
m_state[12] += 4;
|
||||||
//if (m_state[12] < 4)
|
|
||||||
// m_state[13]++;
|
|
||||||
|
|
||||||
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
input += (!!xorInput)*4*BYTES_PER_ITERATION;
|
||||||
output += 4*BYTES_PER_ITERATION;
|
output += 4*BYTES_PER_ITERATION;
|
||||||
iterationCount -= 4;
|
iterationCount -= 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
while (iterationCount--)
|
if (iterationCount)
|
||||||
{
|
|
||||||
word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
|
|
||||||
|
|
||||||
x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
|
|
||||||
x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
|
|
||||||
x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
|
|
||||||
x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
|
|
||||||
|
|
||||||
for (int i = static_cast<int>(m_rounds); i > 0; i -= 2)
|
|
||||||
{
|
{
|
||||||
CHACHA_QUARTER_ROUND(x0, x4, x8, x12);
|
word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
|
||||||
CHACHA_QUARTER_ROUND(x1, x5, x9, x13);
|
|
||||||
CHACHA_QUARTER_ROUND(x2, x6, x10, x14);
|
|
||||||
CHACHA_QUARTER_ROUND(x3, x7, x11, x15);
|
|
||||||
|
|
||||||
CHACHA_QUARTER_ROUND(x0, x5, x10, x15);
|
x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
|
||||||
CHACHA_QUARTER_ROUND(x1, x6, x11, x12);
|
x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
|
||||||
CHACHA_QUARTER_ROUND(x2, x7, x8, x13);
|
x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
|
||||||
CHACHA_QUARTER_ROUND(x3, x4, x9, x14);
|
x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
|
||||||
|
|
||||||
|
for (int i = static_cast<int>(m_rounds); i > 0; i -= 2)
|
||||||
|
{
|
||||||
|
CHACHA_QUARTER_ROUND(x0, x4, x8, x12);
|
||||||
|
CHACHA_QUARTER_ROUND(x1, x5, x9, x13);
|
||||||
|
CHACHA_QUARTER_ROUND(x2, x6, x10, x14);
|
||||||
|
CHACHA_QUARTER_ROUND(x3, x7, x11, x15);
|
||||||
|
|
||||||
|
CHACHA_QUARTER_ROUND(x0, x5, x10, x15);
|
||||||
|
CHACHA_QUARTER_ROUND(x1, x6, x11, x12);
|
||||||
|
CHACHA_QUARTER_ROUND(x2, x7, x8, x13);
|
||||||
|
CHACHA_QUARTER_ROUND(x3, x4, x9, x14);
|
||||||
|
}
|
||||||
|
|
||||||
|
CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION);
|
||||||
|
|
||||||
|
if (++m_state[12] == 0)
|
||||||
|
{
|
||||||
|
// m_state[13]++;
|
||||||
|
|
||||||
|
// RFC 7539 does not say what to do here. ChaCha-TLS uses state[13] for
|
||||||
|
// part of the nonce, and we can't carry into it. There is no good option...
|
||||||
|
// https://mailarchive.ietf.org/arch/msg/saag/S0_YjVkzEx2s2bHd8KIzjK1CwZ4
|
||||||
|
CRYPTOPP_ASSERT(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION);
|
// We may re-enter a SIMD keystream operation from here.
|
||||||
|
} while (iterationCount--);
|
||||||
if (++m_state[12] == 0)
|
|
||||||
{
|
|
||||||
// m_state[13]++;
|
|
||||||
|
|
||||||
// RFC 7539 does not say what to do here. ChaCha-TLS uses state[13] for
|
|
||||||
// part of the nonce, and we can't carry into it. Shit or go blind...
|
|
||||||
// https://mailarchive.ietf.org/arch/msg/saag/S0_YjVkzEx2s2bHd8KIzjK1CwZ4
|
|
||||||
CRYPTOPP_ASSERT(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_END
|
NAMESPACE_END
|
||||||
|
|
|
||||||
7
chacha.h
7
chacha.h
|
|
@ -131,6 +131,13 @@ protected:
|
||||||
std::string AlgorithmName() const;
|
std::string AlgorithmName() const;
|
||||||
std::string AlgorithmProvider() const;
|
std::string AlgorithmProvider() const;
|
||||||
|
|
||||||
|
// MultiBlockSafe detects a condition that can arise in the SIMD
|
||||||
|
// implementations where we overflow one of the 32-bit state words
|
||||||
|
// during addition in an intermediate result. Conditions to trigger
|
||||||
|
// the issue include a user seeking to around 2^32 blocks (256 GB of data).
|
||||||
|
// https://github.com/weidai11/cryptopp/issues/732
|
||||||
|
inline bool MultiBlockSafe(unsigned int blocks) const;
|
||||||
|
|
||||||
FixedSizeAlignedSecBlock<word32, 16+1> m_state;
|
FixedSizeAlignedSecBlock<word32, 16+1> m_state;
|
||||||
CRYPTOPP_CONSTANT(m_rounds = ChaChaTLS_Info::ROUNDS)
|
CRYPTOPP_CONSTANT(m_rounds = ChaChaTLS_Info::ROUNDS)
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue