From a65d55a3fd0b752fd23e10ec1555192621c322a8 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Tue, 20 Nov 2018 23:32:35 -0500 Subject: [PATCH] Rewrite BLAKE2 classes The ParameterBlocks for BLAKE2 had undefined behavior. We relied on the compiler packing the bytes in the structure, then we used the first byte as the start of an array. This rewrite does things correctly. We don't memset the structure, and we don't treat the structure as a contiguous array. --- blake2.cpp | 628 +++++++++++++++++++++++------------------------ blake2.h | 180 +++++++++----- blake2b_simd.cpp | 75 +++--- blake2s_simd.cpp | 39 +-- 4 files changed, 492 insertions(+), 430 deletions(-) diff --git a/blake2.cpp b/blake2.cpp index b0c54af2..c05856d0 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -172,194 +172,24 @@ extern void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state); extern void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state); #endif -BLAKE2s_ParameterBlock::BLAKE2s_ParameterBlock(size_t digestLen, size_t keyLen, - const byte* saltStr, size_t saltLen, - const byte* personalizationStr, size_t personalizationLen) +unsigned int BLAKE2b::OptimalDataAlignment() const { - digestLength = (byte)digestLen; - keyLength = (byte)keyLen; - fanout = depth = 1; - nodeDepth = innerLength = 0; - - std::memset(leafLength, 0x00, COUNTOF(leafLength)); - std::memset(nodeOffset, 0x00, COUNTOF(nodeOffset)); - - if (saltStr && saltLen) - { - memcpy_s(salt, COUNTOF(salt), saltStr, saltLen); - size_t rem = SaturatingSubtract(COUNTOF(salt), saltLen); - size_t off = COUNTOF(salt) - rem; - if (rem) - std::memset(salt+off, 0x00, rem); - } +#if defined(CRYPTOPP_SSE41_AVAILABLE) + if (HasSSE41()) + return 16; else - { - std::memset(salt, 0x00, COUNTOF(salt)); - } - - if (personalizationStr && personalizationLen) - { - memcpy_s(personalization, COUNTOF(personalization), personalizationStr, personalizationLen); - size_t rem = SaturatingSubtract(COUNTOF(personalization), personalizationLen); - size_t off = COUNTOF(personalization) - rem; - if (rem) - std::memset(personalization+off, 0x00, rem); - } +#endif +#if (CRYPTOPP_ARM_NEON_AVAILABLE) + if (HasNEON()) + return 4; else - { - std::memset(personalization, 0x00, COUNTOF(personalization)); - } -} - -BLAKE2b_ParameterBlock::BLAKE2b_ParameterBlock(size_t digestLen, size_t keyLen, - const byte* saltStr, size_t saltLen, - const byte* personalizationStr, size_t personalizationLen) -{ - digestLength = (byte)digestLen; - keyLength = (byte)keyLen; - fanout = depth = 1; - nodeDepth = innerLength = 0; - - std::memset(rfu, 0x00, COUNTOF(rfu)); - std::memset(leafLength, 0x00, COUNTOF(leafLength)); - std::memset(nodeOffset, 0x00, COUNTOF(nodeOffset)); - - if (saltStr && saltLen) - { - memcpy_s(salt, COUNTOF(salt), saltStr, saltLen); - size_t rem = SaturatingSubtract(COUNTOF(salt), saltLen); - size_t off = COUNTOF(salt) - rem; - if (rem) - std::memset(salt+off, 0x00, rem); - } +#endif +#if (CRYPTOPP_POWER8_AVAILABLE) + if (HasPower8()) + return 16; else - { - std::memset(salt, 0x00, COUNTOF(salt)); - } - - if (personalizationStr && personalizationLen) - { - memcpy_s(personalization, COUNTOF(personalization), personalizationStr, personalizationLen); - size_t rem = SaturatingSubtract(COUNTOF(personalization), personalizationLen); - size_t off = COUNTOF(personalization) - rem; - if (rem) - std::memset(personalization+off, 0x00, rem); - } - else - { - std::memset(personalization, 0x00, COUNTOF(personalization)); - } -} - -void BLAKE2s::UncheckedSetKey(const byte *key, unsigned int length, const CryptoPP::NameValuePairs& params) -{ - if (key && length) - { - AlignedSecByteBlock temp(BLOCKSIZE); - memcpy_s(temp, BLOCKSIZE, key, length); - - size_t rem = SaturatingSubtract((unsigned int)BLOCKSIZE, length); - if (rem) - std::memset(temp+length, 0x00, rem); - - m_key.swap(temp); - } - else - { - m_key.resize(0); - } - - ParameterBlock& block = *m_block.data(); - std::memset(block.leafLength, 0x00, COUNTOF(block.leafLength)); - std::memset(block.nodeOffset, 0x00, COUNTOF(block.nodeOffset)); - - block.nodeDepth = block.innerLength = 0; - block.keyLength = (byte)length; - block.digestLength = (byte)params.GetIntValueWithDefault(Name::DigestSize(), DIGESTSIZE); - block.fanout = block.depth = 1; - - ConstByteArrayParameter t; - if (params.GetValue(Name::Salt(), t) && t.begin() && t.size()) - { - memcpy_s(block.salt, COUNTOF(block.salt), t.begin(), t.size()); - size_t rem = SaturatingSubtract(COUNTOF(block.salt), t.size()); - size_t off = COUNTOF(block.salt) - rem; - if (rem) - std::memset(block.salt+off, 0x00, rem); - } - else - { - std::memset(block.salt, 0x00, COUNTOF(block.salt)); - } - - if (params.GetValue(Name::Personalization(), t) && t.begin() && t.size()) - { - memcpy_s(block.personalization, COUNTOF(block.personalization), t.begin(), t.size()); - size_t rem = SaturatingSubtract(COUNTOF(block.personalization), t.size()); - size_t off = COUNTOF(block.personalization) - rem; - if (rem) - std::memset(block.personalization+off, 0x00, rem); - } - else - { - std::memset(block.personalization, 0x00, COUNTOF(block.personalization)); - } -} - -void BLAKE2b::UncheckedSetKey(const byte *key, unsigned int length, const CryptoPP::NameValuePairs& params) -{ - if (key && length) - { - AlignedSecByteBlock temp(BLOCKSIZE); - memcpy_s(temp, BLOCKSIZE, key, length); - - size_t rem = SaturatingSubtract((unsigned int)BLOCKSIZE, length); - if (rem) - std::memset(temp+length, 0x00, rem); - - m_key.swap(temp); - } - else - { - m_key.resize(0); - } - - ParameterBlock& block = *m_block.data(); - std::memset(block.leafLength, 0x00, COUNTOF(block.leafLength)); - std::memset(block.nodeOffset, 0x00, COUNTOF(block.nodeOffset)); - std::memset(block.rfu, 0x00, COUNTOF(block.rfu)); - - block.nodeDepth = block.innerLength = 0; - block.keyLength = (byte)length; - block.digestLength = (byte)params.GetIntValueWithDefault(Name::DigestSize(), DIGESTSIZE); - block.fanout = block.depth = 1; - - ConstByteArrayParameter t; - if (params.GetValue(Name::Salt(), t) && t.begin() && t.size()) - { - memcpy_s(block.salt, COUNTOF(block.salt), t.begin(), t.size()); - size_t rem = SaturatingSubtract(COUNTOF(block.salt), t.size()); - size_t off = COUNTOF(block.salt) - rem; - if (rem) - std::memset(block.salt+off, 0x00, rem); - } - else - { - std::memset(block.salt, 0x00, COUNTOF(block.salt)); - } - - if (params.GetValue(Name::Personalization(), t) && t.begin() && t.size()) - { - memcpy_s(block.personalization, COUNTOF(block.personalization), t.begin(), t.size()); - size_t rem = SaturatingSubtract(COUNTOF(block.personalization), t.size()); - size_t off = COUNTOF(block.personalization) - rem; - if (rem) - std::memset(block.personalization+off, 0x00, rem); - } - else - { - std::memset(block.personalization, 0x00, COUNTOF(block.personalization)); - } +#endif + return GetAlignmentOf(); } std::string BLAKE2b::AlgorithmProvider() const @@ -367,225 +197,387 @@ std::string BLAKE2b::AlgorithmProvider() const #if defined(CRYPTOPP_SSE41_AVAILABLE) if (HasSSE41()) return "SSE4.1"; + else #endif #if (CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) return "NEON"; + else #endif #if (CRYPTOPP_POWER8_AVAILABLE) if (HasPower8()) return "Power8"; + else #endif return "C++"; } +unsigned int BLAKE2s::OptimalDataAlignment() const +{ +#if defined(CRYPTOPP_SSE41_AVAILABLE) + if (HasSSE41()) + return 16; + else +#endif +#if (CRYPTOPP_ARM_NEON_AVAILABLE) + if (HasNEON()) + return 4; + else +#endif +#if (CRYPTOPP_POWER7_AVAILABLE) + if (HasPower7()) + return 16; + else +#elif (CRYPTOPP_ALTIVEC_AVAILABLE) + if (HasAltivec()) + return 16; + else +#endif + return GetAlignmentOf(); +} + std::string BLAKE2s::AlgorithmProvider() const { #if defined(CRYPTOPP_SSE41_AVAILABLE) if (HasSSE41()) return "SSE4.1"; + else #endif #if (CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) return "NEON"; + else #endif #if (CRYPTOPP_POWER7_AVAILABLE) if (HasPower7()) return "Power7"; + else #elif (CRYPTOPP_ALTIVEC_AVAILABLE) if (HasAltivec()) return "Altivec"; + else #endif return "C++"; } -BLAKE2s::BLAKE2s(bool treeMode, unsigned int digestSize) : m_state(1), m_block(1), m_digestSize(digestSize), m_treeMode(treeMode) +void BLAKE2s_State::Reset() { - CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE); - - UncheckedSetKey(NULLPTR, 0, MakeParameters(Name::DigestSize(), (int)digestSize)(Name::TreeMode(), treeMode, false)); - Restart(); + std::memset(m_hft, 0x00, m_hft.SizeInBytes()); + m_len = 0; } -BLAKE2b::BLAKE2b(bool treeMode, unsigned int digestSize) : m_state(1), m_block(1), m_digestSize(digestSize), m_treeMode(treeMode) +void BLAKE2b_State::Reset() +{ + std::memset(m_hft, 0x00, m_hft.SizeInBytes()); + m_len = 0; +} + +BLAKE2s_ParameterBlock::BLAKE2s_ParameterBlock(size_t digestLen, size_t keyLen, + const byte* saltStr, size_t saltLen, + const byte* personalizationStr, size_t personalizationLen) +{ + Reset(digestLen, keyLen); + + if (saltStr && saltLen) + memcpy_s(salt(), SALTSIZE, saltStr, saltLen); + + if (personalizationStr && personalizationLen) + memcpy_s(personalization(), PERSONALIZATIONSIZE, personalizationStr, personalizationLen); +} + +BLAKE2b_ParameterBlock::BLAKE2b_ParameterBlock(size_t digestLen, size_t keyLen, + const byte* saltStr, size_t saltLen, + const byte* personalizationStr, size_t personalizationLen) +{ + Reset(digestLen, keyLen); + + if (saltStr && saltLen) + memcpy_s(salt(), SALTSIZE, saltStr, saltLen); + + if (personalizationStr && personalizationLen) + memcpy_s(personalization(), PERSONALIZATIONSIZE, personalizationStr, personalizationLen); +} + +void BLAKE2s_ParameterBlock::Reset(size_t digestLen, size_t keyLen) +{ + std::memset(m_data, 0x00, m_data.size()); + m_data[DigestOff] = static_cast(digestLen); + m_data[KeyOff] = static_cast(keyLen); + m_data[FanoutOff] = m_data[DepthOff] = 1; +} + +void BLAKE2b_ParameterBlock::Reset(size_t digestLen, size_t keyLen) +{ + std::memset(m_data, 0x00, m_data.size()); + m_data[DigestOff] = static_cast(digestLen); + m_data[KeyOff] = static_cast(keyLen); + m_data[FanoutOff] = m_data[DepthOff] = 1; +} + +BLAKE2s::BLAKE2s(bool treeMode, unsigned int digestSize) + : m_digestSize(digestSize), m_keyLength(0), m_treeMode(treeMode) { CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE); - UncheckedSetKey(NULLPTR, 0, MakeParameters(Name::DigestSize(), (int)digestSize)(Name::TreeMode(), treeMode, false)); - Restart(); + UncheckedSetKey(NULLPTR, 0, MakeParameters + (Name::DigestSize(), (int)digestSize) + (Name::TreeMode(), treeMode)); +} + +BLAKE2b::BLAKE2b(bool treeMode, unsigned int digestSize) + : m_digestSize(digestSize), m_keyLength(0), m_treeMode(treeMode) +{ + CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE); + + UncheckedSetKey(NULLPTR, 0, MakeParameters + (Name::DigestSize(), (int)digestSize) + (Name::TreeMode(), treeMode)); } BLAKE2s::BLAKE2s(const byte *key, size_t keyLength, const byte* salt, size_t saltLength, const byte* personalization, size_t personalizationLength, bool treeMode, unsigned int digestSize) - : m_state(1), m_block(1), m_digestSize(digestSize), m_treeMode(treeMode) + : m_digestSize(digestSize), m_keyLength(keyLength), m_treeMode(treeMode) { CRYPTOPP_ASSERT(keyLength <= MAX_KEYLENGTH); CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE); CRYPTOPP_ASSERT(saltLength <= SALTSIZE); CRYPTOPP_ASSERT(personalizationLength <= PERSONALIZATIONSIZE); - UncheckedSetKey(key, static_cast(keyLength), MakeParameters(Name::DigestSize(),(int)digestSize)(Name::TreeMode(),treeMode, false) - (Name::Salt(), ConstByteArrayParameter(salt, saltLength))(Name::Personalization(), ConstByteArrayParameter(personalization, personalizationLength))); - Restart(); + UncheckedSetKey(key, static_cast(keyLength), MakeParameters + (Name::DigestSize(),(int)digestSize) + (Name::TreeMode(),treeMode) + (Name::Salt(), ConstByteArrayParameter(salt, saltLength)) + (Name::Personalization(), ConstByteArrayParameter(personalization, personalizationLength))); } BLAKE2b::BLAKE2b(const byte *key, size_t keyLength, const byte* salt, size_t saltLength, const byte* personalization, size_t personalizationLength, bool treeMode, unsigned int digestSize) - : m_state(1), m_block(1), m_digestSize(digestSize), m_treeMode(treeMode) + : m_digestSize(digestSize), m_keyLength(keyLength), m_treeMode(treeMode) { CRYPTOPP_ASSERT(keyLength <= MAX_KEYLENGTH); CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE); CRYPTOPP_ASSERT(saltLength <= SALTSIZE); CRYPTOPP_ASSERT(personalizationLength <= PERSONALIZATIONSIZE); - UncheckedSetKey(key, static_cast(keyLength), MakeParameters(Name::DigestSize(),(int)digestSize)(Name::TreeMode(),treeMode, false) - (Name::Salt(), ConstByteArrayParameter(salt, saltLength))(Name::Personalization(), ConstByteArrayParameter(personalization, personalizationLength))); + UncheckedSetKey(key, static_cast(keyLength), MakeParameters + (Name::DigestSize(),(int)digestSize) + (Name::TreeMode(),treeMode) + (Name::Salt(), ConstByteArrayParameter(salt, saltLength)) + (Name::Personalization(), ConstByteArrayParameter(personalization, personalizationLength))); +} + +void BLAKE2s::UncheckedSetKey(const byte *key, unsigned int length, const CryptoPP::NameValuePairs& params) +{ + if (key && length) + { + m_key.New(BLOCKSIZE); + std::memcpy(m_key, key, length); + std::memset(m_key + length, 0x00, BLOCKSIZE - length); + m_keyLength = length; + } + else + { + m_key.resize(0); + m_keyLength = 0; + } + + m_digestSize = params.GetIntValueWithDefault(Name::DigestSize(), DIGESTSIZE); + + m_state.Reset(); + m_block.Reset(m_digestSize, m_keyLength); + (void)params.GetValue(Name::TreeMode(), m_treeMode); + + ConstByteArrayParameter t; + if (params.GetValue(Name::Salt(), t) && t.begin() && t.size()) + memcpy_s(m_block.salt(), SALTSIZE, t.begin(), t.size()); + + if (params.GetValue(Name::Personalization(), t) && t.begin() && t.size()) + memcpy_s(m_block.personalization(), PERSONALIZATIONSIZE, t.begin(), t.size()); + + Restart(); +} + +void BLAKE2b::UncheckedSetKey(const byte *key, unsigned int length, const CryptoPP::NameValuePairs& params) +{ + if (key && length) + { + m_key.New(BLOCKSIZE); + std::memcpy(m_key, key, length); + std::memset(m_key + length, 0x00, BLOCKSIZE - length); + m_keyLength = length; + } + else + { + m_key.resize(0); + m_keyLength = 0; + } + + m_digestSize = params.GetIntValueWithDefault(Name::DigestSize(), DIGESTSIZE); + + m_state.Reset(); + m_block.Reset(m_digestSize, m_keyLength); + (void)params.GetValue(Name::TreeMode(), m_treeMode); + + ConstByteArrayParameter t; + if (params.GetValue(Name::Salt(), t) && t.begin() && t.size()) + memcpy_s(m_block.salt(), SALTSIZE, t.begin(), t.size()); + + if (params.GetValue(Name::Personalization(), t) && t.begin() && t.size()) + memcpy_s(m_block.personalization(), PERSONALIZATIONSIZE, t.begin(), t.size()); + Restart(); } void BLAKE2s::Restart() { static const word32 zero[2] = {0,0}; - Restart(*m_block.data(), zero); + Restart(m_block, zero); } void BLAKE2b::Restart() { static const word64 zero[2] = {0,0}; - Restart(*m_block.data(), zero); + Restart(m_block, zero); } void BLAKE2s::Restart(const BLAKE2s_ParameterBlock& block, const word32 counter[2]) { - // We take a parameter block as a parameter to allow customized state. - // Avoid the copy of the parameter block when we are passing our own block. - if (&block != m_block.data()) - { - memcpy_s(m_block.data(), sizeof(ParameterBlock), &block, sizeof(ParameterBlock)); - m_block.data()->digestLength = (byte)m_digestSize; - m_block.data()->keyLength = (byte)m_key.size(); - } - - State& state = *m_state.data(); - state.tf[0] = state.tf[1] = 0, state.tf[2] = state.tf[3] = 0, state.length = 0; - + // We take a counter as a parameter to allow customized state. + m_state.Reset(); if (counter != NULLPTR) { - state.tf[0] = counter[0]; - state.tf[1] = counter[1]; + word32* t = m_state.t(); + t[0] = counter[0]; + t[1] = counter[1]; + } + + // We take a parameter block as a parameter to allow customized state. + // Avoid the copy of the parameter block when we are passing our own block. + if (block.data() == m_block.data()) + m_block.Reset(m_digestSize, m_keyLength); + else + { + std::memcpy(m_block.data(), block.data(), m_block.size()); + m_block.m_data[BLAKE2s_ParameterBlock::DigestOff] = (byte)m_digestSize; + m_block.m_data[BLAKE2s_ParameterBlock::KeyOff] = (byte)m_keyLength; } const word32* iv = BLAKE2S_IV; - PutBlock put(m_block.data(), &state.h[0]); + PutBlock put(m_block.data(), m_state.h()); put(iv[0])(iv[1])(iv[2])(iv[3])(iv[4])(iv[5])(iv[6])(iv[7]); - // When BLAKE2 is keyed, the input stream is simply {key||message}. Key it - // during Restart to avoid FirstPut and friends. Key size == 0 means no key. - if (m_key.size()) - Update(m_key, m_key.size()); + // When BLAKE2 is keyed, the input stream is simply {key || 0 || message}. + // The key is padded to a full Blocksize with 0. Key it during Restart to + // avoid FirstPut and friends. Key size == 0 means no key. + if (m_keyLength) + Update(m_key, BLOCKSIZE); } - void BLAKE2b::Restart(const BLAKE2b_ParameterBlock& block, const word64 counter[2]) { - // We take a parameter block as a parameter to allow customized state. - // Avoid the copy of the parameter block when we are passing our own block. - if (&block != m_block.data()) - { - memcpy_s(m_block.data(), sizeof(ParameterBlock), &block, sizeof(ParameterBlock)); - m_block.data()->digestLength = (byte)m_digestSize; - m_block.data()->keyLength = (byte)m_key.size(); - } - - State& state = *m_state.data(); - state.tf[0] = state.tf[1] = 0, state.tf[2] = state.tf[3] = 0, state.length = 0; - + // We take a counter as a parameter to allow customized state. + m_state.Reset(); if (counter != NULLPTR) { - state.tf[0] = counter[0]; - state.tf[1] = counter[1]; + word64* t = m_state.t(); + t[0] = counter[0]; + t[1] = counter[1]; + } + + // We take a parameter block as a parameter to allow customized state. + // Avoid the copy of the parameter block when we are passing our own block. + if (block.data() == m_block.data()) + m_block.Reset(m_digestSize, m_keyLength); + else + { + std::memcpy(m_block.data(), block.data(), m_block.size()); + m_block.m_data[BLAKE2b_ParameterBlock::DigestOff] = (byte)m_digestSize; + m_block.m_data[BLAKE2b_ParameterBlock::KeyOff] = (byte)m_keyLength; } const word64* iv = BLAKE2B_IV; - PutBlock put(m_block.data(), &state.h[0]); + PutBlock put(m_block.data(), m_state.h()); put(iv[0])(iv[1])(iv[2])(iv[3])(iv[4])(iv[5])(iv[6])(iv[7]); - // When BLAKE2 is keyed, the input stream is simply {key||message}. Key it - // during Restart to avoid FirstPut and friends. Key size == 0 means no key. - if (m_key.size()) - Update(m_key, m_key.size()); + // When BLAKE2 is keyed, the input stream is simply {key || 0 || message}. + // The key is padded to a full Blocksize with 0. Key it during Restart to + // avoid FirstPut and friends. Key size == 0 means no key. + if (m_keyLength) + Update(m_key, BLOCKSIZE); } void BLAKE2s::Update(const byte *input, size_t length) { - CRYPTOPP_ASSERT(!(input == NULLPTR && length != 0)); - if (length == 0) { return; } + CRYPTOPP_ASSERT(input != NULLPTR || length == 0); - State& state = *m_state.data(); - if (state.length + length > BLOCKSIZE) + if (length > BLOCKSIZE - m_state.m_len) { - // Complete current block - const size_t fill = BLOCKSIZE - state.length; - memcpy_s(&state.buffer[state.length], fill, input, fill); + if (m_state.m_len != 0) + { + // Complete current block + const size_t fill = BLOCKSIZE - m_state.m_len; + std::memcpy(m_state.m_buf+m_state.m_len, input, fill); - IncrementCounter(); - Compress(state.buffer); - state.length = 0; + IncrementCounter(BLOCKSIZE); + Compress(m_state.m_buf); + m_state.m_len = 0; - length -= fill, input += fill; + length -= fill, input += fill; + } // Compress in-place to avoid copies while (length > BLOCKSIZE) { - IncrementCounter(); + IncrementCounter(BLOCKSIZE); Compress(input); length -= BLOCKSIZE, input += BLOCKSIZE; } } // Copy tail bytes - if (input && length) + if (length) { - CRYPTOPP_ASSERT(length <= BLOCKSIZE - state.length); - memcpy_s(&state.buffer[state.length], length, input, length); - state.length += static_cast(length); + CRYPTOPP_ASSERT(length <= BLOCKSIZE - m_state.m_len); + std::memcpy(m_state.m_buf+m_state.m_len, input, length); + m_state.m_len += static_cast(length); } } - void BLAKE2b::Update(const byte *input, size_t length) { - CRYPTOPP_ASSERT(!(input == NULLPTR && length != 0)); - if (length == 0) { return; } + CRYPTOPP_ASSERT(input != NULLPTR || length == 0); - State& state = *m_state.data(); - if (state.length + length > BLOCKSIZE) + if (length > BLOCKSIZE - m_state.m_len) { - // Complete current block - const size_t fill = BLOCKSIZE - state.length; - memcpy_s(&state.buffer[state.length], fill, input, fill); + if (m_state.m_len != 0) + { + // Complete current block + const size_t fill = BLOCKSIZE - m_state.m_len; + std::memcpy(m_state.m_buf+m_state.m_len, input, fill); - IncrementCounter(); - Compress(state.buffer); - state.length = 0; + IncrementCounter(BLOCKSIZE); + Compress(m_state.m_buf); + m_state.m_len = 0; - length -= fill, input += fill; + length -= fill, input += fill; + } // Compress in-place to avoid copies while (length > BLOCKSIZE) { - IncrementCounter(); + CRYPTOPP_ASSERT(m_state.m_len == 0); + IncrementCounter(BLOCKSIZE); Compress(input); length -= BLOCKSIZE, input += BLOCKSIZE; } } // Copy tail bytes - if (input && length) + if (length) { - CRYPTOPP_ASSERT(length <= BLOCKSIZE - state.length); - memcpy_s(&state.buffer[state.length], length, input, length); - state.length += static_cast(length); + CRYPTOPP_ASSERT(length <= BLOCKSIZE - m_state.m_len); + std::memcpy(m_state.m_buf + m_state.m_len, input, length); + m_state.m_len += static_cast(length); } } @@ -593,23 +585,23 @@ void BLAKE2s::TruncatedFinal(byte *hash, size_t size) { CRYPTOPP_ASSERT(hash != NULLPTR); this->ThrowIfInvalidTruncatedSize(size); + word32* f = m_state.f(); // Set last block unconditionally - State& state = *m_state.data(); - state.tf[2] = ~static_cast(0); + f[0] = ~static_cast(0); // Set last node if tree mode if (m_treeMode) - state.tf[3] = ~static_cast(0); + f[1] = ~static_cast(0); // Increment counter for tail bytes only - IncrementCounter(state.length); + IncrementCounter(m_state.m_len); - std::memset(state.buffer + state.length, 0x00, BLOCKSIZE - state.length); - Compress(state.buffer); + std::memset(m_state.m_buf + m_state.m_len, 0x00, BLOCKSIZE - m_state.m_len); + Compress(m_state.m_buf); // Copy to caller buffer - memcpy_s(hash, size, &state.h[0], size); + std::memcpy(hash, m_state.h(), size); Restart(); } @@ -618,39 +610,39 @@ void BLAKE2b::TruncatedFinal(byte *hash, size_t size) { CRYPTOPP_ASSERT(hash != NULLPTR); this->ThrowIfInvalidTruncatedSize(size); + word64* f = m_state.f(); // Set last block unconditionally - State& state = *m_state.data(); - state.tf[2] = ~static_cast(0); + f[0] = ~static_cast(0); // Set last node if tree mode if (m_treeMode) - state.tf[3] = ~static_cast(0); + f[1] = ~static_cast(0); // Increment counter for tail bytes only - IncrementCounter(state.length); + IncrementCounter(m_state.m_len); - std::memset(state.buffer + state.length, 0x00, BLOCKSIZE - state.length); - Compress(state.buffer); + std::memset(m_state.m_buf + m_state.m_len, 0x00, BLOCKSIZE - m_state.m_len); + Compress(m_state.m_buf); // Copy to caller buffer - memcpy_s(hash, size, &state.h[0], size); + std::memcpy(hash, m_state.h(), size); Restart(); } void BLAKE2s::IncrementCounter(size_t count) { - State& state = *m_state.data(); - state.tf[0] += static_cast(count); - state.tf[1] += !!(state.tf[0] < count); + word32* t = m_state.t(); + t[0] += static_cast(count); + t[1] += !!(t[0] < count); } void BLAKE2b::IncrementCounter(size_t count) { - State& state = *m_state.data(); - state.tf[0] += static_cast(count); - state.tf[1] += !!(state.tf[0] < count); + word64* t = m_state.t(); + t[0] += static_cast(count); + t[1] += !!(t[0] < count); } void BLAKE2s::Compress(const byte *input) @@ -658,27 +650,27 @@ void BLAKE2s::Compress(const byte *input) #if CRYPTOPP_SSE41_AVAILABLE if(HasSSE41()) { - return BLAKE2_Compress32_SSE4(input, *m_state.data()); + return BLAKE2_Compress32_SSE4(input, m_state); } #endif #if CRYPTOPP_ARM_NEON_AVAILABLE if(HasNEON()) { - return BLAKE2_Compress32_NEON(input, *m_state.data()); + return BLAKE2_Compress32_NEON(input, m_state); } #endif #if CRYPTOPP_POWER7_AVAILABLE if(HasPower7()) { - return BLAKE2_Compress32_POWER7(input, *m_state.data()); + return BLAKE2_Compress32_POWER7(input, m_state); } #elif CRYPTOPP_ALTIVEC_AVAILABLE if(HasAltivec()) { - return BLAKE2_Compress32_ALTIVEC(input, *m_state.data()); + return BLAKE2_Compress32_ALTIVEC(input, m_state); } #endif - return BLAKE2_Compress32_CXX(input, *m_state.data()); + return BLAKE2_Compress32_CXX(input, m_state); } void BLAKE2b::Compress(const byte *input) @@ -686,22 +678,22 @@ void BLAKE2b::Compress(const byte *input) #if CRYPTOPP_SSE41_AVAILABLE if(HasSSE41()) { - return BLAKE2_Compress64_SSE4(input, *m_state.data()); + return BLAKE2_Compress64_SSE4(input, m_state); } #endif #if CRYPTOPP_ARM_NEON_AVAILABLE if(HasNEON()) { - return BLAKE2_Compress64_NEON(input, *m_state.data()); + return BLAKE2_Compress64_NEON(input, m_state); } #endif #if CRYPTOPP_POWER8_AVAILABLE if(HasPower8()) { - return BLAKE2_Compress64_POWER8(input, *m_state.data()); + return BLAKE2_Compress64_POWER8(input, m_state); } #endif - return BLAKE2_Compress64_CXX(input, *m_state.data()); + return BLAKE2_Compress64_CXX(input, m_state); } void BLAKE2_Compress64_CXX(const byte* input, BLAKE2b_State& state) @@ -711,18 +703,19 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2b_State& state) GetBlock get1(input); get1(m[0])(m[1])(m[2])(m[3])(m[4])(m[5])(m[6])(m[7])(m[8])(m[9])(m[10])(m[11])(m[12])(m[13])(m[14])(m[15]); - GetBlock get2(&state.h[0]); + GetBlock get2(state.h()); get2(v[0])(v[1])(v[2])(v[3])(v[4])(v[5])(v[6])(v[7]); const word64* iv = BLAKE2B_IV; + const word64* tf = state.t(); v[ 8] = iv[0]; v[ 9] = iv[1]; v[10] = iv[2]; v[11] = iv[3]; - v[12] = state.tf[0] ^ iv[4]; - v[13] = state.tf[1] ^ iv[5]; - v[14] = state.tf[2] ^ iv[6]; - v[15] = state.tf[3] ^ iv[7]; + v[12] = tf[0] ^ iv[4]; + v[13] = tf[1] ^ iv[5]; + v[14] = tf[2] ^ iv[6]; + v[15] = tf[3] ^ iv[7]; BLAKE2B_ROUND<0>(m, v); BLAKE2B_ROUND<1>(m, v); @@ -737,8 +730,9 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2b_State& state) BLAKE2B_ROUND<10>(m, v); BLAKE2B_ROUND<11>(m, v); - for(unsigned int i = 0; i < 8; ++i) - state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]); + word64* h = state.h(); + for (unsigned int i = 0; i < 8; ++i) + h[i] = h[i] ^ ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v[i] ^ v[i + 8]); } void BLAKE2_Compress32_CXX(const byte* input, BLAKE2s_State& state) @@ -748,18 +742,19 @@ void BLAKE2_Compress32_CXX(const byte* input, BLAKE2s_State& state) GetBlock get1(input); get1(m[0])(m[1])(m[2])(m[3])(m[4])(m[5])(m[6])(m[7])(m[8])(m[9])(m[10])(m[11])(m[12])(m[13])(m[14])(m[15]); - GetBlock get2(&state.h[0]); + GetBlock get2(state.h()); get2(v[0])(v[1])(v[2])(v[3])(v[4])(v[5])(v[6])(v[7]); const word32* iv = BLAKE2S_IV; + const word32* tf = state.t(); v[ 8] = iv[0]; v[ 9] = iv[1]; v[10] = iv[2]; v[11] = iv[3]; - v[12] = state.tf[0] ^ iv[4]; - v[13] = state.tf[1] ^ iv[5]; - v[14] = state.tf[2] ^ iv[6]; - v[15] = state.tf[3] ^ iv[7]; + v[12] = tf[0] ^ iv[4]; + v[13] = tf[1] ^ iv[5]; + v[14] = tf[2] ^ iv[6]; + v[15] = tf[3] ^ iv[7]; BLAKE2S_ROUND<0>(m, v); BLAKE2S_ROUND<1>(m, v); @@ -772,8 +767,9 @@ void BLAKE2_Compress32_CXX(const byte* input, BLAKE2s_State& state) BLAKE2S_ROUND<8>(m, v); BLAKE2S_ROUND<9>(m, v); - for(unsigned int i = 0; i < 8; ++i) - state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]); + word32* h = state.h(); + for (unsigned int i = 0; i < 8; ++i) + h[i] = h[i] ^ ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v[i] ^ v[i + 8]); } NAMESPACE_END diff --git a/blake2.h b/blake2.h index 8a6936f1..56a28158 100644 --- a/blake2.h +++ b/blake2.h @@ -65,29 +65,46 @@ struct CRYPTOPP_NO_VTABLE BLAKE2s_ParameterBlock BLAKE2s_ParameterBlock() { - memset(this, 0x00, sizeof(*this)); - digestLength = DIGESTSIZE; - fanout = depth = 1; + Reset(); } BLAKE2s_ParameterBlock(size_t digestSize) { - CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE); - memset(this, 0x00, sizeof(*this)); - digestLength = (byte)digestSize; - fanout = depth = 1; + Reset(digestSize); } BLAKE2s_ParameterBlock(size_t digestSize, size_t keyLength, const byte* salt, size_t saltLength, const byte* personalization, size_t personalizationLength); - byte digestLength; - byte keyLength, fanout, depth; - byte leafLength[4]; - byte nodeOffset[6]; - byte nodeDepth, innerLength; - byte salt[SALTSIZE]; - byte personalization[PERSONALIZATIONSIZE]; + void Reset(size_t digestLength=DIGESTSIZE, size_t keyLength=0); + + byte* data() { + return m_data.data(); + } + + const byte* data() const { + return m_data.data(); + } + + size_t size() const { + return m_data.size(); + } + + byte* salt() { + return m_data + SaltOff; + } + + byte* personalization() { + return m_data + PersonalizationOff; + } + + // Offsets into the byte array + enum { + DigestOff = 0, KeyOff = 1, FanoutOff = 2, DepthOff = 3, LeafOff = 4, NodeOff = 8, + NodeDepthOff = 14, InnerOff = 15, SaltOff = 16, PersonalizationOff = 24 + }; + + FixedSizeAlignedSecBlock m_data; }; /// \brief BLAKE2b parameter block @@ -99,65 +116,112 @@ struct CRYPTOPP_NO_VTABLE BLAKE2b_ParameterBlock BLAKE2b_ParameterBlock() { - memset(this, 0x00, sizeof(*this)); - digestLength = DIGESTSIZE; - fanout = depth = 1; + Reset(); } BLAKE2b_ParameterBlock(size_t digestSize) { - CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE); - memset(this, 0x00, sizeof(*this)); - digestLength = (byte)digestSize; - fanout = depth = 1; + Reset(digestSize); } BLAKE2b_ParameterBlock(size_t digestSize, size_t keyLength, const byte* salt, size_t saltLength, const byte* personalization, size_t personalizationLength); - byte digestLength; - byte keyLength, fanout, depth; - byte leafLength[4]; - byte nodeOffset[8]; - byte nodeDepth, innerLength, rfu[14]; - byte salt[SALTSIZE]; - byte personalization[PERSONALIZATIONSIZE]; + void Reset(size_t digestLength=DIGESTSIZE, size_t keyLength=0); + + byte* data() { + return m_data.data(); + } + + const byte* data() const { + return m_data.data(); + } + + size_t size() const { + return m_data.size(); + } + + byte* salt() { + return m_data + SaltOff; + } + + byte* personalization() { + return m_data + PersonalizationOff; + } + + // Offsets into the byte array + enum { + DigestOff = 0, KeyOff = 1, FanoutOff = 2, DepthOff = 3, LeafOff = 4, NodeOff = 8, + NodeDepthOff = 16, InnerOff = 17, RfuOff = 18, SaltOff = 32, PersonalizationOff = 48 + }; + + FixedSizeAlignedSecBlock m_data; }; /// \brief BLAKE2s state information /// \since Crypto++ 5.6.4 struct CRYPTOPP_NO_VTABLE BLAKE2s_State { - BLAKE2s_State() - { - // Set all members except scratch buffer[] - h[0]=h[1]=h[2]=h[3]=h[4]=h[5]=h[6]=h[7] = 0; - tf[0]=tf[1]=tf[2]=tf[3] = 0; - length = 0; + BLAKE2s_State() { + Reset(); + } + + void Reset(); + + inline word32* h() { + return m_hft.data(); + } + + inline word32* t() { + return m_hft.data() + 8; + } + + inline word32* f() { + return m_hft.data() + 10; + } + + inline byte* data() { + return m_buf.data(); } // SSE4, Power7 and NEON depend upon t[] and f[] being side-by-side - word32 h[8], tf[4]; // t[2], f[2]; - byte buffer[BLAKE2s_Info::BLOCKSIZE]; - size_t length; + CRYPTOPP_CONSTANT(BLOCKSIZE = BLAKE2s_Info::BLOCKSIZE); + FixedSizeAlignedSecBlock m_hft; + FixedSizeAlignedSecBlock m_buf; + size_t m_len; }; /// \brief BLAKE2b state information /// \since Crypto++ 5.6.4 struct CRYPTOPP_NO_VTABLE BLAKE2b_State { - BLAKE2b_State() - { - // Set all members except scratch buffer[] - h[0]=h[1]=h[2]=h[3]=h[4]=h[5]=h[6]=h[7] = 0; - tf[0]=tf[1]=tf[2]=tf[3] = 0; - length = 0; + BLAKE2b_State() { + Reset(); + } + + void Reset(); + + inline word64* h() { + return m_hft.data(); + } + + inline word64* t() { + return m_hft.data() + 8; + } + + inline word64* f() { + return m_hft.data() + 10; + } + + inline byte* data() { + return m_buf.data(); } // SSE4, Power8 and NEON depend upon t[] and f[] being side-by-side - word64 h[8], tf[4]; // t[2], f[2]; - byte buffer[BLAKE2b_Info::BLOCKSIZE]; - size_t length; + CRYPTOPP_CONSTANT(BLOCKSIZE = BLAKE2b_Info::BLOCKSIZE); + FixedSizeAlignedSecBlock m_hft; + FixedSizeAlignedSecBlock m_buf; + size_t m_len; }; /// \brief The BLAKE2s cryptographic hash function @@ -184,8 +248,6 @@ public: typedef BLAKE2s_State State; typedef BLAKE2s_ParameterBlock ParameterBlock; - typedef SecBlock > AlignedState; - typedef SecBlock > AlignedParameterBlock; CRYPTOPP_STATIC_CONSTEXPR const char* StaticAlgorithmName() {return "BLAKE2s";} @@ -214,10 +276,10 @@ public: /// \details Object algorithm name follows the naming described in /// RFC 7693, The BLAKE2 Cryptographic Hash and /// Message Authentication Code (MAC). For example, "BLAKE2b-512" and "BLAKE2s-256". - std::string AlgorithmName() const {return std::string(BLAKE2s_Info::StaticAlgorithmName()) + "-" + IntToString(this->DigestSize()*8);} + std::string AlgorithmName() const {return std::string(BLAKE2s_Info::StaticAlgorithmName()) + "-" + IntToString(DigestSize()*8);} unsigned int DigestSize() const {return m_digestSize;} - unsigned int OptimalDataAlignment() const {return (CRYPTOPP_BOOL_ALIGN16 ? 16 : GetAlignmentOf());} + unsigned int OptimalDataAlignment() const; void Update(const byte *input, size_t length); void Restart(); @@ -253,10 +315,10 @@ protected: void UncheckedSetKey(const byte* key, unsigned int length, const CryptoPP::NameValuePairs& params); private: - AlignedState m_state; - AlignedParameterBlock m_block; + State m_state; + ParameterBlock m_block; AlignedSecByteBlock m_key; - word32 m_digestSize; + word32 m_digestSize, m_keyLength; bool m_treeMode; }; @@ -284,8 +346,6 @@ public: typedef BLAKE2b_State State; typedef BLAKE2b_ParameterBlock ParameterBlock; - typedef SecBlock > AlignedState; - typedef SecBlock > AlignedParameterBlock; CRYPTOPP_STATIC_CONSTEXPR const char* StaticAlgorithmName() {return "BLAKE2b";} @@ -314,10 +374,10 @@ public: /// \details Object algorithm name follows the naming described in /// RFC 7693, The BLAKE2 Cryptographic Hash and /// Message Authentication Code (MAC). For example, "BLAKE2b-512" and "BLAKE2s-256". - std::string AlgorithmName() const {return std::string(BLAKE2b_Info::StaticAlgorithmName()) + "-" + IntToString(this->DigestSize()*8);} + std::string AlgorithmName() const {return std::string(BLAKE2b_Info::StaticAlgorithmName()) + "-" + IntToString(DigestSize()*8);} unsigned int DigestSize() const {return m_digestSize;} - unsigned int OptimalDataAlignment() const {return (CRYPTOPP_BOOL_ALIGN16 ? 16 : GetAlignmentOf());} + unsigned int OptimalDataAlignment() const; void Update(const byte *input, size_t length); void Restart(); @@ -354,10 +414,10 @@ protected: void UncheckedSetKey(const byte* key, unsigned int length, const CryptoPP::NameValuePairs& params); private: - AlignedState m_state; - AlignedParameterBlock m_block; + State m_state; + ParameterBlock m_block; AlignedSecByteBlock m_key; - word32 m_digestSize; + word32 m_digestSize, m_keyLength; bool m_treeMode; }; diff --git a/blake2b_simd.cpp b/blake2b_simd.cpp index ee701bd4..2f26c816 100644 --- a/blake2b_simd.cpp +++ b/blake2b_simd.cpp @@ -45,6 +45,9 @@ # include "ppc_simd.h" #endif +// Squash MS LNK4221 and libtool warnings +extern const char BLAKE2B_SIMD_FNAME[] = __FILE__; + NAMESPACE_BEGIN(CryptoPP) // Exported by blake2.cpp @@ -451,14 +454,14 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2b_State& state) const __m128i m6 = LOADU(input + 96); const __m128i m7 = LOADU(input + 112); - row1l = LOADU(&state.h[0]); - row1h = LOADU(&state.h[2]); - row2l = LOADU(&state.h[4]); - row2h = LOADU(&state.h[6]); - row3l = LOADU(&BLAKE2B_IV[0]); - row3h = LOADU(&BLAKE2B_IV[2]); - row4l = _mm_xor_si128(LOADU(&BLAKE2B_IV[4]), LOADU(&state.tf[0])); - row4h = _mm_xor_si128(LOADU(&BLAKE2B_IV[6]), LOADU(&state.tf[2])); + row1l = LOADU(state.h()+0); + row1h = LOADU(state.h()+2); + row2l = LOADU(state.h()+4); + row2h = LOADU(state.h()+6); + row3l = LOADU(BLAKE2B_IV+0); + row3h = LOADU(BLAKE2B_IV+2); + row4l = _mm_xor_si128(LOADU(BLAKE2B_IV+4), LOADU(state.t()+0)); + row4h = _mm_xor_si128(LOADU(BLAKE2B_IV+6), LOADU(state.f()+0)); BLAKE2B_ROUND(0); BLAKE2B_ROUND(1); @@ -475,12 +478,12 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2b_State& state) row1l = _mm_xor_si128(row3l, row1l); row1h = _mm_xor_si128(row3h, row1h); - STOREU(&state.h[0], _mm_xor_si128(LOADU(&state.h[0]), row1l)); - STOREU(&state.h[2], _mm_xor_si128(LOADU(&state.h[2]), row1h)); + STOREU(state.h()+0, _mm_xor_si128(LOADU(state.h()+0), row1l)); + STOREU(state.h()+2, _mm_xor_si128(LOADU(state.h()+2), row1h)); row2l = _mm_xor_si128(row4l, row2l); row2h = _mm_xor_si128(row4h, row2h); - STOREU(&state.h[4], _mm_xor_si128(LOADU(&state.h[4]), row2l)); - STOREU(&state.h[6], _mm_xor_si128(LOADU(&state.h[6]), row2h)); + STOREU(state.h()+4, _mm_xor_si128(LOADU(state.h()+4), row2l)); + STOREU(state.h()+6, _mm_xor_si128(LOADU(state.h()+6), row2h)); } #endif // CRYPTOPP_SSE41_AVAILABLE @@ -710,15 +713,15 @@ void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state) uint64x2_t row1l, row1h, row2l, row2h; uint64x2_t row3l, row3h, row4l, row4h; - const uint64x2_t h0 = row1l = vld1q_u64(&state.h[0]); - const uint64x2_t h1 = row1h = vld1q_u64(&state.h[2]); - const uint64x2_t h2 = row2l = vld1q_u64(&state.h[4]); - const uint64x2_t h3 = row2h = vld1q_u64(&state.h[6]); + const uint64x2_t h0 = row1l = vld1q_u64(state.h()+0); + const uint64x2_t h1 = row1h = vld1q_u64(state.h()+2); + const uint64x2_t h2 = row2l = vld1q_u64(state.h()+4); + const uint64x2_t h3 = row2h = vld1q_u64(state.h()+6); - row3l = vld1q_u64(&BLAKE2B_IV[0]); - row3h = vld1q_u64(&BLAKE2B_IV[2]); - row4l = veorq_u64(vld1q_u64(&BLAKE2B_IV[4]), vld1q_u64(&state.tf[0])); - row4h = veorq_u64(vld1q_u64(&BLAKE2B_IV[6]), vld1q_u64(&state.tf[2])); + row3l = vld1q_u64(BLAKE2B_IV+0); + row3h = vld1q_u64(BLAKE2B_IV+2); + row4l = veorq_u64(vld1q_u64(BLAKE2B_IV+4), vld1q_u64(state.t()+0)); + row4h = veorq_u64(vld1q_u64(BLAKE2B_IV+6), vld1q_u64(state.f()+0)); BLAKE2B_ROUND(0); BLAKE2B_ROUND(1); @@ -733,10 +736,10 @@ void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state) BLAKE2B_ROUND(10); BLAKE2B_ROUND(11); - vst1q_u64(&state.h[0], veorq_u64(h0, veorq_u64(row1l, row3l))); - vst1q_u64(&state.h[2], veorq_u64(h1, veorq_u64(row1h, row3h))); - vst1q_u64(&state.h[4], veorq_u64(h2, veorq_u64(row2l, row4l))); - vst1q_u64(&state.h[6], veorq_u64(h3, veorq_u64(row2h, row4h))); + vst1q_u64(state.h()+0, veorq_u64(h0, veorq_u64(row1l, row3l))); + vst1q_u64(state.h()+2, veorq_u64(h1, veorq_u64(row1h, row3h))); + vst1q_u64(state.h()+4, veorq_u64(h2, veorq_u64(row2l, row4l))); + vst1q_u64(state.h()+6, veorq_u64(h3, veorq_u64(row2h, row4h))); } #endif // CRYPTOPP_ARM_NEON_AVAILABLE @@ -1187,15 +1190,15 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) uint64x2_p row1l, row1h, row2l, row2h; uint64x2_p row3l, row3h, row4l, row4h; - const uint64x2_p h0 = row1l = VecLoad64LE(&state.h[0]); - const uint64x2_p h1 = row1h = VecLoad64LE(&state.h[2]); - const uint64x2_p h2 = row2l = VecLoad64LE(&state.h[4]); - const uint64x2_p h3 = row2h = VecLoad64LE(&state.h[6]); + const uint64x2_p h0 = row1l = VecLoad64LE(state.h()+0); + const uint64x2_p h1 = row1h = VecLoad64LE(state.h()+2); + const uint64x2_p h2 = row2l = VecLoad64LE(state.h()+4); + const uint64x2_p h3 = row2h = VecLoad64LE(state.h()+6); - row3l = VecLoad64(&BLAKE2B_IV[0]); - row3h = VecLoad64(&BLAKE2B_IV[2]); - row4l = VecXor(VecLoad64(&BLAKE2B_IV[4]), VecLoad64(&state.tf[0])); - row4h = VecXor(VecLoad64(&BLAKE2B_IV[6]), VecLoad64(&state.tf[2])); + row3l = VecLoad64(BLAKE2B_IV+0); + row3h = VecLoad64(BLAKE2B_IV+2); + row4l = VecXor(VecLoad64(BLAKE2B_IV+4), VecLoad64(state.t()+0)); + row4h = VecXor(VecLoad64(BLAKE2B_IV+6), VecLoad64(state.f()+0)); BLAKE2B_ROUND(0); BLAKE2B_ROUND(1); @@ -1210,10 +1213,10 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) BLAKE2B_ROUND(10); BLAKE2B_ROUND(11); - VecStore64LE(&state.h[0], VecXor(h0, VecXor(row1l, row3l))); - VecStore64LE(&state.h[2], VecXor(h1, VecXor(row1h, row3h))); - VecStore64LE(&state.h[4], VecXor(h2, VecXor(row2l, row4l))); - VecStore64LE(&state.h[6], VecXor(h3, VecXor(row2h, row4h))); + VecStore64LE(state.h()+0, VecXor(h0, VecXor(row1l, row3l))); + VecStore64LE(state.h()+2, VecXor(h1, VecXor(row1h, row3h))); + VecStore64LE(state.h()+4, VecXor(h2, VecXor(row2l, row4l))); + VecStore64LE(state.h()+6, VecXor(h3, VecXor(row2h, row4h))); } #endif // CRYPTOPP_POWER8_AVAILABLE diff --git a/blake2s_simd.cpp b/blake2s_simd.cpp index a481b447..9ad56252 100644 --- a/blake2s_simd.cpp +++ b/blake2s_simd.cpp @@ -55,6 +55,9 @@ # include "ppc_simd.h" #endif +// Squash MS LNK4221 and libtool warnings +extern const char BLAKE2S_SIMD_FNAME[] = __FILE__; + NAMESPACE_BEGIN(CryptoPP) // Exported by blake2.cpp @@ -342,10 +345,10 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2s_State& state) const __m128i m2 = LOADU(input + 32); const __m128i m3 = LOADU(input + 48); - row1 = ff0 = LOADU(&state.h[0]); - row2 = ff1 = LOADU(&state.h[4]); - row3 = LOADU(&BLAKE2S_IV[0]); - row4 = _mm_xor_si128(LOADU(&BLAKE2S_IV[4]), LOADU(&state.tf[0])); + row1 = ff0 = LOADU(state.h()+0); + row2 = ff1 = LOADU(state.h()+4); + row3 = LOADU(BLAKE2S_IV+0); + row4 = _mm_xor_si128(LOADU(&BLAKE2S_IV[4]), LOADU(state.t()+0)); BLAKE2S_ROUND(0); BLAKE2S_ROUND(1); @@ -358,8 +361,8 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2s_State& state) BLAKE2S_ROUND(8); BLAKE2S_ROUND(9); - STOREU(&state.h[0], _mm_xor_si128(ff0, _mm_xor_si128(row1, row3))); - STOREU(&state.h[4], _mm_xor_si128(ff1, _mm_xor_si128(row2, row4))); + STOREU(state.h()+0, _mm_xor_si128(ff0, _mm_xor_si128(row1, row3))); + STOREU(state.h()+4, _mm_xor_si128(ff1, _mm_xor_si128(row2, row4))); } #endif // CRYPTOPP_SSE41_AVAILABLE @@ -660,10 +663,10 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state) uint32x4_t row1, row2, row3, row4; - const uint32x4_t f0 = row1 = vld1q_u32(&state.h[0]); - const uint32x4_t f1 = row2 = vld1q_u32(&state.h[4]); - row3 = vld1q_u32(&BLAKE2S_IV[0]); - row4 = veorq_u32(vld1q_u32(&BLAKE2S_IV[4]), vld1q_u32(&state.tf[0])); + const uint32x4_t f0 = row1 = vld1q_u32(state.h()+0); + const uint32x4_t f1 = row2 = vld1q_u32(state.h()+4); + row3 = vld1q_u32(BLAKE2S_IV+0); + row4 = veorq_u32(vld1q_u32(&BLAKE2S_IV[4]), vld1q_u32(state.t()+0)); BLAKE2S_ROUND(0); BLAKE2S_ROUND(1); @@ -676,8 +679,8 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state) BLAKE2S_ROUND(8); BLAKE2S_ROUND(9); - vst1q_u32(&state.h[0], veorq_u32(f0, veorq_u32(row1, row3))); - vst1q_u32(&state.h[4], veorq_u32(f1, veorq_u32(row2, row4))); + vst1q_u32(state.h()+0, veorq_u32(f0, veorq_u32(row1, row3))); + vst1q_u32(state.h()+4, veorq_u32(f1, veorq_u32(row2, row4))); } #endif // CRYPTOPP_ARM_NEON_AVAILABLE @@ -983,10 +986,10 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state) const uint32x4_p m8 = VecLoad32LE(input + 32); const uint32x4_p m12 = VecLoad32LE(input + 48); - row1 = ff0 = VecLoad32LE(&state.h[0]); - row2 = ff1 = VecLoad32LE(&state.h[4]); - row3 = VecLoad32(&BLAKE2S_IV[0]); - row4 = VecXor(VecLoad32(&BLAKE2S_IV[4]), VecLoad32(&state.tf[0])); + row1 = ff0 = VecLoad32LE(state.h()+0); + row2 = ff1 = VecLoad32LE(state.h()+4); + row3 = VecLoad32(BLAKE2S_IV+0); + row4 = VecXor(VecLoad32(&BLAKE2S_IV[4]), VecLoad32(state.t()+0)); BLAKE2S_ROUND(0); BLAKE2S_ROUND(1); @@ -999,8 +1002,8 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state) BLAKE2S_ROUND(8); BLAKE2S_ROUND(9); - VecStore32LE(&state.h[0], VecXor(ff0, VecXor(row1, row3))); - VecStore32LE(&state.h[4], VecXor(ff1, VecXor(row2, row4))); + VecStore32LE(state.h()+0, VecXor(ff0, VecXor(row1, row3))); + VecStore32LE(state.h()+4, VecXor(ff1, VecXor(row2, row4))); } #endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE