Fix MSVC 2017 hang on BLAKE2 (GH #527)
It looks like the macros for the BLAKE2B and BLAKE2S round functions were too much for the compiler to handle.
pull/548/head
parent
86acc8ed45
commit
b436411de5
232
blake2.cpp
232
blake2.cpp
|
|
@ -10,8 +10,6 @@
|
||||||
#include "blake2.h"
|
#include "blake2.h"
|
||||||
#include "cpu.h"
|
#include "cpu.h"
|
||||||
|
|
||||||
NAMESPACE_BEGIN(CryptoPP)
|
|
||||||
|
|
||||||
// Uncomment for benchmarking C++ against SSE2 or NEON.
|
// Uncomment for benchmarking C++ against SSE2 or NEON.
|
||||||
// Do so in both blake2.cpp and blake2-simd.cpp.
|
// Do so in both blake2.cpp and blake2-simd.cpp.
|
||||||
// #undef CRYPTOPP_SSE41_AVAILABLE
|
// #undef CRYPTOPP_SSE41_AVAILABLE
|
||||||
|
|
@ -23,21 +21,13 @@ NAMESPACE_BEGIN(CryptoPP)
|
||||||
# undef CRYPTOPP_ARM_NEON_AVAILABLE
|
# undef CRYPTOPP_ARM_NEON_AVAILABLE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state);
|
|
||||||
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state);
|
|
||||||
|
|
||||||
#if CRYPTOPP_SSE41_AVAILABLE
|
|
||||||
extern void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state);
|
|
||||||
extern void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
|
||||||
extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2_State<word32, false>& state);
|
|
||||||
extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2_State<word64, true>& state);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ANONYMOUS_NAMESPACE_BEGIN
|
ANONYMOUS_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
using CryptoPP::byte;
|
||||||
|
using CryptoPP::word32;
|
||||||
|
using CryptoPP::word64;
|
||||||
|
using CryptoPP::rotrConstant;
|
||||||
|
|
||||||
template <class W, bool T_64bit>
|
template <class W, bool T_64bit>
|
||||||
struct BLAKE2_IV
|
struct BLAKE2_IV
|
||||||
{
|
{
|
||||||
|
|
@ -89,35 +79,75 @@ const byte BLAKE2B_SIGMA[12][16] = {
|
||||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*pfnCompress32)(const byte*, BLAKE2_State<word32, false>&);
|
template <unsigned int rnd, unsigned int idx>
|
||||||
typedef void (*pfnCompress64)(const byte*, BLAKE2_State<word64, true>&);
|
inline void BLAKE2B_G(word64 m[], word64& a, word64& b, word64& c, word64& d)
|
||||||
|
|
||||||
pfnCompress64 InitializeCompress64Fn()
|
|
||||||
{
|
{
|
||||||
return
|
a = a + b + m[BLAKE2B_SIGMA[rnd][2*idx+0]];
|
||||||
#if CRYPTOPP_SSE41_AVAILABLE
|
d = rotrConstant<32>(d ^ a);
|
||||||
HasSSE41() ? &BLAKE2_Compress64_SSE4 :
|
c = c + d;
|
||||||
#endif
|
b = rotrConstant<24>(b ^ c);
|
||||||
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
a = a + b + m[BLAKE2B_SIGMA[rnd][2*idx+1]];
|
||||||
HasNEON() ? &BLAKE2_Compress64_NEON :
|
d = rotrConstant<16>(d ^ a);
|
||||||
#endif
|
c = c + d;
|
||||||
&BLAKE2_Compress64_CXX;
|
b = rotrConstant<63>(b ^ c);
|
||||||
}
|
}
|
||||||
|
|
||||||
pfnCompress32 InitializeCompress32Fn()
|
template <unsigned int rnd>
|
||||||
|
inline void BLAKE2B_ROUND(word64 m[], word64 v[])
|
||||||
{
|
{
|
||||||
return
|
BLAKE2B_G<rnd,0>(m,v[ 0],v[ 4],v[ 8],v[12]);
|
||||||
#if CRYPTOPP_SSE41_AVAILABLE
|
BLAKE2B_G<rnd,1>(m,v[ 1],v[ 5],v[ 9],v[13]);
|
||||||
HasSSE41() ? &BLAKE2_Compress32_SSE4 :
|
BLAKE2B_G<rnd,2>(m,v[ 2],v[ 6],v[10],v[14]);
|
||||||
#endif
|
BLAKE2B_G<rnd,3>(m,v[ 3],v[ 7],v[11],v[15]);
|
||||||
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
BLAKE2B_G<rnd,4>(m,v[ 0],v[ 5],v[10],v[15]);
|
||||||
HasNEON() ? &BLAKE2_Compress32_NEON :
|
BLAKE2B_G<rnd,5>(m,v[ 1],v[ 6],v[11],v[12]);
|
||||||
#endif
|
BLAKE2B_G<rnd,6>(m,v[ 2],v[ 7],v[ 8],v[13]);
|
||||||
&BLAKE2_Compress32_CXX;
|
BLAKE2B_G<rnd,7>(m,v[ 3],v[ 4],v[ 9],v[14]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <unsigned int rnd, unsigned int idx>
|
||||||
|
inline void BLAKE2S_G(word32 m[], word32& a, word32& b, word32& c, word32& d)
|
||||||
|
{
|
||||||
|
a = a + b + m[BLAKE2S_SIGMA[rnd][2*idx+0]];
|
||||||
|
d = rotrConstant<16>(d ^ a);
|
||||||
|
c = c + d;
|
||||||
|
b = rotrConstant<12>(b ^ c);
|
||||||
|
a = a + b + m[BLAKE2S_SIGMA[rnd][2*idx+1]];
|
||||||
|
d = rotrConstant<8>(d ^ a);
|
||||||
|
c = c + d;
|
||||||
|
b = rotrConstant<7>(b ^ c);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <unsigned int rnd>
|
||||||
|
inline void BLAKE2S_ROUND(word32 m[], word32 v[])
|
||||||
|
{
|
||||||
|
BLAKE2S_G<rnd,0>(m,v[ 0],v[ 4],v[ 8],v[12]);
|
||||||
|
BLAKE2S_G<rnd,1>(m,v[ 1],v[ 5],v[ 9],v[13]);
|
||||||
|
BLAKE2S_G<rnd,2>(m,v[ 2],v[ 6],v[10],v[14]);
|
||||||
|
BLAKE2S_G<rnd,3>(m,v[ 3],v[ 7],v[11],v[15]);
|
||||||
|
BLAKE2S_G<rnd,4>(m,v[ 0],v[ 5],v[10],v[15]);
|
||||||
|
BLAKE2S_G<rnd,5>(m,v[ 1],v[ 6],v[11],v[12]);
|
||||||
|
BLAKE2S_G<rnd,6>(m,v[ 2],v[ 7],v[ 8],v[13]);
|
||||||
|
BLAKE2S_G<rnd,7>(m,v[ 3],v[ 4],v[ 9],v[14]);
|
||||||
}
|
}
|
||||||
|
|
||||||
ANONYMOUS_NAMESPACE_END
|
ANONYMOUS_NAMESPACE_END
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
|
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state);
|
||||||
|
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state);
|
||||||
|
|
||||||
|
#if CRYPTOPP_SSE41_AVAILABLE
|
||||||
|
extern void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state);
|
||||||
|
extern void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
||||||
|
extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2_State<word32, false>& state);
|
||||||
|
extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2_State<word64, true>& state);
|
||||||
|
#endif
|
||||||
|
|
||||||
BLAKE2_ParameterBlock<false>::BLAKE2_ParameterBlock(size_t digestLen, size_t keyLen,
|
BLAKE2_ParameterBlock<false>::BLAKE2_ParameterBlock(size_t digestLen, size_t keyLen,
|
||||||
const byte* saltStr, size_t saltLen,
|
const byte* saltStr, size_t saltLen,
|
||||||
const byte* personalizationStr, size_t personalizationLen)
|
const byte* personalizationStr, size_t personalizationLen)
|
||||||
|
|
@ -399,48 +429,41 @@ void BLAKE2_Base<W, T_64bit>::IncrementCounter(size_t count)
|
||||||
template <>
|
template <>
|
||||||
void BLAKE2_Base<word64, true>::Compress(const byte *input)
|
void BLAKE2_Base<word64, true>::Compress(const byte *input)
|
||||||
{
|
{
|
||||||
// Selects the most advanced implementation at runtime
|
#if CRYPTOPP_SSE41_AVAILABLE
|
||||||
static const pfnCompress64 s_pfn = InitializeCompress64Fn();
|
if(HasSSE41())
|
||||||
s_pfn(input, *m_state.data());
|
{
|
||||||
|
return BLAKE2_Compress64_SSE4(input, *m_state.data());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
||||||
|
if(HasNEON())
|
||||||
|
{
|
||||||
|
return BLAKE2_Compress64_NEON(input, *m_state.data());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return BLAKE2_Compress64_CXX(input, *m_state.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void BLAKE2_Base<word32, false>::Compress(const byte *input)
|
void BLAKE2_Base<word32, false>::Compress(const byte *input)
|
||||||
{
|
{
|
||||||
// Selects the most advanced implementation at runtime
|
#if CRYPTOPP_SSE41_AVAILABLE
|
||||||
static const pfnCompress32 s_pfn = InitializeCompress32Fn();
|
if(HasSSE41())
|
||||||
s_pfn(input, *m_state.data());
|
{
|
||||||
|
return BLAKE2_Compress32_SSE4(input, *m_state.data());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
||||||
|
if(HasNEON())
|
||||||
|
{
|
||||||
|
return BLAKE2_Compress32_NEON(input, *m_state.data());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return BLAKE2_Compress32_CXX(input, *m_state.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state)
|
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state)
|
||||||
{
|
{
|
||||||
#undef BLAKE2_G
|
|
||||||
#undef BLAKE2_ROUND
|
|
||||||
|
|
||||||
#define BLAKE2_G(r,i,a,b,c,d) \
|
|
||||||
do { \
|
|
||||||
a = a + b + m[BLAKE2B_SIGMA[r][2*i+0]]; \
|
|
||||||
d = rotrVariable<word64>(d ^ a, 32); \
|
|
||||||
c = c + d; \
|
|
||||||
b = rotrVariable<word64>(b ^ c, 24); \
|
|
||||||
a = a + b + m[BLAKE2B_SIGMA[r][2*i+1]]; \
|
|
||||||
d = rotrVariable<word64>(d ^ a, 16); \
|
|
||||||
c = c + d; \
|
|
||||||
b = rotrVariable<word64>(b ^ c, 63); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define BLAKE2_ROUND(r) \
|
|
||||||
do { \
|
|
||||||
BLAKE2_G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
|
||||||
BLAKE2_G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
|
||||||
BLAKE2_G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
|
||||||
BLAKE2_G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
|
||||||
BLAKE2_G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
|
||||||
BLAKE2_G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
|
||||||
BLAKE2_G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
|
||||||
BLAKE2_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
word64 m[16], v[16];
|
word64 m[16], v[16];
|
||||||
|
|
||||||
GetBlock<word64, LittleEndian, true> get1(input);
|
GetBlock<word64, LittleEndian, true> get1(input);
|
||||||
|
|
@ -459,18 +482,18 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state)
|
||||||
v[14] = state.f[0] ^ iv[6];
|
v[14] = state.f[0] ^ iv[6];
|
||||||
v[15] = state.f[1] ^ iv[7];
|
v[15] = state.f[1] ^ iv[7];
|
||||||
|
|
||||||
BLAKE2_ROUND(0);
|
BLAKE2B_ROUND<0>(m, v);
|
||||||
BLAKE2_ROUND(1);
|
BLAKE2B_ROUND<1>(m, v);
|
||||||
BLAKE2_ROUND(2);
|
BLAKE2B_ROUND<2>(m, v);
|
||||||
BLAKE2_ROUND(3);
|
BLAKE2B_ROUND<3>(m, v);
|
||||||
BLAKE2_ROUND(4);
|
BLAKE2B_ROUND<4>(m, v);
|
||||||
BLAKE2_ROUND(5);
|
BLAKE2B_ROUND<5>(m, v);
|
||||||
BLAKE2_ROUND(6);
|
BLAKE2B_ROUND<6>(m, v);
|
||||||
BLAKE2_ROUND(7);
|
BLAKE2B_ROUND<7>(m, v);
|
||||||
BLAKE2_ROUND(8);
|
BLAKE2B_ROUND<8>(m, v);
|
||||||
BLAKE2_ROUND(9);
|
BLAKE2B_ROUND<9>(m, v);
|
||||||
BLAKE2_ROUND(10);
|
BLAKE2B_ROUND<10>(m, v);
|
||||||
BLAKE2_ROUND(11);
|
BLAKE2B_ROUND<11>(m, v);
|
||||||
|
|
||||||
for(unsigned int i = 0; i < 8; ++i)
|
for(unsigned int i = 0; i < 8; ++i)
|
||||||
state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]);
|
state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]);
|
||||||
|
|
@ -478,33 +501,6 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state)
|
||||||
|
|
||||||
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state)
|
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state)
|
||||||
{
|
{
|
||||||
#undef BLAKE2_G
|
|
||||||
#undef BLAKE2_ROUND
|
|
||||||
|
|
||||||
#define BLAKE2_G(r,i,a,b,c,d) \
|
|
||||||
do { \
|
|
||||||
a = a + b + m[BLAKE2S_SIGMA[r][2*i+0]]; \
|
|
||||||
d = rotrVariable<word32>(d ^ a, 16); \
|
|
||||||
c = c + d; \
|
|
||||||
b = rotrVariable<word32>(b ^ c, 12); \
|
|
||||||
a = a + b + m[BLAKE2S_SIGMA[r][2*i+1]]; \
|
|
||||||
d = rotrVariable<word32>(d ^ a, 8); \
|
|
||||||
c = c + d; \
|
|
||||||
b = rotrVariable<word32>(b ^ c, 7); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define BLAKE2_ROUND(r) \
|
|
||||||
do { \
|
|
||||||
BLAKE2_G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
|
||||||
BLAKE2_G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
|
||||||
BLAKE2_G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
|
||||||
BLAKE2_G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
|
||||||
BLAKE2_G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
|
||||||
BLAKE2_G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
|
||||||
BLAKE2_G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
|
||||||
BLAKE2_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
word32 m[16], v[16];
|
word32 m[16], v[16];
|
||||||
|
|
||||||
GetBlock<word32, LittleEndian, true> get1(input);
|
GetBlock<word32, LittleEndian, true> get1(input);
|
||||||
|
|
@ -523,16 +519,16 @@ void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state
|
||||||
v[14] = state.f[0] ^ iv[6];
|
v[14] = state.f[0] ^ iv[6];
|
||||||
v[15] = state.f[1] ^ iv[7];
|
v[15] = state.f[1] ^ iv[7];
|
||||||
|
|
||||||
BLAKE2_ROUND(0);
|
BLAKE2S_ROUND<0>(m, v);
|
||||||
BLAKE2_ROUND(1);
|
BLAKE2S_ROUND<1>(m, v);
|
||||||
BLAKE2_ROUND(2);
|
BLAKE2S_ROUND<2>(m, v);
|
||||||
BLAKE2_ROUND(3);
|
BLAKE2S_ROUND<3>(m, v);
|
||||||
BLAKE2_ROUND(4);
|
BLAKE2S_ROUND<4>(m, v);
|
||||||
BLAKE2_ROUND(5);
|
BLAKE2S_ROUND<5>(m, v);
|
||||||
BLAKE2_ROUND(6);
|
BLAKE2S_ROUND<6>(m, v);
|
||||||
BLAKE2_ROUND(7);
|
BLAKE2S_ROUND<7>(m, v);
|
||||||
BLAKE2_ROUND(8);
|
BLAKE2S_ROUND<8>(m, v);
|
||||||
BLAKE2_ROUND(9);
|
BLAKE2S_ROUND<9>(m, v);
|
||||||
|
|
||||||
for(unsigned int i = 0; i < 8; ++i)
|
for(unsigned int i = 0; i < 8; ++i)
|
||||||
state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]);
|
state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue