Fix MSVC 2017 hang on BLAKE2 (GH #527)

It looks like the macros for the BLAKE2B and BLAKE2S round functions were too much for the compiler to handle, so they are replaced with template functions.
pull/548/head
Jeffrey Walton 2017-12-06 14:02:28 -05:00
parent 86acc8ed45
commit b436411de5
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
1 changed files with 114 additions and 118 deletions

View File

@ -10,8 +10,6 @@
#include "blake2.h" #include "blake2.h"
#include "cpu.h" #include "cpu.h"
NAMESPACE_BEGIN(CryptoPP)
// Uncomment for benchmarking C++ against SSE2 or NEON. // Uncomment for benchmarking C++ against SSE2 or NEON.
// Do so in both blake2.cpp and blake2-simd.cpp. // Do so in both blake2.cpp and blake2-simd.cpp.
// #undef CRYPTOPP_SSE41_AVAILABLE // #undef CRYPTOPP_SSE41_AVAILABLE
@ -23,21 +21,13 @@ NAMESPACE_BEGIN(CryptoPP)
# undef CRYPTOPP_ARM_NEON_AVAILABLE # undef CRYPTOPP_ARM_NEON_AVAILABLE
#endif #endif
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state);
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state);
#if CRYPTOPP_SSE41_AVAILABLE
extern void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state);
extern void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state);
#endif
#if CRYPTOPP_ARM_NEON_AVAILABLE
extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2_State<word32, false>& state);
extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2_State<word64, true>& state);
#endif
ANONYMOUS_NAMESPACE_BEGIN ANONYMOUS_NAMESPACE_BEGIN
using CryptoPP::byte;
using CryptoPP::word32;
using CryptoPP::word64;
using CryptoPP::rotrConstant;
template <class W, bool T_64bit> template <class W, bool T_64bit>
struct BLAKE2_IV struct BLAKE2_IV
{ {
@ -89,35 +79,75 @@ const byte BLAKE2B_SIGMA[12][16] = {
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
}; };
typedef void (*pfnCompress32)(const byte*, BLAKE2_State<word32, false>&); template <unsigned int rnd, unsigned int idx>
typedef void (*pfnCompress64)(const byte*, BLAKE2_State<word64, true>&); inline void BLAKE2B_G(word64 m[], word64& a, word64& b, word64& c, word64& d)
pfnCompress64 InitializeCompress64Fn()
{ {
return a = a + b + m[BLAKE2B_SIGMA[rnd][2*idx+0]];
#if CRYPTOPP_SSE41_AVAILABLE d = rotrConstant<32>(d ^ a);
HasSSE41() ? &BLAKE2_Compress64_SSE4 : c = c + d;
#endif b = rotrConstant<24>(b ^ c);
#if CRYPTOPP_ARM_NEON_AVAILABLE a = a + b + m[BLAKE2B_SIGMA[rnd][2*idx+1]];
HasNEON() ? &BLAKE2_Compress64_NEON : d = rotrConstant<16>(d ^ a);
#endif c = c + d;
&BLAKE2_Compress64_CXX; b = rotrConstant<63>(b ^ c);
} }
pfnCompress32 InitializeCompress32Fn() template <unsigned int rnd>
inline void BLAKE2B_ROUND(word64 m[], word64 v[])
{ {
return BLAKE2B_G<rnd,0>(m,v[ 0],v[ 4],v[ 8],v[12]);
#if CRYPTOPP_SSE41_AVAILABLE BLAKE2B_G<rnd,1>(m,v[ 1],v[ 5],v[ 9],v[13]);
HasSSE41() ? &BLAKE2_Compress32_SSE4 : BLAKE2B_G<rnd,2>(m,v[ 2],v[ 6],v[10],v[14]);
#endif BLAKE2B_G<rnd,3>(m,v[ 3],v[ 7],v[11],v[15]);
#if CRYPTOPP_ARM_NEON_AVAILABLE BLAKE2B_G<rnd,4>(m,v[ 0],v[ 5],v[10],v[15]);
HasNEON() ? &BLAKE2_Compress32_NEON : BLAKE2B_G<rnd,5>(m,v[ 1],v[ 6],v[11],v[12]);
#endif BLAKE2B_G<rnd,6>(m,v[ 2],v[ 7],v[ 8],v[13]);
&BLAKE2_Compress32_CXX; BLAKE2B_G<rnd,7>(m,v[ 3],v[ 4],v[ 9],v[14]);
}
// G mixing step for the 32-bit (BLAKE2S) variant, invoked eight times per
// round by BLAKE2S_ROUND.
//
// Template parameters:
//   rnd - row index into BLAKE2S_SIGMA (the round number passed by the caller)
//   idx - which of the eight G invocations within the round; selects the
//         column pair 2*idx and 2*idx+1 of that BLAKE2S_SIGMA row
// Parameters:
//   m          - message words; indexed through BLAKE2S_SIGMA[rnd][...]
//   a, b, c, d - four working words, updated in place
//
// The statements are sequentially dependent (each uses the result of the
// previous one), so their order must not be changed.
// NOTE(review): rotrConstant<N> is presumably a right-rotate by the
// compile-time constant N - confirm against its definition.
template <unsigned int rnd, unsigned int idx>
inline void BLAKE2S_G(word32 m[], word32& a, word32& b, word32& c, word32& d)
{
// First half: mix in the message word selected by column 2*idx.
a = a + b + m[BLAKE2S_SIGMA[rnd][2*idx+0]];
d = rotrConstant<16>(d ^ a);
c = c + d;
b = rotrConstant<12>(b ^ c);
// Second half: mix in the message word selected by column 2*idx+1.
a = a + b + m[BLAKE2S_SIGMA[rnd][2*idx+1]];
d = rotrConstant<8>(d ^ a);
c = c + d;
b = rotrConstant<7>(b ^ c);
}
template <unsigned int rnd>
inline void BLAKE2S_ROUND(word32 m[], word32 v[])
{
BLAKE2S_G<rnd,0>(m,v[ 0],v[ 4],v[ 8],v[12]);
BLAKE2S_G<rnd,1>(m,v[ 1],v[ 5],v[ 9],v[13]);
BLAKE2S_G<rnd,2>(m,v[ 2],v[ 6],v[10],v[14]);
BLAKE2S_G<rnd,3>(m,v[ 3],v[ 7],v[11],v[15]);
BLAKE2S_G<rnd,4>(m,v[ 0],v[ 5],v[10],v[15]);
BLAKE2S_G<rnd,5>(m,v[ 1],v[ 6],v[11],v[12]);
BLAKE2S_G<rnd,6>(m,v[ 2],v[ 7],v[ 8],v[13]);
BLAKE2S_G<rnd,7>(m,v[ 3],v[ 4],v[ 9],v[14]);
} }
ANONYMOUS_NAMESPACE_END ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state);
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state);
#if CRYPTOPP_SSE41_AVAILABLE
extern void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& state);
extern void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state);
#endif
#if CRYPTOPP_ARM_NEON_AVAILABLE
extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2_State<word32, false>& state);
extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2_State<word64, true>& state);
#endif
BLAKE2_ParameterBlock<false>::BLAKE2_ParameterBlock(size_t digestLen, size_t keyLen, BLAKE2_ParameterBlock<false>::BLAKE2_ParameterBlock(size_t digestLen, size_t keyLen,
const byte* saltStr, size_t saltLen, const byte* saltStr, size_t saltLen,
const byte* personalizationStr, size_t personalizationLen) const byte* personalizationStr, size_t personalizationLen)
@ -399,48 +429,41 @@ void BLAKE2_Base<W, T_64bit>::IncrementCounter(size_t count)
template <> template <>
void BLAKE2_Base<word64, true>::Compress(const byte *input) void BLAKE2_Base<word64, true>::Compress(const byte *input)
{ {
// Selects the most advanced implementation at runtime #if CRYPTOPP_SSE41_AVAILABLE
static const pfnCompress64 s_pfn = InitializeCompress64Fn(); if(HasSSE41())
s_pfn(input, *m_state.data()); {
return BLAKE2_Compress64_SSE4(input, *m_state.data());
}
#endif
#if CRYPTOPP_ARM_NEON_AVAILABLE
if(HasNEON())
{
return BLAKE2_Compress64_NEON(input, *m_state.data());
}
#endif
return BLAKE2_Compress64_CXX(input, *m_state.data());
} }
template <> template <>
void BLAKE2_Base<word32, false>::Compress(const byte *input) void BLAKE2_Base<word32, false>::Compress(const byte *input)
{ {
// Selects the most advanced implementation at runtime #if CRYPTOPP_SSE41_AVAILABLE
static const pfnCompress32 s_pfn = InitializeCompress32Fn(); if(HasSSE41())
s_pfn(input, *m_state.data()); {
return BLAKE2_Compress32_SSE4(input, *m_state.data());
}
#endif
#if CRYPTOPP_ARM_NEON_AVAILABLE
if(HasNEON())
{
return BLAKE2_Compress32_NEON(input, *m_state.data());
}
#endif
return BLAKE2_Compress32_CXX(input, *m_state.data());
} }
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state) void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state)
{ {
#undef BLAKE2_G
#undef BLAKE2_ROUND
#define BLAKE2_G(r,i,a,b,c,d) \
do { \
a = a + b + m[BLAKE2B_SIGMA[r][2*i+0]]; \
d = rotrVariable<word64>(d ^ a, 32); \
c = c + d; \
b = rotrVariable<word64>(b ^ c, 24); \
a = a + b + m[BLAKE2B_SIGMA[r][2*i+1]]; \
d = rotrVariable<word64>(d ^ a, 16); \
c = c + d; \
b = rotrVariable<word64>(b ^ c, 63); \
} while(0)
#define BLAKE2_ROUND(r) \
do { \
BLAKE2_G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
BLAKE2_G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
BLAKE2_G(r,2,v[ 2],v[ 6],v[10],v[14]); \
BLAKE2_G(r,3,v[ 3],v[ 7],v[11],v[15]); \
BLAKE2_G(r,4,v[ 0],v[ 5],v[10],v[15]); \
BLAKE2_G(r,5,v[ 1],v[ 6],v[11],v[12]); \
BLAKE2_G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
BLAKE2_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
word64 m[16], v[16]; word64 m[16], v[16];
GetBlock<word64, LittleEndian, true> get1(input); GetBlock<word64, LittleEndian, true> get1(input);
@ -459,18 +482,18 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state)
v[14] = state.f[0] ^ iv[6]; v[14] = state.f[0] ^ iv[6];
v[15] = state.f[1] ^ iv[7]; v[15] = state.f[1] ^ iv[7];
BLAKE2_ROUND(0); BLAKE2B_ROUND<0>(m, v);
BLAKE2_ROUND(1); BLAKE2B_ROUND<1>(m, v);
BLAKE2_ROUND(2); BLAKE2B_ROUND<2>(m, v);
BLAKE2_ROUND(3); BLAKE2B_ROUND<3>(m, v);
BLAKE2_ROUND(4); BLAKE2B_ROUND<4>(m, v);
BLAKE2_ROUND(5); BLAKE2B_ROUND<5>(m, v);
BLAKE2_ROUND(6); BLAKE2B_ROUND<6>(m, v);
BLAKE2_ROUND(7); BLAKE2B_ROUND<7>(m, v);
BLAKE2_ROUND(8); BLAKE2B_ROUND<8>(m, v);
BLAKE2_ROUND(9); BLAKE2B_ROUND<9>(m, v);
BLAKE2_ROUND(10); BLAKE2B_ROUND<10>(m, v);
BLAKE2_ROUND(11); BLAKE2B_ROUND<11>(m, v);
for(unsigned int i = 0; i < 8; ++i) for(unsigned int i = 0; i < 8; ++i)
state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]); state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]);
@ -478,33 +501,6 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state)
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state) void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state)
{ {
#undef BLAKE2_G
#undef BLAKE2_ROUND
#define BLAKE2_G(r,i,a,b,c,d) \
do { \
a = a + b + m[BLAKE2S_SIGMA[r][2*i+0]]; \
d = rotrVariable<word32>(d ^ a, 16); \
c = c + d; \
b = rotrVariable<word32>(b ^ c, 12); \
a = a + b + m[BLAKE2S_SIGMA[r][2*i+1]]; \
d = rotrVariable<word32>(d ^ a, 8); \
c = c + d; \
b = rotrVariable<word32>(b ^ c, 7); \
} while(0)
#define BLAKE2_ROUND(r) \
do { \
BLAKE2_G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
BLAKE2_G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
BLAKE2_G(r,2,v[ 2],v[ 6],v[10],v[14]); \
BLAKE2_G(r,3,v[ 3],v[ 7],v[11],v[15]); \
BLAKE2_G(r,4,v[ 0],v[ 5],v[10],v[15]); \
BLAKE2_G(r,5,v[ 1],v[ 6],v[11],v[12]); \
BLAKE2_G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
BLAKE2_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
word32 m[16], v[16]; word32 m[16], v[16];
GetBlock<word32, LittleEndian, true> get1(input); GetBlock<word32, LittleEndian, true> get1(input);
@ -523,16 +519,16 @@ void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state
v[14] = state.f[0] ^ iv[6]; v[14] = state.f[0] ^ iv[6];
v[15] = state.f[1] ^ iv[7]; v[15] = state.f[1] ^ iv[7];
BLAKE2_ROUND(0); BLAKE2S_ROUND<0>(m, v);
BLAKE2_ROUND(1); BLAKE2S_ROUND<1>(m, v);
BLAKE2_ROUND(2); BLAKE2S_ROUND<2>(m, v);
BLAKE2_ROUND(3); BLAKE2S_ROUND<3>(m, v);
BLAKE2_ROUND(4); BLAKE2S_ROUND<4>(m, v);
BLAKE2_ROUND(5); BLAKE2S_ROUND<5>(m, v);
BLAKE2_ROUND(6); BLAKE2S_ROUND<6>(m, v);
BLAKE2_ROUND(7); BLAKE2S_ROUND<7>(m, v);
BLAKE2_ROUND(8); BLAKE2S_ROUND<8>(m, v);
BLAKE2_ROUND(9); BLAKE2S_ROUND<9>(m, v);
for(unsigned int i = 0; i < 8; ++i) for(unsigned int i = 0; i < 8; ++i)
state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]); state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]);