Fix global optimization bug for ChaCha AVX2 under VS2017 (GH #735)
Also see https://github.com/weidai11/cryptopp/issues/649. Issue 649 is the one affecting AES; it appears to be the same problem.
pull/737/head
parent
af9fb9d21e
commit
092309b266
|
|
@ -36,10 +36,21 @@ extern const char CHACHA_AVX_FNAME[] = __FILE__;
|
||||||
# define MAYBE_CONST const
|
# define MAYBE_CONST const
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (CRYPTOPP_AVX2_AVAILABLE)
|
// VS2017 and global optimization bug. TODO, figure out when
|
||||||
|
// we can re-enable full optimizations for VS2017. Also see
|
||||||
|
// https://github.com/weidai11/cryptopp/issues/649 and
|
||||||
|
// https://github.com/weidai11/cryptopp/issues/735. The
|
||||||
|
// 649 issue affects AES but it is the same here. The 735
|
||||||
|
// issue is ChaCha AVX2 cut-in where it surfaced again.
|
||||||
|
#if (_MSC_VER >= 1910) && defined(NDEBUG)
|
||||||
|
# pragma optimize("", off)
|
||||||
|
# pragma optimize("ts", on)
|
||||||
|
#endif
|
||||||
|
|
||||||
ANONYMOUS_NAMESPACE_BEGIN
|
ANONYMOUS_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
#if (CRYPTOPP_AVX2_AVAILABLE)
|
||||||
|
|
||||||
template <unsigned int R>
|
template <unsigned int R>
|
||||||
inline __m256i RotateLeft(const __m256i val)
|
inline __m256i RotateLeft(const __m256i val)
|
||||||
{
|
{
|
||||||
|
|
@ -62,10 +73,14 @@ inline __m256i RotateLeft<16>(const __m256i val)
|
||||||
return _mm256_shuffle_epi8(val, mask);
|
return _mm256_shuffle_epi8(val, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif  // CRYPTOPP_AVX2_AVAILABLE
|
||||||
|
|
||||||
ANONYMOUS_NAMESPACE_END
|
ANONYMOUS_NAMESPACE_END
|
||||||
|
|
||||||
NAMESPACE_BEGIN(CryptoPP)
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
|
#if (CRYPTOPP_AVX2_AVAILABLE)
|
||||||
|
|
||||||
void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input, byte *output, unsigned int rounds)
|
void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input, byte *output, unsigned int rounds)
|
||||||
{
|
{
|
||||||
MAYBE_CONST __m128i* state_mm = (MAYBE_CONST __m128i*)(state);
|
MAYBE_CONST __m128i* state_mm = (MAYBE_CONST __m128i*)(state);
|
||||||
|
|
@ -358,6 +373,6 @@ void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input, byte *
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_END
|
|
||||||
|
|
||||||
#endif // CRYPTOPP_AVX2_AVAILABLE
|
#endif // CRYPTOPP_AVX2_AVAILABLE
|
||||||
|
|
||||||
|
NAMESPACE_END
|
||||||
|
|
|
||||||
|
|
@ -88,10 +88,10 @@ being unloaded from L1 cache, until that round is finished.
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
#include "cpu.h"
|
#include "cpu.h"
|
||||||
|
|
||||||
// MSVC bug, still don't know how to fix it. TODO, figure out
|
// VS2017 and global optimization bug. TODO, figure out when
|
||||||
// when we can re-enable optimizations for MSVC. Also see
|
// we can re-enable full optimizations for VS2017. Also see
|
||||||
// https://github.com/weidai11/cryptopp/issues/649
|
// https://github.com/weidai11/cryptopp/issues/649
|
||||||
#if defined(_MSC_VER) && (_MSC_VER >= 1910)
|
#if (_MSC_VER >= 1910) && defined(NDEBUG)
|
||||||
# pragma optimize("", off)
|
# pragma optimize("", off)
|
||||||
# pragma optimize("ts", on)
|
# pragma optimize("ts", on)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue