Fix SunCC 12.2 compiler crash with GCM_Xor16_SSE2

SunCC 12.3 through 12.5 still cannot handle CLMUL, though. It would be nice if Sun fixed the regression.
pull/548/head
Jeffrey Walton 2017-11-16 02:38:53 -05:00
parent 6f83a4fb7d
commit 8b52a03d08
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
5 changed files with 55 additions and 13 deletions

View File

@ -273,9 +273,13 @@ ifeq ($(SUN_COMPILER),1)
ARIA_FLAG = -xarch=ssse3 -D__SSSE3__=1
LDFLAGS += -xarch=ssse3
endif
COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse4_1 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal")
ifeq ($(COUNT),0)
BLAKE2_FLAG = -xarch=sse4_1 -D__SSE4_1__=1
LDFLAGS += -xarch=sse4_1
endif
COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse4_2 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal")
ifeq ($(COUNT),0)
BLAKE2_FLAG = -xarch=sse4_2 -D__SSE4_2__=1
CRC_FLAG = -xarch=sse4_2 -D__SSE4_2__=1
LDFLAGS += -xarch=sse4_2
endif

View File

@ -247,7 +247,8 @@ static bool CPU_ProbeSSE2()
bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
bool CRYPTOPP_SECTION_INIT CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false, CRYPTOPP_SECTION_INIT g_hasSSE42 = false;
bool CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
bool CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false;
bool CRYPTOPP_SECTION_INIT g_hasADX = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
@ -301,6 +302,7 @@ void DetectX86Features()
{
CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
@ -312,6 +314,7 @@ void DetectX86Features()
if (CpuId(7, 0, cpuid2))
{
g_hasRDSEED = !!(cpuid2[1] /*EBX*/ & RDSEED_FLAG);
g_hasADX = !!(cpuid2[1] /*EBX*/ & ADX_FLAG);
g_hasSHA = !!(cpuid2[1] /*EBX*/ & SHA_FLAG);
}
}
@ -320,6 +323,7 @@ void DetectX86Features()
{
CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
CpuId(0x80000005, 0, cpuid2);
@ -331,6 +335,7 @@ void DetectX86Features()
if (CpuId(7, 0, cpuid2))
{
g_hasRDSEED = !!(cpuid2[1] /*EBX*/ & RDSEED_FLAG);
g_hasADX = !!(cpuid2[1] /*EBX*/ & ADX_FLAG);
g_hasSHA = !!(cpuid2[1] /*EBX*/ & SHA_FLAG);
}
}

12
cpu.h
View File

@ -66,6 +66,7 @@ extern CRYPTOPP_DLL bool g_hasSSE42;
extern CRYPTOPP_DLL bool g_hasAESNI;
extern CRYPTOPP_DLL bool g_hasCLMUL;
extern CRYPTOPP_DLL bool g_hasSHA;
extern CRYPTOPP_DLL bool g_hasADX;
extern CRYPTOPP_DLL bool g_isP4;
extern CRYPTOPP_DLL bool g_hasRDRAND;
extern CRYPTOPP_DLL bool g_hasRDSEED;
@ -165,6 +166,17 @@ inline bool HasSHA()
return g_hasSHA;
}
//! \brief Reports whether ADX is available on this CPU
//! \returns true when ADX was detected, false otherwise
//! \details HasADX() is a runtime check backed by CPUID. If feature detection
//!   has not been performed yet, DetectX86Features() is called first; the
//!   cached g_hasADX flag is then returned.
//! \note This function is only available on Intel IA-32 platforms
inline bool HasADX()
{
	// Fast path: detection already ran, so the cached flag is valid.
	if (g_x86DetectionDone)
		return g_hasADX;

	// First query: populate the feature flags, then report the result.
	DetectX86Features();
	return g_hasADX;
}
//! \brief Determines if the CPU is an Intel P4
//! \returns true if the CPU is a P4, false otherwise
//! \details IsP4() is a runtime check performed using CPUID

View File

@ -30,6 +30,10 @@
# undef CRYPTOPP_ARM_PMULL_AVAILABLE
#endif
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
# include <emmintrin.h>
#endif
#if (CRYPTOPP_CLMUL_AVAILABLE)
# include <tmmintrin.h>
# include <wmmintrin.h>
@ -428,6 +432,19 @@ void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer)
}
#endif
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
# if defined (__SUNPRO_CC)
// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place it in
// a source file with an SSE architecture switch. Also see GH #226 and GH #284.
//
// Computes a = b XOR c over a single 128-bit (16-byte) block using SSE2
// intrinsics. The aligned load/store intrinsics (_mm_load_si128 and
// _mm_store_si128) are used, so a, b and c are presumably expected to be
// 16-byte aligned -- TODO confirm against the callers.
void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c)
{
// Load both inputs, XOR them, and write the 16-byte result to a.
_mm_store_si128(M128_CAST(a), _mm_xor_si128(
_mm_load_si128(CONST_M128_CAST(b)),
_mm_load_si128(CONST_M128_CAST(c))));
}
# endif
#endif
#if CRYPTOPP_CLMUL_AVAILABLE
ANONYMOUS_NAMESPACE_BEGIN

26
gcm.cpp
View File

@ -27,15 +27,15 @@
# undef CRYPTOPP_CLMUL_AVAILABLE
#endif
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
# include <emmintrin.h>
#endif
#include "gcm.h"
#include "cpu.h"
NAMESPACE_BEGIN(CryptoPP)
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
# include "emmintrin.h"
#endif
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
// Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
// 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
@ -77,20 +77,24 @@ static inline void Xor16(byte *a, const byte *b, const byte *c)
}
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place it in
// a source file with an SSE architecture switch. Also see GH #226 and GH #284.
# if defined (__SUNPRO_CC)
extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
# else
static inline void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c)
{
// SunCC 5.14 crash. Also see http://github.com/weidai11/cryptopp/issues/226
// and http://github.com/weidai11/cryptopp/issues/284
# if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(__SUNPRO_CC)
# if CRYPTOPP_SSE2_ASM_AVAILABLE
asm ("movdqa %1, %%xmm0; pxor %2, %%xmm0; movdqa %%xmm0, %0;"
: "=m" (a[0]) : "m"(b[0]), "m"(c[0]));
# else // CRYPTOPP_SSE2_INTRIN_AVAILABLE
: "=m" (a[0]) : "rm"(b[0]), "rm"(c[0]));
# else // CRYPTOPP_SSE2_INTRIN_AVAILABLE
_mm_store_si128(M128_CAST(a), _mm_xor_si128(
_mm_load_si128(CONST_M128_CAST(b)),
_mm_load_si128(CONST_M128_CAST(c))));
# endif
# endif
}
#endif
# endif // SunCC
#endif // SSE2
#if CRYPTOPP_CLMUL_AVAILABLE
extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);