Fix SunCC 12.2 compiler crash with GCM_Xor16_SSE2
SunCC 12.3 through 12.5 still cannot handle CLMUL, though. It would be nice if Sun fixed the regression.
pull/548/head
parent
6f83a4fb7d
commit
8b52a03d08
|
|
@ -273,9 +273,13 @@ ifeq ($(SUN_COMPILER),1)
|
||||||
ARIA_FLAG = -xarch=ssse3 -D__SSSE3__=1
|
ARIA_FLAG = -xarch=ssse3 -D__SSSE3__=1
|
||||||
LDFLAGS += -xarch=ssse3
|
LDFLAGS += -xarch=ssse3
|
||||||
endif
|
endif
|
||||||
|
COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse4_1 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal")
|
||||||
|
ifeq ($(COUNT),0)
|
||||||
|
BLAKE2_FLAG = -xarch=sse4_1 -D__SSE4_1__=1
|
||||||
|
LDFLAGS += -xarch=sse4_1
|
||||||
|
endif
|
||||||
COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse4_2 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal")
|
COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse4_2 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal")
|
||||||
ifeq ($(COUNT),0)
|
ifeq ($(COUNT),0)
|
||||||
BLAKE2_FLAG = -xarch=sse4_2 -D__SSE4_2__=1
|
|
||||||
CRC_FLAG = -xarch=sse4_2 -D__SSE4_2__=1
|
CRC_FLAG = -xarch=sse4_2 -D__SSE4_2__=1
|
||||||
LDFLAGS += -xarch=sse4_2
|
LDFLAGS += -xarch=sse4_2
|
||||||
endif
|
endif
|
||||||
|
|
|
||||||
7
cpu.cpp
7
cpu.cpp
|
|
@ -247,7 +247,8 @@ static bool CPU_ProbeSSE2()
|
||||||
bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
|
bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
|
||||||
bool CRYPTOPP_SECTION_INIT CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
|
bool CRYPTOPP_SECTION_INIT CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false, CRYPTOPP_SECTION_INIT g_hasSSE42 = false;
|
bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false, CRYPTOPP_SECTION_INIT g_hasSSE42 = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
|
bool CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false;
|
||||||
|
bool CRYPTOPP_SECTION_INIT g_hasADX = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
|
bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
|
bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
|
bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
|
||||||
|
|
@ -301,6 +302,7 @@ void DetectX86Features()
|
||||||
{
|
{
|
||||||
CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
|
CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
|
||||||
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
|
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
|
||||||
|
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
|
||||||
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
|
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
|
||||||
|
|
||||||
g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
|
g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
|
||||||
|
|
@ -312,6 +314,7 @@ void DetectX86Features()
|
||||||
if (CpuId(7, 0, cpuid2))
|
if (CpuId(7, 0, cpuid2))
|
||||||
{
|
{
|
||||||
g_hasRDSEED = !!(cpuid2[1] /*EBX*/ & RDSEED_FLAG);
|
g_hasRDSEED = !!(cpuid2[1] /*EBX*/ & RDSEED_FLAG);
|
||||||
|
g_hasADX = !!(cpuid2[1] /*EBX*/ & ADX_FLAG);
|
||||||
g_hasSHA = !!(cpuid2[1] /*EBX*/ & SHA_FLAG);
|
g_hasSHA = !!(cpuid2[1] /*EBX*/ & SHA_FLAG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -320,6 +323,7 @@ void DetectX86Features()
|
||||||
{
|
{
|
||||||
CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
|
CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
|
||||||
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
|
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
|
||||||
|
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
|
||||||
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
|
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
|
||||||
|
|
||||||
CpuId(0x80000005, 0, cpuid2);
|
CpuId(0x80000005, 0, cpuid2);
|
||||||
|
|
@ -331,6 +335,7 @@ void DetectX86Features()
|
||||||
if (CpuId(7, 0, cpuid2))
|
if (CpuId(7, 0, cpuid2))
|
||||||
{
|
{
|
||||||
g_hasRDSEED = !!(cpuid2[1] /*EBX*/ & RDSEED_FLAG);
|
g_hasRDSEED = !!(cpuid2[1] /*EBX*/ & RDSEED_FLAG);
|
||||||
|
g_hasADX = !!(cpuid2[1] /*EBX*/ & ADX_FLAG);
|
||||||
g_hasSHA = !!(cpuid2[1] /*EBX*/ & SHA_FLAG);
|
g_hasSHA = !!(cpuid2[1] /*EBX*/ & SHA_FLAG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
12
cpu.h
12
cpu.h
|
|
@ -66,6 +66,7 @@ extern CRYPTOPP_DLL bool g_hasSSE42;
|
||||||
extern CRYPTOPP_DLL bool g_hasAESNI;
|
extern CRYPTOPP_DLL bool g_hasAESNI;
|
||||||
extern CRYPTOPP_DLL bool g_hasCLMUL;
|
extern CRYPTOPP_DLL bool g_hasCLMUL;
|
||||||
extern CRYPTOPP_DLL bool g_hasSHA;
|
extern CRYPTOPP_DLL bool g_hasSHA;
|
||||||
|
extern CRYPTOPP_DLL bool g_hasADX;
|
||||||
extern CRYPTOPP_DLL bool g_isP4;
|
extern CRYPTOPP_DLL bool g_isP4;
|
||||||
extern CRYPTOPP_DLL bool g_hasRDRAND;
|
extern CRYPTOPP_DLL bool g_hasRDRAND;
|
||||||
extern CRYPTOPP_DLL bool g_hasRDSEED;
|
extern CRYPTOPP_DLL bool g_hasRDSEED;
|
||||||
|
|
@ -165,6 +166,17 @@ inline bool HasSHA()
|
||||||
return g_hasSHA;
|
return g_hasSHA;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! \brief Determines ADX availability
|
||||||
|
//! \returns true if ADX is determined to be available, false otherwise
|
||||||
|
//! \details HasADX() is a runtime check performed using CPUID
|
||||||
|
//! \note This function is only available on Intel IA-32 platforms
|
||||||
|
inline bool HasADX()
|
||||||
|
{
|
||||||
|
if (!g_x86DetectionDone)
|
||||||
|
DetectX86Features();
|
||||||
|
return g_hasADX;
|
||||||
|
}
|
||||||
|
|
||||||
//! \brief Determines if the CPU is an Intel P4
|
//! \brief Determines if the CPU is an Intel P4
|
||||||
//! \returns true if the CPU is a P4, false otherwise
|
//! \returns true if the CPU is a P4, false otherwise
|
||||||
//! \details IsP4() is a runtime check performed using CPUID
|
//! \details IsP4() is a runtime check performed using CPUID
|
||||||
|
|
|
||||||
17
gcm-simd.cpp
17
gcm-simd.cpp
|
|
@ -30,6 +30,10 @@
|
||||||
# undef CRYPTOPP_ARM_PMULL_AVAILABLE
|
# undef CRYPTOPP_ARM_PMULL_AVAILABLE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||||
|
# include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#if (CRYPTOPP_CLMUL_AVAILABLE)
|
#if (CRYPTOPP_CLMUL_AVAILABLE)
|
||||||
# include <tmmintrin.h>
|
# include <tmmintrin.h>
|
||||||
# include <wmmintrin.h>
|
# include <wmmintrin.h>
|
||||||
|
|
@ -428,6 +432,19 @@ void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
|
# if defined (__SUNPRO_CC)
|
||||||
|
// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
|
||||||
|
// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
|
||||||
|
void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c)
|
||||||
|
{
|
||||||
|
_mm_store_si128(M128_CAST(a), _mm_xor_si128(
|
||||||
|
_mm_load_si128(CONST_M128_CAST(b)),
|
||||||
|
_mm_load_si128(CONST_M128_CAST(c))));
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if CRYPTOPP_CLMUL_AVAILABLE
|
#if CRYPTOPP_CLMUL_AVAILABLE
|
||||||
|
|
||||||
ANONYMOUS_NAMESPACE_BEGIN
|
ANONYMOUS_NAMESPACE_BEGIN
|
||||||
|
|
|
||||||
26
gcm.cpp
26
gcm.cpp
|
|
@ -27,15 +27,15 @@
|
||||||
# undef CRYPTOPP_CLMUL_AVAILABLE
|
# undef CRYPTOPP_CLMUL_AVAILABLE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||||
|
# include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "gcm.h"
|
#include "gcm.h"
|
||||||
#include "cpu.h"
|
#include "cpu.h"
|
||||||
|
|
||||||
NAMESPACE_BEGIN(CryptoPP)
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
|
||||||
# include "emmintrin.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
|
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
|
||||||
// Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
|
// Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
|
||||||
// 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
|
// 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
|
||||||
|
|
@ -77,20 +77,24 @@ static inline void Xor16(byte *a, const byte *b, const byte *c)
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
|
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
|
// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
|
||||||
|
// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
|
||||||
|
# if defined (__SUNPRO_CC)
|
||||||
|
extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
|
||||||
|
# else
|
||||||
static inline void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c)
|
static inline void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c)
|
||||||
{
|
{
|
||||||
// SunCC 5.14 crash. Also see http://github.com/weidai11/cryptopp/issues/226
|
# if CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
// and http://github.com/weidai11/cryptopp/issues/284
|
|
||||||
# if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(__SUNPRO_CC)
|
|
||||||
asm ("movdqa %1, %%xmm0; pxor %2, %%xmm0; movdqa %%xmm0, %0;"
|
asm ("movdqa %1, %%xmm0; pxor %2, %%xmm0; movdqa %%xmm0, %0;"
|
||||||
: "=m" (a[0]) : "m"(b[0]), "m"(c[0]));
|
: "=m" (a[0]) : "rm"(b[0]), "rm"(c[0]));
|
||||||
# else // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
# else // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||||
_mm_store_si128(M128_CAST(a), _mm_xor_si128(
|
_mm_store_si128(M128_CAST(a), _mm_xor_si128(
|
||||||
_mm_load_si128(CONST_M128_CAST(b)),
|
_mm_load_si128(CONST_M128_CAST(b)),
|
||||||
_mm_load_si128(CONST_M128_CAST(c))));
|
_mm_load_si128(CONST_M128_CAST(c))));
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
#endif
|
# endif // SunCC
|
||||||
|
#endif // SSE2
|
||||||
|
|
||||||
#if CRYPTOPP_CLMUL_AVAILABLE
|
#if CRYPTOPP_CLMUL_AVAILABLE
|
||||||
extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
|
extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue