Remove SSE2 from cpu.cpp, add sse-simd.cpp
We need to ensure SSE2 does not cross-pollinate into other CPU functions, since SSE2 is above the minimum arch. The minimum arch is i586/i686, and both lack SSE2 instructions.
pull/548/head
parent
241ff3065e
commit
bd41c3d5dd
|
|
@ -283,6 +283,7 @@ sosemanuk.h
|
||||||
square.cpp
|
square.cpp
|
||||||
square.h
|
square.h
|
||||||
squaretb.cpp
|
squaretb.cpp
|
||||||
|
sse-simd.cpp
|
||||||
stdcpp.h
|
stdcpp.h
|
||||||
strciphr.cpp
|
strciphr.cpp
|
||||||
strciphr.h
|
strciphr.h
|
||||||
|
|
|
||||||
|
|
@ -226,9 +226,10 @@ endif # -DCRYPTOPP_DISABLE_SSSE3
|
||||||
endif # -DCRYPTOPP_DISABLE_ASM
|
endif # -DCRYPTOPP_DISABLE_ASM
|
||||||
endif # CXXFLAGS
|
endif # CXXFLAGS
|
||||||
|
|
||||||
|
# SSE2 is a core feature of x86_64
|
||||||
ifeq ($(findstring -DCRYPTOPP_DISABLE_ASM,$(CXXFLAGS)),)
|
ifeq ($(findstring -DCRYPTOPP_DISABLE_ASM,$(CXXFLAGS)),)
|
||||||
ifeq ($(IS_X86),1)
|
ifeq ($(IS_X86),1)
|
||||||
CPU_FLAG = -msse2
|
SSE_FLAG = -msse2
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
ifeq ($(findstring -DCRYPTOPP_DISABLE_SSSE3,$(CXXFLAGS)),)
|
ifeq ($(findstring -DCRYPTOPP_DISABLE_SSSE3,$(CXXFLAGS)),)
|
||||||
|
|
@ -996,13 +997,13 @@ endif
|
||||||
aria-simd.o : aria-simd.cpp
|
aria-simd.o : aria-simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(ARIA_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(ARIA_FLAG) -c) $<
|
||||||
|
|
||||||
# SSE4.2 or ARMv8a available
|
# SSE4.1 or ARMv8a available
|
||||||
blake2-simd.o : blake2-simd.cpp
|
blake2-simd.o : blake2-simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(BLAKE2_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(BLAKE2_FLAG) -c) $<
|
||||||
|
|
||||||
# SSE2 on i586
|
# SSE2 on i586
|
||||||
cpu.o : cpu.cpp
|
sse-simd.o : sse-simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(CPU_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(SSE_FLAG) -c) $<
|
||||||
|
|
||||||
# SSE4.2 or ARMv8a available
|
# SSE4.2 or ARMv8a available
|
||||||
crc-simd.o : crc-simd.cpp
|
crc-simd.o : crc-simd.cpp
|
||||||
|
|
|
||||||
73
cpu.cpp
73
cpu.cpp
|
|
@ -48,11 +48,6 @@ unsigned long int getauxval(unsigned long int) { return 0; }
|
||||||
# include <setjmp.h>
|
# include <setjmp.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Needed by SunCC and MSVC
|
|
||||||
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
|
|
||||||
# include <emmintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
NAMESPACE_BEGIN(CryptoPP)
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||||
|
|
@ -63,7 +58,9 @@ extern "C" {
|
||||||
|
|
||||||
// *************************** IA-32 CPUs ***************************
|
// *************************** IA-32 CPUs ***************************
|
||||||
|
|
||||||
#ifdef CRYPTOPP_CPUID_AVAILABLE
|
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
|
||||||
|
|
||||||
|
extern bool CPU_ProbeSSE2();
|
||||||
|
|
||||||
#if _MSC_VER >= 1500
|
#if _MSC_VER >= 1500
|
||||||
|
|
||||||
|
|
@ -94,12 +91,6 @@ extern "C"
|
||||||
{
|
{
|
||||||
longjmp(s_jmpNoCPUID, 1);
|
longjmp(s_jmpNoCPUID, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static jmp_buf s_jmpNoSSE2;
|
|
||||||
static void SigIllHandlerSSE2(int)
|
|
||||||
{
|
|
||||||
longjmp(s_jmpNoSSE2, 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -186,64 +177,6 @@ bool CpuId(word32 func, word32 subfunc, word32 output[4])
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static bool CPU_ProbeSSE2()
|
|
||||||
{
|
|
||||||
#if CRYPTOPP_BOOL_X64
|
|
||||||
return true;
|
|
||||||
#elif defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
|
||||||
return false;
|
|
||||||
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
|
||||||
__try
|
|
||||||
{
|
|
||||||
# if CRYPTOPP_SSE2_ASM_AVAILABLE
|
|
||||||
AS2(por xmm0, xmm0) // executing SSE2 instruction
|
|
||||||
# elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
|
||||||
__m128i x = _mm_setzero_si128();
|
|
||||||
return _mm_cvtsi128_si32(x) == 0;
|
|
||||||
# endif
|
|
||||||
}
|
|
||||||
// GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
|
|
||||||
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
#else
|
|
||||||
// longjmp and clobber warnings. Volatile is required.
|
|
||||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
||||||
volatile bool result = true;
|
|
||||||
|
|
||||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
|
|
||||||
if (oldHandler == SIG_ERR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
# ifndef __MINGW32__
|
|
||||||
volatile sigset_t oldMask;
|
|
||||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
|
||||||
return false;
|
|
||||||
# endif
|
|
||||||
|
|
||||||
if (setjmp(s_jmpNoSSE2))
|
|
||||||
result = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
# if CRYPTOPP_SSE2_ASM_AVAILABLE
|
|
||||||
__asm __volatile ("por %xmm0, %xmm0");
|
|
||||||
# elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
|
||||||
__m128i x = _mm_setzero_si128();
|
|
||||||
result = _mm_cvtsi128_si32(x) == 0;
|
|
||||||
# endif
|
|
||||||
}
|
|
||||||
|
|
||||||
# ifndef __MINGW32__
|
|
||||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
|
||||||
# endif
|
|
||||||
|
|
||||||
signal(SIGILL, oldHandler);
|
|
||||||
return result;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
|
bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
|
||||||
bool CRYPTOPP_SECTION_INIT CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
|
bool CRYPTOPP_SECTION_INIT CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false, CRYPTOPP_SECTION_INIT g_hasSSE42 = false;
|
bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false, CRYPTOPP_SECTION_INIT g_hasSSE42 = false;
|
||||||
|
|
|
||||||
|
|
@ -237,6 +237,7 @@
|
||||||
<ClCompile Include="sha-simd.cpp" />
|
<ClCompile Include="sha-simd.cpp" />
|
||||||
<ClCompile Include="simple.cpp" />
|
<ClCompile Include="simple.cpp" />
|
||||||
<ClCompile Include="skipjack.cpp" />
|
<ClCompile Include="skipjack.cpp" />
|
||||||
|
<ClCompile Include="sse-simd.cpp" />
|
||||||
<ClCompile Include="strciphr.cpp" />
|
<ClCompile Include="strciphr.cpp" />
|
||||||
<ClCompile Include="trdlocal.cpp" />
|
<ClCompile Include="trdlocal.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
|
||||||
|
|
@ -294,6 +294,7 @@
|
||||||
<ClCompile Include="sosemanuk.cpp" />
|
<ClCompile Include="sosemanuk.cpp" />
|
||||||
<ClCompile Include="square.cpp" />
|
<ClCompile Include="square.cpp" />
|
||||||
<ClCompile Include="squaretb.cpp" />
|
<ClCompile Include="squaretb.cpp" />
|
||||||
|
<ClCompile Include="sse-simd.cpp" />
|
||||||
<ClCompile Include="strciphr.cpp" />
|
<ClCompile Include="strciphr.cpp" />
|
||||||
<ClCompile Include="tea.cpp" />
|
<ClCompile Include="tea.cpp" />
|
||||||
<ClCompile Include="tftables.cpp" />
|
<ClCompile Include="tftables.cpp" />
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// crc-simd.cpp - written and placed in the public domain by
|
// ppc-simd.cpp - written and placed in the public domain by
|
||||||
// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
|
// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
|
||||||
//
|
//
|
||||||
// This source file uses intrinsics to gain access to AltiVec,
|
// This source file uses intrinsics to gain access to AltiVec,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,101 @@
|
||||||
|
// sse-simd.cpp - written and placed in the public domain by
|
||||||
|
// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
|
||||||
|
//
|
||||||
|
// This source file uses intrinsics to gain access to SSE for CPU
|
||||||
|
// feature testing. A separate source file is needed because additional
|
||||||
|
// CXXFLAGS are required to enable the appropriate instruction set in
|
||||||
|
// some build configurations.
|
||||||
|
|
||||||
|
#include "pch.h"
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||||
|
# include <signal.h>
|
||||||
|
# include <setjmp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef EXCEPTION_EXECUTE_HANDLER
|
||||||
|
# define EXCEPTION_EXECUTE_HANDLER 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Needed by SunCC and MSVC
|
||||||
|
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
|
||||||
|
# include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
|
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||||
|
extern "C" {
|
||||||
|
typedef void (*SigHandler)(int);
|
||||||
|
};
|
||||||
|
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
static jmp_buf s_jmpNoSSE2;
|
||||||
|
static void SigIllHandlerSSE2(int)
|
||||||
|
{
|
||||||
|
longjmp(s_jmpNoSSE2, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||||
|
|
||||||
|
bool CPU_ProbeSSE2()
|
||||||
|
{
|
||||||
|
#if CRYPTOPP_BOOL_X64
|
||||||
|
return true;
|
||||||
|
#elif defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
||||||
|
return false;
|
||||||
|
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||||
|
__try
|
||||||
|
{
|
||||||
|
# if CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
|
AS2(por xmm0, xmm0) // executing SSE2 instruction
|
||||||
|
# elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||||
|
__m128i x = _mm_setzero_si128();
|
||||||
|
return _mm_cvtsi128_si32(x) == 0;
|
||||||
|
# endif
|
||||||
|
}
|
||||||
|
// GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
|
||||||
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
#else
|
||||||
|
// longjmp and clobber warnings. Volatile is required.
|
||||||
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||||
|
volatile bool result = true;
|
||||||
|
|
||||||
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
|
||||||
|
if (oldHandler == SIG_ERR)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
# ifndef __MINGW32__
|
||||||
|
volatile sigset_t oldMask;
|
||||||
|
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
||||||
|
return false;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
if (setjmp(s_jmpNoSSE2))
|
||||||
|
result = false;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
# if CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
|
__asm __volatile ("por %xmm0, %xmm0");
|
||||||
|
# elif CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||||
|
__m128i x = _mm_setzero_si128();
|
||||||
|
result = _mm_cvtsi128_si32(x) == 0;
|
||||||
|
# endif
|
||||||
|
}
|
||||||
|
|
||||||
|
# ifndef __MINGW32__
|
||||||
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
||||||
|
# endif
|
||||||
|
|
||||||
|
signal(SIGILL, oldHandler);
|
||||||
|
return result;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
NAMESPACE_END
|
||||||
Loading…
Reference in New Issue