fix compile with Intel compiler
parent
ab6b054cf4
commit
68a38c0b9a
|
|
@ -27,10 +27,11 @@ endif
|
||||||
|
|
||||||
ifeq ($(ISX86),1)
|
ifeq ($(ISX86),1)
|
||||||
|
|
||||||
GCC33_OR_LATER = $(shell $(CXX) -v 2>&1 | $(EGREP) -c "gcc version (3.[3-9]|[4-9])")
|
GCC33_OR_LATER = $(shell $(CXX) --version 2>&1 | $(EGREP) -c "\(GCC\) (3.[3-9]|[4-9])")
|
||||||
|
INTEL_COMPILER = $(shell $(CXX) --version 2>&1 | $(EGREP) -c "\(ICC\)")
|
||||||
GAS210_OR_LATER = $(shell echo "" | $(AS) -v 2>&1 | $(EGREP) -c "GNU assembler version (2.[1-9][0-9]|[3-9])")
|
GAS210_OR_LATER = $(shell echo "" | $(AS) -v 2>&1 | $(EGREP) -c "GNU assembler version (2.[1-9][0-9]|[3-9])")
|
||||||
|
|
||||||
ifeq ($(GCC33_OR_LATER),1)
|
ifneq ($(GCC33_OR_LATER) $(INTEL_COMPILER),0 0)
|
||||||
CXXFLAGS += -msse2
|
CXXFLAGS += -msse2
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
||||||
68
integer.cpp
68
integer.cpp
|
|
@ -413,18 +413,17 @@ inline word DWord::operator%(word a)
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
#define CRYPTOPP_NAKED
|
#define CRYPTOPP_NAKED
|
||||||
#define AddPrologue \
|
#define AddPrologue \
|
||||||
|
word32 result; \
|
||||||
__asm__ __volatile__ \
|
__asm__ __volatile__ \
|
||||||
( \
|
( \
|
||||||
"push %%ebx;" /* save this manually, in case of -fPIC */ \
|
|
||||||
"mov %2, %%ebx;" \
|
|
||||||
".intel_syntax noprefix;"
|
".intel_syntax noprefix;"
|
||||||
#define AddEpilogue \
|
#define AddEpilogue \
|
||||||
".att_syntax prefix;" \
|
".att_syntax prefix;" \
|
||||||
"pop %%ebx;" \
|
: "=a" (result)\
|
||||||
: \
|
: "d" (C), "a" (A), "D" (B), "c" (N) \
|
||||||
: "d" (C), "a" (A), "m" (B), "c" (N) \
|
|
||||||
: "%esi", "memory", "cc" \
|
: "%esi", "memory", "cc" \
|
||||||
);
|
);\
|
||||||
|
return result;
|
||||||
#define MulPrologue \
|
#define MulPrologue \
|
||||||
__asm__ __volatile__ \
|
__asm__ __volatile__ \
|
||||||
( \
|
( \
|
||||||
|
|
@ -457,13 +456,13 @@ inline word DWord::operator%(word a)
|
||||||
#else
|
#else
|
||||||
#define CRYPTOPP_NAKED __declspec(naked)
|
#define CRYPTOPP_NAKED __declspec(naked)
|
||||||
#define AddPrologue \
|
#define AddPrologue \
|
||||||
__asm push ebx \
|
__asm push edi \
|
||||||
__asm push esi \
|
__asm push esi \
|
||||||
__asm mov eax, [esp+12] \
|
__asm mov eax, [esp+12] \
|
||||||
__asm mov ebx, [esp+16]
|
__asm mov edi, [esp+16]
|
||||||
#define AddEpilogue \
|
#define AddEpilogue \
|
||||||
__asm pop esi \
|
__asm pop esi \
|
||||||
__asm pop ebx \
|
__asm pop edi \
|
||||||
__asm ret 8
|
__asm ret 8
|
||||||
#define SquPrologue \
|
#define SquPrologue \
|
||||||
AS2( mov eax, A) \
|
AS2( mov eax, A) \
|
||||||
|
|
@ -495,9 +494,9 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word
|
||||||
{
|
{
|
||||||
AddPrologue
|
AddPrologue
|
||||||
|
|
||||||
// now: eax = A, ebx = B, edx = C, ecx = N
|
// now: eax = A, edi = B, edx = C, ecx = N
|
||||||
AS2( lea eax, [eax+4*ecx])
|
AS2( lea eax, [eax+4*ecx])
|
||||||
AS2( lea ebx, [ebx+4*ecx])
|
AS2( lea edi, [edi+4*ecx])
|
||||||
AS2( lea edx, [edx+4*ecx])
|
AS2( lea edx, [edx+4*ecx])
|
||||||
|
|
||||||
AS1( neg ecx) // ecx is negative index
|
AS1( neg ecx) // ecx is negative index
|
||||||
|
|
@ -509,17 +508,17 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word
|
||||||
ASL(0)
|
ASL(0)
|
||||||
ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
|
ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
|
||||||
AS2( mov esi,[eax+4*ecx])
|
AS2( mov esi,[eax+4*ecx])
|
||||||
AS2( adc esi,[ebx+4*ecx])
|
AS2( adc esi,[edi+4*ecx])
|
||||||
AS2( mov [edx+4*ecx],esi)
|
AS2( mov [edx+4*ecx],esi)
|
||||||
AS2( mov esi,[eax+4*ecx+4])
|
AS2( mov esi,[eax+4*ecx+4])
|
||||||
AS2( adc esi,[ebx+4*ecx+4])
|
AS2( adc esi,[edi+4*ecx+4])
|
||||||
AS2( mov [edx+4*ecx+4],esi)
|
AS2( mov [edx+4*ecx+4],esi)
|
||||||
ASL(1)
|
ASL(1)
|
||||||
AS2( mov esi,[eax+4*ecx+8])
|
AS2( mov esi,[eax+4*ecx+8])
|
||||||
AS2( adc esi,[ebx+4*ecx+8])
|
AS2( adc esi,[edi+4*ecx+8])
|
||||||
AS2( mov [edx+4*ecx+8],esi)
|
AS2( mov [edx+4*ecx+8],esi)
|
||||||
AS2( mov esi,[eax+4*ecx+12])
|
AS2( mov esi,[eax+4*ecx+12])
|
||||||
AS2( adc esi,[ebx+4*ecx+12])
|
AS2( adc esi,[edi+4*ecx+12])
|
||||||
AS2( mov [edx+4*ecx+12],esi)
|
AS2( mov [edx+4*ecx+12],esi)
|
||||||
|
|
||||||
AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
|
AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
|
||||||
|
|
@ -536,9 +535,9 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word
|
||||||
{
|
{
|
||||||
AddPrologue
|
AddPrologue
|
||||||
|
|
||||||
// now: eax = A, ebx = B, edx = C, ecx = N
|
// now: eax = A, edi = B, edx = C, ecx = N
|
||||||
AS2( lea eax, [eax+4*ecx])
|
AS2( lea eax, [eax+4*ecx])
|
||||||
AS2( lea ebx, [ebx+4*ecx])
|
AS2( lea edi, [edi+4*ecx])
|
||||||
AS2( lea edx, [edx+4*ecx])
|
AS2( lea edx, [edx+4*ecx])
|
||||||
|
|
||||||
AS1( neg ecx) // ecx is negative index
|
AS1( neg ecx) // ecx is negative index
|
||||||
|
|
@ -550,17 +549,17 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word
|
||||||
ASL(0)
|
ASL(0)
|
||||||
ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
|
ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
|
||||||
AS2( mov esi,[eax+4*ecx])
|
AS2( mov esi,[eax+4*ecx])
|
||||||
AS2( sbb esi,[ebx+4*ecx])
|
AS2( sbb esi,[edi+4*ecx])
|
||||||
AS2( mov [edx+4*ecx],esi)
|
AS2( mov [edx+4*ecx],esi)
|
||||||
AS2( mov esi,[eax+4*ecx+4])
|
AS2( mov esi,[eax+4*ecx+4])
|
||||||
AS2( sbb esi,[ebx+4*ecx+4])
|
AS2( sbb esi,[edi+4*ecx+4])
|
||||||
AS2( mov [edx+4*ecx+4],esi)
|
AS2( mov [edx+4*ecx+4],esi)
|
||||||
ASL(1)
|
ASL(1)
|
||||||
AS2( mov esi,[eax+4*ecx+8])
|
AS2( mov esi,[eax+4*ecx+8])
|
||||||
AS2( sbb esi,[ebx+4*ecx+8])
|
AS2( sbb esi,[edi+4*ecx+8])
|
||||||
AS2( mov [edx+4*ecx+8],esi)
|
AS2( mov [edx+4*ecx+8],esi)
|
||||||
AS2( mov esi,[eax+4*ecx+12])
|
AS2( mov esi,[eax+4*ecx+12])
|
||||||
AS2( sbb esi,[ebx+4*ecx+12])
|
AS2( sbb esi,[edi+4*ecx+12])
|
||||||
AS2( mov [edx+4*ecx+12],esi)
|
AS2( mov [edx+4*ecx+12],esi)
|
||||||
|
|
||||||
AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
|
AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
|
||||||
|
|
@ -573,13 +572,14 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word
|
||||||
AddEpilogue
|
AddEpilogue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||||
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
|
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
|
||||||
{
|
{
|
||||||
AddPrologue
|
AddPrologue
|
||||||
|
|
||||||
// now: eax = A, ebx = B, edx = C, ecx = N
|
// now: eax = A, edi = B, edx = C, ecx = N
|
||||||
AS2( lea eax, [eax+4*ecx])
|
AS2( lea eax, [eax+4*ecx])
|
||||||
AS2( lea ebx, [ebx+4*ecx])
|
AS2( lea edi, [edi+4*ecx])
|
||||||
AS2( lea edx, [edx+4*ecx])
|
AS2( lea edx, [edx+4*ecx])
|
||||||
|
|
||||||
AS1( neg ecx) // ecx is negative index
|
AS1( neg ecx) // ecx is negative index
|
||||||
|
|
@ -592,14 +592,14 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A,
|
||||||
|
|
||||||
ASL(0)
|
ASL(0)
|
||||||
AS2( movd mm0, DWORD PTR [eax+4*ecx])
|
AS2( movd mm0, DWORD PTR [eax+4*ecx])
|
||||||
AS2( movd mm1, DWORD PTR [ebx+4*ecx])
|
AS2( movd mm1, DWORD PTR [edi+4*ecx])
|
||||||
AS2( paddq mm0, mm1)
|
AS2( paddq mm0, mm1)
|
||||||
AS2( paddq mm2, mm0)
|
AS2( paddq mm2, mm0)
|
||||||
AS2( movd DWORD PTR [edx+4*ecx], mm2)
|
AS2( movd DWORD PTR [edx+4*ecx], mm2)
|
||||||
AS2( psrlq mm2, 32)
|
AS2( psrlq mm2, 32)
|
||||||
|
|
||||||
AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
|
AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
|
||||||
AS2( movd mm1, DWORD PTR [ebx+4*ecx+4])
|
AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
|
||||||
AS2( paddq mm0, mm1)
|
AS2( paddq mm0, mm1)
|
||||||
AS2( paddq mm2, mm0)
|
AS2( paddq mm2, mm0)
|
||||||
AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
|
AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
|
||||||
|
|
@ -607,14 +607,14 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A,
|
||||||
|
|
||||||
ASL(1)
|
ASL(1)
|
||||||
AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
|
AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
|
||||||
AS2( movd mm1, DWORD PTR [ebx+4*ecx+8])
|
AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
|
||||||
AS2( paddq mm0, mm1)
|
AS2( paddq mm0, mm1)
|
||||||
AS2( paddq mm2, mm0)
|
AS2( paddq mm2, mm0)
|
||||||
AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
|
AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
|
||||||
AS2( psrlq mm2, 32)
|
AS2( psrlq mm2, 32)
|
||||||
|
|
||||||
AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
|
AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
|
||||||
AS2( movd mm1, DWORD PTR [ebx+4*ecx+12])
|
AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
|
||||||
AS2( paddq mm0, mm1)
|
AS2( paddq mm0, mm1)
|
||||||
AS2( paddq mm2, mm0)
|
AS2( paddq mm2, mm0)
|
||||||
AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
|
AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
|
||||||
|
|
@ -629,14 +629,13 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A,
|
||||||
|
|
||||||
AddEpilogue
|
AddEpilogue
|
||||||
}
|
}
|
||||||
|
|
||||||
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
|
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
|
||||||
{
|
{
|
||||||
AddPrologue
|
AddPrologue
|
||||||
|
|
||||||
// now: eax = A, ebx = B, edx = C, ecx = N
|
// now: eax = A, edi = B, edx = C, ecx = N
|
||||||
AS2( lea eax, [eax+4*ecx])
|
AS2( lea eax, [eax+4*ecx])
|
||||||
AS2( lea ebx, [ebx+4*ecx])
|
AS2( lea edi, [edi+4*ecx])
|
||||||
AS2( lea edx, [edx+4*ecx])
|
AS2( lea edx, [edx+4*ecx])
|
||||||
|
|
||||||
AS1( neg ecx) // ecx is negative index
|
AS1( neg ecx) // ecx is negative index
|
||||||
|
|
@ -649,14 +648,14 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A,
|
||||||
|
|
||||||
ASL(0)
|
ASL(0)
|
||||||
AS2( movd mm0, DWORD PTR [eax+4*ecx])
|
AS2( movd mm0, DWORD PTR [eax+4*ecx])
|
||||||
AS2( movd mm1, DWORD PTR [ebx+4*ecx])
|
AS2( movd mm1, DWORD PTR [edi+4*ecx])
|
||||||
AS2( psubq mm0, mm1)
|
AS2( psubq mm0, mm1)
|
||||||
AS2( psubq mm0, mm2)
|
AS2( psubq mm0, mm2)
|
||||||
AS2( movd DWORD PTR [edx+4*ecx], mm0)
|
AS2( movd DWORD PTR [edx+4*ecx], mm0)
|
||||||
AS2( psrlq mm0, 63)
|
AS2( psrlq mm0, 63)
|
||||||
|
|
||||||
AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
|
AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
|
||||||
AS2( movd mm1, DWORD PTR [ebx+4*ecx+4])
|
AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
|
||||||
AS2( psubq mm2, mm1)
|
AS2( psubq mm2, mm1)
|
||||||
AS2( psubq mm2, mm0)
|
AS2( psubq mm2, mm0)
|
||||||
AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
|
AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
|
||||||
|
|
@ -664,14 +663,14 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A,
|
||||||
|
|
||||||
ASL(1)
|
ASL(1)
|
||||||
AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
|
AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
|
||||||
AS2( movd mm1, DWORD PTR [ebx+4*ecx+8])
|
AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
|
||||||
AS2( psubq mm0, mm1)
|
AS2( psubq mm0, mm1)
|
||||||
AS2( psubq mm0, mm2)
|
AS2( psubq mm0, mm2)
|
||||||
AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
|
AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
|
||||||
AS2( psrlq mm0, 63)
|
AS2( psrlq mm0, 63)
|
||||||
|
|
||||||
AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
|
AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
|
||||||
AS2( movd mm1, DWORD PTR [ebx+4*ecx+12])
|
AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
|
||||||
AS2( psubq mm2, mm1)
|
AS2( psubq mm2, mm1)
|
||||||
AS2( psubq mm2, mm0)
|
AS2( psubq mm2, mm0)
|
||||||
AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
|
AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
|
||||||
|
|
@ -686,6 +685,7 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A,
|
||||||
|
|
||||||
AddEpilogue
|
AddEpilogue
|
||||||
}
|
}
|
||||||
|
#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||||
#else
|
#else
|
||||||
int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
|
int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue