Cleanup donna_32.cpp arithmetic shift
parent
a09d4024b7
commit
459035fcaa
95
donna_32.cpp
95
donna_32.cpp
|
|
@ -56,10 +56,9 @@
|
||||||
#include "stdcpp.h"
|
#include "stdcpp.h"
|
||||||
#include "cpu.h"
|
#include "cpu.h"
|
||||||
|
|
||||||
// This macro is not in a header like config.h because
|
// This macro is not in a header like config.h because we don't want it
|
||||||
// we don't want it exposed to user code. We also need
|
// exposed to user code. We also need a standard header like <stdint.h>
|
||||||
// a standard header like <stdint.h> or <stdef.h>.
|
// or <stdef.h>. Langley uses uint128_t in the 64-bit code paths so
|
||||||
// Langley uses uint128_t in the 64-bit code paths so
|
|
||||||
// we further restrict 64-bit code.
|
// we further restrict 64-bit code.
|
||||||
#if (UINTPTR_MAX == 0xffffffff) || !defined(CRYPTOPP_WORD128_AVAILABLE)
|
#if (UINTPTR_MAX == 0xffffffff) || !defined(CRYPTOPP_WORD128_AVAILABLE)
|
||||||
# define CRYPTOPP_32BIT 1
|
# define CRYPTOPP_32BIT 1
|
||||||
|
|
@ -67,6 +66,16 @@
|
||||||
# define CRYPTOPP_64BIT 1
|
# define CRYPTOPP_64BIT 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Some compilers don't handle the code for the arithmetic shifts well.
|
||||||
|
// For compilers we know how to support we will issue the asm to sidestep
|
||||||
|
// believe the problem. We also switch to a slightly different pattern.
|
||||||
|
// We believe the pattern preserves the existing properties without a
|
||||||
|
// branch. The ASM uses one intruction, while the C statement use 2 to 6
|
||||||
|
// instructions, depending on the compiler.
|
||||||
|
#if (__GNUC__ >= 3) || (__SUNPRO_CC >= 0x5100)
|
||||||
|
# define CRYPTOPP_ASR_ASM 1
|
||||||
|
#endif
|
||||||
|
|
||||||
// Squash MS LNK4221 and libtool warnings
|
// Squash MS LNK4221 and libtool warnings
|
||||||
extern const char DONNA32_FNAME[] = __FILE__;
|
extern const char DONNA32_FNAME[] = __FILE__;
|
||||||
|
|
||||||
|
|
@ -84,41 +93,53 @@ using CryptoPP::sword64;
|
||||||
typedef sword64 limb;
|
typedef sword64 limb;
|
||||||
|
|
||||||
// Added by JW for SunCC. Avoid the bit twiddling hacks.
|
// Added by JW for SunCC. Avoid the bit twiddling hacks.
|
||||||
|
template <unsigned int S>
|
||||||
inline int SignExtend(int val)
|
inline int SignExtend(int val)
|
||||||
{
|
{
|
||||||
#if (__GNUC__ >= 3) || (__SUNPRO_CC >= 0x5100)
|
#if defined(CRYPTOPP_ASR_ASM) && (CRYPTOPP_BOOL_X86)
|
||||||
# if CRYPTOPP_BOOL_X86
|
|
||||||
__asm__
|
__asm__
|
||||||
(
|
(
|
||||||
"sar $31, %0 \n"
|
"sar %1, %0 \n"
|
||||||
: "+g" (val) : : "cc"
|
: "+g" (val) : "I" (S) : "cc"
|
||||||
);
|
);
|
||||||
return val;
|
return val;
|
||||||
# endif
|
#elif defined(CRYPTOPP_ASR_ASM) && (CRYPTOPP_BOOL_ARM32)
|
||||||
// TODO: ARM
|
__asm__
|
||||||
#endif
|
(
|
||||||
|
"asr %0, %0, %1 \n"
|
||||||
|
: "+r" (val) : "I" (S) :
|
||||||
|
);
|
||||||
|
return val;
|
||||||
|
#else
|
||||||
// GCC and SunCC compile down to a shift and neg.
|
// GCC and SunCC compile down to a shift and neg.
|
||||||
return (val >> 31) * -1;
|
// Also see the comments for CRYPTOPP_ASR_ASM.
|
||||||
|
return (val >> S) * -1;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Added by JW for SunCC. Avoid the bit twiddling hacks.
|
template <unsigned int S>
|
||||||
inline unsigned int SignExtend(unsigned int val)
|
inline unsigned int SignExtend(unsigned int val)
|
||||||
{
|
{
|
||||||
#if (__GNUC__ >= 3) || (__SUNPRO_CC >= 0x5100)
|
#if defined(CRYPTOPP_ASR_ASM) && (CRYPTOPP_BOOL_X86)
|
||||||
# if CRYPTOPP_BOOL_X86
|
|
||||||
__asm__
|
__asm__
|
||||||
(
|
(
|
||||||
"sar $31, %0 \n"
|
"sar %1, %0 \n"
|
||||||
: "+g" (val) : : "cc"
|
: "+g" (val) : "I" (S) : "cc"
|
||||||
);
|
);
|
||||||
return val;
|
return val;
|
||||||
# endif
|
#elif defined(CRYPTOPP_ASR_ASM) && (CRYPTOPP_BOOL_ARM32)
|
||||||
// TODO: ARM
|
__asm__
|
||||||
#endif
|
(
|
||||||
|
"asr %0, %0, %1 \n"
|
||||||
|
: "+r" (val) : "I" (S) :
|
||||||
|
);
|
||||||
|
return val;
|
||||||
|
#else
|
||||||
// GCC and SunCC compile down to a shift and neg.
|
// GCC and SunCC compile down to a shift and neg.
|
||||||
return (unsigned int)(((signed int)(val >> 31)) * -1);
|
// Also see the comments for CRYPTOPP_ASR_ASM.
|
||||||
|
const signed int v = (signed int)(val >> S)
|
||||||
|
return (unsigned int)(v * -1);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Field element representation:
|
/* Field element representation:
|
||||||
|
|
@ -304,7 +325,7 @@ void freduce_degree(limb *output)
|
||||||
output[0] += output[10];
|
output[0] += output[10];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Modified for SunCC. See comments for SignExtexnd function.
|
// Modified for SunCC. See comments for CRYPTOPP_ASR_ASM.
|
||||||
// #if (-1 & 3) != 3
|
// #if (-1 & 3) != 3
|
||||||
// #error "This code only works on a two's complement system"
|
// #error "This code only works on a two's complement system"
|
||||||
// #endif
|
// #endif
|
||||||
|
|
@ -317,13 +338,15 @@ inline limb div_by_2_26(const limb v)
|
||||||
/* High word of v; no shift needed. */
|
/* High word of v; no shift needed. */
|
||||||
const word32 highword = (word32) (((word64) v) >> 32);
|
const word32 highword = (word32) (((word64) v) >> 32);
|
||||||
|
|
||||||
// Modified for SunCC. See comments for SignExtexnd function.
|
// Modified for SunCC. See comments for CRYPTOPP_ASR_ASM.
|
||||||
/* Set to all 1s if v was negative; else set to 0s. */
|
/* Set to all 1s if v was negative; else set to 0s. */
|
||||||
/* const sword32 sign = ((sword32) highword) >> 31; */
|
/* const sword32 sign = ((sword32) highword) >> 31; */
|
||||||
const sword32 sign = SignExtend(highword);
|
const word32 sign = SignExtend<31>(highword);
|
||||||
|
|
||||||
/* Set to 0x3ffffff if v was negative; else set to 0. */
|
/* Set to 0x3ffffff if v was negative; else set to 0. */
|
||||||
const sword32 roundoff = ((word32) sign) >> 6;
|
/* const sword32 roundoff = ((word32) sign) >> 6; */
|
||||||
|
const sword32 roundoff = (sword32)(sign >> 6);
|
||||||
|
|
||||||
/* Should return v / (1<<26) */
|
/* Should return v / (1<<26) */
|
||||||
return (v + roundoff) >> 26;
|
return (v + roundoff) >> 26;
|
||||||
}
|
}
|
||||||
|
|
@ -336,13 +359,15 @@ inline limb div_by_2_25(const limb v)
|
||||||
/* High word of v; no shift needed*/
|
/* High word of v; no shift needed*/
|
||||||
const word32 highword = (word32) (((word64) v) >> 32);
|
const word32 highword = (word32) (((word64) v) >> 32);
|
||||||
|
|
||||||
// Modified for SunCC. See comments for SignExtexnd function.
|
// Modified for SunCC. See comments for CRYPTOPP_ASR_ASM.
|
||||||
/* Set to all 1s if v was negative; else set to 0s. */
|
/* Set to all 1s if v was negative; else set to 0s. */
|
||||||
/* const sword32 sign = ((sword32) highword) >> 31; */
|
/* const sword32 sign = ((sword32) highword) >> 31; */
|
||||||
const sword32 sign = SignExtend(highword);
|
const word32 sign = SignExtend<31>(highword);
|
||||||
|
|
||||||
/* Set to 0x1ffffff if v was negative; else set to 0. */
|
/* Set to 0x1ffffff if v was negative; else set to 0. */
|
||||||
const sword32 roundoff = ((word32) sign) >> 7;
|
/* const sword32 roundoff = ((word32) sign) >> 7; */
|
||||||
|
const sword32 roundoff = (sword32)(sign >> 7);
|
||||||
|
|
||||||
/* Should return v / (1<<25) */
|
/* Should return v / (1<<25) */
|
||||||
return (v + roundoff) >> 25;
|
return (v + roundoff) >> 25;
|
||||||
}
|
}
|
||||||
|
|
@ -517,7 +542,7 @@ void fexpand(limb *output, const byte *input)
|
||||||
#undef F
|
#undef F
|
||||||
}
|
}
|
||||||
|
|
||||||
// Modified for SunCC. See comments for SignExtexnd function.
|
// Modified for SunCC. See comments for CRYPTOPP_ASR_ASM.
|
||||||
// #if (-32 >> 1) != -16
|
// #if (-32 >> 1) != -16
|
||||||
// #error "This code only works when >> does sign-extension on negative numbers"
|
// #error "This code only works when >> does sign-extension on negative numbers"
|
||||||
// #endif
|
// #endif
|
||||||
|
|
@ -525,7 +550,7 @@ void fexpand(limb *output, const byte *input)
|
||||||
/* sword32_eq returns 0xffffffff iff a == b and zero otherwise. */
|
/* sword32_eq returns 0xffffffff iff a == b and zero otherwise. */
|
||||||
sword32 sword32_eq(sword32 a, sword32 b)
|
sword32 sword32_eq(sword32 a, sword32 b)
|
||||||
{
|
{
|
||||||
// Modified for SunCC. See comments for SignExtexnd function.
|
// Modified for SunCC. See comments for CRYPTOPP_ASR_ASM.
|
||||||
a = ~(a ^ b);
|
a = ~(a ^ b);
|
||||||
a &= a << 16;
|
a &= a << 16;
|
||||||
a &= a << 8;
|
a &= a << 8;
|
||||||
|
|
@ -533,18 +558,18 @@ sword32 sword32_eq(sword32 a, sword32 b)
|
||||||
a &= a << 2;
|
a &= a << 2;
|
||||||
a &= a << 1;
|
a &= a << 1;
|
||||||
/* return a >> 31; */
|
/* return a >> 31; */
|
||||||
return (sword32)SignExtend(a);
|
return (sword32)SignExtend<31>(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* sword32_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are
|
/* sword32_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are
|
||||||
* both non-negative. */
|
* both non-negative. */
|
||||||
sword32 sword32_gte(sword32 a, sword32 b)
|
sword32 sword32_gte(sword32 a, sword32 b)
|
||||||
{
|
{
|
||||||
// Modified for SunCC. See comments for SignExtexnd function.
|
// Modified for SunCC. See comments for CRYPTOPP_ASR_ASM.
|
||||||
a -= b;
|
a -= b;
|
||||||
/* a >= 0 iff a >= b. */
|
/* a >= 0 iff a >= b. */
|
||||||
/* return ~(a >> 31); */
|
/* return ~(a >> 31); */
|
||||||
return ~(sword32)SignExtend(a);
|
return ~(sword32)SignExtend<31>(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Take a fully reduced polynomial form number and contract it into a
|
/* Take a fully reduced polynomial form number and contract it into a
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue