changes to support optimizations

pull/2/head
weidai 2007-04-16 00:39:09 +00:00
parent 3802d23c07
commit a30a7f4ed3
1 changed files with 107 additions and 53 deletions

150
config.h
View File

@ -114,7 +114,7 @@ typedef unsigned int word32;
#define W64LIT(x) x##ui64
#endif
// define largest word type
// define large word type, used for file offsets and such
#ifdef WORD64_AVAILABLE
typedef word64 lword;
const lword LWORD_MAX = W64LIT(0)-1;
@ -123,24 +123,26 @@ typedef unsigned int word32;
const lword LWORD_MAX = lword(0)-1;
#endif
#if defined(__alpha__) || defined(__ia64__) || defined(_ARCH_PPC64) || defined(__x86_64__) || defined(__mips64) || defined(_M_X64)
// These platforms have 64-bit CPU registers. Unfortunately most C++ compilers doesn't
// allow any way to access the 64-bit by 64-bit multiply instruction without using
// assembly, so in order to use word64 as word, the assembly instruction must be defined
// in Dword::Multiply().
#if defined(__SUNPRO_CC) // no Dword::Multiply() for these compilers yet
// define hword, word, and dword. these are used for multiprecision integer arithmetic
// Intel compiler won't have _umul128 until version 10.0. See http://softwarecommunity.intel.com/isn/Community/en-US/forums/thread/30231625.aspx
#if (defined(_MSC_VER) && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000) && (defined(_M_X64) || defined(_M_IA64))) || (defined(__DECCXX) && defined(__alpha__))
typedef word32 hword;
typedef word64 word;
#else
#define CRYPTOPP_NATIVE_DWORD_AVAILABLE
#if defined(__alpha__) || defined(__ia64__) || defined(_ARCH_PPC64) || defined(__x86_64__) || defined(__mips64) || defined(__sparc64__)
#if defined(__GNUC__)
typedef word32 hword;
typedef word64 word;
typedef __uint128_t dword;
#else
// if we're here, it means we're on a 64-bit CPU but we don't have a way to obtain 128-bit multiplication results
typedef word16 hword;
typedef word32 word;
typedef word64 dword;
#else
typedef word32 hword;
typedef word64 word;
#endif
#else
#define CRYPTOPP_NATIVE_DWORD_AVAILABLE
#ifdef WORD64_AVAILABLE
#define CRYPTOPP_SLOW_WORD64 // defined this if your CPU is not 64-bit to use alternative code that avoids word64
#elif defined(WORD64_AVAILABLE)
#define CRYPTOPP_SLOW_WORD64 // use alternative code that avoids word64
typedef word16 hword;
typedef word32 word;
typedef word64 dword;
@ -156,21 +158,14 @@ const unsigned int WORD_BITS = WORD_SIZE * 8;
NAMESPACE_END
#if defined(_MSC_VER) // || defined(__BORLANDC__) intrinsics don't work on BCB 2006
#define INTEL_INTRINSICS
#define FAST_ROTATE
#elif defined(__MWERKS__) && TARGET_CPU_PPC
#define PPC_INTRINSICS
#define FAST_ROTATE
#elif defined(__GNUC__) && defined(__i386__)
// GCC does peephole optimizations which should result in using rotate instructions
#define FAST_ROTATE
#endif
#ifndef CRYPTOPP_L1_CACHE_LINE_SIZE
// This should be a lower bound on the L1 cache line size. It's used for defense against timing attacks.
#if defined(_M_X64) || defined(__x86_64__)
#define CRYPTOPP_L1_CACHE_LINE_SIZE 64
#else
// L1 cache line size is 32 on Pentium III and earlier
#define CRYPTOPP_L1_CACHE_LINE_SIZE 32
#endif
#endif
#if defined(_MSC_VER)
@ -179,20 +174,38 @@ NAMESPACE_END
#endif
#if _MSC_VER > 1200 || defined(_mm_free)
#define CRYPTOPP_MSVC6PP_OR_LATER // VC 6 processor pack or later
#else
#define CRYPTOPP_MSVC6_NO_PP // VC 6 without processor pack
#endif
#endif
#ifndef CRYPTOPP_L1_CACHE_ALIGN
#ifdef __GNUC__
#define CRYPTOPP_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif
#ifndef CRYPTOPP_ALIGN_DATA
#if defined(CRYPTOPP_MSVC6PP_OR_LATER)
#define CRYPTOPP_L1_CACHE_ALIGN(x) __declspec(align(CRYPTOPP_L1_CACHE_LINE_SIZE)) x
#define CRYPTOPP_ALIGN_DATA(x) __declspec(align(x))
#elif defined(__GNUC__)
#define CRYPTOPP_L1_CACHE_ALIGN(x) x __attribute__((aligned(CRYPTOPP_L1_CACHE_LINE_SIZE)))
#else
#define CRYPTOPP_L1_CACHE_ALIGN_NOT_AVAILABLE
#define CRYPTOPP_L1_CACHE_ALIGN(x) x
#define CRYPTOPP_ALIGN_DATA(x) __attribute__((aligned(x)))
#endif
#endif
#ifndef CRYPTOPP_SECTION_ALIGN16
#ifdef __GNUC__
// the alignment attribute doesn't seem to work without this section attribute when -fdata-sections is turned on
#define CRYPTOPP_SECTION_ALIGN16 __attribute__((section ("CryptoPP_Align16")))
#else
#define CRYPTOPP_SECTION_ALIGN16
#endif
#endif
#if defined(_MSC_VER) || defined(__fastcall)
#define CRYPTOPP_FASTCALL __fastcall
#else
#define CRYPTOPP_FASTCALL
#endif
// VC60 workaround: it doesn't allow typename in some places
#if defined(_MSC_VER) && (_MSC_VER < 1300)
#define CPP_TYPENAME
@ -238,9 +251,51 @@ NAMESPACE_END
#define CRYPTOPP_UNCAUGHT_EXCEPTION_AVAILABLE
#endif
#ifdef CRYPTOPP_DISABLE_X86ASM // for backwards compatibility: this macro had both meanings
#define CRYPTOPP_DISABLE_ASM
#define CRYPTOPP_DISABLE_SSE2
#endif
// CodeWarrior defines _MSC_VER
#if !defined(CRYPTOPP_DISABLE_X86ASM) && ((defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86)) || (defined(__GNUC__) && defined(__i386__)))
#define CRYPTOPP_X86ASM_AVAILABLE
#if !defined(CRYPTOPP_DISABLE_ASM) && ((defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86)) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))))
#define CRYPTOPP_X86_ASM_AVAILABLE
#if !defined(CRYPTOPP_DISABLE_SSE2) && (defined(CRYPTOPP_MSVC6PP_OR_LATER) || CRYPTOPP_GCC_VERSION >= 30300)
#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
#endif
// SSSE3 was actually introduced in GNU as 2.17, which was released 6/23/2006, but we can't tell what version of binutils is installed.
// GCC 4.1.2 was released on 2/13/2007, so we'll use that as a proxy for the binutils version.
#if !defined(CRYPTOPP_DISABLE_SSSE3) && (_MSC_VER >= 1400 || CRYPTOPP_GCC_VERSION >= 40102)
#define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
#endif
#endif
#if !defined(CRYPTOPP_DISABLE_SSE2) && (defined(CRYPTOPP_MSVC6PP_OR_LATER) || defined(__SSE2__))
#define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 0
#endif
// how to allocate 16-byte aligned memory (for SSE2)
#if defined(CRYPTOPP_MSVC6PP_OR_LATER)
#define CRYPTOPP_MM_MALLOC_AVAILABLE
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
#define CRYPTOPP_MALLOC_ALIGNMENT_IS_16
#elif defined(__linux__) || defined(__sun__) || defined(__CYGWIN__)
#define CRYPTOPP_MEMALIGN_AVAILABLE
#elif defined(__MINGW32__)
#ifndef _mm_malloc
#define _mm_malloc(a, b) __mingw_aligned_malloc(a, b)
#define _mm_free(a) __mingw_aligned_free(a)
#endif
#define CRYPTOPP_MM_MALLOC_AVAILABLE
#else
#define CRYPTOPP_NO_ALIGNED_ALLOC
#endif
// how to disable inlining
@ -262,21 +317,20 @@ NAMESPACE_END
# define CRYPTOPP_CONSTANT(x) static const int x;
#endif
#ifdef CRYPTOPP_X86ASM_AVAILABLE
#if defined(CRYPTOPP_MSVC6PP_OR_LATER) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 500)) || (defined(__ICL) && (__ICL >= 500))
#define SSE2_INTRINSICS_AVAILABLE
#define CRYPTOPP_MM_MALLOC_AVAILABLE
#endif
// SSE2 intrinsics work in GCC 3.3 or later
#if defined(__SSE2__) && (__GNUC__ > 3 || __GNUC_MINOR__ > 2)
#define SSE2_INTRINSICS_AVAILABLE
// how to allocate 16-byte aligned memory (for SSE2)
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
#define CRYPTOPP_MALLOC_ALIGNMENT_IS_16
#elif defined(__linux__) || defined(__sun__) || defined(__CYGWIN__)
#define CRYPTOPP_MEMALIGN_AVAILABLE
#endif
#endif
#if defined(_M_X64) || defined(__x86_64__)
#define CRYPTOPP_BOOL_X64 1
#else
#define CRYPTOPP_BOOL_X64 0
#endif
#if defined(_M_IX86) || defined(__i386__)
#define CRYPTOPP_BOOL_X86 1
#else
#define CRYPTOPP_BOOL_X86 0
#endif
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86
#define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
#endif
// ***************** determine availability of OS features ********************