From a30a7f4ed306c92c11304bb9d7ce5b641f557597 Mon Sep 17 00:00:00 2001 From: weidai Date: Mon, 16 Apr 2007 00:39:09 +0000 Subject: [PATCH] changes to support optimizations --- config.h | 160 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 107 insertions(+), 53 deletions(-) diff --git a/config.h b/config.h index 8937a191..82fd4951 100644 --- a/config.h +++ b/config.h @@ -114,7 +114,7 @@ typedef unsigned int word32; #define W64LIT(x) x##ui64 #endif -// define largest word type +// define large word type, used for file offsets and such #ifdef WORD64_AVAILABLE typedef word64 lword; const lword LWORD_MAX = W64LIT(0)-1; @@ -123,24 +123,26 @@ typedef unsigned int word32; const lword LWORD_MAX = lword(0)-1; #endif -#if defined(__alpha__) || defined(__ia64__) || defined(_ARCH_PPC64) || defined(__x86_64__) || defined(__mips64) || defined(_M_X64) - // These platforms have 64-bit CPU registers. Unfortunately most C++ compilers doesn't - // allow any way to access the 64-bit by 64-bit multiply instruction without using - // assembly, so in order to use word64 as word, the assembly instruction must be defined - // in Dword::Multiply(). - #if defined(__SUNPRO_CC) // no Dword::Multiply() for these compilers yet - #define CRYPTOPP_NATIVE_DWORD_AVAILABLE - typedef word16 hword; - typedef word32 word; - typedef word64 dword; - #else - typedef word32 hword; - typedef word64 word; - #endif +// define hword, word, and dword. these are used for multiprecision integer arithmetic +// Intel compiler won't have _umul128 until version 10.0. See http://softwarecommunity.intel.com/isn/Community/en-US/forums/thread/30231625.aspx +#if (defined(_MSC_VER) && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000) && (defined(_M_X64) || defined(_M_IA64))) || (defined(__DECCXX) && defined(__alpha__)) + typedef word32 hword; + typedef word64 word; #else #define CRYPTOPP_NATIVE_DWORD_AVAILABLE - #ifdef WORD64_AVAILABLE - #define CRYPTOPP_SLOW_WORD64 // defined this if your CPU is not 64-bit to use alternative code that avoids word64 + #if defined(__alpha__) || defined(__ia64__) || defined(_ARCH_PPC64) || defined(__x86_64__) || defined(__mips64) || defined(__sparc64__) + #if defined(__GNUC__) + typedef word32 hword; + typedef word64 word; + typedef __uint128_t dword; + #else + // if we're here, it means we're on a 64-bit CPU but we don't have a way to obtain 128-bit multiplication results + typedef word16 hword; + typedef word32 word; + typedef word64 dword; + #endif + #elif defined(WORD64_AVAILABLE) + #define CRYPTOPP_SLOW_WORD64 // use alternative code that avoids word64 typedef word16 hword; typedef word32 word; typedef word64 dword; @@ -156,21 +158,14 @@ const unsigned int WORD_BITS = WORD_SIZE * 8; NAMESPACE_END -#if defined(_MSC_VER) // || defined(__BORLANDC__) intrinsics don't work on BCB 2006 - #define INTEL_INTRINSICS - #define FAST_ROTATE -#elif defined(__MWERKS__) && TARGET_CPU_PPC - #define PPC_INTRINSICS - #define FAST_ROTATE -#elif defined(__GNUC__) && defined(__i386__) - // GCC does peephole optimizations which should result in using rotate instructions - #define FAST_ROTATE -#endif - #ifndef CRYPTOPP_L1_CACHE_LINE_SIZE // This should be a lower bound on the L1 cache line size. It's used for defense against timing attacks. - // L1 cache line size is 32 on Pentium III and earlier - #define CRYPTOPP_L1_CACHE_LINE_SIZE 32 + #if defined(_M_X64) || defined(__x86_64__) + #define CRYPTOPP_L1_CACHE_LINE_SIZE 64 + #else + // L1 cache line size is 32 on Pentium III and earlier + #define CRYPTOPP_L1_CACHE_LINE_SIZE 32 + #endif #endif #if defined(_MSC_VER) @@ -179,20 +174,38 @@ NAMESPACE_END #endif #if _MSC_VER > 1200 || defined(_mm_free) #define CRYPTOPP_MSVC6PP_OR_LATER // VC 6 processor pack or later + #else + #define CRYPTOPP_MSVC6_NO_PP // VC 6 without processor pack #endif #endif -#ifndef CRYPTOPP_L1_CACHE_ALIGN +#ifdef __GNUC__ + #define CRYPTOPP_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#ifndef CRYPTOPP_ALIGN_DATA #if defined(CRYPTOPP_MSVC6PP_OR_LATER) - #define CRYPTOPP_L1_CACHE_ALIGN(x) __declspec(align(CRYPTOPP_L1_CACHE_LINE_SIZE)) x + #define CRYPTOPP_ALIGN_DATA(x) __declspec(align(x)) #elif defined(__GNUC__) - #define CRYPTOPP_L1_CACHE_ALIGN(x) x __attribute__((aligned(CRYPTOPP_L1_CACHE_LINE_SIZE))) - #else - #define CRYPTOPP_L1_CACHE_ALIGN_NOT_AVAILABLE - #define CRYPTOPP_L1_CACHE_ALIGN(x) x + #define CRYPTOPP_ALIGN_DATA(x) __attribute__((aligned(x))) #endif #endif +#ifndef CRYPTOPP_SECTION_ALIGN16 + #ifdef __GNUC__ + // the alignment attribute doesn't seem to work without this section attribute when -fdata-sections is turned on + #define CRYPTOPP_SECTION_ALIGN16 __attribute__((section ("CryptoPP_Align16"))) + #else + #define CRYPTOPP_SECTION_ALIGN16 + #endif +#endif + +#if defined(_MSC_VER) || defined(__fastcall) + #define CRYPTOPP_FASTCALL __fastcall +#else + #define CRYPTOPP_FASTCALL +#endif + // VC60 workaround: it doesn't allow typename in some places #if defined(_MSC_VER) && (_MSC_VER < 1300) #define CPP_TYPENAME @@ -238,9 +251,51 @@ NAMESPACE_END #define CRYPTOPP_UNCAUGHT_EXCEPTION_AVAILABLE #endif +#ifdef CRYPTOPP_DISABLE_X86ASM // for backwards compatibility: this macro had both meanings +#define CRYPTOPP_DISABLE_ASM +#define CRYPTOPP_DISABLE_SSE2 +#endif + // CodeWarrior defines _MSC_VER -#if !defined(CRYPTOPP_DISABLE_X86ASM) && ((defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86)) || (defined(__GNUC__) && defined(__i386__))) -#define CRYPTOPP_X86ASM_AVAILABLE +#if !defined(CRYPTOPP_DISABLE_ASM) && ((defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86)) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))) + #define CRYPTOPP_X86_ASM_AVAILABLE + + #if !defined(CRYPTOPP_DISABLE_SSE2) && (defined(CRYPTOPP_MSVC6PP_OR_LATER) || CRYPTOPP_GCC_VERSION >= 30300) + #define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1 + #else + #define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0 + #endif + + // SSSE3 was actually introduced in GNU as 2.17, which was released 6/23/2006, but we can't tell what version of binutils is installed. + // GCC 4.1.2 was released on 2/13/2007, so we'll use that as a proxy for the binutils version. + #if !defined(CRYPTOPP_DISABLE_SSSE3) && (_MSC_VER >= 1400 || CRYPTOPP_GCC_VERSION >= 40102) + #define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 1 + #else + #define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0 + #endif +#endif + +#if !defined(CRYPTOPP_DISABLE_SSE2) && (defined(CRYPTOPP_MSVC6PP_OR_LATER) || defined(__SSE2__)) + #define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 1 +#else + #define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 0 +#endif + +// how to allocate 16-byte aligned memory (for SSE2) +#if defined(CRYPTOPP_MSVC6PP_OR_LATER) + #define CRYPTOPP_MM_MALLOC_AVAILABLE +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) + #define CRYPTOPP_MALLOC_ALIGNMENT_IS_16 +#elif defined(__linux__) || defined(__sun__) || defined(__CYGWIN__) + #define CRYPTOPP_MEMALIGN_AVAILABLE +#elif defined(__MINGW32__) + #ifndef _mm_malloc + #define _mm_malloc(a, b) __mingw_aligned_malloc(a, b) + #define _mm_free(a) __mingw_aligned_free(a) + #endif + #define CRYPTOPP_MM_MALLOC_AVAILABLE +#else + #define CRYPTOPP_NO_ALIGNED_ALLOC #endif // how to disable inlining @@ -262,21 +317,20 @@ NAMESPACE_END # define CRYPTOPP_CONSTANT(x) static const int x; #endif -#ifdef CRYPTOPP_X86ASM_AVAILABLE - #if defined(CRYPTOPP_MSVC6PP_OR_LATER) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 500)) || (defined(__ICL) && (__ICL >= 500)) - #define SSE2_INTRINSICS_AVAILABLE - #define CRYPTOPP_MM_MALLOC_AVAILABLE - #endif - // SSE2 intrinsics work in GCC 3.3 or later - #if defined(__SSE2__) && (__GNUC__ > 3 || __GNUC_MINOR__ > 2) - #define SSE2_INTRINSICS_AVAILABLE - // how to allocate 16-byte aligned memory (for SSE2) - #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) - #define CRYPTOPP_MALLOC_ALIGNMENT_IS_16 - #elif defined(__linux__) || defined(__sun__) || defined(__CYGWIN__) - #define CRYPTOPP_MEMALIGN_AVAILABLE - #endif - #endif +#if defined(_M_X64) || defined(__x86_64__) + #define CRYPTOPP_BOOL_X64 1 +#else + #define CRYPTOPP_BOOL_X64 0 +#endif + +#if defined(_M_IX86) || defined(__i386__) + #define CRYPTOPP_BOOL_X86 1 +#else + #define CRYPTOPP_BOOL_X86 0 +#endif + +#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 + #define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS #endif // ***************** determine availability of OS features ********************