Cut-in CRC test for SSE4.2 and ARMv8a

Also see https://groups.google.com/forum/#!topic/cryptopp-users/-1fZCx8JSRE
pull/461/head
Jeffrey Walton 2017-07-29 00:24:07 -04:00
parent b5191dde6f
commit fe9e21ddd7
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
12 changed files with 218 additions and 127 deletions

View File

@ -53,6 +53,7 @@ config.h
cpu.cpp
cpu.h
crc.cpp
crc-simd.cpp
crc.h
cryptdll.vcxproj
cryptdll.vcxproj.filters

View File

@ -2,6 +2,8 @@
##### System Attributes and Programs #####
###########################################################
TEMPDIR ?= /tmp
AR ?= ar
ARFLAGS ?= -cr # ar needs the dash on OpenBSD
RANLIB ?= ranlib
@ -19,8 +21,9 @@ UNAME := $(shell uname)
IS_X86 := $(shell uname -m | $(EGREP) -v "x86_64" | $(EGREP) -i -c "i.86|x86|i86")
IS_X64 := $(shell uname -m | $(EGREP) -i -c "(_64|d64)")
IS_PPC := $(shell uname -m | $(EGREP) -i -c "ppc|power")
IS_ARM32 := $(shell uname -m | $(EGREP) -i -c "arm")
IS_ARM32 := $(shell uname -m | $(EGREP) -v "arm64" | $(EGREP) -i -c "arm")
IS_ARM64 := $(shell uname -m | $(EGREP) -i -c "aarch64")
IS_ARMV8 ?= $(shell uname -m | $(EGREP) -i -c 'aarch32|aarch64')
IS_SPARC := $(shell uname -m | $(EGREP) -i -c "sparc")
IS_SPARC64 := $(shell uname -m | $(EGREP) -i -c "sparc64")
@ -81,12 +84,12 @@ HAS_NEWLIB := $(shell $(CXX) -x c++ $(CXXFLAGS) -dM -E adhoc.cpp.proto 2>&1 | $(
# Base CXXFLAGS used if the user did not specify them
ifeq ($(SUN_COMPILER),1)
ifeq ($(SUNCC_512_OR_LATER),1)
CXXFLAGS ?= -DNDEBUG -g3 -xO2
CXXFLAGS ?= -DNDEBUG -g3 -xO3
else
CXXFLAGS ?= -DNDEBUG -g -xO2
CXXFLAGS ?= -DNDEBUG -g -xO3
endif
else
CXXFLAGS ?= -DNDEBUG -g2 -O2
CXXFLAGS ?= -DNDEBUG -g2 -O3
endif
# Default prefix for make install
@ -191,6 +194,11 @@ endif # -DCRYPTOPP_DISABLE_SSSE3
endif # -DCRYPTOPP_DISABLE_ASM
endif # CXXFLAGS
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -msse4.2 -o $(TEMPDIR)/t.o -c crc-simd.cpp 2>/dev/null; echo $$?)
ifeq ($(HAS_CRC),0)
CRC_FLAG := -msse4.2
endif
# BEGIN_NATIVE_ARCH
# Guard use of -march=native (or -m{32|64} on some platforms)
# Don't add anything if -march=XXX or -mtune=XXX is specified
@ -280,6 +288,13 @@ CXXFLAGS += -pipe
endif
endif
ifeq ($(IS_ARMV8),1)
HAS_CRC := $(shell $(CXX) $(CXXFLAGS) -march=armv8-a+crc -o $(TEMPDIR)/t.o -c crc-simd.cpp 2>/dev/null; echo $$?)
ifeq ($(HAS_CRC),0)
CRC_FLAG := -march=armv8-a+crc
endif
endif
endif # IS_X86
###########################################################
@ -287,7 +302,7 @@ endif # IS_X86
###########################################################
# For SunOS, create a Mapfile that allows our object files
# to cantain additional bits (like SSE4 and AES on old Xeon)
# to contain additional bits (like SSE4 and AES on old Xeon)
# http://www.oracle.com/technetwork/server-storage/solaris/hwcap-modification-139536.html
ifeq ($(IS_SUN)$(SUN_COMPILER),11)
ifneq ($(IS_X86)$(IS_X32)$(IS_X64),000)
@ -526,7 +541,7 @@ TESTOBJS := $(TESTSRCS:.cpp=.o)
LIBOBJS := $(filter-out $(TESTOBJS),$(OBJS))
# List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems.
DLLSRCS := cryptlib.cpp cpu.cpp integer.cpp shacal2.cpp md5.cpp shark.cpp zinflate.cpp gf2n.cpp salsa.cpp xtr.cpp oaep.cpp poly1305.cpp polynomi.cpp rc2.cpp default.cpp wait.cpp wake.cpp twofish.cpp iterhash.cpp adler32.cpp elgamal.cpp marss.cpp blowfish.cpp ecp.cpp filters.cpp strciphr.cpp camellia.cpp ida.cpp zlib.cpp des.cpp crc.cpp algparam.cpp dessp.cpp tea.cpp eax.cpp network.cpp emsa2.cpp pkcspad.cpp squaretb.cpp idea.cpp authenc.cpp kalyna.cpp threefish.cpp hmac.cpp zdeflate.cpp xtrcrypt.cpp queue.cpp mars.cpp rc5.cpp blake2.cpp hrtimer.cpp eprecomp.cpp hex.cpp dsa.cpp sha.cpp fips140.cpp gzip.cpp seal.cpp files.cpp base32.cpp vmac.cpp tigertab.cpp sharkbox.cpp safer.cpp randpool.cpp esign.cpp arc4.cpp osrng.cpp skipjack.cpp seed.cpp sha3.cpp sosemanuk.cpp bfinit.cpp rabin.cpp 3way.cpp rw.cpp rdrand.cpp rsa.cpp rdtables.cpp gost.cpp socketft.cpp tftables.cpp nbtheory.cpp panama.cpp modes.cpp rijndael.cpp casts.cpp chacha.cpp gfpcrypt.cpp poly1305.cpp dll.cpp ec2n.cpp blumshub.cpp algebra.cpp basecode.cpp base64.cpp cbcmac.cpp rc6.cpp dh2.cpp gf256.cpp mqueue.cpp misc.cpp pssr.cpp channels.cpp tiger.cpp cast.cpp rng.cpp square.cpp asn.cpp whrlpool.cpp md4.cpp dh.cpp ccm.cpp md2.cpp mqv.cpp gf2_32.cpp ttmac.cpp luc.cpp trdlocal.cpp pubkey.cpp gcm.cpp ripemd.cpp eccrypto.cpp serpent.cpp cmac.cpp
DLLSRCS := cryptlib.cpp cpu.cpp integer.cpp shacal2.cpp md5.cpp shark.cpp zinflate.cpp gf2n.cpp salsa.cpp xtr.cpp oaep.cpp poly1305.cpp polynomi.cpp rc2.cpp default.cpp wait.cpp wake.cpp twofish.cpp iterhash.cpp adler32.cpp elgamal.cpp marss.cpp blowfish.cpp ecp.cpp filters.cpp strciphr.cpp camellia.cpp ida.cpp zlib.cpp des.cpp crc.cpp crc-simd.cpp algparam.cpp dessp.cpp tea.cpp eax.cpp network.cpp emsa2.cpp pkcspad.cpp squaretb.cpp idea.cpp authenc.cpp kalyna.cpp threefish.cpp hmac.cpp zdeflate.cpp xtrcrypt.cpp queue.cpp mars.cpp rc5.cpp blake2.cpp hrtimer.cpp eprecomp.cpp hex.cpp dsa.cpp sha.cpp fips140.cpp gzip.cpp seal.cpp files.cpp base32.cpp vmac.cpp tigertab.cpp sharkbox.cpp safer.cpp randpool.cpp esign.cpp arc4.cpp osrng.cpp skipjack.cpp seed.cpp sha3.cpp sosemanuk.cpp bfinit.cpp rabin.cpp 3way.cpp rw.cpp rdrand.cpp rsa.cpp rdtables.cpp gost.cpp socketft.cpp tftables.cpp nbtheory.cpp panama.cpp modes.cpp rijndael.cpp casts.cpp chacha.cpp gfpcrypt.cpp poly1305.cpp dll.cpp ec2n.cpp blumshub.cpp algebra.cpp basecode.cpp base64.cpp cbcmac.cpp rc6.cpp dh2.cpp gf256.cpp mqueue.cpp misc.cpp pssr.cpp channels.cpp tiger.cpp cast.cpp rng.cpp square.cpp asn.cpp whrlpool.cpp md4.cpp dh.cpp ccm.cpp md2.cpp mqv.cpp gf2_32.cpp ttmac.cpp luc.cpp trdlocal.cpp pubkey.cpp gcm.cpp ripemd.cpp eccrypto.cpp serpent.cpp cmac.cpp
DLLOBJS := $(DLLSRCS:.cpp=.export.o)
# Import lib testing
@ -817,6 +832,10 @@ rdrand-%.o:
./rdrand-nasm.sh
endif
# crc.cpp may have SSE4.2 or ARMv8a available
crc-simd.o : crc-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(CRC_FLAG) -c) $<
# Don't build Threefish with UBsan on Travis CI. Timeouts cause the build to fail.
# Also see https://stackoverflow.com/q/12983137/608639.
ifeq ($(findstring true,$(CI)),true)

View File

@ -13,13 +13,13 @@
NAMESPACE_BEGIN(CryptoPP)
// Uncomment for benchmarking C++ against SSE2 or NEON
// #undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
// #undef CRYPTOPP_SSE42_AVAILABLE
// #undef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
// Apple Clang 6.0/Clang 3.5 does not have SSSE3 intrinsics
// http://llvm.org/bugs/show_bug.cgi?id=20213
#if (defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION <= 60000)) || (defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION <= 30500))
# undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
# undef CRYPTOPP_SSE42_AVAILABLE
#endif
// Sun Studio 12.3 and earlier lack SSE2's _mm_set_epi64x. Win32 lacks _mm_set_epi64x, Win64 supplies it except for VS2008.
@ -47,7 +47,7 @@ static void BLAKE2_SSE2_Compress64(const byte* input, BLAKE2_State<word64, true>
# endif
#endif
#if CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#if CRYPTOPP_SSE42_AVAILABLE
static void BLAKE2_SSE4_Compress32(const byte* input, BLAKE2_State<word32, false>& state);
static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State<word64, true>& state);
#endif
@ -155,7 +155,7 @@ typedef void (*pfnCompress64)(const byte*, BLAKE2_State<word64, true>&);
pfnCompress64 InitializeCompress64Fn()
{
#if CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#if CRYPTOPP_SSE42_AVAILABLE
if (HasSSE4())
return &BLAKE2_SSE4_Compress64;
else
@ -177,7 +177,7 @@ pfnCompress64 InitializeCompress64Fn()
pfnCompress32 InitializeCompress32Fn()
{
#if CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#if CRYPTOPP_SSE42_AVAILABLE
if (HasSSE4())
return &BLAKE2_SSE4_Compress32;
else
@ -1929,7 +1929,7 @@ static void BLAKE2_SSE2_Compress64(const byte* input, BLAKE2_State<word64, true>
# endif // (__SUNPRO_CC != 0x5120)
#endif // CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
#if CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#if CRYPTOPP_SSE42_AVAILABLE
static void BLAKE2_SSE4_Compress32(const byte* input, BLAKE2_State<word32, false>& state)
{
__m128i row1, row2, row3, row4;
@ -3459,7 +3459,7 @@ static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State<word64, true>
_mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])), row2l));
_mm_storeu_si128((__m128i *)(void*)(&state.h[6]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])), row2h));
}
#endif // CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#endif // CRYPTOPP_SSE42_AVAILABLE
// Disable NEON for Cortex-A53 and A57. Also see http://github.com/weidai11/cryptopp/issues/367
#if CRYPTOPP_BOOL_ARM32 && CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE

112
config.h
View File

@ -386,7 +386,56 @@ NAMESPACE_END
#define CRYPTOPP_UNCAUGHT_EXCEPTION_AVAILABLE
#endif
// Apple's Clang prior to 5.0 cannot handle SSE2 (and Apple does not use LLVM Clang numbering...)
// ***************** Platform and CPU features ********************
// Linux provides X32, which is 32-bit integers, longs and pointers on x86_64 using the full x86_64 register set.
// Detect via __ILP32__ (http://wiki.debian.org/X32Port). However, __ILP32__ shows up in more places than
// the System V ABI specs calls out, like on some Solaris installations and just about any 32-bit system with Clang.
#if (defined(__ILP32__) || defined(_ILP32)) && defined(__x86_64__)
#define CRYPTOPP_BOOL_X32 1
#else
#define CRYPTOPP_BOOL_X32 0
#endif
// see http://predef.sourceforge.net/prearch.html
#if (defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_X86_) || defined(__I86__) || defined(__INTEL__)) && !CRYPTOPP_BOOL_X32
#define CRYPTOPP_BOOL_X86 1
#else
#define CRYPTOPP_BOOL_X86 0
#endif
#if (defined(_M_X64) || defined(__x86_64__)) && !CRYPTOPP_BOOL_X32
#define CRYPTOPP_BOOL_X64 1
#else
#define CRYPTOPP_BOOL_X64 0
#endif
// Undo the ASM and Intrinsic related defines due to X32.
#if CRYPTOPP_BOOL_X32
# undef CRYPTOPP_BOOL_X64
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_X64_MASM_AVAILABLE
#endif
#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM)
#define CRYPTOPP_BOOL_ARM32 1
#else
#define CRYPTOPP_BOOL_ARM32 0
#endif
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
#define CRYPTOPP_BOOL_ARM64 1
#else
#define CRYPTOPP_BOOL_ARM64 0
#endif
// ***************** IA32 CPU features ********************
#if defined(CRYPTOPP_BOOL_X86) || defined(CRYPTOPP_BOOL_X32) || defined(CRYPTOPP_BOOL_X64)
// Apple Clang prior to 5.0 cannot handle SSE2
#if defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION < 50000)
# define CRYPTOPP_DISABLE_ASM
#endif
@ -438,9 +487,7 @@ NAMESPACE_END
// MSVC 2008 (http://msdn.microsoft.com/en-us/library/bb892950%28v=vs.90%29.aspx)
// SunCC could generate SSE4 at 12.1, but the intrinsics are missing until 12.4.
#if !defined(CRYPTOPP_DISABLE_ASM) && !defined(CRYPTOPP_DISABLE_SSE4) && !defined(_M_ARM) && ((_MSC_VER >= 1500) || (defined(__SSE4_1__) && defined(__SSE4_2__)))
#define CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE 0
#define CRYPTOPP_SSE42_AVAILABLE 1
#endif
// Don't disgorge AES-NI from CLMUL. There will be two to four subtle breaks
@ -456,6 +503,12 @@ NAMESPACE_END
#define CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE 0
#endif
#endif // X86, X32, X64
// ***************** ARM CPU features ********************
#if defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)
// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
#if !defined(CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM)
@ -467,9 +520,9 @@ NAMESPACE_END
// LLVM Clang requires 3.5. Apple Clang is unknown at the moment.
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRC32)
# define CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE 1
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
# endif
#endif
@ -495,6 +548,10 @@ NAMESPACE_END
# endif
#endif
#endif // ARM32, ARM64
// ***************** Miscellaneous ********************
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
#define CRYPTOPP_BOOL_ALIGN16 1
#else
@ -536,49 +593,6 @@ NAMESPACE_END
# define CRYPTOPP_CONSTANT(x) static const int x;
#endif
// Linux provides X32, which is 32-bit integers, longs and pointers on x86_64 using the full x86_64 register set.
// Detect via __ILP32__ (http://wiki.debian.org/X32Port). However, __ILP32__ shows up in more places than
// the System V ABI specs calls out, like on some Solaris installations and just about any 32-bit system with Clang.
#if (defined(__ILP32__) || defined(_ILP32)) && defined(__x86_64__)
#define CRYPTOPP_BOOL_X32 1
#else
#define CRYPTOPP_BOOL_X32 0
#endif
// see http://predef.sourceforge.net/prearch.html
#if (defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_X86_) || defined(__I86__) || defined(__INTEL__)) && !CRYPTOPP_BOOL_X32
#define CRYPTOPP_BOOL_X86 1
#else
#define CRYPTOPP_BOOL_X86 0
#endif
#if (defined(_M_X64) || defined(__x86_64__)) && !CRYPTOPP_BOOL_X32
#define CRYPTOPP_BOOL_X64 1
#else
#define CRYPTOPP_BOOL_X64 0
#endif
// Undo the ASM and Intrinsic related defines due to X32.
#if CRYPTOPP_BOOL_X32
# undef CRYPTOPP_BOOL_X64
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_X64_MASM_AVAILABLE
#endif
#if defined(__arm__) || defined(__aarch32__) || defined(_M_ARM)
#define CRYPTOPP_BOOL_ARM32 1
#else
#define CRYPTOPP_BOOL_ARM32 0
#endif
// Microsoft plans to support ARM-64, but its not clear how to detect it.
// TODO: Add MSC_VER and ARM-64 platform define when available
#if defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
#define CRYPTOPP_BOOL_ARM64 1
#else
#define CRYPTOPP_BOOL_ARM64 0
#endif
// ***************** Initialization and Constructor priorities ********************
// CRYPTOPP_INIT_PRIORITY attempts to manage initialization of C++ static objects.

View File

@ -511,7 +511,7 @@ static bool TryPMULL()
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
@ -560,7 +560,7 @@ static bool TryCRC32()
# endif
#else
return false;
#endif // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
#endif // CRYPTOPP_ARMV8A_CRC32_AVAILABLE
}
static bool TryAES()

2
cpu.h
View File

@ -62,7 +62,7 @@
#if CRYPTOPP_BOOL_SSSE3_INTRINSICS_AVAILABLE
# include <tmmintrin.h> // _mm_shuffle_pi8, _mm_shuffle_epi8
#endif // tmmintrin.h
#if CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#if CRYPTOPP_SSE42_AVAILABLE
# include <smmintrin.h> // _mm_blend_epi16
# include <nmmintrin.h> // _mm_crc32_u{8|16|32}
#endif // smmintrin.h

64
crc-simd.cpp Normal file
View File

@ -0,0 +1,64 @@
// crc-simd.cpp - written and placed in the public domain by
// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
//
// The source file uses intrinsics to gain access to SSE4.2 and
// ARMv8a CRC-32 and CRC-32C instructions. A separate source file
// is needed because we need additional CXXFLAGS to enable the
// appropriate instructions sets.
#include "pch.h"
#include "config.h"
#include "misc.h"
#if defined(CRYPTOPP_SSE42_AVAILABLE)
# include "nmmintrin.h"
#endif
#if defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && defined(__GNUC__)
# include "arm_neon.h"
# include "arm_acle.h"
#endif
NAMESPACE_BEGIN(CryptoPP)
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
void CRC32_Update_ARMV8(const byte *s, size_t n, word32& c)
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
c = __crc32b(c, *s);
for(; n > 4; s+=4, n-=4)
c = __crc32w(c, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
c = __crc32b(c, *s);
}
void CRC32C_Update_ARMV8(const byte *s, size_t n, word32& c)
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
c = __crc32cb(c, *s);
for(; n > 4; s+=4, n-=4)
c = __crc32cw(c, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
c = __crc32cb(c, *s);
}
#endif
#if CRYPTOPP_SSE42_AVAILABLE
void CRC32C_Update_SSE42(const byte *s, size_t n, word32& c)
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
c = _mm_crc32_u8(c, *s);
for(; n > 4; s+=4, n-=4)
c = _mm_crc32_u32(c, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
c = _mm_crc32_u8(c, *s);
}
#endif
NAMESPACE_END

72
crc.cpp
View File

@ -7,38 +7,16 @@
NAMESPACE_BEGIN(CryptoPP)
// Visual Studio needs VS2008 (1500)
// http://msdn.microsoft.com/en-us/library/bb531394%28v=vs.90%29.aspx
#if defined(_MSC_VER) && (_MSC_VER < 1500)
# undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
// crc-simd.cpp
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
extern void CRC32_Update_ARMV8(const byte *s, size_t n, word32& c);
extern void CRC32C_Update_ARMV8(const byte *s, size_t n, word32& c);
#endif
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
#if (CRYPTOPP_GCC_VERSION >= 40300 || __INTEL_COMPILER >= 1000 || __SUNPRO_CC >= 0x5110 || CRYPTOPP_LLVM_CLANG_VERSION >= 20300 || CRYPTOPP_APPLE_CLANG_VERSION >= 40000) && !defined(__SSE4_2__) && !defined(_MSC_VER)
GCC_INLINE unsigned int GCC_INLINE_ATTRIB
MM_CRC32_U8(unsigned int crc, unsigned char val)
{
asm ("crc32 %1, %0" : "+r"(crc) : "r"(val));
return crc;
}
GCC_INLINE unsigned int GCC_INLINE_ATTRIB
MM_CRC32_U16(unsigned int crc, unsigned short val)
{
asm ("crc32 %1, %0" : "+r"(crc) : "r"(val));
return crc;
}
GCC_INLINE unsigned int GCC_INLINE_ATTRIB
MM_CRC32_U32(unsigned int crc, unsigned int val)
{
asm ("crc32 %1, %0" : "+r"(crc) : "r"(val));
return crc;
}
#else
#define MM_CRC32_U8(a,b) _mm_crc32_u8(a,b)
#define MM_CRC32_U16(a,b) _mm_crc32_u16(a,b)
#define MM_CRC32_U32(a,b) _mm_crc32_u32(a,b)
// crc-simd.cpp
#if (CRYPTOPP_SSE42_AVAILABLE)
extern void CRC32C_Update_SSE42(const byte *s, size_t n, word32& c);
#endif
#endif // X86/X32/X64
/* Table of CRC-32's of all single byte values (made by makecrc.c) */
const word32 CRC32::m_tab[] = {
@ -158,18 +136,10 @@ CRC32::CRC32()
void CRC32::Update(const byte *s, size_t n)
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
if (HasCRC32())
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
m_crc = __crc32b(m_crc, *s);
for(; n > 4; s+=4, n-=4)
m_crc = __crc32w(m_crc, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
m_crc = __crc32b(m_crc, *s);
CRC32_Update_ARMV8(s, n, m_crc);
return;
}
#endif
@ -326,32 +296,16 @@ CRC32C::CRC32C()
void CRC32C::Update(const byte *s, size_t n)
{
#if CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#if CRYPTOPP_SSE42_AVAILABLE
if (HasSSE4())
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
m_crc = MM_CRC32_U8(m_crc, *s);
for(; n > 4; s+=4, n-=4)
m_crc = MM_CRC32_U32(m_crc, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
m_crc = MM_CRC32_U8(m_crc, *s);
CRC32C_Update_SSE42(s, n, m_crc);
return;
}
#elif (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
#elif (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
if (HasCRC32())
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
m_crc = __crc32cb(m_crc, *s);
for(; n > 4; s+=4, n-=4)
m_crc = __crc32cw(m_crc, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
m_crc = __crc32cb(m_crc, *s);
CRC32C_Update_ARMV8(s, n, m_crc);
return;
}
#endif

View File

@ -48,9 +48,9 @@
# If you use 'make sources' from Linux makefile, then add 'winpipes.cpp'. Platform specific
# classes, like 'rdrand.cpp', should not be included. Add them under the X86 and X64 rules.
LIB_SRCS = cryptlib.cpp cpu.cpp integer.cpp shacal2.cpp md5.cpp shark.cpp zinflate.cpp gf2n.cpp salsa.cpp xtr.cpp oaep.cpp rc2.cpp default.cpp wait.cpp wake.cpp twofish.cpp iterhash.cpp adler32.cpp algparam.cpp marss.cpp blowfish.cpp ecp.cpp strciphr.cpp aria.cpp camellia.cpp dh2.cpp ida.cpp zlib.cpp elgamal.cpp crc.cpp dessp.cpp tea.cpp eax.cpp network.cpp sha.cpp emsa2.cpp pkcspad.cpp squaretb.cpp idea.cpp authenc.cpp hmac.cpp xtrcrypt.cpp queue.cpp mars.cpp rc5.cpp md2.cpp hrtimer.cpp vmac.cpp eprecomp.cpp hex.cpp dsa.cpp fips140.cpp gzip.cpp seal.cpp blake2.cpp files.cpp base32.cpp sharkbox.cpp safer.cpp randpool.cpp sosemanuk.cpp arc4.cpp osrng.cpp skipjack.cpp seed.cpp sha3.cpp filters.cpp bfinit.cpp rabin.cpp 3way.cpp rw.cpp rdtables.cpp rsa.cpp tftables.cpp gost.cpp socketft.cpp nbtheory.cpp panama.cpp modes.cpp rijndael.cpp casts.cpp algebra.cpp esign.cpp gfpcrypt.cpp dll.cpp ec2n.cpp poly1305.cpp polynomi.cpp blumshub.cpp des.cpp basecode.cpp zdeflate.cpp base64.cpp rc6.cpp gf256.cpp mqueue.cpp misc.cpp pssr.cpp channels.cpp rng.cpp threefish.cpp tiger.cpp cast.cpp square.cpp asn.cpp chacha.cpp whrlpool.cpp md4.cpp dh.cpp ccm.cpp mqv.cpp tigertab.cpp gf2_32.cpp cbcmac.cpp ttmac.cpp luc.cpp trdlocal.cpp pubkey.cpp gcm.cpp ripemd.cpp kalyna.cpp kalynatab.cpp keccak.cpp eccrypto.cpp serpent.cpp cmac.cpp winpipes.cpp
LIB_SRCS = cryptlib.cpp cpu.cpp integer.cpp shacal2.cpp md5.cpp shark.cpp zinflate.cpp gf2n.cpp salsa.cpp xtr.cpp oaep.cpp rc2.cpp default.cpp wait.cpp wake.cpp twofish.cpp iterhash.cpp adler32.cpp algparam.cpp marss.cpp blowfish.cpp ecp.cpp strciphr.cpp aria.cpp camellia.cpp dh2.cpp ida.cpp zlib.cpp elgamal.cpp crc.cpp crc-simd.cpp dessp.cpp tea.cpp eax.cpp network.cpp sha.cpp emsa2.cpp pkcspad.cpp squaretb.cpp idea.cpp authenc.cpp hmac.cpp xtrcrypt.cpp queue.cpp mars.cpp rc5.cpp md2.cpp hrtimer.cpp vmac.cpp eprecomp.cpp hex.cpp dsa.cpp fips140.cpp gzip.cpp seal.cpp blake2.cpp files.cpp base32.cpp sharkbox.cpp safer.cpp randpool.cpp sosemanuk.cpp arc4.cpp osrng.cpp skipjack.cpp seed.cpp sha3.cpp filters.cpp bfinit.cpp rabin.cpp 3way.cpp rw.cpp rdtables.cpp rsa.cpp tftables.cpp gost.cpp socketft.cpp nbtheory.cpp panama.cpp modes.cpp rijndael.cpp casts.cpp algebra.cpp esign.cpp gfpcrypt.cpp dll.cpp ec2n.cpp poly1305.cpp polynomi.cpp blumshub.cpp des.cpp basecode.cpp zdeflate.cpp base64.cpp rc6.cpp gf256.cpp mqueue.cpp misc.cpp pssr.cpp channels.cpp rng.cpp threefish.cpp tiger.cpp cast.cpp square.cpp asn.cpp chacha.cpp whrlpool.cpp md4.cpp dh.cpp ccm.cpp mqv.cpp tigertab.cpp gf2_32.cpp cbcmac.cpp ttmac.cpp luc.cpp trdlocal.cpp pubkey.cpp gcm.cpp ripemd.cpp kalyna.cpp kalynatab.cpp keccak.cpp eccrypto.cpp serpent.cpp cmac.cpp winpipes.cpp
LIB_OBJS = cryptlib.obj cpu.obj integer.obj shacal2.obj md5.obj shark.obj zinflate.obj gf2n.obj salsa.obj xtr.obj oaep.obj rc2.obj default.obj wait.obj wake.obj twofish.obj iterhash.obj adler32.obj algparam.obj marss.obj blowfish.obj ecp.obj strciphr.obj aria.obj camellia.obj dh2.obj ida.obj zlib.obj elgamal.obj crc.obj dessp.obj tea.obj eax.obj network.obj sha.obj emsa2.obj pkcspad.obj squaretb.obj idea.obj authenc.obj hmac.obj xtrcrypt.obj queue.obj mars.obj rc5.obj md2.obj hrtimer.obj vmac.obj eprecomp.obj hex.obj dsa.obj fips140.obj gzip.obj seal.obj blake2.obj files.obj base32.obj sharkbox.obj safer.obj randpool.obj sosemanuk.obj arc4.obj osrng.obj skipjack.obj seed.obj sha3.obj filters.obj bfinit.obj rabin.obj 3way.obj rw.obj rdtables.obj rsa.obj tftables.obj gost.obj socketft.obj nbtheory.obj panama.obj modes.obj rijndael.obj casts.obj algebra.obj esign.obj gfpcrypt.obj dll.obj ec2n.obj poly1305.obj polynomi.obj blumshub.obj des.obj basecode.obj zdeflate.obj base64.obj rc6.obj gf256.obj mqueue.obj misc.obj pssr.obj channels.obj rng.obj threefish.obj tiger.obj cast.obj square.obj asn.obj chacha.obj whrlpool.obj md4.obj dh.obj ccm.obj mqv.obj tigertab.obj gf2_32.obj cbcmac.obj ttmac.obj luc.obj trdlocal.obj pubkey.obj gcm.obj ripemd.obj kalyna.obj kalynatab.obj keccak.obj eccrypto.obj serpent.obj cmac.obj winpipes.obj
LIB_OBJS = cryptlib.obj cpu.obj integer.obj shacal2.obj md5.obj shark.obj zinflate.obj gf2n.obj salsa.obj xtr.obj oaep.obj rc2.obj default.obj wait.obj wake.obj twofish.obj iterhash.obj adler32.obj algparam.obj marss.obj blowfish.obj ecp.obj strciphr.obj aria.obj camellia.obj dh2.obj ida.obj zlib.obj elgamal.obj crc.obj crc-simd.obj dessp.obj tea.obj eax.obj network.obj sha.obj emsa2.obj pkcspad.obj squaretb.obj idea.obj authenc.obj hmac.obj xtrcrypt.obj queue.obj mars.obj rc5.obj md2.obj hrtimer.obj vmac.obj eprecomp.obj hex.obj dsa.obj fips140.obj gzip.obj seal.obj blake2.obj files.obj base32.obj sharkbox.obj safer.obj randpool.obj sosemanuk.obj arc4.obj osrng.obj skipjack.obj seed.obj sha3.obj filters.obj bfinit.obj rabin.obj 3way.obj rw.obj rdtables.obj rsa.obj tftables.obj gost.obj socketft.obj nbtheory.obj panama.obj modes.obj rijndael.obj casts.obj algebra.obj esign.obj gfpcrypt.obj dll.obj ec2n.obj poly1305.obj polynomi.obj blumshub.obj des.obj basecode.obj zdeflate.obj base64.obj rc6.obj gf256.obj mqueue.obj misc.obj pssr.obj channels.obj rng.obj threefish.obj tiger.obj cast.obj square.obj asn.obj chacha.obj whrlpool.obj md4.obj dh.obj ccm.obj mqv.obj tigertab.obj gf2_32.obj cbcmac.obj ttmac.obj luc.obj trdlocal.obj pubkey.obj gcm.obj ripemd.obj kalyna.obj kalynatab.obj keccak.obj eccrypto.obj serpent.obj cmac.obj winpipes.obj
TEST_SRCS = bench1.cpp bench2.cpp test.cpp validat0.cpp validat1.cpp validat2.cpp validat3.cpp datatest.cpp regtest1.cpp regtest2.cpp regtest3.cpp fipsalgt.cpp dlltest.cpp fipstest.cpp

View File

@ -1198,6 +1198,44 @@ if [[ ("$HAVE_DISASS" -ne "0" && ("$IS_X86" -ne "0" || "$IS_X64" -ne "0")) ]]; t
fi
fi
############################################
# Test CRC-32C code generation
"$CXX" -DCRYPTOPP_ADHOC_MAIN -msse4.2 adhoc.cpp -o "$TMP/adhoc.exe" > /dev/null 2>&1
if [[ "$?" -eq "0" ]]; then
X86_CRC32=1
fi
if [[ ("$X86_CRC32" -ne "0") ]]; then
echo
echo "************************************" | tee -a "$TEST_RESULTS"
echo "Testing: X86 CRC32 code generation" | tee -a "$TEST_RESULTS"
echo
OBJFILE=crc-simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="$CXX" CXXFLAGS="$RELEASE_CXXFLAGS -DDISABLE_NATIVE_ARCH=1" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"
COUNT=0
FAILED=0
DISASS_TEXT=$("$DISASS" "${DISASSARGS[@]}" "$OBJFILE" 2>/dev/null)
COUNT=$(echo -n "$DISASS_TEXT" | "$GREP" -i -c crc32b)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate crc32b instruction" | tee -a "$TEST_RESULTS"
fi
COUNT=$(echo -n "$DISASS_TEXT" | "$GREP" -i -c crc32l)
if [[ ("$COUNT" -eq "0") ]]; then
FAILED=1
echo "ERROR: failed to generate crc32l instruction" | tee -a "$TEST_RESULTS"
fi
if [[ ("$FAILED" -eq "0") ]]; then
echo "Verified crc32b and crc32l machine instructions" | tee -a "$TEST_RESULTS"
fi
fi
############################################
# Test AES-NI code generation

View File

@ -176,6 +176,7 @@
<ClCompile Include="channels.cpp" />
<ClCompile Include="cmac.cpp" />
<ClCompile Include="crc.cpp" />
<ClCompile Include="crc-simd.cpp" />
<ClCompile Include="default.cpp" />
<ClCompile Include="des.cpp" />
<ClCompile Include="dessp.cpp" />

View File

@ -223,7 +223,7 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
word32 *rk = m_key;
#if (CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE && (!defined(_MSC_VER) || _MSC_VER >= 1600 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32))
#if (CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE && CRYPTOPP_SSE42_AVAILABLE && (!defined(_MSC_VER) || _MSC_VER >= 1600 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32))
// MSVC 2008 SP1 generates bad code for _mm_extract_epi32() when compiling for X64
if (HasAESNI() && HasSSE4())
{