Fixed ARMv7a and NEON detection. Initial cut-in of GCM
parent
4b51eadc73
commit
b4f6882237
10
GNUmakefile
10
GNUmakefile
|
|
@ -2,8 +2,12 @@
|
||||||
##### System Attributes and Programs #####
|
##### System Attributes and Programs #####
|
||||||
###########################################################
|
###########################################################
|
||||||
|
|
||||||
|
# If needed
|
||||||
TMPDIR ?= /tmp
|
TMPDIR ?= /tmp
|
||||||
|
# Used for ARMv7 and NEON.
|
||||||
|
FP_ABI ?= hard
|
||||||
|
|
||||||
|
# Command and arguments
|
||||||
AR ?= ar
|
AR ?= ar
|
||||||
ARFLAGS ?= -cr # ar needs the dash on OpenBSD
|
ARFLAGS ?= -cr # ar needs the dash on OpenBSD
|
||||||
RANLIB ?= ranlib
|
RANLIB ?= ranlib
|
||||||
|
|
@ -297,7 +301,7 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(IS_NEON),1)
|
ifeq ($(IS_NEON),1)
|
||||||
NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv7-a -mfloat-abi=softfp -mfpu=neon -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv7-a -mfloat-abi=softfp -mfpu=neon")
|
NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon")
|
||||||
GCM_FLAG = $(NEON_FLAG)
|
GCM_FLAG = $(NEON_FLAG)
|
||||||
ARIA_FLAG = $(NEON_FLAG)
|
ARIA_FLAG = $(NEON_FLAG)
|
||||||
BLAKE2_FLAG = $(NEON_FLAG)
|
BLAKE2_FLAG = $(NEON_FLAG)
|
||||||
|
|
@ -868,6 +872,10 @@ blake2-simd.o : blake2-simd.cpp
|
||||||
crc-simd.o : crc-simd.cpp
|
crc-simd.o : crc-simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(CRC_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(CRC_FLAG) -c) $<
|
||||||
|
|
||||||
|
# PCLMUL or ARMv7a/ARMv8a available
|
||||||
|
gcm-simd.o : gcm-simd.cpp
|
||||||
|
$(CXX) $(strip $(CXXFLAGS) $(GCM_FLAG) -c) $<
|
||||||
|
|
||||||
# SSE4.2/SHA-NI or ARMv8a available
|
# SSE4.2/SHA-NI or ARMv8a available
|
||||||
sha-simd.o : sha-simd.cpp
|
sha-simd.o : sha-simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
||||||
|
|
|
||||||
21
config.h
21
config.h
|
|
@ -517,7 +517,8 @@ NAMESPACE_END
|
||||||
|
|
||||||
// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
|
// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
|
||||||
#if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
#if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||||
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || \
|
||||||
|
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||||
# define CRYPTOPP_ARM_NEON_AVAILABLE 1
|
# define CRYPTOPP_ARM_NEON_AVAILABLE 1
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -527,17 +528,21 @@ NAMESPACE_END
|
||||||
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||||
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||||
# if defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
# if defined(__ARM_FEATURE_CRC32) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
|
||||||
|
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
|
||||||
|
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
|
||||||
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
|
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Requires ARMv8, ACLE 2.0 and Aarch64. GCC requires 4.8 and above.
|
// Requires ARMv8, but we are not sure of the define because the ACLE does not discuss it.
|
||||||
// LLVM Clang requires 3.5. Apple Clang does not support it at the moment.
|
// GCC seems to require 4.8 and above. LLVM Clang requires 3.5. Apple Clang does not support
|
||||||
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
// it at the moment. Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||||
#if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
|
#if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
|
||||||
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
|
||||||
|
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
|
||||||
|
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
|
||||||
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
|
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -547,7 +552,9 @@ NAMESPACE_END
|
||||||
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||||
#if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
#if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||||
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
|
||||||
|
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
|
||||||
|
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
|
||||||
# define CRYPTOPP_ARMV8A_AES_AVAILABLE 1
|
# define CRYPTOPP_ARMV8A_AES_AVAILABLE 1
|
||||||
# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1
|
# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1
|
||||||
# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1
|
# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1
|
||||||
|
|
|
||||||
72
cpu.cpp
72
cpu.cpp
|
|
@ -344,12 +344,6 @@ extern bool CPU_TryPMULL_ARMV8();
|
||||||
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
static jmp_buf s_jmpNoPMULL;
|
|
||||||
static void SigIllHandlerPMULL(int)
|
|
||||||
{
|
|
||||||
longjmp(s_jmpNoPMULL, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static jmp_buf s_jmpNoAES;
|
static jmp_buf s_jmpNoAES;
|
||||||
static void SigIllHandlerAES(int)
|
static void SigIllHandlerAES(int)
|
||||||
{
|
{
|
||||||
|
|
@ -360,8 +354,8 @@ extern "C"
|
||||||
|
|
||||||
static bool TryNEON()
|
static bool TryNEON()
|
||||||
{
|
{
|
||||||
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
|
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||||
return CPU_TryCRC32_ARMV8();
|
return CPU_TryNEON_ARM();
|
||||||
#else
|
#else
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -379,68 +373,10 @@ static bool TryCRC32()
|
||||||
static bool TryPMULL()
|
static bool TryPMULL()
|
||||||
{
|
{
|
||||||
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
||||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
return CPU_TryPMULL_ARMV8();
|
||||||
volatile bool result = true;
|
|
||||||
__try
|
|
||||||
{
|
|
||||||
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
|
||||||
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
|
||||||
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
|
||||||
|
|
||||||
const poly128_t r1 = vmull_p64(a1, b1);
|
|
||||||
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
|
||||||
|
|
||||||
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
|
|
||||||
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
|
||||||
const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
|
||||||
|
|
||||||
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
|
||||||
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
|
||||||
}
|
|
||||||
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
# else
|
|
||||||
// longjmp and clobber warnings. Volatile is required.
|
|
||||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
||||||
volatile bool result = true;
|
|
||||||
|
|
||||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
|
|
||||||
if (oldHandler == SIG_ERR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
volatile sigset_t oldMask;
|
|
||||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (setjmp(s_jmpNoPMULL))
|
|
||||||
result = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
|
||||||
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
|
||||||
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
|
||||||
|
|
||||||
const poly128_t r1 = vmull_p64(a1, b1);
|
|
||||||
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
|
||||||
|
|
||||||
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
|
|
||||||
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
|
||||||
const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
|
||||||
|
|
||||||
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
|
||||||
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
|
||||||
}
|
|
||||||
|
|
||||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
|
||||||
signal(SIGILL, oldHandler);
|
|
||||||
return result;
|
|
||||||
# endif
|
|
||||||
#else
|
#else
|
||||||
return false;
|
return false;
|
||||||
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool TryAES()
|
static bool TryAES()
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,12 @@
|
||||||
# include "nmmintrin.h"
|
# include "nmmintrin.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && defined(__GNUC__)
|
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
|
||||||
# include "arm_neon.h"
|
# include "arm_neon.h"
|
||||||
|
#if defined(__GNUC__)
|
||||||
# include "arm_acle.h"
|
# include "arm_acle.h"
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||||
# include <signal.h>
|
# include <signal.h>
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,118 @@
|
||||||
|
// gcm-simd.cpp - written and placed in the public domain by
|
||||||
|
// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
|
||||||
|
//
|
||||||
|
// This source file uses intrinsics to gain access to carry-less
|
||||||
|
// multiply (x86 CLMUL, ARMv8a PMULL) and ARM NEON instructions used by GCM. A separate source file
|
||||||
|
// is needed because additional CXXFLAGS are required to enable
|
||||||
|
// the appropriate instruction sets in some build configurations.
|
||||||
|
|
||||||
|
#include "pch.h"
|
||||||
|
#include "config.h"
|
||||||
|
#include "misc.h"
|
||||||
|
|
||||||
|
#if (CRYPTOPP_AESNI_AVAILABLE)
|
||||||
|
# include "wmmintrin.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||||
|
# include "arm_neon.h"
|
||||||
|
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
||||||
|
# include "arm_acle.h"
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||||
|
# include <signal.h>
|
||||||
|
# include <setjmp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
|
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||||
|
// Support for SIGILL-based instruction probing on GNU-style toolchains.
// The handler below is meant to be installed with signal(SIGILL, ...) by a
// probe that has already armed s_jmpSIGILL with setjmp().
extern "C" {
    // Function-pointer type accepted and returned by signal().
    typedef void (*SigHandler)(int);

    // Resume point for the probe currently running in this translation unit.
    static jmp_buf s_jmpSIGILL;

    // Abandons the faulting instruction by jumping back to the setjmp() site,
    // which then observes a non-zero return value (1).
    static void SigIllHandler(int)
    {
        longjmp(s_jmpSIGILL, 1);
    }
}
|
||||||
|
#endif // CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||||
|
|
||||||
|
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
||||||
|
bool CPU_TryPMULL_ARMV8()
|
||||||
|
{
|
||||||
|
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||||
|
volatile bool result = true;
|
||||||
|
__try
|
||||||
|
{
|
||||||
|
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
||||||
|
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
||||||
|
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
||||||
|
|
||||||
|
const poly128_t r1 = vmull_p64(a1, b1);
|
||||||
|
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||||
|
|
||||||
|
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
|
||||||
|
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
||||||
|
const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
||||||
|
|
||||||
|
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
||||||
|
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
||||||
|
}
|
||||||
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
# else
|
||||||
|
// longjmp and clobber warnings. Volatile is required.
|
||||||
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||||
|
volatile bool result = true;
|
||||||
|
|
||||||
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
|
||||||
|
if (oldHandler == SIG_ERR)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
volatile sigset_t oldMask;
|
||||||
|
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (setjmp(s_jmpNoPMULL))
|
||||||
|
result = false;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
||||||
|
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
||||||
|
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
||||||
|
|
||||||
|
const poly128_t r1 = vmull_p64(a1, b1);
|
||||||
|
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||||
|
|
||||||
|
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
|
||||||
|
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
||||||
|
const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
||||||
|
|
||||||
|
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
||||||
|
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
||||||
|
}
|
||||||
|
|
||||||
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
||||||
|
signal(SIGILL, oldHandler);
|
||||||
|
return result;
|
||||||
|
# endif
|
||||||
|
}
|
||||||
|
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||||
|
|
||||||
|
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
||||||
|
// Stores b XOR c into a, treating each buffer as one uint64x2_t and using a
// single veorq_u64. All three pointers are asserted to satisfy the alignment
// of uint64x2_t.
void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c)
{
    CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<uint64x2_t>()));
    CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<uint64x2_t>()));
    CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<uint64x2_t>()));

    // Read the inputs through const-qualified pointers so their constness is
    // preserved instead of being cast away.
    const uint64x2_t lhs = *reinterpret_cast<const uint64x2_t*>(b);
    const uint64x2_t rhs = *reinterpret_cast<const uint64x2_t*>(c);
    *reinterpret_cast<uint64x2_t*>(a) = veorq_u64(lhs, rhs);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
NAMESPACE_END
|
||||||
38
gcm.cpp
38
gcm.cpp
|
|
@ -49,6 +49,10 @@ NAMESPACE_BEGIN(CryptoPP)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
||||||
|
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
// Schneiders, Hovsmith and O'Rourke used this trick.
|
// Schneiders, Hovsmith and O'Rourke used this trick.
|
||||||
|
|
@ -193,6 +197,15 @@ __m128i _mm_clmulepi64_si128(const __m128i &a, const __m128i &b, int i)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Portable fallback: stores b XOR c into a as two word64 operations.
// All three pointers are asserted to satisfy the alignment of word64.
inline static void Xor16(byte *a, const byte *b, const byte *c)
{
    CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
    CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
    CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));

    // Go through void* to avoid cast-align diagnostics, but keep the inputs
    // const-qualified rather than casting their constness away.
    word64 *dst = (word64 *)(void *)a;
    const word64 *lhs = (const word64 *)(const void *)b;
    const word64 *rhs = (const word64 *)(const void *)c;

    dst[0] = lhs[0] ^ rhs[0];
    dst[1] = lhs[1] ^ rhs[1];
}
|
||||||
|
|
||||||
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||||
inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
|
inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
|
||||||
{
|
{
|
||||||
|
|
@ -211,25 +224,6 @@ inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
|
||||||
inline static void NEON_Xor16(byte *a, const byte *b, const byte *c)
|
|
||||||
{
|
|
||||||
CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<uint64x2_t>()));
|
|
||||||
CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<uint64x2_t>()));
|
|
||||||
CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<uint64x2_t>()));
|
|
||||||
*(uint64x2_t*)a = veorq_u64(*(uint64x2_t*)b, *(uint64x2_t*)c);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
inline static void Xor16(byte *a, const byte *b, const byte *c)
|
|
||||||
{
|
|
||||||
CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
|
|
||||||
CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
|
|
||||||
CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
|
|
||||||
((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
|
|
||||||
((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
|
#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
|
||||||
CRYPTOPP_ALIGN_DATA(16)
|
CRYPTOPP_ALIGN_DATA(16)
|
||||||
static const word64 s_clmulConstants64[] = {
|
static const word64 s_clmulConstants64[] = {
|
||||||
|
|
@ -441,7 +435,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
||||||
if (HasNEON())
|
if (HasNEON())
|
||||||
for (j=2; j<=0x80; j*=2)
|
for (j=2; j<=0x80; j*=2)
|
||||||
for (k=1; k<j; k++)
|
for (k=1; k<j; k++)
|
||||||
NEON_Xor16(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16);
|
GCM_Xor16_NEON(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16);
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
for (j=2; j<=0x80; j*=2)
|
for (j=2; j<=0x80; j*=2)
|
||||||
|
|
@ -497,8 +491,8 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
||||||
for (j=2; j<=8; j*=2)
|
for (j=2; j<=8; j*=2)
|
||||||
for (k=1; k<j; k++)
|
for (k=1; k<j; k++)
|
||||||
{
|
{
|
||||||
NEON_Xor16(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16);
|
GCM_Xor16_NEON(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16);
|
||||||
NEON_Xor16(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16);
|
GCM_Xor16_NEON(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -264,14 +264,14 @@ bool TestSettings()
|
||||||
// Don't assert the alignment of testvals. That's what this test is for.
|
// Don't assert the alignment of testvals. That's what this test is for.
|
||||||
byte testvals[10] = {1,2,2,3,3,3,3,2,2,1};
|
byte testvals[10] = {1,2,2,3,3,3,3,2,2,1};
|
||||||
if (*(word32 *)(void *)(testvals+3) == 0x03030303 && *(word64 *)(void *)(testvals+1) == W64LIT(0x0202030303030202))
|
if (*(word32 *)(void *)(testvals+3) == 0x03030303 && *(word64 *)(void *)(testvals+1) == W64LIT(0x0202030303030202))
|
||||||
std::cout << "passed: Your machine allows unaligned data access.\n";
|
std::cout << "passed: Unaligned data access (CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS).\n";
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::cout << "FAILED: Unaligned data access gave incorrect results.\n";
|
std::cout << "FAILED: Unaligned data access gave incorrect results.\n";
|
||||||
pass = false;
|
pass = false;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
std::cout << "passed: CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is not defined. Will restrict to aligned data access.\n";
|
std::cout << "passed: Aligned data access (no CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS).\n";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (sizeof(byte) == 1)
|
if (sizeof(byte) == 1)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue