Fixed ARMv7a and NEON detection. Initial cut-in of GCM
parent
4b51eadc73
commit
b4f6882237
10
GNUmakefile
10
GNUmakefile
|
|
@ -2,8 +2,12 @@
|
|||
##### System Attributes and Programs #####
|
||||
###########################################################
|
||||
|
||||
# If needed
|
||||
TMPDIR ?= /tmp
|
||||
# Used for ARMv7 and NEON.
|
||||
FP_ABI ?= hard
|
||||
|
||||
# Command ard arguments
|
||||
AR ?= ar
|
||||
ARFLAGS ?= -cr # ar needs the dash on OpenBSD
|
||||
RANLIB ?= ranlib
|
||||
|
|
@ -297,7 +301,7 @@ endif
|
|||
endif
|
||||
|
||||
ifeq ($(IS_NEON),1)
|
||||
NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv7-a -mfloat-abi=softfp -mfpu=neon -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv7-a -mfloat-abi=softfp -mfpu=neon")
|
||||
NEON_FLAG = $(shell echo | $(CXX) $(CXXFLAGS) -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon -dM -E - | grep -i -c -q __ARM_NEON && echo "-march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon")
|
||||
GCM_FLAG = $(NEON_FLAG)
|
||||
ARIA_FLAG = $(NEON_FLAG)
|
||||
BLAKE2_FLAG = $(NEON_FLAG)
|
||||
|
|
@ -868,6 +872,10 @@ blake2-simd.o : blake2-simd.cpp
|
|||
crc-simd.o : crc-simd.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(CRC_FLAG) -c) $<
|
||||
|
||||
# PCLMUL or ARMv7a/ARMv8a available
|
||||
gcm-simd.o : gcm-simd.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(GCM_FLAG) -c) $<
|
||||
|
||||
# SSE4.2/SHA-NI or ARMv8a available
|
||||
sha-simd.o : sha-simd.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
||||
|
|
|
|||
21
config.h
21
config.h
|
|
@ -517,7 +517,8 @@ NAMESPACE_END
|
|||
|
||||
// Requires ARMv7 and ACLE 1.0. Testing shows ARMv7 is really ARMv7a under most toolchains.
|
||||
#if !defined(CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# if defined(__ARM_NEON__) || defined(__ARM_NEON) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || \
|
||||
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# define CRYPTOPP_ARM_NEON_AVAILABLE 1
|
||||
# endif
|
||||
#endif
|
||||
|
|
@ -527,17 +528,21 @@ NAMESPACE_END
|
|||
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||
#if !defined(CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# if defined(__ARM_FEATURE_CRC32) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
|
||||
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
|
||||
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
|
||||
# define CRYPTOPP_ARMV8A_CRC32_AVAILABLE 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Requires ARMv8, ACLE 2.0 and Aarch64. GCC requires 4.8 and above.
|
||||
// LLVM Clang requires 3.5. Apple Clang does not support it at the moment.
|
||||
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||
// Requires ARMv8, but we are not sure of the define because the ACLE does not discuss it.
|
||||
// GCC seems to requires 4.8 and above. LLVM Clang requires 3.5. Apple Clang does not support
|
||||
// it at the moment. Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||
#if !defined(CRYPTOPP_ARMV8A_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
|
||||
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
|
||||
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
|
||||
# define CRYPTOPP_ARMV8A_PMULL_AVAILABLE 1
|
||||
# endif
|
||||
#endif
|
||||
|
|
@ -547,7 +552,9 @@ NAMESPACE_END
|
|||
// Microsoft plans to support ARM-64, but its not clear how to detect it.
|
||||
// TODO: Add MSC_VER and ARM-64 platform define when available
|
||||
#if !defined(CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
|
||||
# if defined(__ARM_FEATURE_CRYPTO) || (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || \
|
||||
(defined(__ARM_32BIT_STATE_) || defined(__ARM_64BIT_STATE_)) || \
|
||||
(defined(__AARCH32EL__) || defined(__AARCH64EL__))
|
||||
# define CRYPTOPP_ARMV8A_AES_AVAILABLE 1
|
||||
# define CRYPTOPP_ARMV8A_SHA_AVAILABLE 1
|
||||
# define CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE 1
|
||||
|
|
|
|||
72
cpu.cpp
72
cpu.cpp
|
|
@ -344,12 +344,6 @@ extern bool CPU_TryPMULL_ARMV8();
|
|||
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||
extern "C"
|
||||
{
|
||||
static jmp_buf s_jmpNoPMULL;
|
||||
static void SigIllHandlerPMULL(int)
|
||||
{
|
||||
longjmp(s_jmpNoPMULL, 1);
|
||||
}
|
||||
|
||||
static jmp_buf s_jmpNoAES;
|
||||
static void SigIllHandlerAES(int)
|
||||
{
|
||||
|
|
@ -360,8 +354,8 @@ extern "C"
|
|||
|
||||
static bool TryNEON()
|
||||
{
|
||||
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
|
||||
return CPU_TryCRC32_ARMV8();
|
||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||
return CPU_TryNEON_ARM();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
|
|
@ -379,68 +373,10 @@ static bool TryCRC32()
|
|||
static bool TryPMULL()
|
||||
{
|
||||
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
volatile bool result = true;
|
||||
__try
|
||||
{
|
||||
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
||||
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
||||
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
||||
|
||||
const poly128_t r1 = vmull_p64(a1, b1);
|
||||
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||
|
||||
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
|
||||
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
||||
const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
||||
|
||||
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
||||
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
||||
}
|
||||
__except (EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return result;
|
||||
# else
|
||||
// longjmp and clobber warnings. Volatile is required.
|
||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
|
||||
if (oldHandler == SIG_ERR)
|
||||
return false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
||||
return false;
|
||||
|
||||
if (setjmp(s_jmpNoPMULL))
|
||||
result = false;
|
||||
else
|
||||
{
|
||||
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
||||
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
||||
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
||||
|
||||
const poly128_t r1 = vmull_p64(a1, b1);
|
||||
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||
|
||||
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
|
||||
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
||||
const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
||||
|
||||
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
||||
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
# endif
|
||||
return CPU_TryPMULL_ARMV8();
|
||||
#else
|
||||
return false;
|
||||
#endif // CRYPTOPP_ARMV8A_CRYPTO_AVAILABLE
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool TryAES()
|
||||
|
|
|
|||
|
|
@ -14,10 +14,12 @@
|
|||
# include "nmmintrin.h"
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE) && defined(__GNUC__)
|
||||
#if (CRYPTOPP_ARMV8A_CRC32_AVAILABLE)
|
||||
# include "arm_neon.h"
|
||||
#if defined(__GNUC__)
|
||||
# include "arm_acle.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||
# include <signal.h>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,118 @@
|
|||
// crc-simd.cpp - written and placed in the public domain by
|
||||
// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
|
||||
//
|
||||
// This source file uses intrinsics to gain access to SSE4.2 and
|
||||
// ARMv8a CRC-32 and CRC-32C instructions. A separate source file
|
||||
// is needed because additional CXXFLAGS are required to enable
|
||||
// the appropriate instructions sets in some build configurations.
|
||||
|
||||
#include "pch.h"
|
||||
#include "config.h"
|
||||
#include "misc.h"
|
||||
|
||||
#if (CRYPTOPP_AESNI_AVAILABLE)
|
||||
# include "wmmintrin.h"
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
|
||||
# include "arm_neon.h"
|
||||
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
||||
# include "arm_acle.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||
# include <signal.h>
|
||||
# include <setjmp.h>
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||
extern "C" {
|
||||
typedef void (*SigHandler)(int);
|
||||
|
||||
static jmp_buf s_jmpSIGILL;
|
||||
static void SigIllHandler(int)
|
||||
{
|
||||
longjmp(s_jmpSIGILL, 1);
|
||||
}
|
||||
};
|
||||
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
||||
|
||||
#if (CRYPTOPP_ARMV8A_PMULL_AVAILABLE)
|
||||
bool CPU_TryPMULL_ARMV8()
|
||||
{
|
||||
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
||||
volatile bool result = true;
|
||||
__try
|
||||
{
|
||||
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
||||
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
||||
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
||||
|
||||
const poly128_t r1 = vmull_p64(a1, b1);
|
||||
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||
|
||||
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
|
||||
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
||||
const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
||||
|
||||
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
||||
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
||||
}
|
||||
__except (EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return result;
|
||||
# else
|
||||
// longjmp and clobber warnings. Volatile is required.
|
||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
||||
volatile bool result = true;
|
||||
|
||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
|
||||
if (oldHandler == SIG_ERR)
|
||||
return false;
|
||||
|
||||
volatile sigset_t oldMask;
|
||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
||||
return false;
|
||||
|
||||
if (setjmp(s_jmpNoPMULL))
|
||||
result = false;
|
||||
else
|
||||
{
|
||||
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
||||
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
||||
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
||||
|
||||
const poly128_t r1 = vmull_p64(a1, b1);
|
||||
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||
|
||||
// Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
|
||||
const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
||||
const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
||||
|
||||
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
||||
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
||||
}
|
||||
|
||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
||||
signal(SIGILL, oldHandler);
|
||||
return result;
|
||||
# endif
|
||||
}
|
||||
#endif // CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
|
||||
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c)
|
||||
{
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<uint64x2_t>()));
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<uint64x2_t>()));
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<uint64x2_t>()));
|
||||
*(uint64x2_t*)a = veorq_u64(*(uint64x2_t*)b, *(uint64x2_t*)c);
|
||||
}
|
||||
#endif
|
||||
|
||||
NAMESPACE_END
|
||||
38
gcm.cpp
38
gcm.cpp
|
|
@ -49,6 +49,10 @@ NAMESPACE_BEGIN(CryptoPP)
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) && CRYPTOPP_ARMV8A_PMULL_AVAILABLE
|
||||
#if defined(__GNUC__)
|
||||
// Schneiders, Hovsmith and O'Rourke used this trick.
|
||||
|
|
@ -193,6 +197,15 @@ __m128i _mm_clmulepi64_si128(const __m128i &a, const __m128i &b, int i)
|
|||
}
|
||||
#endif
|
||||
|
||||
inline static void Xor16(byte *a, const byte *b, const byte *c)
|
||||
{
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
|
||||
((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
|
||||
((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
|
||||
}
|
||||
|
||||
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
||||
inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
|
||||
{
|
||||
|
|
@ -211,25 +224,6 @@ inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
||||
inline static void NEON_Xor16(byte *a, const byte *b, const byte *c)
|
||||
{
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<uint64x2_t>()));
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<uint64x2_t>()));
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<uint64x2_t>()));
|
||||
*(uint64x2_t*)a = veorq_u64(*(uint64x2_t*)b, *(uint64x2_t*)c);
|
||||
}
|
||||
#endif
|
||||
|
||||
inline static void Xor16(byte *a, const byte *b, const byte *c)
|
||||
{
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
|
||||
CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
|
||||
((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
|
||||
((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
|
||||
}
|
||||
|
||||
#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
static const word64 s_clmulConstants64[] = {
|
||||
|
|
@ -441,7 +435,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
|||
if (HasNEON())
|
||||
for (j=2; j<=0x80; j*=2)
|
||||
for (k=1; k<j; k++)
|
||||
NEON_Xor16(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16);
|
||||
GCM_Xor16_NEON(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16);
|
||||
else
|
||||
#endif
|
||||
for (j=2; j<=0x80; j*=2)
|
||||
|
|
@ -497,8 +491,8 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
|||
for (j=2; j<=8; j*=2)
|
||||
for (k=1; k<j; k++)
|
||||
{
|
||||
NEON_Xor16(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16);
|
||||
NEON_Xor16(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16);
|
||||
GCM_Xor16_NEON(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16);
|
||||
GCM_Xor16_NEON(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -264,14 +264,14 @@ bool TestSettings()
|
|||
// Don't assert the alignment of testvals. That's what this test is for.
|
||||
byte testvals[10] = {1,2,2,3,3,3,3,2,2,1};
|
||||
if (*(word32 *)(void *)(testvals+3) == 0x03030303 && *(word64 *)(void *)(testvals+1) == W64LIT(0x0202030303030202))
|
||||
std::cout << "passed: Your machine allows unaligned data access.\n";
|
||||
std::cout << "passed: Unaligned data access (CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS).\n";
|
||||
else
|
||||
{
|
||||
std::cout << "FAILED: Unaligned data access gave incorrect results.\n";
|
||||
pass = false;
|
||||
}
|
||||
#else
|
||||
std::cout << "passed: CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is not defined. Will restrict to aligned data access.\n";
|
||||
std::cout << "passed: Aligned data access (no CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS).\n";
|
||||
#endif
|
||||
|
||||
if (sizeof(byte) == 1)
|
||||
|
|
|
|||
Loading…
Reference in New Issue