Cleanup ARMv7 and ARMv8

pull/461/head
Jeffrey Walton 2017-07-31 07:13:34 -04:00
parent 9159992938
commit 9d8a89256d
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
6 changed files with 51 additions and 42 deletions

View File

@ -551,7 +551,7 @@ NAMESPACE_END
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARM_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_MSC_VERSION >= 2000) || \
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
defined(__aarch32__) || defined(__aarch64__)
# define CRYPTOPP_ARM_CRC32_AVAILABLE 1
# endif
#endif
@ -562,7 +562,7 @@ NAMESPACE_END
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARM_PMULL_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM) && !defined(__apple_build_version__)
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VERSION >= 2000) || \
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
defined(__aarch32__) || defined(__aarch64__)
# define CRYPTOPP_ARM_PMULL_AVAILABLE 1
# endif
#endif
@ -573,7 +573,7 @@ NAMESPACE_END
// TODO: Add MSC_VER and ARM-64 platform define when available
#if !defined(CRYPTOPP_ARM_CRYPTO_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ASM)
# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_MSC_VERSION >= 2000) || \
(CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30500)
defined(__aarch32__) || defined(__aarch64__)
# define CRYPTOPP_ARM_AES_AVAILABLE 1
# define CRYPTOPP_ARM_PMULL_AVAILABLE 1
# define CRYPTOPP_ARM_SHA_AVAILABLE 1

View File

@ -11,7 +11,7 @@
#include "misc.h"
// Clang and GCC hoops...
#if !(defined(__ARM_FEATURE_CRC32) || defined(__aarch32__) || defined(__aarch64__) || defined(_MSC_VER))
#if !(defined(__ARM_FEATURE_CRC32) || defined(_MSC_VER))
# undef CRYPTOPP_ARM_CRC32_AVAILABLE
#endif
@ -20,7 +20,6 @@
#endif
#if (CRYPTOPP_ARM_CRC32_AVAILABLE)
# include "arm_neon.h"
# include "arm_acle.h"
#endif

View File

@ -1,6 +1,7 @@
// crc.cpp - originally written and placed in the public domain by Wei Dai
#include "pch.h"
#include "config.h"
#include "crc.h"
#include "misc.h"
#include "cpu.h"

View File

@ -11,7 +11,7 @@
#include "misc.h"
// Clang and GCC hoops...
#if !(defined(__ARM_FEATURE_CRYPTO) || defined(__aarch32__) || defined(__aarch64__) || defined(_MSC_VER))
#if !(defined(__ARM_FEATURE_CRYPTO) || defined(_MSC_VER))
# undef CRYPTOPP_ARM_PMULL_AVAILABLE
#endif
@ -20,8 +20,11 @@
# include "wmmintrin.h"
#endif
#if (CRYPTOPP_ARM_PMULL_AVAILABLE)
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include "arm_neon.h"
#endif
#if (CRYPTOPP_ARM_PMULL_AVAILABLE)
# include "arm_acle.h"
#endif
@ -307,6 +310,15 @@ void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned
vst1_u64((uint64_t *)(mulTable+i+8), vget_low_u64(h1));
}
void GCM_ReverseHashBufferIfNeeded_NEON(byte *hashBuffer)
{
if (GetNativeByteOrder() != BIG_ENDIAN_ORDER)
{
const uint8x16_t x = vrev64q_u8(vld1q_u8(hashBuffer));
vst1q_u8(hashBuffer, vextq_u8(x, x, 8));
}
}
size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer)
{
const uint64x2_t* table = reinterpret_cast<const uint64x2_t*>(mtable);
@ -499,12 +511,6 @@ void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned
_mm_storel_epi64((__m128i *)(void *)(mulTable+i+8), h1);
}
void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
{
__m128i &x = *(__m128i *)(void *)hashBuffer;
x = _mm_shuffle_epi8(x, s_clmulConstants[1]);
}
size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer)
{
const __m128i *table = (const __m128i *)(const void *)mtable;
@ -521,9 +527,9 @@ size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mt
while (true)
{
__m128i h0 = _mm_load_si128(table+i);
__m128i h1 = _mm_load_si128(table+i+1);
__m128i h2 = _mm_xor_si128(h0, h1);
const __m128i h0 = _mm_load_si128(table+i);
const __m128i h1 = _mm_load_si128(table+i+1);
const __m128i h2 = _mm_xor_si128(h0, h1);
if (++i == s)
{
@ -569,7 +575,14 @@ size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mt
_mm_store_si128((__m128i *)(void *)hbuffer, x);
return len;
}
#endif
#if CRYPTOPP_SSSE3_AVAILABLE
void GCM_ReverseHashBufferIfNeeded_SSSE3(byte *hashBuffer)
{
__m128i &x = *(__m128i *)(void *)hashBuffer;
x = _mm_shuffle_epi8(x, s_clmulConstants[1]);
}
#endif
NAMESPACE_END

42
gcm.cpp
View File

@ -9,10 +9,6 @@
#include "pch.h"
#include "config.h"
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4189)
#endif
#ifndef CRYPTOPP_IMPORTS
#ifndef CRYPTOPP_GENERATE_X64_MASM
@ -23,9 +19,9 @@
// SunCC 5.13 and below crash with AES-NI/CLMUL and C++{03|11}. Disable one or the other.
// Also see http://github.com/weidai11/cryptopp/issues/226
#if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x513)
# undef CRYPTOPP_CLMUL_AVAILABLE
#endif
// #if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x513)
// # undef CRYPTOPP_CLMUL_AVAILABLE
// #endif
#include "gcm.h"
#include "cpu.h"
@ -71,7 +67,7 @@ inline static void Xor16(byte *a, const byte *b, const byte *c)
}
#if CRYPTOPP_SSE2_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
inline static void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c)
{
// SunCC 5.14 crash (bewildering since asserts are not in effect in release builds)
// Also see http://github.com/weidai11/cryptopp/issues/226 and http://github.com/weidai11/cryptopp/issues/284
@ -88,11 +84,13 @@ inline static void SSE2_Xor16(byte *a, const byte *b, const byte *c)
}
#endif
#if CRYPTOPP_SSSE3_AVAILABLE
extern void GCM_ReverseHashBufferIfNeeded_SSSE3(byte *hashBuffer);
#endif
#if CRYPTOPP_CLMUL_AVAILABLE
extern __m128i GCM_Multiply_CLMUL(const __m128i &x, const __m128i &h, const __m128i &r);
extern __m128i GCM_Reduce_CLMUL(__m128i c0, __m128i c1, __m128i c2, const __m128i &r);
extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer);
extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
CRYPTOPP_ALIGN_DATA(16)
@ -105,10 +103,14 @@ const __m128i *s_clmulConstants = (const __m128i *)(const void *)s_clmulConstant
const unsigned int s_cltableSizeInBlocks = 8;
#endif // CRYPTOPP_CLMUL_AVAILABLE
#if CRYPTOPP_ARM_NEON_AVAILABLE
extern void GCM_ReverseHashBufferIfNeeded_NEON(byte *hashBuffer);
#endif
#if CRYPTOPP_ARM_PMULL_AVAILABLE
extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
extern uint64x2_t GCM_Multiply_PMULL(const uint64x2_t &x, const uint64x2_t &h, const uint64x2_t &r);
extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
CRYPTOPP_ALIGN_DATA(16)
const word64 s_clmulConstants64[] = {
@ -203,7 +205,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
if (HasSSE2())
for (j=2; j<=0x80; j*=2)
for (k=1; k<j; k++)
SSE2_Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
else
#elif CRYPTOPP_ARM_NEON_AVAILABLE
if (HasNEON())
@ -256,8 +258,8 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (j=2; j<=8; j*=2)
for (k=1; k<j; k++)
{
SSE2_Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
SSE2_Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
}
else
#elif CRYPTOPP_ARM_NEON_AVAILABLE
@ -285,16 +287,12 @@ inline void GCM_Base::ReverseHashBufferIfNeeded()
#if CRYPTOPP_CLMUL_AVAILABLE
if (HasCLMUL())
{
GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
GCM_ReverseHashBufferIfNeeded_SSSE3(HashBuffer());
}
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
if (HasPMULL())
#elif CRYPTOPP_ARM_NEON_AVAILABLE
if (HasNEON())
{
if (GetNativeByteOrder() != BIG_ENDIAN_ORDER)
{
const uint8x16_t x = vrev64q_u8(vld1q_u8(HashBuffer()));
vst1q_u8(HashBuffer(), vextq_u8(x, x, 8));
}
GCM_ReverseHashBufferIfNeeded_NEON(HashBuffer());
}
#endif
}

View File

@ -11,7 +11,7 @@
#include "misc.h"
// Clang and GCC hoops...
#if !(defined(__ARM_FEATURE_CRYPTO) || defined(__aarch32__) || defined(__aarch64__) || defined(_MSC_VER))
#if !(defined(__ARM_FEATURE_CRYPTO) || defined(_MSC_VER))
# undef CRYPTOPP_ARM_SHA_AVAILABLE
#endif
@ -23,11 +23,9 @@
# include "immintrin.h"
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
#if (CRYPTOPP_ARM_SHA_AVAILABLE)
# include "arm_neon.h"
# if (CRYPTOPP_ARM_SHA_AVAILABLE)
# include "arm_acle.h"
# endif
#endif
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY