Merge branch 'master' into hmqv

pull/263/head
Jeffrey Walton 2016-07-24 20:43:25 -04:00
commit 11c723e1b9
11 changed files with 139 additions and 86 deletions

View File

@ -14,21 +14,9 @@
NAMESPACE_BEGIN(CryptoPP) NAMESPACE_BEGIN(CryptoPP)
// Uncomment for benchmarking C++ against SSE2 or NEON // Uncomment for benchmarking C++ against SSE2 or NEON
#undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE // #undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
// #undef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE // #undef CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
// Visual Studio needs both VS2005 (1400) and _M_X64 for SSE2 and _mm_set_epi64x()
// http://msdn.microsoft.com/en-us/library/y0dh78ez%28v=vs.80%29.aspx
#if defined(_MSC_VER) && ((_MSC_VER < 1400) || !defined(_M_X64))
# undef CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
#endif
// Visual Studio needs VS2008 (1500); no dependency on _mm_set_epi64x()
// http://msdn.microsoft.com/en-us/library/bb892950%28v=vs.90%29.aspx
#if defined(_MSC_VER) && (_MSC_VER < 1500)
# undef CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
#endif
// Apple Clang 6.0/Clang 3.5 does not have SSSE3 intrinsics // Apple Clang 6.0/Clang 3.5 does not have SSSE3 intrinsics
// http://llvm.org/bugs/show_bug.cgi?id=20213 // http://llvm.org/bugs/show_bug.cgi?id=20213
#if (defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION <= 60000)) || (defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION <= 30500)) #if (defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION <= 60000)) || (defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION <= 30500))
@ -37,12 +25,12 @@ NAMESPACE_BEGIN(CryptoPP)
// Sun Studio 12.3 and earlier lack SSE2's _mm_set_epi64x. // Sun Studio 12.3 and earlier lack SSE2's _mm_set_epi64x.
// Also see http://stackoverflow.com/a/38547909/608639 // Also see http://stackoverflow.com/a/38547909/608639
#if defined(__SUNPRO_CC) && (__SUNPRO_CC < 0x5130) #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && ((__SUNPRO_CC >= 0x5100 && __SUNPRO_CC < 0x5130) || (_MSC_VER >= 1200 && _MSC_VER < 1600))
inline __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b) inline __m128i _mm_set_epi64x(const word64 a, const word64 b)
{ {
union INT_128_64x2 { union INT_128_64x2 {
__m128i v128; __m128i v128;
uint64_t v64[2]; word64 v64[2];
}; };
INT_128_64x2 val; INT_128_64x2 val;

View File

@ -217,6 +217,9 @@ typedef unsigned int word32;
#if defined(_MSC_VER) || defined(__BORLANDC__) #if defined(_MSC_VER) || defined(__BORLANDC__)
typedef unsigned __int64 word64; typedef unsigned __int64 word64;
#define W64LIT(x) x##ui64 #define W64LIT(x) x##ui64
#elif ((__arm64__ || __aarch64__) && (_LP64 || __LP64__))
typedef unsigned long word64;
#define W64LIT(x) x##UL
#else #else
typedef unsigned long long word64; typedef unsigned long long word64;
#define W64LIT(x) x##ULL #define W64LIT(x) x##ULL

View File

@ -217,6 +217,9 @@ typedef unsigned int word32;
#if defined(_MSC_VER) || defined(__BORLANDC__) #if defined(_MSC_VER) || defined(__BORLANDC__)
typedef unsigned __int64 word64; typedef unsigned __int64 word64;
#define W64LIT(x) x##ui64 #define W64LIT(x) x##ui64
#elif ((__arm64__ || __aarch64__) && (_LP64 || __LP64__))
typedef unsigned long word64;
#define W64LIT(x) x##UL
#else #else
typedef unsigned long long word64; typedef unsigned long long word64;
#define W64LIT(x) x##ULL #define W64LIT(x) x##ULL

64
cpu.cpp
View File

@ -319,8 +319,8 @@ void DetectX86Features()
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
// //
bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false; bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false;
bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false, CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false; bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false;
bool CRYPTOPP_SECTION_INIT g_hasSHA2 = false; bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false;
word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
@ -332,6 +332,12 @@ extern "C"
longjmp(s_jmpNoNEON, 1); longjmp(s_jmpNoNEON, 1);
} }
// Jump buffer that SigIllHandlerPMULL() transfers control to.
static jmp_buf s_jmpNoPMULL;

// Signal handler for the PMULL probe. It never returns normally; it
// unwinds to whoever armed s_jmpNoPMULL with setjmp(), which then sees a
// return value of 1.
static void SigIllHandlerPMULL(int signum)
{
	(void)signum;  // the signal number is not needed
	longjmp(s_jmpNoPMULL, 1);
}
static jmp_buf s_jmpNoCRC32; static jmp_buf s_jmpNoCRC32;
static void SigIllHandlerCRC32(int) static void SigIllHandlerCRC32(int)
{ {
@ -426,6 +432,59 @@ static bool TryNEON()
#endif // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE #endif // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
} }
// Runtime probe for the polynomial multiply intrinsics vmull_p64 and
// vmull_high_p64. The probe executes both and traps the failure case.
//
// Returns true when both intrinsics ran without faulting and their results
// differ. Returns false when the instructions faulted, when the probe could
// not be set up, or when CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE is
// not enabled at compile time.
static bool TryPMULL()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		const poly64_t a1={1}, b1={2};
		const poly64x2_t a2={1}, b2={2};
		const poly128_t r1 = vmull_p64(a1, b1);
		const poly128_t r2 = vmull_high_p64(a2, b2);

		result = (r1 != r2);
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
	if (oldHandler == SIG_ERR)
		return false;

	// Save the current signal mask so it can be put back after the probe.
	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// The probe handler is already installed at this point; restore the
		// caller's SIGILL handler before giving up so it does not stay active.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoPMULL))
		result = false;  // reached via longjmp from SigIllHandlerPMULL
	else
	{
		const poly64_t a1={1}, b1={2};
		const poly64x2_t a2={1}, b2={2};
		const poly128_t r1 = vmull_p64(a1, b1);
		const poly128_t r2 = vmull_high_p64(a2, b2);

		result = (r1 != r2);
	}

	// Undo the probe's changes: signal mask first, then the SIGILL handler.
	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
static bool TryCRC32() static bool TryCRC32()
{ {
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) #if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
@ -660,6 +719,7 @@ void DetectArmFeatures()
#endif #endif
{ {
g_hasNEON = TryNEON(); g_hasNEON = TryNEON();
g_hasPMULL = TryPMULL();
g_hasCRC32 = TryCRC32(); g_hasCRC32 = TryCRC32();
g_hasAES = TryAES(); g_hasAES = TryAES();
g_hasSHA1 = TrySHA1(); g_hasSHA1 = TrySHA1();

59
cpu.h
View File

@ -364,7 +364,7 @@ inline int GetCacheLineSize()
#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
extern bool g_ArmDetectionDone; extern bool g_ArmDetectionDone;
extern bool g_hasNEON, g_hasCRC32, g_hasAES, g_hasSHA1, g_hasSHA2; extern bool g_hasNEON, g_hasPMULL, g_hasCRC32, g_hasAES, g_hasSHA1, g_hasSHA2;
void CRYPTOPP_API DetectArmFeatures(); void CRYPTOPP_API DetectArmFeatures();
//! \brief Determine if an ARM processor has Advanced SIMD available //! \brief Determine if an ARM processor has Advanced SIMD available
@ -380,6 +380,19 @@ inline bool HasNEON()
return g_hasNEON; return g_hasNEON;
} }
//! \brief Determine if an ARM processor provides Polynomial Multiplication (long)
//! \returns true if the hardware is capable of polynomial multiplications at runtime, false otherwise.
//! \details The multiplication instructions are available under Aarch64 (ARM-64) and Aarch32 (ARM-32).
//! \details Runtime support requires compile time support. When compiling with GCC, you may
//!   need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
//!   <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
//! \details The first call made before detection has completed runs DetectArmFeatures();
//!   the cached flag is returned afterwards.
inline bool HasPMULL()
{
	// Run detection on demand if it has not happened yet.
	if (g_ArmDetectionDone == false)
	{
		DetectArmFeatures();
	}
	return g_hasPMULL;
}
//! \brief Determine if an ARM processor has CRC32 available //! \brief Determine if an ARM processor has CRC32 available
//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise. //! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
//! \details CRC32 instructions provide access to the processor's CRC32 and CRC32-C instructions. //! \details CRC32 instructions provide access to the processor's CRC32 and CRC32-C instructions.
@ -485,20 +498,6 @@ inline int GetCacheLineSize()
#else #else
#define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
#if defined(CRYPTOPP_LLVM_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER)
#define NEW_LINE "\n"
#define INTEL_PREFIX ".intel_syntax;"
#define INTEL_NOPREFIX ".intel_syntax;"
#define ATT_PREFIX ".att_syntax;"
#define ATT_NOPREFIX ".att_syntax;"
#else
#define NEW_LINE
#define INTEL_PREFIX ".intel_syntax prefix;"
#define INTEL_NOPREFIX ".intel_syntax noprefix;"
#define ATT_PREFIX ".att_syntax prefix;"
#define ATT_NOPREFIX ".att_syntax noprefix;"
#endif
// define these in two steps to allow arguments to be expanded // define these in two steps to allow arguments to be expanded
#define GNU_AS1(x) #x ";" NEW_LINE #define GNU_AS1(x) #x ";" NEW_LINE
#define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE #define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE
@ -519,21 +518,6 @@ inline int GetCacheLineSize()
#define IF0(y) #define IF0(y)
#define IF1(y) y #define IF1(y) y
// Should be confined to GCC, but it's used to help manage Clang 3.4 compiler error.
// Also see LLVM Bug 24232, http://llvm.org/bugs/show_bug.cgi?id=24232 .
#ifndef INTEL_PREFIX
#define INTEL_PREFIX
#endif
#ifndef INTEL_NOPREFIX
#define INTEL_NOPREFIX
#endif
#ifndef ATT_PREFIX
#define ATT_PREFIX
#endif
#ifndef ATT_NOPREFIX
#define ATT_NOPREFIX
#endif
#ifdef CRYPTOPP_GENERATE_X64_MASM #ifdef CRYPTOPP_GENERATE_X64_MASM
#define ASM_MOD(x, y) ((x) MOD (y)) #define ASM_MOD(x, y) ((x) MOD (y))
#define XMMWORD_PTR XMMWORD PTR #define XMMWORD_PTR XMMWORD PTR
@ -666,6 +650,21 @@ inline int GetCacheLineSize()
#endif // X86/X32/X64 #endif // X86/X32/X64
// Assembler syntax directives and statement separator for inline assembly.
// Applies to both X86/X32/X64 and ARM32/ARM64
#if !defined(CRYPTOPP_LLVM_CLANG_VERSION) && !defined(CRYPTOPP_APPLE_CLANG_VERSION) && !defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER)
// Neither Clang nor its integrated assembler: the directives carry an explicit
// prefix/noprefix operand, and NEW_LINE expands to nothing.
#define NEW_LINE
#define INTEL_PREFIX ".intel_syntax prefix;"
#define INTEL_NOPREFIX ".intel_syntax noprefix;"
#define ATT_PREFIX ".att_syntax prefix;"
#define ATT_NOPREFIX ".att_syntax noprefix;"
#else
// Clang or Clang's integrated assembler: the directives are emitted without
// the prefix/noprefix operand, and NEW_LINE is a literal line break.
#define NEW_LINE "\n"
#define INTEL_PREFIX ".intel_syntax;"
#define INTEL_NOPREFIX ".intel_syntax;"
#define ATT_PREFIX ".att_syntax;"
#define ATT_NOPREFIX ".att_syntax;"
#endif
NAMESPACE_END NAMESPACE_END
#endif // CRYPTOPP_CPU_H #endif // CRYPTOPP_CPU_H

View File

@ -112,7 +112,6 @@ done
# Defaults if not set # Defaults if not set
if [ -z "$APPLE_SDK" ]; then if [ -z "$APPLE_SDK" ]; then
APPLE_SDK=iPhoneOS APPLE_SDK=iPhoneOS
IOS_ARCH=armv7
fi fi
if [ -z "$IOS_ARCH" ]; then if [ -z "$IOS_ARCH" ]; then

View File

@ -317,13 +317,14 @@ bool TestSettings()
#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
bool hasNEON = HasNEON(); bool hasNEON = HasNEON();
bool hasPMULL = HasPMULL();
bool hasCRC32 = HasCRC32(); bool hasCRC32 = HasCRC32();
bool hasAES = HasAES(); bool hasAES = HasAES();
bool hasSHA1 = HasSHA1(); bool hasSHA1 = HasSHA1();
bool hasSHA2 = HasSHA2(); bool hasSHA2 = HasSHA2();
cout << "passed: "; cout << "passed: ";
cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << ", hasAES == " << hasAES << ", hasSHA1 == " << hasSHA1 << ", hasSHA2 == " << hasSHA2 << endl; cout << "hasNEON == " << hasNEON << ", hasPMULL == " << hasPMULL << ", hasCRC32 == " << hasCRC32 << ", hasAES == " << hasAES << ", hasSHA1 == " << hasSHA1 << ", hasSHA2 == " << hasSHA2 << endl;
#endif #endif
if (!pass) if (!pass)