Prepare for POWER8 carryless multiplies using vpmsum
parent
6cd7f83346
commit
9ff731824b
5
config.h
5
config.h
|
|
@ -785,9 +785,10 @@ NAMESPACE_END
|
||||||
|
|
||||||
#if !defined(CRYPTOPP_POWER8_AES_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER8_AES) && defined(CRYPTOPP_POWER8_AVAILABLE)
|
#if !defined(CRYPTOPP_POWER8_AES_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER8_AES) && defined(CRYPTOPP_POWER8_AVAILABLE)
|
||||||
# if defined(__CRYPTO__) || defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800)
|
# if defined(__CRYPTO__) || defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800)
|
||||||
# define CRYPTOPP_POWER8_AES_AVAILABLE 1
|
|
||||||
# define CRYPTOPP_POWER8_SHA_AVAILABLE 1
|
|
||||||
//# define CRYPTOPP_POWER8_CRC_AVAILABLE 1
|
//# define CRYPTOPP_POWER8_CRC_AVAILABLE 1
|
||||||
|
# define CRYPTOPP_POWER8_AES_AVAILABLE 1
|
||||||
|
// # define CRYPTOPP_POWER8_PMULL_AVAILABLE 1
|
||||||
|
# define CRYPTOPP_POWER8_SHA_AVAILABLE 1
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
||||||
18
cpu.cpp
18
cpu.cpp
|
|
@ -804,6 +804,7 @@ bool CRYPTOPP_SECTION_INIT g_hasAltivec = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasPower7 = false;
|
bool CRYPTOPP_SECTION_INIT g_hasPower7 = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasPower8 = false;
|
bool CRYPTOPP_SECTION_INIT g_hasPower8 = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasAES = false;
|
bool CRYPTOPP_SECTION_INIT g_hasAES = false;
|
||||||
|
bool CRYPTOPP_SECTION_INIT g_hasPMULL = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasSHA256 = false;
|
bool CRYPTOPP_SECTION_INIT g_hasSHA256 = false;
|
||||||
bool CRYPTOPP_SECTION_INIT g_hasSHA512 = false;
|
bool CRYPTOPP_SECTION_INIT g_hasSHA512 = false;
|
||||||
word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
||||||
|
|
@ -812,6 +813,7 @@ extern bool CPU_ProbeAltivec();
|
||||||
extern bool CPU_ProbePower7();
|
extern bool CPU_ProbePower7();
|
||||||
extern bool CPU_ProbePower8();
|
extern bool CPU_ProbePower8();
|
||||||
extern bool CPU_ProbeAES();
|
extern bool CPU_ProbeAES();
|
||||||
|
extern bool CPU_ProbePMULL();
|
||||||
extern bool CPU_ProbeSHA256();
|
extern bool CPU_ProbeSHA256();
|
||||||
extern bool CPU_ProbeSHA512();
|
extern bool CPU_ProbeSHA512();
|
||||||
|
|
||||||
|
|
@ -884,6 +886,20 @@ inline bool CPU_QueryAES()
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool CPU_QueryPMULL()
|
||||||
|
{
|
||||||
|
// Power8 and ISA 2.07 provide in-core crypto. Glibc
|
||||||
|
// 2.24 or higher is required for PPC_FEATURE2_VEC_CRYPTO.
|
||||||
|
#if defined(__linux__)
|
||||||
|
if ((getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) != 0)
|
||||||
|
return true;
|
||||||
|
#elif defined(_AIX)
|
||||||
|
if (__power_8_andup() != 0)
|
||||||
|
return true;
|
||||||
|
#endif
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
inline bool CPU_QuerySHA256()
|
inline bool CPU_QuerySHA256()
|
||||||
{
|
{
|
||||||
// Power8 and ISA 2.07 provide in-core crypto. Glibc
|
// Power8 and ISA 2.07 provide in-core crypto. Glibc
|
||||||
|
|
@ -918,7 +934,7 @@ void DetectPowerpcFeatures()
|
||||||
g_hasAltivec = CPU_QueryAltivec() || CPU_ProbeAltivec();
|
g_hasAltivec = CPU_QueryAltivec() || CPU_ProbeAltivec();
|
||||||
g_hasPower7 = CPU_QueryPower7() || CPU_ProbePower7();
|
g_hasPower7 = CPU_QueryPower7() || CPU_ProbePower7();
|
||||||
g_hasPower8 = CPU_QueryPower8() || CPU_ProbePower8();
|
g_hasPower8 = CPU_QueryPower8() || CPU_ProbePower8();
|
||||||
//g_hasPMULL = CPU_QueryPMULL() || CPU_ProbePMULL();
|
g_hasPMULL = CPU_QueryPMULL() || CPU_ProbePMULL();
|
||||||
g_hasAES = CPU_QueryAES() || CPU_ProbeAES();
|
g_hasAES = CPU_QueryAES() || CPU_ProbeAES();
|
||||||
g_hasSHA256 = CPU_QuerySHA256() || CPU_ProbeSHA256();
|
g_hasSHA256 = CPU_QuerySHA256() || CPU_ProbeSHA256();
|
||||||
g_hasSHA512 = CPU_QuerySHA512() || CPU_ProbeSHA512();
|
g_hasSHA512 = CPU_QuerySHA512() || CPU_ProbeSHA512();
|
||||||
|
|
|
||||||
56
cpu.h
56
cpu.h
|
|
@ -342,7 +342,17 @@ inline int GetCacheLineSize()
|
||||||
// Hide from Doxygen
|
// Hide from Doxygen
|
||||||
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
||||||
extern bool g_ArmDetectionDone;
|
extern bool g_ArmDetectionDone;
|
||||||
extern bool g_hasARMv7, g_hasNEON, g_hasPMULL, g_hasCRC32, g_hasAES, g_hasSHA1, g_hasSHA2, g_hasSHA512, g_hasSHA3, g_hasSM3, g_hasSM4;
|
extern bool g_hasARMv7;
|
||||||
|
extern bool g_hasNEON;
|
||||||
|
extern bool g_hasPMULL;
|
||||||
|
extern bool g_hasCRC32;
|
||||||
|
extern bool g_hasAES;
|
||||||
|
extern bool g_hasSHA1;
|
||||||
|
extern bool g_hasSHA2;
|
||||||
|
extern bool g_hasSHA512;
|
||||||
|
extern bool g_hasSHA3;
|
||||||
|
extern bool g_hasSM3;
|
||||||
|
extern bool g_hasSM4;
|
||||||
void CRYPTOPP_API DetectArmFeatures();
|
void CRYPTOPP_API DetectArmFeatures();
|
||||||
#endif // CRYPTOPP_DOXYGEN_PROCESSING
|
#endif // CRYPTOPP_DOXYGEN_PROCESSING
|
||||||
|
|
||||||
|
|
@ -578,7 +588,13 @@ inline bool HasSM4()
|
||||||
// Hide from Doxygen
|
// Hide from Doxygen
|
||||||
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
||||||
extern bool g_PowerpcDetectionDone;
|
extern bool g_PowerpcDetectionDone;
|
||||||
extern bool g_hasAltivec, g_hasPower7, g_hasPower8, g_hasAES, g_hasSHA256, g_hasSHA512;
|
extern bool g_hasAltivec;
|
||||||
|
extern bool g_hasPower7;
|
||||||
|
extern bool g_hasPower8;
|
||||||
|
extern bool g_hasAES;
|
||||||
|
extern bool g_hasPMULL;
|
||||||
|
extern bool g_hasSHA256;
|
||||||
|
extern bool g_hasSHA512;
|
||||||
extern word32 g_cacheLineSize;
|
extern word32 g_cacheLineSize;
|
||||||
void CRYPTOPP_API DetectPowerpcFeatures();
|
void CRYPTOPP_API DetectPowerpcFeatures();
|
||||||
#endif // CRYPTOPP_DOXYGEN_PROCESSING
|
#endif // CRYPTOPP_DOXYGEN_PROCESSING
|
||||||
|
|
@ -590,11 +606,11 @@ void CRYPTOPP_API DetectPowerpcFeatures();
|
||||||
/// \returns true if the hardware is capable of Altivec at runtime, false otherwise.
|
/// \returns true if the hardware is capable of Altivec at runtime, false otherwise.
|
||||||
/// \details Altivec instructions are available under most modern PowerPCs.
|
/// \details Altivec instructions are available under most modern PowerPCs.
|
||||||
/// \details Runtime support requires compile time support. When compiling with GCC, you may
|
/// \details Runtime support requires compile time support. When compiling with GCC, you may
|
||||||
/// need to compile with <tt>-mcpu=power7</tt>; while IBM XL C/C++ compilers require
|
/// need to compile with <tt>-mcpu=power4</tt>; while IBM XL C/C++ compilers require
|
||||||
/// <tt>-qarch=pwr7 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
|
/// <tt>-qarch=pwr6 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
|
||||||
/// \details Altivec was first available on Power4 platforms. However Crypto++ relies on unaligned
|
/// \details Altivec was first available on Power4 platforms. However Crypto++ relies heavily
|
||||||
/// loads and stores which is a Power7 feature. If the platform lacks Power7 extensions, then the
|
/// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7
|
||||||
/// GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_ALTIVEC</tt>.
|
/// extensions, then the GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_POWER7</tt>.
|
||||||
/// \note This function is only available on PowerPC and PowerPC-64 platforms
|
/// \note This function is only available on PowerPC and PowerPC-64 platforms
|
||||||
inline bool HasAltivec()
|
inline bool HasAltivec()
|
||||||
{
|
{
|
||||||
|
|
@ -609,9 +625,9 @@ inline bool HasAltivec()
|
||||||
/// \details Runtime support requires compile time support. When compiling with GCC, you may
|
/// \details Runtime support requires compile time support. When compiling with GCC, you may
|
||||||
/// need to compile with <tt>-mcpu=power8</tt>; while IBM XL C/C++ compilers require
|
/// need to compile with <tt>-mcpu=power8</tt>; while IBM XL C/C++ compilers require
|
||||||
/// <tt>-qarch=pwr8 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
|
/// <tt>-qarch=pwr8 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
|
||||||
/// \details Altivec was first available on Power4 platforms. However Crypto++ relies on unaligned
|
/// \details Altivec was first available on Power4 platforms. However Crypto++ relies heavily
|
||||||
/// loads and stores which is a Power7 feature. If the platform lacks Power7 extensions, then the
|
/// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7
|
||||||
/// GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_ALTIVEC</tt>.
|
/// extensions, then the GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_POWER7</tt>.
|
||||||
/// \note This function is only available on PowerPC and PowerPC-64 platforms
|
/// \note This function is only available on PowerPC and PowerPC-64 platforms
|
||||||
inline bool HasPower7()
|
inline bool HasPower7()
|
||||||
{
|
{
|
||||||
|
|
@ -626,9 +642,9 @@ inline bool HasPower7()
|
||||||
/// \details Runtime support requires compile time support. When compiling with GCC, you may
|
/// \details Runtime support requires compile time support. When compiling with GCC, you may
|
||||||
/// need to compile with <tt>-mcpu=power8</tt>; while IBM XL C/C++ compilers require
|
/// need to compile with <tt>-mcpu=power8</tt>; while IBM XL C/C++ compilers require
|
||||||
/// <tt>-qarch=pwr8 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
|
/// <tt>-qarch=pwr8 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
|
||||||
/// \details Altivec was first available on Power4 platforms. However Crypto++ relies on unaligned
|
/// \details Altivec was first available on Power4 platforms. However Crypto++ relies heavily
|
||||||
/// loads and stores which is a Power7 feature. If the platform lacks Power7 extensions, then the
|
/// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7
|
||||||
/// GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_ALTIVEC</tt>.
|
/// extensions, then the GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_POWER7</tt>.
|
||||||
/// \note This function is only available on PowerPC and PowerPC-64 platforms
|
/// \note This function is only available on PowerPC and PowerPC-64 platforms
|
||||||
inline bool HasPower8()
|
inline bool HasPower8()
|
||||||
{
|
{
|
||||||
|
|
@ -651,6 +667,20 @@ inline bool HasAES()
|
||||||
return g_hasAES;
|
return g_hasAES;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \brief Determine if a PowerPC processor has Polynomial Multiply available
|
||||||
|
/// \returns true if the hardware is capable of PMULL at runtime, false otherwise.
|
||||||
|
/// \details PMULL is part of the in-crypto extensions on Power8 and Power9.
|
||||||
|
/// \details Runtime support requires compile time support. When compiling with GCC, you may
|
||||||
|
/// need to compile with <tt>-mcpu=power8</tt>; while IBM XL C/C++ compilers require
|
||||||
|
/// <tt>-qarch=pwr8 -qaltivec</tt>. Also see PowerPC's <tt>__CRYPTO</tt> preprocessor macro.
|
||||||
|
/// \note This function is only available on PowerPC and PowerPC-64 platforms
|
||||||
|
inline bool HasPMULL()
|
||||||
|
{
|
||||||
|
if (!g_PowerpcDetectionDone)
|
||||||
|
DetectPowerpcFeatures();
|
||||||
|
return g_hasPMULL;
|
||||||
|
}
|
||||||
|
|
||||||
/// \brief Determine if a PowerPC processor has SHA256 available
|
/// \brief Determine if a PowerPC processor has SHA256 available
|
||||||
/// \returns true if the hardware is capable of SHA256 at runtime, false otherwise.
|
/// \returns true if the hardware is capable of SHA256 at runtime, false otherwise.
|
||||||
/// \details SHA is part of the in-crypto extensions on Power8 and Power9.
|
/// \details SHA is part of the in-crypto extensions on Power8 and Power9.
|
||||||
|
|
|
||||||
115
gcm-simd.cpp
115
gcm-simd.cpp
|
|
@ -39,6 +39,10 @@
|
||||||
# include <arm_acle.h>
|
# include <arm_acle.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(CRYPTOPP_POWER8_PMULL_AVAILABLE)
|
||||||
|
# include "ppc-simd.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||||
# include <signal.h>
|
# include <signal.h>
|
||||||
# include <setjmp.h>
|
# include <setjmp.h>
|
||||||
|
|
@ -61,6 +65,8 @@ extern const char GCM_SIMD_FNAME[] = __FILE__;
|
||||||
|
|
||||||
ANONYMOUS_NAMESPACE_BEGIN
|
ANONYMOUS_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
// ************************* Miscellaneous ************************* //
|
||||||
|
|
||||||
// GCC 4.8 is missing PMULL gear
|
// GCC 4.8 is missing PMULL gear
|
||||||
#if (CRYPTOPP_ARM_PMULL_AVAILABLE)
|
#if (CRYPTOPP_ARM_PMULL_AVAILABLE)
|
||||||
# if (CRYPTOPP_GCC_VERSION >= 40800) && (CRYPTOPP_GCC_VERSION < 49000)
|
# if (CRYPTOPP_GCC_VERSION >= 40800) && (CRYPTOPP_GCC_VERSION < 49000)
|
||||||
|
|
@ -182,10 +188,45 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
|
||||||
#endif // Microsoft and compatibles
|
#endif // Microsoft and compatibles
|
||||||
#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
|
#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
|
||||||
|
|
||||||
|
#if CRYPTOPP_POWER8_PMULL_AVAILABLE
|
||||||
|
using CryptoPP::uint8x16_p;
|
||||||
|
using CryptoPP::uint64x2_p;
|
||||||
|
using CryptoPP::VectorXor;
|
||||||
|
using CryptoPP::VectorShiftLeft;
|
||||||
|
using CryptoPP::VectorShiftRight;
|
||||||
|
|
||||||
|
// Carryless (polynomial) multiply of two 64x2 vectors using the
// compiler's vpmsumd intrinsic. IBM XL C/C++ and GCC-style compilers
// spell the intrinsic differently, hence the preprocessor switch.
// NOTE(review): the original comment says this "multiplies low dwords".
// The Power ISA describes vpmsumd as summing (XOR) the products of both
// doubleword pairs, so that holds only when the other lanes are zero.
// TODO confirm before enabling CRYPTOPP_POWER8_PMULL_AVAILABLE.
inline uint64x2_p VMULL_P64(uint64x2_p a, uint64x2_p b)
{
#if defined(__xlc__) || defined(__xlC__)
    const uint64x2_p product = __vpmsumd (a, b);
#else
    const uint64x2_p product = __builtin_crypto_vpmsumd (a, b);
#endif
    return product;
}
|
||||||
|
|
||||||
|
// Carryless multiply of the "other" dword of each operand: both inputs
// are first run through VectorShiftRight<8> against an all-zero vector,
// then handed to the vpmsumd intrinsic.
// NOTE(review): presumably VectorShiftRight<8> moves the high dword into
// the low position and fills with zeros; verify against ppc-simd.h.
inline uint64x2_p VMULL_HIGH_P64(uint64x2_p a, uint64x2_p b)
{
    // a ^ a is an all-zero vector regardless of a's contents.
    const uint64x2_p zero = VectorXor(a, a);
    const uint64x2_p lhs = VectorShiftRight<8>(a, zero);
    const uint64x2_p rhs = VectorShiftRight<8>(b, zero);

    // Only the intrinsic's spelling differs between compilers.
#if defined(__xlc__) || defined(__xlC__)
    return __vpmsumd (lhs, rhs);
#else
    return __builtin_crypto_vpmsumd (lhs, rhs);
#endif
}
|
||||||
|
#endif // CRYPTOPP_POWER8_PMULL_AVAILABLE
|
||||||
|
|
||||||
ANONYMOUS_NAMESPACE_END
|
ANONYMOUS_NAMESPACE_END
|
||||||
|
|
||||||
NAMESPACE_BEGIN(CryptoPP)
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
|
// ************************* Feature Probes ************************* //
|
||||||
|
|
||||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||||
extern "C" {
|
extern "C" {
|
||||||
typedef void (*SigHandler)(int);
|
typedef void (*SigHandler)(int);
|
||||||
|
|
@ -209,8 +250,10 @@ bool CPU_ProbePMULL()
|
||||||
__try
|
__try
|
||||||
{
|
{
|
||||||
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
||||||
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,
|
||||||
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
||||||
|
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,
|
||||||
|
0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
||||||
|
|
||||||
const poly128_t r1 = vmull_p64(a1, b1);
|
const poly128_t r1 = vmull_p64(a1, b1);
|
||||||
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||||
|
|
@ -219,8 +262,10 @@ bool CPU_ProbePMULL()
|
||||||
const uint64x2_t t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
const uint64x2_t t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
||||||
const uint64x2_t t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
const uint64x2_t t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
||||||
|
|
||||||
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 &&
|
||||||
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
||||||
|
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 &&
|
||||||
|
vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
||||||
}
|
}
|
||||||
__except (EXCEPTION_EXECUTE_HANDLER)
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
||||||
{
|
{
|
||||||
|
|
@ -246,8 +291,10 @@ bool CPU_ProbePMULL()
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
|
||||||
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,
|
||||||
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
|
||||||
|
b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,
|
||||||
|
0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
|
||||||
|
|
||||||
const poly128_t r1 = VMULL_P64(a1, b1);
|
const poly128_t r1 = VMULL_P64(a1, b1);
|
||||||
const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2));
|
||||||
|
|
@ -256,8 +303,10 @@ bool CPU_ProbePMULL()
|
||||||
const uint64x2_t t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
const uint64x2_t t1 = (uint64x2_t)(r1); // {bignum,bignum}
|
||||||
const uint64x2_t t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
const uint64x2_t t2 = (uint64x2_t)(r2); // {bignum,bignum}
|
||||||
|
|
||||||
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 &&
|
||||||
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
|
||||||
|
vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 &&
|
||||||
|
vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
|
||||||
}
|
}
|
||||||
|
|
||||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
||||||
|
|
@ -270,6 +319,54 @@ bool CPU_ProbePMULL()
|
||||||
}
|
}
|
||||||
#endif // ARM32 or ARM64
|
#endif // ARM32 or ARM64
|
||||||
|
|
||||||
|
#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
|
||||||
|
// Probes for Power8 polynomial multiply by executing it under a
// temporary SIGILL handler and checking the products against known
// values. Returns true only if the instructions ran and produced the
// expected results. Returns false when probes are disabled
// (CRYPTOPP_NO_CPU_FEATURE_PROBES), when the PMULL code path is not
// compiled in, when the signal machinery cannot be set up, or when the
// SIGILL path is taken.
bool CPU_ProbePMULL()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_PMULL_AVAILABLE)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    // Install the probe's SIGILL handler and remember the previous one.
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    // Capture the current signal mask so it can be restored after the probe.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Do not leave the probe's handler installed when bailing out.
        signal(SIGILL, oldHandler);
        return false;
    }

    // A non-zero return from setjmp means control came back via longjmp.
    // NOTE(review): presumably SigIllHandler jumps to s_jmpSIGILL - confirm.
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        const uint64x2_p a1={0x9090909090909090ull}, b1={0xb0b0b0b0b0b0b0b0ull};
        const uint8x16_p a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,
                             0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
                         b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,
                             0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};

        const uint64x2_p r1 = VMULL_P64(a1, b1);
        const uint64x2_p r2 = VMULL_HIGH_P64((uint64x2_p)(a2), (uint64x2_p)(b2));

        // Spill both products to memory and compare against the known answers.
        word64 w1[2], w2[2];
        VectorStore(r1, (byte*)w1); VectorStore(r2, (byte*)w2);
        result = !!(w1[0] == 0x5300530053005300ull && w1[1] == 0x5300530053005300ull &&
                    w2[0] == 0x6c006c006c006c00ull && w2[1] == 0x6c006c006c006c00ull);
    }

    // Restore the signal mask and the previous SIGILL handler.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
#else
    return false;
#endif  // CRYPTOPP_POWER8_PMULL_AVAILABLE
}
|
||||||
|
#endif // PPC32 or PPC64
|
||||||
|
|
||||||
|
// *************************** ARM NEON *************************** //
|
||||||
|
|
||||||
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
#if CRYPTOPP_ARM_NEON_AVAILABLE
|
||||||
void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c)
|
void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c)
|
||||||
{
|
{
|
||||||
|
|
@ -413,6 +510,8 @@ void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer)
|
||||||
}
|
}
|
||||||
#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
|
#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
|
||||||
|
|
||||||
|
// ***************************** SSE ***************************** //
|
||||||
|
|
||||||
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
|
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
|
||||||
// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
|
// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
|
||||||
// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
|
// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
|
||||||
|
|
|
||||||
|
|
@ -374,14 +374,16 @@ bool TestSettings()
|
||||||
const bool hasAltivec = HasAltivec();
|
const bool hasAltivec = HasAltivec();
|
||||||
const bool hasPower7 = HasPower7();
|
const bool hasPower7 = HasPower7();
|
||||||
const bool hasPower8 = HasPower8();
|
const bool hasPower8 = HasPower8();
|
||||||
|
const bool hasPMULL = HasPMULL();
|
||||||
const bool hasAES = HasAES();
|
const bool hasAES = HasAES();
|
||||||
const bool hasSHA256 = HasSHA256();
|
const bool hasSHA256 = HasSHA256();
|
||||||
const bool hasSHA512 = HasSHA512();
|
const bool hasSHA512 = HasSHA512();
|
||||||
|
|
||||||
std::cout << "passed: ";
|
std::cout << "passed: ";
|
||||||
std::cout << "hasAltivec == " << hasAltivec << ", hasPower7 == " << hasPower7;
|
std::cout << "hasAltivec == " << hasAltivec << ", hasPower7 == " << hasPower7;
|
||||||
std::cout << ", hasPower8 == " << hasPower8 << ", hasAES == " << hasAES;
|
std::cout << ", hasPower8 == " << hasPower8 << ", hasPMULL == " << hasPMULL;
|
||||||
std::cout << ", hasSHA256 == " << hasSHA256 << ", hasSHA512 == " << hasSHA512 << "\n";
|
std::cout << ", hasAES == " << hasAES << ", hasSHA256 == " << hasSHA256;
|
||||||
|
std::cout << ", hasSHA512 == " << hasSHA512 << "\n";
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue