Rework Makefile and ppc-simd.h for XLC and LLVM front-end changes
parent
da00422d3c
commit
1dd0e321a6
138
GNUmakefile
138
GNUmakefile
|
|
@ -101,6 +101,16 @@ ifeq ($(wildcard adhoc.cpp),)
|
||||||
$(shell cp adhoc.cpp.proto adhoc.cpp)
|
$(shell cp adhoc.cpp.proto adhoc.cpp)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# Fixup AIX
|
||||||
|
ifeq ($(IS_AIX),1)
|
||||||
|
BITNESS=$(shell getconf KERNEL_BITMODE)
|
||||||
|
ifeq ($(BITNESS),64)
|
||||||
|
IS_PPC64=1
|
||||||
|
else
|
||||||
|
IS_PPC32=1
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
###########################################################
|
###########################################################
|
||||||
##### General Variables #####
|
##### General Variables #####
|
||||||
###########################################################
|
###########################################################
|
||||||
|
|
@ -400,63 +410,93 @@ ifeq ($(IS_ARMV8),1)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# PowerPC and PowerPC-64. Altivec is available with Power4
|
# PowerPC and PowerPC-64. Altivec is available with Power4.
|
||||||
ifneq ($(IS_PPC32)$(IS_PPC64)$(IS_AIX),000)
|
# The tests below are crafted for IBM XLC and the LLVM front-end.
|
||||||
|
# XLC/LLVM only supplies POWER8. So we set the flags for XLC/LLVM
|
||||||
|
# and lower it if POWER7 or ALTIVEC is available. I've got a
|
||||||
|
# feeling LLVM is going to cause a lot of trouble.
|
||||||
|
ifneq ($(IS_PPC32)$(IS_PPC64),00)
|
||||||
|
HAVE_POWER8 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power8 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
|
||||||
|
ifneq ($(HAVE_POWER8),0)
|
||||||
|
POWER8_FLAG = -mcpu=power8 -maltivec
|
||||||
|
AES_FLAG = $(POWER8_FLAG)
|
||||||
|
GCM_FLAG = $(POWER8_FLAG)
|
||||||
|
SHA_FLAG = $(POWER8_FLAG)
|
||||||
|
SM4_FLAG = $(POWER8_FLAG)
|
||||||
|
endif
|
||||||
|
|
||||||
|
# GCC and some compatibles
|
||||||
|
HAVE_POWER7 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power7 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c '_ARCH_PWR7')
|
||||||
|
ifneq ($(HAVE_POWER7),0)
|
||||||
|
POWER7_FLAG = -mcpu=power7 -maltivec
|
||||||
|
ARIA_FLAG = $(POWER7_FLAG)
|
||||||
|
BLAKE2_FLAG = $(POWER7_FLAG)
|
||||||
|
CHAM_FLAG = $(POWER7_FLAG)
|
||||||
|
LEA_FLAG = $(POWER7_FLAG)
|
||||||
|
SIMON_FLAG = $(POWER7_FLAG)
|
||||||
|
SPECK_FLAG = $(POWER7_FLAG)
|
||||||
|
SIMECK_FLAG = $(POWER7_FLAG)
|
||||||
|
endif
|
||||||
|
|
||||||
# GCC and some compatibles
|
# GCC and some compatibles
|
||||||
HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power4 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
|
HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power4 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
|
||||||
ifneq ($(HAVE_ALTIVEC),0)
|
ifneq ($(HAVE_ALTIVEC),0)
|
||||||
ALTIVEC_FLAG = -mcpu=power4 -maltivec
|
ALTIVEC_FLAG = -mcpu=power4 -maltivec
|
||||||
ARIA_FLAG = -mcpu=power4 -maltivec
|
|
||||||
BLAKE2_FLAG = -mcpu=power4 -maltivec
|
|
||||||
CHAM_FLAG = -mcpu=power4 -maltivec
|
|
||||||
LEA_FLAG = -mcpu=power4 -maltivec
|
|
||||||
SIMON_FLAG = -mcpu=power4 -maltivec
|
|
||||||
SPECK_FLAG = -mcpu=power4 -maltivec
|
|
||||||
SIMECK_FLAG = -mcpu=power4 -maltivec
|
|
||||||
SM4_FLAG = -mcpu=power7 -maltivec
|
|
||||||
endif
|
|
||||||
# GCC and some compatibles
|
|
||||||
HAVE_CRYPTO = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power8 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
|
|
||||||
ifneq ($(HAVE_CRYPTO),0)
|
|
||||||
ALTIVEC_FLAG = -mcpu=power8 -maltivec
|
|
||||||
AES_FLAG = -mcpu=power8 -maltivec
|
|
||||||
GCM_FLAG = -mcpu=power8 -maltivec
|
|
||||||
SHA_FLAG = -mcpu=power8 -maltivec
|
|
||||||
CHAM_FLAG = -mcpu=power8 -maltivec
|
|
||||||
LEA_FLAG = -mcpu=power8 -maltivec
|
|
||||||
SIMON_FLAG = -mcpu=power8 -maltivec
|
|
||||||
SPECK_FLAG = -mcpu=power8 -maltivec
|
|
||||||
SIMECK_FLAG = -mcpu=power8 -maltivec
|
|
||||||
SM4_FLAG = -mcpu=power8 -maltivec
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# IBM XL C/C++
|
# IBM XL C/C++
|
||||||
HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr7 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
|
HAVE_POWER8 = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr8 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
|
||||||
|
ifneq ($(HAVE_POWER8),0)
|
||||||
|
POWER8_FLAG = -qarch=pwr8 -qaltivec
|
||||||
|
AES_FLAG = $(POWER8_FLAG)
|
||||||
|
GCM_FLAG = $(POWER8_FLAG)
|
||||||
|
SHA_FLAG = $(POWER8_FLAG)
|
||||||
|
SM4_FLAG = $(POWER8_FLAG)
|
||||||
|
endif
|
||||||
|
|
||||||
|
# IBM XL C/C++
|
||||||
|
HAVE_POWER7 = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr7 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR7')
|
||||||
|
ifneq ($(HAVE_POWER7),0)
|
||||||
|
POWER7_FLAG = -qarch=pwr7 -qaltivec
|
||||||
|
ARIA_FLAG = $(POWER7_FLAG)
|
||||||
|
BLAKE2_FLAG = $(POWER7_FLAG)
|
||||||
|
CHAM_FLAG = $(POWER7_FLAG)
|
||||||
|
LEA_FLAG = $(POWER7_FLAG)
|
||||||
|
SIMECK_FLAG = $(POWER7_FLAG)
|
||||||
|
SIMON_FLAG = $(POWER7_FLAG)
|
||||||
|
SPECK_FLAG = $(POWER7_FLAG)
|
||||||
|
endif
|
||||||
|
|
||||||
|
# IBM XL C/C++
|
||||||
|
HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr6 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
|
||||||
ifneq ($(HAVE_ALTIVEC),0)
|
ifneq ($(HAVE_ALTIVEC),0)
|
||||||
ALTIVEC_FLAG = -qarch=pwr7 -qaltivec
|
ALTIVEC_FLAG = -qarch=pwr6 -qaltivec
|
||||||
ARIA_FLAG = -qarch=pwr7 -qaltivec
|
|
||||||
BLAKE2_FLAG = -qarch=pwr7 -qaltivec
|
|
||||||
CHAM_FLAG = -qarch=pwr7 -qaltivec
|
|
||||||
LEA_FLAG = -qarch=pwr7 -qaltivec
|
|
||||||
SIMECK_FLAG = -qarch=pwr7 -qaltivec
|
|
||||||
SIMON_FLAG = -qarch=pwr7 -qaltivec
|
|
||||||
SPECK_FLAG = -qarch=pwr7 -qaltivec
|
|
||||||
SM4_FLAG = -qarch=pwr7 -qaltivec
|
|
||||||
endif
|
endif
|
||||||
# IBM XL C/C++
|
|
||||||
HAVE_CRYPTO = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr8 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
|
# LLVM front-ends only provide Power8. It really jams us up
|
||||||
ifneq ($(HAVE_CRYPTO),0)
|
# for ppc-simd.cpp which needs ALTIVEC/POWER4. We have similar
|
||||||
ALTIVEC_FLAG = -qarch=pwr8 -qaltivec
|
# problems with {lea|cham|simon|speck|...}-simd.cpp and POWER7.
|
||||||
AES_FLAG = -qarch=pwr8 -qaltivec
|
HAVE_LLVM = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -E adhoc.cpp 2>&1 | $(GREP) -i -c '__llvm__')
|
||||||
GCM_FLAG = -qarch=pwr8 -qaltivec
|
ifneq ($(HAVE_LLVM),0)
|
||||||
SHA_FLAG = -qarch=pwr8 -qaltivec
|
POWER7_FLAG = $(POWER8_FLAG)
|
||||||
ARIA_FLAG = -qarch=pwr8 -qaltivec
|
ARIA_FLAG = $(POWER8_FLAG)
|
||||||
BLAKE2_FLAG = -qarch=pwr8 -qaltivec
|
BLAKE2_FLAG = $(POWER8_FLAG)
|
||||||
CHAM_FLAG = -qarch=pwr8 -qaltivec
|
CHAM_FLAG = $(POWER8_FLAG)
|
||||||
LEA_FLAG = -qarch=pwr8 -qaltivec
|
LEA_FLAG = $(POWER8_FLAG)
|
||||||
SIMECK_FLAG = -qarch=pwr8 -qaltivec
|
SIMECK_FLAG = $(POWER8_FLAG)
|
||||||
SIMON_FLAG = -qarch=pwr8 -qaltivec
|
SIMON_FLAG = $(POWER8_FLAG)
|
||||||
SPECK_FLAG = -qarch=pwr8 -qaltivec
|
SPECK_FLAG = $(POWER8_FLAG)
|
||||||
SM4_FLAG = -qarch=pwr8 -qaltivec
|
ALTIVEC_FLAG = $(POWER8_FLAG)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(ALTIVEC_FLAG),)
|
||||||
|
CXXFLAGS += -DCRYPTOPP_DISABLE_ALTIVEC
|
||||||
|
endif
|
||||||
|
ifeq ($(POWER7_FLAG),)
|
||||||
|
CXXFLAGS += -DCRYPTOPP_DISABLE_POWER7
|
||||||
|
endif
|
||||||
|
ifeq ($(POWER8_FLAG),)
|
||||||
|
CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
||||||
244
ppc-simd.cpp
244
ppc-simd.cpp
|
|
@ -6,14 +6,6 @@
|
||||||
// is needed because additional CXXFLAGS are required to enable the
|
// is needed because additional CXXFLAGS are required to enable the
|
||||||
// appropriate instruction sets in some build configurations.
|
// appropriate instruction sets in some build configurations.
|
||||||
|
|
||||||
// TODO: Bob Wilkinson reported we are misdetecting CRYPTOPP_POWER8_AVAILABLE.
|
|
||||||
// The problem is, the updated compiler supports them but the down-level
|
|
||||||
// assembler and linker do not. We will probably need to fix it through
|
|
||||||
// the makefile, similar to the way x86 AES and SHA are handled. For the time
|
|
||||||
// being CRYPTOPP_DISABLE_POWER8 will have to be applied manually. Another
|
|
||||||
// twist is, we don't have access to a test machine and it must be fixed
|
|
||||||
// for two compilers (IBM XL C/C++ and GCC). Ugh...
|
|
||||||
|
|
||||||
#include "pch.h"
|
#include "pch.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "stdcpp.h"
|
#include "stdcpp.h"
|
||||||
|
|
@ -53,7 +45,7 @@ bool CPU_ProbeAltivec()
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
||||||
return false;
|
return false;
|
||||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE) || (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
|
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
|
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
|
||||||
|
|
||||||
// longjmp and clobber warnings. Volatile is required.
|
// longjmp and clobber warnings. Volatile is required.
|
||||||
|
|
@ -96,239 +88,5 @@ bool CPU_ProbeAltivec()
|
||||||
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
|
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CPU_ProbePower7()
|
|
||||||
{
|
|
||||||
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
|
||||||
return false;
|
|
||||||
#elif (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
|
|
||||||
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
|
|
||||||
|
|
||||||
// longjmp and clobber warnings. Volatile is required.
|
|
||||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
||||||
volatile int result = false;
|
|
||||||
|
|
||||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
|
|
||||||
if (oldHandler == SIG_ERR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
volatile sigset_t oldMask;
|
|
||||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (setjmp(s_jmpSIGILL))
|
|
||||||
result = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// POWER7 added unaligned loads and store operations
|
|
||||||
byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];
|
|
||||||
|
|
||||||
// Specifically call the VSX loads and stores
|
|
||||||
#if defined(__xlc__) || defined(__xlC__)
|
|
||||||
vec_xst(vec_xl(0, b1+3), 0, b2+1);
|
|
||||||
#else
|
|
||||||
vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
result = (0 == std::memcmp(b1+3, b2+1, 16));
|
|
||||||
}
|
|
||||||
|
|
||||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
|
||||||
signal(SIGILL, oldHandler);
|
|
||||||
return result;
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif // CRYPTOPP_POWER7_AVAILABLE
|
|
||||||
}
|
|
||||||
|
|
||||||
bool CPU_ProbePower8()
|
|
||||||
{
|
|
||||||
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
|
||||||
return false;
|
|
||||||
#elif (CRYPTOPP_POWER8_AVAILABLE)
|
|
||||||
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
|
|
||||||
|
|
||||||
// longjmp and clobber warnings. Volatile is required.
|
|
||||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
||||||
volatile int result = true;
|
|
||||||
|
|
||||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
|
|
||||||
if (oldHandler == SIG_ERR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
volatile sigset_t oldMask;
|
|
||||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (setjmp(s_jmpSIGILL))
|
|
||||||
result = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// POWER8 added 64-bit SIMD operations
|
|
||||||
const word64 x = W64LIT(0xffffffffffffffff);
|
|
||||||
word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];
|
|
||||||
|
|
||||||
// Specifically call the VSX loads and stores
|
|
||||||
#if defined(__xlc__) || defined(__xlC__)
|
|
||||||
const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1);
|
|
||||||
const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2);
|
|
||||||
const uint64x2_p v3 = vec_add(v1, v2); // 64-bit add
|
|
||||||
vec_xst((uint8x16_p)v3, 0, (byte*)w3);
|
|
||||||
#else
|
|
||||||
const uint64x2_p v1 = (uint64x2_p)vec_vsx_ld(0, (byte*)w1);
|
|
||||||
const uint64x2_p v2 = (uint64x2_p)vec_vsx_ld(0, (byte*)w2);
|
|
||||||
const uint64x2_p v3 = vec_add(v1, v2); // 64-bit add
|
|
||||||
vec_vsx_st((uint8x16_p)v3, 0, (byte*)w3);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Relies on integer wrap
|
|
||||||
result = (w3[0] == 3 && w3[1] == 5);
|
|
||||||
}
|
|
||||||
|
|
||||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
|
||||||
signal(SIGILL, oldHandler);
|
|
||||||
return result;
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif // CRYPTOPP_POWER8_AVAILABLE
|
|
||||||
}
|
|
||||||
|
|
||||||
bool CPU_ProbeAES()
|
|
||||||
{
|
|
||||||
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
|
||||||
return false;
|
|
||||||
#elif (CRYPTOPP_POWER8_AVAILABLE)
|
|
||||||
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
|
|
||||||
|
|
||||||
// longjmp and clobber warnings. Volatile is required.
|
|
||||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
||||||
volatile int result = true;
|
|
||||||
|
|
||||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
|
|
||||||
if (oldHandler == SIG_ERR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
volatile sigset_t oldMask;
|
|
||||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (setjmp(s_jmpSIGILL))
|
|
||||||
result = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
byte key[16] = {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1,
|
|
||||||
0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05};
|
|
||||||
byte state[16] = {0x19, 0x3d, 0xe3, 0xb3, 0xa0, 0xf4, 0xe2, 0x2b,
|
|
||||||
0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08};
|
|
||||||
byte r[16] = {255}, z[16] = {};
|
|
||||||
|
|
||||||
uint8x16_p k = (uint8x16_p)VectorLoad(0, key);
|
|
||||||
uint8x16_p s = (uint8x16_p)VectorLoad(0, state);
|
|
||||||
s = VectorEncrypt(s, k);
|
|
||||||
s = VectorEncryptLast(s, k);
|
|
||||||
s = VectorDecrypt(s, k);
|
|
||||||
s = VectorDecryptLast(s, k);
|
|
||||||
VectorStore(s, r);
|
|
||||||
|
|
||||||
result = (0 != std::memcmp(r, z, 16));
|
|
||||||
}
|
|
||||||
|
|
||||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
|
||||||
signal(SIGILL, oldHandler);
|
|
||||||
return result;
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
|
|
||||||
}
|
|
||||||
|
|
||||||
bool CPU_ProbeSHA256()
|
|
||||||
{
|
|
||||||
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
|
||||||
return false;
|
|
||||||
#elif (CRYPTOPP_POWER8_AVAILABLE)
|
|
||||||
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
|
|
||||||
|
|
||||||
// longjmp and clobber warnings. Volatile is required.
|
|
||||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
||||||
volatile int result = false;
|
|
||||||
|
|
||||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
|
|
||||||
if (oldHandler == SIG_ERR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
volatile sigset_t oldMask;
|
|
||||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (setjmp(s_jmpSIGILL))
|
|
||||||
result = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
byte r[16], z[16] = {0};
|
|
||||||
uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
|
|
||||||
|
|
||||||
x = VectorSHA256<0,0>(x);
|
|
||||||
x = VectorSHA256<0,1>(x);
|
|
||||||
x = VectorSHA256<1,0>(x);
|
|
||||||
x = VectorSHA256<1,1>(x);
|
|
||||||
VectorStore(x, r);
|
|
||||||
|
|
||||||
result = (0 == std::memcmp(r, z, 16));
|
|
||||||
}
|
|
||||||
|
|
||||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
|
||||||
signal(SIGILL, oldHandler);
|
|
||||||
return result;
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
|
|
||||||
}
|
|
||||||
|
|
||||||
bool CPU_ProbeSHA512()
|
|
||||||
{
|
|
||||||
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
|
|
||||||
return false;
|
|
||||||
#elif (CRYPTOPP_POWER8_AVAILABLE)
|
|
||||||
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
|
|
||||||
|
|
||||||
// longjmp and clobber warnings. Volatile is required.
|
|
||||||
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
||||||
volatile int result = false;
|
|
||||||
|
|
||||||
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
|
|
||||||
if (oldHandler == SIG_ERR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
volatile sigset_t oldMask;
|
|
||||||
if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (setjmp(s_jmpSIGILL))
|
|
||||||
result = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
byte r[16], z[16] = {0};
|
|
||||||
uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
|
|
||||||
|
|
||||||
x = VectorSHA512<0,0>(x);
|
|
||||||
x = VectorSHA512<0,1>(x);
|
|
||||||
x = VectorSHA512<1,0>(x);
|
|
||||||
x = VectorSHA512<1,1>(x);
|
|
||||||
VectorStore(x, r);
|
|
||||||
|
|
||||||
result = (0 == std::memcmp(r, z, 16));
|
|
||||||
}
|
|
||||||
|
|
||||||
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
|
|
||||||
signal(SIGILL, oldHandler);
|
|
||||||
return result;
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif // CRYPTOPP_POWER8_AVAILABLE
|
|
||||||
}
|
|
||||||
# endif // CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64
|
# endif // CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64
|
||||||
NAMESPACE_END
|
NAMESPACE_END
|
||||||
|
|
|
||||||
308
ppc-simd.h
308
ppc-simd.h
|
|
@ -19,6 +19,26 @@
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
|
|
||||||
|
// We are boxed into undefining macros like CRYPTOPP_POWER8_AVAILABLE.
|
||||||
|
// We set CRYPTOPP_POWER8_AVAILABLE based on compiler versions because
|
||||||
|
// we needed them for the SIMD and non-SIMD files. When the SIMD file is
|
||||||
|
// compiled it may only get -mcpu=power4 or -mcpu=power7, so the POWER7
|
||||||
|
// or POWER8 stuff is not actually available when this header is included.
|
||||||
|
#if !defined(__ALTIVEC__)
|
||||||
|
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(_ARCH_PWR7)
|
||||||
|
# undef CRYPTOPP_POWER7_AVAILABLE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !(defined(_ARCH_PWR8) || defined(_ARCH_PWR9) || defined(_CRYPTO))
|
||||||
|
# undef CRYPTOPP_POWER8_AVAILABLE
|
||||||
|
# undef CRYPTOPP_POWER8_AES_AVAILABLE
|
||||||
|
# undef CRYPTOPP_POWER8_SHA_AVAILABLE
|
||||||
|
# undef CRYPTOPP_POWER8_PMULL_AVAILABLE
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
# include <altivec.h>
|
# include <altivec.h>
|
||||||
# undef vector
|
# undef vector
|
||||||
|
|
@ -28,84 +48,112 @@
|
||||||
|
|
||||||
NAMESPACE_BEGIN(CryptoPP)
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
|
// Datatypes
|
||||||
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
|
|
||||||
typedef __vector unsigned char uint8x16_p;
|
typedef __vector unsigned char uint8x16_p;
|
||||||
typedef __vector unsigned short uint16x8_p;
|
typedef __vector unsigned short uint16x8_p;
|
||||||
typedef __vector unsigned int uint32x4_p;
|
typedef __vector unsigned int uint32x4_p;
|
||||||
|
#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
#if defined(CRYPTOPP_POWER8_AVAILABLE)
|
|
||||||
typedef __vector unsigned long long uint64x2_p;
|
typedef __vector unsigned long long uint64x2_p;
|
||||||
#endif
|
#endif
|
||||||
|
#endif // ALTIVEC/POWER4 datatypes
|
||||||
|
|
||||||
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
|
// POWER4 and above
|
||||||
|
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
|
|
||||||
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) && !defined(CRYPTOPP_POWER7_AVAILABLE)
|
/// \brief Reverse a vector
|
||||||
|
/// \tparam T vector type
|
||||||
inline uint32x4_p VectorLoad(const byte src[16])
|
/// \param src the vector
|
||||||
|
/// \details Reverse() endian swaps the bytes in a vector
|
||||||
|
/// \sa Reverse(), VectorLoadBE(), VectorLoad(), VectorLoadKey()
|
||||||
|
/// \since Crypto++ 6.0
|
||||||
|
template <class T>
|
||||||
|
inline T Reverse(const T& src)
|
||||||
{
|
{
|
||||||
uint8x16_p data;
|
|
||||||
if (IsAlignedOn(src, 16))
|
|
||||||
{
|
|
||||||
data = vec_ld(0, src);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
|
|
||||||
const uint8x16_p perm = vec_lvsl(0, src);
|
|
||||||
const uint8x16_p low = vec_ld(0, src);
|
|
||||||
const uint8x16_p high = vec_ld(15, src);
|
|
||||||
data = vec_perm(low, high, perm);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
|
||||||
return (uint32x4_p)data;
|
|
||||||
#else
|
|
||||||
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
||||||
return (uint32x4_p)vec_perm(data, data, mask);
|
return vec_perm(src, src, mask);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void VectorStore(const uint32x4_p data, byte dest[16])
|
/// \brief Permutes two vectors
|
||||||
|
/// \tparam T1 vector type
|
||||||
|
/// \tparam T2 vector type
|
||||||
|
/// \param vec1 the first vector
|
||||||
|
/// \param vec2 the second vector
|
||||||
|
/// \param mask vector mask
|
||||||
|
/// \details VectorPermute returns a new vector from vec1 and vec2
|
||||||
|
/// based on mask. mask is a uint8x16_p type vector. The return
|
||||||
|
/// vector is the same type as vec1.
|
||||||
|
/// \since Crypto++ 6.0
|
||||||
|
template <class T1, class T2>
|
||||||
|
inline T1 VectorPermute(const T1& vec1, const T1& vec2, const T2& mask)
|
||||||
|
{
|
||||||
|
return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief XOR two vectors
|
||||||
|
/// \tparam T1 vector type
|
||||||
|
/// \tparam T2 vector type
|
||||||
|
/// \param vec1 the first vector
|
||||||
|
/// \param vec2 the second vector
|
||||||
|
/// \details VectorXor returns a new vector from vec1 and vec2. The return
|
||||||
|
/// vector is the same type as vec1.
|
||||||
|
/// \since Crypto++ 6.0
|
||||||
|
template <class T1, class T2>
|
||||||
|
inline T1 VectorXor(const T1& vec1, const T2& vec2)
|
||||||
|
{
|
||||||
|
return (T1)vec_xor(vec1, (T1)vec2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Add two vectors
|
||||||
|
/// \tparam T1 vector type
|
||||||
|
/// \tparam T2 vector type
|
||||||
|
/// \param vec1 the first vector
|
||||||
|
/// \param vec2 the second vector
|
||||||
|
/// \details VectorAdd returns a new vector from vec1 and vec2.
|
||||||
|
/// vec2 is cast to the same type as vec1. The return vector
|
||||||
|
/// is the same type as vec1.
|
||||||
|
/// \since Crypto++ 6.0
|
||||||
|
template <class T1, class T2>
|
||||||
|
inline T1 VectorAdd(const T1& vec1, const T2& vec2)
|
||||||
|
{
|
||||||
|
return (T1)vec_add(vec1, (T1)vec2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Shift two vectors left
|
||||||
|
/// \tparam C shift byte count
|
||||||
|
/// \tparam T1 vector type
|
||||||
|
/// \tparam T2 vector type
|
||||||
|
/// \param vec1 the first vector
|
||||||
|
/// \param vec2 the second vector
|
||||||
|
/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
|
||||||
|
/// new vector after shifting the concatenation by the specified number
|
||||||
|
/// of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
|
||||||
|
/// vector is the same type as vec1.
|
||||||
|
/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
|
||||||
|
/// c)</tt>. On little endian machines VectorShiftLeft() is translated to
|
||||||
|
/// <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
|
||||||
|
/// if on a big endian machine as shown below.
|
||||||
|
/// <pre>
|
||||||
|
/// uint8x16_p r0 = {0};
|
||||||
|
/// uint8x16_p r1 = VectorLoad(ptr);
|
||||||
|
/// uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
|
||||||
|
/// </pre>
|
||||||
|
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
|
||||||
|
/// endian sensitive?</A> on Stack Overflow
|
||||||
|
/// \since Crypto++ 6.0
|
||||||
|
template <unsigned int C, class T1, class T2>
|
||||||
|
inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, 16-C);
|
||||||
const uint8x16_p t1 = (uint8x16_p)vec_perm(data, data, mask);
|
|
||||||
#else
|
#else
|
||||||
const uint8x16_p t1 = (uint8x16_p)data;
|
return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, C);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (IsAlignedOn(dest, 16))
|
|
||||||
{
|
|
||||||
vec_st(t1, 0, dest);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
|
|
||||||
const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
|
|
||||||
vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest);
|
|
||||||
vec_ste((uint16x8_p) t2, 1, (unsigned short*)dest);
|
|
||||||
vec_ste((uint32x4_p) t2, 3, (unsigned int*) dest);
|
|
||||||
vec_ste((uint32x4_p) t2, 4, (unsigned int*) dest);
|
|
||||||
vec_ste((uint32x4_p) t2, 8, (unsigned int*) dest);
|
|
||||||
vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest);
|
|
||||||
vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
|
|
||||||
vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline uint32x4_p VectorXor(const uint32x4_p vec1, const uint32x4_p vec2)
|
#endif // POWER4 and above
|
||||||
{
|
|
||||||
return vec_xor(vec1, vec2);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uint32x4_p VectorAdd(const uint32x4_p vec1, const uint32x4_p vec2)
|
|
||||||
{
|
|
||||||
return vec_add(vec1, vec2);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
// POWER7/POWER4 load and store
|
||||||
#if defined(CRYPTOPP_POWER7_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
#if defined(CRYPTOPP_POWER7_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
|
|
||||||
/// \brief Reverse a 16-byte array
|
/// \brief Reverse a 16-byte array
|
||||||
|
|
@ -124,19 +172,6 @@ inline void ReverseByteArrayLE(byte src[16])
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Reverse a vector
|
|
||||||
/// \tparam T vector type
|
|
||||||
/// \param src the vector
|
|
||||||
/// \details Reverse() endian swaps the bytes in a vector
|
|
||||||
/// \sa Reverse(), VectorLoadBE(), VectorLoad(), VectorLoadKey()
|
|
||||||
/// \since Crypto++ 6.0
|
|
||||||
template <class T>
|
|
||||||
inline T Reverse(const T& src)
|
|
||||||
{
|
|
||||||
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
|
||||||
return vec_perm(src, src, mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Loads a vector from a byte array
|
/// \brief Loads a vector from a byte array
|
||||||
/// \param src the byte array
|
/// \param src the byte array
|
||||||
/// \details Loads a vector in big endian format from a byte array.
|
/// \details Loads a vector in big endian format from a byte array.
|
||||||
|
|
@ -346,86 +381,65 @@ inline void VectorStore(const T& src, int off, byte dest[16])
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Permutes two vectors
|
#else // not CRYPTOPP_POWER7_AVAILABLE
|
||||||
/// \tparam T1 vector type
|
|
||||||
/// \tparam T2 vector type
|
|
||||||
/// \param vec1 the first vector
|
|
||||||
/// \param vec2 the second vector
|
|
||||||
/// \param mask vector mask
|
|
||||||
/// \details VectorPermute returns a new vector from vec1 and vec2
|
|
||||||
/// based on mask. mask is an uint8x16_p type vector. The return
|
|
||||||
/// vector is the same type as vec1.
|
|
||||||
/// \since Crypto++ 6.0
|
|
||||||
template <class T1, class T2>
|
|
||||||
inline T1 VectorPermute(const T1& vec1, const T1& vec2, const T2& mask)
|
|
||||||
{
|
|
||||||
return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief XOR two vectors
|
// POWER7 is not available. Slow Altivec loads and stores.
|
||||||
/// \tparam T1 vector type
|
inline uint32x4_p VectorLoad(const byte src[16])
|
||||||
/// \tparam T2 vector type
|
|
||||||
/// \param vec1 the first vector
|
|
||||||
/// \param vec2 the second vector
|
|
||||||
/// \details VectorXor returns a new vector from vec1 and vec2. The return
|
|
||||||
/// vector is the same type as vec1.
|
|
||||||
/// \since Crypto++ 6.0
|
|
||||||
template <class T1, class T2>
|
|
||||||
inline T1 VectorXor(const T1& vec1, const T2& vec2)
|
|
||||||
{
|
{
|
||||||
return (T1)vec_xor(vec1, (T1)vec2);
|
uint8x16_p data;
|
||||||
}
|
if (IsAlignedOn(src, 16))
|
||||||
|
{
|
||||||
|
data = vec_ld(0, src);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
|
||||||
|
const uint8x16_p perm = vec_lvsl(0, src);
|
||||||
|
const uint8x16_p low = vec_ld(0, src);
|
||||||
|
const uint8x16_p high = vec_ld(15, src);
|
||||||
|
data = vec_perm(low, high, perm);
|
||||||
|
}
|
||||||
|
|
||||||
/// \brief Add two vector
|
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||||
/// \tparam T1 vector type
|
return (uint32x4_p)data;
|
||||||
/// \tparam T2 vector type
|
|
||||||
/// \param vec1 the first vector
|
|
||||||
/// \param vec2 the second vector
|
|
||||||
/// \details VectorAdd returns a new vector from vec1 and vec2.
|
|
||||||
/// vec2 is cast to the same type as vec1. The return vector
|
|
||||||
/// is the same type as vec1.
|
|
||||||
/// \since Crypto++ 6.0
|
|
||||||
template <class T1, class T2>
|
|
||||||
inline T1 VectorAdd(const T1& vec1, const T2& vec2)
|
|
||||||
{
|
|
||||||
return (T1)vec_add(vec1, (T1)vec2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Shift two vectors left
|
|
||||||
/// \tparam C shift byte count
|
|
||||||
/// \tparam T1 vector type
|
|
||||||
/// \tparam T2 vector type
|
|
||||||
/// \param vec1 the first vector
|
|
||||||
/// \param vec2 the second vector
|
|
||||||
/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
|
|
||||||
/// new vector after shifting the concatenation by the specified number
|
|
||||||
/// of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
|
|
||||||
/// vector is the same type as vec1.
|
|
||||||
/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
|
|
||||||
/// c)</tt>. On little endian machines VectorShiftLeft() is translated to
|
|
||||||
/// <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
|
|
||||||
/// if on a big endian machine as shown below.
|
|
||||||
/// <pre>
|
|
||||||
/// uint8x16_p r0 = {0};
|
|
||||||
/// uint8x16_p r1 = VectorLoad(ptr);
|
|
||||||
/// uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
|
|
||||||
/// </pre>
|
|
||||||
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
|
|
||||||
/// endian sensitive?</A> on Stack Overflow
|
|
||||||
/// \since Crypto++ 6.0
|
|
||||||
template <unsigned int C, class T1, class T2>
|
|
||||||
inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
|
|
||||||
{
|
|
||||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
|
||||||
return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, 16-C);
|
|
||||||
#else
|
#else
|
||||||
return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, C);
|
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
||||||
|
return (uint32x4_p)vec_perm(data, data, mask);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // CRYPTOPP_POWER7_AVAILABLE
|
/// \brief Stores a vector to a byte array
/// \param data the vector to store
/// \param dest the 16-byte destination array
/// \details On little endian builds the 16 bytes of data are first reversed
///  with vec_perm and the mask {15,...,0}; otherwise data is used as-is.
/// \details If dest is aligned on a 16-byte boundary (IsAlignedOn(dest, 16))
///  the vector is written with a single vec_st. Otherwise the vector is
///  permuted using vec_lvsr(0, dest) and written with a sequence of vec_ste
///  element stores, following the AltiVec PEM reference linked in the body.
inline void VectorStore(const uint32x4_p data, byte dest[16])
|
||||||
|
{
|
||||||
|
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
|
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
||||||
|
const uint8x16_p t1 = (uint8x16_p)vec_perm(data, data, mask);
|
||||||
|
#else
|
||||||
|
const uint8x16_p t1 = (uint8x16_p)data;
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(CRYPTOPP_POWER8_AES_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
if (IsAlignedOn(dest, 16))
|
||||||
|
{
|
||||||
|
vec_st(t1, 0, dest);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
|
||||||
|
const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
|
||||||
|
vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest);
|
||||||
|
vec_ste((uint16x8_p) t2, 1, (unsigned short*)dest);
|
||||||
|
vec_ste((uint32x4_p) t2, 3, (unsigned int*) dest);
|
||||||
|
vec_ste((uint32x4_p) t2, 4, (unsigned int*) dest);
|
||||||
|
vec_ste((uint32x4_p) t2, 8, (unsigned int*) dest);
|
||||||
|
vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest);
|
||||||
|
vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
|
||||||
|
vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // POWER4/POWER7 load and store
|
||||||
|
|
||||||
|
// POWER8 crypto
|
||||||
|
#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
|
|
||||||
/// \brief One round of AES encryption
|
/// \brief One round of AES encryption
|
||||||
/// \tparam T1 vector type
|
/// \tparam T1 vector type
|
||||||
|
|
@ -507,9 +521,9 @@ inline T1 VectorDecryptLast(const T1& state, const T2& key)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // CRYPTOPP_POWER8_AES_AVAILABLE
|
#endif // POWER8 crypto
|
||||||
|
|
||||||
#if defined(CRYPTOPP_POWER8_SHA_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
|
|
||||||
/// \brief SHA256 Sigma functions
|
/// \brief SHA256 Sigma functions
|
||||||
/// \tparam func function
|
/// \tparam func function
|
||||||
|
|
@ -551,7 +565,7 @@ inline T VectorSHA512(const T& vec)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // CRYPTOPP_POWER8_SHA_AVAILABLE
|
#endif // POWER8 crypto
|
||||||
|
|
||||||
NAMESPACE_END
|
NAMESPACE_END
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,13 +25,6 @@
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
#include "adv-simd.h"
|
#include "adv-simd.h"
|
||||||
|
|
||||||
// We set CRYPTOPP_POWER8_CRYPTO_AVAILABLE based on compiler version.
|
|
||||||
// If the crypto is not available, then we have to disable it here.
|
|
||||||
#if !(defined(__CRYPTO) || defined(_ARCH_PWR8) || defined(_ARCH_PWR9))
|
|
||||||
# undef CRYPTOPP_POWER8_CRYPTO_AVAILABLE
|
|
||||||
# undef CRYPTOPP_POWER8_AES_AVAILABLE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if (CRYPTOPP_AESNI_AVAILABLE)
|
#if (CRYPTOPP_AESNI_AVAILABLE)
|
||||||
# include <smmintrin.h>
|
# include <smmintrin.h>
|
||||||
# include <wmmintrin.h>
|
# include <wmmintrin.h>
|
||||||
|
|
@ -68,6 +61,8 @@ extern const char RIJNDAEL_SIMD_FNAME[] = __FILE__;
|
||||||
|
|
||||||
NAMESPACE_BEGIN(CryptoPP)
|
NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
|
// ************************* Feature Probes ************************* //
|
||||||
|
|
||||||
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
|
||||||
extern "C" {
|
extern "C" {
|
||||||
typedef void (*SigHandler)(int);
|
typedef void (*SigHandler)(int);
|
||||||
|
|
@ -142,6 +137,155 @@ bool CPU_ProbeAES()
|
||||||
}
|
}
|
||||||
#endif // ARM32 or ARM64
|
#endif // ARM32 or ARM64
|
||||||
|
|
||||||
|
#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
|
||||||
|
/// \brief Probe for POWER7 unaligned VSX load and store support
/// \returns true if an unaligned 16-byte VSX load/store round trip executes
///  and reproduces the source bytes, false otherwise
/// \details Returns false when CRYPTOPP_NO_CPU_FEATURE_PROBES is defined,
///  when neither CRYPTOPP_POWER7_AVAILABLE nor CRYPTOPP_POWER8_AVAILABLE is
///  set, or when the SIGILL-guarded probe cannot be built or set up.
/// \details The probe installs SigIllHandler for SIGILL around the test
///  instructions and restores the previous handler and signal mask before
///  returning on every path.
bool CPU_ProbePower7()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)

    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = false;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Do not leave the probe's SIGILL handler installed on failure
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // POWER7 added unaligned loads and store operations
        byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];

        // Specifically call the VSX loads and stores
        #if defined(__xlc__) || defined(__xlC__)
        vec_xst(vec_xl(0, b1+3), 0, b2+1);
        #else
        vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1);
        #endif

        // Both offsets are deliberately misaligned; compare the 16 copied bytes
        result = (0 == std::memcmp(b1+3, b2+1, 16));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // No SIGILL-guarded probe is available without GNU-style inline assembly
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_POWER8_AVAILABLE
}
|
||||||
|
|
||||||
|
/// \brief Probe for POWER8 64-bit SIMD support
/// \returns true if a 64-bit vector add executes and produces the expected
///  lanes, false otherwise
/// \details Returns false when CRYPTOPP_NO_CPU_FEATURE_PROBES is defined,
///  when CRYPTOPP_POWER8_AVAILABLE is not set, or when the SIGILL-guarded
///  probe cannot be built or set up.
/// \details The probe installs SigIllHandler for SIGILL around the test
///  instructions and restores the previous handler and signal mask before
///  returning on every path.
bool CPU_ProbePower8()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)

    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Do not leave the probe's SIGILL handler installed on failure
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // POWER8 added 64-bit SIMD operations
        const word64 x = W64LIT(0xffffffffffffffff);
        word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];

        // Specifically call the VSX loads and stores
        #if defined(__xlc__) || defined(__xlC__)
        const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1);
        const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2);
        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
        vec_xst((uint8x16_p)v3, 0, (byte*)w3);
        #else
        const uint64x2_p v1 = (uint64x2_p)vec_vsx_ld(0, (byte*)w1);
        const uint64x2_p v2 = (uint64x2_p)vec_vsx_ld(0, (byte*)w2);
        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
        vec_vsx_st((uint8x16_p)v3, 0, (byte*)w3);
        #endif

        // Relies on integer wrap: 0xffffffffffffffff + {4, 6} == {3, 5}
        result = (w3[0] == 3 && w3[1] == 5);
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // No SIGILL-guarded probe is available without GNU-style inline assembly
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE
}
|
||||||
|
|
||||||
|
/// \brief Probe for POWER8 in-core AES support
/// \returns true if the AES round operations execute and leave a non-zero
///  result, false otherwise
/// \details Returns false when CRYPTOPP_NO_CPU_FEATURE_PROBES is defined,
///  when CRYPTOPP_POWER8_AES_AVAILABLE is not set, or when the SIGILL-guarded
///  probe cannot be built or set up.
/// \details The probe installs SigIllHandler for SIGILL around the test
///  instructions and restores the previous handler and signal mask before
///  returning on every path.
bool CPU_ProbeAES()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AES_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)

    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Do not leave the probe's SIGILL handler installed on failure
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        byte key[16] = {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1,
                        0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05};
        byte state[16] = {0x19, 0x3d, 0xe3, 0xb3, 0xa0, 0xf4, 0xe2, 0x2b,
                          0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08};
        byte r[16] = {255}, z[16] = {};

        uint8x16_p k = (uint8x16_p)VectorLoad(0, key);
        uint8x16_p s = (uint8x16_p)VectorLoad(0, state);

        // Exercise each of the four AES round primitives once
        s = VectorEncrypt(s, k);
        s = VectorEncryptLast(s, k);
        s = VectorDecrypt(s, k);
        s = VectorDecryptLast(s, k);
        VectorStore(s, r);

        // Success means the stored state is not all zeros
        result = (0 != std::memcmp(r, z, 16));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // No SIGILL-guarded probe is available without GNU-style inline assembly
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AES_AVAILABLE
}
|
||||||
|
#endif // PPC32 or PPC64
|
||||||
|
|
||||||
// ***************************** ARMv8 ***************************** //
|
// ***************************** ARMv8 ***************************** //
|
||||||
|
|
||||||
#if (CRYPTOPP_ARM_AES_AVAILABLE)
|
#if (CRYPTOPP_ARM_AES_AVAILABLE)
|
||||||
|
|
|
||||||
90
sha-simd.cpp
90
sha-simd.cpp
|
|
@ -185,6 +185,96 @@ bool CPU_ProbeSHA2()
|
||||||
}
|
}
|
||||||
#endif // ARM32 or ARM64
|
#endif // ARM32 or ARM64
|
||||||
|
|
||||||
|
#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
|
||||||
|
/// \brief Probe for POWER8 in-core SHA-256 support
/// \returns true if the SHA-256 sigma operations execute and an all-zero
///  input yields an all-zero result, false otherwise
/// \details Returns false when CRYPTOPP_NO_CPU_FEATURE_PROBES is defined,
///  when CRYPTOPP_POWER8_AVAILABLE is not set, or when the SIGILL-guarded
///  probe cannot be built or set up.
/// \details The probe installs SigIllHandler for SIGILL around the test
///  instructions and restores the previous handler and signal mask before
///  returning on every path.
bool CPU_ProbeSHA256()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)

    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = false;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Do not leave the probe's SIGILL handler installed on failure
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        byte r[16], z[16] = {0};
        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});

        // Exercise all four template variants of the sigma function
        x = VectorSHA256<0,0>(x);
        x = VectorSHA256<0,1>(x);
        x = VectorSHA256<1,0>(x);
        x = VectorSHA256<1,1>(x);
        VectorStore(x, r);

        // The probe passes when the stored result equals the zero block
        result = (0 == std::memcmp(r, z, 16));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // No SIGILL-guarded probe is available without GNU-style inline assembly
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE
}
|
||||||
|
|
||||||
|
/// \brief Probe for POWER8 in-core SHA-512 support
/// \returns true if the SHA-512 sigma operations execute and an all-zero
///  input yields an all-zero result, false otherwise
/// \details Returns false when CRYPTOPP_NO_CPU_FEATURE_PROBES is defined,
///  when CRYPTOPP_POWER8_AVAILABLE is not set, or when the SIGILL-guarded
///  probe cannot be built or set up.
/// \details The probe installs SigIllHandler for SIGILL around the test
///  instructions and restores the previous handler and signal mask before
///  returning on every path.
bool CPU_ProbeSHA512()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)

    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = false;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Do not leave the probe's SIGILL handler installed on failure
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        byte r[16], z[16] = {0};
        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});

        // Exercise all four template variants of the sigma function
        x = VectorSHA512<0,0>(x);
        x = VectorSHA512<0,1>(x);
        x = VectorSHA512<1,0>(x);
        x = VectorSHA512<1,1>(x);
        VectorStore(x, r);

        // The probe passes when the stored result equals the zero block
        result = (0 == std::memcmp(r, z, 16));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // No SIGILL-guarded probe is available without GNU-style inline assembly
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE
}
|
||||||
|
#endif // PPC32 or PPC64
|
||||||
|
|
||||||
// ***************** Intel x86 SHA ********************
|
// ***************** Intel x86 SHA ********************
|
||||||
|
|
||||||
// provided by sha.cpp, 16-byte aligned
|
// provided by sha.cpp, 16-byte aligned
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue