Migrate from Power7 to Power8 unaligned loads
parent
d451751eb2
commit
3a8f87490a
50
GNUmakefile
50
GNUmakefile
|
|
@ -443,26 +443,26 @@ ifeq ($(DETECT_FEATURES),1)
|
||||||
ifneq ($(IS_ARM32),0)
|
ifneq ($(IS_ARM32),0)
|
||||||
|
|
||||||
TPROG = TestPrograms/test_arm_neon.cxx
|
TPROG = TestPrograms/test_arm_neon.cxx
|
||||||
TOPT = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
TOPT = -march=armv7-a -mfpu=neon
|
||||||
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
|
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
|
||||||
ifeq ($(strip $(HAVE_OPT)),0)
|
ifeq ($(strip $(HAVE_OPT)),0)
|
||||||
NEON_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
NEON_FLAG = -march=armv7-a -mfpu=neon
|
||||||
ARIA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
ARIA_FLAG = -march=armv7-a -mfpu=neon
|
||||||
AES_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
AES_FLAG = -march=armv7-a -mfpu=neon
|
||||||
CRC_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
CRC_FLAG = -march=armv7-a -mfpu=neon
|
||||||
GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
GCM_FLAG = -march=armv7-a -mfpu=neon
|
||||||
BLAKE2B_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
BLAKE2B_FLAG = -march=armv7-a -mfpu=neon
|
||||||
BLAKE2S_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
BLAKE2S_FLAG = -march=armv7-a -mfpu=neon
|
||||||
CHACHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
CHACHA_FLAG = -march=armv7-a -mfpu=neon
|
||||||
CHAM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
CHAM_FLAG = -march=armv7-a -mfpu=neon
|
||||||
LEA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
LEA_FLAG = -march=armv7-a -mfpu=neon
|
||||||
SHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
SHA_FLAG = -march=armv7-a -mfpu=neon
|
||||||
SIMECK_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
SIMECK_FLAG = -march=armv7-a -mfpu=neon
|
||||||
SIMON64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
SIMON64_FLAG = -march=armv7-a -mfpu=neon
|
||||||
SIMON128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
SIMON128_FLAG = -march=armv7-a -mfpu=neon
|
||||||
SPECK64_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
SPECK64_FLAG = -march=armv7-a -mfpu=neon
|
||||||
SPECK128_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
SPECK128_FLAG = -march=armv7-a -mfpu=neon
|
||||||
SM4_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
|
SM4_FLAG = -march=armv7-a -mfpu=neon
|
||||||
else
|
else
|
||||||
CXXFLAGS += -DCRYPTOPP_DISABLE_ASM
|
CXXFLAGS += -DCRYPTOPP_DISABLE_ASM
|
||||||
endif
|
endif
|
||||||
|
|
@ -621,12 +621,16 @@ ifeq ($(DETECT_FEATURES),1)
|
||||||
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
|
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
|
||||||
ifeq ($(strip $(HAVE_OPT)),0)
|
ifeq ($(strip $(HAVE_OPT)),0)
|
||||||
BLAKE2B_FLAG = $(POWER8_FLAG)
|
BLAKE2B_FLAG = $(POWER8_FLAG)
|
||||||
|
BLAKE2S_FLAG = $(POWER8_FLAG)
|
||||||
|
CHACHA_FLAG = $(POWER8_FLAG)
|
||||||
CRC_FLAG = $(POWER8_FLAG)
|
CRC_FLAG = $(POWER8_FLAG)
|
||||||
GCM_FLAG = $(POWER8_FLAG)
|
GCM_FLAG = $(POWER8_FLAG)
|
||||||
GF2N_FLAG = $(POWER8_FLAG)
|
GF2N_FLAG = $(POWER8_FLAG)
|
||||||
AES_FLAG = $(POWER8_FLAG)
|
AES_FLAG = $(POWER8_FLAG)
|
||||||
SHA_FLAG = $(POWER8_FLAG)
|
SHA_FLAG = $(POWER8_FLAG)
|
||||||
SHACAL2_FLAG = $(POWER8_FLAG)
|
SHACAL2_FLAG = $(POWER8_FLAG)
|
||||||
|
SIMON64_FLAG = $(POWER8_FLAG)
|
||||||
|
SPECK64_FLAG = $(POWER8_FLAG)
|
||||||
SIMON128_FLAG = $(POWER8_FLAG)
|
SIMON128_FLAG = $(POWER8_FLAG)
|
||||||
SPECK128_FLAG = $(POWER8_FLAG)
|
SPECK128_FLAG = $(POWER8_FLAG)
|
||||||
else
|
else
|
||||||
|
|
@ -641,13 +645,9 @@ ifeq ($(DETECT_FEATURES),1)
|
||||||
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
|
HAVE_OPT = $(shell $(CXX) $(TCXXFLAGS) $(ZOPT) $(TOPT) $(TPROG) -o $(TOUT) 2>&1 | tr ' ' '\n' | wc -l)
|
||||||
ifeq ($(strip $(HAVE_OPT)),0)
|
ifeq ($(strip $(HAVE_OPT)),0)
|
||||||
ARIA_FLAG = $(POWER7_FLAG)
|
ARIA_FLAG = $(POWER7_FLAG)
|
||||||
BLAKE2S_FLAG = $(POWER7_FLAG)
|
|
||||||
CHACHA_FLAG = $(POWER7_FLAG)
|
|
||||||
CHAM_FLAG = $(POWER7_FLAG)
|
CHAM_FLAG = $(POWER7_FLAG)
|
||||||
LEA_FLAG = $(POWER7_FLAG)
|
LEA_FLAG = $(POWER7_FLAG)
|
||||||
SIMECK_FLAG = $(POWER7_FLAG)
|
SIMECK_FLAG = $(POWER7_FLAG)
|
||||||
SIMON64_FLAG = $(POWER7_FLAG)
|
|
||||||
SPECK64_FLAG = $(POWER7_FLAG)
|
|
||||||
else
|
else
|
||||||
POWER7_FLAG =
|
POWER7_FLAG =
|
||||||
endif
|
endif
|
||||||
|
|
@ -698,8 +698,8 @@ ifeq ($(DETECT_FEATURES),1)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Drop to Power4 if Power7 not available
|
# Drop to Power4 if Power8 not available
|
||||||
ifeq ($(POWER7_FLAG),)
|
ifeq ($(POWER8_FLAG),)
|
||||||
ifneq ($(ALTIVEC_FLAG),)
|
ifneq ($(ALTIVEC_FLAG),)
|
||||||
BLAKE2S_FLAG = $(ALTIVEC_FLAG)
|
BLAKE2S_FLAG = $(ALTIVEC_FLAG)
|
||||||
CHACHA_FLAG = $(ALTIVEC_FLAG)
|
CHACHA_FLAG = $(ALTIVEC_FLAG)
|
||||||
|
|
@ -1428,7 +1428,7 @@ endif # Dependencies
|
||||||
|
|
||||||
# Cryptogams ARM asm implementation.
|
# Cryptogams ARM asm implementation.
|
||||||
aes_armv4.o : aes_armv4.S
|
aes_armv4.o : aes_armv4.S
|
||||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -mfloat-abi=$(FP_ABI) -c) $<
|
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_AES_FLAG) -c) $<
|
||||||
|
|
||||||
# SSSE3 or NEON available
|
# SSSE3 or NEON available
|
||||||
aria_simd.o : aria_simd.cpp
|
aria_simd.o : aria_simd.cpp
|
||||||
|
|
|
||||||
22
blake2.cpp
22
blake2.cpp
|
|
@ -38,8 +38,8 @@
|
||||||
// https://github.com/weidai11/cryptopp/issues/743
|
// https://github.com/weidai11/cryptopp/issues/743
|
||||||
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
||||||
# define CRYPTOPP_DISABLE_ALTIVEC 1
|
# define CRYPTOPP_DISABLE_ALTIVEC 1
|
||||||
# define CRYPTOPP_POWER7_ALTIVEC 1
|
# define CRYPTOPP_POWER8_ALTIVEC 1
|
||||||
# undef CRYPTOPP_POWER7_AVAILABLE
|
# undef CRYPTOPP_POWER8_AVAILABLE
|
||||||
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -171,8 +171,8 @@ extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state);
|
||||||
extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state);
|
extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
extern void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state);
|
extern void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state);
|
||||||
#elif CRYPTOPP_ALTIVEC_AVAILABLE
|
#elif CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
extern void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state);
|
extern void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state);
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -233,8 +233,8 @@ unsigned int BLAKE2s::OptimalDataAlignment() const
|
||||||
return 4;
|
return 4;
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||||
if (HasPower7())
|
if (HasPower8())
|
||||||
return 16;
|
return 16;
|
||||||
else
|
else
|
||||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
|
|
@ -257,8 +257,8 @@ std::string BLAKE2s::AlgorithmProvider() const
|
||||||
return "NEON";
|
return "NEON";
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||||
if (HasPower7())
|
if (HasPower8())
|
||||||
return "Power7";
|
return "Power7";
|
||||||
else
|
else
|
||||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
|
|
@ -690,10 +690,10 @@ void BLAKE2s::Compress(const byte *input)
|
||||||
return BLAKE2_Compress32_NEON(input, m_state);
|
return BLAKE2_Compress32_NEON(input, m_state);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
if(HasPower7())
|
if(HasPower8())
|
||||||
{
|
{
|
||||||
return BLAKE2_Compress32_POWER7(input, m_state);
|
return BLAKE2_Compress32_POWER8(input, m_state);
|
||||||
}
|
}
|
||||||
#elif CRYPTOPP_ALTIVEC_AVAILABLE
|
#elif CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
if(HasAltivec())
|
if(HasAltivec())
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@
|
||||||
// https://github.com/weidai11/cryptopp/issues/743
|
// https://github.com/weidai11/cryptopp/issues/743
|
||||||
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
||||||
# define CRYPTOPP_DISABLE_ALTIVEC 1
|
# define CRYPTOPP_DISABLE_ALTIVEC 1
|
||||||
# undef CRYPTOPP_POWER7_AVAILABLE
|
# undef CRYPTOPP_POWER8_AVAILABLE
|
||||||
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,8 +38,8 @@
|
||||||
// https://github.com/weidai11/cryptopp/issues/743
|
// https://github.com/weidai11/cryptopp/issues/743
|
||||||
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
#if defined(__xlC__) && (__xlC__ < 0x0d01)
|
||||||
# define CRYPTOPP_DISABLE_ALTIVEC 1
|
# define CRYPTOPP_DISABLE_ALTIVEC 1
|
||||||
# define CRYPTOPP_POWER7_ALTIVEC 1
|
# define CRYPTOPP_POWER8_ALTIVEC 1
|
||||||
# undef CRYPTOPP_POWER7_AVAILABLE
|
# undef CRYPTOPP_POWER8_AVAILABLE
|
||||||
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
# undef CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -692,7 +692,7 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state)
|
||||||
}
|
}
|
||||||
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
|
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
|
||||||
|
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
|
|
||||||
inline uint32x4_p VecLoad32(const void* p)
|
inline uint32x4_p VecLoad32(const void* p)
|
||||||
{
|
{
|
||||||
|
|
@ -1015,11 +1015,11 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state)
|
||||||
VecStore32LE(state.h()+0, VecXor(ff0, VecXor(row1, row3)));
|
VecStore32LE(state.h()+0, VecXor(ff0, VecXor(row1, row3)));
|
||||||
VecStore32LE(state.h()+4, VecXor(ff1, VecXor(row2, row4)));
|
VecStore32LE(state.h()+4, VecXor(ff1, VecXor(row2, row4)));
|
||||||
}
|
}
|
||||||
#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
|
#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
|
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||||
|
|
||||||
void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state)
|
void BLAKE2_Compress32_POWER8(const byte* input, BLAKE2s_State& state)
|
||||||
{
|
{
|
||||||
BLAKE2_Compress32_CORE(input, state);
|
BLAKE2_Compress32_CORE(input, state);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
14
chacha.cpp
14
chacha.cpp
|
|
@ -28,8 +28,8 @@ extern void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input,
|
||||||
extern void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
extern void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||||
extern void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
extern void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
||||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
extern void ChaCha_OperateKeystream_ALTIVEC(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
extern void ChaCha_OperateKeystream_ALTIVEC(const word32 *state, const byte* input, byte *output, unsigned int rounds);
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -153,13 +153,13 @@ void ChaCha_OperateKeystream(KeystreamOperation operation,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||||
if (HasPower7())
|
if (HasPower8())
|
||||||
{
|
{
|
||||||
while (iterationCount >= 4 && MultiBlockSafe(state[12], 4))
|
while (iterationCount >= 4 && MultiBlockSafe(state[12], 4))
|
||||||
{
|
{
|
||||||
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
|
||||||
ChaCha_OperateKeystream_POWER7(state, xorInput ? input : NULLPTR, output, rounds);
|
ChaCha_OperateKeystream_POWER8(state, xorInput ? input : NULLPTR, output, rounds);
|
||||||
|
|
||||||
// MultiBlockSafe avoids overflow on the counter words
|
// MultiBlockSafe avoids overflow on the counter words
|
||||||
state[12] += 4;
|
state[12] += 4;
|
||||||
|
|
@ -267,8 +267,8 @@ std::string ChaCha_AlgorithmProvider()
|
||||||
return "NEON";
|
return "NEON";
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||||
if (HasPower7())
|
if (HasPower8())
|
||||||
return "Power7";
|
return "Power7";
|
||||||
else
|
else
|
||||||
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
|
|
|
||||||
|
|
@ -209,7 +209,7 @@ inline __m128i RotateLeft<16>(const __m128i val)
|
||||||
|
|
||||||
#if (CRYPTOPP_ALTIVEC_AVAILABLE)
|
#if (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
|
|
||||||
// ChaCha_OperateKeystream_POWER7 is optimized for POWER7. However, Altivec
|
// ChaCha_OperateKeystream_POWER8 is optimized for POWER8. However, Altivec
|
||||||
// is supported by using vec_ld and vec_st, and using a composite VecAdd
|
// is supported by using vec_ld and vec_st, and using a composite VecAdd
|
||||||
// that supports 64-bit element adds. vec_ld and vec_st add significant
|
// that supports 64-bit element adds. vec_ld and vec_st add significant
|
||||||
// overhead when memory is not aligned. Despite the drawbacks Altivec
|
// overhead when memory is not aligned. Despite the drawbacks Altivec
|
||||||
|
|
@ -827,7 +827,7 @@ void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *
|
||||||
|
|
||||||
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||||
|
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
|
|
||||||
// ChaCha_OperateKeystream_CORE will use either POWER7 or ALTIVEC,
|
// ChaCha_OperateKeystream_CORE will use either POWER8 or ALTIVEC,
|
||||||
// depending on the flags used to compile this source file. The
|
// depending on the flags used to compile this source file. The
|
||||||
|
|
@ -1096,11 +1096,11 @@ inline void ChaCha_OperateKeystream_CORE(const word32 *state, const byte* input,
|
||||||
VecStore32LE(output + 15*16, r3_3);
|
VecStore32LE(output + 15*16, r3_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
|
#endif // CRYPTOPP_POWER8_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE
|
||||||
|
|
||||||
#if (CRYPTOPP_POWER7_AVAILABLE)
|
#if (CRYPTOPP_POWER8_AVAILABLE)
|
||||||
|
|
||||||
void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds)
|
void ChaCha_OperateKeystream_POWER8(const word32 *state, const byte* input, byte *output, unsigned int rounds)
|
||||||
{
|
{
|
||||||
ChaCha_OperateKeystream_CORE(state, input, output, rounds);
|
ChaCha_OperateKeystream_CORE(state, input, output, rounds);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
22
gcm.cpp
22
gcm.cpp
|
|
@ -75,8 +75,8 @@ extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
|
||||||
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
|
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
extern void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c);
|
extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CRYPTOPP_CLMUL_AVAILABLE
|
#if CRYPTOPP_CLMUL_AVAILABLE
|
||||||
|
|
@ -213,11 +213,11 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
||||||
for (k=1; k<j; k++)
|
for (k=1; k<j; k++)
|
||||||
GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
|
GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
|
||||||
else
|
else
|
||||||
#elif CRYPTOPP_POWER7_AVAILABLE
|
#elif CRYPTOPP_POWER8_AVAILABLE
|
||||||
if (HasPower7())
|
if (HasPower8())
|
||||||
for (j=2; j<=0x80; j*=2)
|
for (j=2; j<=0x80; j*=2)
|
||||||
for (k=1; k<j; k++)
|
for (k=1; k<j; k++)
|
||||||
GCM_Xor16_POWER7(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
|
GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
for (j=2; j<=0x80; j*=2)
|
for (j=2; j<=0x80; j*=2)
|
||||||
|
|
@ -277,13 +277,13 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
|
||||||
GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
|
GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#elif CRYPTOPP_POWER7_AVAILABLE
|
#elif CRYPTOPP_POWER8_AVAILABLE
|
||||||
if (HasPower7())
|
if (HasPower8())
|
||||||
for (j=2; j<=8; j*=2)
|
for (j=2; j<=8; j*=2)
|
||||||
for (k=1; k<j; k++)
|
for (k=1; k<j; k++)
|
||||||
{
|
{
|
||||||
GCM_Xor16_POWER7(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
|
GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
|
||||||
GCM_Xor16_POWER7(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
|
GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -369,8 +369,8 @@ unsigned int GCM_Base::OptimalDataAlignment() const
|
||||||
HasSSE2() ? 16 :
|
HasSSE2() ? 16 :
|
||||||
#elif CRYPTOPP_ARM_NEON_AVAILABLE
|
#elif CRYPTOPP_ARM_NEON_AVAILABLE
|
||||||
HasNEON() ? 4 :
|
HasNEON() ? 4 :
|
||||||
#elif CRYPTOPP_POWER7_AVAILABLE
|
#elif CRYPTOPP_POWER8_AVAILABLE
|
||||||
HasPower7() ? 16 :
|
HasPower8() ? 16 :
|
||||||
#endif
|
#endif
|
||||||
GetBlockCipher().OptimalDataAlignment();
|
GetBlockCipher().OptimalDataAlignment();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -569,12 +569,12 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
|
||||||
|
|
||||||
// ***************************** POWER8 ***************************** //
|
// ***************************** POWER8 ***************************** //
|
||||||
|
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c)
|
void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c)
|
||||||
{
|
{
|
||||||
VecStore(VecXor(VecLoad(b), VecLoad(c)), a);
|
VecStore(VecXor(VecLoad(b), VecLoad(c)), a);
|
||||||
}
|
}
|
||||||
#endif // CRYPTOPP_POWER7_AVAILABLE
|
#endif // CRYPTOPP_POWER8_AVAILABLE
|
||||||
|
|
||||||
#if CRYPTOPP_POWER8_VMULL_AVAILABLE
|
#if CRYPTOPP_POWER8_VMULL_AVAILABLE
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -255,8 +255,8 @@ std::string SIMON64::Base::AlgorithmProvider() const
|
||||||
if (HasNEON())
|
if (HasNEON())
|
||||||
return "NEON";
|
return "NEON";
|
||||||
# endif
|
# endif
|
||||||
# if (CRYPTOPP_POWER7_AVAILABLE)
|
# if (CRYPTOPP_POWER8_AVAILABLE)
|
||||||
if (HasPower7())
|
if (HasPower8())
|
||||||
return "Power7";
|
return "Power7";
|
||||||
# endif
|
# endif
|
||||||
# if (CRYPTOPP_ALTIVEC_AVAILABLE)
|
# if (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@
|
||||||
# include <arm_acle.h>
|
# include <arm_acle.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(CRYPTOPP_POWER7_AVAILABLE)
|
#if defined(CRYPTOPP_POWER8_AVAILABLE)
|
||||||
# include "adv_simd.h"
|
# include "adv_simd.h"
|
||||||
# include "ppc_simd.h"
|
# include "ppc_simd.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -576,7 +576,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
|
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk1 = vec_splats(subkeys[i]);
|
const uint32x4_p rk1 = vec_splats(subkeys[i]);
|
||||||
const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
|
const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
|
||||||
#else
|
#else
|
||||||
|
|
@ -592,7 +592,7 @@ inline void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
if (rounds & 1)
|
if (rounds & 1)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
||||||
#else
|
#else
|
||||||
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
||||||
|
|
@ -634,7 +634,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
if (rounds & 1)
|
if (rounds & 1)
|
||||||
{
|
{
|
||||||
std::swap(x1, y1);
|
std::swap(x1, y1);
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
||||||
#else
|
#else
|
||||||
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
||||||
|
|
@ -647,7 +647,7 @@ inline void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
|
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
|
const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
|
||||||
const uint32x4_p rk2 = vec_splats(subkeys[i]);
|
const uint32x4_p rk2 = vec_splats(subkeys[i]);
|
||||||
#else
|
#else
|
||||||
|
|
@ -696,7 +696,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
|
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk1 = vec_splats(subkeys[i]);
|
const uint32x4_p rk1 = vec_splats(subkeys[i]);
|
||||||
const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
|
const uint32x4_p rk2 = vec_splats(subkeys[i+1]);
|
||||||
#else
|
#else
|
||||||
|
|
@ -717,7 +717,7 @@ inline void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
if (rounds & 1)
|
if (rounds & 1)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
||||||
#else
|
#else
|
||||||
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
||||||
|
|
@ -771,7 +771,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
{
|
{
|
||||||
std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3);
|
std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3);
|
||||||
|
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
const uint32x4_p rk = vec_splats(subkeys[rounds-1]);
|
||||||
#else
|
#else
|
||||||
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
|
||||||
|
|
@ -786,7 +786,7 @@ inline void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
|
for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
|
const uint32x4_p rk1 = vec_splats(subkeys[i+1]);
|
||||||
const uint32x4_p rk2 = vec_splats(subkeys[i]);
|
const uint32x4_p rk2 = vec_splats(subkeys[i]);
|
||||||
#else
|
#else
|
||||||
|
|
|
||||||
|
|
@ -235,8 +235,8 @@ std::string SPECK64::Base::AlgorithmProvider() const
|
||||||
if (HasNEON())
|
if (HasNEON())
|
||||||
return "NEON";
|
return "NEON";
|
||||||
# endif
|
# endif
|
||||||
# if (CRYPTOPP_POWER7_AVAILABLE)
|
# if (CRYPTOPP_POWER8_AVAILABLE)
|
||||||
if (HasPower7())
|
if (HasPower8())
|
||||||
return "Power7";
|
return "Power7";
|
||||||
# endif
|
# endif
|
||||||
# if (CRYPTOPP_ALTIVEC_AVAILABLE)
|
# if (CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
|
|
|
||||||
|
|
@ -517,7 +517,7 @@ void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
for (int i=0; i < static_cast<int>(rounds); ++i)
|
for (int i=0; i < static_cast<int>(rounds); ++i)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk = vec_splats(subkeys[i]);
|
const uint32x4_p rk = vec_splats(subkeys[i]);
|
||||||
#else
|
#else
|
||||||
// subkeys has extra elements so memory backs the last subkey
|
// subkeys has extra elements so memory backs the last subkey
|
||||||
|
|
@ -564,7 +564,7 @@ void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
|
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk = vec_splats(subkeys[i]);
|
const uint32x4_p rk = vec_splats(subkeys[i]);
|
||||||
#else
|
#else
|
||||||
// subkeys has extra elements so memory backs the last subkey
|
// subkeys has extra elements so memory backs the last subkey
|
||||||
|
|
@ -616,7 +616,7 @@ void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
for (int i=0; i < static_cast<int>(rounds); ++i)
|
for (int i=0; i < static_cast<int>(rounds); ++i)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk = vec_splats(subkeys[i]);
|
const uint32x4_p rk = vec_splats(subkeys[i]);
|
||||||
#else
|
#else
|
||||||
// subkeys has extra elements so memory backs the last subkey
|
// subkeys has extra elements so memory backs the last subkey
|
||||||
|
|
@ -685,7 +685,7 @@ void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
|
||||||
|
|
||||||
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
|
for (int i = static_cast<int>(rounds-1); i >= 0; --i)
|
||||||
{
|
{
|
||||||
#if CRYPTOPP_POWER7_AVAILABLE
|
#if CRYPTOPP_POWER8_AVAILABLE
|
||||||
const uint32x4_p rk = vec_splats(subkeys[i]);
|
const uint32x4_p rk = vec_splats(subkeys[i]);
|
||||||
#else
|
#else
|
||||||
// subkeys has extra elements so memory backs the last subkey
|
// subkeys has extra elements so memory backs the last subkey
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue