Add CRYPTOPP_POWER5_AVAILABLE
Power4 lacks 'vector long long'. Rename datatypes such as 'uint8x16_p8' to 'uint8x16_p'. Originally the p8 suffix indicated use with Power8 in-core crypto. We are now using Altivec/Power4 for general vector operations. pull/548/head
parent
b7e636ac51
commit
15d637771f
10
config.h
10
config.h
|
|
@ -629,7 +629,6 @@ NAMESPACE_END
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// An old Apple G5 with GCC 4.01 has AltiVec, but it's only Power4 or so.
|
// An old Apple G5 with GCC 4.01 has AltiVec, but it's only Power4 or so.
|
||||||
// We need Power7 or above, so the makefile defines CRYPTOPP_DISABLE_ALTIVEC.
|
|
||||||
#if !defined(CRYPTOPP_ALTIVEC_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ALTIVEC)
|
#if !defined(CRYPTOPP_ALTIVEC_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ALTIVEC)
|
||||||
# if defined(_ARCH_PWR4) || defined(__ALTIVEC__) || \
|
# if defined(_ARCH_PWR4) || defined(__ALTIVEC__) || \
|
||||||
(CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40001)
|
(CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40001)
|
||||||
|
|
@ -637,12 +636,21 @@ NAMESPACE_END
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// We need Power5 for 'vector unsigned long long'
|
||||||
|
#if !defined(CRYPTOPP_POWER5_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER5) && defined(CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
|
# if defined(_ARCH_PWR5) || (CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40100)
|
||||||
|
# define CRYPTOPP_POWER5_AVAILABLE 1
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// We need Power7 for unaligned loads and stores
|
||||||
#if !defined(CRYPTOPP_POWER7_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER7) && defined(CRYPTOPP_ALTIVEC_AVAILABLE)
|
#if !defined(CRYPTOPP_POWER7_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER7) && defined(CRYPTOPP_ALTIVEC_AVAILABLE)
|
||||||
# if defined(_ARCH_PWR7) || (CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40100)
|
# if defined(_ARCH_PWR7) || (CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40100)
|
||||||
# define CRYPTOPP_POWER7_AVAILABLE 1
|
# define CRYPTOPP_POWER7_AVAILABLE 1
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// We need Power8 for in-core crypto
|
||||||
#if !defined(CRYPTOPP_POWER8_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER8) && defined(CRYPTOPP_POWER7_AVAILABLE)
|
#if !defined(CRYPTOPP_POWER8_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER8) && defined(CRYPTOPP_POWER7_AVAILABLE)
|
||||||
# if defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800)
|
# if defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800)
|
||||||
# define CRYPTOPP_POWER8_AVAILABLE 1
|
# define CRYPTOPP_POWER8_AVAILABLE 1
|
||||||
|
|
|
||||||
74
ppc-simd.h
74
ppc-simd.h
|
|
@ -29,27 +29,25 @@ NAMESPACE_BEGIN(CryptoPP)
|
||||||
|
|
||||||
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
|
|
||||||
typedef __vector unsigned char uint8x16_p8;
|
typedef __vector unsigned char uint8x16_p;
|
||||||
typedef __vector unsigned int uint32x4_p8;
|
typedef __vector unsigned int uint32x4_p;
|
||||||
|
|
||||||
#if defined(CRYPTOPP_POWER7_AVAILABLE)
|
#if defined(CRYPTOPP_POWER5_AVAILABLE)
|
||||||
typedef __vector unsigned long long uint64x2_p8;
|
typedef __vector unsigned long long uint64x2_p;
|
||||||
#else
|
|
||||||
typedef __vector unsigned int uint64x2_p8;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Use 8x16 for documentation because it is used frequently
|
// Use 8x16 for documentation because it is used frequently
|
||||||
#if defined(CRYPTOPP_XLC_VERSION)
|
#if defined(CRYPTOPP_XLC_VERSION)
|
||||||
typedef uint8x16_p8 VectorType;
|
typedef uint8x16_p VectorType;
|
||||||
#elif defined(CRYPTOPP_GCC_VERSION)
|
#elif defined(CRYPTOPP_GCC_VERSION)
|
||||||
typedef uint64x2_p8 VectorType;
|
typedef uint64x2_p VectorType;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
#if defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
/// \brief Default vector typedef
|
/// \brief Default vector typedef
|
||||||
/// \details IBM XL C/C++ provides equally good support for all vector types,
|
/// \details IBM XL C/C++ provides equally good support for all vector types,
|
||||||
/// including <tt>uint8x16_p8</tt>. GCC provides good support for
|
/// including <tt>uint8x16_p</tt>. GCC provides good support for
|
||||||
/// <tt>uint64x2_p8</tt>. <tt>VectorType</tt> is typedef'd accordingly to
|
/// <tt>uint64x2_p</tt>. <tt>VectorType</tt> is typedef'd accordingly to
|
||||||
/// minimize casting to and from built-in function calls.
|
/// minimize casting to and from built-in function calls.
|
||||||
# define VectorType ...
|
# define VectorType ...
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -68,8 +66,8 @@ inline void ReverseByteArrayLE(byte src[16])
|
||||||
#if defined(CRYPTOPP_XLC_VERSION) && defined(CRYPTOPP_LITTLE_ENDIAN)
|
#if defined(CRYPTOPP_XLC_VERSION) && defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
vec_st(vec_reve(vec_ld(0, src)), 0, src);
|
vec_st(vec_reve(vec_ld(0, src)), 0, src);
|
||||||
#elif defined(CRYPTOPP_LITTLE_ENDIAN)
|
#elif defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
||||||
const uint8x16_p8 zero = {0};
|
const uint8x16_p zero = {0};
|
||||||
vec_vsx_st(vec_perm(vec_vsx_ld(0, src), zero, mask), 0, src);
|
vec_vsx_st(vec_perm(vec_vsx_ld(0, src), zero, mask), 0, src);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -83,8 +81,8 @@ inline void ReverseByteArrayLE(byte src[16])
|
||||||
template <class T>
|
template <class T>
|
||||||
inline T Reverse(const T& src)
|
inline T Reverse(const T& src)
|
||||||
{
|
{
|
||||||
const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
|
||||||
const uint8x16_p8 zero = {0};
|
const uint8x16_p zero = {0};
|
||||||
return vec_perm(src, zero, mask);
|
return vec_perm(src, zero, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -216,12 +214,12 @@ template <class T>
|
||||||
inline void VectorStoreBE(const T& src, uint8_t dest[16])
|
inline void VectorStoreBE(const T& src, uint8_t dest[16])
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_XLC_VERSION)
|
#if defined(CRYPTOPP_XLC_VERSION)
|
||||||
vec_xst_be((uint8x16_p8)src, 0, (uint8_t*)dest);
|
vec_xst_be((uint8x16_p)src, 0, (uint8_t*)dest);
|
||||||
#else
|
#else
|
||||||
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
vec_vsx_st(Reverse((uint8x16_p8)src), 0, (uint8_t*)dest);
|
vec_vsx_st(Reverse((uint8x16_p)src), 0, (uint8_t*)dest);
|
||||||
# else
|
# else
|
||||||
vec_vsx_st((uint8x16_p8)src, 0, (uint8_t*)dest);
|
vec_vsx_st((uint8x16_p)src, 0, (uint8_t*)dest);
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -239,12 +237,12 @@ template <class T>
|
||||||
inline void VectorStoreBE(const T& src, int off, uint8_t dest[16])
|
inline void VectorStoreBE(const T& src, int off, uint8_t dest[16])
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_XLC_VERSION)
|
#if defined(CRYPTOPP_XLC_VERSION)
|
||||||
vec_xst_be((uint8x16_p8)src, off, (uint8_t*)dest);
|
vec_xst_be((uint8x16_p)src, off, (uint8_t*)dest);
|
||||||
#else
|
#else
|
||||||
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
vec_vsx_st(Reverse((uint8x16_p8)src), off, (uint8_t*)dest);
|
vec_vsx_st(Reverse((uint8x16_p)src), off, (uint8_t*)dest);
|
||||||
# else
|
# else
|
||||||
vec_vsx_st((uint8x16_p8)src, off, (uint8_t*)dest);
|
vec_vsx_st((uint8x16_p)src, off, (uint8_t*)dest);
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -262,12 +260,12 @@ inline void VectorStore(const T& src, byte dest[16])
|
||||||
{
|
{
|
||||||
// Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
|
// Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
|
||||||
#if defined(CRYPTOPP_XLC_VERSION)
|
#if defined(CRYPTOPP_XLC_VERSION)
|
||||||
vec_xst_be((uint8x16_p8)src, 0, (uint8_t*)dest);
|
vec_xst_be((uint8x16_p)src, 0, (uint8_t*)dest);
|
||||||
#else
|
#else
|
||||||
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
vec_vsx_st(Reverse((uint8x16_p8)src), 0, (uint8_t*)dest);
|
vec_vsx_st(Reverse((uint8x16_p)src), 0, (uint8_t*)dest);
|
||||||
# else
|
# else
|
||||||
vec_vsx_st((uint8x16_p8)src, 0, (uint8_t*)dest);
|
vec_vsx_st((uint8x16_p)src, 0, (uint8_t*)dest);
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -286,12 +284,12 @@ inline void VectorStore(const T& src, int off, byte dest[16])
|
||||||
{
|
{
|
||||||
// Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
|
// Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
|
||||||
#if defined(CRYPTOPP_XLC_VERSION)
|
#if defined(CRYPTOPP_XLC_VERSION)
|
||||||
vec_xst_be((uint8x16_p8)src, off, (uint8_t*)dest);
|
vec_xst_be((uint8x16_p)src, off, (uint8_t*)dest);
|
||||||
#else
|
#else
|
||||||
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
vec_vsx_st(Reverse((uint8x16_p8)src), off, (uint8_t*)dest);
|
vec_vsx_st(Reverse((uint8x16_p)src), off, (uint8_t*)dest);
|
||||||
# else
|
# else
|
||||||
vec_vsx_st((uint8x16_p8)src, off, (uint8_t*)dest);
|
vec_vsx_st((uint8x16_p)src, off, (uint8_t*)dest);
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -303,13 +301,13 @@ inline void VectorStore(const T& src, int off, byte dest[16])
|
||||||
/// \param vec2 the second vector
|
/// \param vec2 the second vector
|
||||||
/// \param mask vector mask
|
/// \param mask vector mask
|
||||||
/// \details VectorPermute returns a new vector from vec1 and vec2
|
/// \details VectorPermute returns a new vector from vec1 and vec2
|
||||||
/// based on mask. mask is an uint8x16_p8 type vector. The return
|
/// based on mask. mask is an uint8x16_p type vector. The return
|
||||||
/// vector is the same type as vec1.
|
/// vector is the same type as vec1.
|
||||||
/// \since Crypto++ 6.0
|
/// \since Crypto++ 6.0
|
||||||
template <class T1, class T2>
|
template <class T1, class T2>
|
||||||
inline T1 VectorPermute(const T1& vec1, const T1& vec2, const T2& mask)
|
inline T1 VectorPermute(const T1& vec1, const T1& vec2, const T2& mask)
|
||||||
{
|
{
|
||||||
return (T1)vec_perm(vec1, vec2, (uint8x16_p8)mask);
|
return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief XOR two vectors
|
/// \brief XOR two vectors
|
||||||
|
|
@ -349,16 +347,16 @@ inline T1 VectorAdd(const T1& vec1, const T2& vec2)
|
||||||
/// \param vec2 the second vector
|
/// \param vec2 the second vector
|
||||||
/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
|
/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
|
||||||
/// new vector after shifting the concatenation by the specified number
|
/// new vector after shifting the concatenation by the specified number
|
||||||
/// of bytes. Both vec1 and vec2 are cast to uint8x16_p8. The return
|
/// of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
|
||||||
/// vector is the same type as vec1.
|
/// vector is the same type as vec1.
|
||||||
/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
|
/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
|
||||||
/// c)</tt>. On little endian machines VectorShiftLeft() is translated to
|
/// c)</tt>. On little endian machines VectorShiftLeft() is translated to
|
||||||
/// <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
|
/// <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
|
||||||
/// if on a big endian machine as shown below.
|
/// if on a big endian machine as shown below.
|
||||||
/// <pre>
|
/// <pre>
|
||||||
/// uint8x16_p8 r0 = {0};
|
/// uint8x16_p r0 = {0};
|
||||||
/// uint8x16_p8 r1 = VectorLoad(ptr);
|
/// uint8x16_p r1 = VectorLoad(ptr);
|
||||||
/// uint8x16_p8 r5 = VectorShiftLeft<12>(r0, r1);
|
/// uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
|
||||||
/// </pre>
|
/// </pre>
|
||||||
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
|
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
|
||||||
/// endian sensitive?</A> on Stack Overflow
|
/// endian sensitive?</A> on Stack Overflow
|
||||||
|
|
@ -367,9 +365,9 @@ template <unsigned int C, class T1, class T2>
|
||||||
inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
|
inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
#if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||||
return (T1)vec_sld((uint8x16_p8)vec2, (uint8x16_p8)vec1, 16-C);
|
return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, 16-C);
|
||||||
#else
|
#else
|
||||||
return (T1)vec_sld((uint8x16_p8)vec1, (uint8x16_p8)vec2, C);
|
return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, C);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -469,9 +467,9 @@ template <int func, int subfunc, class T>
|
||||||
inline T VectorSHA256(const T& vec)
|
inline T VectorSHA256(const T& vec)
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_XLC_VERSION)
|
#if defined(CRYPTOPP_XLC_VERSION)
|
||||||
return (T)__vshasigmaw((uint32x4_p8)vec, func, subfunc);
|
return (T)__vshasigmaw((uint32x4_p)vec, func, subfunc);
|
||||||
#elif defined(CRYPTOPP_GCC_VERSION)
|
#elif defined(CRYPTOPP_GCC_VERSION)
|
||||||
return (T)__builtin_crypto_vshasigmaw((uint32x4_p8)vec, func, subfunc);
|
return (T)__builtin_crypto_vshasigmaw((uint32x4_p)vec, func, subfunc);
|
||||||
#else
|
#else
|
||||||
CRYPTOPP_ASSERT(0);
|
CRYPTOPP_ASSERT(0);
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -489,9 +487,9 @@ template <int func, int subfunc, class T>
|
||||||
inline T VectorSHA512(const T& vec)
|
inline T VectorSHA512(const T& vec)
|
||||||
{
|
{
|
||||||
#if defined(CRYPTOPP_XLC_VERSION)
|
#if defined(CRYPTOPP_XLC_VERSION)
|
||||||
return (T)__vshasigmad((uint64x2_p8)vec, func, subfunc);
|
return (T)__vshasigmad((uint64x2_p)vec, func, subfunc);
|
||||||
#elif defined(CRYPTOPP_GCC_VERSION)
|
#elif defined(CRYPTOPP_GCC_VERSION)
|
||||||
return (T)__builtin_crypto_vshasigmad((uint64x2_p8)vec, func, subfunc);
|
return (T)__builtin_crypto_vshasigmad((uint64x2_p)vec, func, subfunc);
|
||||||
#else
|
#else
|
||||||
CRYPTOPP_ASSERT(0);
|
CRYPTOPP_ASSERT(0);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue