Add Power4 unaligned Load and Store

pull/548/merge
Jeffrey Walton 2018-01-05 21:27:27 -05:00
parent bfc2b5f02d
commit 4f2c605209
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 32 additions and 15 deletions

View File

@@ -1266,6 +1266,7 @@ size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *su
CRYPTOPP_ASSERT(length >= 16); CRYPTOPP_ASSERT(length >= 16);
const ptrdiff_t blockSize = 16; const ptrdiff_t blockSize = 16;
// const ptrdiff_t vexBlockSize = 16;
ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize; ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0; ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;

View File

@@ -17,6 +17,7 @@
#define CRYPTOPP_PPC_CRYPTO_H #define CRYPTOPP_PPC_CRYPTO_H
#include "config.h" #include "config.h"
#include "misc.h"
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING) #if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
# include <altivec.h> # include <altivec.h>
@@ -44,11 +45,19 @@ typedef __vector unsigned long long uint64x2_p;
inline uint32x4_p VectorLoad(const byte src[16]) inline uint32x4_p VectorLoad(const byte src[16])
{ {
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf uint8x16_p data;
const uint8x16_p perm = vec_lvsl(0, (uint8_t*)src); if (IsAlignedOn(src, 16))
const uint8x16_p low = vec_ld(0, (uint8_t*)src); {
const uint8x16_p high = vec_ld(15, (uint8_t*)src); data = vec_ld(0, (uint8_t*)src);
const uint8x16_p data = vec_perm(low, high, perm); }
else
{
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
const uint8x16_p perm = vec_lvsl(0, (uint8_t*)src);
const uint8x16_p low = vec_ld(0, (uint8_t*)src);
const uint8x16_p high = vec_ld(15, (uint8_t*)src);
data = vec_perm(low, high, perm);
}
#if defined(CRYPTOPP_BIG_ENDIAN) #if defined(CRYPTOPP_BIG_ENDIAN)
return (uint32x4_p)data; return (uint32x4_p)data;
@@ -67,16 +76,23 @@ inline void VectorStore(const uint32x4_p data, byte dest[16])
const uint8x16_p t1 = (uint8x16_p)data; const uint8x16_p t1 = (uint8x16_p)data;
#endif #endif
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf if (IsAlignedOn(dest, 16))
const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest)); {
vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest); vec_st(t1, 0, (uint8_t*) dest);
vec_ste((uint16x8_p) t2, 1, (unsigned short*)dest); }
vec_ste((uint32x4_p) t2, 3, (unsigned int*) dest); else
vec_ste((uint32x4_p) t2, 4, (unsigned int*) dest); {
vec_ste((uint32x4_p) t2, 8, (unsigned int*) dest); // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest); const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest); vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest);
vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest); vec_ste((uint16x8_p) t2, 1, (unsigned short*)dest);
vec_ste((uint32x4_p) t2, 3, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 4, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 8, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest);
vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
}
} }
inline uint32x4_p VectorXor(const uint32x4_p vec1, const uint32x4_p vec2) inline uint32x4_p VectorXor(const uint32x4_p vec1, const uint32x4_p vec2)